Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull more infiniband changes from Roland Dreier:
 "Second batch of InfiniBand/RDMA changes for 3.8:
   - cxgb4 changes to fix lookup engine hash collisions
   - mlx4 changes to make flow steering usable
   - fix to IPoIB to avoid pinning dst reference for too long"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  RDMA/cxgb4: Fix bug for active and passive LE hash collision path
  RDMA/cxgb4: Fix LE hash collision bug for passive open connection
  RDMA/cxgb4: Fix LE hash collision bug for active open connection
  mlx4_core: Allow choosing flow steering mode
  mlx4_core: Adjustments to Flow Steering activation logic for SR-IOV
  mlx4_core: Fix error flow in the flow steering wrapper
  mlx4_core: Add QPN enforcement for flow steering rules set by VFs
  cxgb4: Add LE hash collision bug fix path in LLD driver
  cxgb4: Add T4 filter support
  IPoIB: Call skb_dst_drop() once skb is enqueued for sending
diff --git a/.gitignore b/.gitignore
index 92bd0e4..3b8b9b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,7 +60,6 @@
 # Generated include files
 #
 include/config
-include/linux/version.h
 include/generated
 arch/*/include/generated
 
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index ceb1ff7..8afe64f 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -136,8 +136,6 @@
 	- dir with docs about the fault injection capabilities infrastructure.
 fb/
 	- directory with info on the frame buffer graphics abstraction layer.
-feature-removal-schedule.txt
-	- list of files and features that are going to be removed.
 filesystems/
 	- info on the vfs and the various filesystems that Linux supports.
 firmware_class/
diff --git a/Documentation/ABI/README b/Documentation/ABI/README
index 9feaf16..1006982 100644
--- a/Documentation/ABI/README
+++ b/Documentation/ABI/README
@@ -36,9 +36,6 @@
 	the kernel, but are marked to be removed at some later point in
 	time.  The description of the interface will document the reason
 	why it is obsolete and when it can be expected to be removed.
-	The file Documentation/feature-removal-schedule.txt may describe
-	some of these interfaces, giving a schedule for when they will
-	be removed.
 
   removed/
 	This directory contains a list of the old interfaces that have
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 49b82ca..ce259c1 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -1,7 +1,101 @@
+What:		/sys/devices/system/node/possible
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		Nodes that could possibly become online at some point.
+
+What:		/sys/devices/system/node/online
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		Nodes that are online.
+
+What:		/sys/devices/system/node/has_normal_memory
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		Nodes that have regular memory.
+
+What:		/sys/devices/system/node/has_cpu
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		Nodes that have one or more CPUs.
+
+What:		/sys/devices/system/node/has_high_memory
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		Nodes that have regular or high memory.
+		Depends on CONFIG_HIGHMEM.
+
 What:		/sys/devices/system/node/nodeX
 Date:		October 2002
 Contact:	Linux Memory Management list <linux-mm@kvack.org>
 Description:
 		When CONFIG_NUMA is enabled, this is a directory containing
 		information on node X such as what CPUs are local to the
-		node.
+		node. Each file is described below.
+
+What:		/sys/devices/system/node/nodeX/cpumap
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		The node's cpumap.
+
+What:		/sys/devices/system/node/nodeX/cpulist
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		The CPUs associated with the node.
+
+What:		/sys/devices/system/node/nodeX/meminfo
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		Provides information about the node's memory distribution and
+		utilization. Similar to /proc/meminfo, see
+		Documentation/filesystems/proc.txt
+
+What:		/sys/devices/system/node/nodeX/numastat
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		The node's hit/miss statistics, in units of pages.
+		See Documentation/numastat.txt
+
+What:		/sys/devices/system/node/nodeX/distance
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		Distance between the node and all the other nodes
+		in the system.
+
+What:		/sys/devices/system/node/nodeX/vmstat
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		The node's zoned virtual memory statistics.
+		This is a superset of numastat.
+
+What:		/sys/devices/system/node/nodeX/compact
+Date:		February 2010
+Contact:	Mel Gorman <mel@csn.ul.ie>
+Description:
+		When this file is written to, all memory within that node
+		will be compacted. When it completes, memory will be freed
+		into blocks which have as many contiguous pages as possible
+
+What:		/sys/devices/system/node/nodeX/scan_unevictable_pages
+Date:		October 2008
+Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
+Description:
+		When set, it triggers scanning of the node's unevictable lists
+		and moves any pages that have become evictable onto the
+		respective zone's inactive list. See mm/vmscan.c
+
+What:		/sys/devices/system/node/nodeX/hugepages/hugepages-<size>/
+Date:		December 2009
+Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
+Description:
+		The node's huge page size control/query attributes.
+		See Documentation/vm/hugetlbpage.txt
\ No newline at end of file
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 9869466..ec0a38e 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -23,7 +23,7 @@
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
 
-		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK]
+		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK][MODULE_CHECK]
 			mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]
 			fsmagic:= hex value
 			uid:= decimal value
@@ -53,6 +53,7 @@
 			measure func=BPRM_CHECK
 			measure func=FILE_MMAP mask=MAY_EXEC
 			measure func=FILE_CHECK mask=MAY_READ uid=0
+			measure func=MODULE_CHECK uid=0
 			appraise fowner=0
 
 		The default policy measures all executables in bprm_check,
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index 1cf2adf..cd9213c 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -70,6 +70,10 @@
 
 	A directory per each snapshot
 
+parent
+
+	Information identifying the pool, image, and snapshot id for
+	the parent image in a layered rbd image (format 2 only).
 
 Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
 -------------------------------------------------------------
diff --git a/Documentation/ABI/testing/sysfs-devices-node b/Documentation/ABI/testing/sysfs-devices-node
deleted file mode 100644
index 453a210..0000000
--- a/Documentation/ABI/testing/sysfs-devices-node
+++ /dev/null
@@ -1,7 +0,0 @@
-What:		/sys/devices/system/node/nodeX/compact
-Date:		February 2010
-Contact:	Mel Gorman <mel@csn.ul.ie>
-Description:
-		When this file is written to, all memory within that node
-		will be compacted. When it completes, memory will be freed
-		into blocks which have as many contiguous pages as possible
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index a0b6250..4a4fb29 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -468,11 +468,46 @@
 	size_t size = buffer->len;
 
 	dma_handle = dma_map_single(dev, addr, size, direction);
+	if (dma_mapping_error(dev, dma_handle)) {
+		/*
+		 * reduce current DMA mapping usage,
+		 * delay and try again later or
+		 * reset driver.
+		 */
+		goto map_error_handling;
+	}
 
 and to unmap it:
 
 	dma_unmap_single(dev, dma_handle, size, direction);
 
+You should call dma_mapping_error() because dma_map_single() could fail and
+return an error.  Not all DMA implementations support the dma_mapping_error()
+interface, but it is good practice to call it anyway: it invokes the generic
+mapping error check, so the mapping code works correctly on all DMA
+implementations without depending on the specifics of the underlying one.
+Using the returned address without checking for errors could result in
+failures ranging from panics to silent data corruption.  A couple of examples
+of incorrect ways to check for errors, which make assumptions about the
+underlying DMA implementation, follow below; they are applicable to
+dma_map_page() as well.
+
+Incorrect example 1:
+	dma_addr_t dma_handle;
+
+	dma_handle = dma_map_single(dev, addr, size, direction);
+	if ((dma_handle & 0xffff != 0) || (dma_handle >= 0x1000000)) {
+		goto map_error;
+	}
+
+Incorrect example 2:
+	dma_addr_t dma_handle;
+
+	dma_handle = dma_map_single(dev, addr, size, direction);
+	if (dma_handle == DMA_ERROR_CODE) {
+		goto map_error;
+	}
+
 You should call dma_unmap_single when the DMA activity is finished, e.g.
 from the interrupt which told you that the DMA transfer is done.
 
@@ -489,6 +524,14 @@
 	size_t size = buffer->len;
 
 	dma_handle = dma_map_page(dev, page, offset, size, direction);
+	if (dma_mapping_error(dev, dma_handle)) {
+		/*
+		 * reduce current DMA mapping usage,
+		 * delay and try again later or
+		 * reset driver.
+		 */
+		goto map_error_handling;
+	}
 
 	...
 
@@ -496,6 +539,12 @@
 
 Here, "offset" means byte offset within the given page.
 
+You should call dma_mapping_error() because dma_map_page() could fail and
+return an error, as outlined in the dma_map_single() discussion above.
+
+You should call dma_unmap_page when the DMA activity is finished, e.g.
+from the interrupt which told you that the DMA transfer is done.
+
 With scatterlists, you map a region gathered from several regions by:
 
 	int i, count = dma_map_sg(dev, sglist, nents, direction);
@@ -578,6 +627,14 @@
 		dma_addr_t mapping;
 
 		mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE);
+		if (dma_mapping_error(cp->dev, mapping)) {
+			/*
+			 * reduce current DMA mapping usage,
+			 * delay and try again later or
+			 * reset driver.
+			 */
+			goto map_error_handling;
+		}
 
 		cp->rx_buf = buffer;
 		cp->rx_len = len;
@@ -658,6 +715,75 @@
 		 * delay and try again later or
 		 * reset driver.
 		 */
+		goto map_error_handling;
+	}
+
+- unmap pages that are already mapped, when a mapping error occurs in the
+  middle of a multiple-page mapping attempt. These examples are applicable to
+  dma_map_page() as well.
+
+Example 1:
+	dma_addr_t dma_handle1;
+	dma_addr_t dma_handle2;
+
+	dma_handle1 = dma_map_single(dev, addr, size, direction);
+	if (dma_mapping_error(dev, dma_handle1)) {
+		/*
+		 * reduce current DMA mapping usage,
+		 * delay and try again later or
+		 * reset driver.
+		 */
+		goto map_error_handling1;
+	}
+	dma_handle2 = dma_map_single(dev, addr, size, direction);
+	if (dma_mapping_error(dev, dma_handle2)) {
+		/*
+		 * reduce current DMA mapping usage,
+		 * delay and try again later or
+		 * reset driver.
+		 */
+		goto map_error_handling2;
+	}
+
+	...
+
+	map_error_handling2:
+		dma_unmap_single(dev, dma_handle1, size, direction);
+	map_error_handling1:
+
+Example 2: (if buffers are allocated in a loop, unmap all mapped buffers when
+	    a mapping error is detected in the middle)
+
+	dma_addr_t dma_addr;
+	dma_addr_t array[DMA_BUFFERS];
+	int save_index = 0;
+
+	for (i = 0; i < DMA_BUFFERS; i++) {
+
+		...
+
+		dma_addr = dma_map_single(dev, addr, size, direction);
+		if (dma_mapping_error(dev, dma_addr)) {
+			/*
+			 * reduce current DMA mapping usage,
+			 * delay and try again later or
+			 * reset driver.
+			 */
+			goto map_error_handling;
+		}
+		array[i] = dma_addr;
+		save_index++;
+	}
+
+	...
+
+	map_error_handling:
+
+	for (i = 0; i < save_index; i++) {
+
+		...
+
+		dma_unmap_single(dev, array[i], size, direction);
 	}
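
For the scatterlist case, dma_map_sg() signals failure by returning 0, so the
analogous check is on the returned count. A minimal sketch, assuming the
illustrative hw_address[]/hw_len[] arrays used in the mapping examples:

	int i, count;
	struct scatterlist *sg;

	count = dma_map_sg(dev, sglist, nents, direction);
	if (count == 0) {
		/*
		 * reduce current DMA mapping usage,
		 * delay and try again later or
		 * reset driver.
		 */
		goto map_error_handling;
	}

	for_each_sg(sglist, sg, count, i) {
		hw_address[i] = sg_dma_address(sg);
		hw_len[i] = sg_dma_len(sg);
	}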
 
 Networking drivers must call dev_kfree_skb to free the socket buffer
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 66bd97a..78a6c56 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -678,3 +678,15 @@
 of preallocated entries is defined per architecture. If it is too low for you
 boot with 'dma_debug_entries=<your_desired_number>' to overwrite the
 architectural default.
+
+void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
+The dma-debug interface debug_dma_mapping_error() helps to debug drivers that
+fail to check for DMA mapping errors on addresses returned by dma_map_single()
+and dma_map_page().  This interface clears a flag set by debug_dma_map_page()
+to indicate that dma_mapping_error() has been called by the driver.  When the
+driver unmaps, debug_dma_unmap() checks the flag and, if it is still set,
+prints a warning message that includes the call trace leading up to the unmap.
+This interface can be called from dma_mapping_error() routines to enable DMA
+mapping error check debugging.
+
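
As a rough sketch of how this fits together (assuming an architecture that
uses the generic dma_map_ops scheme; details vary per architecture),
dma_mapping_error() can clear the dma-debug flag before delegating to the
architecture-specific check:

	static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
	{
		struct dma_map_ops *ops = get_dma_ops(dev);

		/* tell dma-debug that the driver checked for a mapping error */
		debug_dma_mapping_error(dev, dma_addr);

		if (ops->mapping_error)
			return ops->mapping_error(dev, dma_addr);
		return 0;
	}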
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index f503090..e59480d 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -91,3 +91,12 @@
 dma_unmap_{single,page,sg} functions family to force buffer to stay in
 device domain after releasing a mapping for it. Use this attribute with
 care!
+
+DMA_ATTR_FORCE_CONTIGUOUS
+-------------------------
+
+By default the DMA-mapping subsystem is allowed to assemble the buffer
+allocated by the dma_alloc_attrs() function from individual pages if it
+can be mapped as a contiguous chunk into the device's DMA address space.
+By specifying this attribute the allocated buffer is forced to be
+contiguous in physical memory as well.
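
A minimal usage sketch, assuming a driver that requires a physically
contiguous buffer (dev and size are the driver's own):

	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t dma_handle;
	void *cpu_addr;

	dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &attrs);
	cpu_addr = dma_alloc_attrs(dev, size, &dma_handle,
				   GFP_KERNEL, &attrs);
	if (!cpu_addr)
		return -ENOMEM;	/* contiguous allocations fail more easily */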
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index b030052..4ee2304 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -1141,23 +1141,13 @@
             the <methodname>page_flip</methodname> operation will be called with a
             non-NULL <parameter>event</parameter> argument pointing to a
             <structname>drm_pending_vblank_event</structname> instance. Upon page
-            flip completion the driver must fill the
-            <parameter>event</parameter>::<structfield>event</structfield>
-            <structfield>sequence</structfield>, <structfield>tv_sec</structfield>
-            and <structfield>tv_usec</structfield> fields with the associated
-            vertical blanking count and timestamp, add the event to the
-            <parameter>drm_file</parameter> list of events to be signaled, and wake
-            up any waiting process. This can be performed with
+            flip completion the driver must call <methodname>drm_send_vblank_event</methodname>
+            to fill in the event and wake up any waiting processes.
+            This can be performed with
             <programlisting><![CDATA[
-            struct timeval now;
-
-            event->event.sequence = drm_vblank_count_and_time(..., &now);
-            event->event.tv_sec = now.tv_sec;
-            event->event.tv_usec = now.tv_usec;
-
             spin_lock_irqsave(&dev->event_lock, flags);
-            list_add_tail(&event->base.link, &event->base.file_priv->event_list);
-            wake_up_interruptible(&event->base.file_priv->event_wait);
+            ...
+            drm_send_vblank_event(dev, pipe, event);
             spin_unlock_irqrestore(&dev->event_lock, flags);
             ]]></programlisting>
           </para>
@@ -1621,10 +1611,10 @@
     </sect2>
   </sect1>
 
-  <!-- Internals: mid-layer helper functions -->
+  <!-- Internals: kms helper functions -->
 
   <sect1>
-    <title>Mid-layer Helper Functions</title>
+    <title>Mode Setting Helper Functions</title>
     <para>
       The CRTC, encoder and connector functions provided by the drivers
       implement the DRM API. They're called by the DRM core and ioctl handlers
@@ -2106,6 +2096,21 @@
         </listitem>
       </itemizedlist>
     </sect2>
+    <sect2>
+      <title>Modeset Helper Functions Reference</title>
+!Edrivers/gpu/drm/drm_crtc_helper.c
+    </sect2>
+    <sect2>
+      <title>fbdev Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_fb_helper.c fbdev helpers
+!Edrivers/gpu/drm/drm_fb_helper.c
+    </sect2>
+    <sect2>
+      <title>Display Port Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_dp_helper.c dp helpers
+!Iinclude/drm/drm_dp_helper.h
+!Edrivers/gpu/drm/drm_dp_helper.c
+    </sect2>
   </sect1>
 
   <!-- Internals: vertical blanking -->
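
A hedged sketch of the flip-completion path using drm_send_vblank_event()
(work, pipe and flags are illustrative driver-local state, not part of the
DRM API):

	/* called from the driver's vblank/flip-done interrupt handler */
	spin_lock_irqsave(&dev->event_lock, flags);
	if (work->event)
		drm_send_vblank_event(dev, pipe, work->event);
	spin_unlock_irqrestore(&dev->event_lock, flags);

	drm_vblank_put(dev, pipe);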
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 00687ee..f75ab4c 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -58,6 +58,9 @@
 
      <sect1><title>String Conversions</title>
 !Elib/vsprintf.c
+!Finclude/linux/kernel.h kstrtol
+!Finclude/linux/kernel.h kstrtoul
+!Elib/kstrtox.c
      </sect1>
      <sect1><title>String Manipulation</title>
 <!-- All functions are exported at now
diff --git a/Documentation/aoe/aoe.txt b/Documentation/aoe/aoe.txt
index bfc9cb1..c71487d 100644
--- a/Documentation/aoe/aoe.txt
+++ b/Documentation/aoe/aoe.txt
@@ -125,7 +125,9 @@
   The aoe_deadsecs module parameter determines the maximum number of
   seconds that the driver will wait for an AoE device to provide a
   response to an AoE command.  After aoe_deadsecs seconds have
-  elapsed, the AoE device will be marked as "down".
+  elapsed, the AoE device will be marked as "down".  A value of zero
+  is supported for testing purposes and makes the aoe driver keep
+  trying AoE commands forever.
 
   The aoe_maxout module parameter has a default of 128.  This is the
   maximum number of unresponded packets that will be sent to an AoE
diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
index f5e4caa..1529394 100644
--- a/Documentation/backlight/lp855x-driver.txt
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -35,11 +35,8 @@
 * mode : Brightness control mode. PWM or register based.
 * device_control : Value of DEVICE CONTROL register.
 * initial_brightness : Initial value of backlight brightness.
-* pwm_data : Platform specific pwm generation functions.
+* period_ns : Platform specific PWM period value, in nanoseconds.
 	     Only valid when brightness is pwm input mode.
-	     Functions should be implemented by PWM driver.
-	     - pwm_set_intensity() : set duty of PWM
-	     - pwm_get_intensity() : get current duty of PWM
 * load_new_rom_data :
 	0 : use default configuration data
 	1 : update values of eeprom or eprom registers on loading driver
@@ -71,8 +68,5 @@
 	.mode = PWM_BASED,
 	.device_control = PWM_CONFIG(LP8556),
 	.initial_brightness = INITIAL_BRT,
-	.pwm_data = {
-		     .pwm_set_intensity = platform_pwm_set_intensity,
-		     .pwm_get_intensity = platform_pwm_get_intensity,
-		     },
+	.period_ns = 1000000,
 };
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index a25cb3f..8b8c28b 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -71,6 +71,11 @@
  memory.oom_control		 # set/show oom controls.
  memory.numa_stat		 # show the number of memory usage per numa node
 
+ memory.kmem.limit_in_bytes      # set/show hard limit for kernel memory
+ memory.kmem.usage_in_bytes      # show current kernel memory allocation
+ memory.kmem.failcnt             # show the number of kernel memory usage hits limits
+ memory.kmem.max_usage_in_bytes  # show max kernel memory usage recorded
+
  memory.kmem.tcp.limit_in_bytes  # set/show hard limit for tcp buf memory
  memory.kmem.tcp.usage_in_bytes  # show current tcp buf memory allocation
  memory.kmem.tcp.failcnt            # show the number of tcp buf memory usage hits limits
@@ -268,20 +273,73 @@
 different than user memory, since it can't be swapped out, which makes it
 possible to DoS the system by consuming too much of this precious resource.
 
+Kernel memory won't be accounted at all until a limit on a group is set. This
+allows for existing setups to continue working without disruption.  The limit
+cannot be set if the cgroup has children, or if there are already tasks in the
+cgroup. Attempting to set the limit under those conditions will return -EBUSY.
+When use_hierarchy == 1 and a group is accounted, its children will
+automatically be accounted regardless of their limit value.
+
+After a group is first limited, it will continue to be accounted until it
+is removed. The memory limit itself can, of course, be removed by writing
+-1 to memory.kmem.limit_in_bytes. In this case, kmem will be accounted, but not
+limited.
+
 Kernel memory limits are not imposed for the root cgroup. Usage for the root
-cgroup may or may not be accounted.
+cgroup may or may not be accounted. The memory used is accumulated into
+memory.kmem.usage_in_bytes, or in a separate counter when it makes sense
+(currently only for tcp).
+The main "kmem" counter is fed into the main counter, so kmem charges will
+also be visible from the user counter.
 
 Currently no soft limit is implemented for kernel memory. It is future work
 to trigger slab reclaim when those limits are reached.
 
 2.7.1 Current Kernel Memory resources accounted
 
+* stack pages: every process consumes some stack pages. By accounting into
+kernel memory, we prevent new processes from being created when the kernel
+memory usage is too high.
+
+* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy
+of each kmem_cache is created the first time the cache is touched from inside
+the memcg. The creation is done lazily, so some objects can still be skipped
+while the cache is being created. All objects in a slab page should belong to
+the same memcg. This only fails to hold when a task is migrated to a different
+memcg during the page allocation by the cache.
+
 * sockets memory pressure: some sockets protocols have memory pressure
 thresholds. The Memory Controller allows them to be controlled individually
 per cgroup, instead of globally.
 
 * tcp memory pressure: sockets memory pressure for the tcp protocol.
 
+2.7.2 Common use cases
+
+Because the "kmem" counter is fed to the main user counter, kernel memory can
+never be limited completely independently of user memory. Say "U" is the user
+limit, and "K" the kernel limit. There are three possible ways limits can be
+set:
+
+    U != 0, K = unlimited:
+    This is the standard memcg limitation mechanism already present before kmem
+    accounting. Kernel memory is completely ignored.
+
+    U != 0, K < U:
+    Kernel memory is a subset of the user memory. This setup is useful in
+    deployments where the total amount of memory per-cgroup is overcommitted.
+    Overcommitting kernel memory limits is definitely not recommended, since the
+    box can still run out of non-reclaimable memory.
+    In this case, the admin could set up K so that the sum of all groups is
+    never greater than the total memory, and freely set U at the cost of the
+    group's QoS.
+
+    U != 0, K >= U:
+    Kmem charges will also be fed to the user counter, and reclaim will be
+    triggered for the cgroup for both kinds of memory. This setup gives the
+    admin a unified view of memory, and it is also useful for people who just
+    want to track kernel memory usage.
+
 3. User Interface
 
 0. Configuration
@@ -290,6 +348,7 @@
 b. Enable CONFIG_RESOURCE_COUNTERS
 c. Enable CONFIG_MEMCG
 d. Enable CONFIG_MEMCG_SWAP (to use swap extension)
+e. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
 
 1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
 # mount -t tmpfs none /sys/fs/cgroup
@@ -406,6 +465,11 @@
   Because rmdir() moves all pages to parent, some out-of-use page caches can be
   moved to the parent. If you want to avoid that, force_empty will be useful.
 
+  Also, note that when memory.kmem.limit_in_bytes is set the charges due to
+  kernel pages will still be seen. This is not considered a failure and the
+  write will still return success. In this case, it is expected that
+  memory.kmem.usage_in_bytes == memory.usage_in_bytes.
+
   About use_hierarchy, see Section 6.
 
 5.2 stat file
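
As an illustration of the "U != 0, K < U" case above, a small userspace
sketch that sets K below U (the cgroup path /sys/fs/cgroup/memory/g1 is an
assumption; the kmem limit must be set before tasks join the group):

	#include <stdio.h>

	/* write one value into a cgroup control file */
	static int cg_write(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fprintf(f, "%s\n", val);
		return fclose(f);
	}

	int main(void)
	{
		/* U = 512M (user limit), K = 256M (kernel limit): K < U */
		cg_write("/sys/fs/cgroup/memory/g1/memory.kmem.limit_in_bytes", "256M");
		cg_write("/sys/fs/cgroup/memory/g1/memory.limit_in_bytes", "512M");
		return 0;
	}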
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index 0c4a344..c4d99ed 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -83,16 +83,17 @@
 	res_counter->lock internally (it must be called with res_counter->lock
 	held). The force parameter indicates whether we can bypass the limit.
 
- e. void res_counter_uncharge[_locked]
+ e. u64 res_counter_uncharge[_locked]
 			(struct res_counter *rc, unsigned long val)
 
 	When a resource is released (freed) it should be de-accounted
 	from the resource counter it was accounted to.  This is called
-	"uncharging".
+	"uncharging". The return value of this function indicate the amount
+	of charges still present in the counter.
 
 	The _locked routines imply that the res_counter->lock is taken.
 
- f. void res_counter_uncharge_until
+ f. u64 res_counter_uncharge_until
 		(struct res_counter *rc, struct res_counter *top,
		 unsigned long val)
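
A minimal sketch of how a caller might use the new return value (the memcg
kmem counter and the reference handling are illustrative assumptions, not
the actual memcg code):

	u64 remaining;

	/* uncharge one page and learn how much is still charged */
	remaining = res_counter_uncharge(&memcg->kmem, PAGE_SIZE);
	if (remaining == 0) {
		/* the last charge is gone; drop the group's extra reference */
		css_put(&memcg->css);
	}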
 
diff --git a/Documentation/devicetree/bindings/arm/davinci/nand.txt b/Documentation/devicetree/bindings/arm/davinci/nand.txt
index 49fc7ad..3545ea7 100644
--- a/Documentation/devicetree/bindings/arm/davinci/nand.txt
+++ b/Documentation/devicetree/bindings/arm/davinci/nand.txt
@@ -23,6 +23,9 @@
 - ti,davinci-nand-buswidth: buswidth 8 or 16
 - ti,davinci-nand-use-bbt: use flash based bad block table support.
 
+NAND device bindings may contain additional sub-nodes describing
+partitions of the address space. See partition.txt for more detail.
+
 Example(da850 EVM ):
 nand_cs3@62000000 {
 	compatible = "ti,davinci-nand";
@@ -35,4 +38,9 @@
 	ti,davinci-ecc-mode = "hw";
 	ti,davinci-ecc-bits = <4>;
 	ti,davinci-nand-use-bbt;
+
+	partition@180000 {
+		label = "ubifs";
+		reg = <0x180000 0x7e80000>;
+	};
 };
diff --git a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
new file mode 100644
index 0000000..b4fa934
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
@@ -0,0 +1,191 @@
+NVIDIA Tegra host1x
+
+Required properties:
+- compatible: "nvidia,tegra<chip>-host1x"
+- reg: Physical base address and length of the controller's registers.
+- interrupts: The interrupt outputs from the controller.
+- #address-cells: The number of cells used to represent physical base addresses
+  in the host1x address space. Should be 1.
+- #size-cells: The number of cells used to represent the size of an address
+  range in the host1x address space. Should be 1.
+- ranges: The mapping of the host1x address space to the CPU address space.
+
+The host1x top-level node defines a number of children, each representing one
+of the following host1x client modules:
+
+- mpe: video encoder
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-mpe"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+
+- vi: video input
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-vi"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+
+- epp: encoder pre-processor
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-epp"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+
+- isp: image signal processor
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-isp"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+
+- gr2d: 2D graphics engine
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-gr2d"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+
+- gr3d: 3D graphics engine
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-gr3d"
+  - reg: Physical base address and length of the controller's registers.
+
+- dc: display controller
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-dc"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+
+  Each display controller node has a child node, named "rgb", that represents
+  the RGB output associated with the controller. It can take the following
+  optional properties:
+  - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+  - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
+  - nvidia,edid: supplies a binary EDID blob
+
+- hdmi: High Definition Multimedia Interface
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-hdmi"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+  - vdd-supply: regulator for supply voltage
+  - pll-supply: regulator for PLL
+
+  Optional properties:
+  - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+  - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
+  - nvidia,edid: supplies a binary EDID blob
+
+- tvo: TV encoder output
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-tvo"
+  - reg: Physical base address and length of the controller's registers.
+  - interrupts: The interrupt outputs from the controller.
+
+- dsi: display serial interface
+
+  Required properties:
+  - compatible: "nvidia,tegra<chip>-dsi"
+  - reg: Physical base address and length of the controller's registers.
+
+Example:
+
+/ {
+	...
+
+	host1x {
+		compatible = "nvidia,tegra20-host1x", "simple-bus";
+		reg = <0x50000000 0x00024000>;
+		interrupts = <0 65 0x04   /* mpcore syncpt */
+			      0 67 0x04>; /* mpcore general */
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		ranges = <0x54000000 0x54000000 0x04000000>;
+
+		mpe {
+			compatible = "nvidia,tegra20-mpe";
+			reg = <0x54040000 0x00040000>;
+			interrupts = <0 68 0x04>;
+		};
+
+		vi {
+			compatible = "nvidia,tegra20-vi";
+			reg = <0x54080000 0x00040000>;
+			interrupts = <0 69 0x04>;
+		};
+
+		epp {
+			compatible = "nvidia,tegra20-epp";
+			reg = <0x540c0000 0x00040000>;
+			interrupts = <0 70 0x04>;
+		};
+
+		isp {
+			compatible = "nvidia,tegra20-isp";
+			reg = <0x54100000 0x00040000>;
+			interrupts = <0 71 0x04>;
+		};
+
+		gr2d {
+			compatible = "nvidia,tegra20-gr2d";
+			reg = <0x54140000 0x00040000>;
+			interrupts = <0 72 0x04>;
+		};
+
+		gr3d {
+			compatible = "nvidia,tegra20-gr3d";
+			reg = <0x54180000 0x00040000>;
+		};
+
+		dc@54200000 {
+			compatible = "nvidia,tegra20-dc";
+			reg = <0x54200000 0x00040000>;
+			interrupts = <0 73 0x04>;
+
+			rgb {
+				status = "disabled";
+			};
+		};
+
+		dc@54240000 {
+			compatible = "nvidia,tegra20-dc";
+			reg = <0x54240000 0x00040000>;
+			interrupts = <0 74 0x04>;
+
+			rgb {
+				status = "disabled";
+			};
+		};
+
+		hdmi {
+			compatible = "nvidia,tegra20-hdmi";
+			reg = <0x54280000 0x00040000>;
+			interrupts = <0 75 0x04>;
+			status = "disabled";
+		};
+
+		tvo {
+			compatible = "nvidia,tegra20-tvo";
+			reg = <0x542c0000 0x00040000>;
+			interrupts = <0 76 0x04>;
+			status = "disabled";
+		};
+
+		dsi {
+			compatible = "nvidia,tegra20-dsi";
+			reg = <0x54300000 0x00040000>;
+			status = "disabled";
+		};
+	};
+
+	...
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
new file mode 100644
index 0000000..8ce9cd2
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
@@ -0,0 +1,27 @@
+Device tree bindings for i2c-cbus-gpio driver
+
+Required properties:
+	- compatible = "i2c-cbus-gpio";
+	- gpios: clk, dat, sel
+	- #address-cells = <1>;
+	- #size-cells = <0>;
+
+Optional properties:
+	- child nodes conforming to i2c bus binding
+
+Example:
+
+i2c@0 {
+	compatible = "i2c-cbus-gpio";
+	gpios = <&gpio 66 0 /* clk */
+		 &gpio 65 0 /* dat */
+		 &gpio 64 0 /* sel */
+		>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	retu-mfd: retu@1 {
+		compatible = "retu-mfd";
+		reg = <0x1>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt
new file mode 100644
index 0000000..66709a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt
@@ -0,0 +1,81 @@
+GPIO-based I2C Bus Mux
+
+This binding describes an I2C bus multiplexer that uses GPIOs to
+route the I2C signals.
+
+                                  +-----+  +-----+
+                                  | dev |  | dev |
+    +------------+                +-----+  +-----+
+    | SoC        |                   |        |
+    |            |          /--------+--------+
+    |   +------+ |  +------+    child bus A, on GPIO value set to 0
+    |   | I2C  |-|--| Mux  |
+    |   +------+ |  +--+---+    child bus B, on GPIO value set to 1
+    |            |     |    \----------+--------+--------+
+    |   +------+ |     |               |        |        |
+    |   | GPIO |-|-----+            +-----+  +-----+  +-----+
+    |   +------+ |                  | dev |  | dev |  | dev |
+    +------------+                  +-----+  +-----+  +-----+
+
+Required properties:
+- compatible: i2c-mux-gpio
+- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
+  port is connected to.
+- mux-gpios: list of gpios used to control the muxer
+* Standard I2C mux properties. See mux.txt in this directory.
+* I2C child bus nodes. See mux.txt in this directory.
+
+Optional properties:
+- idle-state: value to set the muxer to when idle. When no value is
+  given, it defaults to the last value used.
+
+For each i2c child node, an I2C child bus will be created. They will
+be numbered based on their order in the device tree.
+
+Whenever an access is made to a device on a child bus, the value set
+in the relevant node's reg property will be output using the list of
+GPIOs, the first in the list holding the least-significant value.
+
+If an idle state is defined, using the idle-state (optional) property,
+whenever an access is not being made to a device on a child bus, the
+GPIOs will be set according to the idle value.
+
+If an idle state is not defined, the most recently used value will be
+left programmed into hardware whenever no access is being made to a
+device on a child bus.
+
+Example:
+	i2cmux {
+		compatible = "i2c-mux-gpio";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		mux-gpios = <&gpio1 22 0 &gpio1 23 0>;
+		i2c-parent = <&i2c1>;
+
+		i2c@1 {
+			reg = <1>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			ssd1307: oled@3c {
+				compatible = "solomon,ssd1307fb-i2c";
+				reg = <0x3c>;
+				pwms = <&pwm 4 3000>;
+				reset-gpios = <&gpio2 7 1>;
+				reset-active-low;
+			};
+		};
+
+		i2c@3 {
+			reg = <3>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			pca9555: pca9555@20 {
+				compatible = "nxp,pca9555";
+				gpio-controller;
+				#gpio-cells = <2>;
+				reg = <0x20>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-ocores.txt b/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
index c15781f..1637c29 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
@@ -1,7 +1,7 @@
 Device tree configuration for i2c-ocores
 
 Required properties:
-- compatible      : "opencores,i2c-ocores"
+- compatible      : "opencores,i2c-ocores" or "aeroflexgaisler,i2cmst"
 - reg             : bus address start and address range size of device
 - interrupts      : interrupt number
 - clock-frequency : frequency of bus clock in Hz
diff --git a/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt
index b6cb5a1..e9611ac 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt
@@ -13,11 +13,17 @@
   - interrupts: interrupt number to the cpu.
   - samsung,i2c-sda-delay: Delay (in ns) applied to data line (SDA) edges.
 
+Required for all cases except "samsung,s3c2440-hdmiphy-i2c":
+  - Samsung GPIO variant (deprecated):
+    - gpios: The order of the gpios should be the following: <SDA, SCL>.
+      The gpio specifier depends on the gpio controller. Required in all
+      cases except for "samsung,s3c2440-hdmiphy-i2c" whose input/output
+      lines are permanently wired to the respective client.
+  - Pinctrl variant (preferred, if available):
+    - pinctrl-0: Pin control group to be used for this controller.
+    - pinctrl-names: Should contain only one value - "default".
+
 Optional properties:
-  - gpios: The order of the gpios should be the following: <SDA, SCL>.
-    The gpio specifier depends on the gpio controller. Required in all
-    cases except for "samsung,s3c2440-hdmiphy-i2c" whose input/output
-    lines are permanently wired to the respective client
  - samsung,i2c-slave-addr: Slave address in multi-master environment. If not
     specified, default value is 0.
   - samsung,i2c-max-bus-freq: Desired frequency in Hz of the bus. If not
@@ -31,8 +37,14 @@
 		interrupts = <345>;
 		samsung,i2c-sda-delay = <100>;
 		samsung,i2c-max-bus-freq = <100000>;
+		/* Samsung GPIO variant begins here */
 		gpios = <&gpd1 2 0 /* SDA */
 			 &gpd1 3 0 /* SCL */>;
+		/* Samsung GPIO variant ends here */
+		/* Pinctrl variant begins here */
+		pinctrl-0 = <&i2c3_bus>;
+		pinctrl-names = "default";
+		/* Pinctrl variant ends here */
 		#address-cells = <1>;
 		#size-cells = <0>;
 
diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt
new file mode 100644
index 0000000..ead641c
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt
@@ -0,0 +1,46 @@
+* GPIO driven matrix keypad device tree bindings
+
+GPIO driven matrix keypad is used to interface a SoC with a matrix keypad.
+The matrix keypad supports multiple row and column lines, a key can be
+placed at each intersection of a unique row and a unique column. The matrix
+keypad can sense a key-press and key-release by means of GPIO lines and
+report the event using GPIO interrupts to the cpu.
+
+Required Properties:
+- compatible:		Should be "gpio-matrix-keypad"
+- row-gpios:		List of gpios used as row lines. The gpio specifier
+			for this property depends on the gpio controller to
+			which these row lines are connected.
+- col-gpios:		List of gpios used as column lines. The gpio specifier
+			for this property depends on the gpio controller to
+			which these column lines are connected.
+- linux,keymap:		The definition can be found at
+			bindings/input/matrix-keymap.txt
+
+Optional Properties:
+- linux,no-autorepeat:	do not enable the autorepeat feature.
+- linux,wakeup:		use any event on keypad as wakeup event.
+- debounce-delay-ms:	debounce interval in milliseconds
+- col-scan-delay-us:	delay, measured in microseconds, that is needed
+			before we can scan the keypad after activating a column gpio
+
+Example:
+	matrix-keypad {
+		compatible = "gpio-matrix-keypad";
+		debounce-delay-ms = <5>;
+		col-scan-delay-us = <2>;
+
+		row-gpios = <&gpio2 25 0
+			     &gpio2 26 0
+			     &gpio2 27 0>;
+
+		col-gpios = <&gpio2 21 0
+			     &gpio2 22 0>;
+
+		linux,keymap = <0x0000008B
+				0x0100009E
+				0x02000069
+				0x0001006A
+				0x0101001C
+				0x0201006C>;
+	};
diff --git a/Documentation/devicetree/bindings/input/pwm-beeper.txt b/Documentation/devicetree/bindings/input/pwm-beeper.txt
new file mode 100644
index 0000000..be332ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/pwm-beeper.txt
@@ -0,0 +1,7 @@
+* PWM beeper device tree bindings
+
+Registers a PWM device as beeper.
+
+Required properties:
+- compatible: should be "pwm-beeper"
+- pwms: phandle to the physical PWM device
diff --git a/Documentation/devicetree/bindings/input/stmpe-keypad.txt b/Documentation/devicetree/bindings/input/stmpe-keypad.txt
new file mode 100644
index 0000000..1b97222
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/stmpe-keypad.txt
@@ -0,0 +1,39 @@
+* STMPE Keypad
+
+Required properties:
+ - compatible               : "st,stmpe-keypad"
+ - linux,keymap             : See ./matrix-keymap.txt
+
+Optional properties:
+ - debounce-interval        : Debouncing interval time in milliseconds
+ - st,scan-count            : Scanning cycles elapsed before key data is updated
+ - st,no-autorepeat         : If specified, the device will not autorepeat
+
+Example:
+
+	stmpe_keypad {
+		compatible = "st,stmpe-keypad";
+
+		debounce-interval = <64>;
+		st,scan-count = <8>;
+		st,no-autorepeat;
+
+		linux,keymap = <0x205006b
+				0x4010074
+				0x3050072
+				0x1030004
+				0x502006a
+				0x500000a
+				0x5008b
+				0x706001c
+				0x405000b
+				0x6070003
+				0x3040067
+				0x303006c
+				0x60400e7
+				0x602009e
+				0x4020073
+				0x5050002
+				0x4030069
+				0x3020008>;
+	};
diff --git a/Documentation/devicetree/bindings/input/tca8418_keypad.txt b/Documentation/devicetree/bindings/input/tca8418_keypad.txt
new file mode 100644
index 0000000..2a1538f
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/tca8418_keypad.txt
@@ -0,0 +1,8 @@
+
+Required properties:
+- compatible: "ti,tca8418"
+- reg: the I2C address
+- interrupts: IRQ line number, should trigger on falling edge
+- keypad,num-rows: The number of rows
+- keypad,num-columns: The number of columns
+- linux,keymap: Key definitions, see ./matrix-keymap.txt.
diff --git a/Documentation/devicetree/bindings/input/touchscreen/mms114.txt b/Documentation/devicetree/bindings/input/touchscreen/mms114.txt
new file mode 100644
index 0000000..89d4c56
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/mms114.txt
@@ -0,0 +1,34 @@
+* MELFAS MMS114 touchscreen controller
+
+Required properties:
+- compatible: must be "melfas,mms114"
+- reg: I2C address of the chip
+- interrupts: interrupt to which the chip is connected
+- x-size: horizontal resolution of touchscreen
+- y-size: vertical resolution of touchscreen
+
+Optional properties:
+- contact-threshold:
+- moving-threshold:
+- x-invert: invert X axis
+- y-invert: invert Y axis
+
+Example:
+
+	i2c@00000000 {
+		/* ... */
+
+		touchscreen@48 {
+			compatible = "melfas,mms114";
+			reg = <0x48>;
+			interrupts = <39 0>;
+			x-size = <720>;
+			y-size = <1280>;
+			contact-threshold = <10>;
+			moving-threshold = <10>;
+			x-invert;
+			y-invert;
+		};
+
+		/* ... */
+	};
diff --git a/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt b/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt
new file mode 100644
index 0000000..127baa3
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt
@@ -0,0 +1,43 @@
+STMPE Touchscreen
+----------------
+
+Required properties:
+ - compatible: "st,stmpe-ts"
+
+Optional properties:
+- st,sample-time: ADC conversion time in number of clocks.  (0 -> 36 clocks, 1 ->
+  44 clocks, 2 -> 56 clocks, 3 -> 64 clocks, 4 -> 80 clocks, 5 -> 96 clocks, 6
+  -> 144 clocks), recommended is 4.
+- st,mod-12b: ADC Bit mode (0 -> 10bit ADC, 1 -> 12bit ADC)
+- st,ref-sel: ADC reference source (0 -> internal reference, 1 -> external
+  reference)
+- st,adc-freq: ADC Clock speed (0 -> 1.625 MHz, 1 -> 3.25 MHz, 2 || 3 -> 6.5 MHz)
+- st,ave-ctrl: Sample average control (0 -> 1 sample, 1 -> 2 samples, 2 -> 4
+  samples, 3 -> 8 samples)
+- st,touch-det-delay: Touch detect interrupt delay (0 -> 10 us, 1 -> 50 us, 2 ->
+  100 us, 3 -> 500 us, 4-> 1 ms, 5 -> 5 ms, 6 -> 10 ms, 7 -> 50 ms) recommended
+  is 3
+- st,settling: Panel driver settling time (0 -> 10 us, 1 -> 100 us, 2 -> 500 us, 3
+  -> 1 ms, 4 -> 5 ms, 5 -> 10 ms, 6 -> 50 ms, 7 -> 100 ms) recommended is 2
+- st,fraction-z: Length of the fractional part in z (fraction-z ([0..7]) = Count of
+  the fractional part) recommended is 7
+- st,i-drive: current limit value of the touchscreen drivers (0 -> 20 mA typical 35
+  mA max, 1 -> 50 mA typical 80 mA max)
+
+The node name must be stmpe_touchscreen and it should be a child node of the
+stmpe node to which it belongs.
+
+Example:
+
+	stmpe_touchscreen {
+		compatible = "st,stmpe-ts";
+		st,sample-time = <4>;
+		st,mod-12b = <1>;
+		st,ref-sel = <0>;
+		st,adc-freq = <1>;
+		st,ave-ctrl = <1>;
+		st,touch-det-delay = <2>;
+		st,settling = <2>;
+		st,fraction-z = <7>;
+		st,i-drive = <1>;
+	};
diff --git a/Documentation/devicetree/bindings/mfd/stmpe.txt b/Documentation/devicetree/bindings/mfd/stmpe.txt
new file mode 100644
index 0000000..56edb552
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/stmpe.txt
@@ -0,0 +1,28 @@
+* ST Microelectronics STMPE Multi-Functional Device
+
+STMPE is an MFD device which may expose the following inbuilt devices: gpio,
+keypad, touchscreen, adc, pwm, rotator.
+
+Required properties:
+ - compatible                   : "st,stmpe[610|801|811|1601|2401|2403]"
+ - reg                          : I2C/SPI address of the device
+
+Optional properties:
+ - interrupts                   : The interrupt outputs from the controller
+ - interrupt-controller         : Marks the device node as an interrupt controller
+ - interrupt-parent             : Specifies which IRQ controller we're connected to
+ - wakeup-source                : Marks the input device as wakeup capable
+ - st,autosleep-timeout         : Valid entries (ms): 4, 16, 32, 64, 128, 256, 512 and 1024
+
+Example:
+
+	stmpe1601: stmpe1601@40 {
+		compatible = "st,stmpe1601";
+		reg = <0x40>;
+		interrupts = <26 0x4>;
+		interrupt-parent = <&gpio6>;
+		interrupt-controller;
+
+		wakeup-source;
+		st,autosleep-timeout = <1024>;
+	};
diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
new file mode 100644
index 0000000..b04d03a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -0,0 +1,23 @@
+* Denali NAND controller
+
+Required properties:
+  - compatible : should be "denali,denali-nand-dt"
+  - reg : should contain registers location and length for data and reg.
+  - reg-names: Should contain the reg names "nand_data" and "denali_reg"
+  - interrupts : The interrupt number.
+  - dma-mask : DMA bit mask
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Examples:
+
+nand: nand@ff900000 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "denali,denali-nand-dt";
+	reg = <0xff900000 0x100000>, <0xffb80000 0x10000>;
+	reg-names = "nand_data", "denali_reg";
+	interrupts = <0 144 4>;
+	dma-mask = <0xffffffff>;
+};
diff --git a/Documentation/devicetree/bindings/mtd/flctl-nand.txt b/Documentation/devicetree/bindings/mtd/flctl-nand.txt
new file mode 100644
index 0000000..427f46d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/flctl-nand.txt
@@ -0,0 +1,49 @@
+FLCTL NAND controller
+
+Required properties:
+- compatible : "renesas,shmobile-flctl-sh7372"
+- reg : Address range of the FLCTL
+- interrupts : flste IRQ number
+- nand-bus-width : bus width to NAND chip
+
+Optional properties:
+- dmas: DMA specifier(s)
+- dma-names: name for each DMA specifier. Valid names are
+	     "data_tx", "data_rx", "ecc_tx", "ecc_rx"
+
+The DMA fields are not used yet in the driver but are listed here to
+complete the bindings.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Example:
+
+	flctl@e6a30000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "renesas,shmobile-flctl-sh7372";
+		reg = <0xe6a30000 0x100>;
+		interrupts = <0x0d80>;
+
+		nand-bus-width = <16>;
+
+		dmas = <&dmac 1 /* data_tx */
+			&dmac 2>; /* data_rx */
+		dma-names = "data_tx", "data_rx";
+
+		system@0 {
+			label = "system";
+			reg = <0x0 0x8000000>;
+		};
+
+		userdata@8000000 {
+			label = "userdata";
+			reg = <0x8000000 0x10000000>;
+		};
+
+		cache@18000000 {
+			label = "cache";
+			reg = <0x18000000 0x8000000>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
index e2c663b..e3ea32e 100644
--- a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
@@ -3,9 +3,7 @@
 Required properties:
 - compatible : "st,spear600-fsmc-nand"
 - reg : Address range of the mtd chip
-- reg-names: Should contain the reg names "fsmc_regs" and "nand_data"
-- st,ale-off : Chip specific offset to ALE
-- st,cle-off : Chip specific offset to CLE
+- reg-names: Should contain the reg names "fsmc_regs", "nand_data", "nand_addr" and "nand_cmd"
 
 Optional properties:
 - bank-width : Width (in bytes) of the device.  If not present, the width
@@ -19,10 +17,10 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0xd1800000 0x1000	/* FSMC Register */
-		       0xd2000000 0x4000>;	/* NAND Base */
-		reg-names = "fsmc_regs", "nand_data";
-		st,ale-off = <0x20000>;
-		st,cle-off = <0x10000>;
+		       0xd2000000 0x0010	/* NAND Base DATA */
+		       0xd2020000 0x0010	/* NAND Base ADDR */
+		       0xd2010000 0x0010>;	/* NAND Base CMD */
+		reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 
 		bank-width = <1>;
 		nand-skip-bbtscan;
diff --git a/Documentation/devicetree/bindings/mtd/m25p80.txt b/Documentation/devicetree/bindings/mtd/m25p80.txt
new file mode 100644
index 0000000..6d3d576
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/m25p80.txt
@@ -0,0 +1,29 @@
+* MTD SPI driver for ST M25Pxx (and similar) serial flash chips
+
+Required properties:
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+  representing partitions.
+- compatible : Should be the manufacturer and the name of the chip. Bear in mind
+               the DT binding is not Linux-only, but in the case of Linux, see the
+               "m25p_ids" table in drivers/mtd/devices/m25p80.c for the list of
+               supported chips.
+- reg : Chip-Select number
+- spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
+
+Optional properties:
+- m25p,fast-read : Use the "fast read" opcode to read data from the chip instead
+                   of the usual "read" opcode. This opcode is not supported by
+                   all chips and support for it cannot be detected at runtime.
+                   Refer to your chips' datasheet to check if this is supported
+                   by your chip.
+
+Example:
+
+	flash: m25p80@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "spansion,m25p80";
+		reg = <0>;
+		spi-max-frequency = <40000000>;
+		m25p,fast-read;
+	};
diff --git a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
index 94de19b..dab7847 100644
--- a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
+++ b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
@@ -23,6 +23,9 @@
    unaligned accesses as implemented in the JFFS2 code via memcpy().
    By defining "no-unaligned-direct-access", the flash will not be
    exposed directly to the MTD users (e.g. JFFS2) any more.
+ - linux,mtd-name: allows specifying the mtd name for retro compatibility with
+   physmap-flash drivers, as the boot loader passes the mtd partition via the
+   old device name physmap-flash.
 
 For JEDEC compatible devices, the following additional properties
 are defined:
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/raideng.txt b/Documentation/devicetree/bindings/powerpc/fsl/raideng.txt
new file mode 100644
index 0000000..4ad29b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/raideng.txt
@@ -0,0 +1,81 @@
+* Freescale 85xx RAID Engine nodes
+
+RAID Engine nodes are defined to describe on-chip RAID accelerators.  Each RAID
+Engine should have a separate node.
+
+Supported chips:
+P5020, P5040
+
+Required properties:
+
+- compatible:	Should contain "fsl,raideng-v1.0" as the value
+		This identifies the RAID Engine block. The 1 in 1.0 represents
+		the major number whereas the 0 represents the minor number. The
+		version matches the hardware IP version.
+- reg:		offset and length of the register set for the device
+- ranges:	standard ranges property specifying the translation
+		between child address space and parent address space
+
+Example:
+	/* P5020 */
+	raideng: raideng@320000 {
+		compatible = "fsl,raideng-v1.0";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg	= <0x320000 0x10000>;
+		ranges	= <0 0x320000 0x10000>;
+	};
+
+
+There must be a sub-node for each job queue present in the RAID Engine.
+This node must be a sub-node of the main RAID Engine node.
+
+- compatible:	Should contain "fsl,raideng-v1.0-job-queue" as the value
+		This identifies the job queue interface
+- reg:		offset and length of the register set for job queue
+- ranges:	standard ranges property specifying the translation
+		between child address space and parent address space
+
+Example:
+	/* P5020 */
+	raideng_jq0@1000 {
+		compatible = "fsl,raideng-v1.0-job-queue";
+		reg	   = <0x1000 0x1000>;
+		ranges	   = <0x0 0x1000 0x1000>;
+	};
+
+
+There must be a sub-node for each job ring present in the RAID Engine.
+This node must be a sub-node of the job queue node.
+
+- compatible:	Must contain "fsl,raideng-v1.0-job-ring" as the value
+		This identifies the job ring. Should also contain either
+		"fsl,raideng-v1.0-hp-ring" or "fsl,raideng-v1.0-lp-ring"
+		depending upon whether the ring has high or low priority
+- reg:		offset and length of the register set for job ring
+- interrupts:	interrupt mapping for job ring IRQ
+
+Optional property:
+
+- fsl,liodn:	Specifies the LIODN to be used for the Job Ring. This
+		property is normally set by firmware. The value
+		is a 12-bit LIODN number for this JR.
+		This property is used by the IOMMU (PAMU) to distinguish
+		transactions from this JR and then be able to do address
+		translation & protection accordingly.
+
+Example:
+	/* P5020 */
+	raideng_jq0@1000 {
+		compatible = "fsl,raideng-v1.0-job-queue";
+		reg	   = <0x1000 0x1000>;
+		ranges	   = <0x0 0x1000 0x1000>;
+
+		raideng_jr0: jr@0 {
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+			reg	   = <0x0 0x400>;
+			interrupts = <139 2 0 0>;
+			interrupt-parent = <&mpic>;
+			fsl,liodn = <0x41>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
new file mode 100644
index 0000000..131e8c11
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
@@ -0,0 +1,23 @@
+TI SOC ECAP based APWM controller
+
+Required properties:
+- compatible: Must be "ti,am33xx-ecap"
+- #pwm-cells: Should be 3. Number of cells used to specify a PWM.
+  The first cell specifies the per-chip index of the PWM to use, the second
+  cell is the period in nanoseconds, and bit 0 of the third cell is used to
+  encode the polarity of the PWM output. Set bit 0 of the third cell in the
+  PWM specifier to 1 for inverse polarity, or to 0 for normal polarity.
+- reg: physical base address and size of the registers map.
+
+Optional properties:
+- ti,hwmods: Name of the hwmod associated with the ECAP:
+  "ecap<x>", <x> being the 0-based instance number from the HW spec
+
+Example:
+
+ecap0: ecap@0 {
+	compatible = "ti,am33xx-ecap";
+	#pwm-cells = <3>;
+	reg = <0x48300100 0x80>;
+	ti,hwmods = "ecap0";
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
new file mode 100644
index 0000000..4fc7079
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
@@ -0,0 +1,23 @@
+TI SOC EHRPWM based PWM controller
+
+Required properties:
+- compatible : Must be "ti,am33xx-ehrpwm"
+- #pwm-cells: Should be 3. Number of cells used to specify a PWM.
+  The first cell specifies the per-chip index of the PWM to use, the second
+  cell is the period in nanoseconds, and bit 0 of the third cell is used to
+  encode the polarity of the PWM output. Set bit 0 of the third cell in the
+  PWM specifier to 1 for inverse polarity, or to 0 for normal polarity.
+- reg: physical base address and size of the registers map.
+
+Optional properties:
+- ti,hwmods: Name of the hwmod associated to the EHRPWM:
+  "ehrpwm<x>", <x> being the 0-based instance number from the HW spec
+
+Example:
+
+ehrpwm0: ehrpwm@0 {
+	compatible = "ti,am33xx-ehrpwm";
+	#pwm-cells = <3>;
+	reg = <0x48300200 0x100>;
+	ti,hwmods = "ehrpwm0";
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tipwmss.txt b/Documentation/devicetree/bindings/pwm/pwm-tipwmss.txt
new file mode 100644
index 0000000..f7eae77
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-tipwmss.txt
@@ -0,0 +1,31 @@
+TI SOC based PWM Subsystem
+
+Required properties:
+- compatible: Must be "ti,am33xx-pwmss";
+- reg: physical base address and size of the registers map.
+- #address-cells: Specify the number of u32 entries needed in child nodes.
+		  Should be set to 1.
+- #size-cells: Specify the number of u32 entries needed to specify child nodes'
+		size in the reg property. Should be set to 1.
+- ranges: Describes the address mapping of a memory-mapped bus. Should be set
+	   to the child's physical base address, the corresponding physical
+	   address within the parent's address space, and the length of the
+	   address map. For am33xx, three sets of child register maps are
+	   present: ECAP register space, EQEP register space, and EHRPWM
+	   register space.
+
+Child nodes should also be populated under the PWMSS DT node.
+
+Example:
+pwmss0: pwmss@48300000 {
+	compatible = "ti,am33xx-pwmss";
+	reg = <0x48300000 0x10>;
+	ti,hwmods = "epwmss0";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	status = "disabled";
+	ranges = <0x48300100 0x48300100 0x80   /* ECAP */
+		  0x48300180 0x48300180 0x80   /* EQEP */
+		  0x48300200 0x48300200 0x80>; /* EHRPWM */
+
+	/* child nodes go here */
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm.txt b/Documentation/devicetree/bindings/pwm/pwm.txt
index 73ec962..06e6724 100644
--- a/Documentation/devicetree/bindings/pwm/pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm.txt
@@ -37,10 +37,21 @@
 		pwm-names = "backlight";
 	};
 
+Note that in the example above, specifying the "pwm-names" is redundant
+because the name "backlight" would be used as fallback anyway.
+
 pwm-specifier typically encodes the chip-relative PWM number and the PWM
-period in nanoseconds. Note that in the example above, specifying the
-"pwm-names" is redundant because the name "backlight" would be used as
-fallback anyway.
+period in nanoseconds.
+
+Optionally, the pwm-specifier can encode a number of flags in a third cell:
+- bit 0: PWM signal polarity (0: normal polarity, 1: inverse polarity)
+
+Example with an optional PWM specifier for inverse polarity:
+
+	bl: backlight {
+		pwms = <&pwm 0 5000000 1>;
+		pwm-names = "backlight";
+	};
 
 2) PWM controller nodes
 -----------------------
diff --git a/Documentation/devicetree/bindings/pwm/spear-pwm.txt b/Documentation/devicetree/bindings/pwm/spear-pwm.txt
new file mode 100644
index 0000000..3ac779d
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/spear-pwm.txt
@@ -0,0 +1,18 @@
+== ST SPEAr SoC PWM controller ==
+
+Required properties:
+- compatible: should be one of:
+  - "st,spear320-pwm"
+  - "st,spear1340-pwm"
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: number of cells used to specify a PWM, which is fixed to 2 on
+  SPEAr. The first cell specifies the per-chip index of the PWM to use and
+  the second cell is the period in nanoseconds.
+
+Example:
+
+        pwm: pwm@a8000000 {
+            compatible = "st,spear320-pwm";
+            reg = <0xa8000000 0x1000>;
+            #pwm-cells = <2>;
+        };
diff --git a/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt b/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt
new file mode 100644
index 0000000..2943ee5
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt
@@ -0,0 +1,17 @@
+Texas Instruments TWL series PWM drivers
+
+Supported PWMs:
+On TWL4030 series: PWM1 and PWM2
+On TWL6030 series: PWM0 and PWM1
+
+Required properties:
+- compatible: "ti,twl4030-pwm" or "ti,twl6030-pwm"
+- #pwm-cells: should be 2.  The first cell specifies the per-chip index
+  of the PWM to use and the second cell is the period in nanoseconds.
+
+Example:
+
+twl_pwm: pwm {
+	compatible = "ti,twl6030-pwm";
+	#pwm-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt b/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt
new file mode 100644
index 0000000..cb64f3a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt
@@ -0,0 +1,17 @@
+Texas Instruments TWL series PWM drivers connected to LED terminals
+
+Supported PWMs:
+On TWL4030 series: PWMA and PWMB (connected to LEDA and LEDB terminals)
+On TWL6030 series: LED PWM (mainly used as charging indicator LED)
+
+Required properties:
+- compatible: "ti,twl4030-pwmled" or "ti,twl6030-pwmled"
+- #pwm-cells: should be 2.  The first cell specifies the per-chip index
+  of the PWM to use and the second cell is the period in nanoseconds.
+
+Example:
+
+twl_pwmled: pwmled {
+	compatible = "ti,twl6030-pwmled";
+	#pwm-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
new file mode 100644
index 0000000..bcc6367
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
@@ -0,0 +1,17 @@
+VIA/Wondermedia VT8500/WM8xxx series SoC PWM controller
+
+Required properties:
+- compatible: should be "via,vt8500-pwm"
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: should be 2.  The first cell specifies the per-chip index
+  of the PWM to use and the second cell is the period in nanoseconds.
+- clocks: phandle to the PWM source clock
+
+Example:
+
+pwm1: pwm@d8220000 {
+	#pwm-cells = <2>;
+	compatible = "via,vt8500-pwm";
+	reg = <0xd8220000 0x1000>;
+	clocks = <&clkpwm>;
+};
diff --git a/Documentation/devicetree/bindings/regulator/tps65217.txt b/Documentation/devicetree/bindings/regulator/tps65217.txt
index d316fb8..4f05d20 100644
--- a/Documentation/devicetree/bindings/regulator/tps65217.txt
+++ b/Documentation/devicetree/bindings/regulator/tps65217.txt
@@ -11,6 +11,9 @@
   using the standard binding for regulators found at
   Documentation/devicetree/bindings/regulator/regulator.txt.
 
+Optional properties:
+- ti,pmic-shutdown-controller: Tells the PMIC to shut down on a PWR_EN toggle.
+
   The valid names for regulators are:
   tps65217: dcdc1, dcdc2, dcdc3, ldo1, ldo2, ldo3 and ldo4
 
@@ -20,6 +23,7 @@
 
 	tps: tps@24 {
 		compatible = "ti,tps65217";
+		ti,pmic-shutdown-controller;
 
 		regulators {
 			dcdc1_reg: dcdc1 {
diff --git a/Documentation/devicetree/bindings/rtc/imxdi-rtc.txt b/Documentation/devicetree/bindings/rtc/imxdi-rtc.txt
new file mode 100644
index 0000000..c9d80d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/imxdi-rtc.txt
@@ -0,0 +1,17 @@
+* i.MX25 Real Time Clock controller
+
+This binding supports the following chips: i.MX25, i.MX53
+
+Required properties:
+- compatible: should be "fsl,imx25-rtc"
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- interrupts: rtc alarm interrupt
+
+Example:
+
+rtc@80056000 {
+	compatible = "fsl,imx53-rtc", "fsl,imx25-rtc";
+	reg = <0x80056000 2000>;
+	interrupts = <29>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/rtc-omap.txt b/Documentation/devicetree/bindings/rtc/rtc-omap.txt
new file mode 100644
index 0000000..b47aa41
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-omap.txt
@@ -0,0 +1,17 @@
+TI Real Time Clock
+
+Required properties:
+- compatible: "ti,da830-rtc"
+- reg: Address range of rtc register set
+- interrupts: rtc timer, alarm interrupts in order
+- interrupt-parent: phandle for the interrupt controller
+
+Example:
+
+rtc@1c23000 {
+	compatible = "ti,da830-rtc";
+	reg = <0x23000 0x1000>;
+	interrupts = <19
+		      19>;
+	interrupt-parent = <&intc>;
+};
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
index 8cf24f6..7b53da5 100644
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
@@ -13,7 +13,7 @@
 
 Example:
 
-spi@7000d600 {
+spi@7000c380 {
 	compatible = "nvidia,tegra20-sflash";
 	reg = <0x7000c380 0x80>;
 	interrupts = <0 39 0x04>;
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
index f5b1ad1..eefe15e 100644
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
@@ -13,7 +13,7 @@
 
 Example:
 
-slink@7000d600 {
+spi@7000d600 {
 	compatible = "nvidia,tegra20-slink";
 	reg = <0x7000d600 0x200>;
 	interrupts = <0 82 0x04>;
diff --git a/Documentation/devicetree/bindings/spi/spi_atmel.txt b/Documentation/devicetree/bindings/spi/spi_atmel.txt
new file mode 100644
index 0000000..07e04cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_atmel.txt
@@ -0,0 +1,26 @@
+Atmel SPI device
+
+Required properties:
+- compatible : should be "atmel,at91rm9200-spi".
+- reg: Address and length of the register set for the device
+- interrupts: Should contain spi interrupt
+- cs-gpios: chipselects
+
+Example:
+
+spi1: spi@fffcc000 {
+	compatible = "atmel,at91rm9200-spi";
+	reg = <0xfffcc000 0x4000>;
+	interrupts = <13 4 5>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+	cs-gpios = <&pioB 3 0>;
+	status = "okay";
+
+	mmc-slot@0 {
+		compatible = "mmc-spi-slot";
+		reg = <0>;
+		gpios = <&pioC 4 0>;	/* CD */
+		spi-max-frequency = <25000000>;
+	};
+};
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 7b52ba7..8042050 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -50,6 +50,8 @@
 	- info, mount options and specifications for the Ext4 filesystem.
 files.txt
 	- info on file management in the Linux kernel.
+f2fs.txt
+	- info and mount options for the F2FS filesystem.
 fuse.txt
 	- info on the Filesystem in User SpacE including mount options.
 gfs2.txt
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index e540a24..f48e0c6 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -80,7 +80,6 @@
 readlink:	no
 follow_link:	no
 put_link:	no
-truncate:	yes		(see below)
 setattr:	yes
 permission:	no (may not block if called in rcu-walk mode)
 get_acl:	no
@@ -96,11 +95,6 @@
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
 	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
-	->truncate() is never called directly - it's a callback, not a
-method. It's called by vmtruncate() - deprecated library function used by
-->setattr(). Locking information above applies to that call (i.e. is
-inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been
-passed).
 
 See Documentation/filesystems/directory-locking for more detailed discussion
 of the locking scheme for directory operations.
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
index 382d52c..d78bab9 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -308,6 +308,18 @@
      obtained by calling object->cookie->def->get_aux()/get_attr().
 
 
+ (*) Invalidate data object [mandatory]:
+
+	int (*invalidate_object)(struct fscache_operation *op)
+
+     This is called to invalidate a data object (as pointed to by op->object).
+     All the data stored for this object should be discarded and an
+     attr_changed operation should be performed.  The caller will follow up
+     with an object update operation.
+
+     fscache_op_complete() must be called on op before returning.
+
+
  (*) Discard object [mandatory]:
 
 	void (*drop_object)(struct fscache_object *object)
@@ -419,7 +431,10 @@
 
      If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS
      returned if possible or fscache_end_io() called with a suitable error
-     code..
+     code.
+
+     fscache_put_retrieval() should be called after a page or pages are dealt
+     with.  This will complete the operation when all pages are dealt with.
 
 
  (*) Request pages be read from cache [mandatory]:
@@ -526,6 +541,27 @@
      error value should be 0 if successful and an error otherwise.
 
 
+ (*) Record that one or more pages being retrieved or allocated have been dealt
+     with:
+
+	void fscache_retrieval_complete(struct fscache_retrieval *op,
+					int n_pages);
+
+     This is called to record the fact that one or more pages have been dealt
+     with and are no longer the concern of this operation.  When the number of
+     pages remaining in the operation reaches 0, the operation will be
+     completed.
+
+
+ (*) Record operation completion:
+
+	void fscache_op_complete(struct fscache_operation *op);
+
+     This is called to record the completion of an operation.  This deducts
+     this operation from the parent object's run state, potentially permitting
+     one or more pending operations to start running.
+
+
  (*) Set highest store limit:
 
 	void fscache_set_store_limit(struct fscache_object *object,
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 7cc6bf2..97e6c0e 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -35,8 +35,9 @@
 	(12) Index and data file update
 	(13) Miscellaneous cookie operations
 	(14) Cookie unregistration
-	(15) Index and data file invalidation
-	(16) FS-Cache specific page flags.
+	(15) Index invalidation
+	(16) Data file invalidation
+	(17) FS-Cache specific page flags.
 
 
 =============================
@@ -767,13 +768,42 @@
 first.
 
 
-================================
-INDEX AND DATA FILE INVALIDATION
-================================
+==================
+INDEX INVALIDATION
+==================
 
-There is no direct way to invalidate an index subtree or a data file.  To do
-this, the caller should relinquish and retire the cookie they have, and then
-acquire a new one.
+There is no direct way to invalidate an index subtree.  To do this, the caller
+should relinquish and retire the cookie they have, and then acquire a new one.
+
+
+======================
+DATA FILE INVALIDATION
+======================
+
+Sometimes it will be necessary to invalidate an object that contains data.
+Typically this will be necessary when the server tells the netfs of a foreign
+change - at which point the netfs has to throw away all the state it had for an
+inode and reload from the server.
+
+To indicate that a cache object should be invalidated, the following function
+can be called:
+
+	void fscache_invalidate(struct fscache_cookie *cookie);
+
+This can be called with spinlocks held as it defers the work to a thread pool.
+All extant storage, retrieval and attribute change ops at this point are
+cancelled and discarded.  Some future operations will be rejected until the
+cache has had a chance to insert a barrier in the operations queue.  After
+that, operations will be queued again behind the invalidation operation.
+
+The invalidation operation will perform an attribute change operation and an
+auxiliary data update operation as it is very likely these will have changed.
+
+Using the following function, the netfs can wait for the invalidation operation
+to have reached a point at which it can start submitting ordinary operations
+once again:
+
+	void fscache_wait_on_invalidate(struct fscache_cookie *cookie);
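+
+For illustration, a netfs might drive this pair of calls as follows (a minimal
+sketch; the my_inode structure and its 'cookie' member are hypothetical netfs
+state, not part of the FS-Cache API):
+
+	/* On notice of a foreign change; spinlocks may be held here */
+	fscache_invalidate(my_inode->cookie);
+
+	/* Later, from sleepable context, before resuming ordinary I/O */
+	fscache_wait_on_invalidate(my_inode->cookie);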
 
 
 ===========================
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt
index 5831334..100ff41 100644
--- a/Documentation/filesystems/caching/object.txt
+++ b/Documentation/filesystems/caching/object.txt
@@ -216,7 +216,14 @@
      The normal running state.  In this state, requests the netfs makes will be
      passed on to the cache.
 
- (6) State FSCACHE_OBJECT_UPDATING.
+ (6) State FSCACHE_OBJECT_INVALIDATING.
+
+     The object is undergoing invalidation.  When the object enters this state,
+     it discards all pending read, write and attribute change operations as it
+     is going to clear out the cache entirely and reinitialise it.  It will
+     then continue to the FSCACHE_OBJECT_UPDATING state.
+
+ (7) State FSCACHE_OBJECT_UPDATING.
 
      The state machine comes here to update the object in the cache from the
      netfs's records.  This involves updating the auxiliary data that is used
@@ -225,13 +232,13 @@
 And there are terminal states in which an object cleans itself up, deallocates
 memory and potentially deletes stuff from disk:
 
- (7) State FSCACHE_OBJECT_LC_DYING.
+ (8) State FSCACHE_OBJECT_LC_DYING.
 
      The object comes here if it is dying because of a lookup or creation
      error.  This would be due to a disk error or system error of some sort.
      Temporary data is cleaned up, and the parent is released.
 
- (8) State FSCACHE_OBJECT_DYING.
+ (9) State FSCACHE_OBJECT_DYING.
 
      The object comes here if it is dying due to an error, because its parent
      cookie has been relinquished by the netfs or because the cache is being
@@ -241,27 +248,27 @@
      can destroy themselves.  This object waits for all its children to go away
      before advancing to the next state.
 
- (9) State FSCACHE_OBJECT_ABORT_INIT.
+(10) State FSCACHE_OBJECT_ABORT_INIT.
 
      The object comes to this state if it was waiting on its parent in
      FSCACHE_OBJECT_INIT, but its parent died.  The object will destroy itself
      so that the parent may proceed from the FSCACHE_OBJECT_DYING state.
 
-(10) State FSCACHE_OBJECT_RELEASING.
-(11) State FSCACHE_OBJECT_RECYCLING.
+(11) State FSCACHE_OBJECT_RELEASING.
+(12) State FSCACHE_OBJECT_RECYCLING.
 
      The object comes to one of these two states when dying once it is rid of
      all its children, if it is dying because the netfs relinquished its
      cookie.  In the first state, the cached data is expected to persist, and
      in the second it will be deleted.
 
-(12) State FSCACHE_OBJECT_WITHDRAWING.
+(13) State FSCACHE_OBJECT_WITHDRAWING.
 
      The object transits to this state if the cache decides it wants to
      withdraw the object from service, perhaps to make space, but also due to
      error or just because the whole cache is being withdrawn.
 
-(13) State FSCACHE_OBJECT_DEAD.
+(14) State FSCACHE_OBJECT_DEAD.
 
      The object transits to this state when the in-memory object record is
      ready to be deleted.  The object processor shouldn't ever see an object in
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt
index b6b070c..bee2a5f 100644
--- a/Documentation/filesystems/caching/operations.txt
+++ b/Documentation/filesystems/caching/operations.txt
@@ -174,7 +174,7 @@
      necessary (the object might have died whilst the thread was waiting).
 
      When it has finished doing its processing, it should call
-     fscache_put_operation() on it.
+     fscache_op_complete() and fscache_put_operation() on it.
 
  (4) The operation holds an effective lock upon the object, preventing other
      exclusive ops conflicting until it is released.  The operation can be
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 104322b..34ea4f1 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -200,12 +200,9 @@
 			table readahead algorithm will pre-read into
 			the buffer cache.  The default value is 32 blocks.
 
-nouser_xattr		Disables Extended User Attributes. If you have extended
-			attribute support enabled in the kernel configuration
-			(CONFIG_EXT4_FS_XATTR), extended attribute support
-			is enabled by default on mount. See the attr(5) manual
-			page and http://acl.bestbits.at/ for more information
-			about extended attributes.
+nouser_xattr		Disables Extended User Attributes.  See the
+			attr(5) manual page and http://acl.bestbits.at/
+			for more information about extended attributes.
 
 noacl			This option disables POSIX Access Control List
 			support. If ACL support is enabled in the kernel
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
new file mode 100644
index 0000000..8fbd8b4
--- /dev/null
+++ b/Documentation/filesystems/f2fs.txt
@@ -0,0 +1,421 @@
+================================================================================
+WHAT IS Flash-Friendly File System (F2FS)?
+================================================================================
+
+NAND flash memory-based storage devices, such as SSDs, eMMC, and SD cards, have
+been equipped on a variety of systems ranging from mobile to server systems.
+Since they are known to have different characteristics from conventional
+rotating disks, a file system, an upper layer to the storage device, should
+adapt to these characteristics from scratch at the design level.
+
+F2FS is a file system exploiting NAND flash memory-based storage devices, which
+is based on Log-structured File System (LFS). The design has been focused on
+addressing the fundamental issues in LFS, which are snowball effect of wandering
+tree and high cleaning overhead.
+
+Since a NAND flash memory-based storage device shows different characteristics
+according to its internal geometry or flash memory management scheme, namely the
+FTL, F2FS and its tools support various parameters not only for configuring the
+on-disk layout, but also for selecting allocation and cleaning algorithms.
+
+The file system formatting tool, "mkfs.f2fs", is available from the following
+git tree:
+>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
+
+For reporting bugs and sending patches, please use the following mailing list:
+>> linux-f2fs-devel@lists.sourceforge.net
+
+================================================================================
+BACKGROUND AND DESIGN ISSUES
+================================================================================
+
+Log-structured File System (LFS)
+--------------------------------
+"A log-structured file system writes all modifications to disk sequentially in
+a log-like structure, thereby speeding up  both file writing and crash recovery.
+The log is the only structure on disk; it contains indexing information so that
+files can be read back from the log efficiently. In order to maintain large free
+areas on disk for fast writing, we divide  the log into segments and use a
+segment cleaner to compress the live information from heavily fragmented
+segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and
+implementation of a log-structured file system", ACM Trans. Computer Systems
+10, 1, 26–52.
+
+Wandering Tree Problem
+----------------------
+In LFS, when file data is updated and written to the end of the log, its direct
+pointer block is updated due to the changed location. Then the indirect pointer
+block is also updated due to the direct pointer block update. In this manner,
+the upper index structures such as inode, inode map, and checkpoint block are
+also updated recursively. This is called the wandering tree problem [1], and in
+order to enhance performance, the update propagation should be eliminated or
+relaxed as much as possible.
+
+[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/
+
+Cleaning Overhead
+-----------------
+Since LFS is based on out-of-place writes, it produces many obsolete blocks
+scattered across the whole storage. In order to serve new empty log space, it
+needs to reclaim these obsolete blocks transparently to users. This job is
+called the cleaning process.
+
+The process consists of four operations as follows.
+1. A victim segment is selected by referencing the segment usage table.
+2. It loads parent index structures of all the data in the victim identified by
+   segment summary blocks.
+3. It checks the cross-reference between the data and its parent index structure.
+4. It moves valid data selectively.
+
+This cleaning job may cause unexpectedly long delays, so the most important goal
+is to hide these latencies from users. It should also reduce the amount of
+valid data to be moved, and move it quickly.
+
+================================================================================
+KEY FEATURES
+================================================================================
+
+Flash Awareness
+---------------
+- Enlarge the random write area for better performance, but provide high
+  spatial locality
+- Align FS data structures to the operational units in the FTL on a best-effort
+  basis
+
+Wandering Tree Problem
+----------------------
+- Use a term, “node”, that represents inodes as well as various pointer blocks
+- Introduce Node Address Table (NAT) containing the locations of all the “node”
+  blocks; this will cut off the update propagation.
+
+Cleaning Overhead
+-----------------
+- Support a background cleaning process
+- Support greedy and cost-benefit algorithms for victim selection policies
+- Support multi-head logs for static/dynamic hot and cold data separation
+- Introduce adaptive logging for efficient block allocation
+
+================================================================================
+MOUNT OPTIONS
+================================================================================
+
+background_gc_off      Turn off cleaning operations, namely garbage collection,
+                       triggered in the background when the I/O subsystem is
+                       idle.
+disable_roll_forward   Disable the roll-forward recovery routine.
+discard                Issue discard/TRIM commands when a segment is cleaned.
+no_heap                Disable heap-style segment allocation, which finds free
+                       segments for data from the beginning of the main area,
+                       and for nodes from the end of the main area.
+nouser_xattr           Disable Extended User Attributes. Note: xattr is enabled
+                       by default if CONFIG_F2FS_FS_XATTR is selected.
+noacl                  Disable POSIX Access Control Lists. Note: acl is enabled
+                       by default if CONFIG_F2FS_FS_POSIX_ACL is selected.
+active_logs=%u         Support configuring the number of active logs. In the
+                       current design, f2fs supports only 2, 4, and 6 logs.
+                       The default number is 6.
+disable_ext_identify   Disable the extension list configured by mkfs, so f2fs
+                       is not aware of cold files such as media files.
+
+================================================================================
+DEBUGFS ENTRIES
+================================================================================
+
+/sys/kernel/debug/f2fs/ contains information about all the partitions mounted
+as f2fs. Each file shows information about the whole f2fs system.
+
+/sys/kernel/debug/f2fs/status includes:
+ - major file system information managed by f2fs currently
+ - average SIT information about whole segments
+ - current memory footprint consumed by f2fs.
+
+================================================================================
+USAGE
+================================================================================
+
+1. Download userland tools and compile them.
+
+2. Skip this step if f2fs was compiled statically into the kernel.
+   Otherwise, insert the f2fs.ko module.
+ # insmod f2fs.ko
+
+3. Create a directory to be used as a mount point
+ # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount as f2fs
+ # mkfs.f2fs -l label /dev/block_device
+ # mount -t f2fs /dev/block_device /mnt/f2fs
+
+Format options
+--------------
+-l [label]   : Give a volume label, up to 256 Unicode characters.
+-a [0 or 1]  : Split the start location of each area for heap-based allocation.
+               1 is set by default, which enables this behavior.
+-o [int]     : Set overprovision ratio in percent over volume size.
+               5 is set by default.
+-s [int]     : Set the number of segments per section.
+               1 is set by default.
+-z [int]     : Set the number of sections per zone.
+               1 is set by default.
+-e [str]     : Set basic extension list. e.g. "mp3,gif,mov"
+
+================================================================================
+DESIGN
+================================================================================
+
+On-disk Layout
+--------------
+
+F2FS divides the whole volume into a number of segments, each of which is fixed
+to 2MB in size. A section is composed of consecutive segments, and a zone
+consists of a set of sections. By default, section and zone sizes are both set
+to the size of one segment, but users can easily modify the sizes with mkfs.
+
+F2FS splits the entire volume into six areas, and all the areas except the
+superblock consist of multiple segments as described below.
+
+                                            align with the zone size <-|
+                 |-> align with the segment size
+     _________________________________________________________________________
+    |            |            |    Node     |   Segment   |   Segment  |      |
+    | Superblock | Checkpoint |   Address   |    Info.    |   Summary  | Main |
+    |    (SB)    |   (CP)     | Table (NAT) | Table (SIT) | Area (SSA) |      |
+    |____________|_____2______|______N______|______N______|______N_____|__N___|
+                                                                       .      .
+                                                             .                .
+                                                 .                            .
+                                    ._________________________________________.
+                                    |_Segment_|_..._|_Segment_|_..._|_Segment_|
+                                    .           .
+                                    ._________._________
+                                    |_section_|__...__|_
+                                    .            .
+		                    .________.
+	                            |__zone__|
+
+- Superblock (SB)
+ : It is located at the beginning of the partition, and there are two copies
+   to guard against file system corruption. It contains basic partition
+   information and some default parameters of f2fs.
+
+- Checkpoint (CP)
+ : It contains file system information, bitmaps for valid NAT/SIT sets, orphan
+   inode lists, and summary entries of current active segments.
+
+- Node Address Table (NAT)
+ : It is composed of a block address table for all the node blocks stored in
+   Main area.
+
+- Segment Information Table (SIT)
+ : It contains segment information such as valid block count and bitmap for the
+   validity of all the blocks.
+
+- Segment Summary Area (SSA)
+ : It contains summary entries which contains the owner information of all the
+   data and node blocks stored in Main area.
+
+- Main Area
+ : It contains file and directory data including their indices.
+
+In order to avoid misalignment between file system and flash-based storage, F2FS
+aligns the start block address of CP with the segment size. Also, it aligns the
+start block address of Main area with the zone size by reserving some segments
+in SSA area.
+
+Refer to the following survey for additional technical details.
+https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey
+
+File System Metadata Structure
+------------------------------
+
+F2FS adopts a checkpointing scheme to maintain file system consistency. At
+mount time, F2FS first tries to find the last valid checkpoint data by scanning
+the CP area. In order to reduce the scanning time, F2FS uses only two copies of
+the CP. One of them always indicates the last valid data, which is called the
+shadow copy mechanism. In addition to the CP, the NAT and SIT also adopt the
+shadow copy mechanism.
+
+For file system consistency, each CP points to which NAT and SIT copies are
+valid, as shown below.
+
+  +--------+----------+---------+
+  |   CP   |    NAT   |   SIT   |
+  +--------+----------+---------+
+  .         .          .          .
+  .            .              .              .
+  .               .                 .                 .
+  +-------+-------+--------+--------+--------+--------+
+  | CP #0 | CP #1 | NAT #0 | NAT #1 | SIT #0 | SIT #1 |
+  +-------+-------+--------+--------+--------+--------+
+     |             ^                          ^
+     |             |                          |
+     `----------------------------------------'
+
+Index Structure
+---------------
+
+The key data structure to manage the data locations is a "node". Similar to
+traditional file structures, F2FS has three types of nodes: inode, direct node,
+and indirect node. F2FS assigns 4KB to an inode block, which contains 923 data
+block indices, two direct node pointers, two indirect node pointers, and one
+double indirect node pointer, as described below. One direct node block
+contains 1018 data block addresses, and one indirect node block likewise
+contains 1018 node block addresses. Thus, one inode block (i.e., a file) covers:
+
+  4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB.
+
+   Inode block (4KB)
+     |- data (923)
+     |- direct node (2)
+     |          `- data (1018)
+     |- indirect node (2)
+     |            `- direct node (1018)
+     |                       `- data (1018)
+     `- double indirect node (1)
+                         `- indirect node (1018)
+			              `- direct node (1018)
+	                                         `- data (1018)
+
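+As a quick check of the arithmetic above (a standalone C sketch, not f2fs
+code):
+
+	/* number of 4KB blocks addressable from one inode block */
+	unsigned long long blocks = 923			/* in-inode indices */
+		+ 2 * 1018ULL				/* direct nodes     */
+		+ 2 * 1018ULL * 1018			/* indirect nodes   */
+		+ 1018ULL * 1018 * 1018;		/* double indirect  */
+	unsigned long long bytes = blocks * 4096;	/* ~3.94TB          */
+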
+Note that all the node blocks are mapped by the NAT, which means that the
+location of each node is translated by the NAT. This allows F2FS to cut off
+the propagation of node updates caused by leaf data writes, addressing the
+wandering tree problem.
+
+Directory Structure
+-------------------
+
+A directory entry occupies 11 bytes, which consists of the following attributes.
+
+- hash		hash value of the file name
+- ino		inode number
+- len		the length of file name
+- type		file type such as directory, symlink, etc
+
+A dentry block consists of 214 dentry slots and file names. Therein a bitmap is
+used to represent whether each dentry is valid or not. A dentry block occupies
+4KB with the following composition.
+
+  Dentry Block (4KB) = bitmap (27 bytes) + reserved (3 bytes) +
+	               dentries (11 * 214 bytes) + file names (8 * 214 bytes)
+
+  (27 + 3 + 11 * 214 + 8 * 214 = 4096 bytes, i.e., exactly one 4KB block)
+
+                         [Bucket]
+             +--------------------------------+
+             |dentry block 1 | dentry block 2 |
+             +--------------------------------+
+             .               .
+       .                             .
+  .       [Dentry Block Structure: 4KB]       .
+  +--------+----------+----------+------------+
+  | bitmap | reserved | dentries | file names |
+  +--------+----------+----------+------------+
+  [Dentry Block: 4KB] .   .
+		 .               .
+            .                          .
+            +------+------+-----+------+
+            | hash | ino  | len | type |
+            +------+------+-----+------+
+            [Dentry Structure: 11 bytes]
+
+F2FS implements multi-level hash tables for the directory structure. Each level
+has a hash table with a dedicated number of hash buckets as shown below. Note
+that "A(2B)" means a bucket includes 2 data blocks.
+
+----------------------
+A : bucket
+B : block
+N : MAX_DIR_HASH_DEPTH
+----------------------
+
+level #0   | A(2B)
+           |
+level #1   | A(2B) - A(2B)
+           |
+level #2   | A(2B) - A(2B) - A(2B) - A(2B)
+     .     |   .       .       .       .
+level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
+     .     |   .       .       .       .
+level #N   | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
+
+The numbers of blocks and buckets are determined by:
+
+                            ,- 2, if n < MAX_DIR_HASH_DEPTH / 2,
+  # of blocks in level #n = |
+                            `- 4, otherwise
+
+                             ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2,
+  # of buckets in level #n = |
+                             `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), otherwise
+
+When F2FS finds a file name in a directory, first the hash value of the file
+name is calculated. Then, F2FS scans the hash table in level #0 to find the
+dentry consisting of the file name and its inode number. If not found, F2FS
+scans the next hash table in level #1. In this way, F2FS scans the hash tables
+in each level incrementally from 0 to N. In each level, F2FS needs to scan only
+one bucket determined by the following equation, which gives O(log(# of files))
+complexity.
+
+  bucket number to scan in level #n = (hash value) % (# of buckets in level #n)
+
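+Expressed as a standalone C sketch of the two formulas above, where
+MAX_DIR_HASH_DEPTH is the N used in the figure:
+
+	unsigned int buckets_in_level(unsigned int n)
+	{
+		if (n < MAX_DIR_HASH_DEPTH / 2)
+			return 1U << n;				/* 2^n */
+		return 1U << (MAX_DIR_HASH_DEPTH / 2 - 1);
+	}
+
+	/* the single bucket scanned in level #n for a given hash value */
+	unsigned int bucket_to_scan(unsigned int hash, unsigned int n)
+	{
+		return hash % buckets_in_level(n);
+	}
+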
+In the case of file creation, F2FS finds empty consecutive slots that cover the
+file name. F2FS searches for the empty slots in the hash tables of all levels
+from 0 to N in the same way as the lookup operation.
+
+The following figure shows an example of two cases holding children.
+       --------------> Dir <--------------
+       |                                 |
+    child                             child
+
+    child - child                     [hole] - child
+
+    child - child - child             [hole] - [hole] - child
+
+   Case 1:                           Case 2:
+   Number of children = 6,           Number of children = 3,
+   File size = 7                     File size = 7
+
+Default Block Allocation
+------------------------
+
+At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node
+and Hot/Warm/Cold data.
+
+- Hot node	contains direct node blocks of directories.
+- Warm node	contains direct node blocks except hot node blocks.
+- Cold node	contains indirect node blocks.
+- Hot data	contains dentry blocks.
+- Warm data	contains data blocks except hot and cold data blocks.
+- Cold data	contains multimedia data or migrated data blocks.
+
+LFS has two schemes for free space management: threaded log and
+copy-and-compaction. The copy-and-compaction scheme, which is known as
+cleaning, is well-suited for devices showing very good sequential write
+performance, since free segments are served all the time for writing new data.
+However, it suffers from cleaning overhead under high utilization. Contrarily,
+the threaded log scheme suffers from random writes, but no cleaning process is
+needed. F2FS adopts a hybrid scheme where the copy-and-compaction scheme is
+adopted by default, but the policy is dynamically changed to the threaded log
+scheme according to the file system status.
+
+In order to align F2FS with underlying flash-based storage, F2FS allocates a
+segment in a unit of section. F2FS expects that the section size would be the
+same as the unit size of garbage collection in FTL. Furthermore, with respect
+to the mapping granularity in FTL, F2FS allocates each section of the active
+logs from different zones as much as possible, since FTL can write the data in
+the active logs into one allocation unit according to its mapping granularity.
+
+Cleaning process
+----------------
+
+F2FS does cleaning both on demand and in the background. On-demand cleaning is
+triggered when there are not enough free segments to serve VFS calls. The
+background cleaner is operated by a kernel thread and triggers the cleaning job
+when the system is idle.
+
+F2FS supports two victim selection policies: the greedy and the cost-benefit
+algorithms. In the greedy algorithm, F2FS selects the victim segment having the
+smallest number of valid blocks. In the cost-benefit algorithm, F2FS selects a
+victim segment according to the segment age and the number of valid blocks, in
+order to address the log block thrashing problem of the greedy algorithm. F2FS
+adopts the greedy algorithm for the on-demand cleaner, while the background
+cleaner adopts the cost-benefit algorithm.
+
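+A minimal sketch of the greedy policy (standalone illustration, not the actual
+f2fs code):
+
+	/* return the segment with the fewest valid blocks */
+	unsigned int greedy_victim(const unsigned int *valid_blocks,
+				   unsigned int nr_segments)
+	{
+		unsigned int i, victim = 0;
+
+		for (i = 1; i < nr_segments; i++)
+			if (valid_blocks[i] < valid_blocks[victim])
+				victim = i;
+		return victim;
+	}
+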
+In order to identify whether the data in the victim segment are valid or not,
+F2FS manages a bitmap. Each bit represents the validity of a block, and the
+bitmap is composed of a bit stream covering all blocks in the main area.
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 092fad9..01c2db7 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -39,21 +39,10 @@
 	  from a linux client are possible, but we aren't really
 	  conformant with the spec (for example, we don't use kerberos
 	  on the backchannel correctly).
-	- Incomplete backchannel support: incomplete backchannel gss
-	  support and no support for BACKCHANNEL_CTL mean that
-	  callbacks (hence delegations and layouts) may not be
-	  available and clients confused by the incomplete
-	  implementation may fail.
 	- We do not support SSV, which provides security for shared
 	  client-server state (thus preventing unauthorized tampering
 	  with locks and opens, for example).  It is mandatory for
 	  servers to support this, though no clients use it yet.
-	- Mandatory operations which we do not support, such as
-	  DESTROY_CLIENTID, are not currently used by clients, but will be
-	  (and the spec recommends their uses in common cases), and
-	  clients should not be expected to know how to recover from the
-	  case where they are not supported.  This will eventually cause
-	  interoperability failures.
 
 In addition, some limitations are inherited from the current NFSv4
 implementation:
@@ -89,7 +78,7 @@
    |                      | MNI        | or OPT)      |                |
    +----------------------+------------+--------------+----------------+
    | ACCESS               | REQ        |              | Section 18.1   |
-NS | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
+I  | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
 I  | BIND_CONN_TO_SESSION | REQ        |              | Section 18.34  |
    | CLOSE                | REQ        |              | Section 18.2   |
    | COMMIT               | REQ        |              | Section 18.3   |
@@ -99,7 +88,7 @@
    | DELEGRETURN          | OPT        | FDELG,       | Section 18.6   |
    |                      |            | DDELG, pNFS  |                |
    |                      |            | (REQ)        |                |
-NS | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
+I  | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
 I  | DESTROY_SESSION      | REQ        |              | Section 18.37  |
 I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
 I  | FREE_STATEID         | REQ        |              | Section 18.38  |
@@ -192,7 +181,6 @@
 
 CREATE_SESSION:
 * backchannel attributes are ignored
-* backchannel security parameters are ignored
 
 SEQUENCE:
 * no support for dynamic slot table renegotiation (optional)
@@ -202,7 +190,7 @@
   ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
   fail to live up to the promise we made in CREATE_SESSION fore channel
   negotiation.
-* No more than one IO operation (read, write, readdir) allowed per
-  compound.
+* No more than one read-like operation allowed per compound; encoding
+  replies that cross page boundaries (except for read data) not handled.
 
 See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 0742fee..0472c31 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -281,7 +281,7 @@
 
 [mandatory]
 
-	->truncate is going away.  The whole truncate sequence needs to be
+	->truncate is gone.  The whole truncate sequence needs to be
 implemented in ->setattr, which is now mandatory for filesystems
 implementing on-disk size changes.  Start with a copy of the old inode_setattr
and vmtruncate, and then reorder the vmtruncate + foofs_vmtruncate sequence to
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 3844d21..fd8d0d5 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -41,6 +41,7 @@
   3.5	/proc/<pid>/mountinfo - Information about mounts
   3.6	/proc/<pid>/comm  & /proc/<pid>/task/<tid>/comm
   3.7   /proc/<pid>/task/<tid>/children - Information about task children
+  3.8   /proc/<pid>/fdinfo/<fd> - Information about opened file
 
   4	Configuring procfs
   4.1	Mount options
@@ -142,7 +143,7 @@
  pagemap	Page table
  stack		Report full stack trace, enable via CONFIG_STACKTRACE
 smaps		an extension based on maps, showing the memory consumption of
-		each mapping
+		each mapping and flags associated with it
 ..............................................................................
 
 For example, to get the status information of a process, all you have to do is
@@ -181,6 +182,7 @@
   CapPrm: 0000000000000000
   CapEff: 0000000000000000
   CapBnd: ffffffffffffffff
+  Seccomp:        0
   voluntary_ctxt_switches:        0
   nonvoluntary_ctxt_switches:     1
 
@@ -237,6 +239,7 @@
  CapPrm                      bitmap of permitted capabilities
  CapEff                      bitmap of effective capabilities
  CapBnd                      bitmap of capabilities bounding set
+ Seccomp                     seccomp mode, like prctl(PR_GET_SECCOMP, ...)
  Cpus_allowed                mask of CPUs on which this process may run
  Cpus_allowed_list           Same as previous, but in "list format"
  Mems_allowed                mask of memory nodes allowed to this process
@@ -415,8 +418,9 @@
 KernelPageSize:        4 kB
 MMUPageSize:           4 kB
 Locked:              374 kB
+VmFlags: rd ex mr mw me de
 
-The first of these lines shows the same information as is displayed for the
+The first of these lines shows the same information as is displayed for the
 mapping in /proc/PID/maps.  The remaining lines show the size of the mapping
 (size), the amount of the mapping that is currently resident in RAM (RSS), the
 process' proportional share of this mapping (PSS), the number of clean and
@@ -430,6 +434,41 @@
 "Swap" shows how much would-be-anonymous memory is also used, but out on
 swap.
 
+"VmFlags" field deserves a separate description. This member represents the kernel
+flags associated with the particular virtual memory area in two letter encoded
+manner. The codes are the following:
+    rd  - readable
+    wr  - writeable
+    ex  - executable
+    sh  - shared
+    mr  - may read
+    mw  - may write
+    me  - may execute
+    ms  - may share
+    gd  - stack segment grows down
+    pf  - pure PFN range
+    dw  - disabled write to the mapped file
+    lo  - pages are locked in memory
+    io  - memory mapped I/O area
+    sr  - sequential read advise provided
+    rr  - random read advise provided
+    dc  - do not copy area on fork
+    de  - do not expand area on remapping
+    ac  - area is accountable
+    nr  - swap space is not reserved for the area
+    ht  - area uses huge tlb pages
+    nl  - non-linear mapping
+    ar  - architecture specific flag
+    dd  - do not include area into core dump
+    mm  - mixed map area
+    hg  - huge page advise flag
+    nh  - no-huge page advise flag
+    mg  - mergable advise flag
+
+Note that there is no guarantee that every flag and associated mnemonic will
+be present in all further kernel releases. Things change, and flags may vanish
+or, conversely, new ones may be added.
+
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
@@ -1595,6 +1634,93 @@
 if precise results are needed.
 
 
+3.8	/proc/<pid>/fdinfo/<fd> - Information about opened file
+---------------------------------------------------------------
+This file provides information associated with an opened file. Regular files
+have at least two fields -- 'pos' and 'flags'. The 'pos' field represents the
+current offset of the opened file in decimal form [see lseek(2) for details]
+and 'flags' denotes the octal O_xxx mask the file has been created with [see
+open(2) for details].
+
+A typical output is
+
+	pos:	0
+	flags:	0100002
+
+Files such as eventfd, fsnotify, signalfd, and epoll provide, in addition to
+the regular pos/flags pair, information particular to the objects they
+represent.
+
+	Eventfd files
+	~~~~~~~~~~~~~
+	pos:	0
+	flags:	04002
+	eventfd-count:	5a
+
+	where 'eventfd-count' is the hex value of the counter.
+
+	Signalfd files
+	~~~~~~~~~~~~~~
+	pos:	0
+	flags:	04002
+	sigmask:	0000000000000200
+
+	where 'sigmask' is the hex value of the signal mask associated
+	with the file.
+
+	Epoll files
+	~~~~~~~~~~~
+	pos:	0
+	flags:	02
+	tfd:        5 events:       1d data: ffffffffffffffff
+
+	where 'tfd' is a target file descriptor number in decimal form,
+	'events' is the event mask being watched and 'data' is the data
+	associated with the target [see epoll(7) for more details].
+
+	Fsnotify files
+	~~~~~~~~~~~~~~
+	For inotify files the format is the following
+
+	pos:	0
+	flags:	02000000
+	inotify wd:3 ino:9e7e sdev:800013 mask:800afce ignored_mask:0 fhandle-bytes:8 fhandle-type:1 f_handle:7e9e0000640d1b6d
+
+	where 'wd' is a watch descriptor in decimal form, i.e. a target file
+	descriptor number, 'ino' and 'sdev' are the inode and device where the
+	target file resides and 'mask' is the mask of events, all in hex
+	form [see inotify(7) for more details].
+
+	If the kernel was built with exportfs support, the path to the target
+	file is encoded as a file handle.  The file handle is provided by three
+	fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex
+	format.
+
+	If the kernel is built without exportfs support the file handle won't be
+	printed out.
+
+	If there is no inotify mark attached yet, the 'inotify' line will be omitted.
+
+	For fanotify files the format is
+
+	pos:	0
+	flags:	02
+	fanotify flags:10 event-flags:0
+	fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003
+	fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4
+
+	where fanotify 'flags' and 'event-flags' are the values used in the
+	fanotify_init call, 'mnt_id' is the mount point identifier, and 'mflags'
+	is the value of the flags associated with the mark, which are tracked
+	separately from the event mask. 'ino' and 'sdev' are the target inode
+	and device, 'mask' is the event mask and 'ignored_mask' is the mask of
+	events which are to be ignored. All are in hex format. Together,
+	'mflags', 'mask' and 'ignored_mask' provide information about the flags
+	and mask used in the fanotify_mark call [see the fsnotify manpage for
+	details].
+
+	While the first three lines are mandatory and always printed, the rest
+	is optional and may be omitted if no marks have been created yet.
+
+
 ------------------------------------------------------------------------------
 Configuring procfs
 ------------------------------------------------------------------------------
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index de1e6c4..d230dd9 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -111,6 +111,15 @@
                  useful when mounting devices (like digital cameras)
                  that are set to UTC in order to avoid the pitfalls of
                  local time.
+time_offset=minutes
+	      -- Set the offset for conversion of timestamps from the local
+		 time used by FAT to UTC. That is, <minutes> minutes will be
+		 subtracted from each timestamp to convert it to the UTC used
+		 internally by Linux. This is useful when the time zone set in
+		 sys_tz is not the time zone used by the filesystem. Note that
+		 this option still does not provide correct time stamps in all
+		 cases in the presence of DST - time stamps in a different DST
+		 setting will be off by one hour.
 
 showexec      -- If set, the execute permission bits of the file will be
 		 allowed only if the extension part of the name is .EXE,
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 2ee133e..e3869098 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,7 +350,6 @@
 	int (*readlink) (struct dentry *, char __user *,int);
         void * (*follow_link) (struct dentry *, struct nameidata *);
         void (*put_link) (struct dentry *, struct nameidata *, void *);
-	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int);
 	int (*get_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
@@ -431,16 +430,6 @@
   	started might not be in the page cache at the end of the
   	walk).
 
-  truncate: Deprecated. This will not be called if ->setsize is defined.
-	Called by the VFS to change the size of a file.  The
- 	i_size field of the inode is set to the desired size by the
- 	VFS before this method is called.  This method is called by
- 	the truncate(2) system call and related functionality.
-
-	Note: ->truncate and vmtruncate are deprecated. Do not add new
-	instances/calls of these. Filesystems should be converted to do their
-	truncate sequence via ->setattr().
-
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
 
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87
index 87850d8..8386aad 100644
--- a/Documentation/hwmon/it87
+++ b/Documentation/hwmon/it87
@@ -209,3 +209,13 @@
 Trip points must be set properly before switching to automatic fan speed
 control mode. The driver will perform basic integrity checks before
 actually switching to automatic control mode.
+
+
+Temperature offset attributes
+-----------------------------
+
+The driver supports temp[1-3]_offset sysfs attributes to adjust the reported
+temperature for thermal diodes or diode-connected thermal transistors.
+If a temperature sensor is configured for thermistors, the attribute values
+are ignored. If the thermal sensor type is Intel PECI, the temperature offset
+must be programmed to the critical CPU temperature.
diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol
index 49f5b68..d1f2261 100644
--- a/Documentation/i2c/smbus-protocol
+++ b/Documentation/i2c/smbus-protocol
@@ -23,6 +23,12 @@
 single data byte, the functions using SMBus protocol operation names execute
 a different protocol operation entirely.
 
+Each transaction type corresponds to a functionality flag. Before calling a
+transaction function, a device driver should always check (just once) for
+the corresponding functionality flag to ensure that the underlying I2C
+adapter supports the transaction in question. See
+<file:Documentation/i2c/functionality> for the details.
+
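+For example, a driver relying on SMBus Read Byte Data might check, once at
+probe time (a minimal sketch; MY_REG stands for a hypothetical device
+register):
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_READ_BYTE_DATA))
+		return -ENODEV;
+
+	value = i2c_smbus_read_byte_data(client, MY_REG);
+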
 
 Key to symbols
 ==============
@@ -49,6 +55,8 @@
 
 A Addr Rd/Wr [A] P
 
+Functionality flag: I2C_FUNC_SMBUS_QUICK
+
 
 SMBus Receive Byte:  i2c_smbus_read_byte()
 ==========================================
@@ -60,6 +68,8 @@
 
 S Addr Rd [A] [Data] NA P
 
+Functionality flag: I2C_FUNC_SMBUS_READ_BYTE
+
 
 SMBus Send Byte:  i2c_smbus_write_byte()
 ========================================
@@ -69,6 +79,8 @@
 
 S Addr Wr [A] Data [A] P
 
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE
+
 
 SMBus Read Byte:  i2c_smbus_read_byte_data()
 ============================================
@@ -78,6 +90,8 @@
 
 S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
 
+Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA
+
 
 SMBus Read Word:  i2c_smbus_read_word_data()
 ============================================
@@ -88,6 +102,8 @@
 
 S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
 
+Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA
+
 Note the convenience function i2c_smbus_read_word_swapped is
 available for reads where the two data bytes are the other way
 around (not SMBus compliant, but very popular.)
@@ -102,6 +118,8 @@
 
 S Addr Wr [A] Comm [A] Data [A] P
 
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE_DATA
+
 
 SMBus Write Word:  i2c_smbus_write_word_data()
 ==============================================
@@ -112,6 +130,8 @@
 
 S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] P
 
+Functionality flag: I2C_FUNC_SMBUS_WRITE_WORD_DATA
+
 Note the convenience function i2c_smbus_write_word_swapped is
 available for writes where the two data bytes are the other way
 around (not SMBus compliant, but very popular.)
@@ -126,6 +146,8 @@
 S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] 
                              S Addr Rd [A] [DataLow] A [DataHigh] NA P
 
+Functionality flag: I2C_FUNC_SMBUS_PROC_CALL
+
 
 SMBus Block Read:  i2c_smbus_read_block_data()
 ==============================================
@@ -137,6 +159,8 @@
 S Addr Wr [A] Comm [A] 
            S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
 
+Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA
+
 
 SMBus Block Write:  i2c_smbus_write_block_data()
 ================================================
@@ -147,6 +171,8 @@
 
 S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P
 
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BLOCK_DATA
+
 
 SMBus Block Write - Block Read Process Call
 ===========================================
@@ -160,6 +186,8 @@
 S Addr Wr [A] Comm [A] Count [A] Data [A] ...
                              S Addr Rd [A] [Count] A [Data] ... A P
 
+Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL
+
 
 SMBus Host Notify
 =================
@@ -229,15 +257,7 @@
 S Addr Wr [A] Comm [A] 
            S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
 
-
-I2C Block Read (2 Comm bytes)
-=============================
-
-This command reads a block of bytes from a device, from a 
-designated register that is specified through the two Comm bytes.
-
-S Addr Wr [A] Comm1 [A] Comm2 [A] 
-           S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK
 
 
 I2C Block Write:  i2c_smbus_write_i2c_block_data()
@@ -249,3 +269,5 @@
 supported as they are indistinguishable from data.
 
 S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_I2C_BLOCK
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 20e248c..363e348 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -446,12 +446,6 @@
 			possible to determine what the correct size should be.
 			This option provides an override for these situations.
 
-	capability.disable=
-			[SECURITY] Disable capabilities.  This would normally
-			be used only if an alternative security model is to be
-			configured.  Potentially dangerous and should only be
-			used if you are entirely sure of the consequences.
-
 	ccw_timeout_log [S390]
 			See Documentation/s390/CommonIO for details.
 
@@ -1503,9 +1497,10 @@
 	mem=nn[KMG]	[KNL,BOOT] Force usage of a specific amount of memory
 			Amount of memory to be used when the kernel is not able
 			to see the whole system memory or for test.
-			[X86-32] Use together with memmap= to avoid physical
-			address space collisions. Without memmap= PCI devices
-			could be placed at addresses belonging to unused RAM.
+			[X86] Acts as a limit on the maximum usable address. Use together
+			with memmap= to avoid physical address space collisions.
+			Without memmap= PCI devices could be placed at addresses
+			belonging to unused RAM.
 
 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.
@@ -2032,6 +2027,9 @@
 
 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.
 
+	numa_balancing=	[KNL,X86] Enable or disable automatic NUMA balancing.
+			Allowed values are enable and disable.
+
 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
 			one of ['zone', 'node', 'default'] can be specified
 			This can be set from sysctl after boot.
diff --git a/Documentation/kref.txt b/Documentation/kref.txt
index 48ba715..ddf85a5 100644
--- a/Documentation/kref.txt
+++ b/Documentation/kref.txt
@@ -213,3 +213,91 @@
 and:
   http://www.kroah.com/linux/talks/ols_2004_kref_talk/
 
+
+The above example could also be optimized using kref_get_unless_zero() in
+the following way:
+
+static struct my_data *get_entry()
+{
+	struct my_data *entry = NULL;
+	mutex_lock(&mutex);
+	if (!list_empty(&q)) {
+		entry = container_of(q.next, struct my_data, link);
+		if (!kref_get_unless_zero(&entry->refcount))
+			entry = NULL;
+	}
+	mutex_unlock(&mutex);
+	return entry;
+}
+
+static void release_entry(struct kref *ref)
+{
+	struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+	mutex_lock(&mutex);
+	list_del(&entry->link);
+	mutex_unlock(&mutex);
+	kfree(entry);
+}
+
+static void put_entry(struct my_data *entry)
+{
+	kref_put(&entry->refcount, release_entry);
+}
+
+This is useful for removing the mutex lock around kref_put() in put_entry(),
+but it is important that kref_get_unless_zero is enclosed in the same
+critical section that finds the entry in the lookup table; otherwise
+kref_get_unless_zero may reference already freed memory.
+Note that it is illegal to use kref_get_unless_zero without checking its
+return value. If you are sure (by already having a valid pointer) that
+kref_get_unless_zero() will return true, then use kref_get() instead.
+
+The function kref_get_unless_zero also makes it possible to use RCU
+locking for lookups in the above example:
+
+struct my_data
+{
+	struct rcu_head rhead;
+	.
+	struct kref refcount;
+	.
+	.
+};
+
+static struct my_data *get_entry_rcu()
+{
+	struct my_data *entry = NULL;
+	rcu_read_lock();
+	if (!list_empty(&q)) {
+		entry = container_of(q.next, struct my_data, link);
+		if (!kref_get_unless_zero(&entry->refcount))
+			entry = NULL;
+	}
+	rcu_read_unlock();
+	return entry;
+}
+
+static void release_entry_rcu(struct kref *ref)
+{
+	struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+	mutex_lock(&mutex);
+	list_del_rcu(&entry->link);
+	mutex_unlock(&mutex);
+	kfree_rcu(entry, rhead);
+}
+
+static void put_entry(struct my_data *entry)
+{
+	kref_put(&entry->refcount, release_entry_rcu);
+}
+
+But note that the struct kref member needs to remain in valid memory for an
+RCU grace period after release_entry_rcu was called. That can be accomplished
+by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
+before using kfree, but note that synchronize_rcu() may sleep for a
+substantial amount of time.
+
+
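+A minimal sketch of that synchronize_rcu() alternative, reusing the same
+my_data, mutex and list from the examples above (assumption: callers of
+kref_put may sleep, as they already do for mutex_lock):
+
+static void release_entry_sync(struct kref *ref)
+{
+	struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+	mutex_lock(&mutex);
+	list_del_rcu(&entry->link);
+	mutex_unlock(&mutex);
+	synchronize_rcu();	/* may sleep for a long time */
+	kfree(entry);		/* no kfree_rcu() needed after the wait */
+}
+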
+Thomas Hellstrom <thellstrom@vmware.com>
diff --git a/Documentation/powerpc/ptrace.txt b/Documentation/powerpc/ptrace.txt
index f4a5499..f2a7a39 100644
--- a/Documentation/powerpc/ptrace.txt
+++ b/Documentation/powerpc/ptrace.txt
@@ -127,6 +127,22 @@
   p.addr2           = (uint64_t) end_range;
   p.condition_value = 0;
 
+- set a watchpoint in server processors (BookS)
+
+  p.version         = 1;
+  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_RW;
+  p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+  or
+  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+
+  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+  p.addr            = (uint64_t) begin_range;
+  /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
+   * addr2 - addr <= 8 Bytes.
+   */
+  p.addr2           = (uint64_t) end_range;
+  p.condition_value = 0;
+
 3. PTRACE_DELHWDEBUG
 
 Takes an integer which identifies an existing breakpoint or watchpoint
diff --git a/Documentation/prctl/seccomp_filter.txt b/Documentation/prctl/seccomp_filter.txt
index 597c3c5..1e469ef 100644
--- a/Documentation/prctl/seccomp_filter.txt
+++ b/Documentation/prctl/seccomp_filter.txt
@@ -95,12 +95,15 @@
 
 SECCOMP_RET_TRAP:
 	Results in the kernel sending a SIGSYS signal to the triggering
-	task without executing the system call.  The kernel will
-	rollback the register state to just before the system call
-	entry such that a signal handler in the task will be able to
-	inspect the ucontext_t->uc_mcontext registers and emulate
-	system call success or failure upon return from the signal
-	handler.
+	task without executing the system call.  siginfo->si_call_addr
+	will show the address of the system call instruction, and
+	siginfo->si_syscall and siginfo->si_arch will indicate which
+	syscall was attempted.  The program counter will be as though
+	the syscall happened (i.e. it will not point to the syscall
+	instruction).  The return value register will contain an arch-
+	dependent value -- if resuming execution, set it to something
+	sensible.  (The architecture dependency is because replacing
+	it with -ENOSYS could overwrite some useful information.)
 
 	The SECCOMP_RET_DATA portion of the return value will be passed
 	as si_errno.
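+
+	For illustration, a SIGSYS handler inspecting these fields might
+	look like the sketch below (it assumes a libc recent enough to
+	expose si_syscall, si_arch and si_call_addr in siginfo_t):
+
+	#define _GNU_SOURCE
+	#include <signal.h>
+	#include <stdio.h>
+
+	static void sigsys_handler(int sig, siginfo_t *info, void *uc)
+	{
+		(void)sig;
+		(void)uc;
+		/* SECCOMP_RET_DATA arrives in si_errno. */
+		fprintf(stderr, "denied syscall %d (arch 0x%x) at %p, data %d\n",
+			info->si_syscall, (unsigned)info->si_arch,
+			info->si_call_addr, info->si_errno);
+		/* Emulate a result via the ucontext_t registers if resuming. */
+	}
+
+	/* Install with sigaction() and SA_SIGINFO set in sa_flags. */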
@@ -123,6 +126,18 @@
 	the BPF program return value will be available to the tracer
 	via PTRACE_GETEVENTMSG.
 
+	The tracer can skip the system call by changing the syscall number
+	to -1.  Alternatively, the tracer can change the system call
+	requested by changing the system call to a valid syscall number.  If
+	the tracer asks to skip the system call, then the system call will
+	appear to return the value that the tracer puts in the return value
+	register.
+
+	The seccomp check will not be run again after the tracer is
+	notified.  (This means that seccomp-based sandboxes MUST NOT
+	allow use of ptrace, even of other sandboxed processes, without
+	extreme care; ptracers can use this mechanism to escape.)
+
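+	How the tracer skips a call is architecture-specific; on x86-64 it
+	amounts to overwriting orig_rax at the trace stop, roughly as in
+	this sketch (the child is already stopped; error handling omitted):
+
+	#include <sys/ptrace.h>
+	#include <sys/types.h>
+	#include <sys/user.h>
+
+	static void skip_syscall(pid_t child, long retval)
+	{
+		struct user_regs_struct regs;
+
+		ptrace(PTRACE_GETREGS, child, 0, &regs);
+		regs.orig_rax = -1;	/* -1 skips the system call */
+		regs.rax = retval;	/* value the tracee sees returned */
+		ptrace(PTRACE_SETREGS, child, 0, &regs);
+	}
+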
 SECCOMP_RET_ALLOW:
 	Results in the system call being executed.
 
@@ -161,3 +176,50 @@
 support seccomp filter with minor fixup: SIGSYS support and seccomp return
 value checking.  Then it must just add CONFIG_HAVE_ARCH_SECCOMP_FILTER
 to its arch-specific Kconfig.
+
+
+
+Caveats
+-------
+
+The vDSO can cause some system calls to run entirely in userspace,
+leading to surprises when you run programs on different machines that
+fall back to real syscalls.  To minimize these surprises on x86, make
+sure you test with
+/sys/devices/system/clocksource/clocksource0/current_clocksource set to
+something like acpi_pm.
+
+On x86-64, vsyscall emulation is enabled by default.  (vsyscalls are
+legacy variants on vDSO calls.)  Currently, emulated vsyscalls will
+honor seccomp, with a few oddities:
+
+- A return value of SECCOMP_RET_TRAP will set a si_call_addr pointing to
+  the vsyscall entry for the given call and not the address after the
+  'syscall' instruction.  Any code which wants to restart the call
+  should be aware that (a) a ret instruction has been emulated and (b)
+  trying to resume the syscall will again trigger the standard vsyscall
+  emulation security checks, making resuming the syscall mostly
+  pointless.
+
+- A return value of SECCOMP_RET_TRACE will signal the tracer as usual,
+  but the syscall may not be changed to another system call using the
+  orig_rax register. It may only be changed to -1 in order to skip the
+  currently emulated call. Any other change MAY terminate the process.
+  The rip value seen by the tracer will be the syscall entry address;
+  this is different from normal behavior.  The tracer MUST NOT modify
+  rip or rsp.  (Do not rely on other changes terminating the process.
+  They might work.  For example, on some kernels, choosing a syscall
+  that only exists in future kernels will be correctly emulated by
+  returning -ENOSYS.)
+
+To detect this quirky behavior, check for addr & ~0x0C00 ==
+0xFFFFFFFFFF600000.  (For SECCOMP_RET_TRACE, use rip.  For
+SECCOMP_RET_TRAP, use siginfo->si_call_addr.)  Do not check any other
+condition: future kernels may improve vsyscall emulation and current
+kernels in vsyscall=native mode will behave differently, but the
+instructions at 0xF...F600{0,4,8,C}00 will not be system calls in these
+cases.
+
+Note that modern systems are unlikely to use vsyscalls at all -- they
+are a legacy feature and they are considerably slower than standard
+syscalls.  New code will use the vDSO, and vDSO-issued system calls
+are indistinguishable from normal system calls.
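+
+Written as actual C, the detection check above needs parentheses, since
+== binds more tightly than & in C; a sketch:
+
+#include <stdbool.h>
+#include <stdint.h>
+
+static bool is_vsyscall_addr(uint64_t addr)
+{
+	/* Matches 0xFFFFFFFFFF600{0,4,8,C}00, the vsyscall entries. */
+	return (addr & ~UINT64_C(0xC00)) == UINT64_C(0xFFFFFFFFFF600000);
+}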
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX
index eeed1de..414235c 100644
--- a/Documentation/security/00-INDEX
+++ b/Documentation/security/00-INDEX
@@ -12,6 +12,8 @@
 	- documentation on the AppArmor security extension.
 credentials.txt
 	- documentation about credentials in Linux.
+keys-ecryptfs.txt
+	- description of the encryption keys for the ecryptfs filesystem.
 keys-request-key.txt
 	- description of the kernel key request service.
 keys-trusted-encrypted.txt
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index 7d9ca92..7b4145d 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -994,6 +994,23 @@
     reference pointer if successful.
 
 
+(*) A keyring can be created by:
+
+	struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
+				  const struct cred *cred,
+				  key_perm_t perm,
+				  unsigned long flags,
+				  struct key *dest);
+
+    This creates a keyring with the given attributes and returns it.  If dest
+    is not NULL, the new keyring will be linked into the keyring to which it
+    points.  No permission checks are made upon the destination keyring.
+
+    Error EDQUOT can be returned if the keyring would overload the quota (pass
+    KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted
+    towards the user's quota).  Error ENOMEM can also be returned.
+
+
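+    For illustration, a hypothetical in-kernel caller, following the
+    prototype documented above (the description string and permission
+    mask here are invented):
+
+	#include <linux/cred.h>
+	#include <linux/err.h>
+	#include <linux/key.h>
+
+	static int make_example_keyring(void)
+	{
+		struct key *keyring;
+
+		keyring = keyring_alloc("_example", 0, 0, current_cred(),
+					KEY_POS_ALL | KEY_USR_VIEW,
+					KEY_ALLOC_NOT_IN_QUOTA, NULL);
+		if (IS_ERR(keyring))
+			return PTR_ERR(keyring);
+		key_put(keyring);	/* drop the reference when done */
+		return 0;
+	}
+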
 (*) To check the validity of a key, this function can be called:
 
 	int validate_key(struct key *key);
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt
index 4909d41..eceab13 100644
--- a/Documentation/sparse.txt
+++ b/Documentation/sparse.txt
@@ -49,6 +49,24 @@
 __bitwise - noisy stuff; in particular, __le*/__be* are that.  We really
 don't want to drown in noise unless we'd explicitly asked for it.
 
+Using sparse for lock checking
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following macros are undefined for gcc and defined during a sparse
+run to use the "context" tracking feature of sparse, applied to
+locking.  These annotations tell sparse when a lock is held, with
+regard to the annotated function's entry and exit.
+
+__must_hold - The specified lock is held on function entry and exit.
+
+__acquires - The specified lock is held on function exit, but not entry.
+
+__releases - The specified lock is held on function entry, but not exit.
+
+If the function enters and exits without the lock held, acquiring and
+releasing the lock inside the function in a balanced way, no
+annotation is needed.  The three annotations above are for cases where
+sparse would otherwise report a context imbalance.
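+
+For example (mydev and its spinlock are made up for illustration):
+
+#include <linux/spinlock.h>
+
+struct mydev {
+	spinlock_t lock;
+	int count;
+};
+
+static void modify_count(struct mydev *dev) __must_hold(&dev->lock)
+{
+	dev->count++;			/* lock held on entry and exit */
+}
+
+static void take_lock(struct mydev *dev) __acquires(&dev->lock)
+{
+	spin_lock(&dev->lock);		/* held on exit, not on entry */
+}
+
+static void drop_lock(struct mydev *dev) __releases(&dev->lock)
+{
+	spin_unlock(&dev->lock);	/* held on entry, not on exit */
+}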
 
 Getting sparse
 ~~~~~~~~~~~~~~
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index f15cb74..406d82d 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -373,7 +373,7 @@
 	1  Loadlin
 	2  bootsect-loader	(0x20, all other values reserved)
 	3  Syslinux
-	4  Etherboot/gPXE
+	4  Etherboot/gPXE/iPXE
 	5  ELILO
 	7  GRUB
 	8  U-Boot
@@ -381,6 +381,7 @@
 	A  Gujin
 	B  Qemu
 	C  Arcturus Networks uCbootloader
+	D  kexec-tools
 	E  Extended		(see ext_loader_type)
 	F  Special		(0xFF = undefined)
        10  Reserved
diff --git a/Documentation/xtensa/atomctl.txt b/Documentation/xtensa/atomctl.txt
new file mode 100644
index 0000000..10a8d1f
--- /dev/null
+++ b/Documentation/xtensa/atomctl.txt
@@ -0,0 +1,44 @@
+We have an Atomic Operation Control (ATOMCTL) register.
+This register determines the effect of using an S32C1I instruction
+with various combinations of:
+
+     1. With and without a Coherent Cache Controller which
+        can do Atomic Transactions to the memory internally.
+
+     2. With and without an Intelligent Memory Controller which
+        can do Atomic Transactions itself.
+
+The core comes up with a default value for the three types of cache ops:
+
+      0x28: (WB: Internal, WT: Internal, BY: Exception)
+
+On the FPGA cards we typically simulate an Intelligent Memory Controller
+which can implement RCW transactions. For FPGA cards with an External
+Memory Controller we let it do the atomic operations internally while
+doing a Cached (WB) transaction and use the Memory RCW for un-cached
+operations.
+
+For systems without a coherent cache controller (non-MX), we always
+use the memory controller's RCW, though non-MX controllers likely
+support the Internal Operation.
+
+CUSTOMER-WARNING:
+   Virtually all customers buy their memory controllers from vendors that
+   don't support atomic RCW memory transactions and will likely want to
+   configure this register to not use RCW.
+
+Developers might find using RCW in Bypass mode convenient when testing
+with the cache being bypassed, for example when studying cache alias
+problems.
+
+See Section 4.3.12.4 of the ISA for the bit layout:
+
+                             WB     WT      BY
+                           5   4 | 3   2 | 1   0
+  2 Bit
+  Field
+  Values     WB - Write Back         WT - Write Thru         BY - Bypass
+---------    ---------------         -----------------     ----------------
+    0        Exception               Exception               Exception
+    1        RCW Transaction         RCW Transaction         RCW Transaction
+    2        Internal Operation      Exception               Reserved
+    3        Reserved                Reserved                Reserved
diff --git a/MAINTAINERS b/MAINTAINERS
index f71d2f9..4e2a1f6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1280,7 +1280,7 @@
 F:	drivers/hwmon/asc7621.c
 
 ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS
-M:	Corentin Chary <corentincj@iksaif.net>
+M:	Corentin Chary <corentin.chary@gmail.com>
 L:	acpi4asus-user@lists.sourceforge.net
 L:	platform-driver-x86@vger.kernel.org
 W:	http://acpi4asus.sf.net
@@ -1929,7 +1929,7 @@
 
 CHINESE DOCUMENTATION
 M:	Harry Wei <harryxiyou@gmail.com>
-L:	xiyoulinuxkernelgroup@googlegroups.com
+L:	xiyoulinuxkernelgroup@googlegroups.com (subscribers-only)
 L:	linux-kernel@zh-kernel.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/zh_CN/
@@ -2549,6 +2549,15 @@
 F:	drivers/gpu/drm/exynos
 F:	include/drm/exynos*
 
+DRM DRIVERS FOR NVIDIA TEGRA
+M:	Thierry Reding <thierry.reding@avionic-design.de>
+L:	dri-devel@lists.freedesktop.org
+L:	linux-tegra@vger.kernel.org
+T:	git git://gitorious.org/thierryreding/linux.git
+S:	Maintained
+F:	drivers/gpu/drm/tegra/
+F:	Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
+
 DSCC4 DRIVER
 M:	Francois Romieu <romieu@fr.zoreil.com>
 L:	netdev@vger.kernel.org
@@ -2973,7 +2982,6 @@
 S:	Maintained
 F:	Documentation/filesystems/ext3.txt
 F:	fs/ext3/
-F:	include/linux/ext3*
 
 EXT4 FILE SYSTEM
 M:	"Theodore Ts'o" <tytso@mit.edu>
@@ -3120,7 +3128,8 @@
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git
 S:	Maintained
 F:	drivers/firewire/
-F:	include/linux/firewire*.h
+F:	include/linux/firewire.h
+F:	include/uapi/linux/firewire*.h
 F:	tools/firewire/
 
 FIRMWARE LOADER (request_firmware)
@@ -3712,7 +3721,7 @@
 M:	"Mark M. Hoffman" <mhoffman@lightlink.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
-F:	drivers/i2c/busses/i2c-stub.c
+F:	drivers/i2c/i2c-stub.c
 
 I2C SUBSYSTEM
 M:	Wolfram Sang <w.sang@pengutronix.de>
@@ -4305,7 +4314,6 @@
 L:	linux-ext4@vger.kernel.org
 S:	Maintained
 F:	fs/jbd/
-F:	include/linux/ext3_jbd.h
 F:	include/linux/jbd.h
 
 JOURNALLING LAYER FOR BLOCK DEVICES (JBD2)
@@ -6483,7 +6491,7 @@
 F:	include/media/saa7146*
 
 SAMSUNG LAPTOP DRIVER
-M:	Corentin Chary <corentincj@iksaif.net>
+M:	Corentin Chary <corentin.chary@gmail.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	drivers/platform/x86/samsung-laptop.c
@@ -7537,6 +7545,13 @@
 F:	sound/soc/codecs/lm49453*
 F:	sound/soc/codecs/isabelle*
 
+TI LP855x BACKLIGHT DRIVER
+M:	Milo Kim <milo.kim@ti.com>
+S:	Maintained
+F:	Documentation/backlight/lp855x-driver.txt
+F:	drivers/video/backlight/lp855x_bl.c
+F:	include/linux/platform_data/lp855x.h
+
 TI TWL4030 SERIES SOC CODEC DRIVER
 M:	Peter Ujfalusi <peter.ujfalusi@ti.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
diff --git a/Makefile b/Makefile
index 540f7b2..4fe0559 100644
--- a/Makefile
+++ b/Makefile
@@ -124,7 +124,7 @@
 PHONY += $(MAKECMDGOALS) sub-make
 
 $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make
-	$(Q)@:
+	@:
 
 sub-make: FORCE
 	$(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \
@@ -981,6 +981,12 @@
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_modinst
 	$(call cmd,depmod)
 
+ifeq ($(CONFIG_MODULE_SIG), y)
+PHONY += modules_sign
+modules_sign:
+	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modsign
+endif
+
 else # CONFIG_MODULES
 
 # Modules not configured
@@ -1021,11 +1027,14 @@
 clean: rm-files := $(CLEAN_FILES)
 clean-dirs      := $(addprefix _clean_, . $(vmlinux-alldirs) Documentation samples)
 
-PHONY += $(clean-dirs) clean archclean
+PHONY += $(clean-dirs) clean archclean vmlinuxclean
 $(clean-dirs):
 	$(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
 
-clean: archclean
+vmlinuxclean:
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean
+
+clean: archclean vmlinuxclean
 
 # mrproper - Delete all generated files, including .config
 #
@@ -1252,7 +1261,6 @@
 endif # KBUILD_EXTMOD
 
 clean: $(clean-dirs)
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean
 	$(call cmd,rmdirs)
 	$(call cmd,rmfiles)
 	@find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \
diff --git a/arch/Kconfig b/arch/Kconfig
index 34884fa..7f8f281 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -80,6 +80,7 @@
 	bool "Transparent user-space probes (EXPERIMENTAL)"
 	depends on UPROBE_EVENT && PERF_EVENTS
 	default n
+	select PERCPU_RWSEM
 	help
 	  Uprobes is the user-space counterpart to kprobes: they
 	  enable instrumentation applications (such as 'perf probe')
@@ -112,6 +113,25 @@
 	  See Documentation/unaligned-memory-access.txt for more
 	  information on the topic of unaligned memory accesses.
 
+config ARCH_USE_BUILTIN_BSWAP
+       bool
+       help
+	 Modern versions of GCC (since 4.4) have builtin functions
+	 for handling byte-swapping. Using these, instead of the old
+	 inline assembler that the architecture code provides in the
+	 __arch_bswapXX() macros, allows the compiler to see what's
+	 happening and offers more opportunity for optimisation. In
+	 particular, the compiler will be able to combine the byteswap
+	 with a nearby load or store and use load-and-swap or
+	 store-and-swap instructions if the architecture has them. It
+	 should almost *never* result in code which is worse than the
+	 hand-coded assembler in <asm/swab.h>.  But just in case it
+	 does, the use of the builtins is optional.
+
+	 Any architecture with load-and-swap or store-and-swap
+	 instructions should set this. And it shouldn't hurt to set it
+	 on architectures that don't have such instructions.
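+
+	 For example (illustrative only), a byte swap such as
+
+	     #include <linux/swab.h>
+	     #include <linux/types.h>
+
+	     u32 example_be_to_cpu(u32 be_val)
+	     {
+		     /* With this option set, __swab32() expands to
+		      * __builtin_bswap32(), which the compiler can
+		      * fuse with an adjacent load or store. */
+		     return __swab32(be_val);
+	     }
+
+	 no longer hides the swap inside opaque inline assembly.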
+
 config HAVE_SYSCALL_WRAPPERS
 	bool
 
@@ -271,12 +291,6 @@
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	bool
 
-config GENERIC_KERNEL_THREAD
-	bool
-
-config GENERIC_KERNEL_EXECVE
-	bool
-
 config HAVE_ARCH_SECCOMP_FILTER
 	bool
 	help
@@ -342,6 +356,9 @@
 	  Modules only use ELF REL relocations.  Modules with ELF RELA
 	  relocations will give an error.
 
+config GENERIC_SIGALTSTACK
+	bool
+
 #
 # ABI hall of shame
 #
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5dd7f5d..9d5904c 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -20,10 +20,9 @@
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
+	select GENERIC_SIGALTSTACK
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index dcfabb9..a6e85f44 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -1,14 +1,5 @@
-include include/asm-generic/Kbuild.asm
 
 generic-y += clkdev.h
 
-header-y += compiler.h
-header-y += console.h
-header-y += fpu.h
-header-y += gentrap.h
-header-y += pal.h
-header-y += reg.h
-header-y += regdef.h
-header-y += sysinfo.h
 generic-y += exec.h
 generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/a.out.h b/arch/alpha/include/asm/a.out.h
index acdc681..9abbd24 100644
--- a/arch/alpha/include/asm/a.out.h
+++ b/arch/alpha/include/asm/a.out.h
@@ -1,94 +1,8 @@
 #ifndef __ALPHA_A_OUT_H__
 #define __ALPHA_A_OUT_H__
 
-#include <linux/types.h>
+#include <uapi/asm/a.out.h>
 
-/*
- * OSF/1 ECOFF header structs.  ECOFF files consist of:
- * 	- a file header (struct filehdr),
- *	- an a.out header (struct aouthdr),
- *	- one or more section headers (struct scnhdr). 
- *	  The filhdr's "f_nscns" field contains the
- *	  number of section headers.
- */
-
-struct filehdr
-{
-	/* OSF/1 "file" header */
-	__u16 f_magic, f_nscns;
-	__u32 f_timdat;
-	__u64 f_symptr;
-	__u32 f_nsyms;
-	__u16 f_opthdr, f_flags;
-};
-
-struct aouthdr
-{
-	__u64 info;		/* after that it looks quite normal.. */
-	__u64 tsize;
-	__u64 dsize;
-	__u64 bsize;
-	__u64 entry;
-	__u64 text_start;	/* with a few additions that actually make sense */
-	__u64 data_start;
-	__u64 bss_start;
-	__u32 gprmask, fprmask;	/* bitmask of general & floating point regs used in binary */
-	__u64 gpvalue;
-};
-
-struct scnhdr
-{
-	char	s_name[8];
-	__u64	s_paddr;
-	__u64	s_vaddr;
-	__u64	s_size;
-	__u64	s_scnptr;
-	__u64	s_relptr;
-	__u64	s_lnnoptr;
-	__u16	s_nreloc;
-	__u16	s_nlnno;
-	__u32	s_flags;
-};
-
-struct exec
-{
-	/* OSF/1 "file" header */
-	struct filehdr		fh;
-	struct aouthdr		ah;
-};
-
-/*
- * Define's so that the kernel exec code can access the a.out header
- * fields...
- */
-#define	a_info		ah.info
-#define	a_text		ah.tsize
-#define a_data		ah.dsize
-#define a_bss		ah.bsize
-#define a_entry		ah.entry
-#define a_textstart	ah.text_start
-#define	a_datastart	ah.data_start
-#define	a_bssstart	ah.bss_start
-#define	a_gprmask	ah.gprmask
-#define a_fprmask	ah.fprmask
-#define a_gpvalue	ah.gpvalue
-
-#define N_TXTADDR(x) ((x).a_textstart)
-#define N_DATADDR(x) ((x).a_datastart)
-#define N_BSSADDR(x) ((x).a_bssstart)
-#define N_DRSIZE(x) 0
-#define N_TRSIZE(x) 0
-#define N_SYMSIZE(x) 0
-
-#define AOUTHSZ		sizeof(struct aouthdr)
-#define SCNHSZ		sizeof(struct scnhdr)
-#define SCNROUND	16
-
-#define N_TXTOFF(x) \
-  ((long) N_MAGIC(x) == ZMAGIC ? 0 : \
-   (sizeof(struct exec) + (x).fh.f_nscns*SCNHSZ + SCNROUND - 1) & ~(SCNROUND - 1))
-
-#ifdef __KERNEL__
 
 /* Assume that start addresses below 4G belong to a TASO application.
    Unfortunately, there is no proper bit in the exec header to check.
@@ -98,5 +12,4 @@
 	set_personality (((BFPM->taso || EX.ah.entry < 0x100000000L \
 			   ? ADDR_LIMIT_32BIT : 0) | PER_OSF4))
 
-#endif /* __KERNEL__ */
 #endif /* __A_OUT_GNU_H__ */
diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h
index da6bb19..a7720b9 100644
--- a/arch/alpha/include/asm/compiler.h
+++ b/arch/alpha/include/asm/compiler.h
@@ -1,119 +1,8 @@
 #ifndef __ALPHA_COMPILER_H
 #define __ALPHA_COMPILER_H
 
-/* 
- * Herein are macros we use when describing various patterns we want to GCC.
- * In all cases we can get better schedules out of the compiler if we hide
- * as little as possible inside inline assembly.  However, we want to be
- * able to know what we'll get out before giving up inline assembly.  Thus
- * these tests and macros.
- */
+#include <uapi/asm/compiler.h>
 
-#if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3
-# define __kernel_insbl(val, shift)	__builtin_alpha_insbl(val, shift)
-# define __kernel_inswl(val, shift)	__builtin_alpha_inswl(val, shift)
-# define __kernel_insql(val, shift)	__builtin_alpha_insql(val, shift)
-# define __kernel_inslh(val, shift)	__builtin_alpha_inslh(val, shift)
-# define __kernel_extbl(val, shift)	__builtin_alpha_extbl(val, shift)
-# define __kernel_extwl(val, shift)	__builtin_alpha_extwl(val, shift)
-# define __kernel_cmpbge(a, b)		__builtin_alpha_cmpbge(a, b)
-#else
-# define __kernel_insbl(val, shift)					\
-  ({ unsigned long __kir;						\
-     __asm__("insbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
-     __kir; })
-# define __kernel_inswl(val, shift)					\
-  ({ unsigned long __kir;						\
-     __asm__("inswl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
-     __kir; })
-# define __kernel_insql(val, shift)					\
-  ({ unsigned long __kir;						\
-     __asm__("insql %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
-     __kir; })
-# define __kernel_inslh(val, shift)					\
-  ({ unsigned long __kir;						\
-     __asm__("inslh %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
-     __kir; })
-# define __kernel_extbl(val, shift)					\
-  ({ unsigned long __kir;						\
-     __asm__("extbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
-     __kir; })
-# define __kernel_extwl(val, shift)					\
-  ({ unsigned long __kir;						\
-     __asm__("extwl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
-     __kir; })
-# define __kernel_cmpbge(a, b)						\
-  ({ unsigned long __kir;						\
-     __asm__("cmpbge %r2,%1,%0" : "=r"(__kir) : "rI"(b), "rJ"(a));	\
-     __kir; })
-#endif
-
-#ifdef __alpha_cix__
-# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3
-#  define __kernel_cttz(x)		__builtin_ctzl(x)
-#  define __kernel_ctlz(x)		__builtin_clzl(x)
-#  define __kernel_ctpop(x)		__builtin_popcountl(x)
-# else
-#  define __kernel_cttz(x)						\
-   ({ unsigned long __kir;						\
-      __asm__("cttz %1,%0" : "=r"(__kir) : "r"(x));			\
-      __kir; })
-#  define __kernel_ctlz(x)						\
-   ({ unsigned long __kir;						\
-      __asm__("ctlz %1,%0" : "=r"(__kir) : "r"(x));			\
-      __kir; })
-#  define __kernel_ctpop(x)						\
-   ({ unsigned long __kir;						\
-      __asm__("ctpop %1,%0" : "=r"(__kir) : "r"(x));			\
-      __kir; })
-# endif
-#else
-# define __kernel_cttz(x)						\
-  ({ unsigned long __kir;						\
-     __asm__(".arch ev67; cttz %1,%0" : "=r"(__kir) : "r"(x));		\
-     __kir; })
-# define __kernel_ctlz(x)						\
-  ({ unsigned long __kir;						\
-     __asm__(".arch ev67; ctlz %1,%0" : "=r"(__kir) : "r"(x));		\
-     __kir; })
-# define __kernel_ctpop(x)						\
-  ({ unsigned long __kir;						\
-     __asm__(".arch ev67; ctpop %1,%0" : "=r"(__kir) : "r"(x));		\
-     __kir; })
-#endif
-
-
-/* 
- * Beginning with EGCS 1.1, GCC defines __alpha_bwx__ when the BWX 
- * extension is enabled.  Previous versions did not define anything
- * we could test during compilation -- too bad, so sad.
- */
-
-#if defined(__alpha_bwx__)
-#define __kernel_ldbu(mem)	(mem)
-#define __kernel_ldwu(mem)	(mem)
-#define __kernel_stb(val,mem)	((mem) = (val))
-#define __kernel_stw(val,mem)	((mem) = (val))
-#else
-#define __kernel_ldbu(mem)				\
-  ({ unsigned char __kir;				\
-     __asm__(".arch ev56;				\
-	      ldbu %0,%1" : "=r"(__kir) : "m"(mem));	\
-     __kir; })
-#define __kernel_ldwu(mem)				\
-  ({ unsigned short __kir;				\
-     __asm__(".arch ev56;				\
-	      ldwu %0,%1" : "=r"(__kir) : "m"(mem));	\
-     __kir; })
-#define __kernel_stb(val,mem)				\
-  __asm__(".arch ev56;					\
-	   stb %1,%0" : "=m"(mem) : "r"(val))
-#define __kernel_stw(val,mem)				\
-  __asm__(".arch ev56;					\
-	   stw %1,%0" : "=m"(mem) : "r"(val))
-#endif
-
-#ifdef __KERNEL__
 /* Some idiots over in <linux/compiler.h> thought inline should imply
    always_inline.  This breaks stuff.  We'll include this file whenever
    we run into such problems.  */
@@ -125,6 +14,4 @@
 #undef __always_inline
 #define __always_inline		inline __attribute__((always_inline))
 
-#endif /* __KERNEL__ */
-
 #endif /* __ALPHA_COMPILER_H */
diff --git a/arch/alpha/include/asm/console.h b/arch/alpha/include/asm/console.h
index a3ce4e6..f2b584f 100644
--- a/arch/alpha/include/asm/console.h
+++ b/arch/alpha/include/asm/console.h
@@ -1,52 +1,8 @@
 #ifndef __AXP_CONSOLE_H
 #define __AXP_CONSOLE_H
 
-/*
- * Console callback routine numbers
- */
-#define CCB_GETC		0x01
-#define CCB_PUTS		0x02
-#define CCB_RESET_TERM		0x03
-#define CCB_SET_TERM_INT	0x04
-#define CCB_SET_TERM_CTL	0x05
-#define CCB_PROCESS_KEYCODE	0x06
-#define CCB_OPEN_CONSOLE	0x07
-#define CCB_CLOSE_CONSOLE	0x08
+#include <uapi/asm/console.h>
 
-#define CCB_OPEN		0x10
-#define CCB_CLOSE		0x11
-#define CCB_IOCTL		0x12
-#define CCB_READ		0x13
-#define CCB_WRITE		0x14
-
-#define CCB_SET_ENV		0x20
-#define CCB_RESET_ENV		0x21
-#define CCB_GET_ENV		0x22
-#define CCB_SAVE_ENV		0x23
-
-#define CCB_PSWITCH		0x30
-#define CCB_BIOS_EMUL		0x32
-
-/*
- * Environment variable numbers
- */
-#define ENV_AUTO_ACTION		0x01
-#define ENV_BOOT_DEV		0x02
-#define ENV_BOOTDEF_DEV		0x03
-#define ENV_BOOTED_DEV		0x04
-#define ENV_BOOT_FILE		0x05
-#define ENV_BOOTED_FILE		0x06
-#define ENV_BOOT_OSFLAGS	0x07
-#define ENV_BOOTED_OSFLAGS	0x08
-#define ENV_BOOT_RESET		0x09
-#define ENV_DUMP_DEV		0x0A
-#define ENV_ENABLE_AUDIT	0x0B
-#define ENV_LICENSE		0x0C
-#define ENV_CHAR_SET		0x0D
-#define ENV_LANGUAGE		0x0E
-#define ENV_TTY_DEV		0x0F
-
-#ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 extern long callback_puts(long unit, const char *s, long length);
 extern long callback_getc(long unit);
@@ -70,6 +26,4 @@
 extern int callback_init_done;
 extern void * callback_init(void *);
 #endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
 #endif /* __AXP_CONSOLE_H */
diff --git a/arch/alpha/include/asm/fpu.h b/arch/alpha/include/asm/fpu.h
index e477bcd..71c2095 100644
--- a/arch/alpha/include/asm/fpu.h
+++ b/arch/alpha/include/asm/fpu.h
@@ -1,128 +1,8 @@
 #ifndef __ASM_ALPHA_FPU_H
 #define __ASM_ALPHA_FPU_H
 
-#ifdef __KERNEL__
 #include <asm/special_insns.h>
-#endif
-
-/*
- * Alpha floating-point control register defines:
- */
-#define FPCR_DNOD	(1UL<<47)	/* denorm INV trap disable */
-#define FPCR_DNZ	(1UL<<48)	/* denorms to zero */
-#define FPCR_INVD	(1UL<<49)	/* invalid op disable (opt.) */
-#define FPCR_DZED	(1UL<<50)	/* division by zero disable (opt.) */
-#define FPCR_OVFD	(1UL<<51)	/* overflow disable (optional) */
-#define FPCR_INV	(1UL<<52)	/* invalid operation */
-#define FPCR_DZE	(1UL<<53)	/* division by zero */
-#define FPCR_OVF	(1UL<<54)	/* overflow */
-#define FPCR_UNF	(1UL<<55)	/* underflow */
-#define FPCR_INE	(1UL<<56)	/* inexact */
-#define FPCR_IOV	(1UL<<57)	/* integer overflow */
-#define FPCR_UNDZ	(1UL<<60)	/* underflow to zero (opt.) */
-#define FPCR_UNFD	(1UL<<61)	/* underflow disable (opt.) */
-#define FPCR_INED	(1UL<<62)	/* inexact disable (opt.) */
-#define FPCR_SUM	(1UL<<63)	/* summary bit */
-
-#define FPCR_DYN_SHIFT	58		/* first dynamic rounding mode bit */
-#define FPCR_DYN_CHOPPED (0x0UL << FPCR_DYN_SHIFT)	/* towards 0 */
-#define FPCR_DYN_MINUS	 (0x1UL << FPCR_DYN_SHIFT)	/* towards -INF */
-#define FPCR_DYN_NORMAL	 (0x2UL << FPCR_DYN_SHIFT)	/* towards nearest */
-#define FPCR_DYN_PLUS	 (0x3UL << FPCR_DYN_SHIFT)	/* towards +INF */
-#define FPCR_DYN_MASK	 (0x3UL << FPCR_DYN_SHIFT)
-
-#define FPCR_MASK	0xffff800000000000L
-
-/*
- * IEEE trap enables are implemented in software.  These per-thread
- * bits are stored in the "ieee_state" field of "struct thread_info".
- * Thus, the bits are defined so as not to conflict with the
- * floating-point enable bit (which is architected).  On top of that,
- * we want to make these bits compatible with OSF/1 so
- * ieee_set_fp_control() etc. can be implemented easily and
- * compatibly.  The corresponding definitions are in
- * /usr/include/machine/fpu.h under OSF/1.
- */
-#define IEEE_TRAP_ENABLE_INV	(1UL<<1)	/* invalid op */
-#define IEEE_TRAP_ENABLE_DZE	(1UL<<2)	/* division by zero */
-#define IEEE_TRAP_ENABLE_OVF	(1UL<<3)	/* overflow */
-#define IEEE_TRAP_ENABLE_UNF	(1UL<<4)	/* underflow */
-#define IEEE_TRAP_ENABLE_INE	(1UL<<5)	/* inexact */
-#define IEEE_TRAP_ENABLE_DNO	(1UL<<6)	/* denorm */
-#define IEEE_TRAP_ENABLE_MASK	(IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |\
-				 IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |\
-				 IEEE_TRAP_ENABLE_INE | IEEE_TRAP_ENABLE_DNO)
-
-/* Denorm and Underflow flushing */
-#define IEEE_MAP_DMZ		(1UL<<12)	/* Map denorm inputs to zero */
-#define IEEE_MAP_UMZ		(1UL<<13)	/* Map underflowed outputs to zero */
-
-#define IEEE_MAP_MASK		(IEEE_MAP_DMZ | IEEE_MAP_UMZ)
-
-/* status bits coming from fpcr: */
-#define IEEE_STATUS_INV		(1UL<<17)
-#define IEEE_STATUS_DZE		(1UL<<18)
-#define IEEE_STATUS_OVF		(1UL<<19)
-#define IEEE_STATUS_UNF		(1UL<<20)
-#define IEEE_STATUS_INE		(1UL<<21)
-#define IEEE_STATUS_DNO		(1UL<<22)
-
-#define IEEE_STATUS_MASK	(IEEE_STATUS_INV | IEEE_STATUS_DZE |	\
-				 IEEE_STATUS_OVF | IEEE_STATUS_UNF |	\
-				 IEEE_STATUS_INE | IEEE_STATUS_DNO)
-
-#define IEEE_SW_MASK		(IEEE_TRAP_ENABLE_MASK |		\
-				 IEEE_STATUS_MASK | IEEE_MAP_MASK)
-
-#define IEEE_CURRENT_RM_SHIFT	32
-#define IEEE_CURRENT_RM_MASK	(3UL<<IEEE_CURRENT_RM_SHIFT)
-
-#define IEEE_STATUS_TO_EXCSUM_SHIFT	16
-
-#define IEEE_INHERIT    (1UL<<63)	/* inherit on thread create? */
-
-/*
- * Convert the software IEEE trap enable and status bits into the
- * hardware fpcr format. 
- *
- * Digital Unix engineers receive my thanks for not defining the
- * software bits identical to the hardware bits.  The chip designers
- * receive my thanks for making all the not-implemented fpcr bits
- * RAZ forcing us to use system calls to read/write this value.
- */
-
-static inline unsigned long
-ieee_swcr_to_fpcr(unsigned long sw)
-{
-	unsigned long fp;
-	fp = (sw & IEEE_STATUS_MASK) << 35;
-	fp |= (sw & IEEE_MAP_DMZ) << 36;
-	fp |= (sw & IEEE_STATUS_MASK ? FPCR_SUM : 0);
-	fp |= (~sw & (IEEE_TRAP_ENABLE_INV
-		      | IEEE_TRAP_ENABLE_DZE
-		      | IEEE_TRAP_ENABLE_OVF)) << 48;
-	fp |= (~sw & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE)) << 57;
-	fp |= (sw & IEEE_MAP_UMZ ? FPCR_UNDZ | FPCR_UNFD : 0);
-	fp |= (~sw & IEEE_TRAP_ENABLE_DNO) << 41;
-	return fp;
-}
-
-static inline unsigned long
-ieee_fpcr_to_swcr(unsigned long fp)
-{
-	unsigned long sw;
-	sw = (fp >> 35) & IEEE_STATUS_MASK;
-	sw |= (fp >> 36) & IEEE_MAP_DMZ;
-	sw |= (~fp >> 48) & (IEEE_TRAP_ENABLE_INV
-			     | IEEE_TRAP_ENABLE_DZE
-			     | IEEE_TRAP_ENABLE_OVF);
-	sw |= (~fp >> 57) & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE);
-	sw |= (fp >> 47) & IEEE_MAP_UMZ;
-	sw |= (~fp >> 41) & IEEE_TRAP_ENABLE_DNO;
-	return sw;
-}
-
-#ifdef __KERNEL__
+#include <uapi/asm/fpu.h>
 
 /* The following two functions don't need trapb/excb instructions
    around the mf_fpcr/mt_fpcr instructions because (a) the kernel
@@ -192,6 +72,4 @@
 extern unsigned long alpha_read_fp_reg_s (unsigned long reg);
 extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
 
-#endif /* __KERNEL__ */
-
 #endif /* __ASM_ALPHA_FPU_H */
diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h
index 6699ee5..6fcd2b5 100644
--- a/arch/alpha/include/asm/pal.h
+++ b/arch/alpha/include/asm/pal.h
@@ -1,54 +1,8 @@
 #ifndef __ALPHA_PAL_H
 #define __ALPHA_PAL_H
 
-/*
- * Common PAL-code
- */
-#define PAL_halt	  0
-#define PAL_cflush	  1
-#define PAL_draina	  2
-#define PAL_bpt		128
-#define PAL_bugchk	129
-#define PAL_chmk	131
-#define PAL_callsys	131
-#define PAL_imb		134
-#define PAL_rduniq	158
-#define PAL_wruniq	159
-#define PAL_gentrap	170
-#define PAL_nphalt	190
+#include <uapi/asm/pal.h>
 
-/*
- * VMS specific PAL-code
- */
-#define PAL_swppal	10
-#define PAL_mfpr_vptb	41
-
-/*
- * OSF specific PAL-code
- */
-#define PAL_cserve	 9
-#define PAL_wripir	13
-#define PAL_rdmces	16
-#define PAL_wrmces	17
-#define PAL_wrfen	43
-#define PAL_wrvptptr	45
-#define PAL_jtopal	46
-#define PAL_swpctx	48
-#define PAL_wrval	49
-#define PAL_rdval	50
-#define PAL_tbi		51
-#define PAL_wrent	52
-#define PAL_swpipl	53
-#define PAL_rdps	54
-#define PAL_wrkgp	55
-#define PAL_wrusp	56
-#define PAL_wrperfmon	57
-#define PAL_rdusp	58
-#define PAL_whami	60
-#define PAL_retsys	61
-#define PAL_rti		63
-
-#ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
 extern void halt(void) __attribute__((noreturn));
@@ -158,6 +112,4 @@
 #define tbia()		__tbi(-2, /* no second argument */)
 
 #endif /* !__ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
 #endif /* __ALPHA_PAL_H */
diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h
index e691ecf..bf46af5 100644
--- a/arch/alpha/include/asm/param.h
+++ b/arch/alpha/include/asm/param.h
@@ -1,27 +1,9 @@
 #ifndef _ASM_ALPHA_PARAM_H
 #define _ASM_ALPHA_PARAM_H
 
-/* ??? Gross.  I don't want to parameterize this, and supposedly the
-   hardware ignores reprogramming.  We also need userland buy-in to the 
-   change in HZ, since this is visible in the wait4 resources etc.  */
+#include <uapi/asm/param.h>
 
-#ifdef __KERNEL__
 #define HZ		CONFIG_HZ
 #define USER_HZ		HZ
-#else
-#define HZ		1024
-#endif
-
-#define EXEC_PAGESIZE	8192
-
-#ifndef NOGROUP
-#define NOGROUP		(-1)
-#endif
-
-#define MAXHOSTNAMELEN	64	/* max length of hostname */
-
-#ifdef __KERNEL__
 # define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
-#endif
-
 #endif /* _ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index b4c5b2f..2112850 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -1,77 +1,14 @@
 #ifndef _ASMAXP_PTRACE_H
 #define _ASMAXP_PTRACE_H
 
+#include <uapi/asm/ptrace.h>
 
-/*
- * This struct defines the way the registers are stored on the
- * kernel stack during a system call or other kernel entry
- *
- * NOTE! I want to minimize the overhead of system calls, so this
- * struct has as little information as possible.  I does not have
- *
- *  - floating point regs: the kernel doesn't change those
- *  - r9-15: saved by the C compiler
- *
- * This makes "fork()" and "exec()" a bit more complex, but should
- * give us low system call latency.
- */
-
-struct pt_regs {
-	unsigned long r0;
-	unsigned long r1;
-	unsigned long r2;
-	unsigned long r3;
-	unsigned long r4;
-	unsigned long r5;
-	unsigned long r6;
-	unsigned long r7;
-	unsigned long r8;
-	unsigned long r19;
-	unsigned long r20;
-	unsigned long r21;
-	unsigned long r22;
-	unsigned long r23;
-	unsigned long r24;
-	unsigned long r25;
-	unsigned long r26;
-	unsigned long r27;
-	unsigned long r28;
-	unsigned long hae;
-/* JRP - These are the values provided to a0-a2 by PALcode */
-	unsigned long trap_a0;
-	unsigned long trap_a1;
-	unsigned long trap_a2;
-/* These are saved by PAL-code: */
-	unsigned long ps;
-	unsigned long pc;
-	unsigned long gp;
-	unsigned long r16;
-	unsigned long r17;
-	unsigned long r18;
-};
-
-/*
- * This is the extended stack used by signal handlers and the context
- * switcher: it's pushed after the normal "struct pt_regs".
- */
-struct switch_stack {
-	unsigned long r9;
-	unsigned long r10;
-	unsigned long r11;
-	unsigned long r12;
-	unsigned long r13;
-	unsigned long r14;
-	unsigned long r15;
-	unsigned long r26;
-	unsigned long fp[32];	/* fp[31] is fpcr */
-};
-
-#ifdef __KERNEL__
 
 #define arch_has_single_step()		(1)
 #define user_mode(regs) (((regs)->ps & 8) != 0)
 #define instruction_pointer(regs) ((regs)->pc)
 #define profile_pc(regs) instruction_pointer(regs)
+#define current_user_stack_pointer() rdusp()
 
 #define task_pt_regs(task) \
   ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1)
@@ -83,5 +20,3 @@
 #define force_successful_syscall_return() (current_pt_regs()->r0 = 0)
 
 #endif
-
-#endif
diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h
index 4555286..8a1ac28 100644
--- a/arch/alpha/include/asm/signal.h
+++ b/arch/alpha/include/asm/signal.h
@@ -1,12 +1,8 @@
 #ifndef _ASMAXP_SIGNAL_H
 #define _ASMAXP_SIGNAL_H
 
-#include <linux/types.h>
+#include <uapi/asm/signal.h>
 
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-#ifdef __KERNEL__
 /* Digital Unix defines 64 signals.  Most things should be clean enough
    to redefine this at will, if care is taken to make libc match.  */
 
@@ -20,100 +16,6 @@
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-#define NSIG		32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
-
-
-/*
- * Linux/AXP has different signal numbers that Linux/i386: I'm trying
- * to make it OSF/1 binary compatible, at least for normal binaries.
- */
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGEMT		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGBUS		10
-#define SIGSEGV		11
-#define SIGSYS		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGURG		16
-#define SIGSTOP		17
-#define SIGTSTP		18
-#define SIGCONT		19
-#define SIGCHLD		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGIO		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGINFO		29
-#define SIGUSR1		30
-#define SIGUSR2		31
-
-#define SIGPOLL	SIGIO
-#define SIGPWR	SIGINFO
-#define SIGIOT	SIGABRT
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	32
-#define SIGRTMAX	_NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-
-#define SA_ONSTACK	0x00000001
-#define SA_RESTART	0x00000002
-#define SA_NOCLDSTOP	0x00000004
-#define SA_NODEFER	0x00000008
-#define SA_RESETHAND	0x00000010
-#define SA_NOCLDWAIT	0x00000020
-#define SA_SIGINFO	0x00000040
-
-#define SA_ONESHOT	SA_RESETHAND
-#define SA_NOMASK	SA_NODEFER
-
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	4096
-#define SIGSTKSZ	16384
-
-#define SIG_BLOCK          1	/* for blocking signals */
-#define SIG_UNBLOCK        2	/* for unblocking signals */
-#define SIG_SETMASK        3	/* for setting the signal mask */
-
-#include <asm-generic/signal-defs.h>
-
-#ifdef __KERNEL__
 struct osf_sigaction {
 	__sighandler_t	sa_handler;
 	old_sigset_t	sa_mask;
@@ -130,40 +32,5 @@
 	struct sigaction sa;
 	__sigrestore_t ka_restorer;
 };
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-struct sigaction {
-	union {
-	  __sighandler_t	_sa_handler;
-	  void (*_sa_sigaction)(int, struct siginfo *, void *);
-	} _u;
-	sigset_t	sa_mask;
-	int		sa_flags;
-};
-
-#define sa_handler	_u._sa_handler
-#define sa_sigaction	_u._sa_sigaction
-
-#endif /* __KERNEL__ */
-
-typedef struct sigaltstack {
-	void __user *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-/* sigstack(2) is deprecated, and will be withdrawn in a future version
-   of the X/Open CAE Specification.  Use sigaltstack instead.  It is only
-   implemented here for OSF/1 compatibility.  */
-
-struct sigstack {
-	void __user *ss_sp;
-	int ss_onstack;
-};
-
-#ifdef __KERNEL__
 #include <asm/sigcontext.h>
 #endif
-
-#endif
diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index 0087d05..8d806d8 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -1,87 +1,10 @@
 #ifndef _ASM_SOCKET_H
 #define _ASM_SOCKET_H
 
-#include <asm/sockios.h>
+#include <uapi/asm/socket.h>
 
-/* For setsockopt(2) */
-/*
- * Note: we only bother about making the SOL_SOCKET options
- * same as OSF/1, as that's all that "normal" programs are
- * likely to set.  We don't necessarily want to be binary
- * compatible with _everything_. 
- */
-#define SOL_SOCKET	0xffff
-
-#define SO_DEBUG	0x0001
-#define SO_REUSEADDR	0x0004
-#define SO_KEEPALIVE	0x0008
-#define SO_DONTROUTE	0x0010
-#define SO_BROADCAST	0x0020
-#define SO_LINGER	0x0080
-#define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
-
-#define SO_TYPE		0x1008
-#define SO_ERROR	0x1007
-#define SO_SNDBUF	0x1001
-#define SO_RCVBUF	0x1002
-#define SO_SNDBUFFORCE	0x100a
-#define SO_RCVBUFFORCE	0x100b
-#define	SO_RCVLOWAT	0x1010
-#define	SO_SNDLOWAT	0x1011
-#define	SO_RCVTIMEO	0x1012
-#define	SO_SNDTIMEO	0x1013
-#define SO_ACCEPTCONN	0x1014
-#define SO_PROTOCOL	0x1028
-#define SO_DOMAIN	0x1029
-
-/* linux-specific, might as well be the same as on i386 */
-#define SO_NO_CHECK	11
-#define SO_PRIORITY	12
-#define SO_BSDCOMPAT	14
-
-#define SO_PASSCRED	17
-#define SO_PEERCRED	18
-#define SO_BINDTODEVICE 25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER        26
-#define SO_DETACH_FILTER        27
-#define SO_GET_FILTER		SO_ATTACH_FILTER
-
-#define SO_PEERNAME		28
-#define SO_TIMESTAMP		29
-#define SCM_TIMESTAMP		SO_TIMESTAMP
-
-#define SO_PEERSEC		30
-#define SO_PASSSEC		34
-#define SO_TIMESTAMPNS		35
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
-
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION		19
-#define SO_SECURITY_ENCRYPTION_TRANSPORT	20
-#define SO_SECURITY_ENCRYPTION_NETWORK		21
-
-#define SO_MARK			36
-
-#define SO_TIMESTAMPING		37
-#define SCM_TIMESTAMPING	SO_TIMESTAMPING
-
-#define SO_RXQ_OVFL             40
-
-#define SO_WIFI_STATUS		41
-#define SCM_WIFI_STATUS		SO_WIFI_STATUS
-#define SO_PEEK_OFF		42
-
-/* Instruct lower device to use last 4-bytes of skb data as FCS */
-#define SO_NOFCS		43
-
-#ifdef __KERNEL__
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
 #define SOCK_NONBLOCK	0x40000000
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h
index fa13716..7fde0f8 100644
--- a/arch/alpha/include/asm/termios.h
+++ b/arch/alpha/include/asm/termios.h
@@ -1,72 +1,8 @@
 #ifndef _ALPHA_TERMIOS_H
 #define _ALPHA_TERMIOS_H
 
-#include <asm/ioctls.h>
-#include <asm/termbits.h>
+#include <uapi/asm/termios.h>
 
-struct sgttyb {
-	char	sg_ispeed;
-	char	sg_ospeed;
-	char	sg_erase;
-	char	sg_kill;
-	short	sg_flags;
-};
-
-struct tchars {
-	char	t_intrc;
-	char	t_quitc;
-	char	t_startc;
-	char	t_stopc;
-	char	t_eofc;
-	char	t_brkc;
-};
-
-struct ltchars {
-	char	t_suspc;
-	char	t_dsuspc;
-	char	t_rprntc;
-	char	t_flushc;
-	char	t_werasc;
-	char	t_lnextc;
-};
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-/*
- * c_cc characters in the termio structure.  Oh, how I love being
- * backwardly compatible.  Notice that character 4 and 5 are
- * interpreted differently depending on whether ICANON is set in
- * c_lflag.  If it's set, they are used as _VEOF and _VEOL, otherwise
- * as _VMIN and V_TIME.  This is for compatibility with OSF/1 (which
- * is compatible with sysV)...
- */
-#define _VINTR	0
-#define _VQUIT	1
-#define _VERASE	2
-#define _VKILL	3
-#define _VEOF	4
-#define _VMIN	4
-#define _VEOL	5
-#define _VTIME	5
-#define _VEOL2	6
-#define _VSWTC	7
-
-#ifdef __KERNEL__
 /*	eof=^D		eol=\0		eol2=\0		erase=del
 	werase=^W	kill=^U		reprint=^R	sxtc=\0
 	intr=^C		quit=^\		susp=^Z		<OSF/1 VDSUSP>
@@ -141,6 +77,4 @@
 #define kernel_termios_to_user_termios(u, k) \
 	copy_to_user(u, k, sizeof(struct termios))
 
-#endif	/* __KERNEL__ */
-
 #endif	/* _ALPHA_TERMIOS_H */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index 0a05790..f61e1a5 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -1,18 +1,7 @@
 #ifndef _ALPHA_TYPES_H
 #define _ALPHA_TYPES_H
 
-/*
- * This file is never included by application software unless
- * explicitly requested (e.g., via linux/types.h) in which case the
- * application is Linux specific so (user-) name space pollution is
- * not a major issue.  However, for interoperability, libraries still
- * need to be careful to avoid a name clashes.
- */
-
-#ifdef __KERNEL__
 #include <asm-generic/int-ll64.h>
-#else
-#include <asm-generic/int-l64.h>
-#endif
+#include <uapi/asm/types.h>
 
 #endif /* _ALPHA_TYPES_H */
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index eb3a466..b3396ee 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -1,474 +1,8 @@
 #ifndef _ALPHA_UNISTD_H
 #define _ALPHA_UNISTD_H
 
-#define __NR_osf_syscall	  0	/* not implemented */
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_osf_old_open	  5	/* not implemented */
-#define __NR_close		  6
-#define __NR_osf_wait4		  7
-#define __NR_osf_old_creat	  8	/* not implemented */
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_osf_execve		 11	/* not implemented */
-#define __NR_chdir		 12
-#define __NR_fchdir		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_chown		 16
-#define __NR_brk		 17
-#define __NR_osf_getfsstat	 18	/* not implemented */
-#define __NR_lseek		 19
-#define __NR_getxpid		 20
-#define __NR_osf_mount		 21
-#define __NR_umount		 22
-#define __NR_setuid		 23
-#define __NR_getxuid		 24
-#define __NR_exec_with_loader	 25	/* not implemented */
-#define __NR_ptrace		 26
-#define __NR_osf_nrecvmsg	 27	/* not implemented */
-#define __NR_osf_nsendmsg	 28	/* not implemented */
-#define __NR_osf_nrecvfrom	 29	/* not implemented */
-#define __NR_osf_naccept	 30	/* not implemented */
-#define __NR_osf_ngetpeername	 31	/* not implemented */
-#define __NR_osf_ngetsockname	 32	/* not implemented */
-#define __NR_access		 33
-#define __NR_osf_chflags	 34	/* not implemented */
-#define __NR_osf_fchflags	 35	/* not implemented */
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_osf_old_stat	 38	/* not implemented */
-#define __NR_setpgid		 39
-#define __NR_osf_old_lstat	 40	/* not implemented */
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_osf_set_program_attributes	43
-#define __NR_osf_profil		 44	/* not implemented */
-#define __NR_open		 45
-#define __NR_osf_old_sigaction	 46	/* not implemented */
-#define __NR_getxgid		 47
-#define __NR_osf_sigprocmask	 48
-#define __NR_osf_getlogin	 49	/* not implemented */
-#define __NR_osf_setlogin	 50	/* not implemented */
-#define __NR_acct		 51
-#define __NR_sigpending		 52
+#include <uapi/asm/unistd.h>
 
-#define __NR_ioctl		 54
-#define __NR_osf_reboot		 55	/* not implemented */
-#define __NR_osf_revoke		 56	/* not implemented */
-#define __NR_symlink		 57
-#define __NR_readlink		 58
-#define __NR_execve		 59
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_osf_old_fstat	 62	/* not implemented */
-#define __NR_getpgrp		 63
-#define __NR_getpagesize	 64
-#define __NR_osf_mremap		 65	/* not implemented */
-#define __NR_vfork		 66
-#define __NR_stat		 67
-#define __NR_lstat		 68
-#define __NR_osf_sbrk		 69	/* not implemented */
-#define __NR_osf_sstk		 70	/* not implemented */
-#define __NR_mmap		 71	/* OSF/1 mmap is superset of Linux */
-#define __NR_osf_old_vadvise	 72	/* not implemented */
-#define __NR_munmap		 73
-#define __NR_mprotect		 74
-#define __NR_madvise		 75
-#define __NR_vhangup		 76
-#define __NR_osf_kmodcall	 77	/* not implemented */
-#define __NR_osf_mincore	 78	/* not implemented */
-#define __NR_getgroups		 79
-#define __NR_setgroups		 80
-#define __NR_osf_old_getpgrp	 81	/* not implemented */
-#define __NR_setpgrp		 82	/* BSD alias for setpgid */
-#define __NR_osf_setitimer	 83
-#define __NR_osf_old_wait	 84	/* not implemented */
-#define __NR_osf_table		 85	/* not implemented */
-#define __NR_osf_getitimer	 86
-#define __NR_gethostname	 87
-#define __NR_sethostname	 88
-#define __NR_getdtablesize	 89
-#define __NR_dup2		 90
-#define __NR_fstat		 91
-#define __NR_fcntl		 92
-#define __NR_osf_select		 93
-#define __NR_poll		 94
-#define __NR_fsync		 95
-#define __NR_setpriority	 96
-#define __NR_socket		 97
-#define __NR_connect		 98
-#define __NR_accept		 99
-#define __NR_getpriority	100
-#define __NR_send		101
-#define __NR_recv		102
-#define __NR_sigreturn		103
-#define __NR_bind		104
-#define __NR_setsockopt		105
-#define __NR_listen		106
-#define __NR_osf_plock		107	/* not implemented */
-#define __NR_osf_old_sigvec	108	/* not implemented */
-#define __NR_osf_old_sigblock	109	/* not implemented */
-#define __NR_osf_old_sigsetmask	110	/* not implemented */
-#define __NR_sigsuspend		111
-#define __NR_osf_sigstack	112
-#define __NR_recvmsg		113
-#define __NR_sendmsg		114
-#define __NR_osf_old_vtrace	115	/* not implemented */
-#define __NR_osf_gettimeofday	116
-#define __NR_osf_getrusage	117
-#define __NR_getsockopt		118
-
-#define __NR_readv		120
-#define __NR_writev		121
-#define __NR_osf_settimeofday	122
-#define __NR_fchown		123
-#define __NR_fchmod		124
-#define __NR_recvfrom		125
-#define __NR_setreuid		126
-#define __NR_setregid		127
-#define __NR_rename		128
-#define __NR_truncate		129
-#define __NR_ftruncate		130
-#define __NR_flock		131
-#define __NR_setgid		132
-#define __NR_sendto		133
-#define __NR_shutdown		134
-#define __NR_socketpair		135
-#define __NR_mkdir		136
-#define __NR_rmdir		137
-#define __NR_osf_utimes		138
-#define __NR_osf_old_sigreturn	139	/* not implemented */
-#define __NR_osf_adjtime	140	/* not implemented */
-#define __NR_getpeername	141
-#define __NR_osf_gethostid	142	/* not implemented */
-#define __NR_osf_sethostid	143	/* not implemented */
-#define __NR_getrlimit		144
-#define __NR_setrlimit		145
-#define __NR_osf_old_killpg	146	/* not implemented */
-#define __NR_setsid		147
-#define __NR_quotactl		148
-#define __NR_osf_oldquota	149	/* not implemented */
-#define __NR_getsockname	150
-
-#define __NR_osf_pid_block	153	/* not implemented */
-#define __NR_osf_pid_unblock	154	/* not implemented */
-
-#define __NR_sigaction		156
-#define __NR_osf_sigwaitprim	157	/* not implemented */
-#define __NR_osf_nfssvc		158	/* not implemented */
-#define __NR_osf_getdirentries	159
-#define __NR_osf_statfs		160
-#define __NR_osf_fstatfs	161
-
-#define __NR_osf_asynch_daemon	163	/* not implemented */
-#define __NR_osf_getfh		164	/* not implemented */	
-#define __NR_osf_getdomainname	165
-#define __NR_setdomainname	166
-
-#define __NR_osf_exportfs	169	/* not implemented */
-
-#define __NR_osf_alt_plock	181	/* not implemented */
-
-#define __NR_osf_getmnt		184	/* not implemented */
-
-#define __NR_osf_alt_sigpending	187	/* not implemented */
-#define __NR_osf_alt_setsid	188	/* not implemented */
-
-#define __NR_osf_swapon		199
-#define __NR_msgctl		200
-#define __NR_msgget		201
-#define __NR_msgrcv		202
-#define __NR_msgsnd		203
-#define __NR_semctl		204
-#define __NR_semget		205
-#define __NR_semop		206
-#define __NR_osf_utsname	207
-#define __NR_lchown		208
-#define __NR_osf_shmat		209
-#define __NR_shmctl		210
-#define __NR_shmdt		211
-#define __NR_shmget		212
-#define __NR_osf_mvalid		213	/* not implemented */
-#define __NR_osf_getaddressconf	214	/* not implemented */
-#define __NR_osf_msleep		215	/* not implemented */
-#define __NR_osf_mwakeup	216	/* not implemented */
-#define __NR_msync		217
-#define __NR_osf_signal		218	/* not implemented */
-#define __NR_osf_utc_gettime	219	/* not implemented */
-#define __NR_osf_utc_adjtime	220	/* not implemented */
-
-#define __NR_osf_security	222	/* not implemented */
-#define __NR_osf_kloadcall	223	/* not implemented */
-
-#define __NR_osf_stat		224
-#define __NR_osf_lstat		225
-#define __NR_osf_fstat		226
-#define __NR_osf_statfs64	227
-#define __NR_osf_fstatfs64	228
-
-#define __NR_getpgid		233
-#define __NR_getsid		234
-#define __NR_sigaltstack	235
-#define __NR_osf_waitid		236	/* not implemented */
-#define __NR_osf_priocntlset	237	/* not implemented */
-#define __NR_osf_sigsendset	238	/* not implemented */
-#define __NR_osf_set_speculative	239	/* not implemented */
-#define __NR_osf_msfs_syscall	240	/* not implemented */
-#define __NR_osf_sysinfo	241
-#define __NR_osf_uadmin		242	/* not implemented */
-#define __NR_osf_fuser		243	/* not implemented */
-#define __NR_osf_proplist_syscall    244
-#define __NR_osf_ntp_adjtime	245	/* not implemented */
-#define __NR_osf_ntp_gettime	246	/* not implemented */
-#define __NR_osf_pathconf	247	/* not implemented */
-#define __NR_osf_fpathconf	248	/* not implemented */
-
-#define __NR_osf_uswitch	250	/* not implemented */
-#define __NR_osf_usleep_thread	251
-#define __NR_osf_audcntl	252	/* not implemented */
-#define __NR_osf_audgen		253	/* not implemented */
-#define __NR_sysfs		254
-#define __NR_osf_subsys_info	255	/* not implemented */
-#define __NR_osf_getsysinfo	256
-#define __NR_osf_setsysinfo	257
-#define __NR_osf_afs_syscall	258	/* not implemented */
-#define __NR_osf_swapctl	259	/* not implemented */
-#define __NR_osf_memcntl	260	/* not implemented */
-#define __NR_osf_fdatasync	261	/* not implemented */
-
-/*
- * Ignore legacy syscalls that we don't use.
- */
-#define __IGNORE_alarm
-#define __IGNORE_creat
-#define __IGNORE_getegid
-#define __IGNORE_geteuid
-#define __IGNORE_getgid
-#define __IGNORE_getpid
-#define __IGNORE_getppid
-#define __IGNORE_getuid
-#define __IGNORE_pause
-#define __IGNORE_time
-#define __IGNORE_utime
-#define __IGNORE_umount2
-
-/*
- * Linux-specific system calls begin at 300
- */
-#define __NR_bdflush		300
-#define __NR_sethae		301
-#define __NR_mount		302
-#define __NR_old_adjtimex	303
-#define __NR_swapoff		304
-#define __NR_getdents		305
-#define __NR_create_module	306
-#define __NR_init_module	307
-#define __NR_delete_module	308
-#define __NR_get_kernel_syms	309
-#define __NR_syslog		310
-#define __NR_reboot		311
-#define __NR_clone		312
-#define __NR_uselib		313
-#define __NR_mlock		314
-#define __NR_munlock		315
-#define __NR_mlockall		316
-#define __NR_munlockall		317
-#define __NR_sysinfo		318
-#define __NR__sysctl		319
-/* 320 was sys_idle.  */
-#define __NR_oldumount		321
-#define __NR_swapon		322
-#define __NR_times		323
-#define __NR_personality	324
-#define __NR_setfsuid		325
-#define __NR_setfsgid		326
-#define __NR_ustat		327
-#define __NR_statfs		328
-#define __NR_fstatfs		329
-#define __NR_sched_setparam		330
-#define __NR_sched_getparam		331
-#define __NR_sched_setscheduler		332
-#define __NR_sched_getscheduler		333
-#define __NR_sched_yield		334
-#define __NR_sched_get_priority_max	335
-#define __NR_sched_get_priority_min	336
-#define __NR_sched_rr_get_interval	337
-#define __NR_afs_syscall		338
-#define __NR_uname			339
-#define __NR_nanosleep			340
-#define __NR_mremap			341
-#define __NR_nfsservctl			342
-#define __NR_setresuid			343
-#define __NR_getresuid			344
-#define __NR_pciconfig_read		345
-#define __NR_pciconfig_write		346
-#define __NR_query_module		347
-#define __NR_prctl			348
-#define __NR_pread64			349
-#define __NR_pwrite64			350
-#define __NR_rt_sigreturn		351
-#define __NR_rt_sigaction		352
-#define __NR_rt_sigprocmask		353
-#define __NR_rt_sigpending		354
-#define __NR_rt_sigtimedwait		355
-#define __NR_rt_sigqueueinfo		356
-#define __NR_rt_sigsuspend		357
-#define __NR_select			358
-#define __NR_gettimeofday		359
-#define __NR_settimeofday		360
-#define __NR_getitimer			361
-#define __NR_setitimer			362
-#define __NR_utimes			363
-#define __NR_getrusage			364
-#define __NR_wait4			365
-#define __NR_adjtimex			366
-#define __NR_getcwd			367
-#define __NR_capget			368
-#define __NR_capset			369
-#define __NR_sendfile			370
-#define __NR_setresgid			371
-#define __NR_getresgid			372
-#define __NR_dipc			373
-#define __NR_pivot_root			374
-#define __NR_mincore			375
-#define __NR_pciconfig_iobase		376
-#define __NR_getdents64			377
-#define __NR_gettid			378
-#define __NR_readahead			379
-/* 380 is unused */
-#define __NR_tkill			381
-#define __NR_setxattr			382
-#define __NR_lsetxattr			383
-#define __NR_fsetxattr			384
-#define __NR_getxattr			385
-#define __NR_lgetxattr			386
-#define __NR_fgetxattr			387
-#define __NR_listxattr			388
-#define __NR_llistxattr			389
-#define __NR_flistxattr			390
-#define __NR_removexattr		391
-#define __NR_lremovexattr		392
-#define __NR_fremovexattr		393
-#define __NR_futex			394
-#define __NR_sched_setaffinity		395     
-#define __NR_sched_getaffinity		396
-#define __NR_tuxcall			397
-#define __NR_io_setup			398
-#define __NR_io_destroy			399
-#define __NR_io_getevents		400
-#define __NR_io_submit			401
-#define __NR_io_cancel			402
-#define __NR_exit_group			405
-#define __NR_lookup_dcookie		406
-#define __NR_epoll_create		407
-#define __NR_epoll_ctl			408
-#define __NR_epoll_wait			409
-/* Feb 2007: These three sys_epoll defines shouldn't be here but culling
- * them would break userspace apps ... we'll kill them off in 2010 :) */
-#define __NR_sys_epoll_create		__NR_epoll_create
-#define __NR_sys_epoll_ctl		__NR_epoll_ctl
-#define __NR_sys_epoll_wait		__NR_epoll_wait
-#define __NR_remap_file_pages		410
-#define __NR_set_tid_address		411
-#define __NR_restart_syscall		412
-#define __NR_fadvise64			413
-#define __NR_timer_create		414
-#define __NR_timer_settime		415
-#define __NR_timer_gettime		416
-#define __NR_timer_getoverrun		417
-#define __NR_timer_delete		418
-#define __NR_clock_settime		419
-#define __NR_clock_gettime		420
-#define __NR_clock_getres		421
-#define __NR_clock_nanosleep		422
-#define __NR_semtimedop			423
-#define __NR_tgkill			424
-#define __NR_stat64			425
-#define __NR_lstat64			426
-#define __NR_fstat64			427
-#define __NR_vserver			428
-#define __NR_mbind			429
-#define __NR_get_mempolicy		430
-#define __NR_set_mempolicy		431
-#define __NR_mq_open			432
-#define __NR_mq_unlink			433
-#define __NR_mq_timedsend		434
-#define __NR_mq_timedreceive		435
-#define __NR_mq_notify			436
-#define __NR_mq_getsetattr		437
-#define __NR_waitid			438
-#define __NR_add_key			439
-#define __NR_request_key		440
-#define __NR_keyctl			441
-#define __NR_ioprio_set			442
-#define __NR_ioprio_get			443
-#define __NR_inotify_init		444
-#define __NR_inotify_add_watch		445
-#define __NR_inotify_rm_watch		446
-#define __NR_fdatasync			447
-#define __NR_kexec_load			448
-#define __NR_migrate_pages		449
-#define __NR_openat			450
-#define __NR_mkdirat			451
-#define __NR_mknodat			452
-#define __NR_fchownat			453
-#define __NR_futimesat			454
-#define __NR_fstatat64			455
-#define __NR_unlinkat			456
-#define __NR_renameat			457
-#define __NR_linkat			458
-#define __NR_symlinkat			459
-#define __NR_readlinkat			460
-#define __NR_fchmodat			461
-#define __NR_faccessat			462
-#define __NR_pselect6			463
-#define __NR_ppoll			464
-#define __NR_unshare			465
-#define __NR_set_robust_list		466
-#define __NR_get_robust_list		467
-#define __NR_splice			468
-#define __NR_sync_file_range		469
-#define __NR_tee			470
-#define __NR_vmsplice			471
-#define __NR_move_pages			472
-#define __NR_getcpu			473
-#define __NR_epoll_pwait		474
-#define __NR_utimensat			475
-#define __NR_signalfd			476
-#define __NR_timerfd			477
-#define __NR_eventfd			478
-#define __NR_recvmmsg			479
-#define __NR_fallocate			480
-#define __NR_timerfd_create		481
-#define __NR_timerfd_settime		482
-#define __NR_timerfd_gettime		483
-#define __NR_signalfd4			484
-#define __NR_eventfd2			485
-#define __NR_epoll_create1		486
-#define __NR_dup3			487
-#define __NR_pipe2			488
-#define __NR_inotify_init1		489
-#define __NR_preadv			490
-#define __NR_pwritev			491
-#define __NR_rt_tgsigqueueinfo		492
-#define __NR_perf_event_open		493
-#define __NR_fanotify_init		494
-#define __NR_fanotify_mark		495
-#define __NR_prlimit64			496
-#define __NR_name_to_handle_at		497
-#define __NR_open_by_handle_at		498
-#define __NR_clock_adjtime		499
-#define __NR_syncfs			500
-#define __NR_setns			501
-#define __NR_accept4			502
-#define __NR_sendmmsg			503
-#define __NR_process_vm_readv		504
-#define __NR_process_vm_writev		505
-
-#ifdef __KERNEL__
 
 #define NR_SYSCALLS			506
 
@@ -481,7 +15,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
@@ -498,5 +31,4 @@
 
 #define cond_syscall(x)  asm(".weak\t" #x "\n" #x " = sys_ni_syscall")
 
-#endif /* __KERNEL__ */
 #endif /* _ALPHA_UNISTD_H */
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index baebb3d..d96f2ef 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,3 +1,43 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += a.out.h
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += compiler.h
+header-y += console.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += fpu.h
+header-y += gentrap.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += kvm_para.h
+header-y += mman.h
+header-y += msgbuf.h
+header-y += pal.h
+header-y += param.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += reg.h
+header-y += regdef.h
+header-y += resource.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += sysinfo.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += unistd.h
diff --git a/arch/alpha/include/uapi/asm/a.out.h b/arch/alpha/include/uapi/asm/a.out.h
new file mode 100644
index 0000000..5477072
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/a.out.h
@@ -0,0 +1,91 @@
+#ifndef _UAPI__ALPHA_A_OUT_H__
+#define _UAPI__ALPHA_A_OUT_H__
+
+#include <linux/types.h>
+
+/*
+ * OSF/1 ECOFF header structs.  ECOFF files consist of:
+ * 	- a file header (struct filehdr),
+ *	- an a.out header (struct aouthdr),
+ *	- one or more section headers (struct scnhdr). 
+ *	  The filehdr's "f_nscns" field contains the
+ *	  number of section headers.
+ */
+
+struct filehdr
+{
+	/* OSF/1 "file" header */
+	__u16 f_magic, f_nscns;
+	__u32 f_timdat;
+	__u64 f_symptr;
+	__u32 f_nsyms;
+	__u16 f_opthdr, f_flags;
+};
+
+struct aouthdr
+{
+	__u64 info;		/* after that it looks quite normal.. */
+	__u64 tsize;
+	__u64 dsize;
+	__u64 bsize;
+	__u64 entry;
+	__u64 text_start;	/* with a few additions that actually make sense */
+	__u64 data_start;
+	__u64 bss_start;
+	__u32 gprmask, fprmask;	/* bitmask of general & floating point regs used in binary */
+	__u64 gpvalue;
+};
+
+struct scnhdr
+{
+	char	s_name[8];
+	__u64	s_paddr;
+	__u64	s_vaddr;
+	__u64	s_size;
+	__u64	s_scnptr;
+	__u64	s_relptr;
+	__u64	s_lnnoptr;
+	__u16	s_nreloc;
+	__u16	s_nlnno;
+	__u32	s_flags;
+};
+
+struct exec
+{
+	/* OSF/1 "file" header */
+	struct filehdr		fh;
+	struct aouthdr		ah;
+};
+
+/*
+ * Define's so that the kernel exec code can access the a.out header
+ * fields...
+ */
+#define	a_info		ah.info
+#define	a_text		ah.tsize
+#define a_data		ah.dsize
+#define a_bss		ah.bsize
+#define a_entry		ah.entry
+#define a_textstart	ah.text_start
+#define	a_datastart	ah.data_start
+#define	a_bssstart	ah.bss_start
+#define	a_gprmask	ah.gprmask
+#define a_fprmask	ah.fprmask
+#define a_gpvalue	ah.gpvalue
+
+#define N_TXTADDR(x) ((x).a_textstart)
+#define N_DATADDR(x) ((x).a_datastart)
+#define N_BSSADDR(x) ((x).a_bssstart)
+#define N_DRSIZE(x) 0
+#define N_TRSIZE(x) 0
+#define N_SYMSIZE(x) 0
+
+#define AOUTHSZ		sizeof(struct aouthdr)
+#define SCNHSZ		sizeof(struct scnhdr)
+#define SCNROUND	16
+
+#define N_TXTOFF(x) \
+  ((long) N_MAGIC(x) == ZMAGIC ? 0 : \
+   (sizeof(struct exec) + (x).fh.f_nscns*SCNHSZ + SCNROUND - 1) & ~(SCNROUND - 1))
+
+#endif /* _UAPI__ALPHA_A_OUT_H__ */
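
For orientation, the N_TXTOFF() arithmetic above places the text
segment after the exec header plus the f_nscns section headers,
rounded up to SCNROUND (16) bytes.  A minimal sketch of the
non-ZMAGIC branch (illustrative only; N_MAGIC()/ZMAGIC come from
the kernel's internal a.out headers, so the ZMAGIC shortcut that
returns 0 is omitted here):

    /* Round the combined header size up to the next 16-byte boundary. */
    static unsigned long ecoff_text_offset(unsigned long exec_size,
                                           unsigned int nscns,
                                           unsigned long scnhsz)
    {
            return (exec_size + nscns * scnhsz + 16 - 1) & ~15UL;
    }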
diff --git a/arch/alpha/include/asm/auxvec.h b/arch/alpha/include/uapi/asm/auxvec.h
similarity index 100%
rename from arch/alpha/include/asm/auxvec.h
rename to arch/alpha/include/uapi/asm/auxvec.h
diff --git a/arch/alpha/include/asm/bitsperlong.h b/arch/alpha/include/uapi/asm/bitsperlong.h
similarity index 100%
rename from arch/alpha/include/asm/bitsperlong.h
rename to arch/alpha/include/uapi/asm/bitsperlong.h
diff --git a/arch/alpha/include/asm/byteorder.h b/arch/alpha/include/uapi/asm/byteorder.h
similarity index 100%
rename from arch/alpha/include/asm/byteorder.h
rename to arch/alpha/include/uapi/asm/byteorder.h
diff --git a/arch/alpha/include/uapi/asm/compiler.h b/arch/alpha/include/uapi/asm/compiler.h
new file mode 100644
index 0000000..32cc783
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/compiler.h
@@ -0,0 +1,117 @@
+#ifndef _UAPI__ALPHA_COMPILER_H
+#define _UAPI__ALPHA_COMPILER_H
+
+/* 
+ * Herein are macros we use when describing various patterns we present to GCC.
+ * In all cases we can get better schedules out of the compiler if we hide
+ * as little as possible inside inline assembly.  However, we want to be
+ * able to know what we'll get out before giving up inline assembly.  Thus
+ * these tests and macros.
+ */
+
+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3
+# define __kernel_insbl(val, shift)	__builtin_alpha_insbl(val, shift)
+# define __kernel_inswl(val, shift)	__builtin_alpha_inswl(val, shift)
+# define __kernel_insql(val, shift)	__builtin_alpha_insql(val, shift)
+# define __kernel_inslh(val, shift)	__builtin_alpha_inslh(val, shift)
+# define __kernel_extbl(val, shift)	__builtin_alpha_extbl(val, shift)
+# define __kernel_extwl(val, shift)	__builtin_alpha_extwl(val, shift)
+# define __kernel_cmpbge(a, b)		__builtin_alpha_cmpbge(a, b)
+#else
+# define __kernel_insbl(val, shift)					\
+  ({ unsigned long __kir;						\
+     __asm__("insbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
+     __kir; })
+# define __kernel_inswl(val, shift)					\
+  ({ unsigned long __kir;						\
+     __asm__("inswl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
+     __kir; })
+# define __kernel_insql(val, shift)					\
+  ({ unsigned long __kir;						\
+     __asm__("insql %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
+     __kir; })
+# define __kernel_inslh(val, shift)					\
+  ({ unsigned long __kir;						\
+     __asm__("inslh %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
+     __kir; })
+# define __kernel_extbl(val, shift)					\
+  ({ unsigned long __kir;						\
+     __asm__("extbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
+     __kir; })
+# define __kernel_extwl(val, shift)					\
+  ({ unsigned long __kir;						\
+     __asm__("extwl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val));	\
+     __kir; })
+# define __kernel_cmpbge(a, b)						\
+  ({ unsigned long __kir;						\
+     __asm__("cmpbge %r2,%1,%0" : "=r"(__kir) : "rI"(b), "rJ"(a));	\
+     __kir; })
+#endif
+
+#ifdef __alpha_cix__
+# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3
+#  define __kernel_cttz(x)		__builtin_ctzl(x)
+#  define __kernel_ctlz(x)		__builtin_clzl(x)
+#  define __kernel_ctpop(x)		__builtin_popcountl(x)
+# else
+#  define __kernel_cttz(x)						\
+   ({ unsigned long __kir;						\
+      __asm__("cttz %1,%0" : "=r"(__kir) : "r"(x));			\
+      __kir; })
+#  define __kernel_ctlz(x)						\
+   ({ unsigned long __kir;						\
+      __asm__("ctlz %1,%0" : "=r"(__kir) : "r"(x));			\
+      __kir; })
+#  define __kernel_ctpop(x)						\
+   ({ unsigned long __kir;						\
+      __asm__("ctpop %1,%0" : "=r"(__kir) : "r"(x));			\
+      __kir; })
+# endif
+#else
+# define __kernel_cttz(x)						\
+  ({ unsigned long __kir;						\
+     __asm__(".arch ev67; cttz %1,%0" : "=r"(__kir) : "r"(x));		\
+     __kir; })
+# define __kernel_ctlz(x)						\
+  ({ unsigned long __kir;						\
+     __asm__(".arch ev67; ctlz %1,%0" : "=r"(__kir) : "r"(x));		\
+     __kir; })
+# define __kernel_ctpop(x)						\
+  ({ unsigned long __kir;						\
+     __asm__(".arch ev67; ctpop %1,%0" : "=r"(__kir) : "r"(x));		\
+     __kir; })
+#endif
+
+
+/* 
+ * Beginning with EGCS 1.1, GCC defines __alpha_bwx__ when the BWX 
+ * extension is enabled.  Previous versions did not define anything
+ * we could test during compilation -- too bad, so sad.
+ */
+
+#if defined(__alpha_bwx__)
+#define __kernel_ldbu(mem)	(mem)
+#define __kernel_ldwu(mem)	(mem)
+#define __kernel_stb(val,mem)	((mem) = (val))
+#define __kernel_stw(val,mem)	((mem) = (val))
+#else
+#define __kernel_ldbu(mem)				\
+  ({ unsigned char __kir;				\
+     __asm__(".arch ev56;				\
+	      ldbu %0,%1" : "=r"(__kir) : "m"(mem));	\
+     __kir; })
+#define __kernel_ldwu(mem)				\
+  ({ unsigned short __kir;				\
+     __asm__(".arch ev56;				\
+	      ldwu %0,%1" : "=r"(__kir) : "m"(mem));	\
+     __kir; })
+#define __kernel_stb(val,mem)				\
+  __asm__(".arch ev56;					\
+	   stb %1,%0" : "=m"(mem) : "r"(val))
+#define __kernel_stw(val,mem)				\
+  __asm__(".arch ev56;					\
+	   stw %1,%0" : "=m"(mem) : "r"(val))
+#endif
+
+
+#endif /* _UAPI__ALPHA_COMPILER_H */
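
To see what these wrappers compute, here is a plain-C rendering of
__kernel_extbl() (a sketch for exposition only; the real macro exists
precisely so GCC can emit and schedule the native extbl instruction
or builtin instead of generic shift/mask code):

    /* extbl: select byte <shift & 7> of the 64-bit value. */
    static inline unsigned long extbl_c(unsigned long val,
                                        unsigned long shift)
    {
            return (val >> ((shift & 7) * 8)) & 0xff;
    }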
diff --git a/arch/alpha/include/uapi/asm/console.h b/arch/alpha/include/uapi/asm/console.h
new file mode 100644
index 0000000..fd08a19
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/console.h
@@ -0,0 +1,50 @@
+#ifndef _UAPI__AXP_CONSOLE_H
+#define _UAPI__AXP_CONSOLE_H
+
+/*
+ * Console callback routine numbers
+ */
+#define CCB_GETC		0x01
+#define CCB_PUTS		0x02
+#define CCB_RESET_TERM		0x03
+#define CCB_SET_TERM_INT	0x04
+#define CCB_SET_TERM_CTL	0x05
+#define CCB_PROCESS_KEYCODE	0x06
+#define CCB_OPEN_CONSOLE	0x07
+#define CCB_CLOSE_CONSOLE	0x08
+
+#define CCB_OPEN		0x10
+#define CCB_CLOSE		0x11
+#define CCB_IOCTL		0x12
+#define CCB_READ		0x13
+#define CCB_WRITE		0x14
+
+#define CCB_SET_ENV		0x20
+#define CCB_RESET_ENV		0x21
+#define CCB_GET_ENV		0x22
+#define CCB_SAVE_ENV		0x23
+
+#define CCB_PSWITCH		0x30
+#define CCB_BIOS_EMUL		0x32
+
+/*
+ * Environment variable numbers
+ */
+#define ENV_AUTO_ACTION		0x01
+#define ENV_BOOT_DEV		0x02
+#define ENV_BOOTDEF_DEV		0x03
+#define ENV_BOOTED_DEV		0x04
+#define ENV_BOOT_FILE		0x05
+#define ENV_BOOTED_FILE		0x06
+#define ENV_BOOT_OSFLAGS	0x07
+#define ENV_BOOTED_OSFLAGS	0x08
+#define ENV_BOOT_RESET		0x09
+#define ENV_DUMP_DEV		0x0A
+#define ENV_ENABLE_AUDIT	0x0B
+#define ENV_LICENSE		0x0C
+#define ENV_CHAR_SET		0x0D
+#define ENV_LANGUAGE		0x0E
+#define ENV_TTY_DEV		0x0F
+
+
+#endif /* _UAPI__AXP_CONSOLE_H */
diff --git a/arch/alpha/include/asm/errno.h b/arch/alpha/include/uapi/asm/errno.h
similarity index 100%
rename from arch/alpha/include/asm/errno.h
rename to arch/alpha/include/uapi/asm/errno.h
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/uapi/asm/fcntl.h
similarity index 100%
rename from arch/alpha/include/asm/fcntl.h
rename to arch/alpha/include/uapi/asm/fcntl.h
diff --git a/arch/alpha/include/uapi/asm/fpu.h b/arch/alpha/include/uapi/asm/fpu.h
new file mode 100644
index 0000000..21a053ca
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/fpu.h
@@ -0,0 +1,123 @@
+#ifndef _UAPI__ASM_ALPHA_FPU_H
+#define _UAPI__ASM_ALPHA_FPU_H
+
+
+/*
+ * Alpha floating-point control register defines:
+ */
+#define FPCR_DNOD	(1UL<<47)	/* denorm INV trap disable */
+#define FPCR_DNZ	(1UL<<48)	/* denorms to zero */
+#define FPCR_INVD	(1UL<<49)	/* invalid op disable (opt.) */
+#define FPCR_DZED	(1UL<<50)	/* division by zero disable (opt.) */
+#define FPCR_OVFD	(1UL<<51)	/* overflow disable (optional) */
+#define FPCR_INV	(1UL<<52)	/* invalid operation */
+#define FPCR_DZE	(1UL<<53)	/* division by zero */
+#define FPCR_OVF	(1UL<<54)	/* overflow */
+#define FPCR_UNF	(1UL<<55)	/* underflow */
+#define FPCR_INE	(1UL<<56)	/* inexact */
+#define FPCR_IOV	(1UL<<57)	/* integer overflow */
+#define FPCR_UNDZ	(1UL<<60)	/* underflow to zero (opt.) */
+#define FPCR_UNFD	(1UL<<61)	/* underflow disable (opt.) */
+#define FPCR_INED	(1UL<<62)	/* inexact disable (opt.) */
+#define FPCR_SUM	(1UL<<63)	/* summary bit */
+
+#define FPCR_DYN_SHIFT	58		/* first dynamic rounding mode bit */
+#define FPCR_DYN_CHOPPED (0x0UL << FPCR_DYN_SHIFT)	/* towards 0 */
+#define FPCR_DYN_MINUS	 (0x1UL << FPCR_DYN_SHIFT)	/* towards -INF */
+#define FPCR_DYN_NORMAL	 (0x2UL << FPCR_DYN_SHIFT)	/* towards nearest */
+#define FPCR_DYN_PLUS	 (0x3UL << FPCR_DYN_SHIFT)	/* towards +INF */
+#define FPCR_DYN_MASK	 (0x3UL << FPCR_DYN_SHIFT)
+
+#define FPCR_MASK	0xffff800000000000L
+
+/*
+ * IEEE trap enables are implemented in software.  These per-thread
+ * bits are stored in the "ieee_state" field of "struct thread_info".
+ * Thus, the bits are defined so as not to conflict with the
+ * floating-point enable bit (which is architected).  On top of that,
+ * we want to make these bits compatible with OSF/1 so
+ * ieee_set_fp_control() etc. can be implemented easily and
+ * compatibly.  The corresponding definitions are in
+ * /usr/include/machine/fpu.h under OSF/1.
+ */
+#define IEEE_TRAP_ENABLE_INV	(1UL<<1)	/* invalid op */
+#define IEEE_TRAP_ENABLE_DZE	(1UL<<2)	/* division by zero */
+#define IEEE_TRAP_ENABLE_OVF	(1UL<<3)	/* overflow */
+#define IEEE_TRAP_ENABLE_UNF	(1UL<<4)	/* underflow */
+#define IEEE_TRAP_ENABLE_INE	(1UL<<5)	/* inexact */
+#define IEEE_TRAP_ENABLE_DNO	(1UL<<6)	/* denorm */
+#define IEEE_TRAP_ENABLE_MASK	(IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |\
+				 IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |\
+				 IEEE_TRAP_ENABLE_INE | IEEE_TRAP_ENABLE_DNO)
+
+/* Denorm and Underflow flushing */
+#define IEEE_MAP_DMZ		(1UL<<12)	/* Map denorm inputs to zero */
+#define IEEE_MAP_UMZ		(1UL<<13)	/* Map underflowed outputs to zero */
+
+#define IEEE_MAP_MASK		(IEEE_MAP_DMZ | IEEE_MAP_UMZ)
+
+/* status bits coming from fpcr: */
+#define IEEE_STATUS_INV		(1UL<<17)
+#define IEEE_STATUS_DZE		(1UL<<18)
+#define IEEE_STATUS_OVF		(1UL<<19)
+#define IEEE_STATUS_UNF		(1UL<<20)
+#define IEEE_STATUS_INE		(1UL<<21)
+#define IEEE_STATUS_DNO		(1UL<<22)
+
+#define IEEE_STATUS_MASK	(IEEE_STATUS_INV | IEEE_STATUS_DZE |	\
+				 IEEE_STATUS_OVF | IEEE_STATUS_UNF |	\
+				 IEEE_STATUS_INE | IEEE_STATUS_DNO)
+
+#define IEEE_SW_MASK		(IEEE_TRAP_ENABLE_MASK |		\
+				 IEEE_STATUS_MASK | IEEE_MAP_MASK)
+
+#define IEEE_CURRENT_RM_SHIFT	32
+#define IEEE_CURRENT_RM_MASK	(3UL<<IEEE_CURRENT_RM_SHIFT)
+
+#define IEEE_STATUS_TO_EXCSUM_SHIFT	16
+
+#define IEEE_INHERIT    (1UL<<63)	/* inherit on thread create? */
+
+/*
+ * Convert the software IEEE trap enable and status bits into the
+ * hardware fpcr format. 
+ *
+ * Digital Unix engineers receive my thanks for not defining the
+ * software bits identical to the hardware bits.  The chip designers
+ * receive my thanks for making all the not-implemented fpcr bits
+ * RAZ, forcing us to use system calls to read/write this value.
+ */
+
+static inline unsigned long
+ieee_swcr_to_fpcr(unsigned long sw)
+{
+	unsigned long fp;
+	fp = (sw & IEEE_STATUS_MASK) << 35;
+	fp |= (sw & IEEE_MAP_DMZ) << 36;
+	fp |= (sw & IEEE_STATUS_MASK ? FPCR_SUM : 0);
+	fp |= (~sw & (IEEE_TRAP_ENABLE_INV
+		      | IEEE_TRAP_ENABLE_DZE
+		      | IEEE_TRAP_ENABLE_OVF)) << 48;
+	fp |= (~sw & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE)) << 57;
+	fp |= (sw & IEEE_MAP_UMZ ? FPCR_UNDZ | FPCR_UNFD : 0);
+	fp |= (~sw & IEEE_TRAP_ENABLE_DNO) << 41;
+	return fp;
+}
+
+static inline unsigned long
+ieee_fpcr_to_swcr(unsigned long fp)
+{
+	unsigned long sw;
+	sw = (fp >> 35) & IEEE_STATUS_MASK;
+	sw |= (fp >> 36) & IEEE_MAP_DMZ;
+	sw |= (~fp >> 48) & (IEEE_TRAP_ENABLE_INV
+			     | IEEE_TRAP_ENABLE_DZE
+			     | IEEE_TRAP_ENABLE_OVF);
+	sw |= (~fp >> 57) & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE);
+	sw |= (fp >> 47) & IEEE_MAP_UMZ;
+	sw |= (~fp >> 41) & IEEE_TRAP_ENABLE_DNO;
+	return sw;
+}
+
+
+#endif /* _UAPI__ASM_ALPHA_FPU_H */
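
The two converters are inverses on the bits both encodings carry; a
quick hypothetical round-trip check:

    /* Trap-enable bits survive a swcr -> fpcr -> swcr round trip;
     * the status and mapping bits behave the same way. */
    unsigned long sw = IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_OVF;
    unsigned long fp = ieee_swcr_to_fpcr(sw);
    /* at this point ieee_fpcr_to_swcr(fp) == sw */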
diff --git a/arch/alpha/include/asm/gentrap.h b/arch/alpha/include/uapi/asm/gentrap.h
similarity index 100%
rename from arch/alpha/include/asm/gentrap.h
rename to arch/alpha/include/uapi/asm/gentrap.h
diff --git a/arch/alpha/include/asm/ioctl.h b/arch/alpha/include/uapi/asm/ioctl.h
similarity index 100%
rename from arch/alpha/include/asm/ioctl.h
rename to arch/alpha/include/uapi/asm/ioctl.h
diff --git a/arch/alpha/include/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h
similarity index 100%
rename from arch/alpha/include/asm/ioctls.h
rename to arch/alpha/include/uapi/asm/ioctls.h
diff --git a/arch/alpha/include/asm/ipcbuf.h b/arch/alpha/include/uapi/asm/ipcbuf.h
similarity index 100%
rename from arch/alpha/include/asm/ipcbuf.h
rename to arch/alpha/include/uapi/asm/ipcbuf.h
diff --git a/arch/alpha/include/asm/kvm_para.h b/arch/alpha/include/uapi/asm/kvm_para.h
similarity index 100%
rename from arch/alpha/include/asm/kvm_para.h
rename to arch/alpha/include/uapi/asm/kvm_para.h
diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
similarity index 100%
rename from arch/alpha/include/asm/mman.h
rename to arch/alpha/include/uapi/asm/mman.h
diff --git a/arch/alpha/include/asm/msgbuf.h b/arch/alpha/include/uapi/asm/msgbuf.h
similarity index 100%
rename from arch/alpha/include/asm/msgbuf.h
rename to arch/alpha/include/uapi/asm/msgbuf.h
diff --git a/arch/alpha/include/uapi/asm/pal.h b/arch/alpha/include/uapi/asm/pal.h
new file mode 100644
index 0000000..3c0ce08
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/pal.h
@@ -0,0 +1,52 @@
+#ifndef _UAPI__ALPHA_PAL_H
+#define _UAPI__ALPHA_PAL_H
+
+/*
+ * Common PAL-code
+ */
+#define PAL_halt	  0
+#define PAL_cflush	  1
+#define PAL_draina	  2
+#define PAL_bpt		128
+#define PAL_bugchk	129
+#define PAL_chmk	131
+#define PAL_callsys	131
+#define PAL_imb		134
+#define PAL_rduniq	158
+#define PAL_wruniq	159
+#define PAL_gentrap	170
+#define PAL_nphalt	190
+
+/*
+ * VMS specific PAL-code
+ */
+#define PAL_swppal	10
+#define PAL_mfpr_vptb	41
+
+/*
+ * OSF specific PAL-code
+ */
+#define PAL_cserve	 9
+#define PAL_wripir	13
+#define PAL_rdmces	16
+#define PAL_wrmces	17
+#define PAL_wrfen	43
+#define PAL_wrvptptr	45
+#define PAL_jtopal	46
+#define PAL_swpctx	48
+#define PAL_wrval	49
+#define PAL_rdval	50
+#define PAL_tbi		51
+#define PAL_wrent	52
+#define PAL_swpipl	53
+#define PAL_rdps	54
+#define PAL_wrkgp	55
+#define PAL_wrusp	56
+#define PAL_wrperfmon	57
+#define PAL_rdusp	58
+#define PAL_whami	60
+#define PAL_retsys	61
+#define PAL_rti		63
+
+
+#endif /* _UAPI__ALPHA_PAL_H */
diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h
new file mode 100644
index 0000000..29daed8
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/param.h
@@ -0,0 +1,21 @@
+#ifndef _UAPI_ASM_ALPHA_PARAM_H
+#define _UAPI_ASM_ALPHA_PARAM_H
+
+/* ??? Gross.  I don't want to parameterize this, and supposedly the
+   hardware ignores reprogramming.  We also need userland buy-in to the 
+   change in HZ, since this is visible in the wait4 resources etc.  */
+
+#ifndef __KERNEL__
+#define HZ		1024
+#endif
+
+#define EXEC_PAGESIZE	8192
+
+#ifndef NOGROUP
+#define NOGROUP		(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+
+
+#endif /* _UAPI_ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/asm/poll.h b/arch/alpha/include/uapi/asm/poll.h
similarity index 100%
rename from arch/alpha/include/asm/poll.h
rename to arch/alpha/include/uapi/asm/poll.h
diff --git a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/uapi/asm/posix_types.h
similarity index 100%
rename from arch/alpha/include/asm/posix_types.h
rename to arch/alpha/include/uapi/asm/posix_types.h
diff --git a/arch/alpha/include/uapi/asm/ptrace.h b/arch/alpha/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..5ce83fa
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/ptrace.h
@@ -0,0 +1,70 @@
+#ifndef _UAPI_ASMAXP_PTRACE_H
+#define _UAPI_ASMAXP_PTRACE_H
+
+
+/*
+ * This struct defines the way the registers are stored on the
+ * kernel stack during a system call or other kernel entry
+ *
+ * NOTE! I want to minimize the overhead of system calls, so this
+ * struct has as little information as possible.  It does not have
+ *
+ *  - floating point regs: the kernel doesn't change those
+ *  - r9-15: saved by the C compiler
+ *
+ * This makes "fork()" and "exec()" a bit more complex, but should
+ * give us low system call latency.
+ */
+
+struct pt_regs {
+	unsigned long r0;
+	unsigned long r1;
+	unsigned long r2;
+	unsigned long r3;
+	unsigned long r4;
+	unsigned long r5;
+	unsigned long r6;
+	unsigned long r7;
+	unsigned long r8;
+	unsigned long r19;
+	unsigned long r20;
+	unsigned long r21;
+	unsigned long r22;
+	unsigned long r23;
+	unsigned long r24;
+	unsigned long r25;
+	unsigned long r26;
+	unsigned long r27;
+	unsigned long r28;
+	unsigned long hae;
+/* JRP - These are the values provided to a0-a2 by PALcode */
+	unsigned long trap_a0;
+	unsigned long trap_a1;
+	unsigned long trap_a2;
+/* These are saved by PAL-code: */
+	unsigned long ps;
+	unsigned long pc;
+	unsigned long gp;
+	unsigned long r16;
+	unsigned long r17;
+	unsigned long r18;
+};
+
+/*
+ * This is the extended stack used by signal handlers and the context
+ * switcher: it's pushed after the normal "struct pt_regs".
+ */
+struct switch_stack {
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	unsigned long r13;
+	unsigned long r14;
+	unsigned long r15;
+	unsigned long r26;
+	unsigned long fp[32];	/* fp[31] is fpcr */
+};
+
+
+#endif /* _UAPI_ASMAXP_PTRACE_H */
diff --git a/arch/alpha/include/asm/reg.h b/arch/alpha/include/uapi/asm/reg.h
similarity index 100%
rename from arch/alpha/include/asm/reg.h
rename to arch/alpha/include/uapi/asm/reg.h
diff --git a/arch/alpha/include/asm/regdef.h b/arch/alpha/include/uapi/asm/regdef.h
similarity index 100%
rename from arch/alpha/include/asm/regdef.h
rename to arch/alpha/include/uapi/asm/regdef.h
diff --git a/arch/alpha/include/asm/resource.h b/arch/alpha/include/uapi/asm/resource.h
similarity index 100%
rename from arch/alpha/include/asm/resource.h
rename to arch/alpha/include/uapi/asm/resource.h
diff --git a/arch/alpha/include/asm/sembuf.h b/arch/alpha/include/uapi/asm/sembuf.h
similarity index 100%
rename from arch/alpha/include/asm/sembuf.h
rename to arch/alpha/include/uapi/asm/sembuf.h
diff --git a/arch/alpha/include/asm/setup.h b/arch/alpha/include/uapi/asm/setup.h
similarity index 100%
rename from arch/alpha/include/asm/setup.h
rename to arch/alpha/include/uapi/asm/setup.h
diff --git a/arch/alpha/include/asm/shmbuf.h b/arch/alpha/include/uapi/asm/shmbuf.h
similarity index 100%
rename from arch/alpha/include/asm/shmbuf.h
rename to arch/alpha/include/uapi/asm/shmbuf.h
diff --git a/arch/alpha/include/asm/sigcontext.h b/arch/alpha/include/uapi/asm/sigcontext.h
similarity index 100%
rename from arch/alpha/include/asm/sigcontext.h
rename to arch/alpha/include/uapi/asm/sigcontext.h
diff --git a/arch/alpha/include/asm/siginfo.h b/arch/alpha/include/uapi/asm/siginfo.h
similarity index 100%
rename from arch/alpha/include/asm/siginfo.h
rename to arch/alpha/include/uapi/asm/siginfo.h
diff --git a/arch/alpha/include/uapi/asm/signal.h b/arch/alpha/include/uapi/asm/signal.h
new file mode 100644
index 0000000..dd4ca4bc
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/signal.h
@@ -0,0 +1,129 @@
+#ifndef _UAPI_ASMAXP_SIGNAL_H
+#define _UAPI_ASMAXP_SIGNAL_H
+
+#include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG		32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+
+/*
+ * Linux/AXP has different signal numbers than Linux/i386: I'm trying
+ * to make it OSF/1 binary compatible, at least for normal binaries.
+ */
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGEMT		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGBUS		10
+#define SIGSEGV		11
+#define SIGSYS		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGURG		16
+#define SIGSTOP		17
+#define SIGTSTP		18
+#define SIGCONT		19
+#define SIGCHLD		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGIO		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGINFO		29
+#define SIGUSR1		30
+#define SIGUSR2		31
+
+#define SIGPOLL	SIGIO
+#define SIGPWR	SIGINFO
+#define SIGIOT	SIGABRT
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	_NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+
+#define SA_ONSTACK	0x00000001
+#define SA_RESTART	0x00000002
+#define SA_NOCLDSTOP	0x00000004
+#define SA_NODEFER	0x00000008
+#define SA_RESETHAND	0x00000010
+#define SA_NOCLDWAIT	0x00000020
+#define SA_SIGINFO	0x00000040
+
+#define SA_ONESHOT	SA_RESETHAND
+#define SA_NOMASK	SA_NODEFER
+
+#define MINSIGSTKSZ	4096
+#define SIGSTKSZ	16384
+
+#define SIG_BLOCK          1	/* for blocking signals */
+#define SIG_UNBLOCK        2	/* for unblocking signals */
+#define SIG_SETMASK        3	/* for setting the signal mask */
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+	union {
+	  __sighandler_t	_sa_handler;
+	  void (*_sa_sigaction)(int, struct siginfo *, void *);
+	} _u;
+	sigset_t	sa_mask;
+	int		sa_flags;
+};
+
+#define sa_handler	_u._sa_handler
+#define sa_sigaction	_u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+/* sigstack(2) is deprecated, and will be withdrawn in a future version
+   of the X/Open CAE Specification.  Use sigaltstack instead.  It is only
+   implemented here for OSF/1 compatibility.  */
+
+struct sigstack {
+	void __user *ss_sp;
+	int ss_onstack;
+};
+
+
+#endif /* _UAPI_ASMAXP_SIGNAL_H */
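
Since sa_handler and sa_sigaction are #defined onto the same union
member, callers assign exactly one of them, selected by SA_SIGINFO.
A hypothetical user-space fragment (my_handler is a placeholder name):

    struct sigaction act = { 0 };

    act.sa_sigaction = my_handler;  /* expands to act._u._sa_sigaction */
    act.sa_flags = SA_SIGINFO | SA_ONSTACK;
    act.sa_mask = 0;                /* sigset_t is a plain unsigned long here */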
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
new file mode 100644
index 0000000..097c157
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -0,0 +1,81 @@
+#ifndef _UAPI_ASM_SOCKET_H
+#define _UAPI_ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+/*
+ * Note: we only bother about making the SOL_SOCKET options
+ * same as OSF/1, as that's all that "normal" programs are
+ * likely to set.  We don't necessarily want to be binary
+ * compatible with _everything_. 
+ */
+#define SOL_SOCKET	0xffff
+
+#define SO_DEBUG	0x0001
+#define SO_REUSEADDR	0x0004
+#define SO_KEEPALIVE	0x0008
+#define SO_DONTROUTE	0x0010
+#define SO_BROADCAST	0x0020
+#define SO_LINGER	0x0080
+#define SO_OOBINLINE	0x0100
+/* To add: #define SO_REUSEPORT 0x0200 */
+
+#define SO_TYPE		0x1008
+#define SO_ERROR	0x1007
+#define SO_SNDBUF	0x1001
+#define SO_RCVBUF	0x1002
+#define SO_SNDBUFFORCE	0x100a
+#define SO_RCVBUFFORCE	0x100b
+#define	SO_RCVLOWAT	0x1010
+#define	SO_SNDLOWAT	0x1011
+#define	SO_RCVTIMEO	0x1012
+#define	SO_SNDTIMEO	0x1013
+#define SO_ACCEPTCONN	0x1014
+#define SO_PROTOCOL	0x1028
+#define SO_DOMAIN	0x1029
+
+/* linux-specific, might as well be the same as on i386 */
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_BSDCOMPAT	14
+
+#define SO_PASSCRED	17
+#define SO_PEERCRED	18
+#define SO_BINDTODEVICE 25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER        26
+#define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_PEERSEC		30
+#define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		19
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	20
+#define SO_SECURITY_ENCRYPTION_NETWORK		21
+
+#define SO_MARK			36
+
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
+#define SO_RXQ_OVFL             40
+
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+#define SO_PEEK_OFF		42
+
+/* Instruct lower device to use last 4-bytes of skb data as FCS */
+#define SO_NOFCS		43
+
+
+#endif /* _UAPI_ASM_SOCKET_H */
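
These constants feed straight into setsockopt(2); for example
(hypothetical fragment, fd assumed to be an open socket):

    int one = 1;

    /* SOL_SOCKET is 0xffff on alpha, matching the OSF/1 value. */
    setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));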
diff --git a/arch/alpha/include/asm/sockios.h b/arch/alpha/include/uapi/asm/sockios.h
similarity index 100%
rename from arch/alpha/include/asm/sockios.h
rename to arch/alpha/include/uapi/asm/sockios.h
diff --git a/arch/alpha/include/asm/stat.h b/arch/alpha/include/uapi/asm/stat.h
similarity index 100%
rename from arch/alpha/include/asm/stat.h
rename to arch/alpha/include/uapi/asm/stat.h
diff --git a/arch/alpha/include/asm/statfs.h b/arch/alpha/include/uapi/asm/statfs.h
similarity index 100%
rename from arch/alpha/include/asm/statfs.h
rename to arch/alpha/include/uapi/asm/statfs.h
diff --git a/arch/alpha/include/asm/swab.h b/arch/alpha/include/uapi/asm/swab.h
similarity index 100%
rename from arch/alpha/include/asm/swab.h
rename to arch/alpha/include/uapi/asm/swab.h
diff --git a/arch/alpha/include/asm/sysinfo.h b/arch/alpha/include/uapi/asm/sysinfo.h
similarity index 100%
rename from arch/alpha/include/asm/sysinfo.h
rename to arch/alpha/include/uapi/asm/sysinfo.h
diff --git a/arch/alpha/include/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h
similarity index 100%
rename from arch/alpha/include/asm/termbits.h
rename to arch/alpha/include/uapi/asm/termbits.h
diff --git a/arch/alpha/include/uapi/asm/termios.h b/arch/alpha/include/uapi/asm/termios.h
new file mode 100644
index 0000000..580ed1e
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/termios.h
@@ -0,0 +1,70 @@
+#ifndef _UAPI_ALPHA_TERMIOS_H
+#define _UAPI_ALPHA_TERMIOS_H
+
+#include <asm/ioctls.h>
+#include <asm/termbits.h>
+
+struct sgttyb {
+	char	sg_ispeed;
+	char	sg_ospeed;
+	char	sg_erase;
+	char	sg_kill;
+	short	sg_flags;
+};
+
+struct tchars {
+	char	t_intrc;
+	char	t_quitc;
+	char	t_startc;
+	char	t_stopc;
+	char	t_eofc;
+	char	t_brkc;
+};
+
+struct ltchars {
+	char	t_suspc;
+	char	t_dsuspc;
+	char	t_rprntc;
+	char	t_flushc;
+	char	t_werasc;
+	char	t_lnextc;
+};
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/*
+ * c_cc characters in the termio structure.  Oh, how I love being
+ * backwardly compatible.  Notice that characters 4 and 5 are
+ * interpreted differently depending on whether ICANON is set in
+ * c_lflag.  If it's set, they are used as _VEOF and _VEOL, otherwise
+ * as _VMIN and _VTIME.  This is for compatibility with OSF/1 (which
+ * is compatible with sysV)...
+ */
+#define _VINTR	0
+#define _VQUIT	1
+#define _VERASE	2
+#define _VKILL	3
+#define _VEOF	4
+#define _VMIN	4
+#define _VEOL	5
+#define _VTIME	5
+#define _VEOL2	6
+#define _VSWTC	7
+
+
+#endif /* _UAPI_ALPHA_TERMIOS_H */
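
Because slots 4 and 5 are overloaded, code that writes c_cc must
branch on ICANON first; a hedged sketch (tio is a hypothetical
struct termio):

    if (tio.c_lflag & ICANON)
            tio.c_cc[_VEOF] = 4;   /* ^D: slot 4 is EOF in canonical mode */
    else
            tio.c_cc[_VMIN] = 1;   /* same slot: wake after 1 byte in raw mode */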
diff --git a/arch/alpha/include/uapi/asm/types.h b/arch/alpha/include/uapi/asm/types.h
new file mode 100644
index 0000000..9fd3cd4
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/types.h
@@ -0,0 +1,16 @@
+#ifndef _UAPI_ALPHA_TYPES_H
+#define _UAPI_ALPHA_TYPES_H
+
+/*
+ * This file is never included by application software unless
+ * explicitly requested (e.g., via linux/types.h), in which case the
+ * application is Linux specific so (user-) name space pollution is
+ * not a major issue.  However, for interoperability, libraries still
+ * need to be careful to avoid name clashes.
+ */
+
+#ifndef __KERNEL__
+#include <asm-generic/int-l64.h>
+#endif
+
+#endif /* _UAPI_ALPHA_TYPES_H */
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..801d28b
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -0,0 +1,471 @@
+#ifndef _UAPI_ALPHA_UNISTD_H
+#define _UAPI_ALPHA_UNISTD_H
+
+#define __NR_osf_syscall	  0	/* not implemented */
+#define __NR_exit		  1
+#define __NR_fork		  2
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_osf_old_open	  5	/* not implemented */
+#define __NR_close		  6
+#define __NR_osf_wait4		  7
+#define __NR_osf_old_creat	  8	/* not implemented */
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_osf_execve		 11	/* not implemented */
+#define __NR_chdir		 12
+#define __NR_fchdir		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+#define __NR_chown		 16
+#define __NR_brk		 17
+#define __NR_osf_getfsstat	 18	/* not implemented */
+#define __NR_lseek		 19
+#define __NR_getxpid		 20
+#define __NR_osf_mount		 21
+#define __NR_umount		 22
+#define __NR_setuid		 23
+#define __NR_getxuid		 24
+#define __NR_exec_with_loader	 25	/* not implemented */
+#define __NR_ptrace		 26
+#define __NR_osf_nrecvmsg	 27	/* not implemented */
+#define __NR_osf_nsendmsg	 28	/* not implemented */
+#define __NR_osf_nrecvfrom	 29	/* not implemented */
+#define __NR_osf_naccept	 30	/* not implemented */
+#define __NR_osf_ngetpeername	 31	/* not implemented */
+#define __NR_osf_ngetsockname	 32	/* not implemented */
+#define __NR_access		 33
+#define __NR_osf_chflags	 34	/* not implemented */
+#define __NR_osf_fchflags	 35	/* not implemented */
+#define __NR_sync		 36
+#define __NR_kill		 37
+#define __NR_osf_old_stat	 38	/* not implemented */
+#define __NR_setpgid		 39
+#define __NR_osf_old_lstat	 40	/* not implemented */
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_osf_set_program_attributes	43
+#define __NR_osf_profil		 44	/* not implemented */
+#define __NR_open		 45
+#define __NR_osf_old_sigaction	 46	/* not implemented */
+#define __NR_getxgid		 47
+#define __NR_osf_sigprocmask	 48
+#define __NR_osf_getlogin	 49	/* not implemented */
+#define __NR_osf_setlogin	 50	/* not implemented */
+#define __NR_acct		 51
+#define __NR_sigpending		 52
+
+#define __NR_ioctl		 54
+#define __NR_osf_reboot		 55	/* not implemented */
+#define __NR_osf_revoke		 56	/* not implemented */
+#define __NR_symlink		 57
+#define __NR_readlink		 58
+#define __NR_execve		 59
+#define __NR_umask		 60
+#define __NR_chroot		 61
+#define __NR_osf_old_fstat	 62	/* not implemented */
+#define __NR_getpgrp		 63
+#define __NR_getpagesize	 64
+#define __NR_osf_mremap		 65	/* not implemented */
+#define __NR_vfork		 66
+#define __NR_stat		 67
+#define __NR_lstat		 68
+#define __NR_osf_sbrk		 69	/* not implemented */
+#define __NR_osf_sstk		 70	/* not implemented */
+#define __NR_mmap		 71	/* OSF/1 mmap is superset of Linux */
+#define __NR_osf_old_vadvise	 72	/* not implemented */
+#define __NR_munmap		 73
+#define __NR_mprotect		 74
+#define __NR_madvise		 75
+#define __NR_vhangup		 76
+#define __NR_osf_kmodcall	 77	/* not implemented */
+#define __NR_osf_mincore	 78	/* not implemented */
+#define __NR_getgroups		 79
+#define __NR_setgroups		 80
+#define __NR_osf_old_getpgrp	 81	/* not implemented */
+#define __NR_setpgrp		 82	/* BSD alias for setpgid */
+#define __NR_osf_setitimer	 83
+#define __NR_osf_old_wait	 84	/* not implemented */
+#define __NR_osf_table		 85	/* not implemented */
+#define __NR_osf_getitimer	 86
+#define __NR_gethostname	 87
+#define __NR_sethostname	 88
+#define __NR_getdtablesize	 89
+#define __NR_dup2		 90
+#define __NR_fstat		 91
+#define __NR_fcntl		 92
+#define __NR_osf_select		 93
+#define __NR_poll		 94
+#define __NR_fsync		 95
+#define __NR_setpriority	 96
+#define __NR_socket		 97
+#define __NR_connect		 98
+#define __NR_accept		 99
+#define __NR_getpriority	100
+#define __NR_send		101
+#define __NR_recv		102
+#define __NR_sigreturn		103
+#define __NR_bind		104
+#define __NR_setsockopt		105
+#define __NR_listen		106
+#define __NR_osf_plock		107	/* not implemented */
+#define __NR_osf_old_sigvec	108	/* not implemented */
+#define __NR_osf_old_sigblock	109	/* not implemented */
+#define __NR_osf_old_sigsetmask	110	/* not implemented */
+#define __NR_sigsuspend		111
+#define __NR_osf_sigstack	112
+#define __NR_recvmsg		113
+#define __NR_sendmsg		114
+#define __NR_osf_old_vtrace	115	/* not implemented */
+#define __NR_osf_gettimeofday	116
+#define __NR_osf_getrusage	117
+#define __NR_getsockopt		118
+
+#define __NR_readv		120
+#define __NR_writev		121
+#define __NR_osf_settimeofday	122
+#define __NR_fchown		123
+#define __NR_fchmod		124
+#define __NR_recvfrom		125
+#define __NR_setreuid		126
+#define __NR_setregid		127
+#define __NR_rename		128
+#define __NR_truncate		129
+#define __NR_ftruncate		130
+#define __NR_flock		131
+#define __NR_setgid		132
+#define __NR_sendto		133
+#define __NR_shutdown		134
+#define __NR_socketpair		135
+#define __NR_mkdir		136
+#define __NR_rmdir		137
+#define __NR_osf_utimes		138
+#define __NR_osf_old_sigreturn	139	/* not implemented */
+#define __NR_osf_adjtime	140	/* not implemented */
+#define __NR_getpeername	141
+#define __NR_osf_gethostid	142	/* not implemented */
+#define __NR_osf_sethostid	143	/* not implemented */
+#define __NR_getrlimit		144
+#define __NR_setrlimit		145
+#define __NR_osf_old_killpg	146	/* not implemented */
+#define __NR_setsid		147
+#define __NR_quotactl		148
+#define __NR_osf_oldquota	149	/* not implemented */
+#define __NR_getsockname	150
+
+#define __NR_osf_pid_block	153	/* not implemented */
+#define __NR_osf_pid_unblock	154	/* not implemented */
+
+#define __NR_sigaction		156
+#define __NR_osf_sigwaitprim	157	/* not implemented */
+#define __NR_osf_nfssvc		158	/* not implemented */
+#define __NR_osf_getdirentries	159
+#define __NR_osf_statfs		160
+#define __NR_osf_fstatfs	161
+
+#define __NR_osf_asynch_daemon	163	/* not implemented */
+#define __NR_osf_getfh		164	/* not implemented */
+#define __NR_osf_getdomainname	165
+#define __NR_setdomainname	166
+
+#define __NR_osf_exportfs	169	/* not implemented */
+
+#define __NR_osf_alt_plock	181	/* not implemented */
+
+#define __NR_osf_getmnt		184	/* not implemented */
+
+#define __NR_osf_alt_sigpending	187	/* not implemented */
+#define __NR_osf_alt_setsid	188	/* not implemented */
+
+#define __NR_osf_swapon		199
+#define __NR_msgctl		200
+#define __NR_msgget		201
+#define __NR_msgrcv		202
+#define __NR_msgsnd		203
+#define __NR_semctl		204
+#define __NR_semget		205
+#define __NR_semop		206
+#define __NR_osf_utsname	207
+#define __NR_lchown		208
+#define __NR_osf_shmat		209
+#define __NR_shmctl		210
+#define __NR_shmdt		211
+#define __NR_shmget		212
+#define __NR_osf_mvalid		213	/* not implemented */
+#define __NR_osf_getaddressconf	214	/* not implemented */
+#define __NR_osf_msleep		215	/* not implemented */
+#define __NR_osf_mwakeup	216	/* not implemented */
+#define __NR_msync		217
+#define __NR_osf_signal		218	/* not implemented */
+#define __NR_osf_utc_gettime	219	/* not implemented */
+#define __NR_osf_utc_adjtime	220	/* not implemented */
+
+#define __NR_osf_security	222	/* not implemented */
+#define __NR_osf_kloadcall	223	/* not implemented */
+
+#define __NR_osf_stat		224
+#define __NR_osf_lstat		225
+#define __NR_osf_fstat		226
+#define __NR_osf_statfs64	227
+#define __NR_osf_fstatfs64	228
+
+#define __NR_getpgid		233
+#define __NR_getsid		234
+#define __NR_sigaltstack	235
+#define __NR_osf_waitid		236	/* not implemented */
+#define __NR_osf_priocntlset	237	/* not implemented */
+#define __NR_osf_sigsendset	238	/* not implemented */
+#define __NR_osf_set_speculative	239	/* not implemented */
+#define __NR_osf_msfs_syscall	240	/* not implemented */
+#define __NR_osf_sysinfo	241
+#define __NR_osf_uadmin		242	/* not implemented */
+#define __NR_osf_fuser		243	/* not implemented */
+#define __NR_osf_proplist_syscall    244
+#define __NR_osf_ntp_adjtime	245	/* not implemented */
+#define __NR_osf_ntp_gettime	246	/* not implemented */
+#define __NR_osf_pathconf	247	/* not implemented */
+#define __NR_osf_fpathconf	248	/* not implemented */
+
+#define __NR_osf_uswitch	250	/* not implemented */
+#define __NR_osf_usleep_thread	251
+#define __NR_osf_audcntl	252	/* not implemented */
+#define __NR_osf_audgen		253	/* not implemented */
+#define __NR_sysfs		254
+#define __NR_osf_subsys_info	255	/* not implemented */
+#define __NR_osf_getsysinfo	256
+#define __NR_osf_setsysinfo	257
+#define __NR_osf_afs_syscall	258	/* not implemented */
+#define __NR_osf_swapctl	259	/* not implemented */
+#define __NR_osf_memcntl	260	/* not implemented */
+#define __NR_osf_fdatasync	261	/* not implemented */
+
+/*
+ * Ignore legacy syscalls that we don't use.
+ */
+#define __IGNORE_alarm
+#define __IGNORE_creat
+#define __IGNORE_getegid
+#define __IGNORE_geteuid
+#define __IGNORE_getgid
+#define __IGNORE_getpid
+#define __IGNORE_getppid
+#define __IGNORE_getuid
+#define __IGNORE_pause
+#define __IGNORE_time
+#define __IGNORE_utime
+#define __IGNORE_umount2
+
+/*
+ * Linux-specific system calls begin at 300
+ */
+#define __NR_bdflush		300
+#define __NR_sethae		301
+#define __NR_mount		302
+#define __NR_old_adjtimex	303
+#define __NR_swapoff		304
+#define __NR_getdents		305
+#define __NR_create_module	306
+#define __NR_init_module	307
+#define __NR_delete_module	308
+#define __NR_get_kernel_syms	309
+#define __NR_syslog		310
+#define __NR_reboot		311
+#define __NR_clone		312
+#define __NR_uselib		313
+#define __NR_mlock		314
+#define __NR_munlock		315
+#define __NR_mlockall		316
+#define __NR_munlockall		317
+#define __NR_sysinfo		318
+#define __NR__sysctl		319
+/* 320 was sys_idle.  */
+#define __NR_oldumount		321
+#define __NR_swapon		322
+#define __NR_times		323
+#define __NR_personality	324
+#define __NR_setfsuid		325
+#define __NR_setfsgid		326
+#define __NR_ustat		327
+#define __NR_statfs		328
+#define __NR_fstatfs		329
+#define __NR_sched_setparam		330
+#define __NR_sched_getparam		331
+#define __NR_sched_setscheduler		332
+#define __NR_sched_getscheduler		333
+#define __NR_sched_yield		334
+#define __NR_sched_get_priority_max	335
+#define __NR_sched_get_priority_min	336
+#define __NR_sched_rr_get_interval	337
+#define __NR_afs_syscall		338
+#define __NR_uname			339
+#define __NR_nanosleep			340
+#define __NR_mremap			341
+#define __NR_nfsservctl			342
+#define __NR_setresuid			343
+#define __NR_getresuid			344
+#define __NR_pciconfig_read		345
+#define __NR_pciconfig_write		346
+#define __NR_query_module		347
+#define __NR_prctl			348
+#define __NR_pread64			349
+#define __NR_pwrite64			350
+#define __NR_rt_sigreturn		351
+#define __NR_rt_sigaction		352
+#define __NR_rt_sigprocmask		353
+#define __NR_rt_sigpending		354
+#define __NR_rt_sigtimedwait		355
+#define __NR_rt_sigqueueinfo		356
+#define __NR_rt_sigsuspend		357
+#define __NR_select			358
+#define __NR_gettimeofday		359
+#define __NR_settimeofday		360
+#define __NR_getitimer			361
+#define __NR_setitimer			362
+#define __NR_utimes			363
+#define __NR_getrusage			364
+#define __NR_wait4			365
+#define __NR_adjtimex			366
+#define __NR_getcwd			367
+#define __NR_capget			368
+#define __NR_capset			369
+#define __NR_sendfile			370
+#define __NR_setresgid			371
+#define __NR_getresgid			372
+#define __NR_dipc			373
+#define __NR_pivot_root			374
+#define __NR_mincore			375
+#define __NR_pciconfig_iobase		376
+#define __NR_getdents64			377
+#define __NR_gettid			378
+#define __NR_readahead			379
+/* 380 is unused */
+#define __NR_tkill			381
+#define __NR_setxattr			382
+#define __NR_lsetxattr			383
+#define __NR_fsetxattr			384
+#define __NR_getxattr			385
+#define __NR_lgetxattr			386
+#define __NR_fgetxattr			387
+#define __NR_listxattr			388
+#define __NR_llistxattr			389
+#define __NR_flistxattr			390
+#define __NR_removexattr		391
+#define __NR_lremovexattr		392
+#define __NR_fremovexattr		393
+#define __NR_futex			394
+#define __NR_sched_setaffinity		395
+#define __NR_sched_getaffinity		396
+#define __NR_tuxcall			397
+#define __NR_io_setup			398
+#define __NR_io_destroy			399
+#define __NR_io_getevents		400
+#define __NR_io_submit			401
+#define __NR_io_cancel			402
+#define __NR_exit_group			405
+#define __NR_lookup_dcookie		406
+#define __NR_epoll_create		407
+#define __NR_epoll_ctl			408
+#define __NR_epoll_wait			409
+/* Feb 2007: These three sys_epoll defines shouldn't be here but culling
+ * them would break userspace apps ... we'll kill them off in 2010 :) */
+#define __NR_sys_epoll_create		__NR_epoll_create
+#define __NR_sys_epoll_ctl		__NR_epoll_ctl
+#define __NR_sys_epoll_wait		__NR_epoll_wait
+#define __NR_remap_file_pages		410
+#define __NR_set_tid_address		411
+#define __NR_restart_syscall		412
+#define __NR_fadvise64			413
+#define __NR_timer_create		414
+#define __NR_timer_settime		415
+#define __NR_timer_gettime		416
+#define __NR_timer_getoverrun		417
+#define __NR_timer_delete		418
+#define __NR_clock_settime		419
+#define __NR_clock_gettime		420
+#define __NR_clock_getres		421
+#define __NR_clock_nanosleep		422
+#define __NR_semtimedop			423
+#define __NR_tgkill			424
+#define __NR_stat64			425
+#define __NR_lstat64			426
+#define __NR_fstat64			427
+#define __NR_vserver			428
+#define __NR_mbind			429
+#define __NR_get_mempolicy		430
+#define __NR_set_mempolicy		431
+#define __NR_mq_open			432
+#define __NR_mq_unlink			433
+#define __NR_mq_timedsend		434
+#define __NR_mq_timedreceive		435
+#define __NR_mq_notify			436
+#define __NR_mq_getsetattr		437
+#define __NR_waitid			438
+#define __NR_add_key			439
+#define __NR_request_key		440
+#define __NR_keyctl			441
+#define __NR_ioprio_set			442
+#define __NR_ioprio_get			443
+#define __NR_inotify_init		444
+#define __NR_inotify_add_watch		445
+#define __NR_inotify_rm_watch		446
+#define __NR_fdatasync			447
+#define __NR_kexec_load			448
+#define __NR_migrate_pages		449
+#define __NR_openat			450
+#define __NR_mkdirat			451
+#define __NR_mknodat			452
+#define __NR_fchownat			453
+#define __NR_futimesat			454
+#define __NR_fstatat64			455
+#define __NR_unlinkat			456
+#define __NR_renameat			457
+#define __NR_linkat			458
+#define __NR_symlinkat			459
+#define __NR_readlinkat			460
+#define __NR_fchmodat			461
+#define __NR_faccessat			462
+#define __NR_pselect6			463
+#define __NR_ppoll			464
+#define __NR_unshare			465
+#define __NR_set_robust_list		466
+#define __NR_get_robust_list		467
+#define __NR_splice			468
+#define __NR_sync_file_range		469
+#define __NR_tee			470
+#define __NR_vmsplice			471
+#define __NR_move_pages			472
+#define __NR_getcpu			473
+#define __NR_epoll_pwait		474
+#define __NR_utimensat			475
+#define __NR_signalfd			476
+#define __NR_timerfd			477
+#define __NR_eventfd			478
+#define __NR_recvmmsg			479
+#define __NR_fallocate			480
+#define __NR_timerfd_create		481
+#define __NR_timerfd_settime		482
+#define __NR_timerfd_gettime		483
+#define __NR_signalfd4			484
+#define __NR_eventfd2			485
+#define __NR_epoll_create1		486
+#define __NR_dup3			487
+#define __NR_pipe2			488
+#define __NR_inotify_init1		489
+#define __NR_preadv			490
+#define __NR_pwritev			491
+#define __NR_rt_tgsigqueueinfo		492
+#define __NR_perf_event_open		493
+#define __NR_fanotify_init		494
+#define __NR_fanotify_mark		495
+#define __NR_prlimit64			496
+#define __NR_name_to_handle_at		497
+#define __NR_open_by_handle_at		498
+#define __NR_clock_adjtime		499
+#define __NR_syncfs			500
+#define __NR_setns			501
+#define __NR_accept4			502
+#define __NR_sendmmsg			503
+#define __NR_process_vm_readv		504
+#define __NR_process_vm_writev		505
+
+#endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 336393c..02d02c0 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -122,12 +122,6 @@
 	return sigsuspend(&blocked);
 }
 
-asmlinkage int
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
-{
-	return do_sigaltstack(uss, uoss, rdusp());
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
@@ -418,9 +412,7 @@
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(set->sig[0], &frame->uc.uc_osf_sigmask);
-	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(oldsp), &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= __save_altstack(&frame->uc.uc_stack, oldsp);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, 
 				set->sig[0], oldsp);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8c83d98..f95ba14 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -12,8 +12,6 @@
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 49ca86e..fe4d9c3 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -44,7 +44,7 @@
 
 #else
 
-#include <mach/debug-macro.S>
+#include CONFIG_DEBUG_LL_INCLUDE
 
 		.macro	writeb,	ch, rb
 		senduart \ch, \rb
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 0f44174..e44da40 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -42,11 +42,10 @@
 dtb-$(CONFIG_ARCH_EXYNOS) += exynos4210-origen.dtb \
 	exynos4210-smdkv310.dtb \
 	exynos4210-trats.dtb \
-	exynos5250-smdk5250.dtb \
-	exynos5440-ssdk5440.dtb \
 	exynos4412-smdk4412.dtb \
 	exynos5250-smdk5250.dtb \
-	exynos5250-snow.dtb
+	exynos5250-snow.dtb \
+	exynos5440-ssdk5440.dtb
 dtb-$(CONFIG_ARCH_HIGHBANK) += highbank.dtb \
 	ecx-2000.dtb
 dtb-$(CONFIG_ARCH_INTEGRATOR) += integratorap.dtb \
@@ -107,6 +106,7 @@
 	omap3-evm.dtb \
 	omap3-tobi.dtb \
 	omap4-panda.dtb \
+	omap4-panda-a4.dtb \
 	omap4-panda-es.dtb \
 	omap4-var-som.dtb \
 	omap4-sdp.dtb \
@@ -131,8 +131,8 @@
 	spear320-evb.dtb \
 	spear320-hmi.dtb
 dtb-$(CONFIG_ARCH_SPEAR6XX)+= spear600-evb.dtb
-dtb-$(CONFIG_ARCH_SUNXI) += sun4i-cubieboard.dtb \
-	sun5i-olinuxino.dtb
+dtb-$(CONFIG_ARCH_SUNXI) += sun4i-a10-cubieboard.dtb \
+	sun5i-a13-olinuxino.dtb
 dtb-$(CONFIG_ARCH_TEGRA) += tegra20-harmony.dtb \
 	tegra20-medcom-wide.dtb \
 	tegra20-paz00.dtb \
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index 8e6251f..32ec62c 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -368,14 +368,14 @@
 				compatible = "atmel,at91rm9200-ssc";
 				reg = <0xfff98000 0x4000>;
 				interrupts = <16 4 5>;
-				status = "disable";
+				status = "disabled";
 			};
 
 			ssc1: ssc@fff9c000 {
 				compatible = "atmel,at91rm9200-ssc";
 				reg = <0xfff9c000 0x4000>;
 				interrupts = <17 4 5>;
-				status = "disable";
+				status = "disabled";
 			};
 
 			macb0: ethernet@fffbc000 {
diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
index fa1ae0c..231858f 100644
--- a/arch/arm/boot/dts/at91sam9g45.dtsi
+++ b/arch/arm/boot/dts/at91sam9g45.dtsi
@@ -425,14 +425,14 @@
 				compatible = "atmel,at91sam9g45-ssc";
 				reg = <0xfff9c000 0x4000>;
 				interrupts = <16 4 5>;
-				status = "disable";
+				status = "disabled";
 			};
 
 			ssc1: ssc@fffa0000 {
 				compatible = "atmel,at91sam9g45-ssc";
 				reg = <0xfffa0000 0x4000>;
 				interrupts = <17 4 5>;
-				status = "disable";
+				status = "disabled";
 			};
 
 			adc0: adc@fffb0000 {
diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 617ede5..40ac3a4 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -92,7 +92,7 @@
 				compatible = "atmel,at91sam9g45-ssc";
 				reg = <0xf0010000 0x4000>;
 				interrupts = <28 4 5>;
-				status = "disable";
+				status = "disabled";
 			};
 
 			tcb0: timer@f8008000 {
diff --git a/arch/arm/boot/dts/imx27-3ds.dts b/arch/arm/boot/dts/imx27-3ds.dts
index b01c0d7..fa04c7b 100644
--- a/arch/arm/boot/dts/imx27-3ds.dts
+++ b/arch/arm/boot/dts/imx27-3ds.dts
@@ -21,17 +21,17 @@
 	};
 
 	soc {
-		aipi@10000000 { /* aipi */
-
+		aipi@10000000 { /* aipi1 */
 			uart1: serial@1000a000 {
 				fsl,uart-has-rtscts;
 				status = "okay";
 			};
+		};
 
-			fec@1002b000 {
+		aipi@10020000 { /* aipi2 */
+			ethernet@1002b000 {
 				status = "okay";
 			};
 		};
 	};
-
 };
diff --git a/arch/arm/boot/dts/imx27-phytec-phycore.dts b/arch/arm/boot/dts/imx27-phytec-phycore.dts
index af50469..53b0ec0 100644
--- a/arch/arm/boot/dts/imx27-phytec-phycore.dts
+++ b/arch/arm/boot/dts/imx27-phytec-phycore.dts
@@ -21,8 +21,7 @@
 	};
 
 	soc {
-		aipi@10000000 { /* aipi */
-
+		aipi@10000000 { /* aipi1 */
 			serial@1000a000 {
 				fsl,uart-has-rtscts;
 				status = "okay";
@@ -38,10 +37,6 @@
 				status = "okay";
 			};
 
-			ethernet@1002b000 {
-				status = "okay";
-			};
-
 			i2c@1001d000 {
 				clock-frequency = <400000>;
 				status = "okay";
@@ -60,6 +55,12 @@
 				};
 			};
 		};
+
+		aipi@10020000 { /* aipi2 */
+			ethernet@1002b000 {
+				status = "okay";
+			};
+		};
 	};
 
 	nor_flash@c0000000 {
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index b8d3905..5a82cb5 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -55,7 +55,7 @@
 			compatible = "fsl,aipi-bus", "simple-bus";
 			#address-cells = <1>;
 			#size-cells = <1>;
-			reg = <0x10000000 0x10000000>;
+			reg = <0x10000000 0x20000>;
 			ranges;
 
 			wdog: wdog@10002000 {
@@ -211,6 +211,15 @@
 				status = "disabled";
 			};
 
+		};
+
+		aipi@10020000 { /* AIPI2 */
+			compatible = "fsl,aipi-bus", "simple-bus";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x10020000 0x20000>;
+			ranges;
+
 			fec: ethernet@1002b000 {
 				compatible = "fsl,imx27-fec";
 				reg = <0x1002b000 0x4000>;
diff --git a/arch/arm/boot/dts/imx28-cfa10049.dts b/arch/arm/boot/dts/imx28-cfa10049.dts
index b222614..bdc80a4 100644
--- a/arch/arm/boot/dts/imx28-cfa10049.dts
+++ b/arch/arm/boot/dts/imx28-cfa10049.dts
@@ -92,6 +92,30 @@
 				status = "okay";
 			};
 
+			i2cmux {
+				compatible = "i2c-mux-gpio";
+				#address-cells = <1>;
+				#size-cells = <0>;
+				mux-gpios = <&gpio1 22 0 &gpio1 23 0>;
+				i2c-parent = <&i2c1>;
+
+				i2c@0 {
+					reg = <0>;
+				};
+
+				i2c@1 {
+					reg = <1>;
+				};
+
+				i2c@2 {
+					reg = <2>;
+				};
+
+				i2c@3 {
+					reg = <3>;
+				};
+			};
+
 			usbphy1: usbphy@8007e000 {
 				status = "okay";
 			};
diff --git a/arch/arm/boot/dts/omap2420-h4.dts b/arch/arm/boot/dts/omap2420-h4.dts
index 77b84e1..9b0d077 100644
--- a/arch/arm/boot/dts/omap2420-h4.dts
+++ b/arch/arm/boot/dts/omap2420-h4.dts
@@ -15,6 +15,6 @@
 
 	memory {
 		device_type = "memory";
-		reg = <0x80000000 0x84000000>; /* 64 MB */
+		reg = <0x80000000 0x4000000>; /* 64 MB */
 	};
 };
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 009096d..b4ca60f 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -73,7 +73,7 @@
 				400000
 				500000
 				600000 >;
-		status = "disable";
+		status = "disabled";
 	};
 
 	ahb {
@@ -118,15 +118,15 @@
 			compatible = "st,spear600-fsmc-nand";
 			#address-cells = <1>;
 			#size-cells = <1>;
-			reg = <0xb0000000 0x1000	/* FSMC Register */
-			       0xb0800000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
+			reg = <0xb0000000 0x1000	/* FSMC Register */
+			       0xb0800000 0x0010	/* NAND Base DATA */
+			       0xb0820000 0x0010	/* NAND Base ADDR */
+			       0xb0810000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			interrupts = <0 20 0x4
 				      0 21 0x4
 				      0 22 0x4
 				      0 23 0x4>;
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
 			st,mode = <2>;
 			status = "disabled";
 		};
@@ -144,7 +144,7 @@
 			compatible = "st,pcm-audio";
 			#address-cells = <0>;
 			#size-cells = <0>;
-			status = "disable";
+			status = "disabled";
 		};
 
 		smi: flash@ea000000 {
diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi
index 090adc6..f79b3df 100644
--- a/arch/arm/boot/dts/spear300.dtsi
+++ b/arch/arm/boot/dts/spear300.dtsi
@@ -38,10 +38,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x94000000 0x1000	/* FSMC Register */
-			       0x80000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0x80000000 0x0010	/* NAND Base DATA */
+			       0x80020000 0x0010	/* NAND Base ADDR */
+			       0x80010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi
index e814e5e9..ab45b8c 100644
--- a/arch/arm/boot/dts/spear310.dtsi
+++ b/arch/arm/boot/dts/spear310.dtsi
@@ -33,10 +33,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x44000000 0x1000	/* FSMC Register */
-			       0x40000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x10000>;
-			st,cle-off = <0x20000>;
+			       0x40000000 0x0010	/* NAND Base DATA */
+			       0x40020000 0x0010	/* NAND Base ADDR */
+			       0x40010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi
index c056a84..caa5520 100644
--- a/arch/arm/boot/dts/spear320.dtsi
+++ b/arch/arm/boot/dts/spear320.dtsi
@@ -40,10 +40,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x4c000000 0x1000	/* FSMC Register */
-			       0x50000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0x50000000 0x0010	/* NAND Base DATA */
+			       0x50020000 0x0010	/* NAND Base ADDR */
+			       0x50010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
index e051dde..19f99dc 100644
--- a/arch/arm/boot/dts/spear600.dtsi
+++ b/arch/arm/boot/dts/spear600.dtsi
@@ -76,10 +76,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0xd1800000 0x1000	/* FSMC Register */
-			       0xd2000000 0x4000>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0xd2000000 0x0010	/* NAND Base DATA */
+			       0xd2020000 0x0010	/* NAND Base ADDR */
+			       0xd2010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/sun4i-cubieboard.dts b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts
similarity index 87%
rename from arch/arm/boot/dts/sun4i-cubieboard.dts
rename to arch/arm/boot/dts/sun4i-a10-cubieboard.dts
index f4ca126..5cab825 100644
--- a/arch/arm/boot/dts/sun4i-cubieboard.dts
+++ b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts
@@ -11,11 +11,11 @@
  */
 
 /dts-v1/;
-/include/ "sun4i.dtsi"
+/include/ "sun4i-a10.dtsi"
 
 / {
 	model = "Cubietech Cubieboard";
-	compatible = "cubietech,cubieboard", "allwinner,sun4i";
+	compatible = "cubietech,a10-cubieboard", "allwinner,sun4i-a10";
 
 	aliases {
 		serial0 = &uart0;
diff --git a/arch/arm/boot/dts/sun4i.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
similarity index 100%
rename from arch/arm/boot/dts/sun4i.dtsi
rename to arch/arm/boot/dts/sun4i-a10.dtsi
diff --git a/arch/arm/boot/dts/sun5i-olinuxino.dts b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts
similarity index 86%
rename from arch/arm/boot/dts/sun5i-olinuxino.dts
rename to arch/arm/boot/dts/sun5i-a13-olinuxino.dts
index d6ff889..498a091 100644
--- a/arch/arm/boot/dts/sun5i-olinuxino.dts
+++ b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts
@@ -12,11 +12,11 @@
  */
 
 /dts-v1/;
-/include/ "sun5i.dtsi"
+/include/ "sun5i-a13.dtsi"
 
 / {
 	model = "Olimex A13-Olinuxino";
-	compatible = "olimex,a13-olinuxino", "allwinner,sun5i";
+	compatible = "olimex,a13-olinuxino", "allwinner,sun5i-a13";
 
 	chosen {
 		bootargs = "earlyprintk console=ttyS0,115200";
diff --git a/arch/arm/boot/dts/sun5i.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
similarity index 100%
rename from arch/arm/boot/dts/sun5i.dtsi
rename to arch/arm/boot/dts/sun5i-a13.dtsi
diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index 240b25e..86cfd29 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -57,7 +57,7 @@
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_ECC_SMC=y
-CONFIG_MTD_NAND_NOMADIK=y
+CONFIG_MTD_NAND_FSMC=y
 CONFIG_MTD_ONENAND=y
 CONFIG_MTD_ONENAND_VERIFY_WRITE=y
 CONFIG_MTD_ONENAND_GENERIC=y
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 67d0632..5b579b9 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -91,6 +91,7 @@
  */
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
+	debug_dma_mapping_error(dev, dma_addr);
 	return dma_addr == DMA_ERROR_CODE;
 }
 
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 7cd13cc..21a2700 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -41,7 +41,6 @@
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_SOCKETCALL
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm/include/uapi/asm/signal.h b/arch/arm/include/uapi/asm/signal.h
index 921c57f..33073bd 100644
--- a/arch/arm/include/uapi/asm/signal.h
+++ b/arch/arm/include/uapi/asm/signal.h
@@ -87,13 +87,6 @@
 #define SA_NOMASK	SA_NODEFER
 #define SA_ONESHOT	SA_RESETHAND
 
-
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ac03bdb..4da7cde 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -405,6 +405,7 @@
 #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 					/* 378 for kcmp */
+#define __NR_finit_module		(__NR_SYSCALL_BASE+379)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 5935b6a02..a4fda4e 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -388,6 +388,7 @@
 		CALL(sys_process_vm_readv)
 		CALL(sys_process_vm_writev)
 		CALL(sys_ni_syscall)	/* reserved for sys_kcmp */
+		CALL(sys_finit_module)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 9a89bf4..3f6cbb2 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -733,7 +733,7 @@
 	setup_processor();
 	mdesc = setup_machine_fdt(__atags_pointer);
 	if (!mdesc)
-		mdesc = setup_machine_tags(__atags_pointer, machine_arch_type);
+		mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type);
 	machine_desc = mdesc;
 	machine_name = mdesc->name;
 
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index df74518..ab1017b 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -109,10 +109,12 @@
 {
 	siginfo_t info;
 
+	down_read(&current->mm->mmap_sem);
 	if (find_vma(current->mm, addr) == NULL)
 		info.si_code = SEGV_MAPERR;
 	else
 		info.si_code = SEGV_ACCERR;
+	up_read(&current->mm->mmap_sem);
 
 	info.si_signo = SIGSEGV;
 	info.si_errno = 0;
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b9f38e3..11c1785 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -140,6 +140,8 @@
 	}
 #endif
 
+	NOTES
+
 	_etext = .;			/* End of text and rodata section */
 
 #ifndef CONFIG_XIP_KERNEL
@@ -295,8 +297,6 @@
 	}
 #endif
 
-	NOTES
-
 	BSS_SECTION(0, 0, 0)
 	_end = .;
 
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 7211772..0299915 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -41,6 +41,7 @@
 #include <mach/cp_intc.h>
 #include <mach/da8xx.h>
 #include <mach/mux.h>
+#include <mach/sram.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index fcdbe43..2d5502d 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -725,7 +725,7 @@
 };
 
 static struct platform_device da8xx_rtc_device = {
-	.name           = "omap_rtc",
+	.name           = "da830-rtc",
 	.id             = -1,
 	.num_resources	= ARRAY_SIZE(da8xx_rtc_resources),
 	.resource	= da8xx_rtc_resources,
@@ -734,17 +734,6 @@
 int da8xx_register_rtc(void)
 {
 	int ret;
-	void __iomem *base;
-
-	base = ioremap(DA8XX_RTC_BASE, SZ_4K);
-	if (WARN_ON(!base))
-		return -ENOMEM;
-
-	/* Unlock the rtc's registers */
-	__raw_writel(0x83e70b13, base + 0x6c);
-	__raw_writel(0x95a4f1e0, base + 0x70);
-
-	iounmap(base);
 
 	ret = platform_device_register(&da8xx_rtc_device);
 	if (!ret)
diff --git a/arch/arm/mach-ep93xx/include/mach/uncompress.h b/arch/arm/mach-ep93xx/include/mach/uncompress.h
index 16026c2..d64274f 100644
--- a/arch/arm/mach-ep93xx/include/mach/uncompress.h
+++ b/arch/arm/mach-ep93xx/include/mach/uncompress.h
@@ -47,13 +47,9 @@
 
 static inline void putc(int c)
 {
-	int i;
-
-	for (i = 0; i < 1000; i++) {
-		/* Transmit fifo not full?  */
-		if (!(__raw_readb(PHYS_UART_FLAG) & UART_FLAG_TXFF))
-			break;
-	}
+	/* Wait while the transmit FIFO is full */
+	while (__raw_readb(PHYS_UART_FLAG) & UART_FLAG_TXFF)
+		;
 
 	__raw_writeb(c, PHYS_UART_DATA);
 }
diff --git a/arch/arm/mach-exynos/clock-exynos4.c b/arch/arm/mach-exynos/clock-exynos4.c
index efead60..bbcb3de 100644
--- a/arch/arm/mach-exynos/clock-exynos4.c
+++ b/arch/arm/mach-exynos/clock-exynos4.c
@@ -529,7 +529,7 @@
 		.enable		= exynos4_clk_ip_fsys_ctrl,
 		.ctrlbit	= (1 << 8),
 	}, {
-		.name		= "dwmmc",
+		.name		= "biu",
 		.parent		= &exynos4_clk_aclk_133.clk,
 		.enable		= exynos4_clk_ip_fsys_ctrl,
 		.ctrlbit	= (1 << 9),
@@ -1134,7 +1134,7 @@
 		.reg_div = { .reg = EXYNOS4_CLKDIV_MFC, .shift = 0, .size = 4 },
 	}, {
 		.clk	= {
-			.name		= "sclk_dwmmc",
+			.name		= "ciu",
 			.parent		= &exynos4_clk_dout_mmc4.clk,
 			.enable		= exynos4_clksrc_mask_fsys_ctrl,
 			.ctrlbit	= (1 << 16),
diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c
index ddd4b72..d6d0dc6 100644
--- a/arch/arm/mach-exynos/common.c
+++ b/arch/arm/mach-exynos/common.c
@@ -679,7 +679,8 @@
 	 * These parameters should be NULL and 0 because EXYNOS4
 	 * uses GIC instead of VIC.
 	 */
-	s5p_init_irq(NULL, 0);
+	if (!of_machine_is_compatible("samsung,exynos5440"))
+		s5p_init_irq(NULL, 0);
 
 	gic_arch_extn.irq_set_wake = s3c_irq_wake;
 }
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index dac146d..04744f9 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -25,7 +25,7 @@
 #ifdef CONFIG_PM_GENERIC_DOMAINS
 int exynos_pm_late_initcall(void);
 #else
-static int exynos_pm_late_initcall(void) { return 0; }
+static inline int exynos_pm_late_initcall(void) { return 0; }
 #endif
 
 #ifdef CONFIG_ARCH_EXYNOS4
diff --git a/arch/arm/mach-exynos/dev-audio.c b/arch/arm/mach-exynos/dev-audio.c
index a1cb42c..9d1a609 100644
--- a/arch/arm/mach-exynos/dev-audio.c
+++ b/arch/arm/mach-exynos/dev-audio.c
@@ -23,11 +23,6 @@
 #include <mach/irqs.h>
 #include <mach/regs-audss.h>
 
-static const char *rclksrc[] = {
-	[0] = "busclk",
-	[1] = "i2sclk",
-};
-
 static int exynos4_cfg_i2s(struct platform_device *pdev)
 {
 	/* configure GPIO for i2s port */
@@ -55,7 +50,6 @@
 		.i2s = {
 			.quirks = QUIRK_PRI_6CHAN | QUIRK_SEC_DAI
 					 | QUIRK_NEED_RSTCLR,
-			.src_clk = rclksrc,
 			.idma_addr = EXYNOS4_AUDSS_INT_MEM,
 		},
 	},
@@ -78,17 +72,11 @@
 	},
 };
 
-static const char *rclksrc_v3[] = {
-	[0] = "sclk_i2s",
-	[1] = "no_such_clock",
-};
-
 static struct s3c_audio_pdata i2sv3_pdata = {
 	.cfg_gpio = exynos4_cfg_i2s,
 	.type = {
 		.i2s = {
 			.quirks = QUIRK_NO_MUXPSR,
-			.src_clk = rclksrc_v3,
 		},
 	},
 };
diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c
index f038c8c..e99d3d8 100644
--- a/arch/arm/mach-exynos/mach-exynos5-dt.c
+++ b/arch/arm/mach-exynos/mach-exynos5-dt.c
@@ -163,6 +163,7 @@
 
 static void __init exynos5_reserve(void)
 {
+#ifdef CONFIG_S5P_DEV_MFC
 	struct s5p_mfc_dt_meminfo mfc_mem;
 
 	/* Reserve memory for MFC only if it's available */
@@ -170,6 +171,7 @@
 	if (of_scan_flat_dt(s5p_fdt_find_mfc_mem, &mfc_mem))
 		s5p_mfc_reserve_mem(mfc_mem.roff, mfc_mem.rsize, mfc_mem.loff,
 				mfc_mem.lsize);
+#endif
 }
 
 DT_MACHINE_START(EXYNOS5_DT, "SAMSUNG EXYNOS5 (Flattened Device Tree)")
diff --git a/arch/arm/mach-exynos/mach-origen.c b/arch/arm/mach-exynos/mach-origen.c
index e6f4191..5e34b9c 100644
--- a/arch/arm/mach-exynos/mach-origen.c
+++ b/arch/arm/mach-exynos/mach-origen.c
@@ -621,7 +621,7 @@
 	PWM_LOOKUP("s3c24xx-pwm.0", 0, "pwm-backlight.0", NULL),
 };
 
-#ifdef CONFIG_DRM_EXYNOS
+#ifdef CONFIG_DRM_EXYNOS_FIMD
 static struct exynos_drm_fimd_pdata drm_fimd_pdata = {
 	.panel	= {
 		.timing	= {
@@ -793,7 +793,7 @@
 	s5p_i2c_hdmiphy_set_platdata(NULL);
 	s5p_hdmi_set_platdata(&hdmiphy_info, NULL, 0);
 
-#ifdef CONFIG_DRM_EXYNOS
+#ifdef CONFIG_DRM_EXYNOS_FIMD
 	s5p_device_fimd0.dev.platform_data = &drm_fimd_pdata;
 	exynos4_fimd0_gpio_setup_24bpp();
 #else
diff --git a/arch/arm/mach-exynos/mach-smdk4x12.c b/arch/arm/mach-exynos/mach-smdk4x12.c
index a1555a7..ae6da40 100644
--- a/arch/arm/mach-exynos/mach-smdk4x12.c
+++ b/arch/arm/mach-exynos/mach-smdk4x12.c
@@ -246,7 +246,7 @@
 	.cols		= 8,
 };
 
-#ifdef CONFIG_DRM_EXYNOS
+#ifdef CONFIG_DRM_EXYNOS_FIMD
 static struct exynos_drm_fimd_pdata drm_fimd_pdata = {
 	.panel	= {
 		.timing	= {
@@ -360,7 +360,7 @@
 
 	s3c_hsotg_set_platdata(&smdk4x12_hsotg_pdata);
 
-#ifdef CONFIG_DRM_EXYNOS
+#ifdef CONFIG_DRM_EXYNOS_FIMD
 	s5p_device_fimd0.dev.platform_data = &drm_fimd_pdata;
 	exynos4_fimd0_gpio_setup_24bpp();
 #else
diff --git a/arch/arm/mach-exynos/mach-smdkv310.c b/arch/arm/mach-exynos/mach-smdkv310.c
index b738424..35548e3 100644
--- a/arch/arm/mach-exynos/mach-smdkv310.c
+++ b/arch/arm/mach-exynos/mach-smdkv310.c
@@ -159,7 +159,7 @@
 	.dev.platform_data	= &smdkv310_lcd_lte480wv_data,
 };
 
-#ifdef CONFIG_DRM_EXYNOS
+#ifdef CONFIG_DRM_EXYNOS_FIMD
 static struct exynos_drm_fimd_pdata drm_fimd_pdata = {
 	.panel	= {
 		.timing	= {
@@ -402,7 +402,7 @@
 	samsung_bl_set(&smdkv310_bl_gpio_info, &smdkv310_bl_data);
 	pwm_add_table(smdkv310_pwm_lookup, ARRAY_SIZE(smdkv310_pwm_lookup));
 
-#ifdef CONFIG_DRM_EXYNOS
+#ifdef CONFIG_DRM_EXYNOS_FIMD
 	s5p_device_fimd0.dev.platform_data = &drm_fimd_pdata;
 	exynos4_fimd0_gpio_setup_24bpp();
 #else
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index 4ca8ff1..c5c840e 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -198,7 +198,7 @@
 {
 	int i;
 
-	if (!soc_is_exynos5250())
+	if (!(soc_is_exynos5250() || soc_is_exynos5440()))
 		scu_enable(scu_base_addr());
 
 	/*
diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c
index e8c0473..579023f 100644
--- a/arch/arm/mach-imx/clk-imx51-imx53.c
+++ b/arch/arm/mach-imx/clk-imx51-imx53.c
@@ -319,6 +319,7 @@
 			unsigned long rate_ckih1, unsigned long rate_ckih2)
 {
 	int i;
+	u32 val;
 	struct device_node *np;
 
 	clk[pll1_sw] = imx_clk_pllv2("pll1_sw", "osc", MX51_DPLL1_BASE);
@@ -390,6 +391,21 @@
 	imx_print_silicon_rev("i.MX51", mx51_revision());
 	clk_disable_unprepare(clk[iim_gate]);
 
+	/*
+	 * Reference Manual says: Functionality of CCDR[18] and CLPCR[23] is no
+	 * longer supported. Set to one for better power saving.
+	 *
+	 * The effect of not setting these bits is that MIPI clocks can't be
+	 * enabled without the IPU clock being enabled as well.
+	 */
+	val = readl(MXC_CCM_CCDR);
+	val |= 1 << 18;
+	writel(val, MXC_CCM_CCDR);
+
+	val = readl(MXC_CCM_CLPCR);
+	val |= 1 << 23;
+	writel(val, MXC_CCM_CLPCR);
+
 	return 0;
 }
 
diff --git a/arch/arm/plat-mxc/devices/platform-mx2-emma.c b/arch/arm/mach-imx/devices/platform-mx2-emma.c
similarity index 94%
rename from arch/arm/plat-mxc/devices/platform-mx2-emma.c
rename to arch/arm/mach-imx/devices/platform-mx2-emma.c
index 508404d..11bd01d 100644
--- a/arch/arm/plat-mxc/devices/platform-mx2-emma.c
+++ b/arch/arm/mach-imx/devices/platform-mx2-emma.c
@@ -6,8 +6,8 @@
  * the terms of the GNU General Public License version 2 as published by the
  * Free Software Foundation.
  */
-#include <mach/hardware.h>
-#include <mach/devices-common.h>
+#include "../hardware.h"
+#include "devices-common.h"
 
 #define imx_mx2_emmaprp_data_entry_single(soc)				\
 	{								\
diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index 9807037..c66129b 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -240,7 +240,7 @@
 		macaddr[4] = (val >> 8) & 0xff;
 		macaddr[5] = (val >> 0) & 0xff;
 
-		prom_update_property(np, newmac);
+		of_update_property(np, newmac);
 	}
 }
 
diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 5ccdf53..98167a4 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -19,6 +19,7 @@
 #include <linux/gpio.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
+#include <linux/mtd/fsmc.h>
 #include <linux/mtd/onenand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/i2c.h>
@@ -33,7 +34,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/time.h>
-#include <mach/fsmc.h>
 #include <mach/irqs.h>
 
 #include "cpu-8815.h"
@@ -42,39 +42,34 @@
 #define SRC_CR_INIT_MASK	0x00007fff
 #define SRC_CR_INIT_VAL		0x2aaa8000
 
+#define ALE_OFF 0x1000000
+#define CLE_OFF 0x800000
+
 /* These addresses span 16MB, so use three individual pages */
 static struct resource nhk8815_nand_resources[] = {
 	{
+		.name = "nand_data",
+		.start = 0x40000000,
+		.end = 0x40000000 + SZ_16K - 1,
+		.flags = IORESOURCE_MEM,
+	}, {
 		.name = "nand_addr",
-		.start = NAND_IO_ADDR,
-		.end = NAND_IO_ADDR + 0xfff,
+		.start = 0x40000000 + ALE_OFF,
+		.end = 0x40000000 + ALE_OFF + SZ_16K - 1,
 		.flags = IORESOURCE_MEM,
 	}, {
 		.name = "nand_cmd",
-		.start = NAND_IO_CMD,
-		.end = NAND_IO_CMD + 0xfff,
+		.start = 0x40000000 + CLE_OFF,
+		.end = 0x40000000 + CLE_OFF + SZ_16K - 1,
 		.flags = IORESOURCE_MEM,
 	}, {
-		.name = "nand_data",
-		.start = NAND_IO_DATA,
-		.end = NAND_IO_DATA + 0xfff,
+		.name  = "fsmc_regs",
+		.start = NOMADIK_FSMC_BASE,
+		.end   = NOMADIK_FSMC_BASE + SZ_4K - 1,
 		.flags = IORESOURCE_MEM,
-	}
+	},
 };
 
-static int nhk8815_nand_init(void)
-{
-	/* FSMC setup for nand chip select (8-bit nand in 8815NHK) */
-	writel(0x0000000E, FSMC_PCR(0));
-	writel(0x000D0A00, FSMC_PMEM(0));
-	writel(0x00100A00, FSMC_PATT(0));
-
-	/* enable access to the chip select area */
-	writel(readl(FSMC_PCR(0)) | 0x04, FSMC_PCR(0));
-
-	return 0;
-}
-
 /*
  * These partitions are the same as those used in the 2.6.20 release
  * shipped by the vendor; the first two partitions are mandated
@@ -108,20 +103,28 @@
 	}
 };
 
-static struct nomadik_nand_platform_data nhk8815_nand_data = {
-	.parts		= nhk8815_partitions,
-	.nparts		= ARRAY_SIZE(nhk8815_partitions),
-	.options	= NAND_COPYBACK | NAND_CACHEPRG | NAND_NO_PADDING,
-	.init		= nhk8815_nand_init,
+static struct fsmc_nand_timings nhk8815_nand_timings = {
+	.thiz	= 0,
+	.thold	= 0x10,
+	.twait	= 0x0A,
+	.tset	= 0,
+};
+
+static struct fsmc_nand_platform_data nhk8815_nand_platform_data = {
+	.nand_timings = &nhk8815_nand_timings,
+	.partitions = nhk8815_partitions,
+	.nr_partitions = ARRAY_SIZE(nhk8815_partitions),
+	.width = FSMC_NAND_BW8,
 };
 
 static struct platform_device nhk8815_nand_device = {
-	.name		= "nomadik_nand",
-	.dev		= {
-		.platform_data = &nhk8815_nand_data,
+	.name = "fsmc-nand",
+	.id = -1,
+	.resource = nhk8815_nand_resources,
+	.num_resources = ARRAY_SIZE(nhk8815_nand_resources),
+	.dev = {
+		.platform_data = &nhk8815_nand_platform_data,
 	},
-	.resource	= nhk8815_nand_resources,
-	.num_resources	= ARRAY_SIZE(nhk8815_nand_resources),
 };
 
 /* These are the partitions for the OneNand device, different from above */
@@ -176,6 +179,10 @@
 	.num_resources	= ARRAY_SIZE(nhk8815_onenand_resource),
 };
 
+/* bus control reg. and bus timing reg. for CS0..CS3 */
+#define FSMC_BCR(x)	(NOMADIK_FSMC_VA + (x << 3))
+#define FSMC_BTR(x)	(NOMADIK_FSMC_VA + (x << 3) + 0x04)
+
 static void __init nhk8815_onenand_init(void)
 {
 #ifdef CONFIG_MTD_ONENAND
diff --git a/arch/arm/mach-nomadik/include/mach/fsmc.h b/arch/arm/mach-nomadik/include/mach/fsmc.h
deleted file mode 100644
index 8c2c051..0000000
--- a/arch/arm/mach-nomadik/include/mach/fsmc.h
+++ /dev/null
@@ -1,29 +0,0 @@
-
-/* Definitions for the Nomadik FSMC "Flexible Static Memory controller" */
-
-#ifndef __ASM_ARCH_FSMC_H
-#define __ASM_ARCH_FSMC_H
-
-#include <mach/hardware.h>
-/*
- * Register list
- */
-
-/* bus control reg. and bus timing reg. for CS0..CS3 */
-#define FSMC_BCR(x)     (NOMADIK_FSMC_VA + (x << 3))
-#define FSMC_BTR(x)     (NOMADIK_FSMC_VA + (x << 3) + 0x04)
-
-/* PC-card and NAND:
- * PCR = control register
- * PMEM = memory timing
- * PATT = attribute timing
- * PIO = I/O timing
- * PECCR = ECC result
- */
-#define FSMC_PCR(x)     (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x00)
-#define FSMC_PMEM(x)    (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x08)
-#define FSMC_PATT(x)    (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x0c)
-#define FSMC_PIO(x)     (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x10)
-#define FSMC_PECCR(x)   (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x14)
-
-#endif /* __ASM_ARCH_FSMC_H */
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 3e8ead6..24d2f2d 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -112,17 +112,6 @@
 	omapfb_set_lcd_config(&nokia770_lcd_config);
 }
 
-static void __init ads7846_dev_init(void)
-{
-	if (gpio_request(ADS7846_PENDOWN_GPIO, "ADS7846 pendown") < 0)
-		printk(KERN_ERR "can't get ads7846 pen down GPIO\n");
-}
-
-static int ads7846_get_pendown_state(void)
-{
-	return !gpio_get_value(ADS7846_PENDOWN_GPIO);
-}
-
 static struct ads7846_platform_data nokia770_ads7846_platform_data __initdata = {
 	.x_max		= 0x0fff,
 	.y_max		= 0x0fff,
@@ -131,7 +120,7 @@
 	.debounce_max	= 10,
 	.debounce_tol	= 3,
 	.debounce_rep	= 1,
-	.get_pendown_state	= ads7846_get_pendown_state,
+	.gpio_pendown	= ADS7846_PENDOWN_GPIO,
 };
 
 static struct spi_board_info nokia770_spi_board_info[] __initdata = {
@@ -241,7 +230,6 @@
 	omap_serial_init();
 	omap_register_i2c_bus(1, 100, NULL, 0);
 	hwa742_dev_init();
-	ads7846_dev_init();
 	mipid_dev_init();
 	omap1_usb_init(&nokia770_usb_config);
 	nokia770_mmc_init();
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index be0f62b..41b581f 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -26,6 +26,8 @@
 
 config SOC_HAS_REALTIME_COUNTER
 	bool "Real time free running counter"
+	depends on SOC_OMAP5
+	default y
 
 config ARCH_OMAP2
 	bool "TI OMAP2"
@@ -79,7 +81,6 @@
 	select ARM_GIC
 	select CPU_V7
 	select HAVE_SMP
-	select SOC_HAS_REALTIME_COUNTER
 	select COMMON_CLK
 
 comment "OMAP Core Type"
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 7b20154..bb73afc 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -157,6 +157,7 @@
 
 static struct tfp410_platform_data dvi_panel = {
 	.power_down_gpio	= -1,
+	.i2c_bus_num		= -1,
 };
 
 static struct omap_dss_device sdp3430_dvi_device = {
diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c
index 4be58fd..f81a303 100644
--- a/arch/arm/mach-omap2/board-am3517evm.c
+++ b/arch/arm/mach-omap2/board-am3517evm.c
@@ -208,6 +208,7 @@
 
 static struct tfp410_platform_data dvi_panel = {
 	.power_down_gpio	= -1,
+	.i2c_bus_num		= -1,
 };
 
 static struct omap_dss_device am3517_evm_dvi_device = {
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index c8e37dc..b3102c2 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -241,6 +241,7 @@
 
 static struct tfp410_platform_data dvi_panel = {
 	.power_down_gpio	= CM_T35_DVI_EN_GPIO,
+	.i2c_bus_num		= -1,
 };
 
 static struct omap_dss_device cm_t35_dvi_device = {
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c
index 7667eb7..12865af 100644
--- a/arch/arm/mach-omap2/board-devkit8000.c
+++ b/arch/arm/mach-omap2/board-devkit8000.c
@@ -141,6 +141,7 @@
 
 static struct tfp410_platform_data dvi_panel = {
 	.power_down_gpio	= -1,
+	.i2c_bus_num		= 1,
 };
 
 static struct omap_dss_device devkit8000_dvi_device = {
diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c
index 9a3878e..3be1311 100644
--- a/arch/arm/mach-omap2/board-h4.c
+++ b/arch/arm/mach-omap2/board-h4.c
@@ -27,14 +27,12 @@
 #include <linux/io.h>
 #include <linux/input/matrix_keypad.h>
 #include <linux/mfd/menelaus.h>
+#include <linux/omap-dma.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
-#include <linux/omap-dma.h>
-#include <plat/debug-devices.h>
-
 #include <video/omapdss.h>
 #include <video/omap-panel-generic-dpi.h>
 
@@ -42,11 +40,9 @@
 #include "mux.h"
 #include "control.h"
 #include "gpmc.h"
+#include "gpmc-smc91x.h"
 
 #define H4_FLASH_CS	0
-#define H4_SMC91X_CS	1
-
-#define H4_ETHR_GPIO_IRQ		92
 
 #if defined(CONFIG_KEYBOARD_MATRIX) || defined(CONFIG_KEYBOARD_MATRIX_MODULE)
 static const uint32_t board_matrix_keys[] = {
@@ -250,71 +246,31 @@
 		return 0;
 }
 
-static inline void __init h4_init_debug(void)
+#if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91X_MODULE)
+
+static struct omap_smc91x_platform_data board_smc91x_data = {
+	.cs		= 1,
+	.gpio_irq	= 92,
+	.flags		= GPMC_TIMINGS_SMC91C96 | IORESOURCE_IRQ_LOWLEVEL,
+};
+
+static void __init board_smc91x_init(void)
 {
-	int eth_cs;
-	unsigned long cs_mem_base;
-	unsigned int muxed, rate;
-	struct clk *gpmc_fck;
-
-	eth_cs	= H4_SMC91X_CS;
-
-	gpmc_fck = clk_get(NULL, "gpmc_fck");	/* Always on ENABLE_ON_INIT */
-	if (IS_ERR(gpmc_fck)) {
-		WARN_ON(1);
-		return;
-	}
-
-	clk_prepare_enable(gpmc_fck);
-	rate = clk_get_rate(gpmc_fck);
-	clk_disable_unprepare(gpmc_fck);
-	clk_put(gpmc_fck);
-
 	if (is_gpmc_muxed())
-		muxed = 0x200;
-	else
-		muxed = 0;
+		board_smc91x_data.flags |= GPMC_MUX_ADD_DATA;
 
-	/* Make sure CS1 timings are correct */
-	gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG1,
-			  0x00011000 | muxed);
-
-	if (rate >= 160000000) {
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f01);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080803);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1c0b1c0a);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x041f1F1F);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000004C4);
-	} else if (rate >= 130000000) {
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f00);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080802);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1C091C09);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x041f1F1F);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000004C4);
-	} else {/* rate = 100000000 */
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f00);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080802);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1C091C09);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x031A1F1F);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000003C2);
-	}
-
-	if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) {
-		printk(KERN_ERR "Failed to request GPMC mem for smc91x\n");
-		goto out;
-	}
-
-	udelay(100);
-
-	omap_mux_init_gpio(92, 0);
-	if (debug_card_init(cs_mem_base, H4_ETHR_GPIO_IRQ) < 0)
-		gpmc_cs_free(eth_cs);
-
-out:
-	clk_disable_unprepare(gpmc_fck);
-	clk_put(gpmc_fck);
+	omap_mux_init_gpio(board_smc91x_data.gpio_irq, OMAP_PIN_INPUT);
+	gpmc_smc91x_init(&board_smc91x_data);
 }
 
+#else
+
+static inline void board_smc91x_init(void)
+{
+}
+
+#endif
+
 static void __init h4_init_flash(void)
 {
 	unsigned long base;
@@ -371,6 +327,7 @@
 	omap_serial_init();
 	omap_sdrc_init(NULL, NULL);
 	h4_init_flash();
+	board_smc91x_init();
 
 	omap_display_init(&h4_dss_data);
 }
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index a4e167c..0abb30f 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -16,10 +16,12 @@
 #include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/stddef.h>
 #include <linux/i2c.h>
 #include <linux/spi/spi.h>
 #include <linux/usb/musb.h>
+#include <linux/platform_data/i2c-cbus-gpio.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <linux/platform_data/mtd-onenand-omap2.h>
 #include <linux/mfd/menelaus.h>
@@ -40,6 +42,45 @@
 #define TUSB6010_GPIO_ENABLE	0
 #define TUSB6010_DMACHAN	0x3f
 
+#if defined(CONFIG_I2C_CBUS_GPIO) || defined(CONFIG_I2C_CBUS_GPIO_MODULE)
+static struct i2c_cbus_platform_data n8x0_cbus_data = {
+	.clk_gpio = 66,
+	.dat_gpio = 65,
+	.sel_gpio = 64,
+};
+
+static struct platform_device n8x0_cbus_device = {
+	.name	= "i2c-cbus-gpio",
+	.id	= 3,
+	.dev	= {
+		.platform_data = &n8x0_cbus_data,
+	},
+};
+
+static struct i2c_board_info n8x0_i2c_board_info_3[] __initdata = {
+	{
+		I2C_BOARD_INFO("retu-mfd", 0x01),
+	},
+};
+
+static void __init n8x0_cbus_init(void)
+{
+	const int retu_irq_gpio = 108;
+
+	if (gpio_request_one(retu_irq_gpio, GPIOF_IN, "Retu IRQ"))
+		return;
+	irq_set_irq_type(gpio_to_irq(retu_irq_gpio), IRQ_TYPE_EDGE_RISING);
+	n8x0_i2c_board_info_3[0].irq = gpio_to_irq(retu_irq_gpio);
+	i2c_register_board_info(3, n8x0_i2c_board_info_3,
+				ARRAY_SIZE(n8x0_i2c_board_info_3));
+	platform_device_register(&n8x0_cbus_device);
+}
+#else /* CONFIG_I2C_CBUS_GPIO */
+static void __init n8x0_cbus_init(void)
+{
+}
+#endif /* CONFIG_I2C_CBUS_GPIO */
+
 #if defined(CONFIG_USB_MUSB_TUSB6010) || defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 /*
  * Enable or disable power to TUSB6010. When enabling, turn on 3.3 V and
@@ -678,6 +719,7 @@
 	gpmc_onenand_init(board_onenand_data);
 	n8x0_mmc_init();
 	n8x0_usb_init();
+	n8x0_cbus_init();
 }
 
 MACHINE_START(NOKIA_N800, "Nokia N800")
diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 54647d6..3985f35 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -240,6 +240,7 @@
 
 static struct tfp410_platform_data dvi_panel = {
 	.power_down_gpio	= OMAP3EVM_DVI_PANEL_EN_GPIO,
+	.i2c_bus_num		= -1,
 };
 
 static struct omap_dss_device omap3_evm_dvi_device = {
diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c
index d8638b3..53a6cbc 100644
--- a/arch/arm/mach-omap2/board-omap3stalker.c
+++ b/arch/arm/mach-omap2/board-omap3stalker.c
@@ -118,6 +118,7 @@
 
 static struct tfp410_platform_data dvi_panel = {
 	.power_down_gpio	= DSS_ENABLE_GPIO,
+	.i2c_bus_num		= -1,
 };
 
 static struct omap_dss_device omap3_stalker_dvi_device = {
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index 60529e0..cf07e28 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -256,6 +256,11 @@
 	},
 };
 
+static struct platform_device rx51_battery_device = {
+	.name	= "rx51-battery",
+	.id	= -1,
+};
+
 static void rx51_charger_set_power(bool on)
 {
 	gpio_set_value(RX51_USB_TRANSCEIVER_RST_GPIO, on);
@@ -277,6 +282,7 @@
 	WARN_ON(gpio_request_one(RX51_USB_TRANSCEIVER_RST_GPIO,
 		GPIOF_OUT_INIT_HIGH, "isp1704_reset"));
 
+	platform_device_register(&rx51_battery_device);
 	platform_device_register(&rx51_charger_device);
 }
 
diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c
index aa56c3e..5789a5e 100644
--- a/arch/arm/mach-omap2/cclock44xx_data.c
+++ b/arch/arm/mach-omap2/cclock44xx_data.c
@@ -40,6 +40,14 @@
 #define OMAP4430_MODULEMODE_HWCTRL_SHIFT		0
 #define OMAP4430_MODULEMODE_SWCTRL_SHIFT		1
 
+/*
+ * OMAP4 ABE DPLL default frequency. In OMAP4460 TRM version V, section
+ * "3.6.3.2.3 CM1_ABE Clock Generator" states that the "DPLL_ABE_X2_CLK
+ * must be set to 196.608 MHz" and hence, the DPLL locked frequency is
+ * half of this value.
+ */
+#define OMAP4_DPLL_ABE_DEFFREQ				98304000
+
 /* Root clocks */
 
 DEFINE_CLK_FIXED_RATE(extalt_clkin_ck, CLK_IS_ROOT, 59000000, 0x0);
@@ -124,6 +132,8 @@
 	.enable_mask	= OMAP4430_DPLL_EN_MASK,
 	.autoidle_mask	= OMAP4430_AUTO_DPLL_MODE_MASK,
 	.idlest_mask	= OMAP4430_ST_DPLL_CLK_MASK,
+	.m4xen_mask	= OMAP4430_DPLL_REGM4XEN_MASK,
+	.lpmode_mask	= OMAP4430_DPLL_LPMODE_EN_MASK,
 	.max_multiplier	= 2047,
 	.max_divider	= 128,
 	.min_divider	= 1,
@@ -233,7 +243,7 @@
 
 
 static const char *dpll_core_ck_parents[] = {
-	"sys_clkin_ck",
+	"sys_clkin_ck", "core_hsd_byp_clk_mux_ck"
 };
 
 static struct clk dpll_core_ck;
@@ -286,9 +296,9 @@
 		   OMAP4430_CM_CLKSEL_CORE, OMAP4430_CLKSEL_CORE_SHIFT,
 		   OMAP4430_CLKSEL_CORE_WIDTH, 0x0, NULL);
 
-DEFINE_CLK_OMAP_HSDIVIDER(div_iva_hs_clk, "dpll_core_m5x2_ck",
-			  &dpll_core_m5x2_ck, 0x0, OMAP4430_CM_BYPCLK_DPLL_IVA,
-			  OMAP4430_CLKSEL_0_1_MASK);
+DEFINE_CLK_DIVIDER(div_iva_hs_clk, "dpll_core_m5x2_ck", &dpll_core_m5x2_ck,
+		   0x0, OMAP4430_CM_BYPCLK_DPLL_IVA, OMAP4430_CLKSEL_0_1_SHIFT,
+		   OMAP4430_CLKSEL_0_1_WIDTH, CLK_DIVIDER_POWER_OF_TWO, NULL);
 
 DEFINE_CLK_DIVIDER(div_mpu_hs_clk, "dpll_core_m5x2_ck", &dpll_core_m5x2_ck,
 		   0x0, OMAP4430_CM_BYPCLK_DPLL_MPU, OMAP4430_CLKSEL_0_1_SHIFT,
@@ -363,8 +373,21 @@
 	.min_divider	= 1,
 };
 
+static const char *dpll_iva_ck_parents[] = {
+	"sys_clkin_ck", "iva_hsd_byp_clk_mux_ck"
+};
+
 static struct clk dpll_iva_ck;
 
+static const struct clk_ops dpll_ck_ops = {
+	.enable		= &omap3_noncore_dpll_enable,
+	.disable	= &omap3_noncore_dpll_disable,
+	.recalc_rate	= &omap3_dpll_recalc,
+	.round_rate	= &omap2_dpll_round_rate,
+	.set_rate	= &omap3_noncore_dpll_set_rate,
+	.get_parent	= &omap2_init_dpll_parent,
+};
+
 static struct clk_hw_omap dpll_iva_ck_hw = {
 	.hw = {
 		.clk = &dpll_iva_ck,
@@ -373,7 +396,7 @@
 	.ops		= &clkhwops_omap3_dpll,
 };
 
-DEFINE_STRUCT_CLK(dpll_iva_ck, dpll_core_ck_parents, dpll_abe_ck_ops);
+DEFINE_STRUCT_CLK(dpll_iva_ck, dpll_iva_ck_parents, dpll_ck_ops);
 
 static const char *dpll_iva_x2_ck_parents[] = {
 	"dpll_iva_ck",
@@ -416,6 +439,10 @@
 	.min_divider	= 1,
 };
 
+static const char *dpll_mpu_ck_parents[] = {
+	"sys_clkin_ck", "div_mpu_hs_clk"
+};
+
 static struct clk dpll_mpu_ck;
 
 static struct clk_hw_omap dpll_mpu_ck_hw = {
@@ -426,7 +453,7 @@
 	.ops		= &clkhwops_omap3_dpll,
 };
 
-DEFINE_STRUCT_CLK(dpll_mpu_ck, dpll_core_ck_parents, dpll_abe_ck_ops);
+DEFINE_STRUCT_CLK(dpll_mpu_ck, dpll_mpu_ck_parents, dpll_ck_ops);
 
 DEFINE_CLK_FIXED_FACTOR(mpu_periphclk, "dpll_mpu_ck", &dpll_mpu_ck, 0x0, 1, 2);
 
@@ -464,6 +491,9 @@
 	.min_divider	= 1,
 };
 
+static const char *dpll_per_ck_parents[] = {
+	"sys_clkin_ck", "per_hsd_byp_clk_mux_ck"
+};
 
 static struct clk dpll_per_ck;
 
@@ -475,7 +505,7 @@
 	.ops		= &clkhwops_omap3_dpll,
 };
 
-DEFINE_STRUCT_CLK(dpll_per_ck, dpll_core_ck_parents, dpll_abe_ck_ops);
+DEFINE_STRUCT_CLK(dpll_per_ck, dpll_per_ck_parents, dpll_ck_ops);
 
 DEFINE_CLK_DIVIDER(dpll_per_m2_ck, "dpll_per_ck", &dpll_per_ck, 0x0,
 		   OMAP4430_CM_DIV_M2_DPLL_PER, OMAP4430_DPLL_CLKOUT_DIV_SHIFT,
@@ -559,6 +589,10 @@
 	.min_divider	= 1,
 };
 
+static const char *dpll_usb_ck_parents[] = {
+	"sys_clkin_ck", "usb_hs_clk_div_ck"
+};
+
 static struct clk dpll_usb_ck;
 
 static struct clk_hw_omap dpll_usb_ck_hw = {
@@ -569,7 +603,7 @@
 	.ops		= &clkhwops_omap3_dpll,
 };
 
-DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_core_ck_parents, dpll_abe_ck_ops);
+DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_usb_ck_parents, dpll_ck_ops);
 
 static const char *dpll_usb_clkdcoldo_ck_parents[] = {
 	"dpll_usb_ck",
@@ -696,9 +730,13 @@
 		   OMAP4430_CM_ABE_DSS_SYS_CLKSEL, OMAP4430_CLKSEL_0_0_SHIFT,
 		   OMAP4430_CLKSEL_0_0_WIDTH, 0x0, NULL);
 
+static const char *dbgclk_mux_ck_parents[] = {
+	"sys_clkin_ck"
+};
+
 static struct clk dbgclk_mux_ck;
 DEFINE_STRUCT_CLK_HW_OMAP(dbgclk_mux_ck, NULL);
-DEFINE_STRUCT_CLK(dbgclk_mux_ck, dpll_core_ck_parents,
+DEFINE_STRUCT_CLK(dbgclk_mux_ck, dbgclk_mux_ck_parents,
 		  dpll_usb_clkdcoldo_ck_ops);
 
 /* Leaf clocks controlled by modules */
@@ -1935,10 +1973,10 @@
 	CLK("4803e000.timer",	"timer_sys_ck",	&sys_clkin_ck,	CK_443X),
 	CLK("48086000.timer",	"timer_sys_ck",	&sys_clkin_ck,	CK_443X),
 	CLK("48088000.timer",	"timer_sys_ck",	&sys_clkin_ck,	CK_443X),
-	CLK("49038000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
-	CLK("4903a000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
-	CLK("4903c000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
-	CLK("4903e000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
+	CLK("40138000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
+	CLK("4013a000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
+	CLK("4013c000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
+	CLK("4013e000.timer",	"timer_sys_ck",	&syc_clk_div_ck,	CK_443X),
 	CLK(NULL,	"cpufreq_ck",	&dpll_mpu_ck,	CK_443X),
 };
 
@@ -1955,6 +1993,7 @@
 {
 	u32 cpu_clkflg;
 	struct omap_clk *c;
+	int rc;
 
 	if (cpu_is_omap443x()) {
 		cpu_mask = RATE_IN_4430;
@@ -1983,5 +2022,18 @@
 	omap2_clk_enable_init_clocks(enable_init_clks,
 				     ARRAY_SIZE(enable_init_clks));
 
+	/*
+	 * On OMAP4460 the ABE DPLL fails to turn on if it is in the idle
+	 * low-power state when the ABE clock domain is turned on. Work
+	 * around this by locking the ABE DPLL on boot.
+	 */
+	if (cpu_is_omap446x()) {
+		rc = clk_set_parent(&abe_dpll_refclk_mux_ck, &sys_32k_ck);
+		if (!rc)
+			rc = clk_set_rate(&dpll_abe_ck, OMAP4_DPLL_ABE_DEFFREQ);
+		if (rc)
+			pr_err("%s: failed to configure ABE DPLL!\n", __func__);
+	}
+
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index 9917f79..b402048 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -195,6 +195,10 @@
  * @enable_mask: mask of the DPLL mode bitfield in @control_reg
  * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate()
  * @last_rounded_m: cache of the last M result of omap2_dpll_round_rate()
+ * @last_rounded_m4xen: cache of the last M4X result of
+ *			omap4_dpll_regm4xen_round_rate()
+ * @last_rounded_lpmode: cache of the last lpmode result of
+ *			 omap4_dpll_lpmode_recalc()
  * @max_multiplier: maximum valid non-bypass multiplier value (actual)
  * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate()
  * @min_divider: minimum valid non-bypass divider value (actual)
@@ -205,6 +209,8 @@
  * @autoidle_mask: mask of the DPLL autoidle mode bitfield in @autoidle_reg
  * @freqsel_mask: mask of the DPLL jitter correction bitfield in @control_reg
  * @idlest_mask: mask of the DPLL idle status bitfield in @idlest_reg
+ * @lpmode_mask: mask of the DPLL low-power mode bitfield in @control_reg
+ * @m4xen_mask: mask of the DPLL M4X multiplier bitfield in @control_reg
  * @auto_recal_bit: bitshift of the driftguard enable bit in @control_reg
  * @recal_en_bit: bitshift of the PRM_IRQENABLE_* bit for recalibration IRQs
  * @recal_st_bit: bitshift of the PRM_IRQSTATUS_* bit for recalibration IRQs
@@ -233,6 +239,8 @@
 	u32			enable_mask;
 	unsigned long		last_rounded_rate;
 	u16			last_rounded_m;
+	u8			last_rounded_m4xen;
+	u8			last_rounded_lpmode;
 	u16			max_multiplier;
 	u8			last_rounded_n;
 	u8			min_divider;
@@ -245,6 +253,8 @@
 	u32			idlest_mask;
 	u32			dco_mask;
 	u32			sddiv_mask;
+	u32			lpmode_mask;
+	u32			m4xen_mask;
 	u8			auto_recal_bit;
 	u8			recal_en_bit;
 	u8			recal_st_bit;
diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c
index 3848735..7faf82d 100644
--- a/arch/arm/mach-omap2/clockdomain.c
+++ b/arch/arm/mach-omap2/clockdomain.c
@@ -998,7 +998,8 @@
 	spin_lock_irqsave(&clkdm->lock, flags);
 
 	/* corner case: disabling unused clocks */
-	if (__clk_get_enable_count(clk) == 0)
+	if ((__clk_get_enable_count(clk) == 0) &&
+	    (atomic_read(&clkdm->usecount) == 0))
 		goto ccd_exit;
 
 	if (atomic_read(&clkdm->usecount) == 0) {
diff --git a/arch/arm/mach-omap2/common.c b/arch/arm/mach-omap2/common.c
index 5c2fd48..2dabb9e 100644
--- a/arch/arm/mach-omap2/common.c
+++ b/arch/arm/mach-omap2/common.c
@@ -16,8 +16,6 @@
 #include <linux/init.h>
 #include <linux/platform_data/dsp-omap.h>
 
-#include <plat/vram.h>
-
 #include "common.h"
 #include "omap-secure.h"
 
@@ -32,7 +30,6 @@
 
 void __init omap_reserve(void)
 {
-	omap_vram_reserve_sdram_memblock();
 	omap_dsp_reserve_sdram_memblock();
 	omap_secure_ram_reserve_memblock();
 	omap_barrier_reserve_memblock();
diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c
index bca7a88..22590db 100644
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -40,6 +40,8 @@
 	u32 core_state;
 };
 
+static struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd;
+
 static struct omap3_idle_statedata omap3_idle_data[] = {
 	{
 		.mpu_state = PWRDM_POWER_ON,
@@ -71,7 +73,7 @@
 	},
 };
 
-static struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd;
+/* Private functions */
 
 static int __omap3_enter_idle(struct cpuidle_device *dev,
 				struct cpuidle_driver *drv,
@@ -260,11 +262,11 @@
 	return ret;
 }
 
-DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev);
+static DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev);
 
-struct cpuidle_driver omap3_idle_driver = {
-	.name = 	"omap3_idle",
-	.owner = 	THIS_MODULE,
+static struct cpuidle_driver omap3_idle_driver = {
+	.name =		"omap3_idle",
+	.owner =	THIS_MODULE,
 	.states = {
 		{
 			.enter		  = omap3_enter_idle_bm,
@@ -327,6 +329,8 @@
 	.safe_state_index = 0,
 };
 
+/* Public functions */
+
 /**
  * omap3_idle_init - Init routine for OMAP3 idle
  *
diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index 288bee6..d639aef 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -54,6 +54,8 @@
 static atomic_t abort_barrier;
 static bool cpu_done[NR_CPUS];
 
+/* Private functions */
+
 /**
  * omap4_enter_idle_coupled_[simple/coupled] - OMAP4 cpuidle entry functions
  * @dev: cpuidle device
@@ -161,9 +163,19 @@
 	return index;
 }
 
-DEFINE_PER_CPU(struct cpuidle_device, omap4_idle_dev);
+/*
+ * For each CPU, set up the broadcast timer because the local timers
+ * stop in states above C1.
+ */
+static void omap_setup_broadcast_timer(void *arg)
+{
+	int cpu = smp_processor_id();
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
+}
 
-struct cpuidle_driver omap4_idle_driver = {
+static DEFINE_PER_CPU(struct cpuidle_device, omap4_idle_dev);
+
+static struct cpuidle_driver omap4_idle_driver = {
 	.name				= "omap4_idle",
 	.owner				= THIS_MODULE,
 	.en_core_tk_irqen		= 1,
@@ -178,7 +190,7 @@
 			.desc = "MPUSS ON"
 		},
 		{
-                        /* C2 - CPU0 OFF + CPU1 OFF + MPU CSWR */
+			/* C2 - CPU0 OFF + CPU1 OFF + MPU CSWR */
 			.exit_latency = 328 + 440,
 			.target_residency = 960,
 			.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_COUPLED,
@@ -200,15 +212,7 @@
 	.safe_state_index = 0,
 };
 
-/*
- * For each cpu, setup the broadcast timer because local timers
- * stops for the states above C1.
- */
-static void omap_setup_broadcast_timer(void *arg)
-{
-	int cpu = smp_processor_id();
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
-}
+/* Public functions */
 
 /**
  * omap4_idle_init - Init routine for OMAP4 idle
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 4abb8b5..5e304d0 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -226,7 +226,7 @@
 };
 
 static struct omap_iommu_arch_data omap3_isp_iommu = {
-	.name = "isp",
+	.name = "mmu_isp",
 };
 
 int omap3_init_camera(struct isp_platform_data *pdata)
diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index fafb28c..2bb1883 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c
@@ -291,16 +291,13 @@
 
 /*
  * _omap3_noncore_dpll_program - set non-core DPLL M,N values directly
- * @clk: struct clk * of DPLL to set
- * @m: DPLL multiplier to set
- * @n: DPLL divider to set
- * @freqsel: FREQSEL value to set
+ * @clk:	struct clk * of DPLL to set
+ * @freqsel:	FREQSEL value to set
  *
- * Program the DPLL with the supplied M, N values, and wait for the DPLL to
- * lock..  Returns -EINVAL upon error, or 0 upon success.
+ * Program the DPLL with the last M, N values calculated, and wait for
+ * the DPLL to lock. Returns -EINVAL upon error, or 0 upon success.
  */
-static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 m, u8 n,
-				      u16 freqsel)
+static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 {
 	struct dpll_data *dd = clk->dpll_data;
 	u8 dco, sd_div;
@@ -323,23 +320,45 @@
 	/* Set DPLL multiplier, divider */
 	v = __raw_readl(dd->mult_div1_reg);
 	v &= ~(dd->mult_mask | dd->div1_mask);
-	v |= m << __ffs(dd->mult_mask);
-	v |= (n - 1) << __ffs(dd->div1_mask);
+	v |= dd->last_rounded_m << __ffs(dd->mult_mask);
+	v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask);
 
 	/* Configure dco and sd_div for dplls that have these fields */
 	if (dd->dco_mask) {
-		_lookup_dco(clk, &dco, m, n);
+		_lookup_dco(clk, &dco, dd->last_rounded_m, dd->last_rounded_n);
 		v &= ~(dd->dco_mask);
 		v |= dco << __ffs(dd->dco_mask);
 	}
 	if (dd->sddiv_mask) {
-		_lookup_sddiv(clk, &sd_div, m, n);
+		_lookup_sddiv(clk, &sd_div, dd->last_rounded_m,
+			      dd->last_rounded_n);
 		v &= ~(dd->sddiv_mask);
 		v |= sd_div << __ffs(dd->sddiv_mask);
 	}
 
 	__raw_writel(v, dd->mult_div1_reg);
 
+	/* Set 4X multiplier and low-power mode */
+	if (dd->m4xen_mask || dd->lpmode_mask) {
+		v = __raw_readl(dd->control_reg);
+
+		if (dd->m4xen_mask) {
+			if (dd->last_rounded_m4xen)
+				v |= dd->m4xen_mask;
+			else
+				v &= ~dd->m4xen_mask;
+		}
+
+		if (dd->lpmode_mask) {
+			if (dd->last_rounded_lpmode)
+				v |= dd->lpmode_mask;
+			else
+				v &= ~dd->lpmode_mask;
+		}
+
+		__raw_writel(v, dd->control_reg);
+	}
+
 	/* We let the clock framework set the other output dividers later */
 
 	/* REVISIT: Set ramp-up delay? */
@@ -492,8 +511,7 @@
 		pr_debug("%s: %s: set rate: locking rate to %lu.\n",
 			 __func__, __clk_get_name(hw->clk), rate);
 
-		ret = omap3_noncore_dpll_program(clk, dd->last_rounded_m,
-						dd->last_rounded_n, freqsel);
+		ret = omap3_noncore_dpll_program(clk, freqsel);
 		if (!ret)
 			new_parent = dd->clk_ref;
 	}
diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c
index d3326c4..d28b0f7 100644
--- a/arch/arm/mach-omap2/dpll44xx.c
+++ b/arch/arm/mach-omap2/dpll44xx.c
@@ -20,6 +20,15 @@
 #include "clock44xx.h"
 #include "cm-regbits-44xx.h"
 
+/*
+ * Maximum DPLL input frequency (FINT) and output frequency (FOUT) that
+ * can be supported when using the DPLL low-power mode. Frequencies are
+ * defined in OMAP4430/60 Public TRM section 3.6.3.3.2 "Enable Control,
+ * Status, and Low-Power Operation Mode".
+ */
+#define OMAP4_DPLL_LP_FINT_MAX	1000000
+#define OMAP4_DPLL_LP_FOUT_MAX	100000000
+
 /* Supported only on OMAP4 */
 int omap4_dpllmx_gatectrl_read(struct clk_hw_omap *clk)
 {
@@ -82,6 +91,31 @@
 };
 
 /**
+ * omap4_dpll_lpmode_recalc - compute DPLL low-power setting
+ * @dd: pointer to the dpll data structure
+ *
+ * Calculates if low-power mode can be enabled based upon the last
+ * multiplier and divider values calculated. If low-power mode can be
+ * enabled, then the bit to enable low-power mode is stored in the
+ * last_rounded_lpmode variable. This implementation is based upon the
+ * criteria for enabling low-power mode as described in the OMAP4430/60
+ * Public TRM section 3.6.3.3.2 "Enable Control, Status, and Low-Power
+ * Operation Mode".
+ */
+static void omap4_dpll_lpmode_recalc(struct dpll_data *dd)
+{
+	long fint, fout;
+
+	fint = __clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1);
+	fout = fint * dd->last_rounded_m;
+
+	if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX))
+		dd->last_rounded_lpmode = 1;
+	else
+		dd->last_rounded_lpmode = 0;
+}
+
+/**
  * omap4_dpll_regm4xen_recalc - compute DPLL rate, considering REGM4XEN bit
  * @clk: struct clk * of the DPLL to compute the rate for
  *
@@ -130,7 +164,6 @@
 				    unsigned long *parent_rate)
 {
 	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	u32 v;
 	struct dpll_data *dd;
 	long r;
 
@@ -139,18 +172,31 @@
 
 	dd = clk->dpll_data;
 
-	/* regm4xen adds a multiplier of 4 to DPLL calculations */
-	v = __raw_readl(dd->control_reg) & OMAP4430_DPLL_REGM4XEN_MASK;
+	dd->last_rounded_m4xen = 0;
 
-	if (v)
-		target_rate = target_rate / OMAP4430_REGM4XEN_MULT;
-
+	/*
+	 * First try to compute the DPLL configuration for the
+	 * target rate without using the 4X multiplier.
+	 */
 	r = omap2_dpll_round_rate(hw, target_rate, NULL);
+	if (r != ~0)
+		goto out;
+
+	/*
+	 * If we did not find a valid DPLL configuration, try again, but
+	 * this time see if using the 4X multiplier can help. Enabling the
+	 * 4X multiplier is equivalent to dividing the target rate by 4.
+	 */
+	r = omap2_dpll_round_rate(hw, target_rate / OMAP4430_REGM4XEN_MULT,
+				  NULL);
 	if (r == ~0)
 		return r;
 
-	if (v)
-		clk->dpll_data->last_rounded_rate *= OMAP4430_REGM4XEN_MULT;
+	dd->last_rounded_rate *= OMAP4430_REGM4XEN_MULT;
+	dd->last_rounded_m4xen = 1;
 
-	return clk->dpll_data->last_rounded_rate;
+out:
+	omap4_dpll_lpmode_recalc(dd);
+
+	return dd->last_rounded_rate;
 }
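
To make the low-power criterion concrete: assuming, for illustration, a 38.4 MHz reference clock with last_rounded_n = 63 and last_rounded_m = 100, then fint = 38400000 / (63 + 1) = 600 kHz and fout = 600 kHz * 100 = 60 MHz; both fall below OMAP4_DPLL_LP_FINT_MAX (1 MHz) and OMAP4_DPLL_LP_FOUT_MAX (100 MHz), so last_rounded_lpmode is set to 1.
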
diff --git a/arch/arm/mach-omap2/i2c.c b/arch/arm/mach-omap2/i2c.c
index fbb9b15..b9074dd 100644
--- a/arch/arm/mach-omap2/i2c.c
+++ b/arch/arm/mach-omap2/i2c.c
@@ -22,6 +22,7 @@
 #include "soc.h"
 #include "omap_hwmod.h"
 #include "omap_device.h"
+#include "omap-pm.h"
 
 #include "prm.h"
 #include "common.h"
@@ -120,6 +121,16 @@
 	return ports;
 }
 
+/*
+ * XXX This function is a temporary compatibility wrapper - only
+ * needed until the I2C driver can be converted to call
+ * omap_pm_set_max_dev_wakeup_lat() and handle a return code.
+ */
+static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t)
+{
+	omap_pm_set_max_mpu_wakeup_lat(dev, t);
+}
+
 static const char name[] = "omap_i2c";
 
 int __init omap_i2c_add_bus(struct omap_i2c_bus_platform_data *i2c_pdata,
@@ -157,6 +168,15 @@
 	dev_attr = (struct omap_i2c_dev_attr *)oh->dev_attr;
 	pdata->flags = dev_attr->flags;
 
+	/*
+	 * When waiting for completion of an I2C transfer, we need to
+	 * set a wake-up latency constraint for the MPU. This ensures a
+	 * quick enough wakeup from idle when the transfer completes.
+	 * Only OMAP3 has support for constraints.
+	 */
+	if (cpu_is_omap34xx())
+		pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat;
 	pdev = omap_device_build(name, bus_id, oh, pdata,
 			sizeof(struct omap_i2c_bus_platform_data),
 			NULL, 0, 0);
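
A sketch of how the driver side is expected to use this hook around a transfer; the transfer routine and latency value below are illustrative placeholders, not the driver's actual names:

	if (pdata->set_mpu_wkup_lat)
		pdata->set_mpu_wkup_lat(dev, latency_us);	/* hold the constraint */
	ret = omap_i2c_do_transfer(...);			/* hypothetical transfer call */
	if (pdata->set_mpu_wkup_lat)
		pdata->set_mpu_wkup_lat(dev, -1);		/* release the constraint */
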
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 2612634..6a217c9 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -135,10 +135,7 @@
 
 	old_mode = omap_mux_read(partition, gpio_mux->reg_offset);
 	mux_mode = val & ~(OMAP_MUX_NR_MODES - 1);
-	if (partition->flags & OMAP_MUX_GPIO_IN_MODE3)
-		mux_mode |= OMAP_MUX_MODE3;
-	else
-		mux_mode |= OMAP_MUX_MODE4;
+	mux_mode |= partition->gpio;
 	pr_debug("%s: Setting signal %s.gpio%i 0x%04x -> 0x%04x\n", __func__,
 		 gpio_mux->muxnames[0], gpio, old_mode, mux_mode);
 	omap_mux_write(partition, mux_mode, gpio_mux->reg_offset);
@@ -800,7 +797,7 @@
 			struct omap_mux *m = &e->mux;
 			u16 mode = omap_mux_read(partition, m->reg_offset);
 
-			if (OMAP_MODE_GPIO(mode))
+			if (OMAP_MODE_GPIO(partition, mode))
 				continue;
 
 #ifndef CONFIG_DEBUG_FS
@@ -1065,7 +1062,7 @@
 		}
 #else
 		/* Skip pins that are not muxed as GPIO by bootloader */
-		if (!OMAP_MODE_GPIO(omap_mux_read(partition,
+		if (!OMAP_MODE_GPIO(partition, omap_mux_read(partition,
 				    superset->reg_offset))) {
 			superset++;
 			continue;
@@ -1132,6 +1129,7 @@
 
 	partition->name = name;
 	partition->flags = flags;
+	partition->gpio = flags & OMAP_MUX_MODE7;
 	partition->size = mux_size;
 	partition->phys = mux_pbase;
 	partition->base = ioremap(mux_pbase, mux_size);
diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h
index 76f9b3c..fdb22f1 100644
--- a/arch/arm/mach-omap2/mux.h
+++ b/arch/arm/mach-omap2/mux.h
@@ -58,7 +58,8 @@
 #define OMAP_PIN_OFF_INPUT_PULLDOWN	(OMAP_OFF_EN | OMAP_OFF_PULL_EN)
 #define OMAP_PIN_OFF_WAKEUPENABLE	OMAP_WAKEUP_EN
 
-#define OMAP_MODE_GPIO(x)	(((x) & OMAP_MUX_MODE7) == OMAP_MUX_MODE4)
+#define OMAP_MODE_GPIO(partition, x)	(((x) & OMAP_MUX_MODE7) == \
+					  partition->gpio)
 #define OMAP_MODE_UART(x)	(((x) & OMAP_MUX_MODE7) == OMAP_MUX_MODE0)
 
 /* Flags for omapX_mux_init */
@@ -79,13 +80,20 @@
 /*
  * omap_mux_init flags definition:
  *
+ * OMAP_GPIO_MUX_MODE, bits 0-2: gpio muxing mode, same as in the pad
+ *      control register, which takes values from 0-7.
  * OMAP_MUX_REG_8BIT: Ensure that access to padconf is done in 8 bits.
  * The default value is 16 bits.
- * OMAP_MUX_GPIO_IN_MODE3: The GPIO is selected in mode3.
- * The default is mode4.
  */
-#define OMAP_MUX_REG_8BIT		(1 << 0)
-#define OMAP_MUX_GPIO_IN_MODE3		(1 << 1)
+#define OMAP_MUX_GPIO_IN_MODE0		OMAP_MUX_MODE0
+#define OMAP_MUX_GPIO_IN_MODE1		OMAP_MUX_MODE1
+#define OMAP_MUX_GPIO_IN_MODE2		OMAP_MUX_MODE2
+#define OMAP_MUX_GPIO_IN_MODE3		OMAP_MUX_MODE3
+#define OMAP_MUX_GPIO_IN_MODE4		OMAP_MUX_MODE4
+#define OMAP_MUX_GPIO_IN_MODE5		OMAP_MUX_MODE5
+#define OMAP_MUX_GPIO_IN_MODE6		OMAP_MUX_MODE6
+#define OMAP_MUX_GPIO_IN_MODE7		OMAP_MUX_MODE7
+#define OMAP_MUX_REG_8BIT		(1 << 3)
 
 /**
  * struct omap_board_data - board specific device data
@@ -105,6 +113,7 @@
  * struct mux_partition - contain partition related information
  * @name: name of the current partition
  * @flags: flags specific to this partition
+ * @gpio: gpio mux mode
  * @phys: physical address
  * @size: partition size
  * @base: virtual address after ioremap
@@ -114,6 +123,7 @@
 struct omap_mux_partition {
 	const char		*name;
 	u32			flags;
+	u32			gpio;
 	u32			phys;
 	u32			size;
 	void __iomem		*base;
diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c
index c47140b..c53609f4 100644
--- a/arch/arm/mach-omap2/mux34xx.c
+++ b/arch/arm/mach-omap2/mux34xx.c
@@ -2053,7 +2053,7 @@
 		return -EINVAL;
 	}
 
-	return omap_mux_init("core", 0,
+	return omap_mux_init("core", OMAP_MUX_GPIO_IN_MODE4,
 			     OMAP3_CONTROL_PADCONF_MUX_PBASE,
 			     OMAP3_CONTROL_PADCONF_MUX_SIZE,
 			     omap3_muxmodes, package_subset, board_subset,
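
With the core partition registered as above, partition->gpio ends up as OMAP_MUX_MODE4, so the reworked OMAP_MODE_GPIO() classifies a pad as GPIO exactly when its mode bits read 4. For example (the padconf value is illustrative):

	OMAP_MODE_GPIO(partition, 0x0104);	/* (0x0104 & OMAP_MUX_MODE7) == 4 -> true */
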
diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
index a6a4ff8..6da4f7a 100644
--- a/arch/arm/mach-omap2/omap-iommu.c
+++ b/arch/arm/mach-omap2/omap-iommu.c
@@ -12,153 +12,60 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
 
 #include <linux/platform_data/iommu-omap.h>
+#include "omap_hwmod.h"
+#include "omap_device.h"
 
-#include "soc.h"
-#include "common.h"
+static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused)
+{
+	struct platform_device *pdev;
+	struct iommu_platform_data *pdata;
+	struct omap_mmu_dev_attr *a = (struct omap_mmu_dev_attr *)oh->dev_attr;
+	static int i;
 
-struct iommu_device {
-	resource_size_t base;
-	int irq;
-	struct iommu_platform_data pdata;
-	struct resource res[2];
-};
-static struct iommu_device *devices;
-static int num_iommu_devices;
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
 
-#ifdef CONFIG_ARCH_OMAP3
-static struct iommu_device omap3_devices[] = {
-	{
-		.base = 0x480bd400,
-		.irq = 24 + OMAP_INTC_START,
-		.pdata = {
-			.name = "isp",
-			.nr_tlb_entries = 8,
-			.clk_name = "cam_ick",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-#if defined(CONFIG_OMAP_IOMMU_IVA2)
-	{
-		.base = 0x5d000000,
-		.irq = 28 + OMAP_INTC_START,
-		.pdata = {
-			.name = "iva2",
-			.nr_tlb_entries = 32,
-			.clk_name = "iva2_ck",
-			.da_start = 0x11000000,
-			.da_end = 0xFFFFF000,
-		},
-	},
-#endif
-};
-#define NR_OMAP3_IOMMU_DEVICES ARRAY_SIZE(omap3_devices)
-static struct platform_device *omap3_iommu_pdev[NR_OMAP3_IOMMU_DEVICES];
-#else
-#define omap3_devices		NULL
-#define NR_OMAP3_IOMMU_DEVICES	0
-#define omap3_iommu_pdev	NULL
-#endif
+	pdata->name = oh->name;
+	pdata->nr_tlb_entries = a->nr_tlb_entries;
+	pdata->da_start = a->da_start;
+	pdata->da_end = a->da_end;
 
-#ifdef CONFIG_ARCH_OMAP4
-static struct iommu_device omap4_devices[] = {
-	{
-		.base = OMAP4_MMU1_BASE,
-		.irq = 100 + OMAP44XX_IRQ_GIC_START,
-		.pdata = {
-			.name = "ducati",
-			.nr_tlb_entries = 32,
-			.clk_name = "ipu_fck",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-	{
-		.base = OMAP4_MMU2_BASE,
-		.irq = 28 + OMAP44XX_IRQ_GIC_START,
-		.pdata = {
-			.name = "tesla",
-			.nr_tlb_entries = 32,
-			.clk_name = "dsp_fck",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-};
-#define NR_OMAP4_IOMMU_DEVICES ARRAY_SIZE(omap4_devices)
-static struct platform_device *omap4_iommu_pdev[NR_OMAP4_IOMMU_DEVICES];
-#else
-#define omap4_devices		NULL
-#define NR_OMAP4_IOMMU_DEVICES	0
-#define omap4_iommu_pdev	NULL
-#endif
+	if (oh->rst_lines_cnt == 1) {
+		pdata->reset_name = oh->rst_lines->name;
+		pdata->assert_reset = omap_device_assert_hardreset;
+		pdata->deassert_reset = omap_device_deassert_hardreset;
+	}
 
-static struct platform_device **omap_iommu_pdev;
+	pdev = omap_device_build("omap-iommu", i, oh, pdata, sizeof(*pdata),
+				NULL, 0, 0);
+
+	kfree(pdata);
+
+	if (IS_ERR(pdev)) {
+		pr_err("%s: device build err: %ld\n", __func__, PTR_ERR(pdev));
+		return PTR_ERR(pdev);
+	}
+
+	i++;
+
+	return 0;
+}
 
 static int __init omap_iommu_init(void)
 {
-	int i, err;
-	struct resource res[] = {
-		{ .flags = IORESOURCE_MEM },
-		{ .flags = IORESOURCE_IRQ },
-	};
-
-	if (cpu_is_omap34xx()) {
-		devices = omap3_devices;
-		omap_iommu_pdev = omap3_iommu_pdev;
-		num_iommu_devices = NR_OMAP3_IOMMU_DEVICES;
-	} else if (cpu_is_omap44xx()) {
-		devices = omap4_devices;
-		omap_iommu_pdev = omap4_iommu_pdev;
-		num_iommu_devices = NR_OMAP4_IOMMU_DEVICES;
-	} else
-		return -ENODEV;
-
-	for (i = 0; i < num_iommu_devices; i++) {
-		struct platform_device *pdev;
-		const struct iommu_device *d = &devices[i];
-
-		pdev = platform_device_alloc("omap-iommu", i);
-		if (!pdev) {
-			err = -ENOMEM;
-			goto err_out;
-		}
-
-		res[0].start = d->base;
-		res[0].end = d->base + MMU_REG_SIZE - 1;
-		res[1].start = res[1].end = d->irq;
-
-		err = platform_device_add_resources(pdev, res,
-						    ARRAY_SIZE(res));
-		if (err)
-			goto err_out;
-		err = platform_device_add_data(pdev, &d->pdata,
-					       sizeof(d->pdata));
-		if (err)
-			goto err_out;
-		err = platform_device_add(pdev);
-		if (err)
-			goto err_out;
-		omap_iommu_pdev[i] = pdev;
-	}
-	return 0;
-
-err_out:
-	while (i--)
-		platform_device_put(omap_iommu_pdev[i]);
-	return err;
+	return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL);
 }
 /* must be ready before omap3isp is probed */
 subsys_initcall(omap_iommu_init);
 
 static void __exit omap_iommu_exit(void)
 {
-	int i;
-
-	for (i = 0; i < num_iommu_devices; i++)
-		platform_device_unregister(omap_iommu_pdev[i]);
+	/* Do nothing */
 }
 module_exit(omap_iommu_exit);
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index 6c8fa70..d2d3840 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -77,8 +77,7 @@
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-			  OMAP_I2C_FLAG_BUS_SHIFT_2 |
+	.flags		= OMAP_I2C_FLAG_BUS_SHIFT_2 |
 			  OMAP_I2C_FLAG_FORCE_19200_INT_CLK,
 };
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index 32820d89..081c71e 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -1118,8 +1118,7 @@
 };
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
-	.flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE |
-		  OMAP_I2C_FLAG_RESET_REGS_POSTIDLE,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE,
 };
 
 /* i2c1 */
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index ec4499e..8bb2628 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -794,9 +794,7 @@
 /* I2C1 */
 static struct omap_i2c_dev_attr i2c1_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-			  OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-			  OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags		= OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod omap3xxx_i2c1_hwmod = {
@@ -821,9 +819,7 @@
 /* I2C2 */
 static struct omap_i2c_dev_attr i2c2_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod omap3xxx_i2c2_hwmod = {
@@ -848,9 +844,7 @@
 /* I2C3 */
 static struct omap_i2c_dev_attr i2c3_dev_attr = {
 	.fifo_depth	= 64, /* bytes */
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod_irq_info i2c3_mpu_irqs[] = {
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index eb61cfd..f9fab94 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -653,7 +653,7 @@
 	.mpu_irqs	= omap44xx_dsp_irqs,
 	.rst_lines	= omap44xx_dsp_resets,
 	.rst_lines_cnt	= ARRAY_SIZE(omap44xx_dsp_resets),
-	.main_clk	= "dsp_fck",
+	.main_clk	= "dpll_iva_m4x2_ck",
 	.prcm = {
 		.omap4 = {
 			.clkctrl_offs = OMAP4_CM_TESLA_TESLA_CLKCTRL_OFFSET,
@@ -1529,8 +1529,7 @@
 };
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
-	.flags	= OMAP_I2C_FLAG_BUS_SHIFT_NONE |
-			OMAP_I2C_FLAG_RESET_REGS_POSTIDLE,
+	.flags	= OMAP_I2C_FLAG_BUS_SHIFT_NONE,
 };
 
 /* i2c1 */
@@ -1680,7 +1679,7 @@
 	.mpu_irqs	= omap44xx_ipu_irqs,
 	.rst_lines	= omap44xx_ipu_resets,
 	.rst_lines_cnt	= ARRAY_SIZE(omap44xx_ipu_resets),
-	.main_clk	= "ipu_fck",
+	.main_clk	= "ducati_clk_mux_ck",
 	.prcm = {
 		.omap4 = {
 			.clkctrl_offs = OMAP4_CM_DUCATI_DUCATI_CLKCTRL_OFFSET,
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 93d1025..04fdbc4 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -27,8 +27,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/console.h>
 #include <linux/omap-dma.h>
-
-#include <plat/omap-serial.h>
+#include <linux/platform_data/serial-omap.h>
 
 #include "common.h"
 #include "omap_hwmod.h"
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 7016637..691aa67 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -175,7 +175,7 @@
 			continue;
 		}
 
-		prom_add_property(np, &device_disabled);
+		of_add_property(np, &device_disabled);
 		return np;
 	}
 
@@ -190,7 +190,7 @@
  * kernel registering these devices remove them dynamically from the device
  * tree on boot.
  */
-void __init omap_dmtimer_init(void)
+static void __init omap_dmtimer_init(void)
 {
 	struct device_node *np;
 
@@ -210,7 +210,7 @@
  *
  * Get the timer errata flags that are specific to the OMAP device being used.
  */
-u32 __init omap_dm_timer_get_errata(void)
+static u32 __init omap_dm_timer_get_errata(void)
 {
 	if (cpu_is_omap24xx())
 		return 0;
@@ -392,7 +392,7 @@
 };
 
 /* Setup free-running counter for clocksource */
-static int __init omap2_sync32k_clocksource_init(void)
+static int __init __maybe_unused omap2_sync32k_clocksource_init(void)
 {
 	int ret;
 	struct device_node *np = NULL;
diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c
index d1dbe12..2e44e8a 100644
--- a/arch/arm/mach-omap2/usb-host.c
+++ b/arch/arm/mach-omap2/usb-host.c
@@ -508,6 +508,10 @@
 	if (cpu_is_omap34xx()) {
 		setup_ehci_io_mux(pdata->port_mode);
 		setup_ohci_io_mux(pdata->port_mode);
+
+		if (omap_rev() <= OMAP3430_REV_ES2_1)
+			usbhs_data.single_ulpi_bypass = true;
+
 	} else if (cpu_is_omap44xx()) {
 		setup_4430ehci_io_mux(pdata->port_mode);
 		setup_4430ohci_io_mux(pdata->port_mode);
diff --git a/arch/arm/mach-realview/include/mach/board-eb.h b/arch/arm/mach-realview/include/mach/board-eb.h
index 124bce6..a301e61 100644
--- a/arch/arm/mach-realview/include/mach/board-eb.h
+++ b/arch/arm/mach-realview/include/mach/board-eb.h
@@ -47,7 +47,7 @@
 #define REALVIEW_EB_USB_BASE		0x4F000000	/* USB */
 
 #ifdef CONFIG_REALVIEW_EB_ARM11MP_REVB
-#define REALVIEW_EB11MP_PRIV_MEM_BASE	0x1F000000
+#define REALVIEW_EB11MP_PRIV_MEM_BASE	0x10100000
 #define REALVIEW_EB11MP_L220_BASE	0x10102000	/* L220 registers */
 #define REALVIEW_EB11MP_SYS_PLD_CTRL1	0xD8		/* Register offset for MPCore sysctl */
 #else
diff --git a/arch/arm/mach-s3c64xx/clock.c b/arch/arm/mach-s3c64xx/clock.c
index 1a6f857..803711e 100644
--- a/arch/arm/mach-s3c64xx/clock.c
+++ b/arch/arm/mach-s3c64xx/clock.c
@@ -149,25 +149,6 @@
 		.enable		= s3c64xx_pclk_ctrl,
 		.ctrlbit	= S3C6410_CLKCON_PCLK_I2C1,
 	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.0",
-		.parent		= &clk_p,
-		.enable		= s3c64xx_pclk_ctrl,
-		.ctrlbit	= S3C_CLKCON_PCLK_IIS0,
-	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.1",
-		.parent		= &clk_p,
-		.enable		= s3c64xx_pclk_ctrl,
-		.ctrlbit	= S3C_CLKCON_PCLK_IIS1,
-	}, {
-#ifdef CONFIG_CPU_S3C6410
-		.name		= "iis",
-		.parent		= &clk_p,
-		.enable		= s3c64xx_pclk_ctrl,
-		.ctrlbit	= S3C6410_CLKCON_PCLK_IIS2,
-	}, {
-#endif
 		.name		= "keypad",
 		.parent		= &clk_p,
 		.enable		= s3c64xx_pclk_ctrl,
@@ -337,6 +318,32 @@
 	.ctrlbit	= S3C_CLKCON_SCLK_SPI1_48,
 };
 
+static struct clk clk_i2s0 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.0",
+	.parent		= &clk_p,
+	.enable		= s3c64xx_pclk_ctrl,
+	.ctrlbit	= S3C_CLKCON_PCLK_IIS0,
+};
+
+static struct clk clk_i2s1 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.1",
+	.parent		= &clk_p,
+	.enable		= s3c64xx_pclk_ctrl,
+	.ctrlbit	= S3C_CLKCON_PCLK_IIS1,
+};
+
+#ifdef CONFIG_CPU_S3C6410
+static struct clk clk_i2s2 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.2",
+	.parent		= &clk_p,
+	.enable		= s3c64xx_pclk_ctrl,
+	.ctrlbit	= S3C6410_CLKCON_PCLK_IIS2,
+};
+#endif
+
 static struct clk init_clocks[] = {
 	{
 		.name		= "lcd",
@@ -660,6 +667,7 @@
 	.nr_sources	= ARRAY_SIZE(clkset_audio1_list),
 };
 
+#ifdef CONFIG_CPU_S3C6410
 static struct clk *clkset_audio2_list[] = {
 	[0] = &clk_mout_epll.clk,
 	[1] = &clk_dout_mpll,
@@ -672,6 +680,7 @@
 	.sources	= clkset_audio2_list,
 	.nr_sources	= ARRAY_SIZE(clkset_audio2_list),
 };
+#endif
 
 static struct clksrc_clk clksrcs[] = {
 	{
@@ -685,36 +694,6 @@
 		.sources	= &clkset_uhost,
 	}, {
 		.clk	= {
-			.name		= "audio-bus",
-			.devname	= "samsung-i2s.0",
-			.ctrlbit        = S3C_CLKCON_SCLK_AUDIO0,
-			.enable		= s3c64xx_sclk_ctrl,
-		},
-		.reg_src	= { .reg = S3C_CLK_SRC, .shift = 7, .size = 3  },
-		.reg_div	= { .reg = S3C_CLK_DIV2, .shift = 8, .size = 4  },
-		.sources	= &clkset_audio0,
-	}, {
-		.clk	= {
-			.name		= "audio-bus",
-			.devname	= "samsung-i2s.1",
-			.ctrlbit        = S3C_CLKCON_SCLK_AUDIO1,
-			.enable		= s3c64xx_sclk_ctrl,
-		},
-		.reg_src	= { .reg = S3C_CLK_SRC, .shift = 10, .size = 3  },
-		.reg_div	= { .reg = S3C_CLK_DIV2, .shift = 12, .size = 4  },
-		.sources	= &clkset_audio1,
-	}, {
-		.clk	= {
-			.name		= "audio-bus",
-			.devname	= "samsung-i2s.2",
-			.ctrlbit        = S3C6410_CLKCON_SCLK_AUDIO2,
-			.enable		= s3c64xx_sclk_ctrl,
-		},
-		.reg_src	= { .reg = S3C6410_CLK_SRC2, .shift = 0, .size = 3  },
-		.reg_div	= { .reg = S3C_CLK_DIV2, .shift = 24, .size = 4  },
-		.sources	= &clkset_audio2,
-	}, {
-		.clk	= {
 			.name		= "irda-bus",
 			.ctrlbit        = S3C_CLKCON_SCLK_IRDA,
 			.enable		= s3c64xx_sclk_ctrl,
@@ -805,6 +784,43 @@
 	.sources = &clkset_spi_mmc,
 };
 
+static struct clksrc_clk clk_audio_bus0 = {
+	.clk	= {
+		.name		= "audio-bus",
+		.devname	= "samsung-i2s.0",
+		.ctrlbit	= S3C_CLKCON_SCLK_AUDIO0,
+		.enable		= s3c64xx_sclk_ctrl,
+	},
+	.reg_src	= { .reg = S3C_CLK_SRC, .shift = 7, .size = 3  },
+	.reg_div	= { .reg = S3C_CLK_DIV2, .shift = 8, .size = 4  },
+	.sources	= &clkset_audio0,
+};
+
+static struct clksrc_clk clk_audio_bus1 = {
+	.clk	= {
+		.name		= "audio-bus",
+		.devname	= "samsung-i2s.1",
+		.ctrlbit	= S3C_CLKCON_SCLK_AUDIO1,
+		.enable		= s3c64xx_sclk_ctrl,
+	},
+	.reg_src	= { .reg = S3C_CLK_SRC, .shift = 10, .size = 3  },
+	.reg_div	= { .reg = S3C_CLK_DIV2, .shift = 12, .size = 4  },
+	.sources	= &clkset_audio1,
+};
+
+#ifdef CONFIG_CPU_S3C6410
+static struct clksrc_clk clk_audio_bus2 = {
+	.clk	= {
+		.name		= "audio-bus",
+		.devname	= "samsung-i2s.2",
+		.ctrlbit	= S3C6410_CLKCON_SCLK_AUDIO2,
+		.enable		= s3c64xx_sclk_ctrl,
+	},
+	.reg_src	= { .reg = S3C6410_CLK_SRC2, .shift = 0, .size = 3  },
+	.reg_div	= { .reg = S3C_CLK_DIV2, .shift = 24, .size = 4  },
+	.sources	= &clkset_audio2,
+};
+#endif
 /* Clock initialisation code */
 
 static struct clksrc_clk *init_parents[] = {
@@ -820,6 +836,8 @@
 	&clk_sclk_mmc2,
 	&clk_sclk_spi0,
 	&clk_sclk_spi1,
+	&clk_audio_bus0,
+	&clk_audio_bus1,
 };
 
 static struct clk *clk_cdev[] = {
@@ -828,6 +846,8 @@
 	&clk_hsmmc2,
 	&clk_48m_spi0,
 	&clk_48m_spi1,
+	&clk_i2s0,
+	&clk_i2s1,
 };
 
 static struct clk_lookup s3c64xx_clk_lookup[] = {
@@ -844,6 +864,14 @@
 	CLKDEV_INIT("s3c6410-spi.0", "spi_busclk2", &clk_48m_spi0),
 	CLKDEV_INIT("s3c6410-spi.1", "spi_busclk1", &clk_sclk_spi1.clk),
 	CLKDEV_INIT("s3c6410-spi.1", "spi_busclk2", &clk_48m_spi1),
+	CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0),
+	CLKDEV_INIT("samsung-i2s.0", "i2s_opclk1", &clk_audio_bus0.clk),
+	CLKDEV_INIT("samsung-i2s.1", "i2s_opclk0", &clk_i2s1),
+	CLKDEV_INIT("samsung-i2s.1", "i2s_opclk1", &clk_audio_bus1.clk),
+#ifdef CONFIG_CPU_S3C6410
+	CLKDEV_INIT("samsung-i2s.2", "i2s_opclk0", &clk_i2s2),
+	CLKDEV_INIT("samsung-i2s.2", "i2s_opclk1", &clk_audio_bus2.clk),
+#endif
 };
 
 #define GET_DIV(clk, field) ((((clk) & field##_MASK) >> field##_SHIFT) + 1)
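
With these lookups in place, the samsung-i2s driver can resolve its clocks by connection id instead of carrying clock-name tables in platform data; a minimal sketch of the consumer side (error handling elided):

	struct clk *opclk = clk_get(&pdev->dev, "i2s_opclk0");
	if (!IS_ERR(opclk))
		clk_enable(opclk);
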
diff --git a/arch/arm/mach-s3c64xx/dev-audio.c b/arch/arm/mach-s3c64xx/dev-audio.c
index 35f3e07..e367e87 100644
--- a/arch/arm/mach-s3c64xx/dev-audio.c
+++ b/arch/arm/mach-s3c64xx/dev-audio.c
@@ -23,11 +23,6 @@
 #include <linux/platform_data/asoc-s3c.h>
 #include <plat/gpio-cfg.h>
 
-static const char *rclksrc[] = {
-	[0] = "iis",
-	[1] = "audio-bus",
-};
-
 static int s3c64xx_i2s_cfg_gpio(struct platform_device *pdev)
 {
 	unsigned int base;
@@ -64,11 +59,6 @@
 
 static struct s3c_audio_pdata i2sv3_pdata = {
 	.cfg_gpio = s3c64xx_i2s_cfg_gpio,
-	.type = {
-		.i2s = {
-			.src_clk = rclksrc,
-		},
-	},
 };
 
 struct platform_device s3c64xx_device_iis0 = {
@@ -110,7 +100,6 @@
 	.type = {
 		.i2s = {
 			.quirks = QUIRK_PRI_6CHAN,
-			.src_clk = rclksrc,
 		},
 	},
 };
diff --git a/arch/arm/mach-s5p64x0/clock-s5p6440.c b/arch/arm/mach-s5p64x0/clock-s5p6440.c
index 0004455..5112371 100644
--- a/arch/arm/mach-s5p64x0/clock-s5p6440.c
+++ b/arch/arm/mach-s5p64x0/clock-s5p6440.c
@@ -243,12 +243,6 @@
 		.enable		= s5p64x0_pclk_ctrl,
 		.ctrlbit	= (1 << 25),
 	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.0",
-		.parent		= &clk_pclk_low.clk,
-		.enable		= s5p64x0_pclk_ctrl,
-		.ctrlbit	= (1 << 26),
-	}, {
 		.name		= "dsim",
 		.parent		= &clk_pclk_low.clk,
 		.enable		= s5p64x0_pclk_ctrl,
@@ -405,15 +399,6 @@
 		.sources = &clkset_group1,
 		.reg_src = { .reg = S5P64X0_CLK_SRC1, .shift = 8, .size = 2 },
 		.reg_div = { .reg = S5P64X0_CLK_DIV3, .shift = 4, .size = 4 },
-	}, {
-		.clk	= {
-			.name		= "sclk_audio2",
-			.ctrlbit	= (1 << 11),
-			.enable		= s5p64x0_sclk_ctrl,
-		},
-		.sources = &clkset_audio,
-		.reg_src = { .reg = S5P64X0_CLK_SRC1, .shift = 0, .size = 3 },
-		.reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 24, .size = 4 },
 	},
 };
 
@@ -464,6 +449,26 @@
 	.reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 16, .size = 4 },
 };
 
+static struct clk clk_i2s0 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.0",
+	.parent		= &clk_pclk_low.clk,
+	.enable		= s5p64x0_pclk_ctrl,
+	.ctrlbit	= (1 << 26),
+};
+
+static struct clksrc_clk clk_audio_bus2 = {
+	.clk	= {
+		.name		= "sclk_audio2",
+		.devname	= "samsung-i2s.0",
+		.ctrlbit	= (1 << 11),
+		.enable		= s5p64x0_sclk_ctrl,
+	},
+	.sources = &clkset_audio,
+	.reg_src = { .reg = S5P64X0_CLK_SRC1, .shift = 0, .size = 3 },
+	.reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 24, .size = 4 },
+};
+
 static struct clksrc_clk clk_sclk_spi0 = {
 	.clk	= {
 		.name		= "sclk_spi",
@@ -506,13 +511,18 @@
 	.id		= -1,
 };
 
+static struct clk *clk_cdev[] = {
+	&clk_i2s0,
+};
+
 static struct clksrc_clk *clksrc_cdev[] = {
 	&clk_sclk_uclk,
 	&clk_sclk_spi0,
 	&clk_sclk_spi1,
 	&clk_sclk_mmc0,
 	&clk_sclk_mmc1,
-	&clk_sclk_mmc2
+	&clk_sclk_mmc2,
+	&clk_audio_bus2,
 };
 
 static struct clk_lookup s5p6440_clk_lookup[] = {
@@ -524,6 +534,8 @@
 	CLKDEV_INIT("s3c-sdhci.0", "mmc_busclk.2", &clk_sclk_mmc0.clk),
 	CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.2", &clk_sclk_mmc1.clk),
 	CLKDEV_INIT("s3c-sdhci.2", "mmc_busclk.2", &clk_sclk_mmc2.clk),
+	CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0),
+	CLKDEV_INIT("samsung-i2s.0", "i2s_opclk1", &clk_audio_bus2.clk),
 };
 
 void __init_or_cpufreq s5p6440_setup_clocks(void)
@@ -596,12 +608,17 @@
 void __init s5p6440_register_clocks(void)
 {
 	int ptr;
+	unsigned int cnt;
 
 	s3c24xx_register_clocks(clks, ARRAY_SIZE(clks));
 
 	for (ptr = 0; ptr < ARRAY_SIZE(sysclks); ptr++)
 		s3c_register_clksrc(sysclks[ptr], 1);
 
+	s3c24xx_register_clocks(clk_cdev, ARRAY_SIZE(clk_cdev));
+	for (cnt = 0; cnt < ARRAY_SIZE(clk_cdev); cnt++)
+		s3c_disable_clocks(clk_cdev[cnt], 1);
+
 	s3c_register_clksrc(clksrcs, ARRAY_SIZE(clksrcs));
 	s3c_register_clocks(init_clocks, ARRAY_SIZE(init_clocks));
 	for (ptr = 0; ptr < ARRAY_SIZE(clksrc_cdev); ptr++)
diff --git a/arch/arm/mach-s5p64x0/clock-s5p6450.c b/arch/arm/mach-s5p64x0/clock-s5p6450.c
index f3e0ef3..154dea7 100644
--- a/arch/arm/mach-s5p64x0/clock-s5p6450.c
+++ b/arch/arm/mach-s5p64x0/clock-s5p6450.c
@@ -247,24 +247,6 @@
 		.enable		= s5p64x0_pclk_ctrl,
 		.ctrlbit	= (1 << 22),
 	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.0",
-		.parent		= &clk_pclk_low.clk,
-		.enable		= s5p64x0_pclk_ctrl,
-		.ctrlbit	= (1 << 26),
-	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.1",
-		.parent		= &clk_pclk_low.clk,
-		.enable		= s5p64x0_pclk_ctrl,
-		.ctrlbit	= (1 << 15),
-	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.2",
-		.parent		= &clk_pclk_low.clk,
-		.enable		= s5p64x0_pclk_ctrl,
-		.ctrlbit	= (1 << 16),
-	}, {
 		.name		= "i2c",
 		.devname	= "s3c2440-i2c.1",
 		.parent		= &clk_pclk_low.clk,
@@ -402,6 +384,7 @@
 static struct clksrc_clk clk_sclk_audio0 = {
 	.clk		= {
 		.name		= "audio-bus",
+		.devname	= "samsung-i2s.0",
 		.enable		= s5p64x0_sclk_ctrl,
 		.ctrlbit	= (1 << 8),
 		.parent		= &clk_dout_epll.clk,
@@ -549,6 +532,36 @@
 	.reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 4, .size = 4 },
 };
 
+static struct clk clk_i2s0 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.0",
+	.parent		= &clk_pclk_low.clk,
+	.enable		= s5p64x0_pclk_ctrl,
+	.ctrlbit	= (1 << 26),
+};
+
+static struct clk clk_i2s1 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.1",
+	.parent		= &clk_pclk_low.clk,
+	.enable		= s5p64x0_pclk_ctrl,
+	.ctrlbit	= (1 << 15),
+};
+
+static struct clk clk_i2s2 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.2",
+	.parent		= &clk_pclk_low.clk,
+	.enable		= s5p64x0_pclk_ctrl,
+	.ctrlbit	= (1 << 16),
+};
+
+static struct clk *clk_cdev[] = {
+	&clk_i2s0,
+	&clk_i2s1,
+	&clk_i2s2,
+};
+
 static struct clksrc_clk *clksrc_cdev[] = {
 	&clk_sclk_uclk,
 	&clk_sclk_spi0,
@@ -556,6 +569,7 @@
 	&clk_sclk_mmc0,
 	&clk_sclk_mmc1,
 	&clk_sclk_mmc2,
+	&clk_sclk_audio0,
 };
 
 static struct clk_lookup s5p6450_clk_lookup[] = {
@@ -567,6 +581,10 @@
 	CLKDEV_INIT("s3c-sdhci.0", "mmc_busclk.2", &clk_sclk_mmc0.clk),
 	CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.2", &clk_sclk_mmc1.clk),
 	CLKDEV_INIT("s3c-sdhci.2", "mmc_busclk.2", &clk_sclk_mmc2.clk),
+	CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0),
+	CLKDEV_INIT("samsung-i2s.0", "i2s_opclk1", &clk_sclk_audio0.clk),
+	CLKDEV_INIT("samsung-i2s.1", "i2s_opclk0", &clk_i2s1),
+	CLKDEV_INIT("samsung-i2s.2", "i2s_opclk0", &clk_i2s2),
 };
 
 /* Clock initialization code */
@@ -584,7 +602,6 @@
 	&clk_pclk,
 	&clk_hclk_low,
 	&clk_pclk_low,
-	&clk_sclk_audio0,
 };
 
 static struct clk dummy_apb_pclk = {
@@ -661,10 +678,16 @@
 void __init s5p6450_register_clocks(void)
 {
 	int ptr;
+	unsigned int cnt;
 
 	for (ptr = 0; ptr < ARRAY_SIZE(sysclks); ptr++)
 		s3c_register_clksrc(sysclks[ptr], 1);
 
+
+	s3c24xx_register_clocks(clk_cdev, ARRAY_SIZE(clk_cdev));
+	for (cnt = 0; cnt < ARRAY_SIZE(clk_cdev); cnt++)
+		s3c_disable_clocks(clk_cdev[cnt], 1);
+
 	s3c_register_clksrc(clksrcs, ARRAY_SIZE(clksrcs));
 	s3c_register_clocks(init_clocks, ARRAY_SIZE(init_clocks));
 	for (ptr = 0; ptr < ARRAY_SIZE(clksrc_cdev); ptr++)
diff --git a/arch/arm/mach-s5p64x0/dev-audio.c b/arch/arm/mach-s5p64x0/dev-audio.c
index a0d6edf..723d4773c 100644
--- a/arch/arm/mach-s5p64x0/dev-audio.c
+++ b/arch/arm/mach-s5p64x0/dev-audio.c
@@ -19,11 +19,6 @@
 #include <mach/dma.h>
 #include <mach/irqs.h>
 
-static const char *rclksrc[] = {
-	[0] = "iis",
-	[1] = "sclk_audio2",
-};
-
 static int s5p6440_cfg_i2s(struct platform_device *pdev)
 {
 	switch (pdev->id) {
@@ -45,7 +40,6 @@
 	.type = {
 		.i2s = {
 			.quirks = QUIRK_PRI_6CHAN,
-			.src_clk = rclksrc,
 		},
 	},
 };
@@ -93,7 +87,6 @@
 	.type = {
 		.i2s = {
 			.quirks = QUIRK_PRI_6CHAN,
-			.src_clk = rclksrc,
 		},
 	},
 };
@@ -110,11 +103,6 @@
 
 static struct s3c_audio_pdata s5p6450_i2s_pdata = {
 	.cfg_gpio = s5p6450_cfg_i2s,
-	.type = {
-		.i2s = {
-			.src_clk = rclksrc,
-		},
-	},
 };
 
 static struct resource s5p6450_i2s1_resource[] = {
diff --git a/arch/arm/mach-s5pc100/clock.c b/arch/arm/mach-s5pc100/clock.c
index 9262197..a206dc3 100644
--- a/arch/arm/mach-s5pc100/clock.c
+++ b/arch/arm/mach-s5pc100/clock.c
@@ -606,24 +606,6 @@
 		.enable		= s5pc100_d1_4_ctrl,
 		.ctrlbit	= (1 << 13),
 	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.0",
-		.parent		= &clk_div_pclkd1.clk,
-		.enable		= s5pc100_d1_5_ctrl,
-		.ctrlbit	= (1 << 0),
-	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.1",
-		.parent		= &clk_div_pclkd1.clk,
-		.enable		= s5pc100_d1_5_ctrl,
-		.ctrlbit	= (1 << 1),
-	}, {
-		.name		= "iis",
-		.devname	= "samsung-i2s.2",
-		.parent		= &clk_div_pclkd1.clk,
-		.enable		= s5pc100_d1_5_ctrl,
-		.ctrlbit	= (1 << 2),
-	}, {
 		.name		= "ac97",
 		.parent		= &clk_div_pclkd1.clk,
 		.enable		= s5pc100_d1_5_ctrl,
@@ -724,6 +706,30 @@
 	.ctrlbit	= (1 << 9),
 };
 
+static struct clk clk_i2s0 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.0",
+	.parent		= &clk_div_pclkd1.clk,
+	.enable		= s5pc100_d1_5_ctrl,
+	.ctrlbit	= (1 << 0),
+};
+
+static struct clk clk_i2s1 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.1",
+	.parent		= &clk_div_pclkd1.clk,
+	.enable		= s5pc100_d1_5_ctrl,
+	.ctrlbit	= (1 << 1),
+};
+
+static struct clk clk_i2s2 = {
+	.name		= "iis",
+	.devname	= "samsung-i2s.2",
+	.parent		= &clk_div_pclkd1.clk,
+	.enable		= s5pc100_d1_5_ctrl,
+	.ctrlbit	= (1 << 2),
+};
+
 static struct clk clk_vclk54m = {
 	.name		= "vclk_54m",
 	.rate		= 54000000,
@@ -1154,6 +1160,9 @@
 	&clk_48m_spi0,
 	&clk_48m_spi1,
 	&clk_48m_spi2,
+	&clk_i2s0,
+	&clk_i2s1,
+	&clk_i2s2,
 };
 
 static struct clksrc_clk *clksrc_cdev[] = {
@@ -1321,6 +1330,9 @@
 	CLKDEV_INIT("s5pc100-spi.1", "spi_busclk2", &clk_sclk_spi1.clk),
 	CLKDEV_INIT("s5pc100-spi.2", "spi_busclk1", &clk_48m_spi2),
 	CLKDEV_INIT("s5pc100-spi.2", "spi_busclk2", &clk_sclk_spi2.clk),
+	CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0),
+	CLKDEV_INIT("samsung-i2s.1", "i2s_opclk0", &clk_i2s1),
+	CLKDEV_INIT("samsung-i2s.2", "i2s_opclk0", &clk_i2s2),
 };
 
 void __init s5pc100_register_clocks(void)
diff --git a/arch/arm/mach-s5pc100/dev-audio.c b/arch/arm/mach-s5pc100/dev-audio.c
index 1cc252c..46f488b 100644
--- a/arch/arm/mach-s5pc100/dev-audio.c
+++ b/arch/arm/mach-s5pc100/dev-audio.c
@@ -39,18 +39,12 @@
 	return 0;
 }
 
-static const char *rclksrc_v5[] = {
-	[0] = "iis",
-	[1] = "i2sclkd2",
-};
-
 static struct s3c_audio_pdata i2sv5_pdata = {
 	.cfg_gpio = s5pc100_cfg_i2s,
 	.type = {
 		.i2s = {
 			.quirks = QUIRK_PRI_6CHAN | QUIRK_SEC_DAI
 					 | QUIRK_NEED_RSTCLR,
-			.src_clk = rclksrc_v5,
 		},
 	},
 };
@@ -72,18 +66,8 @@
 	},
 };
 
-static const char *rclksrc_v3[] = {
-	[0] = "iis",
-	[1] = "sclk_audio",
-};
-
 static struct s3c_audio_pdata i2sv3_pdata = {
 	.cfg_gpio = s5pc100_cfg_i2s,
-	.type = {
-		.i2s = {
-			.src_clk = rclksrc_v3,
-		},
-	},
 };
 
 static struct resource s5pc100_iis1_resource[] = {
diff --git a/arch/arm/mach-s5pv210/dev-audio.c b/arch/arm/mach-s5pv210/dev-audio.c
index 0a5480b..addfb16 100644
--- a/arch/arm/mach-s5pv210/dev-audio.c
+++ b/arch/arm/mach-s5pv210/dev-audio.c
@@ -20,11 +20,6 @@
 #include <mach/irqs.h>
 #include <mach/regs-audss.h>
 
-static const char *rclksrc[] = {
-	[0] = "busclk",
-	[1] = "i2sclk",
-};
-
 static int s5pv210_cfg_i2s(struct platform_device *pdev)
 {
 	/* configure GPIO for i2s port */
@@ -52,7 +47,6 @@
 		.i2s = {
 			.quirks = QUIRK_PRI_6CHAN | QUIRK_SEC_DAI
 					 | QUIRK_NEED_RSTCLR,
-			.src_clk = rclksrc,
 			.idma_addr = S5PV210_AUDSS_INT_MEM,
 		},
 	},
@@ -75,18 +69,8 @@
 	},
 };
 
-static const char *rclksrc_v3[] = {
-	[0] = "iis",
-	[1] = "audio-bus",
-};
-
 static struct s3c_audio_pdata i2sv3_pdata = {
 	.cfg_gpio = s5pv210_cfg_i2s,
-	.type = {
-		.i2s = {
-			.src_clk = rclksrc_v3,
-		},
-	},
 };
 
 static struct resource s5pv210_iis1_resource[] = {
diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c
index 0816562..d54cfc5 100644
--- a/arch/arm/mach-tegra/common.c
+++ b/arch/arm/mach-tegra/common.c
@@ -104,7 +104,7 @@
 static __initdata struct tegra_clk_init_table tegra30_clk_init_table[] = {
 	/* name		parent		rate		enabled */
 	{ "clk_m",	NULL,		0,		true },
-	{ "pll_p",	"clk_m",	408000000,	true },
+	{ "pll_p",	"pll_ref",	408000000,	true },
 	{ "pll_p_out1",	"pll_p",	9600000,	true },
 	{ "pll_p_out4",	"pll_p",	102000000,	true },
 	{ "sclk",	"pll_p_out4",	102000000,	true },
diff --git a/arch/arm/mach-tegra/tegra30_clocks.c b/arch/arm/mach-tegra/tegra30_clocks.c
index efc000e..d714777 100644
--- a/arch/arm/mach-tegra/tegra30_clocks.c
+++ b/arch/arm/mach-tegra/tegra30_clocks.c
@@ -2045,9 +2045,7 @@
 static int tegra30_dsib_clk_set_parent(struct clk_hw *hw, u8 index)
 {
 	struct clk *d = clk_get_sys(NULL, "pll_d");
-	/* The DSIB parent selection bit is in PLLD base
-	   register - can not do direct r-m-w, must be
-	   protected by PLLD lock */
+	/* The DSIB parent selection bit is in PLLD base register */
 	tegra_clk_cfg_ex(
 		d, TEGRA_CLK_PLLD_MIPI_MUX_SEL, index);
 
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 12f3994..4ce77cd 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -27,7 +27,6 @@
 #include <linux/mtd/nand.h>
 #include <linux/mtd/fsmc.h>
 #include <linux/pinctrl/machine.h>
-#include <linux/pinctrl/consumer.h>
 #include <linux/pinctrl/pinconf-generic.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_data/clk-u300.h>
@@ -250,6 +249,18 @@
  */
 static struct resource fsmc_resources[] = {
 	{
+		.name  = "nand_addr",
+		.start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE,
+		.end   = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE + SZ_16K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "nand_cmd",
+		.start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE,
+		.end   = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE + SZ_16K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
 		.name  = "nand_data",
 		.start = U300_NAND_CS0_PHYS_BASE,
 		.end   = U300_NAND_CS0_PHYS_BASE + SZ_16K - 1,
@@ -1492,8 +1503,6 @@
 	.nr_partitions = ARRAY_SIZE(u300_partitions),
 	.options = NAND_SKIP_BBTSCAN,
 	.width = FSMC_NAND_BW8,
-	.ale_off = PLAT_NAND_ALE,
-	.cle_off = PLAT_NAND_CLE,
 };
 
 static struct platform_device nand_device = {
@@ -1543,39 +1552,6 @@
 				    pin_highz_conf),
 };
 
-struct u300_mux_hog {
-	struct device *dev;
-	struct pinctrl *p;
-};
-
-static struct u300_mux_hog u300_mux_hogs[] = {
-	{
-		.dev = &uart0_device.dev,
-	},
-	{
-		.dev = &mmcsd_device.dev,
-	},
-};
-
-static int __init u300_pinctrl_fetch(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(u300_mux_hogs); i++) {
-		struct pinctrl *p;
-
-		p = pinctrl_get_select_default(u300_mux_hogs[i].dev);
-		if (IS_ERR(p)) {
-			pr_err("u300: could not get pinmux hog for dev %s\n",
-			       dev_name(u300_mux_hogs[i].dev));
-			continue;
-		}
-		u300_mux_hogs[i].p = p;
-	}
-	return 0;
-}
-subsys_initcall(u300_pinctrl_fetch);
-
 /*
  * Notice that AMBA devices are initialized before platform devices.
  *
diff --git a/arch/arm/mach-ux500/board-mop500-stuib.c b/arch/arm/mach-ux500/board-mop500-stuib.c
index 564f57d..7e1f294 100644
--- a/arch/arm/mach-ux500/board-mop500-stuib.c
+++ b/arch/arm/mach-ux500/board-mop500-stuib.c
@@ -77,9 +77,6 @@
  * BU21013 ROHM touchscreen interface on the STUIBs
  */
 
-/* tracks number of bu21013 devices being enabled */
-static int bu21013_devices;
-
 #define TOUCH_GPIO_PIN  84
 
 #define TOUCH_XMAX	384
@@ -88,73 +85,8 @@
 #define PRCMU_CLOCK_OCR		0x1CC
 #define TSC_EXT_CLOCK_9_6MHZ	0x840000
 
-/**
- * bu21013_gpio_board_init : configures the touch panel.
- * @reset_pin: reset pin number
- * This function can be used to configures
- * the voltage and reset the touch panel controller.
- */
-static int bu21013_gpio_board_init(int reset_pin)
-{
-	int retval = 0;
-
-	bu21013_devices++;
-	if (bu21013_devices == 1) {
-		retval = gpio_request(reset_pin, "touchp_reset");
-		if (retval) {
-			printk(KERN_ERR "Unable to request gpio reset_pin");
-			return retval;
-		}
-		retval = gpio_direction_output(reset_pin, 1);
-		if (retval < 0) {
-			printk(KERN_ERR "%s: gpio direction failed\n",
-					__func__);
-			return retval;
-		}
-	}
-
-	return retval;
-}
-
-/**
- * bu21013_gpio_board_exit : deconfigures the touch panel controller
- * @reset_pin: reset pin number
- * This function can be used to deconfigures the chip selection
- * for touch panel controller.
- */
-static int bu21013_gpio_board_exit(int reset_pin)
-{
-	int retval = 0;
-
-	if (bu21013_devices == 1) {
-		retval = gpio_direction_output(reset_pin, 0);
-		if (retval < 0) {
-			printk(KERN_ERR "%s: gpio direction failed\n",
-					__func__);
-			return retval;
-		}
-		gpio_set_value(reset_pin, 0);
-	}
-	bu21013_devices--;
-
-	return retval;
-}
-
-/**
- * bu21013_read_pin_val : get the interrupt pin value
- * This function can be used to get the interrupt pin value for touch panel
- * controller.
- */
-static int bu21013_read_pin_val(void)
-{
-	return gpio_get_value(TOUCH_GPIO_PIN);
-}
-
 static struct bu21013_platform_device tsc_plat_device = {
-	.cs_en = bu21013_gpio_board_init,
-	.cs_dis = bu21013_gpio_board_exit,
-	.irq_read_val = bu21013_read_pin_val,
-	.irq = NOMADIK_GPIO_TO_IRQ(TOUCH_GPIO_PIN),
+	.touch_pin = TOUCH_GPIO_PIN,
 	.touch_x_max = TOUCH_XMAX,
 	.touch_y_max = TOUCH_YMAX,
 	.ext_clk = false,
@@ -171,7 +103,6 @@
 		I2C_BOARD_INFO("bu21013_tp", 0x5D),
 		.platform_data = &tsc_plat_device,
 	},
-
 };
 
 void __init mop500_stuib_init(void)
diff --git a/arch/arm/mach-ux500/devices-db8500.h b/arch/arm/mach-ux500/devices-db8500.h
index 4b24c99..a5e05f6 100644
--- a/arch/arm/mach-ux500/devices-db8500.h
+++ b/arch/arm/mach-ux500/devices-db8500.h
@@ -8,6 +8,7 @@
 #ifndef __DEVICES_DB8500_H
 #define __DEVICES_DB8500_H
 
+#include <linux/platform_data/usb-musb-ux500.h>
 #include <mach/irqs.h>
 #include "devices-common.h"
 
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index cd95664..7539ec2 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -44,8 +44,10 @@
 ENTRY(v7_flush_dcache_louis)
 	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
-	ands	r3, r0, #0xe00000		@ extract LoUIS from clidr
-	mov	r3, r3, lsr #20			@ r3 = LoUIS * 2
+	ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
+	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
+	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
+	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
 	moveq	pc, lr				@ return if level == 0
 	mov	r10, #0				@ r10 (starting level) = 0
 	b	flush_levels			@ start flushing cache levels
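
The change reads the architecturally defined CLIDR fields: LoUIS lives in bits [23:21] and LoUU in bits [29:27]. The equivalent extraction in C, for reference:

	unsigned int louis = (clidr >> 21) & 7;	/* Level of Unification Inner Shareable (SMP) */
	unsigned int louu  = (clidr >> 27) & 7;	/* Level of Unification Uniprocessor (UP) */
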
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5383bc01..6b2fb87 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1034,7 +1034,8 @@
 	spin_unlock_irqrestore(&mapping->lock, flags);
 }
 
-static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
+static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
+					  gfp_t gfp, struct dma_attrs *attrs)
 {
 	struct page **pages;
 	int count = size >> PAGE_SHIFT;
@@ -1048,6 +1049,23 @@
 	if (!pages)
 		return NULL;
 
+	if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs))
+	{
+		unsigned long order = get_order(size);
+		struct page *page;
+
+		page = dma_alloc_from_contiguous(dev, count, order);
+		if (!page)
+			goto error;
+
+		__dma_clear_buffer(page, size);
+
+		for (i = 0; i < count; i++)
+			pages[i] = page + i;
+
+		return pages;
+	}
+
 	while (count) {
 		int j, order = __fls(count);
 
@@ -1081,14 +1099,21 @@
 	return NULL;
 }
 
-static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
+static int __iommu_free_buffer(struct device *dev, struct page **pages,
+			       size_t size, struct dma_attrs *attrs)
 {
 	int count = size >> PAGE_SHIFT;
 	int array_size = count * sizeof(struct page *);
 	int i;
-	for (i = 0; i < count; i++)
-		if (pages[i])
-			__free_pages(pages[i], 0);
+
+	if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
+		dma_release_from_contiguous(dev, pages[0], count);
+	} else {
+		for (i = 0; i < count; i++)
+			if (pages[i])
+				__free_pages(pages[i], 0);
+	}
+
 	if (array_size <= PAGE_SIZE)
 		kfree(pages);
 	else
@@ -1250,7 +1275,7 @@
 	if (gfp & GFP_ATOMIC)
 		return __iommu_alloc_atomic(dev, size, handle);
 
-	pages = __iommu_alloc_buffer(dev, size, gfp);
+	pages = __iommu_alloc_buffer(dev, size, gfp, attrs);
 	if (!pages)
 		return NULL;
 
@@ -1271,7 +1296,7 @@
 err_mapping:
 	__iommu_remove_mapping(dev, *handle, size);
 err_buffer:
-	__iommu_free_buffer(dev, pages, size);
+	__iommu_free_buffer(dev, pages, size, attrs);
 	return NULL;
 }
 
@@ -1327,7 +1352,7 @@
 	}
 
 	__iommu_remove_mapping(dev, handle, size);
-	__iommu_free_buffer(dev, pages, size);
+	__iommu_free_buffer(dev, pages, size, attrs);
 }
 
 static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
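
Callers opt in through the dma_attrs interface of this kernel generation; a minimal sketch of requesting a physically contiguous IOMMU-mapped buffer (the device pointer and size are illustrative):

	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t dma;
	void *cpu;

	dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &attrs);
	cpu = dma_alloc_attrs(dev, SZ_1M, &dma, GFP_KERNEL, &attrs);
	if (cpu)
		dma_free_attrs(dev, SZ_1M, cpu, dma, &attrs);
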
diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile
index 8d88584..9d9aa2f 100644
--- a/arch/arm/plat-omap/Makefile
+++ b/arch/arm/plat-omap/Makefile
@@ -11,7 +11,6 @@
 # omap_device support (OMAP2+ only at the moment)
 
 obj-$(CONFIG_OMAP_DM_TIMER) += dmtimer.o
-obj-$(CONFIG_OMAP_DEBUG_DEVICES) += debug-devices.o
 obj-$(CONFIG_OMAP_DEBUG_LEDS) += debug-leds.o
 i2c-omap-$(CONFIG_I2C_OMAP) := i2c.o
 obj-y += $(i2c-omap-m) $(i2c-omap-y)
diff --git a/arch/arm/plat-omap/debug-devices.c b/arch/arm/plat-omap/debug-devices.c
deleted file mode 100644
index a609e21..0000000
--- a/arch/arm/plat-omap/debug-devices.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * linux/arch/arm/plat-omap/debug-devices.c
- *
- * Copyright (C) 2005 Nokia Corporation
- * Modified from mach-omap2/board-h4.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/gpio.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/smc91x.h>
-
-#include <plat/debug-devices.h>
-
-/* Many OMAP development platforms reuse the same "debug board"; these
- * platforms include H2, H3, H4, and Perseus2.
- */
-
-static struct smc91x_platdata smc91x_info = {
-	.flags	= SMC91X_USE_16BIT | SMC91X_NOWAIT,
-	.leda	= RPC_LED_100_10,
-	.ledb	= RPC_LED_TX_RX,
-};
-
-static struct resource smc91x_resources[] = {
-	[0] = {
-		.flags  = IORESOURCE_MEM,
-	},
-	[1] = {
-		.flags  = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
-	},
-};
-
-static struct platform_device smc91x_device = {
-	.name		= "smc91x",
-	.id		= -1,
-	.dev		= {
-		.platform_data = &smc91x_info,
-	},
-	.num_resources	= ARRAY_SIZE(smc91x_resources),
-	.resource	= smc91x_resources,
-};
-
-static struct resource led_resources[] = {
-	[0] = {
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct platform_device led_device = {
-	.name		= "omap_dbg_led",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(led_resources),
-	.resource	= led_resources,
-};
-
-static struct platform_device *debug_devices[] __initdata = {
-	&smc91x_device,
-	&led_device,
-	/* ps2 kbd + mouse ports */
-	/* 4 extra uarts */
-	/* 6 input dip switches */
-	/* 8 output pins */
-};
-
-int __init debug_card_init(u32 addr, unsigned gpio)
-{
-	int	status;
-
-	smc91x_resources[0].start = addr + 0x300;
-	smc91x_resources[0].end   = addr + 0x30f;
-
-	smc91x_resources[1].start = gpio_to_irq(gpio);
-	smc91x_resources[1].end   = gpio_to_irq(gpio);
-
-	status = gpio_request(gpio, "SMC91x irq");
-	if (status < 0) {
-		printk(KERN_ERR "GPIO%d unavailable for smc91x IRQ\n", gpio);
-		return status;
-	}
-	gpio_direction_input(gpio);
-
-	led_resources[0].start = addr;
-	led_resources[0].end   = addr + SZ_4K - 1;
-
-	return platform_add_devices(debug_devices, ARRAY_SIZE(debug_devices));
-}
diff --git a/arch/arm/plat-omap/include/plat/debug-devices.h b/arch/arm/plat-omap/include/plat/debug-devices.h
deleted file mode 100644
index 8fc4287..0000000
--- a/arch/arm/plat-omap/include/plat/debug-devices.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* for TI reference platforms sharing the same debug card */
-extern int debug_card_init(u32 addr, unsigned gpio);
diff --git a/arch/arm/plat-samsung/include/plat/gpio-core.h b/arch/arm/plat-samsung/include/plat/gpio-core.h
index dfd8b7a..f7a3ea2 100644
--- a/arch/arm/plat-samsung/include/plat/gpio-core.h
+++ b/arch/arm/plat-samsung/include/plat/gpio-core.h
@@ -11,6 +11,9 @@
  * published by the Free Software Foundation.
 */
 
+#ifndef __PLAT_SAMSUNG_GPIO_CORE_H
+#define __PLAT_SAMSUNG_GPIO_CORE_H
+
 #define GPIOCON_OFF	(0x00)
 #define GPIODAT_OFF	(0x04)
 
@@ -124,3 +127,5 @@
 /* locking wrappers to deal with multiple access to the same gpio bank */
 #define samsung_gpio_lock(_oc, _fl) spin_lock_irqsave(&(_oc)->lock, _fl)
 #define samsung_gpio_unlock(_oc, _fl) spin_unlock_irqrestore(&(_oc)->lock, _fl)
+
+#endif /* __PLAT_SAMSUNG_GPIO_CORE_H */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f9ccff9..9c829b0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -8,8 +8,6 @@
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
-	select GENERIC_KERNEL_EXECVE
-	select GENERIC_KERNEL_THREAD
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 37e610d..d9ec402 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -209,10 +209,11 @@
 	return (u32)(unsigned long)uptr;
 }
 
+#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
-	struct pt_regs *regs = task_pt_regs(current);
-	return (void __user *)regs->compat_sp - len;
+	return (void __user *)compat_user_stack_pointer() - len;
 }
 
 struct compat_ipc64_perm {
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 538f4b4..9947768 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -50,6 +50,7 @@
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
+	debug_dma_mapping_error(dev, dev_addr);
 	return ops->mapping_error(dev, dev_addr);
 }
 
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index d69aeea..744087f 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -20,6 +20,7 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
@@ -27,6 +28,5 @@
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index f7b05ed..26e9c4e 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -28,21 +28,6 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd32.h>
 
-asmlinkage int compat_sys_sched_rr_get_interval(compat_pid_t pid,
-						struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
-	set_fs(old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 static inline void
 do_compat_cache_op(unsigned long start, unsigned long end, int flags)
 {
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e40c9bd..2ae6591 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -17,8 +17,6 @@
 	select GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/avr32/include/asm/ptrace.h b/arch/avr32/include/asm/ptrace.h
index 8d3c412..630e4f9 100644
--- a/arch/avr32/include/asm/ptrace.h
+++ b/arch/avr32/include/asm/ptrace.h
@@ -21,6 +21,7 @@
 #define user_mode(regs)                 (((regs)->sr & MODE_MASK) == MODE_USER)
 #define instruction_pointer(regs)       ((regs)->pc)
 #define profile_pc(regs)                instruction_pointer(regs)
+#define user_stack_pointer(regs)	((regs)->sp)
 
 static __inline__ int valid_user_regs(struct pt_regs *regs)
 {
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index f05a980..0bdf637 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -39,7 +39,6 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/avr32/include/uapi/asm/signal.h b/arch/avr32/include/uapi/asm/signal.h
index eb46f61..1b77a93 100644
--- a/arch/avr32/include/uapi/asm/signal.h
+++ b/arch/avr32/include/uapi/asm/signal.h
@@ -89,12 +89,6 @@
 #define SA_NOMASK	SA_NODEFER
 #define SA_ONESHOT	SA_RESETHAND
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index ab9ff40..b6f3ad5 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -45,8 +45,6 @@
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 27d7075..127826f 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -1,4 +1,3 @@
-include include/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
@@ -17,6 +16,7 @@
 generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kvm_para.h
 generic-y += local64.h
 generic-y += local.h
 generic-y += mman.h
@@ -44,7 +44,3 @@
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += xor.h
-
-header-y += bfin_sport.h
-header-y += cachectl.h
-header-y += fixed_code.h
diff --git a/arch/blackfin/include/asm/bfin_sport.h b/arch/blackfin/include/asm/bfin_sport.h
index f8907ea..50b9dfd 100644
--- a/arch/blackfin/include/asm/bfin_sport.h
+++ b/arch/blackfin/include/asm/bfin_sport.h
@@ -5,65 +5,12 @@
  *
  * Licensed under the GPL-2 or later.
  */
-
 #ifndef __BFIN_SPORT_H__
 #define __BFIN_SPORT_H__
 
-/* Sport mode: it can be set to TDM, i2s or others */
-#define NORM_MODE	0x0
-#define TDM_MODE	0x1
-#define I2S_MODE	0x2
-#define NDSO_MODE	0x3
-
-/* Data format, normal, a-law or u-law */
-#define NORM_FORMAT	0x0
-#define ALAW_FORMAT	0x2
-#define ULAW_FORMAT	0x3
-
-/* Function driver which use sport must initialize the structure */
-struct sport_config {
-	/* TDM (multichannels), I2S or other mode */
-	unsigned int mode:3;
-	unsigned int polled;	/* use poll instead of irq when set */
-
-	/* if TDM mode is selected, channels must be set */
-	int channels;	/* Must be in 8 units */
-	unsigned int frame_delay:4;	/* Delay between frame sync pulse and first bit */
-
-	/* I2S mode */
-	unsigned int right_first:1;	/* Right stereo channel first */
-
-	/* In mormal mode, the following item need to be set */
-	unsigned int lsb_first:1;	/* order of transmit or receive data */
-	unsigned int fsync:1;	/* Frame sync required */
-	unsigned int data_indep:1;	/* data independent frame sync generated */
-	unsigned int act_low:1;	/* Active low TFS */
-	unsigned int late_fsync:1;	/* Late frame sync */
-	unsigned int tckfe:1;
-	unsigned int sec_en:1;	/* Secondary side enabled */
-
-	/* Choose clock source */
-	unsigned int int_clk:1;	/* Internal or external clock */
-
-	/* If external clock is used, the following fields are ignored */
-	int serial_clk;
-	int fsync_clk;
-
-	unsigned int data_format:2;	/* Normal, u-law or a-law */
-
-	int word_len;		/* How length of the word in bits, 3-32 bits */
-	int dma_enabled;
-};
-
-/* Userspace interface */
-#define SPORT_IOC_MAGIC		'P'
-#define SPORT_IOC_CONFIG	_IOWR('P', 0x01, struct sport_config)
-#define SPORT_IOC_GET_SYSTEMCLOCK         _IOR('P', 0x02, unsigned long)
-#define SPORT_IOC_SET_BAUDRATE            _IOW('P', 0x03, unsigned long)
-
-#ifdef __KERNEL__
 
 #include <linux/types.h>
+#include <uapi/asm/bfin_sport.h>
 
 /*
  * All Blackfin system MMRs are padded to 32bits even if the register
@@ -122,76 +69,3 @@
 })
 
 #endif
-
-/* SPORT_TCR1 Masks */
-#define TSPEN		0x0001	/* TX enable */
-#define ITCLK		0x0002	/* Internal TX Clock Select */
-#define TDTYPE		0x000C	/* TX Data Formatting Select */
-#define DTYPE_NORM	0x0000	/* Data Format Normal */
-#define DTYPE_ULAW	0x0008	/* Compand Using u-Law */
-#define DTYPE_ALAW	0x000C	/* Compand Using A-Law */
-#define TLSBIT		0x0010	/* TX Bit Order */
-#define ITFS		0x0200	/* Internal TX Frame Sync Select */
-#define TFSR		0x0400	/* TX Frame Sync Required Select */
-#define DITFS		0x0800	/* Data Independent TX Frame Sync Select */
-#define LTFS		0x1000	/* Low TX Frame Sync Select */
-#define LATFS		0x2000	/* Late TX Frame Sync Select */
-#define TCKFE		0x4000	/* TX Clock Falling Edge Select */
-
-/* SPORT_TCR2 Masks */
-#define SLEN		0x001F	/* SPORT TX Word Length (2 - 31) */
-#define DP_SLEN(x)	BFIN_DEPOSIT(SLEN, x)
-#define EX_SLEN(x)	BFIN_EXTRACT(SLEN, x)
-#define TXSE		0x0100	/* TX Secondary Enable */
-#define TSFSE		0x0200	/* TX Stereo Frame Sync Enable */
-#define TRFST		0x0400	/* TX Right-First Data Order */
-
-/* SPORT_RCR1 Masks */
-#define RSPEN		0x0001	/* RX enable */
-#define IRCLK		0x0002	/* Internal RX Clock Select */
-#define RDTYPE		0x000C	/* RX Data Formatting Select */
-/* DTYPE_* defined above */
-#define RLSBIT		0x0010	/* RX Bit Order */
-#define IRFS		0x0200	/* Internal RX Frame Sync Select */
-#define RFSR		0x0400	/* RX Frame Sync Required Select */
-#define LRFS		0x1000	/* Low RX Frame Sync Select */
-#define LARFS		0x2000	/* Late RX Frame Sync Select */
-#define RCKFE		0x4000	/* RX Clock Falling Edge Select */
-
-/* SPORT_RCR2 Masks */
-/* SLEN defined above */
-#define RXSE		0x0100	/* RX Secondary Enable */
-#define RSFSE		0x0200	/* RX Stereo Frame Sync Enable */
-#define RRFST		0x0400	/* Right-First Data Order */
-
-/* SPORT_STAT Masks */
-#define RXNE		0x0001	/* RX FIFO Not Empty Status */
-#define RUVF		0x0002	/* RX Underflow Status */
-#define ROVF		0x0004	/* RX Overflow Status */
-#define TXF		0x0008	/* TX FIFO Full Status */
-#define TUVF		0x0010	/* TX Underflow Status */
-#define TOVF		0x0020	/* TX Overflow Status */
-#define TXHRE		0x0040	/* TX Hold Register Empty */
-
-/* SPORT_MCMC1 Masks */
-#define SP_WOFF		0x03FF	/* Multichannel Window Offset Field */
-#define DP_SP_WOFF(x)	BFIN_DEPOSIT(SP_WOFF, x)
-#define EX_SP_WOFF(x)	BFIN_EXTRACT(SP_WOFF, x)
-#define SP_WSIZE	0xF000	/* Multichannel Window Size Field */
-#define DP_SP_WSIZE(x)	BFIN_DEPOSIT(SP_WSIZE, x)
-#define EX_SP_WSIZE(x)	BFIN_EXTRACT(SP_WSIZE, x)
-
-/* SPORT_MCMC2 Masks */
-#define MCCRM		0x0003	/* Multichannel Clock Recovery Mode */
-#define REC_BYPASS	0x0000	/* Bypass Mode (No Clock Recovery) */
-#define REC_2FROM4	0x0002	/* Recover 2 MHz Clock from 4 MHz Clock */
-#define REC_8FROM16	0x0003	/* Recover 8 MHz Clock from 16 MHz Clock */
-#define MCDTXPE		0x0004	/* Multichannel DMA Transmit Packing */
-#define MCDRXPE		0x0008	/* Multichannel DMA Receive Packing */
-#define MCMEN		0x0010	/* Multichannel Frame Mode Enable */
-#define FSDR		0x0080	/* Multichannel Frame Sync to Data Relationship */
-#define MFD		0xF000	/* Multichannel Frame Delay */
-#define DP_MFD(x)	BFIN_DEPOSIT(MFD, x)
-#define EX_MFD(x)	BFIN_EXTRACT(MFD, x)
-
-#endif
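The register masks and the ioctl interface removed here move verbatim into the new uapi header added later in this patch (arch/blackfin/include/uapi/asm/bfin_sport.h). A hedged sketch of how a userspace client might drive the exported ioctl interface — the /dev/sport0 node name is an assumption, not something this patch establishes:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/bfin_sport.h>	/* exported via header-y in this series */

	int main(void)
	{
		struct sport_config cfg = {
			.mode        = I2S_MODE,
			.word_len    = 16,		/* bits per word */
			.int_clk     = 1,		/* internally generated clocks */
			.serial_clk  = 3072000,
			.fsync_clk   = 48000,
			.data_format = NORM_FORMAT,
		};
		int fd = open("/dev/sport0", O_RDWR);	/* hypothetical device node */

		if (fd < 0 || ioctl(fd, SPORT_IOC_CONFIG, &cfg) < 0) {
			perror("sport");
			return 1;
		}
		close(fd);
		return 0;
	}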
diff --git a/arch/blackfin/include/asm/bfin_twi.h b/arch/blackfin/include/asm/bfin_twi.h
index f4a0727..90c3c00 100644
--- a/arch/blackfin/include/asm/bfin_twi.h
+++ b/arch/blackfin/include/asm/bfin_twi.h
@@ -61,7 +61,7 @@
 	int			cur_msg;
 	u16			saved_clkdiv;
 	u16			saved_control;
-	struct bfin_twi_regs	*regs_base;
+	struct bfin_twi_regs __iomem *regs_base;
 };
 
 #define DEFINE_TWI_REG(reg_name, reg) \
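The __iomem annotation added above changes nothing at runtime; it tells sparse that regs_base points at MMIO, so direct dereferences get flagged. A sketch of the pattern being enforced (the register layout is illustrative, not the real bfin_twi_regs):

	#include <linux/io.h>
	#include <linux/types.h>

	struct demo_regs {
		u16 clkdiv;
		u16 __pad;
		u16 control;
	};

	static u16 demo_read_control(struct demo_regs __iomem *regs)
	{
		return readw(&regs->control);	/* ok: access via accessor */
		/* return regs->control;  <- sparse: dereference of __iomem pointer */
	}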
diff --git a/arch/blackfin/include/asm/fixed_code.h b/arch/blackfin/include/asm/fixed_code.h
index 5395088..bc330f0 100644
--- a/arch/blackfin/include/asm/fixed_code.h
+++ b/arch/blackfin/include/asm/fixed_code.h
@@ -6,11 +6,11 @@
  *
  * Licensed under the GPL-2 or later.
  */
-
 #ifndef __BFIN_ASM_FIXED_CODE_H__
 #define __BFIN_ASM_FIXED_CODE_H__
 
-#ifdef __KERNEL__
+#include <uapi/asm/fixed_code.h>
+
 #ifndef __ASSEMBLY__
 #include <linux/linkage.h>
 #include <linux/ptrace.h>
@@ -28,29 +28,3 @@
 extern void sigreturn_stub(void);
 #endif
 #endif
-
-#ifndef CONFIG_PHY_RAM_BASE_ADDRESS
-#define CONFIG_PHY_RAM_BASE_ADDRESS	0x0
-#endif
-
-#define FIXED_CODE_START	(CONFIG_PHY_RAM_BASE_ADDRESS + 0x400)
-
-#define SIGRETURN_STUB		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x400)
-
-#define ATOMIC_SEQS_START	(CONFIG_PHY_RAM_BASE_ADDRESS + 0x410)
-
-#define ATOMIC_XCHG32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x410)
-#define ATOMIC_CAS32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x420)
-#define ATOMIC_ADD32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x430)
-#define ATOMIC_SUB32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x440)
-#define ATOMIC_IOR32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x450)
-#define ATOMIC_AND32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x460)
-#define ATOMIC_XOR32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x470)
-
-#define ATOMIC_SEQS_END		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x480)
-
-#define SAFE_USER_INSTRUCTION   (CONFIG_PHY_RAM_BASE_ADDRESS + 0x480)
-
-#define FIXED_CODE_END		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x490)
-
-#endif
diff --git a/arch/blackfin/include/asm/kvm_para.h b/arch/blackfin/include/asm/kvm_para.h
deleted file mode 100644
index 14fab8f..0000000
--- a/arch/blackfin/include/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h
index dcca3e6..b866392 100644
--- a/arch/blackfin/include/asm/pgtable.h
+++ b/arch/blackfin/include/asm/pgtable.h
@@ -83,8 +83,6 @@
 #define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)
 extern char empty_zero_page[];
 
-extern unsigned int kobjsize(const void *objp);
-
 #define swapper_pg_dir ((pgd_t *) 0)
 /*
  * No page table caches to initialise.
diff --git a/arch/blackfin/include/asm/ptrace.h b/arch/blackfin/include/asm/ptrace.h
index 10d8641..c004915 100644
--- a/arch/blackfin/include/asm/ptrace.h
+++ b/arch/blackfin/include/asm/ptrace.h
@@ -3,102 +3,13 @@
  *
  * Licensed under the GPL-2 or later.
  */
-
 #ifndef _BFIN_PTRACE_H
 #define _BFIN_PTRACE_H
 
-/*
- * GCC defines register number like this:
- * -----------------------------
- *       0 - 7 are data registers R0-R7
- *       8 - 15 are address registers P0-P7
- *      16 - 31 dsp registers I/B/L0 -- I/B/L3 & M0--M3
- *      32 - 33 A registers A0 & A1
- *      34 -    status register
- * -----------------------------
- *
- * We follows above, except:
- *      32-33 --- Low 32-bit of A0&1
- *      34-35 --- High 8-bit of A0&1
- */
+#include <uapi/asm/ptrace.h>
 
 #ifndef __ASSEMBLY__
 
-struct task_struct;
-
-/* this struct defines the way the registers are stored on the
-   stack during a system call. */
-
-struct pt_regs {
-	long orig_pc;
-	long ipend;
-	long seqstat;
-	long rete;
-	long retn;
-	long retx;
-	long pc;		/* PC == RETI */
-	long rets;
-	long reserved;		/* Used as scratch during system calls */
-	long astat;
-	long lb1;
-	long lb0;
-	long lt1;
-	long lt0;
-	long lc1;
-	long lc0;
-	long a1w;
-	long a1x;
-	long a0w;
-	long a0x;
-	long b3;
-	long b2;
-	long b1;
-	long b0;
-	long l3;
-	long l2;
-	long l1;
-	long l0;
-	long m3;
-	long m2;
-	long m1;
-	long m0;
-	long i3;
-	long i2;
-	long i1;
-	long i0;
-	long usp;
-	long fp;
-	long p5;
-	long p4;
-	long p3;
-	long p2;
-	long p1;
-	long p0;
-	long r7;
-	long r6;
-	long r5;
-	long r4;
-	long r3;
-	long r2;
-	long r1;
-	long r0;
-	long orig_r0;
-	long orig_p0;
-	long syscfg;
-};
-
-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
-#define PTRACE_GETREGS            12
-#define PTRACE_SETREGS            13	/* ptrace signal  */
-
-#define PTRACE_GETFDPIC           31	/* get the ELF fdpic loadmap address */
-#define PTRACE_GETFDPIC_EXEC       0	/* [addr] request the executable loadmap */
-#define PTRACE_GETFDPIC_INTERP     1	/* [addr] request the interpreter loadmap */
-
-#define PS_S  (0x0002)
-
-#ifdef __KERNEL__
-
 /* user_mode returns true if only one bit is set in IPEND, other than the
    master interrupt enable.  */
 #define user_mode(regs) (!(((regs)->ipend & ~0x10) & (((regs)->ipend & ~0x10) - 1)))
@@ -106,6 +17,7 @@
 #define arch_has_single_step()	(1)
 /* common code demands this function */
 #define ptrace_disable(child) user_disable_single_step(child)
+#define current_user_stack_pointer() rdusp()
 
 extern int is_user_addr_valid(struct task_struct *child,
 			      unsigned long start, unsigned long len);
@@ -126,75 +38,5 @@
 
 #include <asm-generic/ptrace.h>
 
-#endif  /*  __KERNEL__  */
-
 #endif				/* __ASSEMBLY__ */
-
-/*
- * Offsets used by 'ptrace' system call interface.
- */
-
-#define PT_R0 204
-#define PT_R1 200
-#define PT_R2 196
-#define PT_R3 192
-#define PT_R4 188
-#define PT_R5 184
-#define PT_R6 180
-#define PT_R7 176
-#define PT_P0 172
-#define PT_P1 168
-#define PT_P2 164
-#define PT_P3 160
-#define PT_P4 156
-#define PT_P5 152
-#define PT_FP 148
-#define PT_USP 144
-#define PT_I0 140
-#define PT_I1 136
-#define PT_I2 132
-#define PT_I3 128
-#define PT_M0 124
-#define PT_M1 120
-#define PT_M2 116
-#define PT_M3 112
-#define PT_L0 108
-#define PT_L1 104
-#define PT_L2 100
-#define PT_L3 96
-#define PT_B0 92
-#define PT_B1 88
-#define PT_B2 84
-#define PT_B3 80
-#define PT_A0X 76
-#define PT_A0W 72
-#define PT_A1X 68
-#define PT_A1W 64
-#define PT_LC0 60
-#define PT_LC1 56
-#define PT_LT0 52
-#define PT_LT1 48
-#define PT_LB0 44
-#define PT_LB1 40
-#define PT_ASTAT 36
-#define PT_RESERVED 32
-#define PT_RETS 28
-#define PT_PC 24
-#define PT_RETX 20
-#define PT_RETN 16
-#define PT_RETE 12
-#define PT_SEQSTAT 8
-#define PT_IPEND 4
-
-#define PT_ORIG_R0 208
-#define PT_ORIG_P0 212
-#define PT_SYSCFG 216
-#define PT_TEXT_ADDR 220
-#define PT_TEXT_END_ADDR 224
-#define PT_DATA_ADDR 228
-#define PT_FDPIC_EXEC 232
-#define PT_FDPIC_INTERP 236
-
-#define PT_LAST_PSEUDO PT_FDPIC_INTERP
-
 #endif				/* _BFIN_PTRACE_H */
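The user_mode() test kept above relies on the classic x & (x - 1) idiom: after masking out the master-enable bit (0x10), IPEND may have at most one bit set if the kernel was entered from user space. Restated standalone:

	#include <assert.h>

	static int at_most_one_bit_set(unsigned long x)
	{
		return (x & (x - 1)) == 0;	/* clears lowest set bit; zero iff <= 1 bit */
	}

	int main(void)
	{
		assert(at_most_one_bit_set(0x00));
		assert(at_most_one_bit_set(0x40));	/* single IPEND level */
		assert(!at_most_one_bit_set(0x44));	/* nested interrupts */
		return 0;
	}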
diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
index 5cc1115..461bb54 100644
--- a/arch/blackfin/include/asm/uaccess.h
+++ b/arch/blackfin/include/asm/uaccess.h
@@ -34,23 +34,6 @@
 
 #define access_ok(type, addr, size) _access_ok((unsigned long)(addr), (size))
 
-static inline int is_in_rom(unsigned long addr)
-{
-	/*
-	 * What we are really trying to do is determine if addr is
-	 * in an allocated kernel memory region. If not then assume
-	 * we cannot free it or otherwise de-allocate it. Ideally
-	 * we could restrict this to really being in a ROM or flash,
-	 * but that would need to be done on a board by board basis,
-	 * not globally.
-	 */
-	if ((addr < _ramstart) || (addr >= _ramend))
-		return (1);
-
-	/* Default case, not in ROM */
-	return (0);
-}
-
 /*
  * The fs value determines whether argument validity checking should be
  * performed or not.  If get_fs() == USER_DS, checking is performed, with
@@ -89,7 +72,7 @@
 	({							\
 		int _err = 0;					\
 		typeof(*(p)) _x = (x);				\
-		typeof(*(p)) *_p = (p);				\
+		typeof(*(p)) __user *_p = (p);				\
 		if (!access_ok(VERIFY_WRITE, _p, sizeof(*(_p)))) {\
 			_err = -EFAULT;				\
 		}						\
@@ -108,8 +91,8 @@
 			long _xl, _xh;				\
 			_xl = ((long *)&_x)[0];			\
 			_xh = ((long *)&_x)[1];			\
-			__put_user_asm(_xl, ((long *)_p)+0, );	\
-			__put_user_asm(_xh, ((long *)_p)+1, );	\
+			__put_user_asm(_xl, ((long __user *)_p)+0, );	\
+			__put_user_asm(_xh, ((long __user *)_p)+1, );	\
 		} break;					\
 		default:					\
 			_err = __put_user_bad();		\
@@ -136,7 +119,7 @@
  * aliasing issues.
  */
 
-#define __ptr(x) ((unsigned long *)(x))
+#define __ptr(x) ((unsigned long __force *)(x))
 
 #define __put_user_asm(x,p,bhw)				\
 	__asm__ (#bhw"[%1] = %0;\n\t"			\
@@ -216,12 +199,12 @@
  */
 
 static inline long __must_check
-strncpy_from_user(char *dst, const char *src, long count)
+strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	char *tmp;
 	if (!access_ok(VERIFY_READ, src, 1))
 		return -EFAULT;
-	strncpy(dst, src, count);
+	strncpy(dst, (const char __force *)src, count);
 	for (tmp = dst; *tmp && count > 0; tmp++, count--) ;
 	return (tmp - dst);
 }
@@ -237,18 +220,18 @@
  * On exception, returns 0.
  * If the string is too long, returns a value greater than n.
  */
-static inline long __must_check strnlen_user(const char *src, long n)
+static inline long __must_check strnlen_user(const char __user *src, long n)
 {
 	if (!access_ok(VERIFY_READ, src, 1))
 		return 0;
-	return strnlen(src, n) + 1;
+	return strnlen((const char __force *)src, n) + 1;
 }
 
-static inline long __must_check strlen_user(const char *src)
+static inline long __must_check strlen_user(const char __user *src)
 {
 	if (!access_ok(VERIFY_READ, src, 1))
 		return 0;
-	return strlen(src) + 1;
+	return strlen((const char __force *)src) + 1;
 }
 
 /*
@@ -256,11 +239,11 @@
  */
 
 static inline unsigned long __must_check
-__clear_user(void *to, unsigned long n)
+__clear_user(void __user *to, unsigned long n)
 {
 	if (!access_ok(VERIFY_WRITE, to, n))
 		return n;
-	memset(to, 0, n);
+	memset((void __force *)to, 0, n);
 	return 0;
 }
 
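The churn in this header is all sparse address-space annotation: __user marks pointers that must not be dereferenced directly, and __force documents each deliberate opt-out (safe on this nommu port only after access_ok() has passed). The contract in miniature, as a kernel-side sketch:

	static void demo(char __user *uptr, char *kptr)
	{
		*kptr = 'x';			/* fine: plain kernel pointer */
		/* *uptr = 'x'; */		/* sparse: incompatible address space */
		*(char __force *)uptr = 'x';	/* explicit, greppable opt-out */
	}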
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index 460514a1..e943cb1 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -3,437 +3,11 @@
  *
  * Licensed under the GPL-2 or later.
  */
-
 #ifndef __ASM_BFIN_UNISTD_H
 #define __ASM_BFIN_UNISTD_H
-/*
- * This file contains the system call numbers.
- */
-#define __NR_restart_syscall	  0
-#define __NR_exit		  1
-				/* 2 __NR_fork not supported on nommu */
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-				/* 7 __NR_waitpid obsolete */
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_chown		 16
-				/* 17 __NR_break obsolete */
-				/* 18 __NR_oldstat obsolete */
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-				/* 22 __NR_umount obsolete */
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-				/* 28 __NR_oldfstat obsolete */
-#define __NR_pause		 29
-				/* 30 __NR_utime obsolete */
-				/* 31 __NR_stty obsolete */
-				/* 32 __NR_gtty obsolete */
-#define __NR_access		 33
-#define __NR_nice		 34
-				/* 35 __NR_ftime obsolete */
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-				/* 44 __NR_prof obsolete */
-#define __NR_brk		 45
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-				/* 48 __NR_signal obsolete */
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_acct		 51
-#define __NR_umount2		 52
-				/* 53 __NR_lock obsolete */
-#define __NR_ioctl		 54
-#define __NR_fcntl		 55
-				/* 56 __NR_mpx obsolete */
-#define __NR_setpgid		 57
-				/* 58 __NR_ulimit obsolete */
-				/* 59 __NR_oldolduname obsolete */
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-				/* 67 __NR_sigaction obsolete */
-#define __NR_sgetmask		 68
-#define __NR_ssetmask		 69
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-				/* 72 __NR_sigsuspend obsolete */
-				/* 73 __NR_sigpending obsolete */
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-				/* 76 __NR_old_getrlimit obsolete */
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-				/* 82 __NR_select obsolete */
-#define __NR_symlink		 83
-				/* 84 __NR_oldlstat obsolete */
-#define __NR_readlink		 85
-				/* 86 __NR_uselib obsolete */
-				/* 87 __NR_swapon obsolete */
-#define __NR_reboot		 88
-				/* 89 __NR_readdir obsolete */
-				/* 90 __NR_mmap obsolete */
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-#define __NR_fchown		 95
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-				/* 98 __NR_profil obsolete */
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-				/* 101 __NR_ioperm */
-				/* 102 __NR_socketcall obsolete */
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-				/* 109 __NR_olduname obsolete */
-				/* 110 __NR_iopl obsolete */
-#define __NR_vhangup		111
-				/* 112 __NR_idle obsolete */
-				/* 113 __NR_vm86old */
-#define __NR_wait4		114
-				/* 115 __NR_swapoff obsolete */
-#define __NR_sysinfo		116
-				/* 117 __NR_ipc oboslete */
-#define __NR_fsync		118
-				/* 119 __NR_sigreturn obsolete */
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-				/* 123 __NR_modify_ldt obsolete */
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-				/* 126 __NR_sigprocmask obsolete */
-				/* 127 __NR_create_module obsolete */
-#define __NR_init_module	128
-#define __NR_delete_module	129
-				/* 130 __NR_get_kernel_syms obsolete */
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-				/* 135 was sysfs */
-#define __NR_personality	136
-				/* 137 __NR_afs_syscall */
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR_getdents		141
-				/* 142 __NR__newselect obsolete */
-#define __NR_flock		143
-				/* 144 __NR_msync obsolete */
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-				/* 150 __NR_mlock */
-				/* 151 __NR_munlock */
-				/* 152 __NR_mlockall */
-				/* 153 __NR_munlockall */
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-				/* 166 __NR_vm86 */
-				/* 167 __NR_query_module */
-				/* 168 __NR_poll */
-#define __NR_nfsservctl		169
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_prctl		172
-#define __NR_rt_sigreturn	173
-#define __NR_rt_sigaction	174
-#define __NR_rt_sigprocmask	175
-#define __NR_rt_sigpending	176
-#define __NR_rt_sigtimedwait	177
-#define __NR_rt_sigqueueinfo	178
-#define __NR_rt_sigsuspend	179
-#define __NR_pread		180
-#define __NR_pwrite		181
-#define __NR_lchown		182
-#define __NR_getcwd		183
-#define __NR_capget		184
-#define __NR_capset		185
-#define __NR_sigaltstack	186
-#define __NR_sendfile		187
-				/* 188 __NR_getpmsg */
-				/* 189 __NR_putpmsg */
-#define __NR_vfork		190
-#define __NR_getrlimit		191
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_chown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_lchown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
-#define __NR_pivot_root		217
-				/* 218 __NR_mincore */
-				/* 219 __NR_madvise */
-#define __NR_getdents64		220
-#define __NR_fcntl64		221
-				/* 222 reserved for TUX */
-				/* 223 reserved for TUX */
-#define __NR_gettid		224
-#define __NR_readahead		225
-#define __NR_setxattr		226
-#define __NR_lsetxattr		227
-#define __NR_fsetxattr		228
-#define __NR_getxattr		229
-#define __NR_lgetxattr		230
-#define __NR_fgetxattr		231
-#define __NR_listxattr		232
-#define __NR_llistxattr		233
-#define __NR_flistxattr		234
-#define __NR_removexattr	235
-#define __NR_lremovexattr	236
-#define __NR_fremovexattr	237
-#define __NR_tkill		238
-#define __NR_sendfile64		239
-#define __NR_futex		240
-#define __NR_sched_setaffinity	241
-#define __NR_sched_getaffinity	242
-				/* 243 __NR_set_thread_area */
-				/* 244 __NR_get_thread_area */
-#define __NR_io_setup		245
-#define __NR_io_destroy		246
-#define __NR_io_getevents	247
-#define __NR_io_submit		248
-#define __NR_io_cancel		249
-				/* 250 __NR_alloc_hugepages */
-				/* 251 __NR_free_hugepages */
-#define __NR_exit_group		252
-#define __NR_lookup_dcookie     253
-#define __NR_bfin_spinlock      254
 
-#define __NR_epoll_create	255
-#define __NR_epoll_ctl		256
-#define __NR_epoll_wait		257
-				/* 258 __NR_remap_file_pages */
-#define __NR_set_tid_address	259
-#define __NR_timer_create	260
-#define __NR_timer_settime	261
-#define __NR_timer_gettime	262
-#define __NR_timer_getoverrun	263
-#define __NR_timer_delete	264
-#define __NR_clock_settime	265
-#define __NR_clock_gettime	266
-#define __NR_clock_getres	267
-#define __NR_clock_nanosleep	268
-#define __NR_statfs64		269
-#define __NR_fstatfs64		270
-#define __NR_tgkill		271
-#define __NR_utimes		272
-#define __NR_fadvise64_64	273
-				/* 274 __NR_vserver */
-				/* 275 __NR_mbind */
-				/* 276 __NR_get_mempolicy */
-				/* 277 __NR_set_mempolicy */
-#define __NR_mq_open 		278
-#define __NR_mq_unlink		279
-#define __NR_mq_timedsend	280
-#define __NR_mq_timedreceive	281
-#define __NR_mq_notify		282
-#define __NR_mq_getsetattr	283
-#define __NR_kexec_load		284
-#define __NR_waitid		285
-#define __NR_add_key		286
-#define __NR_request_key	287
-#define __NR_keyctl		288
-#define __NR_ioprio_set		289
-#define __NR_ioprio_get		290
-#define __NR_inotify_init	291
-#define __NR_inotify_add_watch	292
-#define __NR_inotify_rm_watch	293
-				/* 294 __NR_migrate_pages */
-#define __NR_openat		295
-#define __NR_mkdirat		296
-#define __NR_mknodat		297
-#define __NR_fchownat		298
-#define __NR_futimesat		299
-#define __NR_fstatat64		300
-#define __NR_unlinkat		301
-#define __NR_renameat		302
-#define __NR_linkat		303
-#define __NR_symlinkat		304
-#define __NR_readlinkat		305
-#define __NR_fchmodat		306
-#define __NR_faccessat		307
-#define __NR_pselect6		308
-#define __NR_ppoll		309
-#define __NR_unshare		310
+#include <uapi/asm/unistd.h>
 
-/* Blackfin private syscalls */
-#define __NR_sram_alloc		311
-#define __NR_sram_free		312
-#define __NR_dma_memcpy		313
-
-/* socket syscalls */
-#define __NR_accept		314
-#define __NR_bind		315
-#define __NR_connect		316
-#define __NR_getpeername	317
-#define __NR_getsockname	318
-#define __NR_getsockopt		319
-#define __NR_listen		320
-#define __NR_recv		321
-#define __NR_recvfrom		322
-#define __NR_recvmsg		323
-#define __NR_send		324
-#define __NR_sendmsg		325
-#define __NR_sendto		326
-#define __NR_setsockopt		327
-#define __NR_shutdown		328
-#define __NR_socket		329
-#define __NR_socketpair		330
-
-/* sysv ipc syscalls */
-#define __NR_semctl		331
-#define __NR_semget		332
-#define __NR_semop		333
-#define __NR_msgctl		334
-#define __NR_msgget		335
-#define __NR_msgrcv		336
-#define __NR_msgsnd		337
-#define __NR_shmat		338
-#define __NR_shmctl		339
-#define __NR_shmdt		340
-#define __NR_shmget		341
-
-#define __NR_splice		342
-#define __NR_sync_file_range	343
-#define __NR_tee		344
-#define __NR_vmsplice		345
-
-#define __NR_epoll_pwait	346
-#define __NR_utimensat		347
-#define __NR_signalfd		348
-#define __NR_timerfd_create	349
-#define __NR_eventfd		350
-#define __NR_pread64		351
-#define __NR_pwrite64		352
-#define __NR_fadvise64		353
-#define __NR_set_robust_list	354
-#define __NR_get_robust_list	355
-#define __NR_fallocate		356
-#define __NR_semtimedop		357
-#define __NR_timerfd_settime	358
-#define __NR_timerfd_gettime	359
-#define __NR_signalfd4		360
-#define __NR_eventfd2		361
-#define __NR_epoll_create1	362
-#define __NR_dup3		363
-#define __NR_pipe2		364
-#define __NR_inotify_init1	365
-#define __NR_preadv		366
-#define __NR_pwritev		367
-#define __NR_rt_tgsigqueueinfo	368
-#define __NR_perf_event_open	369
-#define __NR_recvmmsg		370
-#define __NR_fanotify_init	371
-#define __NR_fanotify_mark	372
-#define __NR_prlimit64		373
-#define __NR_cacheflush		374
-#define __NR_name_to_handle_at	375
-#define __NR_open_by_handle_at	376
-#define __NR_clock_adjtime	377
-#define __NR_syncfs		378
-#define __NR_setns		379
-#define __NR_sendmmsg		380
-#define __NR_process_vm_readv	381
-#define __NR_process_vm_writev	382
-
-#define __NR_syscall		383
-#define NR_syscalls		__NR_syscall
-
-/* Old optional stuff no one actually uses */
-#define __IGNORE_sysfs
-#define __IGNORE_uselib
-
-/* Implement the newer interfaces */
-#define __IGNORE_mmap
-#define __IGNORE_poll
-#define __IGNORE_select
-#define __IGNORE_utime
-
-/* Not relevant on no-mmu */
-#define __IGNORE_swapon
-#define __IGNORE_swapoff
-#define __IGNORE_msync
-#define __IGNORE_mlock
-#define __IGNORE_munlock
-#define __IGNORE_mlockall
-#define __IGNORE_munlockall
-#define __IGNORE_mincore
-#define __IGNORE_madvise
-#define __IGNORE_remap_file_pages
-#define __IGNORE_mbind
-#define __IGNORE_get_mempolicy
-#define __IGNORE_set_mempolicy
-#define __IGNORE_migrate_pages
-#define __IGNORE_move_pages
-#define __IGNORE_getcpu
-
-#ifdef __KERNEL__
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
@@ -446,7 +20,6 @@
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_VFORK
 
 /*
@@ -457,6 +30,4 @@
  */
 #define cond_syscall(x) asm(".weak\t_" #x "\n\t.set\t_" #x ",_sys_ni_syscall");
 
-#endif	/* __KERNEL__ */
-
 #endif				/* __ASM_BFIN_UNISTD_H */
diff --git a/arch/blackfin/include/mach-common/irq.h b/arch/blackfin/include/mach-common/irq.h
index cab14e9..af9fc81 100644
--- a/arch/blackfin/include/mach-common/irq.h
+++ b/arch/blackfin/include/mach-common/irq.h
@@ -40,8 +40,6 @@
 #define IRQ_HWERR		5	/* Hardware Error */
 #define IRQ_CORETMR		6	/* Core timer */
 
-#define BFIN_IRQ(x)		((x) + 7)
-
 #define IVG7			7
 #define IVG8			8
 #define IVG9			9
@@ -52,6 +50,9 @@
 #define IVG14			14
 #define IVG15			15
 
+#define BFIN_IRQ(x)		((x) + IVG7)
+#define BFIN_SYSIRQ(x)		((x) - IVG7)
+
 #define NR_IRQS			(NR_MACH_IRQS + NR_SPARE_IRQS)
 
 #endif
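BFIN_IRQ() and BFIN_SYSIRQ() are now defined as a pair of inverses around the IVG7 base, which lets the bf609 headers below drop their private remapping. Restated locally for illustration:

	#include <assert.h>

	#define IVG7		7
	#define BFIN_IRQ(x)	((x) + IVG7)	/* controller line -> Linux IRQ */
	#define BFIN_SYSIRQ(x)	((x) - IVG7)	/* Linux IRQ -> controller line */

	int main(void)
	{
		for (int sys = 0; sys < 32; sys++)
			assert(BFIN_SYSIRQ(BFIN_IRQ(sys)) == sys);
		return 0;
	}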
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index baebb3d..0bd28f7 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -1,3 +1,19 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += bfin_sport.h
+header-y += byteorder.h
+header-y += cachectl.h
+header-y += fcntl.h
+header-y += fixed_code.h
+header-y += ioctls.h
+header-y += kvm_para.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += stat.h
+header-y += swab.h
+header-y += unistd.h
diff --git a/arch/blackfin/include/uapi/asm/bfin_sport.h b/arch/blackfin/include/uapi/asm/bfin_sport.h
new file mode 100644
index 0000000..c086de87
--- /dev/null
+++ b/arch/blackfin/include/uapi/asm/bfin_sport.h
@@ -0,0 +1,136 @@
+/*
+ * bfin_sport.h - interface to Blackfin SPORTs
+ *
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _UAPI__BFIN_SPORT_H__
+#define _UAPI__BFIN_SPORT_H__
+
+/* Sport mode: it can be set to TDM, i2s or others */
+#define NORM_MODE	0x0
+#define TDM_MODE	0x1
+#define I2S_MODE	0x2
+#define NDSO_MODE	0x3
+
+/* Data format, normal, a-law or u-law */
+#define NORM_FORMAT	0x0
+#define ALAW_FORMAT	0x2
+#define ULAW_FORMAT	0x3
+
+/* Function driver which use sport must initialize the structure */
+struct sport_config {
+	/* TDM (multichannels), I2S or other mode */
+	unsigned int mode:3;
+	unsigned int polled;	/* use poll instead of irq when set */
+
+	/* if TDM mode is selected, channels must be set */
+	int channels;	/* Must be in 8 units */
+	unsigned int frame_delay:4;	/* Delay between frame sync pulse and first bit */
+
+	/* I2S mode */
+	unsigned int right_first:1;	/* Right stereo channel first */
+
+	/* In normal mode, the following items need to be set */
+	unsigned int lsb_first:1;	/* order of transmit or receive data */
+	unsigned int fsync:1;	/* Frame sync required */
+	unsigned int data_indep:1;	/* data independent frame sync generated */
+	unsigned int act_low:1;	/* Active low TFS */
+	unsigned int late_fsync:1;	/* Late frame sync */
+	unsigned int tckfe:1;
+	unsigned int sec_en:1;	/* Secondary side enabled */
+
+	/* Choose clock source */
+	unsigned int int_clk:1;	/* Internal or external clock */
+
+	/* If external clock is used, the following fields are ignored */
+	int serial_clk;
+	int fsync_clk;
+
+	unsigned int data_format:2;	/* Normal, u-law or a-law */
+
+	int word_len;		/* Length of the word in bits, 3-32 */
+	int dma_enabled;
+};
+
+/* Userspace interface */
+#define SPORT_IOC_MAGIC		'P'
+#define SPORT_IOC_CONFIG	_IOWR('P', 0x01, struct sport_config)
+#define SPORT_IOC_GET_SYSTEMCLOCK         _IOR('P', 0x02, unsigned long)
+#define SPORT_IOC_SET_BAUDRATE            _IOW('P', 0x03, unsigned long)
+
+
+/* SPORT_TCR1 Masks */
+#define TSPEN		0x0001	/* TX enable */
+#define ITCLK		0x0002	/* Internal TX Clock Select */
+#define TDTYPE		0x000C	/* TX Data Formatting Select */
+#define DTYPE_NORM	0x0000	/* Data Format Normal */
+#define DTYPE_ULAW	0x0008	/* Compand Using u-Law */
+#define DTYPE_ALAW	0x000C	/* Compand Using A-Law */
+#define TLSBIT		0x0010	/* TX Bit Order */
+#define ITFS		0x0200	/* Internal TX Frame Sync Select */
+#define TFSR		0x0400	/* TX Frame Sync Required Select */
+#define DITFS		0x0800	/* Data Independent TX Frame Sync Select */
+#define LTFS		0x1000	/* Low TX Frame Sync Select */
+#define LATFS		0x2000	/* Late TX Frame Sync Select */
+#define TCKFE		0x4000	/* TX Clock Falling Edge Select */
+
+/* SPORT_TCR2 Masks */
+#define SLEN		0x001F	/* SPORT TX Word Length (2 - 31) */
+#define DP_SLEN(x)	BFIN_DEPOSIT(SLEN, x)
+#define EX_SLEN(x)	BFIN_EXTRACT(SLEN, x)
+#define TXSE		0x0100	/* TX Secondary Enable */
+#define TSFSE		0x0200	/* TX Stereo Frame Sync Enable */
+#define TRFST		0x0400	/* TX Right-First Data Order */
+
+/* SPORT_RCR1 Masks */
+#define RSPEN		0x0001	/* RX enable */
+#define IRCLK		0x0002	/* Internal RX Clock Select */
+#define RDTYPE		0x000C	/* RX Data Formatting Select */
+/* DTYPE_* defined above */
+#define RLSBIT		0x0010	/* RX Bit Order */
+#define IRFS		0x0200	/* Internal RX Frame Sync Select */
+#define RFSR		0x0400	/* RX Frame Sync Required Select */
+#define LRFS		0x1000	/* Low RX Frame Sync Select */
+#define LARFS		0x2000	/* Late RX Frame Sync Select */
+#define RCKFE		0x4000	/* RX Clock Falling Edge Select */
+
+/* SPORT_RCR2 Masks */
+/* SLEN defined above */
+#define RXSE		0x0100	/* RX Secondary Enable */
+#define RSFSE		0x0200	/* RX Stereo Frame Sync Enable */
+#define RRFST		0x0400	/* Right-First Data Order */
+
+/* SPORT_STAT Masks */
+#define RXNE		0x0001	/* RX FIFO Not Empty Status */
+#define RUVF		0x0002	/* RX Underflow Status */
+#define ROVF		0x0004	/* RX Overflow Status */
+#define TXF		0x0008	/* TX FIFO Full Status */
+#define TUVF		0x0010	/* TX Underflow Status */
+#define TOVF		0x0020	/* TX Overflow Status */
+#define TXHRE		0x0040	/* TX Hold Register Empty */
+
+/* SPORT_MCMC1 Masks */
+#define SP_WOFF		0x03FF	/* Multichannel Window Offset Field */
+#define DP_SP_WOFF(x)	BFIN_DEPOSIT(SP_WOFF, x)
+#define EX_SP_WOFF(x)	BFIN_EXTRACT(SP_WOFF, x)
+#define SP_WSIZE	0xF000	/* Multichannel Window Size Field */
+#define DP_SP_WSIZE(x)	BFIN_DEPOSIT(SP_WSIZE, x)
+#define EX_SP_WSIZE(x)	BFIN_EXTRACT(SP_WSIZE, x)
+
+/* SPORT_MCMC2 Masks */
+#define MCCRM		0x0003	/* Multichannel Clock Recovery Mode */
+#define REC_BYPASS	0x0000	/* Bypass Mode (No Clock Recovery) */
+#define REC_2FROM4	0x0002	/* Recover 2 MHz Clock from 4 MHz Clock */
+#define REC_8FROM16	0x0003	/* Recover 8 MHz Clock from 16 MHz Clock */
+#define MCDTXPE		0x0004	/* Multichannel DMA Transmit Packing */
+#define MCDRXPE		0x0008	/* Multichannel DMA Receive Packing */
+#define MCMEN		0x0010	/* Multichannel Frame Mode Enable */
+#define FSDR		0x0080	/* Multichannel Frame Sync to Data Relationship */
+#define MFD		0xF000	/* Multichannel Frame Delay */
+#define DP_MFD(x)	BFIN_DEPOSIT(MFD, x)
+#define EX_MFD(x)	BFIN_EXTRACT(MFD, x)
+
+#endif /* _UAPI__BFIN_SPORT_H__ */
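Note that the DP_()/EX_() helpers above still expand through BFIN_DEPOSIT/BFIN_EXTRACT, which stay in the kernel-side header; userspace consumers effectively get only the masks. A hedged sketch of the conventional mask-relative deposit/extract such names imply (GCC builtins assumed):

	#include <assert.h>

	#define FIELD_SHIFT(mask)	__builtin_ctz(mask)
	#define DEPOSIT(mask, val)	(((val) << FIELD_SHIFT(mask)) & (mask))
	#define EXTRACT(mask, reg)	(((reg) & (mask)) >> FIELD_SHIFT(mask))

	int main(void)
	{
		unsigned int tcr2 = DEPOSIT(0x001F, 15);	/* SLEN=15: 16-bit words */
		assert(EXTRACT(0x001F, tcr2) == 15);
		return 0;
	}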
diff --git a/arch/blackfin/include/asm/byteorder.h b/arch/blackfin/include/uapi/asm/byteorder.h
similarity index 100%
rename from arch/blackfin/include/asm/byteorder.h
rename to arch/blackfin/include/uapi/asm/byteorder.h
diff --git a/arch/blackfin/include/asm/cachectl.h b/arch/blackfin/include/uapi/asm/cachectl.h
similarity index 100%
rename from arch/blackfin/include/asm/cachectl.h
rename to arch/blackfin/include/uapi/asm/cachectl.h
diff --git a/arch/blackfin/include/asm/fcntl.h b/arch/blackfin/include/uapi/asm/fcntl.h
similarity index 100%
rename from arch/blackfin/include/asm/fcntl.h
rename to arch/blackfin/include/uapi/asm/fcntl.h
diff --git a/arch/blackfin/include/uapi/asm/fixed_code.h b/arch/blackfin/include/uapi/asm/fixed_code.h
new file mode 100644
index 0000000..3bef1dc
--- /dev/null
+++ b/arch/blackfin/include/uapi/asm/fixed_code.h
@@ -0,0 +1,38 @@
+/*
+ * This file defines the fixed addresses where userspace programs
+ * can find atomic code sequences.
+ *
+ * Copyright 2007-2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _UAPI__BFIN_ASM_FIXED_CODE_H__
+#define _UAPI__BFIN_ASM_FIXED_CODE_H__
+
+
+#ifndef CONFIG_PHY_RAM_BASE_ADDRESS
+#define CONFIG_PHY_RAM_BASE_ADDRESS	0x0
+#endif
+
+#define FIXED_CODE_START	(CONFIG_PHY_RAM_BASE_ADDRESS + 0x400)
+
+#define SIGRETURN_STUB		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x400)
+
+#define ATOMIC_SEQS_START	(CONFIG_PHY_RAM_BASE_ADDRESS + 0x410)
+
+#define ATOMIC_XCHG32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x410)
+#define ATOMIC_CAS32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x420)
+#define ATOMIC_ADD32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x430)
+#define ATOMIC_SUB32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x440)
+#define ATOMIC_IOR32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x450)
+#define ATOMIC_AND32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x460)
+#define ATOMIC_XOR32		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x470)
+
+#define ATOMIC_SEQS_END		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x480)
+
+#define SAFE_USER_INSTRUCTION   (CONFIG_PHY_RAM_BASE_ADDRESS + 0x480)
+
+#define FIXED_CODE_END		(CONFIG_PHY_RAM_BASE_ADDRESS + 0x490)
+
+#endif /* _UAPI__BFIN_ASM_FIXED_CODE_H__ */
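With these constants exported, userspace tooling can reproduce the same range test the kernel applies around the pinned sequences; a minimal hedged sketch:

	#include <asm/fixed_code.h>	/* exported via header-y in this series */

	/* True if pc falls inside the pinned atomic sequences. */
	static inline int in_atomic_sequence(unsigned long pc)
	{
		return pc >= ATOMIC_SEQS_START && pc < ATOMIC_SEQS_END;
	}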
diff --git a/arch/blackfin/include/asm/ioctls.h b/arch/blackfin/include/uapi/asm/ioctls.h
similarity index 100%
rename from arch/blackfin/include/asm/ioctls.h
rename to arch/blackfin/include/uapi/asm/ioctls.h
diff --git a/arch/blackfin/include/asm/poll.h b/arch/blackfin/include/uapi/asm/poll.h
similarity index 100%
rename from arch/blackfin/include/asm/poll.h
rename to arch/blackfin/include/uapi/asm/poll.h
diff --git a/arch/blackfin/include/asm/posix_types.h b/arch/blackfin/include/uapi/asm/posix_types.h
similarity index 100%
rename from arch/blackfin/include/asm/posix_types.h
rename to arch/blackfin/include/uapi/asm/posix_types.h
diff --git a/arch/blackfin/include/uapi/asm/ptrace.h b/arch/blackfin/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..fd48bd0
--- /dev/null
+++ b/arch/blackfin/include/uapi/asm/ptrace.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2004-2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _UAPI_BFIN_PTRACE_H
+#define _UAPI_BFIN_PTRACE_H
+
+/*
+ * GCC defines register number like this:
+ * -----------------------------
+ *       0 - 7 are data registers R0-R7
+ *       8 - 15 are address registers P0-P7
+ *      16 - 31 dsp registers I/B/L0 -- I/B/L3 & M0--M3
+ *      32 - 33 A registers A0 & A1
+ *      34 -    status register
+ * -----------------------------
+ *
+ * We follow the above, except:
+ *      32-33 --- Low 32-bit of A0&1
+ *      34-35 --- High 8-bit of A0&1
+ */
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+/* this struct defines the way the registers are stored on the
+   stack during a system call. */
+
+struct pt_regs {
+	long orig_pc;
+	long ipend;
+	long seqstat;
+	long rete;
+	long retn;
+	long retx;
+	long pc;		/* PC == RETI */
+	long rets;
+	long reserved;		/* Used as scratch during system calls */
+	long astat;
+	long lb1;
+	long lb0;
+	long lt1;
+	long lt0;
+	long lc1;
+	long lc0;
+	long a1w;
+	long a1x;
+	long a0w;
+	long a0x;
+	long b3;
+	long b2;
+	long b1;
+	long b0;
+	long l3;
+	long l2;
+	long l1;
+	long l0;
+	long m3;
+	long m2;
+	long m1;
+	long m0;
+	long i3;
+	long i2;
+	long i1;
+	long i0;
+	long usp;
+	long fp;
+	long p5;
+	long p4;
+	long p3;
+	long p2;
+	long p1;
+	long p0;
+	long r7;
+	long r6;
+	long r5;
+	long r4;
+	long r3;
+	long r2;
+	long r1;
+	long r0;
+	long orig_r0;
+	long orig_p0;
+	long syscfg;
+};
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13	/* ptrace signal  */
+
+#define PTRACE_GETFDPIC           31	/* get the ELF fdpic loadmap address */
+#define PTRACE_GETFDPIC_EXEC       0	/* [addr] request the executable loadmap */
+#define PTRACE_GETFDPIC_INTERP     1	/* [addr] request the interpreter loadmap */
+
+#define PS_S  (0x0002)
+
+
+#endif				/* __ASSEMBLY__ */
+
+/*
+ * Offsets used by 'ptrace' system call interface.
+ */
+
+#define PT_R0 204
+#define PT_R1 200
+#define PT_R2 196
+#define PT_R3 192
+#define PT_R4 188
+#define PT_R5 184
+#define PT_R6 180
+#define PT_R7 176
+#define PT_P0 172
+#define PT_P1 168
+#define PT_P2 164
+#define PT_P3 160
+#define PT_P4 156
+#define PT_P5 152
+#define PT_FP 148
+#define PT_USP 144
+#define PT_I0 140
+#define PT_I1 136
+#define PT_I2 132
+#define PT_I3 128
+#define PT_M0 124
+#define PT_M1 120
+#define PT_M2 116
+#define PT_M3 112
+#define PT_L0 108
+#define PT_L1 104
+#define PT_L2 100
+#define PT_L3 96
+#define PT_B0 92
+#define PT_B1 88
+#define PT_B2 84
+#define PT_B3 80
+#define PT_A0X 76
+#define PT_A0W 72
+#define PT_A1X 68
+#define PT_A1W 64
+#define PT_LC0 60
+#define PT_LC1 56
+#define PT_LT0 52
+#define PT_LT1 48
+#define PT_LB0 44
+#define PT_LB1 40
+#define PT_ASTAT 36
+#define PT_RESERVED 32
+#define PT_RETS 28
+#define PT_PC 24
+#define PT_RETX 20
+#define PT_RETN 16
+#define PT_RETE 12
+#define PT_SEQSTAT 8
+#define PT_IPEND 4
+
+#define PT_ORIG_R0 208
+#define PT_ORIG_P0 212
+#define PT_SYSCFG 216
+#define PT_TEXT_ADDR 220
+#define PT_TEXT_END_ADDR 224
+#define PT_DATA_ADDR 228
+#define PT_FDPIC_EXEC 232
+#define PT_FDPIC_INTERP 236
+
+#define PT_LAST_PSEUDO PT_FDPIC_INTERP
+
+#endif /* _UAPI_BFIN_PTRACE_H */
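The PT_* values are byte offsets into the saved register area, consumed by PTRACE_PEEKUSER-style requests. A hedged userspace sketch (vfork rather than fork, since this is a nommu port; error handling elided):

	#include <stdio.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <unistd.h>
	#include <asm/ptrace.h>		/* PT_PC, exported by this series */

	int main(void)
	{
		pid_t pid = vfork();

		if (pid == 0) {
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			execl("/bin/true", "true", (char *)NULL);
			_exit(1);
		}
		waitpid(pid, NULL, 0);		/* child stops at exec */
		printf("child PC: %#lx\n",
		       ptrace(PTRACE_PEEKUSER, pid, (void *)PT_PC, NULL));
		ptrace(PTRACE_CONT, pid, NULL, NULL);
		return 0;
	}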
diff --git a/arch/blackfin/include/asm/sigcontext.h b/arch/blackfin/include/uapi/asm/sigcontext.h
similarity index 100%
rename from arch/blackfin/include/asm/sigcontext.h
rename to arch/blackfin/include/uapi/asm/sigcontext.h
diff --git a/arch/blackfin/include/asm/siginfo.h b/arch/blackfin/include/uapi/asm/siginfo.h
similarity index 100%
rename from arch/blackfin/include/asm/siginfo.h
rename to arch/blackfin/include/uapi/asm/siginfo.h
diff --git a/arch/blackfin/include/asm/signal.h b/arch/blackfin/include/uapi/asm/signal.h
similarity index 100%
rename from arch/blackfin/include/asm/signal.h
rename to arch/blackfin/include/uapi/asm/signal.h
diff --git a/arch/blackfin/include/asm/stat.h b/arch/blackfin/include/uapi/asm/stat.h
similarity index 100%
rename from arch/blackfin/include/asm/stat.h
rename to arch/blackfin/include/uapi/asm/stat.h
diff --git a/arch/blackfin/include/asm/swab.h b/arch/blackfin/include/uapi/asm/swab.h
similarity index 100%
rename from arch/blackfin/include/asm/swab.h
rename to arch/blackfin/include/uapi/asm/swab.h
diff --git a/arch/blackfin/include/uapi/asm/unistd.h b/arch/blackfin/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..a451164
--- /dev/null
+++ b/arch/blackfin/include/uapi/asm/unistd.h
@@ -0,0 +1,437 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _UAPI__ASM_BFIN_UNISTD_H
+#define _UAPI__ASM_BFIN_UNISTD_H
+/*
+ * This file contains the system call numbers.
+ */
+#define __NR_restart_syscall	  0
+#define __NR_exit		  1
+				/* 2 __NR_fork not supported on nommu */
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_open		  5
+#define __NR_close		  6
+				/* 7 __NR_waitpid obsolete */
+#define __NR_creat		  8
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_execve		 11
+#define __NR_chdir		 12
+#define __NR_time		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+#define __NR_chown		 16
+				/* 17 __NR_break obsolete */
+				/* 18 __NR_oldstat obsolete */
+#define __NR_lseek		 19
+#define __NR_getpid		 20
+#define __NR_mount		 21
+				/* 22 __NR_umount obsolete */
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_ptrace		 26
+#define __NR_alarm		 27
+				/* 28 __NR_oldfstat obsolete */
+#define __NR_pause		 29
+				/* 30 __NR_utime obsolete */
+				/* 31 __NR_stty obsolete */
+				/* 32 __NR_gtty obsolete */
+#define __NR_access		 33
+#define __NR_nice		 34
+				/* 35 __NR_ftime obsolete */
+#define __NR_sync		 36
+#define __NR_kill		 37
+#define __NR_rename		 38
+#define __NR_mkdir		 39
+#define __NR_rmdir		 40
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_times		 43
+				/* 44 __NR_prof obsolete */
+#define __NR_brk		 45
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+				/* 48 __NR_signal obsolete */
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_acct		 51
+#define __NR_umount2		 52
+				/* 53 __NR_lock obsolete */
+#define __NR_ioctl		 54
+#define __NR_fcntl		 55
+				/* 56 __NR_mpx obsolete */
+#define __NR_setpgid		 57
+				/* 58 __NR_ulimit obsolete */
+				/* 59 __NR_oldolduname obsolete */
+#define __NR_umask		 60
+#define __NR_chroot		 61
+#define __NR_ustat		 62
+#define __NR_dup2		 63
+#define __NR_getppid		 64
+#define __NR_getpgrp		 65
+#define __NR_setsid		 66
+				/* 67 __NR_sigaction obsolete */
+#define __NR_sgetmask		 68
+#define __NR_ssetmask		 69
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+				/* 72 __NR_sigsuspend obsolete */
+				/* 73 __NR_sigpending obsolete */
+#define __NR_sethostname	 74
+#define __NR_setrlimit		 75
+				/* 76 __NR_old_getrlimit obsolete */
+#define __NR_getrusage		 77
+#define __NR_gettimeofday	 78
+#define __NR_settimeofday	 79
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+				/* 82 __NR_select obsolete */
+#define __NR_symlink		 83
+				/* 84 __NR_oldlstat obsolete */
+#define __NR_readlink		 85
+				/* 86 __NR_uselib obsolete */
+				/* 87 __NR_swapon obsolete */
+#define __NR_reboot		 88
+				/* 89 __NR_readdir obsolete */
+				/* 90 __NR_mmap obsolete */
+#define __NR_munmap		 91
+#define __NR_truncate		 92
+#define __NR_ftruncate		 93
+#define __NR_fchmod		 94
+#define __NR_fchown		 95
+#define __NR_getpriority	 96
+#define __NR_setpriority	 97
+				/* 98 __NR_profil obsolete */
+#define __NR_statfs		 99
+#define __NR_fstatfs		100
+				/* 101 __NR_ioperm */
+				/* 102 __NR_socketcall obsolete */
+#define __NR_syslog		103
+#define __NR_setitimer		104
+#define __NR_getitimer		105
+#define __NR_stat		106
+#define __NR_lstat		107
+#define __NR_fstat		108
+				/* 109 __NR_olduname obsolete */
+				/* 110 __NR_iopl obsolete */
+#define __NR_vhangup		111
+				/* 112 __NR_idle obsolete */
+				/* 113 __NR_vm86old */
+#define __NR_wait4		114
+				/* 115 __NR_swapoff obsolete */
+#define __NR_sysinfo		116
+				/* 117 __NR_ipc obsolete */
+#define __NR_fsync		118
+				/* 119 __NR_sigreturn obsolete */
+#define __NR_clone		120
+#define __NR_setdomainname	121
+#define __NR_uname		122
+				/* 123 __NR_modify_ldt obsolete */
+#define __NR_adjtimex		124
+#define __NR_mprotect		125
+				/* 126 __NR_sigprocmask obsolete */
+				/* 127 __NR_create_module obsolete */
+#define __NR_init_module	128
+#define __NR_delete_module	129
+				/* 130 __NR_get_kernel_syms obsolete */
+#define __NR_quotactl		131
+#define __NR_getpgid		132
+#define __NR_fchdir		133
+#define __NR_bdflush		134
+				/* 135 was sysfs */
+#define __NR_personality	136
+				/* 137 __NR_afs_syscall */
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR_getdents		141
+				/* 142 __NR__newselect obsolete */
+#define __NR_flock		143
+				/* 144 __NR_msync obsolete */
+#define __NR_readv		145
+#define __NR_writev		146
+#define __NR_getsid		147
+#define __NR_fdatasync		148
+#define __NR__sysctl		149
+				/* 150 __NR_mlock */
+				/* 151 __NR_munlock */
+				/* 152 __NR_mlockall */
+				/* 153 __NR_munlockall */
+#define __NR_sched_setparam		154
+#define __NR_sched_getparam		155
+#define __NR_sched_setscheduler		156
+#define __NR_sched_getscheduler		157
+#define __NR_sched_yield		158
+#define __NR_sched_get_priority_max	159
+#define __NR_sched_get_priority_min	160
+#define __NR_sched_rr_get_interval	161
+#define __NR_nanosleep		162
+#define __NR_mremap		163
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+				/* 166 __NR_vm86 */
+				/* 167 __NR_query_module */
+				/* 168 __NR_poll */
+#define __NR_nfsservctl		169
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_prctl		172
+#define __NR_rt_sigreturn	173
+#define __NR_rt_sigaction	174
+#define __NR_rt_sigprocmask	175
+#define __NR_rt_sigpending	176
+#define __NR_rt_sigtimedwait	177
+#define __NR_rt_sigqueueinfo	178
+#define __NR_rt_sigsuspend	179
+#define __NR_pread		180
+#define __NR_pwrite		181
+#define __NR_lchown		182
+#define __NR_getcwd		183
+#define __NR_capget		184
+#define __NR_capset		185
+#define __NR_sigaltstack	186
+#define __NR_sendfile		187
+				/* 188 __NR_getpmsg */
+				/* 189 __NR_putpmsg */
+#define __NR_vfork		190
+#define __NR_getrlimit		191
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_chown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_lchown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_pivot_root		217
+				/* 218 __NR_mincore */
+				/* 219 __NR_madvise */
+#define __NR_getdents64		220
+#define __NR_fcntl64		221
+				/* 222 reserved for TUX */
+				/* 223 reserved for TUX */
+#define __NR_gettid		224
+#define __NR_readahead		225
+#define __NR_setxattr		226
+#define __NR_lsetxattr		227
+#define __NR_fsetxattr		228
+#define __NR_getxattr		229
+#define __NR_lgetxattr		230
+#define __NR_fgetxattr		231
+#define __NR_listxattr		232
+#define __NR_llistxattr		233
+#define __NR_flistxattr		234
+#define __NR_removexattr	235
+#define __NR_lremovexattr	236
+#define __NR_fremovexattr	237
+#define __NR_tkill		238
+#define __NR_sendfile64		239
+#define __NR_futex		240
+#define __NR_sched_setaffinity	241
+#define __NR_sched_getaffinity	242
+				/* 243 __NR_set_thread_area */
+				/* 244 __NR_get_thread_area */
+#define __NR_io_setup		245
+#define __NR_io_destroy		246
+#define __NR_io_getevents	247
+#define __NR_io_submit		248
+#define __NR_io_cancel		249
+				/* 250 __NR_alloc_hugepages */
+				/* 251 __NR_free_hugepages */
+#define __NR_exit_group		252
+#define __NR_lookup_dcookie     253
+#define __NR_bfin_spinlock      254
+
+#define __NR_epoll_create	255
+#define __NR_epoll_ctl		256
+#define __NR_epoll_wait		257
+				/* 258 __NR_remap_file_pages */
+#define __NR_set_tid_address	259
+#define __NR_timer_create	260
+#define __NR_timer_settime	261
+#define __NR_timer_gettime	262
+#define __NR_timer_getoverrun	263
+#define __NR_timer_delete	264
+#define __NR_clock_settime	265
+#define __NR_clock_gettime	266
+#define __NR_clock_getres	267
+#define __NR_clock_nanosleep	268
+#define __NR_statfs64		269
+#define __NR_fstatfs64		270
+#define __NR_tgkill		271
+#define __NR_utimes		272
+#define __NR_fadvise64_64	273
+				/* 274 __NR_vserver */
+				/* 275 __NR_mbind */
+				/* 276 __NR_get_mempolicy */
+				/* 277 __NR_set_mempolicy */
+#define __NR_mq_open 		278
+#define __NR_mq_unlink		279
+#define __NR_mq_timedsend	280
+#define __NR_mq_timedreceive	281
+#define __NR_mq_notify		282
+#define __NR_mq_getsetattr	283
+#define __NR_kexec_load		284
+#define __NR_waitid		285
+#define __NR_add_key		286
+#define __NR_request_key	287
+#define __NR_keyctl		288
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
+#define __NR_inotify_init	291
+#define __NR_inotify_add_watch	292
+#define __NR_inotify_rm_watch	293
+				/* 294 __NR_migrate_pages */
+#define __NR_openat		295
+#define __NR_mkdirat		296
+#define __NR_mknodat		297
+#define __NR_fchownat		298
+#define __NR_futimesat		299
+#define __NR_fstatat64		300
+#define __NR_unlinkat		301
+#define __NR_renameat		302
+#define __NR_linkat		303
+#define __NR_symlinkat		304
+#define __NR_readlinkat		305
+#define __NR_fchmodat		306
+#define __NR_faccessat		307
+#define __NR_pselect6		308
+#define __NR_ppoll		309
+#define __NR_unshare		310
+
+/* Blackfin private syscalls */
+#define __NR_sram_alloc		311
+#define __NR_sram_free		312
+#define __NR_dma_memcpy		313
+
+/* socket syscalls */
+#define __NR_accept		314
+#define __NR_bind		315
+#define __NR_connect		316
+#define __NR_getpeername	317
+#define __NR_getsockname	318
+#define __NR_getsockopt		319
+#define __NR_listen		320
+#define __NR_recv		321
+#define __NR_recvfrom		322
+#define __NR_recvmsg		323
+#define __NR_send		324
+#define __NR_sendmsg		325
+#define __NR_sendto		326
+#define __NR_setsockopt		327
+#define __NR_shutdown		328
+#define __NR_socket		329
+#define __NR_socketpair		330
+
+/* sysv ipc syscalls */
+#define __NR_semctl		331
+#define __NR_semget		332
+#define __NR_semop		333
+#define __NR_msgctl		334
+#define __NR_msgget		335
+#define __NR_msgrcv		336
+#define __NR_msgsnd		337
+#define __NR_shmat		338
+#define __NR_shmctl		339
+#define __NR_shmdt		340
+#define __NR_shmget		341
+
+#define __NR_splice		342
+#define __NR_sync_file_range	343
+#define __NR_tee		344
+#define __NR_vmsplice		345
+
+#define __NR_epoll_pwait	346
+#define __NR_utimensat		347
+#define __NR_signalfd		348
+#define __NR_timerfd_create	349
+#define __NR_eventfd		350
+#define __NR_pread64		351
+#define __NR_pwrite64		352
+#define __NR_fadvise64		353
+#define __NR_set_robust_list	354
+#define __NR_get_robust_list	355
+#define __NR_fallocate		356
+#define __NR_semtimedop		357
+#define __NR_timerfd_settime	358
+#define __NR_timerfd_gettime	359
+#define __NR_signalfd4		360
+#define __NR_eventfd2		361
+#define __NR_epoll_create1	362
+#define __NR_dup3		363
+#define __NR_pipe2		364
+#define __NR_inotify_init1	365
+#define __NR_preadv		366
+#define __NR_pwritev		367
+#define __NR_rt_tgsigqueueinfo	368
+#define __NR_perf_event_open	369
+#define __NR_recvmmsg		370
+#define __NR_fanotify_init	371
+#define __NR_fanotify_mark	372
+#define __NR_prlimit64		373
+#define __NR_cacheflush		374
+#define __NR_name_to_handle_at	375
+#define __NR_open_by_handle_at	376
+#define __NR_clock_adjtime	377
+#define __NR_syncfs		378
+#define __NR_setns		379
+#define __NR_sendmmsg		380
+#define __NR_process_vm_readv	381
+#define __NR_process_vm_writev	382
+
+#define __NR_syscall		383
+#define NR_syscalls		__NR_syscall
+
+/* Old optional stuff no one actually uses */
+#define __IGNORE_sysfs
+#define __IGNORE_uselib
+
+/* Implement the newer interfaces */
+#define __IGNORE_mmap
+#define __IGNORE_poll
+#define __IGNORE_select
+#define __IGNORE_utime
+
+/* Not relevant on no-mmu */
+#define __IGNORE_swapon
+#define __IGNORE_swapoff
+#define __IGNORE_msync
+#define __IGNORE_mlock
+#define __IGNORE_munlock
+#define __IGNORE_mlockall
+#define __IGNORE_munlockall
+#define __IGNORE_mincore
+#define __IGNORE_madvise
+#define __IGNORE_remap_file_pages
+#define __IGNORE_mbind
+#define __IGNORE_get_mempolicy
+#define __IGNORE_set_mempolicy
+#define __IGNORE_migrate_pages
+#define __IGNORE_move_pages
+#define __IGNORE_getcpu
+
+
+#endif /* _UAPI__ASM_BFIN_UNISTD_H */
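With the table exported, userspace reaches any of these numbers through the generic syscall(2) wrapper; e.g. gettid, which commonly lacks a libc wrapper on this platform's toolchains:

	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		printf("tid=%ld\n", syscall(__NR_gettid));
		return 0;
	}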
diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c
index 9b80b15..b882ce2 100644
--- a/arch/blackfin/kernel/kgdb.c
+++ b/arch/blackfin/kernel/kgdb.c
@@ -329,6 +329,9 @@
 }
 
 #ifdef CONFIG_SMP
+extern void generic_exec_single(int cpu, struct call_single_data *data, int wait);
+static struct call_single_data kgdb_smp_ipi_data[NR_CPUS];
+
 void kgdb_passive_cpu_callback(void *info)
 {
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
@@ -336,12 +339,18 @@
 
 void kgdb_roundup_cpus(unsigned long flags)
 {
-	smp_call_function(kgdb_passive_cpu_callback, NULL, 0);
+	unsigned int cpu;
+
+	for (cpu = cpumask_first(cpu_online_mask); cpu < nr_cpu_ids;
+		cpu = cpumask_next(cpu, cpu_online_mask)) {
+		kgdb_smp_ipi_data[cpu].func = kgdb_passive_cpu_callback;
+		generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
+	}
 }
 
 void kgdb_roundup_cpu(int cpu, unsigned long flags)
 {
-	smp_call_function_single(cpu, kgdb_passive_cpu_callback, NULL, 0);
+	generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
 }
 #endif
 
diff --git a/arch/blackfin/mach-bf518/include/mach/anomaly.h b/arch/blackfin/mach-bf518/include/mach/anomaly.h
index 845e6bc..46cb882 100644
--- a/arch/blackfin/mach-bf518/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf518/include/mach/anomaly.h
@@ -165,5 +165,6 @@
 #define ANOMALY_05000474 (0)
 #define ANOMALY_05000475 (0)
 #define ANOMALY_05000480 (0)
+#define ANOMALY_16000030 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf527/include/mach/anomaly.h b/arch/blackfin/mach-bf527/include/mach/anomaly.h
index aa14110..2f9cc33 100644
--- a/arch/blackfin/mach-bf527/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf527/include/mach/anomaly.h
@@ -285,5 +285,6 @@
 #define ANOMALY_05000448 (0)
 #define ANOMALY_05000474 (0)
 #define ANOMALY_05000480 (0)
+#define ANOMALY_16000030 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf533/include/mach/anomaly.h b/arch/blackfin/mach-bf533/include/mach/anomaly.h
index 3a8f73a..0e754ef 100644
--- a/arch/blackfin/mach-bf533/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf533/include/mach/anomaly.h
@@ -378,5 +378,6 @@
 #define ANOMALY_05000474 (0)
 #define ANOMALY_05000480 (0)
 #define ANOMALY_05000485 (0)
+#define ANOMALY_16000030 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf537/include/mach/anomaly.h b/arch/blackfin/mach-bf537/include/mach/anomaly.h
index df92126..2bc70c5 100644
--- a/arch/blackfin/mach-bf537/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf537/include/mach/anomaly.h
@@ -236,5 +236,6 @@
 #define ANOMALY_05000467 (0)
 #define ANOMALY_05000474 (0)
 #define ANOMALY_05000485 (0)
+#define ANOMALY_16000030 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf538/include/mach/anomaly.h b/arch/blackfin/mach-bf538/include/mach/anomaly.h
index 318d922..eaac269 100644
--- a/arch/blackfin/mach-bf538/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf538/include/mach/anomaly.h
@@ -210,5 +210,6 @@
 #define ANOMALY_05000474 (0)
 #define ANOMALY_05000480 (0)
 #define ANOMALY_05000485 (0)
+#define ANOMALY_16000030 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf548/include/mach/anomaly.h b/arch/blackfin/mach-bf548/include/mach/anomaly.h
index 5b711d8..098fad6 100644
--- a/arch/blackfin/mach-bf548/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf548/include/mach/anomaly.h
@@ -296,5 +296,6 @@
 #define ANOMALY_05000440 (0)
 #define ANOMALY_05000475 (0)
 #define ANOMALY_05000480 (0)
+#define ANOMALY_16000030 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf561/include/mach/anomaly.h b/arch/blackfin/mach-bf561/include/mach/anomaly.h
index 72476ff..038249c 100644
--- a/arch/blackfin/mach-bf561/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf561/include/mach/anomaly.h
@@ -348,5 +348,6 @@
 #define ANOMALY_05000474 (0)
 #define ANOMALY_05000480 (0)
 #define ANOMALY_05000485 (0)
+#define ANOMALY_16000030 (0)
 
 #endif
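Each mach-bf5xx anomaly.h deliberately carries the full anomaly list, stubbing entries that do not apply to that part to (0) — here ANOMALY_16000030, presumably a BF60x-only erratum. Shared code can then test anomalies with plain C conditionals and let the compiler discard the dead branch, instead of growing #ifdef forests:

	/* Illustrative consumer; apply_16000030_workaround() is hypothetical. */
	if (ANOMALY_16000030)
		apply_16000030_workaround();	/* compiled out where defined (0) */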
diff --git a/arch/blackfin/mach-bf609/include/mach/irq.h b/arch/blackfin/mach-bf609/include/mach/irq.h
index 23e74cd..fa0843d 100644
--- a/arch/blackfin/mach-bf609/include/mach/irq.h
+++ b/arch/blackfin/mach-bf609/include/mach/irq.h
@@ -9,9 +9,6 @@
 
 #include <mach-common/irq.h>
 
-#undef BFIN_IRQ
-#define BFIN_IRQ(x) ((x) + IVG15)
-
 #define NR_PERI_INTS		(5 * 32)
 
 #define IRQ_SEC_ERR		BFIN_IRQ(0)	/* SEC Error */
diff --git a/arch/blackfin/mach-bf609/pm.c b/arch/blackfin/mach-bf609/pm.c
index dacafc1..ad505d9 100644
--- a/arch/blackfin/mach-bf609/pm.c
+++ b/arch/blackfin/mach-bf609/pm.c
@@ -174,7 +174,6 @@
 	bfin_write32(DPM0_RESTORE5, bfin_read32(DPM0_RESTORE5) | 4);
 }
 
-#define IRQ_SID(irq)   ((irq) - IVG15)
 asmlinkage void enter_deepsleep(void);
 
 __attribute__((l1_text))
@@ -311,7 +310,7 @@
 {
 	printk(KERN_DEBUG "gpio irq %d\n", irq);
 	if (irq == 231)
-		bfin_sec_raise_irq(IRQ_SID(IRQ_SOFT1));
+		bfin_sec_raise_irq(BFIN_SYSIRQ(IRQ_SOFT1));
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/blackfin/mach-common/dpmc.c b/arch/blackfin/mach-common/dpmc.c
index f5685a4..978bb40 100644
--- a/arch/blackfin/mach-common/dpmc.c
+++ b/arch/blackfin/mach-common/dpmc.c
@@ -157,24 +157,7 @@
 		.name = DRIVER_NAME,
 	}
 };
-
-/**
- *	bfin_dpmc_init - Init driver
- */
-static int __init bfin_dpmc_init(void)
-{
-	return platform_driver_register(&bfin_dpmc_device_driver);
-}
-module_init(bfin_dpmc_init);
-
-/**
- *	bfin_dpmc_exit - break down driver
- */
-static void __exit bfin_dpmc_exit(void)
-{
-	platform_driver_unregister(&bfin_dpmc_device_driver);
-}
-module_exit(bfin_dpmc_exit);
+module_platform_driver(bfin_dpmc_device_driver);
 
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("cpu power management driver for Blackfin");
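module_platform_driver() from <linux/platform_device.h> generates exactly the register/unregister boilerplate the dpmc.c hunk deletes; it expands to roughly:

	static int __init bfin_dpmc_device_driver_init(void)
	{
		return platform_driver_register(&bfin_dpmc_device_driver);
	}
	module_init(bfin_dpmc_device_driver_init);

	static void __exit bfin_dpmc_device_driver_exit(void)
	{
		platform_driver_unregister(&bfin_dpmc_device_driver);
	}
	module_exit(bfin_dpmc_device_driver_exit);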
diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index 902bebc..83ff311 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -28,12 +28,6 @@
 #include <asm/dpmc.h>
 #include <asm/traps.h>
 
-#ifndef SEC_GCTL
-# define SIC_SYSIRQ(irq)	(irq - (IRQ_CORETMR + 1))
-#else
-# define SIC_SYSIRQ(irq)	((irq) - IVG15)
-#endif
-
 /*
  * NOTES:
  * - we have separated the physical Hardware interrupt from the
@@ -141,13 +135,13 @@
 	return;
 }
 
+#ifndef SEC_GCTL
 void bfin_internal_mask_irq(unsigned int irq)
 {
 	unsigned long flags = hard_local_irq_save();
-#ifndef SEC_GCTL
 #ifdef SIC_IMASK0
-	unsigned mask_bank = SIC_SYSIRQ(irq) / 32;
-	unsigned mask_bit = SIC_SYSIRQ(irq) % 32;
+	unsigned mask_bank = BFIN_SYSIRQ(irq) / 32;
+	unsigned mask_bit = BFIN_SYSIRQ(irq) % 32;
 	bfin_write_SIC_IMASK(mask_bank, bfin_read_SIC_IMASK(mask_bank) &
 			~(1 << mask_bit));
 # if defined(CONFIG_SMP) || defined(CONFIG_ICC)
@@ -156,9 +150,8 @@
 # endif
 #else
 	bfin_write_SIC_IMASK(bfin_read_SIC_IMASK() &
-			~(1 << SIC_SYSIRQ(irq)));
+			~(1 << BFIN_SYSIRQ(irq)));
 #endif /* end of SIC_IMASK0 */
-#endif
 	hard_local_irq_restore(flags);
 }
 
@@ -176,10 +169,9 @@
 {
 	unsigned long flags = hard_local_irq_save();
 
-#ifndef SEC_GCTL
 #ifdef SIC_IMASK0
-	unsigned mask_bank = SIC_SYSIRQ(irq) / 32;
-	unsigned mask_bit = SIC_SYSIRQ(irq) % 32;
+	unsigned mask_bank = BFIN_SYSIRQ(irq) / 32;
+	unsigned mask_bit = BFIN_SYSIRQ(irq) % 32;
 # ifdef CONFIG_SMP
 	if (cpumask_test_cpu(0, affinity))
 # endif
@@ -194,17 +186,103 @@
 # endif
 #else
 	bfin_write_SIC_IMASK(bfin_read_SIC_IMASK() |
-			(1 << SIC_SYSIRQ(irq)));
-#endif
+			(1 << BFIN_SYSIRQ(irq)));
 #endif
 	hard_local_irq_restore(flags);
 }
 
-#ifdef SEC_GCTL
+#ifdef CONFIG_SMP
+static void bfin_internal_unmask_irq_chip(struct irq_data *d)
+{
+	bfin_internal_unmask_irq_affinity(d->irq, d->affinity);
+}
+
+static int bfin_internal_set_affinity(struct irq_data *d,
+				      const struct cpumask *mask, bool force)
+{
+	bfin_internal_mask_irq(d->irq);
+	bfin_internal_unmask_irq_affinity(d->irq, mask);
+
+	return 0;
+}
+#else
+static void bfin_internal_unmask_irq_chip(struct irq_data *d)
+{
+	bfin_internal_unmask_irq(d->irq);
+}
+#endif
+
+#if defined(CONFIG_PM)
+int bfin_internal_set_wake(unsigned int irq, unsigned int state)
+{
+	u32 bank, bit, wakeup = 0;
+	unsigned long flags;
+	bank = BFIN_SYSIRQ(irq) / 32;
+	bit = BFIN_SYSIRQ(irq) % 32;
+
+	switch (irq) {
+#ifdef IRQ_RTC
+	case IRQ_RTC:
+	wakeup |= WAKE;
+	break;
+#endif
+#ifdef IRQ_CAN0_RX
+	case IRQ_CAN0_RX:
+	wakeup |= CANWE;
+	break;
+#endif
+#ifdef IRQ_CAN1_RX
+	case IRQ_CAN1_RX:
+	wakeup |= CANWE;
+	break;
+#endif
+#ifdef IRQ_USB_INT0
+	case IRQ_USB_INT0:
+	wakeup |= USBWE;
+	break;
+#endif
+#ifdef CONFIG_BF54x
+	case IRQ_CNT:
+	wakeup |= ROTWE;
+	break;
+#endif
+	default:
+	break;
+	}
+
+	flags = hard_local_irq_save();
+
+	if (state) {
+		bfin_sic_iwr[bank] |= (1 << bit);
+		vr_wakeup  |= wakeup;
+
+	} else {
+		bfin_sic_iwr[bank] &= ~(1 << bit);
+		vr_wakeup  &= ~wakeup;
+	}
+
+	hard_local_irq_restore(flags);
+
+	return 0;
+}
+
+static int bfin_internal_set_wake_chip(struct irq_data *d, unsigned int state)
+{
+	return bfin_internal_set_wake(d->irq, state);
+}
+#else
+inline int bfin_internal_set_wake(unsigned int irq, unsigned int state)
+{
+	return 0;
+}
+# define bfin_internal_set_wake_chip NULL
+#endif
+
+#else /* SEC_GCTL */
 static void bfin_sec_preflow_handler(struct irq_data *d)
 {
 	unsigned long flags = hard_local_irq_save();
-	unsigned int sid = SIC_SYSIRQ(d->irq);
+	unsigned int sid = BFIN_SYSIRQ(d->irq);
 
 	bfin_write_SEC_SCI(0, SEC_CSID, sid);
 
@@ -214,7 +292,7 @@
 static void bfin_sec_mask_ack_irq(struct irq_data *d)
 {
 	unsigned long flags = hard_local_irq_save();
-	unsigned int sid = SIC_SYSIRQ(d->irq);
+	unsigned int sid = BFIN_SYSIRQ(d->irq);
 
 	bfin_write_SEC_SCI(0, SEC_CSID, sid);
 
@@ -224,7 +302,7 @@
 static void bfin_sec_unmask_irq(struct irq_data *d)
 {
 	unsigned long flags = hard_local_irq_save();
-	unsigned int sid = SIC_SYSIRQ(d->irq);
+	unsigned int sid = BFIN_SYSIRQ(d->irq);
 
 	bfin_write32(SEC_END, sid);
 
@@ -269,7 +347,7 @@
 	unsigned long flags = hard_local_irq_save();
 	uint32_t reg_sctl = bfin_read_SEC_SCTL(sid);
 
-	if (sid == SIC_SYSIRQ(IRQ_WATCH0))
+	if (sid == BFIN_SYSIRQ(IRQ_WATCH0))
 		reg_sctl |= SEC_SCTL_FAULT_EN;
 	else
 		reg_sctl |= SEC_SCTL_INT_EN;
@@ -292,7 +370,7 @@
 static void bfin_sec_enable(struct irq_data *d)
 {
 	unsigned long flags = hard_local_irq_save();
-	unsigned int sid = SIC_SYSIRQ(d->irq);
+	unsigned int sid = BFIN_SYSIRQ(d->irq);
 
 	bfin_sec_enable_sci(sid);
 	bfin_sec_enable_ssi(sid);
@@ -303,7 +381,7 @@
 static void bfin_sec_disable(struct irq_data *d)
 {
 	unsigned long flags = hard_local_irq_save();
-	unsigned int sid = SIC_SYSIRQ(d->irq);
+	unsigned int sid = BFIN_SYSIRQ(d->irq);
 
 	bfin_sec_disable_sci(sid);
 	bfin_sec_disable_ssi(sid);
@@ -328,9 +406,10 @@
 	hard_local_irq_restore(flags);
 }
 
-void bfin_sec_raise_irq(unsigned int sid)
+void bfin_sec_raise_irq(unsigned int irq)
 {
 	unsigned long flags = hard_local_irq_save();
+	unsigned int sid = BFIN_SYSIRQ(irq);
 
 	bfin_write32(SEC_RAISE, sid);
 
@@ -341,8 +420,13 @@
 {
 	bfin_sec_set_ssi_coreid(34, 0);
 	bfin_sec_set_ssi_coreid(35, 1);
+
+	bfin_sec_enable_sci(35);
+	bfin_sec_enable_ssi(35);
 	bfin_sec_set_ssi_coreid(36, 0);
 	bfin_sec_set_ssi_coreid(37, 1);
+	bfin_sec_enable_sci(37);
+	bfin_sec_enable_ssi(37);
 }
 
 void bfin_sec_resume(void)
@@ -412,6 +496,8 @@
 	}
 
 	raw_spin_unlock(&desc->lock);
+
+	handle_fasteoi_irq(irq, desc);
 }
 
 void handle_core_fault(unsigned int irq, struct irq_desc *desc)
@@ -431,105 +517,18 @@
 		printk(KERN_NOTICE "Kernel Stack\n");
 		show_stack(current, NULL);
 		print_modules();
-		panic("Kernel core hardware error");
+		panic("Core 0 hardware error");
 		break;
 	case IRQ_C0_NMI_L1_PARITY_ERR:
-		panic("NMI occurs unexpectedly");
+		panic("Core 0 NMI L1 parity error");
 		break;
 	default:
-		panic("Core 1 fault occurs unexpectedly");
+		panic("Core 1 fault %d occurs unexpectedly", irq);
 	}
 
 	raw_spin_unlock(&desc->lock);
 }
-#endif
-
-#ifdef CONFIG_SMP
-static void bfin_internal_unmask_irq_chip(struct irq_data *d)
-{
-	bfin_internal_unmask_irq_affinity(d->irq, d->affinity);
-}
-
-static int bfin_internal_set_affinity(struct irq_data *d,
-				      const struct cpumask *mask, bool force)
-{
-	bfin_internal_mask_irq(d->irq);
-	bfin_internal_unmask_irq_affinity(d->irq, mask);
-
-	return 0;
-}
-#else
-static void bfin_internal_unmask_irq_chip(struct irq_data *d)
-{
-	bfin_internal_unmask_irq(d->irq);
-}
-#endif
-
-#if defined(CONFIG_PM) && !defined(SEC_GCTL)
-int bfin_internal_set_wake(unsigned int irq, unsigned int state)
-{
-	u32 bank, bit, wakeup = 0;
-	unsigned long flags;
-	bank = SIC_SYSIRQ(irq) / 32;
-	bit = SIC_SYSIRQ(irq) % 32;
-
-	switch (irq) {
-#ifdef IRQ_RTC
-	case IRQ_RTC:
-	wakeup |= WAKE;
-	break;
-#endif
-#ifdef IRQ_CAN0_RX
-	case IRQ_CAN0_RX:
-	wakeup |= CANWE;
-	break;
-#endif
-#ifdef IRQ_CAN1_RX
-	case IRQ_CAN1_RX:
-	wakeup |= CANWE;
-	break;
-#endif
-#ifdef IRQ_USB_INT0
-	case IRQ_USB_INT0:
-	wakeup |= USBWE;
-	break;
-#endif
-#ifdef CONFIG_BF54x
-	case IRQ_CNT:
-	wakeup |= ROTWE;
-	break;
-#endif
-	default:
-	break;
-	}
-
-	flags = hard_local_irq_save();
-
-	if (state) {
-		bfin_sic_iwr[bank] |= (1 << bit);
-		vr_wakeup  |= wakeup;
-
-	} else {
-		bfin_sic_iwr[bank] &= ~(1 << bit);
-		vr_wakeup  &= ~wakeup;
-	}
-
-	hard_local_irq_restore(flags);
-
-	return 0;
-}
-
-static int bfin_internal_set_wake_chip(struct irq_data *d, unsigned int state)
-{
-	return bfin_internal_set_wake(d->irq, state);
-}
-#else
-inline int bfin_internal_set_wake(unsigned int irq, unsigned int state)
-{
-	return 0;
-}
-# define bfin_internal_set_wake_chip NULL
-#endif
+#endif /* SEC_GCTL */
 
 static struct irq_chip bfin_core_irqchip = {
 	.name = "CORE",
@@ -537,6 +536,7 @@
 	.irq_unmask = bfin_core_unmask_irq,
 };
 
+#ifndef SEC_GCTL
 static struct irq_chip bfin_internal_irqchip = {
 	.name = "INTN",
 	.irq_mask = bfin_internal_mask_irq_chip,
@@ -548,8 +548,7 @@
 #endif
 	.irq_set_wake = bfin_internal_set_wake_chip,
 };
-
-#ifdef SEC_GCTL
+#else
 static struct irq_chip bfin_sec_irqchip = {
 	.name = "SEC",
 	.irq_mask_ack = bfin_sec_mask_ack_irq,
@@ -1138,7 +1137,9 @@
 		return -EINVAL;
 	}
 
+#ifndef SEC_GCTL
 	bfin_internal_set_wake(pint_irq, state);
+#endif
 
 	return 0;
 }
@@ -1173,7 +1174,7 @@
 	u32 bank;
 
 	for (bank = 0; bank < NR_PINT_SYS_IRQS; bank++)
-		save_pint_sec_ctl[bank] = bfin_read_SEC_SCTL(bank + SIC_SYSIRQ(IRQ_PINT0));
+		save_pint_sec_ctl[bank] = bfin_read_SEC_SCTL(bank + BFIN_SYSIRQ(IRQ_PINT0));
 	return 0;
 }
 
@@ -1187,7 +1188,7 @@
 	bfin_write_SEC_SCI(0, SEC_CCTL, SEC_CCTL_EN | SEC_CCTL_NMI_EN);
 
 	for (bank = 0; bank < NR_PINT_SYS_IRQS; bank++)
-		bfin_write_SEC_SCTL(bank + SIC_SYSIRQ(IRQ_PINT0), save_pint_sec_ctl[bank]);
+		bfin_write_SEC_SCTL(bank + BFIN_SYSIRQ(IRQ_PINT0), save_pint_sec_ctl[bank]);
 }
 
 static struct syscore_ops sec_pm_syscore_ops = {
@@ -1538,33 +1539,26 @@
 
 	for (irq = 0; irq <= SYS_IRQS; irq++) {
 		if (irq <= IRQ_CORETMR) {
-			irq_set_chip(irq, &bfin_core_irqchip);
-#ifdef CONFIG_TICKSOURCE_CORETMR
+			irq_set_chip_and_handler(irq, &bfin_core_irqchip,
+				handle_simple_irq);
+#if defined(CONFIG_TICKSOURCE_CORETMR) && defined(CONFIG_SMP)
 			if (irq == IRQ_CORETMR)
-# ifdef CONFIG_SMP
 				irq_set_handler(irq, handle_percpu_irq);
-# else
-				irq_set_handler(irq, handle_simple_irq);
-# endif
 #endif
-		} else if (irq < BFIN_IRQ(0)) {
-			irq_set_chip_and_handler(irq, &bfin_internal_irqchip,
-					handle_simple_irq);
-		} else if (irq == IRQ_SEC_ERR) {
-			irq_set_chip_and_handler(irq, &bfin_sec_irqchip,
-					handle_sec_fault);
-		} else if (irq < CORE_IRQS && irq >= IRQ_C0_DBL_FAULT) {
-			irq_set_chip_and_handler(irq, &bfin_sec_irqchip,
-					handle_core_fault);
 		} else if (irq >= BFIN_IRQ(21) && irq <= BFIN_IRQ(26)) {
 			irq_set_chip(irq, &bfin_sec_irqchip);
 			irq_set_chained_handler(irq, bfin_demux_gpio_irq);
 		} else if (irq >= BFIN_IRQ(34) && irq <= BFIN_IRQ(37)) {
-			irq_set_chip(irq, &bfin_sec_irqchip);
-			irq_set_handler(irq, handle_percpu_irq);
-		} else {
 			irq_set_chip_and_handler(irq, &bfin_sec_irqchip,
-					handle_fasteoi_irq);
+				handle_percpu_irq);
+		} else {
+			irq_set_chip(irq, &bfin_sec_irqchip);
+			if (irq == IRQ_SEC_ERR)
+				irq_set_handler(irq, handle_sec_fault);
+			else if (irq >= IRQ_C0_DBL_FAULT && irq < CORE_IRQS)
+				irq_set_handler(irq, handle_core_fault);
+			else
+				irq_set_handler(irq, handle_fasteoi_irq);
 			__irq_set_preflow_handler(irq, bfin_sec_preflow_handler);
 		}
 	}
@@ -1593,8 +1587,8 @@
 
 
 	bfin_write_SEC_FCTL(SEC_FCTL_EN | SEC_FCTL_SYSRST_EN | SEC_FCTL_FLTIN_EN);
-	bfin_sec_enable_sci(SIC_SYSIRQ(IRQ_WATCH0));
-	bfin_sec_enable_ssi(SIC_SYSIRQ(IRQ_WATCH0));
+	bfin_sec_enable_sci(BFIN_SYSIRQ(IRQ_WATCH0));
+	bfin_sec_enable_ssi(BFIN_SYSIRQ(IRQ_WATCH0));
 	bfin_write_SEC_SCI(0, SEC_CCTL, SEC_CCTL_RESET);
 	udelay(100);
 	bfin_write_SEC_GCTL(SEC_GCTL_EN);
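The ints-priority.c changes retire the file-local SIC_SYSIRQ() — which needed an #ifdef to cover both SIC and SEC controllers — in favour of a shared BFIN_SYSIRQ() that maps a Linux IRQ number to the controller's source ID. Its definition is outside this patch; on SEC (BF60x) parts it presumably mirrors the removed ((irq) - IVG15). The mapping is what lets bank/bit arithmetic index the mask registers:

	/* Presumed shape; BFIN_SYSIRQ() is defined in the mach headers. */
	unsigned sid  = BFIN_SYSIRQ(irq);	/* Linux IRQ -> hardware source ID */
	unsigned bank = sid / 32;		/* which SIC_IMASKx register */
	unsigned bit  = sid % 32;		/* which bit within it */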
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 66eab37..f6a3648 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -17,8 +17,6 @@
 	select OF
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config MMU
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index eae7b59..4258b08 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -25,6 +25,7 @@
 generic-y += kmap_types.h
 generic-y += local.h
 generic-y += mman.h
+generic-y += mmu.h
 generic-y += mmu_context.h
 generic-y += msgbuf.h
 generic-y += param.h
diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h
index 03579fd..3c69406 100644
--- a/arch/c6x/include/asm/dma-mapping.h
+++ b/arch/c6x/include/asm/dma-mapping.h
@@ -32,6 +32,7 @@
  */
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
+	debug_dma_mapping_error(dev, dma_addr);
 	return dma_addr == ~0;
 }
 
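debug_dma_mapping_error() hooks into CONFIG_DMA_API_DEBUG: it records that the driver actually consulted dma_mapping_error(), so dma-debug can warn about drivers that map a buffer without checking the result. Typical driver-side usage the hook verifies (buffer names are hypothetical):

	dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))	/* now also satisfies dma-debug */
		return -ENOMEM;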
diff --git a/arch/c6x/include/asm/mmu.h b/arch/c6x/include/asm/mmu.h
deleted file mode 100644
index 4467e77..0000000
--- a/arch/c6x/include/asm/mmu.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Port on Texas Instruments TMS320C6x architecture
- *
- *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
- *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-#ifndef _ASM_C6X_MMU_H
-#define _ASM_C6X_MMU_H
-
-typedef struct {
-	unsigned long		end_brk;
-#ifdef CONFIG_BINFMT_ELF_FDPIC
-	unsigned long	exec_fdpic_loadmap;
-	unsigned long	interp_fdpic_loadmap;
-#endif
-} mm_context_t;
-
-#endif /* _ASM_C6X_MMU_H */
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index f3987a8..e7d09a6 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -14,7 +14,6 @@
  *   more details.
  */
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 
 /* Use the standard ABI for syscalls. */
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 0cac6a4..c59a01d 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -49,8 +49,6 @@
 	select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
 	select GENERIC_CMOS_UPDATE
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS2
 
 config HZ
diff --git a/arch/cris/include/arch-v10/arch/Kbuild b/arch/cris/include/arch-v10/arch/Kbuild
index 7a192e1..1f0fc7a 100644
--- a/arch/cris/include/arch-v10/arch/Kbuild
+++ b/arch/cris/include/arch-v10/arch/Kbuild
@@ -1,4 +1 @@
-header-y += user.h
-header-y += svinto.h
-header-y += sv_addr_ag.h
-header-y += sv_addr.agh
+# CRISv10 arch
diff --git a/arch/cris/include/arch-v32/arch/Kbuild b/arch/cris/include/arch-v32/arch/Kbuild
index 35f2fc4..2fd65c7 100644
--- a/arch/cris/include/arch-v32/arch/Kbuild
+++ b/arch/cris/include/arch-v32/arch/Kbuild
@@ -1,2 +1 @@
-header-y += user.h
-header-y += cryptocop.h
+# CRISv32 arch
diff --git a/arch/cris/include/arch-v32/arch/cryptocop.h b/arch/cris/include/arch-v32/arch/cryptocop.h
index e1cd83d..716e434 100644
--- a/arch/cris/include/arch-v32/arch/cryptocop.h
+++ b/arch/cris/include/arch-v32/arch/cryptocop.h
@@ -2,124 +2,12 @@
  * The device /dev/cryptocop is accessible using this driver using
  * CRYPTOCOP_MAJOR (254) and minor number 0.
  */
-
 #ifndef CRYPTOCOP_H
 #define CRYPTOCOP_H
 
-#include <linux/uio.h>
+#include <uapi/arch-v32/arch/cryptocop.h>
 
 
-#define CRYPTOCOP_SESSION_ID_NONE (0)
-
-typedef unsigned long long int cryptocop_session_id;
-
-/* cryptocop ioctls */
-#define ETRAXCRYPTOCOP_IOCTYPE         (250)
-
-#define CRYPTOCOP_IO_CREATE_SESSION    _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 1, struct strcop_session_op)
-#define CRYPTOCOP_IO_CLOSE_SESSION     _IOW(ETRAXCRYPTOCOP_IOCTYPE, 2, struct strcop_session_op)
-#define CRYPTOCOP_IO_PROCESS_OP        _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 3, struct strcop_crypto_op)
-#define CRYPTOCOP_IO_MAXNR             (3)
-
-typedef enum {
-	cryptocop_cipher_des = 0,
-	cryptocop_cipher_3des = 1,
-	cryptocop_cipher_aes = 2,
-	cryptocop_cipher_m2m = 3, /* mem2mem is essentially a NULL cipher with blocklength=1 */
-	cryptocop_cipher_none
-} cryptocop_cipher_type;
-
-typedef enum {
-	cryptocop_digest_sha1 = 0,
-	cryptocop_digest_md5 = 1,
-	cryptocop_digest_none
-} cryptocop_digest_type;
-
-typedef enum {
-	cryptocop_csum_le = 0,
-	cryptocop_csum_be = 1,
-	cryptocop_csum_none
-} cryptocop_csum_type;
-
-typedef enum {
-	cryptocop_cipher_mode_ecb = 0,
-	cryptocop_cipher_mode_cbc,
-	cryptocop_cipher_mode_none
-} cryptocop_cipher_mode;
-
-typedef enum {
-	cryptocop_3des_eee = 0,
-	cryptocop_3des_eed = 1,
-	cryptocop_3des_ede = 2,
-	cryptocop_3des_edd = 3,
-	cryptocop_3des_dee = 4,
-	cryptocop_3des_ded = 5,
-	cryptocop_3des_dde = 6,
-	cryptocop_3des_ddd = 7
-} cryptocop_3des_mode;
-
-/* Usermode accessible (ioctl) operations. */
-struct strcop_session_op{
-	cryptocop_session_id    ses_id;
-
-	cryptocop_cipher_type   cipher; /* AES, DES, 3DES, m2m, none */
-
-	cryptocop_cipher_mode   cmode; /* ECB, CBC, none */
-	cryptocop_3des_mode     des3_mode;
-
-	cryptocop_digest_type   digest; /* MD5, SHA1, none */
-
-	cryptocop_csum_type     csum;   /* BE, LE, none */
-
-	unsigned char           *key;
-	size_t                  keylen;
-};
-
-#define CRYPTOCOP_CSUM_LENGTH         (2)
-#define CRYPTOCOP_MAX_DIGEST_LENGTH   (20)  /* SHA-1 20, MD5 16 */
-#define CRYPTOCOP_MAX_IV_LENGTH       (16)  /* (3)DES==8, AES == 16 */
-#define CRYPTOCOP_MAX_KEY_LENGTH      (32)
-
-struct strcop_crypto_op{
-	cryptocop_session_id ses_id;
-
-	/* Indata. */
-	unsigned char            *indata;
-	size_t                   inlen; /* Total indata length. */
-
-	/* Cipher configuration. */
-	unsigned char            do_cipher:1;
-	unsigned char            decrypt:1; /* 1 == decrypt, 0 == encrypt */
-	unsigned char            cipher_explicit:1;
-	size_t                   cipher_start;
-	size_t                   cipher_len;
-	/* cipher_iv is used if do_cipher and cipher_explicit and the cipher
-	   mode is CBC.  The length is controlled by the type of cipher,
-	   e.g. DES/3DES 8 octets and AES 16 octets. */
-	unsigned char            cipher_iv[CRYPTOCOP_MAX_IV_LENGTH];
-	/* Outdata. */
-	unsigned char            *cipher_outdata;
-	size_t                   cipher_outlen;
-
-	/* digest configuration. */
-	unsigned char            do_digest:1;
-	size_t                   digest_start;
-	size_t                   digest_len;
-	/* Outdata.  The actual length is determined by the type of the digest. */
-	unsigned char            digest[CRYPTOCOP_MAX_DIGEST_LENGTH];
-
-	/* Checksum configuration. */
-	unsigned char            do_csum:1;
-	size_t                   csum_start;
-	size_t                   csum_len;
-	/* Outdata. */
-	unsigned char            csum[CRYPTOCOP_CSUM_LENGTH];
-};
-
-
-
-#ifdef __KERNEL__
-
 /********** The API to use from inside the kernel. ************/
 
 #include <arch/hwregs/dma.h>
@@ -267,6 +155,4 @@
 
 int cryptocop_job_queue_insert_user_job(struct cryptocop_operation *operation);
 
-#endif /* __KERNEL__ */
-
 #endif /* CRYPTOCOP_H */
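With this split, the ioctl numbers and operation structs stay visible to userspace from the uapi copy, while the kernel header keeps only the in-kernel job-queue API. A hypothetical userspace caller of the exported ABI (error handling and key setup elided):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	/* plus the exported cryptocop header */

	int fd = open("/dev/cryptocop", O_RDWR);
	struct strcop_session_op ses = {
		.cipher = cryptocop_cipher_aes,
		.cmode  = cryptocop_cipher_mode_cbc,
		.digest = cryptocop_digest_none,
		.csum   = cryptocop_csum_none,
		/* key/keylen set elsewhere */
	};
	if (ioctl(fd, CRYPTOCOP_IO_CREATE_SESSION, &ses) < 0)
		perror("CRYPTOCOP_IO_CREATE_SESSION");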
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
index f171a66..f132755 100644
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ b/arch/cris/include/arch-v32/arch/spinlock.h
@@ -118,7 +118,7 @@
 		ret = 1;
 	}
 	arch_spin_unlock(&rw->slock);
-	return 1;
+	return ret;
 }
 
 #define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
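The one-line spinlock.h fix matters more than it looks: arch_read_trylock() computed ret but returned the constant 1, so a failed read-trylock still reported success to its callers. The corrected shape, in generic terms (rw_t and the helper predicates are placeholders, not the CRIS definitions):

	static inline int read_trylock_sketch(rw_t *rw)
	{
		int ret = 0;

		arch_spin_lock(&rw->slock);
		if (readers_allowed(rw)) {	/* placeholder predicate */
			take_read_ref(rw);
			ret = 1;
		}
		arch_spin_unlock(&rw->slock);
		return ret;			/* was: return 1; */
	}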
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 15a122c..f1e79ed 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,12 +1,7 @@
-include include/asm-generic/Kbuild.asm
 
 header-y += arch-v10/
 header-y += arch-v32/
 
-header-y += ethernet.h
-header-y += etraxgpio.h
-header-y += rs485.h
-header-y += sync_serial.h
 
 generic-y += clkdev.h
 generic-y += exec.h
diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h
index 32567bc..ac12ae2 100644
--- a/arch/cris/include/asm/io.h
+++ b/arch/cris/include/asm/io.h
@@ -133,12 +133,39 @@
 #define insb(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,1,count) : 0)
 #define insw(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,2,count) : 0)
 #define insl(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,4,count) : 0)
-#define outb(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,1,1)
-#define outw(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,2,1)
-#define outl(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,4,1)
-#define outsb(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,1,count)
-#define outsw(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,2,count)
-#define outsl(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,3,count)
+static inline void outb(unsigned char data, unsigned int port)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *) &data, 1, 1);
+}
+static inline void outw(unsigned short data, unsigned int port)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *) &data, 2, 1);
+}
+static inline void outl(unsigned int data, unsigned int port)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *) &data, 4, 1);
+}
+static inline void outsb(unsigned int port, const void *addr,
+			 unsigned long count)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *)addr, 1, count);
+}
+static inline void outsw(unsigned int port, const void *addr,
+			 unsigned long count)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *)addr, 2, count);
+}
+static inline void outsl(unsigned int port, const void *addr,
+			 unsigned long count)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *)addr, 4, count);
+}
 
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
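Rewriting the out*() macros as static inlines fixes several latent problems at once: outsl() previously passed an element size of 3 instead of 4; the single-value macros cast the value itself to a pointer where the inlines pass the address of a local copy (presumably what cris_iops->write_io() expects, given the read side); and a bare if inside a statement macro invites dangling-else bugs in callers:

	/* Generic illustration of the statement-macro hazard: */
	#define bad_outb(data, port) if (ops) ops->write(port, data)

	if (need_io)
		bad_outb(v, p);	/* expands to an inner `if` ... */
	else
		skip_io();	/* ... which captures this `else` */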
diff --git a/arch/cris/include/asm/ptrace.h b/arch/cris/include/asm/ptrace.h
index 6618893..9e788d0 100644
--- a/arch/cris/include/asm/ptrace.h
+++ b/arch/cris/include/asm/ptrace.h
@@ -1,16 +1,14 @@
 #ifndef _CRIS_PTRACE_H
 #define _CRIS_PTRACE_H
 
-#include <arch/ptrace.h>
+#include <uapi/asm/ptrace.h>
 
-#ifdef __KERNEL__
 
 /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13
 
 #define profile_pc(regs) instruction_pointer(regs)
-
-#endif /* __KERNEL__ */
+#define current_user_stack_pointer() rdusp()
 
 #endif /* _CRIS_PTRACE_H */
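This ptrace.h change is the first of the CRIS UAPI header splits that make up the rest of the series: the userspace-visible half of each header moves under include/uapi/, and the kernel-side header shrinks to kernel-only material plus an include of the exported copy, replacing the old #ifdef __KERNEL__ guards. The recurring shape (foo.h is a placeholder name):

	/* arch/cris/include/uapi/asm/foo.h -- exported ABI */
	#ifndef _UAPI_ASM_CRIS_FOO_H
	#define _UAPI_ASM_CRIS_FOO_H
	/* structs, constants, ioctl numbers shared with userspace */
	#endif

	/* arch/cris/include/asm/foo.h -- kernel-internal */
	#ifndef _ASM_CRIS_FOO_H
	#define _ASM_CRIS_FOO_H
	#include <uapi/asm/foo.h>
	/* helpers formerly under #ifdef __KERNEL__ */
	#endif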
diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h
index 72dbbf5..c0cb1fd 100644
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -1,12 +1,8 @@
 #ifndef _ASM_CRIS_SIGNAL_H
 #define _ASM_CRIS_SIGNAL_H
 
-#include <linux/types.h>
+#include <uapi/asm/signal.h>
 
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-#ifdef __KERNEL__
 /* Most things should be clean enough to redefine this at will, if care
    is taken to make libc match.  */
 
@@ -20,95 +16,6 @@
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-#define NSIG		32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGBUS		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGUSR1		10
-#define SIGSEGV		11
-#define SIGUSR2		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGSTKFLT	16
-#define SIGCHLD		17
-#define SIGCONT		18
-#define SIGSTOP		19
-#define SIGTSTP		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGURG		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGIO		29
-#define SIGPOLL		SIGIO
-/*
-#define SIGLOST		29
-*/
-#define SIGPWR		30
-#define SIGSYS          31
-#define	SIGUNUSED	31
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN        32
-#define SIGRTMAX        _NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-
-#define SA_NOCLDSTOP	0x00000001u
-#define SA_NOCLDWAIT	0x00000002u
-#define SA_SIGINFO	0x00000004u
-#define SA_ONSTACK	0x08000000u
-#define SA_RESTART	0x10000000u
-#define SA_NODEFER	0x40000000u
-#define SA_RESETHAND	0x80000000u
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000
-
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
-
-#include <asm-generic/signal-defs.h>
-
-#ifdef __KERNEL__
 struct old_sigaction {
 	__sighandler_t sa_handler;
 	old_sigset_t sa_mask;
@@ -126,32 +33,6 @@
 struct k_sigaction {
 	struct sigaction sa;
 };
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-struct sigaction {
-	union {
-	  __sighandler_t _sa_handler;
-	  void (*_sa_sigaction)(int, struct siginfo *, void *);
-	} _u;
-	sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-#define sa_handler	_u._sa_handler
-#define sa_sigaction	_u._sa_sigaction
-
-#endif /* __KERNEL__ */
-
-typedef struct sigaltstack {
-	void *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
 #include <asm/sigcontext.h>
-#endif /* __KERNEL__ */
 
 #endif
diff --git a/arch/cris/include/asm/swab.h b/arch/cris/include/asm/swab.h
index 80668e8..991b6ac 100644
--- a/arch/cris/include/asm/swab.h
+++ b/arch/cris/include/asm/swab.h
@@ -1,8 +1,7 @@
 #ifndef _CRIS_SWAB_H
 #define _CRIS_SWAB_H
 
-#ifdef __KERNEL__
 #include <arch/swab.h>
-#endif /* __KERNEL__ */
+#include <uapi/asm/swab.h>
 
 #endif /* _CRIS_SWAB_H */
diff --git a/arch/cris/include/asm/termios.h b/arch/cris/include/asm/termios.h
index 1265109..1991cd9e 100644
--- a/arch/cris/include/asm/termios.h
+++ b/arch/cris/include/asm/termios.h
@@ -1,47 +1,8 @@
 #ifndef _CRIS_TERMIOS_H
 #define _CRIS_TERMIOS_H
 
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-#include <asm/rs485.h>
-#include <linux/serial.h>
+#include <uapi/asm/termios.h>
 
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-#ifdef __KERNEL__
 
 /*	intr=^C		quit=^\		erase=del	kill=^U
 	eof=^D		vtime=\0	vmin=\1		sxtc=\0
@@ -87,6 +48,4 @@
 #define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
 #define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
 
-#endif	/* __KERNEL__ */
-
 #endif	/* _CRIS_TERMIOS_H */
diff --git a/arch/cris/include/asm/types.h b/arch/cris/include/asm/types.h
index adaf827..a3cac77 100644
--- a/arch/cris/include/asm/types.h
+++ b/arch/cris/include/asm/types.h
@@ -1,15 +1,12 @@
 #ifndef _ETRAX_TYPES_H
 #define _ETRAX_TYPES_H
 
-#include <asm-generic/int-ll64.h>
+#include <uapi/asm/types.h>
 
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
-#ifdef __KERNEL__
 
 #define BITS_PER_LONG 32
 
-#endif /* __KERNEL__ */
-
 #endif
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index f27b542..6d062bd 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -1,347 +1,8 @@
 #ifndef _ASM_CRIS_UNISTD_H_
 #define _ASM_CRIS_UNISTD_H_
 
-/*
- * This file contains the system call numbers, and stub macros for libc.
- */
+#include <uapi/asm/unistd.h>
 
-#define __NR_restart_syscall      0
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-#define __NR_waitpid		  7
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_lchown		 16
-#define __NR_break		 17
-#define __NR_oldstat		 18
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-#define __NR_umount		 22
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-#define __NR_oldfstat		 28
-#define __NR_pause		 29
-#define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
-#define __NR_access		 33
-#define __NR_nice		 34
-#define __NR_ftime		 35
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-#define __NR_prof		 44
-#define __NR_brk		 45
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_signal		 48
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_acct		 51
-#define __NR_umount2		 52
-#define __NR_lock		 53
-#define __NR_ioctl		 54
-#define __NR_fcntl		 55
-#define __NR_mpx		 56
-#define __NR_setpgid		 57
-#define __NR_ulimit		 58
-#define __NR_oldolduname	 59
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-#define __NR_sigaction		 67
-#define __NR_sgetmask		 68
-#define __NR_ssetmask		 69
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_sigsuspend		 72
-#define __NR_sigpending		 73
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-#define __NR_getrlimit		 76
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_select		 82
-#define __NR_symlink		 83
-#define __NR_oldlstat		 84
-#define __NR_readlink		 85
-#define __NR_uselib		 86
-#define __NR_swapon		 87
-#define __NR_reboot		 88
-#define __NR_readdir		 89
-#define __NR_mmap		 90
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-#define __NR_fchown		 95
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-#define __NR_profil		 98
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-#define __NR_ioperm		101
-#define __NR_socketcall		102
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-#define __NR_olduname		109
-#define __NR_iopl		110
-#define __NR_vhangup		111
-#define __NR_idle		112
-#define __NR_vm86		113
-#define __NR_wait4		114
-#define __NR_swapoff		115
-#define __NR_sysinfo		116
-#define __NR_ipc		117
-#define __NR_fsync		118
-#define __NR_sigreturn		119
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-#define __NR_modify_ldt		123
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-#define __NR_sigprocmask	126
-#define __NR_create_module	127
-#define __NR_init_module	128
-#define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-#define __NR_sysfs		135
-#define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR_getdents		141
-#define __NR__newselect		142
-#define __NR_flock		143
-#define __NR_msync		144
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-#define __NR_mlock		150
-#define __NR_munlock		151
-#define __NR_mlockall		152
-#define __NR_munlockall		153
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-
-#define __NR_query_module	167
-#define __NR_poll		168
-#define __NR_nfsservctl		169
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_prctl              172
-#define __NR_rt_sigreturn	173
-#define __NR_rt_sigaction	174
-#define __NR_rt_sigprocmask	175
-#define __NR_rt_sigpending	176
-#define __NR_rt_sigtimedwait	177
-#define __NR_rt_sigqueueinfo	178
-#define __NR_rt_sigsuspend	179
-#define __NR_pread64		180
-#define __NR_pwrite64		181
-#define __NR_chown		182
-#define __NR_getcwd		183
-#define __NR_capget		184
-#define __NR_capset		185
-#define __NR_sigaltstack	186
-#define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
-#define __NR_vfork		190
-#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
-#define __NR_pivot_root		217
-#define __NR_mincore		218
-#define __NR_madvise		219
-#define __NR_getdents64		220
-#define __NR_fcntl64		221
-/* 223 is unused */
-#define __NR_gettid             224
-#define __NR_readahead          225
-#define __NR_setxattr		226
-#define __NR_lsetxattr		227
-#define __NR_fsetxattr		228
-#define __NR_getxattr		229
-#define __NR_lgetxattr		230
-#define __NR_fgetxattr		231
-#define __NR_listxattr		232
-#define __NR_llistxattr		233
-#define __NR_flistxattr		234
-#define __NR_removexattr	235
-#define __NR_lremovexattr	236
-#define __NR_fremovexattr	237
-#define __NR_tkill		238
-#define __NR_sendfile64		239
-#define __NR_futex		240
-#define __NR_sched_setaffinity	241
-#define __NR_sched_getaffinity	242
-#define __NR_set_thread_area	243
-#define __NR_get_thread_area	244
-#define __NR_io_setup		245
-#define __NR_io_destroy		246
-#define __NR_io_getevents	247
-#define __NR_io_submit		248
-#define __NR_io_cancel		249
-#define __NR_fadvise64		250
-/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
-#define __NR_exit_group		252
-#define __NR_lookup_dcookie	253
-#define __NR_epoll_create	254
-#define __NR_epoll_ctl		255
-#define __NR_epoll_wait		256
-#define __NR_remap_file_pages	257
-#define __NR_set_tid_address	258
-#define __NR_timer_create	259
-#define __NR_timer_settime	(__NR_timer_create+1)
-#define __NR_timer_gettime	(__NR_timer_create+2)
-#define __NR_timer_getoverrun	(__NR_timer_create+3)
-#define __NR_timer_delete	(__NR_timer_create+4)
-#define __NR_clock_settime	(__NR_timer_create+5)
-#define __NR_clock_gettime	(__NR_timer_create+6)
-#define __NR_clock_getres	(__NR_timer_create+7)
-#define __NR_clock_nanosleep	(__NR_timer_create+8)
-#define __NR_statfs64		268
-#define __NR_fstatfs64		269
-#define __NR_tgkill		270
-#define __NR_utimes		271
-#define __NR_fadvise64_64	272
-#define __NR_vserver		273
-#define __NR_mbind		274
-#define __NR_get_mempolicy	275
-#define __NR_set_mempolicy	276
-#define __NR_mq_open		277
-#define __NR_mq_unlink		(__NR_mq_open+1)
-#define __NR_mq_timedsend	(__NR_mq_open+2)
-#define __NR_mq_timedreceive	(__NR_mq_open+3)
-#define __NR_mq_notify		(__NR_mq_open+4)
-#define __NR_mq_getsetattr	(__NR_mq_open+5)
-#define __NR_kexec_load		283
-#define __NR_waitid		284
-/* #define __NR_sys_setaltroot	285 */
-#define __NR_add_key		286
-#define __NR_request_key	287
-#define __NR_keyctl		288
-#define __NR_ioprio_set		289
-#define __NR_ioprio_get		290
-#define __NR_inotify_init	291
-#define __NR_inotify_add_watch	292
-#define __NR_inotify_rm_watch	293
-#define __NR_migrate_pages	294
-#define __NR_openat		295
-#define __NR_mkdirat		296
-#define __NR_mknodat		297
-#define __NR_fchownat		298
-#define __NR_futimesat		299
-#define __NR_fstatat64		300
-#define __NR_unlinkat		301
-#define __NR_renameat		302
-#define __NR_linkat		303
-#define __NR_symlinkat		304
-#define __NR_readlinkat		305
-#define __NR_fchmodat		306
-#define __NR_faccessat		307
-#define __NR_pselect6		308
-#define __NR_ppoll		309
-#define __NR_unshare		310
-#define __NR_set_robust_list	311
-#define __NR_get_robust_list	312
-#define __NR_splice		313
-#define __NR_sync_file_range	314
-#define __NR_tee		315
-#define __NR_vmsplice		316
-#define __NR_move_pages		317
-#define __NR_getcpu		318
-#define __NR_epoll_pwait	319
-#define __NR_utimensat		320
-#define __NR_signalfd		321
-#define __NR_timerfd_create	322
-#define __NR_eventfd		323
-#define __NR_fallocate		324
-#define __NR_timerfd_settime	325
-#define __NR_timerfd_gettime	326
-#define __NR_signalfd4		327
-#define __NR_eventfd2		328
-#define __NR_epoll_create1	329
-#define __NR_dup3		330
-#define __NR_pipe2		331
-#define __NR_inotify_init1	332
-#define __NR_preadv		333
-#define __NR_pwritev		334
-#define __NR_setns		335
-
-#ifdef __KERNEL__
 
 #define NR_syscalls 336
 
@@ -371,7 +32,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
@@ -384,5 +44,4 @@
  */
 #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
 
-#endif /* __KERNEL__ */
 #endif /* _ASM_CRIS_UNISTD_H_ */
diff --git a/arch/cris/include/uapi/arch-v10/arch/Kbuild b/arch/cris/include/uapi/arch-v10/arch/Kbuild
index aafaa5a..9048c87 100644
--- a/arch/cris/include/uapi/arch-v10/arch/Kbuild
+++ b/arch/cris/include/uapi/arch-v10/arch/Kbuild
@@ -1 +1,5 @@
 # UAPI Header export list
+header-y += sv_addr.agh
+header-y += sv_addr_ag.h
+header-y += svinto.h
+header-y += user.h
diff --git a/arch/cris/include/arch-v10/arch/sv_addr.agh b/arch/cris/include/uapi/arch-v10/arch/sv_addr.agh
similarity index 100%
rename from arch/cris/include/arch-v10/arch/sv_addr.agh
rename to arch/cris/include/uapi/arch-v10/arch/sv_addr.agh
diff --git a/arch/cris/include/arch-v10/arch/sv_addr_ag.h b/arch/cris/include/uapi/arch-v10/arch/sv_addr_ag.h
similarity index 100%
rename from arch/cris/include/arch-v10/arch/sv_addr_ag.h
rename to arch/cris/include/uapi/arch-v10/arch/sv_addr_ag.h
diff --git a/arch/cris/include/arch-v10/arch/svinto.h b/arch/cris/include/uapi/arch-v10/arch/svinto.h
similarity index 100%
rename from arch/cris/include/arch-v10/arch/svinto.h
rename to arch/cris/include/uapi/arch-v10/arch/svinto.h
diff --git a/arch/cris/include/arch-v10/arch/user.h b/arch/cris/include/uapi/arch-v10/arch/user.h
similarity index 100%
rename from arch/cris/include/arch-v10/arch/user.h
rename to arch/cris/include/uapi/arch-v10/arch/user.h
diff --git a/arch/cris/include/uapi/arch-v32/arch/Kbuild b/arch/cris/include/uapi/arch-v32/arch/Kbuild
index aafaa5a..59efffd 100644
--- a/arch/cris/include/uapi/arch-v32/arch/Kbuild
+++ b/arch/cris/include/uapi/arch-v32/arch/Kbuild
@@ -1 +1,3 @@
 # UAPI Header export list
+header-y += cryptocop.h
+header-y += user.h
diff --git a/arch/cris/include/uapi/arch-v32/arch/cryptocop.h b/arch/cris/include/uapi/arch-v32/arch/cryptocop.h
new file mode 100644
index 0000000..694fd13
--- /dev/null
+++ b/arch/cris/include/uapi/arch-v32/arch/cryptocop.h
@@ -0,0 +1,122 @@
+/*
+ * The device /dev/cryptocop is accessible using this driver using
+ * CRYPTOCOP_MAJOR (254) and minor number 0.
+ */
+
+#ifndef _UAPICRYPTOCOP_H
+#define _UAPICRYPTOCOP_H
+
+#include <linux/uio.h>
+
+
+#define CRYPTOCOP_SESSION_ID_NONE (0)
+
+typedef unsigned long long int cryptocop_session_id;
+
+/* cryptocop ioctls */
+#define ETRAXCRYPTOCOP_IOCTYPE         (250)
+
+#define CRYPTOCOP_IO_CREATE_SESSION    _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 1, struct strcop_session_op)
+#define CRYPTOCOP_IO_CLOSE_SESSION     _IOW(ETRAXCRYPTOCOP_IOCTYPE, 2, struct strcop_session_op)
+#define CRYPTOCOP_IO_PROCESS_OP        _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 3, struct strcop_crypto_op)
+#define CRYPTOCOP_IO_MAXNR             (3)
+
+typedef enum {
+	cryptocop_cipher_des = 0,
+	cryptocop_cipher_3des = 1,
+	cryptocop_cipher_aes = 2,
+	cryptocop_cipher_m2m = 3, /* mem2mem is essentially a NULL cipher with blocklength=1 */
+	cryptocop_cipher_none
+} cryptocop_cipher_type;
+
+typedef enum {
+	cryptocop_digest_sha1 = 0,
+	cryptocop_digest_md5 = 1,
+	cryptocop_digest_none
+} cryptocop_digest_type;
+
+typedef enum {
+	cryptocop_csum_le = 0,
+	cryptocop_csum_be = 1,
+	cryptocop_csum_none
+} cryptocop_csum_type;
+
+typedef enum {
+	cryptocop_cipher_mode_ecb = 0,
+	cryptocop_cipher_mode_cbc,
+	cryptocop_cipher_mode_none
+} cryptocop_cipher_mode;
+
+typedef enum {
+	cryptocop_3des_eee = 0,
+	cryptocop_3des_eed = 1,
+	cryptocop_3des_ede = 2,
+	cryptocop_3des_edd = 3,
+	cryptocop_3des_dee = 4,
+	cryptocop_3des_ded = 5,
+	cryptocop_3des_dde = 6,
+	cryptocop_3des_ddd = 7
+} cryptocop_3des_mode;
+
+/* Usermode accessible (ioctl) operations. */
+struct strcop_session_op{
+	cryptocop_session_id    ses_id;
+
+	cryptocop_cipher_type   cipher; /* AES, DES, 3DES, m2m, none */
+
+	cryptocop_cipher_mode   cmode; /* ECB, CBC, none */
+	cryptocop_3des_mode     des3_mode;
+
+	cryptocop_digest_type   digest; /* MD5, SHA1, none */
+
+	cryptocop_csum_type     csum;   /* BE, LE, none */
+
+	unsigned char           *key;
+	size_t                  keylen;
+};
+
+#define CRYPTOCOP_CSUM_LENGTH         (2)
+#define CRYPTOCOP_MAX_DIGEST_LENGTH   (20)  /* SHA-1 20, MD5 16 */
+#define CRYPTOCOP_MAX_IV_LENGTH       (16)  /* (3)DES==8, AES == 16 */
+#define CRYPTOCOP_MAX_KEY_LENGTH      (32)
+
+struct strcop_crypto_op{
+	cryptocop_session_id ses_id;
+
+	/* Indata. */
+	unsigned char            *indata;
+	size_t                   inlen; /* Total indata length. */
+
+	/* Cipher configuration. */
+	unsigned char            do_cipher:1;
+	unsigned char            decrypt:1; /* 1 == decrypt, 0 == encrypt */
+	unsigned char            cipher_explicit:1;
+	size_t                   cipher_start;
+	size_t                   cipher_len;
+	/* cipher_iv is used if do_cipher and cipher_explicit and the cipher
+	   mode is CBC.  The length is controlled by the type of cipher,
+	   e.g. DES/3DES 8 octets and AES 16 octets. */
+	unsigned char            cipher_iv[CRYPTOCOP_MAX_IV_LENGTH];
+	/* Outdata. */
+	unsigned char            *cipher_outdata;
+	size_t                   cipher_outlen;
+
+	/* digest configuration. */
+	unsigned char            do_digest:1;
+	size_t                   digest_start;
+	size_t                   digest_len;
+	/* Outdata.  The actual length is determined by the type of the digest. */
+	unsigned char            digest[CRYPTOCOP_MAX_DIGEST_LENGTH];
+
+	/* Checksum configuration. */
+	unsigned char            do_csum:1;
+	size_t                   csum_start;
+	size_t                   csum_len;
+	/* Outdata. */
+	unsigned char            csum[CRYPTOCOP_CSUM_LENGTH];
+};
+
+
+
+
+#endif /* _UAPICRYPTOCOP_H */
diff --git a/arch/cris/include/arch-v32/arch/user.h b/arch/cris/include/uapi/arch-v32/arch/user.h
similarity index 100%
rename from arch/cris/include/arch-v32/arch/user.h
rename to arch/cris/include/uapi/arch-v32/arch/user.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index f50236a..7d47b36 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -3,3 +3,37 @@
 
 header-y += arch-v10/
 header-y += arch-v32/
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += errno.h
+header-y += ethernet.h
+header-y += etraxgpio.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += mman.h
+header-y += msgbuf.h
+header-y += param.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += rs485.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += sync_serial.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += unistd.h
diff --git a/arch/cris/include/asm/auxvec.h b/arch/cris/include/uapi/asm/auxvec.h
similarity index 100%
rename from arch/cris/include/asm/auxvec.h
rename to arch/cris/include/uapi/asm/auxvec.h
diff --git a/arch/cris/include/asm/bitsperlong.h b/arch/cris/include/uapi/asm/bitsperlong.h
similarity index 100%
rename from arch/cris/include/asm/bitsperlong.h
rename to arch/cris/include/uapi/asm/bitsperlong.h
diff --git a/arch/cris/include/asm/byteorder.h b/arch/cris/include/uapi/asm/byteorder.h
similarity index 100%
rename from arch/cris/include/asm/byteorder.h
rename to arch/cris/include/uapi/asm/byteorder.h
diff --git a/arch/cris/include/asm/errno.h b/arch/cris/include/uapi/asm/errno.h
similarity index 100%
rename from arch/cris/include/asm/errno.h
rename to arch/cris/include/uapi/asm/errno.h
diff --git a/arch/cris/include/asm/ethernet.h b/arch/cris/include/uapi/asm/ethernet.h
similarity index 100%
rename from arch/cris/include/asm/ethernet.h
rename to arch/cris/include/uapi/asm/ethernet.h
diff --git a/arch/cris/include/asm/etraxgpio.h b/arch/cris/include/uapi/asm/etraxgpio.h
similarity index 100%
rename from arch/cris/include/asm/etraxgpio.h
rename to arch/cris/include/uapi/asm/etraxgpio.h
diff --git a/arch/cris/include/asm/fcntl.h b/arch/cris/include/uapi/asm/fcntl.h
similarity index 100%
rename from arch/cris/include/asm/fcntl.h
rename to arch/cris/include/uapi/asm/fcntl.h
diff --git a/arch/cris/include/asm/ioctl.h b/arch/cris/include/uapi/asm/ioctl.h
similarity index 100%
rename from arch/cris/include/asm/ioctl.h
rename to arch/cris/include/uapi/asm/ioctl.h
diff --git a/arch/cris/include/asm/ioctls.h b/arch/cris/include/uapi/asm/ioctls.h
similarity index 100%
rename from arch/cris/include/asm/ioctls.h
rename to arch/cris/include/uapi/asm/ioctls.h
diff --git a/arch/cris/include/asm/ipcbuf.h b/arch/cris/include/uapi/asm/ipcbuf.h
similarity index 100%
rename from arch/cris/include/asm/ipcbuf.h
rename to arch/cris/include/uapi/asm/ipcbuf.h
diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/uapi/asm/mman.h
similarity index 100%
rename from arch/cris/include/asm/mman.h
rename to arch/cris/include/uapi/asm/mman.h
diff --git a/arch/cris/include/asm/msgbuf.h b/arch/cris/include/uapi/asm/msgbuf.h
similarity index 100%
rename from arch/cris/include/asm/msgbuf.h
rename to arch/cris/include/uapi/asm/msgbuf.h
diff --git a/arch/cris/include/asm/param.h b/arch/cris/include/uapi/asm/param.h
similarity index 100%
rename from arch/cris/include/asm/param.h
rename to arch/cris/include/uapi/asm/param.h
diff --git a/arch/cris/include/asm/poll.h b/arch/cris/include/uapi/asm/poll.h
similarity index 100%
rename from arch/cris/include/asm/poll.h
rename to arch/cris/include/uapi/asm/poll.h
diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/uapi/asm/posix_types.h
similarity index 100%
rename from arch/cris/include/asm/posix_types.h
rename to arch/cris/include/uapi/asm/posix_types.h
diff --git a/arch/cris/include/uapi/asm/ptrace.h b/arch/cris/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..c689c9b
--- /dev/null
+++ b/arch/cris/include/uapi/asm/ptrace.h
@@ -0,0 +1 @@
+#include <arch/ptrace.h>
diff --git a/arch/cris/include/asm/resource.h b/arch/cris/include/uapi/asm/resource.h
similarity index 100%
rename from arch/cris/include/asm/resource.h
rename to arch/cris/include/uapi/asm/resource.h
diff --git a/arch/cris/include/asm/rs485.h b/arch/cris/include/uapi/asm/rs485.h
similarity index 100%
rename from arch/cris/include/asm/rs485.h
rename to arch/cris/include/uapi/asm/rs485.h
diff --git a/arch/cris/include/asm/sembuf.h b/arch/cris/include/uapi/asm/sembuf.h
similarity index 100%
rename from arch/cris/include/asm/sembuf.h
rename to arch/cris/include/uapi/asm/sembuf.h
diff --git a/arch/cris/include/asm/setup.h b/arch/cris/include/uapi/asm/setup.h
similarity index 100%
rename from arch/cris/include/asm/setup.h
rename to arch/cris/include/uapi/asm/setup.h
diff --git a/arch/cris/include/asm/shmbuf.h b/arch/cris/include/uapi/asm/shmbuf.h
similarity index 100%
rename from arch/cris/include/asm/shmbuf.h
rename to arch/cris/include/uapi/asm/shmbuf.h
diff --git a/arch/cris/include/asm/sigcontext.h b/arch/cris/include/uapi/asm/sigcontext.h
similarity index 100%
rename from arch/cris/include/asm/sigcontext.h
rename to arch/cris/include/uapi/asm/sigcontext.h
diff --git a/arch/cris/include/asm/siginfo.h b/arch/cris/include/uapi/asm/siginfo.h
similarity index 100%
rename from arch/cris/include/asm/siginfo.h
rename to arch/cris/include/uapi/asm/siginfo.h
diff --git a/arch/cris/include/uapi/asm/signal.h b/arch/cris/include/uapi/asm/signal.h
new file mode 100644
index 0000000..ce42fa7
--- /dev/null
+++ b/arch/cris/include/uapi/asm/signal.h
@@ -0,0 +1,116 @@
+#ifndef _UAPI_ASM_CRIS_SIGNAL_H
+#define _UAPI_ASM_CRIS_SIGNAL_H
+
+#include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG		32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS          31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN        32
+#define SIGRTMAX        _NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+
+#define SA_NOCLDSTOP	0x00000001u
+#define SA_NOCLDWAIT	0x00000002u
+#define SA_SIGINFO	0x00000004u
+#define SA_ONSTACK	0x08000000u
+#define SA_RESTART	0x10000000u
+#define SA_NODEFER	0x40000000u
+#define SA_RESETHAND	0x80000000u
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+#define SA_RESTORER	0x04000000
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+	union {
+	  __sighandler_t _sa_handler;
+	  void (*_sa_sigaction)(int, struct siginfo *, void *);
+	} _u;
+	sigset_t sa_mask;
+	unsigned long sa_flags;
+	void (*sa_restorer)(void);
+};
+
+#define sa_handler	_u._sa_handler
+#define sa_sigaction	_u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+	void *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+
+#endif /* _UAPI_ASM_CRIS_SIGNAL_H */
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
similarity index 100%
rename from arch/cris/include/asm/socket.h
rename to arch/cris/include/uapi/asm/socket.h
diff --git a/arch/cris/include/asm/sockios.h b/arch/cris/include/uapi/asm/sockios.h
similarity index 100%
rename from arch/cris/include/asm/sockios.h
rename to arch/cris/include/uapi/asm/sockios.h
diff --git a/arch/cris/include/asm/stat.h b/arch/cris/include/uapi/asm/stat.h
similarity index 100%
rename from arch/cris/include/asm/stat.h
rename to arch/cris/include/uapi/asm/stat.h
diff --git a/arch/cris/include/asm/statfs.h b/arch/cris/include/uapi/asm/statfs.h
similarity index 100%
rename from arch/cris/include/asm/statfs.h
rename to arch/cris/include/uapi/asm/statfs.h
diff --git a/arch/cris/include/uapi/asm/swab.h b/arch/cris/include/uapi/asm/swab.h
new file mode 100644
index 0000000..4adf1e9
--- /dev/null
+++ b/arch/cris/include/uapi/asm/swab.h
@@ -0,0 +1,3 @@
+/*
+ * CRIS byte swapping.
+ */
diff --git a/arch/cris/include/asm/sync_serial.h b/arch/cris/include/uapi/asm/sync_serial.h
similarity index 100%
rename from arch/cris/include/asm/sync_serial.h
rename to arch/cris/include/uapi/asm/sync_serial.h
diff --git a/arch/cris/include/asm/termbits.h b/arch/cris/include/uapi/asm/termbits.h
similarity index 100%
rename from arch/cris/include/asm/termbits.h
rename to arch/cris/include/uapi/asm/termbits.h
diff --git a/arch/cris/include/uapi/asm/termios.h b/arch/cris/include/uapi/asm/termios.h
new file mode 100644
index 0000000..0a0386a
--- /dev/null
+++ b/arch/cris/include/uapi/asm/termios.h
@@ -0,0 +1,45 @@
+#ifndef _UAPI_CRIS_TERMIOS_H
+#define _UAPI_CRIS_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+#include <asm/rs485.h>
+#include <linux/serial.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+
+#endif /* _UAPI_CRIS_TERMIOS_H */
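The TIOCM_* bits above are read and written through the TIOCMGET/TIOCMSET ioctls. A hedged userspace sketch; the device path is an assumption, any tty with modem-control lines will do:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY); /* path is illustrative */
	int bits;

	if (fd < 0 || ioctl(fd, TIOCMGET, &bits) < 0) {
		perror("TIOCMGET");
		return 1;
	}
	printf("DTR=%d RTS=%d CTS=%d CD=%d\n",
	       !!(bits & TIOCM_DTR), !!(bits & TIOCM_RTS),
	       !!(bits & TIOCM_CTS), !!(bits & TIOCM_CD));

	bits |= TIOCM_DTR;			/* assert DTR */
	if (ioctl(fd, TIOCMSET, &bits) < 0)
		perror("TIOCMSET");
	close(fd);
	return 0;
}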
diff --git a/arch/cris/include/uapi/asm/types.h b/arch/cris/include/uapi/asm/types.h
new file mode 100644
index 0000000..9ec9d4c
--- /dev/null
+++ b/arch/cris/include/uapi/asm/types.h
@@ -0,0 +1 @@
+#include <asm-generic/int-ll64.h>
diff --git a/arch/cris/include/uapi/asm/unistd.h b/arch/cris/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..4884289
--- /dev/null
+++ b/arch/cris/include/uapi/asm/unistd.h
@@ -0,0 +1,344 @@
+#ifndef _UAPI_ASM_CRIS_UNISTD_H_
+#define _UAPI_ASM_CRIS_UNISTD_H_
+
+/*
+ * This file contains the system call numbers, and stub macros for libc.
+ */
+
+#define __NR_restart_syscall      0
+#define __NR_exit		  1
+#define __NR_fork		  2
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_open		  5
+#define __NR_close		  6
+#define __NR_waitpid		  7
+#define __NR_creat		  8
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_execve		 11
+#define __NR_chdir		 12
+#define __NR_time		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+#define __NR_lchown		 16
+#define __NR_break		 17
+#define __NR_oldstat		 18
+#define __NR_lseek		 19
+#define __NR_getpid		 20
+#define __NR_mount		 21
+#define __NR_umount		 22
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_ptrace		 26
+#define __NR_alarm		 27
+#define __NR_oldfstat		 28
+#define __NR_pause		 29
+#define __NR_utime		 30
+#define __NR_stty		 31
+#define __NR_gtty		 32
+#define __NR_access		 33
+#define __NR_nice		 34
+#define __NR_ftime		 35
+#define __NR_sync		 36
+#define __NR_kill		 37
+#define __NR_rename		 38
+#define __NR_mkdir		 39
+#define __NR_rmdir		 40
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_times		 43
+#define __NR_prof		 44
+#define __NR_brk		 45
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_signal		 48
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_acct		 51
+#define __NR_umount2		 52
+#define __NR_lock		 53
+#define __NR_ioctl		 54
+#define __NR_fcntl		 55
+#define __NR_mpx		 56
+#define __NR_setpgid		 57
+#define __NR_ulimit		 58
+#define __NR_oldolduname	 59
+#define __NR_umask		 60
+#define __NR_chroot		 61
+#define __NR_ustat		 62
+#define __NR_dup2		 63
+#define __NR_getppid		 64
+#define __NR_getpgrp		 65
+#define __NR_setsid		 66
+#define __NR_sigaction		 67
+#define __NR_sgetmask		 68
+#define __NR_ssetmask		 69
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+#define __NR_sigsuspend		 72
+#define __NR_sigpending		 73
+#define __NR_sethostname	 74
+#define __NR_setrlimit		 75
+#define __NR_getrlimit		 76
+#define __NR_getrusage		 77
+#define __NR_gettimeofday	 78
+#define __NR_settimeofday	 79
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_select		 82
+#define __NR_symlink		 83
+#define __NR_oldlstat		 84
+#define __NR_readlink		 85
+#define __NR_uselib		 86
+#define __NR_swapon		 87
+#define __NR_reboot		 88
+#define __NR_readdir		 89
+#define __NR_mmap		 90
+#define __NR_munmap		 91
+#define __NR_truncate		 92
+#define __NR_ftruncate		 93
+#define __NR_fchmod		 94
+#define __NR_fchown		 95
+#define __NR_getpriority	 96
+#define __NR_setpriority	 97
+#define __NR_profil		 98
+#define __NR_statfs		 99
+#define __NR_fstatfs		100
+#define __NR_ioperm		101
+#define __NR_socketcall		102
+#define __NR_syslog		103
+#define __NR_setitimer		104
+#define __NR_getitimer		105
+#define __NR_stat		106
+#define __NR_lstat		107
+#define __NR_fstat		108
+#define __NR_olduname		109
+#define __NR_iopl		110
+#define __NR_vhangup		111
+#define __NR_idle		112
+#define __NR_vm86		113
+#define __NR_wait4		114
+#define __NR_swapoff		115
+#define __NR_sysinfo		116
+#define __NR_ipc		117
+#define __NR_fsync		118
+#define __NR_sigreturn		119
+#define __NR_clone		120
+#define __NR_setdomainname	121
+#define __NR_uname		122
+#define __NR_modify_ldt		123
+#define __NR_adjtimex		124
+#define __NR_mprotect		125
+#define __NR_sigprocmask	126
+#define __NR_create_module	127
+#define __NR_init_module	128
+#define __NR_delete_module	129
+#define __NR_get_kernel_syms	130
+#define __NR_quotactl		131
+#define __NR_getpgid		132
+#define __NR_fchdir		133
+#define __NR_bdflush		134
+#define __NR_sysfs		135
+#define __NR_personality	136
+#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR_getdents		141
+#define __NR__newselect		142
+#define __NR_flock		143
+#define __NR_msync		144
+#define __NR_readv		145
+#define __NR_writev		146
+#define __NR_getsid		147
+#define __NR_fdatasync		148
+#define __NR__sysctl		149
+#define __NR_mlock		150
+#define __NR_munlock		151
+#define __NR_mlockall		152
+#define __NR_munlockall		153
+#define __NR_sched_setparam		154
+#define __NR_sched_getparam		155
+#define __NR_sched_setscheduler		156
+#define __NR_sched_getscheduler		157
+#define __NR_sched_yield		158
+#define __NR_sched_get_priority_max	159
+#define __NR_sched_get_priority_min	160
+#define __NR_sched_rr_get_interval	161
+#define __NR_nanosleep		162
+#define __NR_mremap		163
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+
+#define __NR_query_module	167
+#define __NR_poll		168
+#define __NR_nfsservctl		169
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_prctl              172
+#define __NR_rt_sigreturn	173
+#define __NR_rt_sigaction	174
+#define __NR_rt_sigprocmask	175
+#define __NR_rt_sigpending	176
+#define __NR_rt_sigtimedwait	177
+#define __NR_rt_sigqueueinfo	178
+#define __NR_rt_sigsuspend	179
+#define __NR_pread64		180
+#define __NR_pwrite64		181
+#define __NR_chown		182
+#define __NR_getcwd		183
+#define __NR_capget		184
+#define __NR_capset		185
+#define __NR_sigaltstack	186
+#define __NR_sendfile		187
+#define __NR_getpmsg		188	/* some people actually want streams */
+#define __NR_putpmsg		189	/* some people actually want streams */
+#define __NR_vfork		190
+#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_pivot_root		217
+#define __NR_mincore		218
+#define __NR_madvise		219
+#define __NR_getdents64		220
+#define __NR_fcntl64		221
+/* 223 is unused */
+#define __NR_gettid             224
+#define __NR_readahead          225
+#define __NR_setxattr		226
+#define __NR_lsetxattr		227
+#define __NR_fsetxattr		228
+#define __NR_getxattr		229
+#define __NR_lgetxattr		230
+#define __NR_fgetxattr		231
+#define __NR_listxattr		232
+#define __NR_llistxattr		233
+#define __NR_flistxattr		234
+#define __NR_removexattr	235
+#define __NR_lremovexattr	236
+#define __NR_fremovexattr	237
+#define __NR_tkill		238
+#define __NR_sendfile64		239
+#define __NR_futex		240
+#define __NR_sched_setaffinity	241
+#define __NR_sched_getaffinity	242
+#define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
+#define __NR_io_setup		245
+#define __NR_io_destroy		246
+#define __NR_io_getevents	247
+#define __NR_io_submit		248
+#define __NR_io_cancel		249
+#define __NR_fadvise64		250
+/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
+#define __NR_exit_group		252
+#define __NR_lookup_dcookie	253
+#define __NR_epoll_create	254
+#define __NR_epoll_ctl		255
+#define __NR_epoll_wait		256
+#define __NR_remap_file_pages	257
+#define __NR_set_tid_address	258
+#define __NR_timer_create	259
+#define __NR_timer_settime	(__NR_timer_create+1)
+#define __NR_timer_gettime	(__NR_timer_create+2)
+#define __NR_timer_getoverrun	(__NR_timer_create+3)
+#define __NR_timer_delete	(__NR_timer_create+4)
+#define __NR_clock_settime	(__NR_timer_create+5)
+#define __NR_clock_gettime	(__NR_timer_create+6)
+#define __NR_clock_getres	(__NR_timer_create+7)
+#define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_statfs64		268
+#define __NR_fstatfs64		269
+#define __NR_tgkill		270
+#define __NR_utimes		271
+#define __NR_fadvise64_64	272
+#define __NR_vserver		273
+#define __NR_mbind		274
+#define __NR_get_mempolicy	275
+#define __NR_set_mempolicy	276
+#define __NR_mq_open		277
+#define __NR_mq_unlink		(__NR_mq_open+1)
+#define __NR_mq_timedsend	(__NR_mq_open+2)
+#define __NR_mq_timedreceive	(__NR_mq_open+3)
+#define __NR_mq_notify		(__NR_mq_open+4)
+#define __NR_mq_getsetattr	(__NR_mq_open+5)
+#define __NR_kexec_load		283
+#define __NR_waitid		284
+/* #define __NR_sys_setaltroot	285 */
+#define __NR_add_key		286
+#define __NR_request_key	287
+#define __NR_keyctl		288
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
+#define __NR_inotify_init	291
+#define __NR_inotify_add_watch	292
+#define __NR_inotify_rm_watch	293
+#define __NR_migrate_pages	294
+#define __NR_openat		295
+#define __NR_mkdirat		296
+#define __NR_mknodat		297
+#define __NR_fchownat		298
+#define __NR_futimesat		299
+#define __NR_fstatat64		300
+#define __NR_unlinkat		301
+#define __NR_renameat		302
+#define __NR_linkat		303
+#define __NR_symlinkat		304
+#define __NR_readlinkat		305
+#define __NR_fchmodat		306
+#define __NR_faccessat		307
+#define __NR_pselect6		308
+#define __NR_ppoll		309
+#define __NR_unshare		310
+#define __NR_set_robust_list	311
+#define __NR_get_robust_list	312
+#define __NR_splice		313
+#define __NR_sync_file_range	314
+#define __NR_tee		315
+#define __NR_vmsplice		316
+#define __NR_move_pages		317
+#define __NR_getcpu		318
+#define __NR_epoll_pwait	319
+#define __NR_utimensat		320
+#define __NR_signalfd		321
+#define __NR_timerfd_create	322
+#define __NR_eventfd		323
+#define __NR_fallocate		324
+#define __NR_timerfd_settime	325
+#define __NR_timerfd_gettime	326
+#define __NR_signalfd4		327
+#define __NR_eventfd2		328
+#define __NR_epoll_create1	329
+#define __NR_dup3		330
+#define __NR_pipe2		331
+#define __NR_inotify_init1	332
+#define __NR_preadv		333
+#define __NR_pwritev		334
+#define __NR_setns		335
+
+#endif /* _UAPI_ASM_CRIS_UNISTD_H_ */
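These __NR_* values are per-architecture; the same syscall can carry a different number elsewhere, which is why portable code goes through syscall(2) and the SYS_* aliases that <sys/syscall.h> derives from the build architecture's table. A small sketch (gettid is a classic example because older libcs shipped no wrapper for it):

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* SYS_* expands to whatever __NR_* the build architecture uses. */
	long pid = syscall(SYS_getpid);
	long tid = syscall(SYS_gettid);

	printf("pid=%ld tid=%ld\n", pid, tid);
	return 0;
}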
diff --git a/arch/cris/kernel/asm-offsets.c b/arch/cris/kernel/asm-offsets.c
index dd7b8e9..a5fd88d 100644
--- a/arch/cris/kernel/asm-offsets.c
+++ b/arch/cris/kernel/asm-offsets.c
@@ -1,3 +1,4 @@
+#include <linux/kbuild.h>
 #include <linux/sched.h>
 #include <asm/thread_info.h>
 
@@ -7,11 +8,6 @@
  * and format the required data.
  */
 
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
 #if !defined(CONFIG_ETRAX_ARCH_V10) && !defined(CONFIG_ETRAX_ARCH_V32)
 #error One of ARCH v10 and ARCH v32 must be true!
 #endif
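The DEFINE()/BLANK() bodies deleted above are the same ones <linux/kbuild.h> now provides centrally: the macro forces the compiler to print a "->SYM value" marker into its assembly output, and the kbuild machinery post-processes those markers into the generated asm-offsets.h. A standalone sketch using the exact macro bodies removed here (the struct and symbol name are stand-ins):

#include <stddef.h>

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define BLANK() asm volatile("\n->" : : )

struct demo_thread_info {
	unsigned long flags;
	/* ... */
};

void asm_offsets_demo(void)
{
	/* Compile with -S and the markers show up in the .s output. */
	DEFINE(TI_FLAGS, offsetof(struct demo_thread_info, flags));
	BLANK();
}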
diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c
index 37400f5..51123f9 100644
--- a/arch/cris/kernel/module.c
+++ b/arch/cris/kernel/module.c
@@ -32,8 +32,6 @@
 #ifdef CONFIG_ETRAX_KMALLOCED_MODULES
 void *module_alloc(unsigned long size)
 {
-	if (size == 0)
-		return NULL;
 	return kmalloc(size, GFP_KERNEL);
 }
 
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index df2eb4b..9d26264 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -12,8 +12,6 @@
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CPU_DEVICES
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index 1807d8e..d685da1 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -29,7 +29,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index b8993c8..3cb3392 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -804,9 +804,9 @@
 
 	BUG_ON(memory_start == memory_end);
 
-	init_mm.start_code = (unsigned long) &_stext;
-	init_mm.end_code = (unsigned long) &_etext;
-	init_mm.end_data = (unsigned long) &_edata;
+	init_mm.start_code = (unsigned long) _stext;
+	init_mm.end_code = (unsigned long) _etext;
+	init_mm.end_data = (unsigned long) _edata;
 #if 0 /* DAVIDM - don't set brk just incase someone decides to use it */
 	init_mm.brk = (unsigned long) &_end;
 #else
@@ -814,10 +814,8 @@
 #endif
 
 #ifdef DEBUG
-	printk("KERNEL -> TEXT=0x%06x-0x%06x DATA=0x%06x-0x%06x BSS=0x%06x-0x%06x\n",
-	       (int) &_stext, (int) &_etext,
-	       (int) &_sdata, (int) &_edata,
-	       (int) &_sbss, (int) &_ebss);
+	printk("KERNEL -> TEXT=0x%p-0x%p DATA=0x%p-0x%p BSS=0x%p-0x%p\n",
+	       _stext, _etext, _sdata, _edata, __bss_start, __bss_stop);
 #endif
 
 #ifdef CONFIG_VT
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index a19effc..92e97b0 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -146,7 +146,7 @@
 
 #else
 	codek = (_etext - _stext) >> 10;
-	datak = 0; //(_ebss - _sdata) >> 10;
+	datak = 0; //(__bss_stop - _sdata) >> 10;
 #endif
 
 	tmp = nr_free_pages() << PAGE_SHIFT;
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 04bef4d..2d2efb6 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -3,13 +3,12 @@
 	default y
 	select HAVE_IDE
 	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_ATOMIC64
 	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SYMBOL_PREFIX
 	string
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 4bc8ae7..995eb47 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1,6 +1,6 @@
-include include/asm-generic/Kbuild.asm
 
 generic-y += clkdev.h
 generic-y += exec.h
+generic-y += mmu.h
 generic-y += module.h
 generic-y += trace_clock.h
diff --git a/arch/h8300/include/asm/mmu.h b/arch/h8300/include/asm/mmu.h
deleted file mode 100644
index 3130996..0000000
--- a/arch/h8300/include/asm/mmu.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __MMU_H
-#define __MMU_H
-
-/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
-
-typedef struct {
-	unsigned long		end_brk;
-} mm_context_t;
-
-#endif
diff --git a/arch/h8300/include/asm/param.h b/arch/h8300/include/asm/param.h
index 1c72fb8..c3909e7 100644
--- a/arch/h8300/include/asm/param.h
+++ b/arch/h8300/include/asm/param.h
@@ -1,20 +1,9 @@
 #ifndef _H8300_PARAM_H
 #define _H8300_PARAM_H
 
-#ifdef __KERNEL__
+#include <uapi/asm/param.h>
+
 #define HZ		CONFIG_HZ
 #define	USER_HZ		HZ
 #define	CLOCKS_PER_SEC	(USER_HZ)
-#else
-#define HZ		100
-#endif
-
-#define EXEC_PAGESIZE	4096
-
-#ifndef NOGROUP
-#define NOGROUP		(-1)
-#endif
-
-#define MAXHOSTNAMELEN	64	/* max length of hostname */
-
 #endif /* _H8300_PARAM_H */
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index 7468589..c1826b9 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -1,46 +1,11 @@
 #ifndef _H8300_PTRACE_H
 #define _H8300_PTRACE_H
 
+#include <uapi/asm/ptrace.h>
+
 #ifndef __ASSEMBLY__
-
-#define PT_ER1	   0
-#define PT_ER2	   1
-#define PT_ER3	   2
-#define PT_ER4	   3
-#define PT_ER5	   4
-#define PT_ER6	   5
-#define PT_ER0	   6
-#define PT_ORIG_ER0	   7
-#define PT_CCR	   8
-#define PT_PC	   9
-#define PT_USP	   10
-#define PT_EXR     12
-
-/* this struct defines the way the registers are stored on the
-   stack during a system call. */
-
-struct pt_regs {
-	long     retpc;
-	long     er4;
-	long     er5;
-	long     er6;
-	long     er3;
-	long     er2;
-	long     er1;
-	long     orig_er0;
-	unsigned short ccr;
-	long     er0;
-	long     vector;
 #if defined(CONFIG_CPU_H8S)
-	unsigned short exr;
 #endif
-	unsigned long  pc;
-} __attribute__((aligned(2),packed));
-
-#define PTRACE_GETREGS            12
-#define PTRACE_SETREGS            13
-
-#ifdef __KERNEL__
 #ifndef PS_S
 #define PS_S  (0x10)
 #endif
@@ -63,6 +28,6 @@
 #define current_pt_regs() ((struct pt_regs *) \
 	(THREAD_SIZE + (unsigned long)current_thread_info()) - 1)
 #define signal_pt_regs() ((struct pt_regs *)current->thread.esp0)
-#endif /* __KERNEL__ */
+#define current_user_stack_pointer() rdusp()
 #endif /* __ASSEMBLY__ */
 #endif /* _H8300_PTRACE_H */
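current_pt_regs() works because the saved user registers sit at the very top of the task's kernel stack while thread_info sits at its base, so the struct is reachable by pure address arithmetic. A freestanding sketch of that arithmetic; the THREAD_SIZE value and structure layouts are stand-ins, not h8300's real configuration:

#define THREAD_SIZE 8192	/* stand-in value */

struct pt_regs { long regs[16]; };		/* stand-in layout */
struct thread_info { unsigned long flags; };	/* stand-in layout */

/* Step one pt_regs back from the top of the stack region. */
static struct pt_regs *pt_regs_of(struct thread_info *ti)
{
	return (struct pt_regs *)((unsigned long)ti + THREAD_SIZE) - 1;
}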
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
index c43c0a7..66c81c6 100644
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -1,12 +1,8 @@
 #ifndef _H8300_SIGNAL_H
 #define _H8300_SIGNAL_H
 
-#include <linux/types.h>
+#include <uapi/asm/signal.h>
 
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-#ifdef __KERNEL__
 /* Most things should be clean enough to redefine this at will, if care
    is taken to make libc match.  */
 
@@ -20,94 +16,6 @@
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-#define NSIG		32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGBUS		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGUSR1		10
-#define SIGSEGV		11
-#define SIGUSR2		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGSTKFLT	16
-#define SIGCHLD		17
-#define SIGCONT		18
-#define SIGSTOP		19
-#define SIGTSTP		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGURG		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGIO		29
-#define SIGPOLL		SIGIO
-/*
-#define SIGLOST		29
-*/
-#define SIGPWR		30
-#define SIGSYS		31
-#define	SIGUNUSED	31
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	32
-#define SIGRTMAX	_NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002 /* not supported yet */
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000
-
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
-
-#include <asm-generic/signal-defs.h>
-
-#ifdef __KERNEL__
 struct old_sigaction {
 	__sighandler_t sa_handler;
 	old_sigset_t sa_mask;
@@ -125,35 +33,8 @@
 struct k_sigaction {
 	struct sigaction sa;
 };
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-struct sigaction {
-	union {
-	  __sighandler_t _sa_handler;
-	  void (*_sa_sigaction)(int, struct siginfo *, void *);
-	} _u;
-	sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-#define sa_handler	_u._sa_handler
-#define sa_sigaction	_u._sa_sigaction
-
-#endif /* __KERNEL__ */
-
-typedef struct sigaltstack {
-	void *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
 
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#endif /* __KERNEL__ */
-
 #endif /* _H8300_SIGNAL_H */
diff --git a/arch/h8300/include/asm/termios.h b/arch/h8300/include/asm/termios.h
index 70eea64..93a63df 100644
--- a/arch/h8300/include/asm/termios.h
+++ b/arch/h8300/include/asm/termios.h
@@ -1,27 +1,8 @@
 #ifndef _H8300_TERMIOS_H
 #define _H8300_TERMIOS_H
 
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
- 
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
+#include <uapi/asm/termios.h>
 
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-#ifdef __KERNEL__
 /*	intr=^C		quit=^|		erase=del	kill=^U
 	eof=^D		vtime=\0	vmin=\1		sxtc=\0
 	start=^Q	stop=^S		susp=^Z		eol=\0
@@ -29,27 +10,6 @@
 	eol2=\0
 */
 #define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-#endif
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-#ifdef __KERNEL__
 
 /*
  * Translate a "termio" structure into a "termios". Ugh.
@@ -87,6 +47,4 @@
 #define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
 #define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
 
-#endif	/* __KERNEL__ */
-
 #endif /* _H8300_TERMIOS_H */
diff --git a/arch/h8300/include/asm/types.h b/arch/h8300/include/asm/types.h
index 07257d9..c012707 100644
--- a/arch/h8300/include/asm/types.h
+++ b/arch/h8300/include/asm/types.h
@@ -1,12 +1,9 @@
 #ifndef _H8300_TYPES_H
 #define _H8300_TYPES_H
 
-#include <asm-generic/int-ll64.h>
+#include <uapi/asm/types.h>
 
-#ifdef __KERNEL__
 
 #define BITS_PER_LONG 32
 
-#endif /* __KERNEL__ */
-
 #endif /* _H8300_TYPES_H */
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index c2c2f5c7..aa38105 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -1,333 +1,8 @@
 #ifndef _ASM_H8300_UNISTD_H_
 #define _ASM_H8300_UNISTD_H_
 
-/*
- * This file contains the system call numbers.
- */
+#include <uapi/asm/unistd.h>
 
-#define __NR_restart_syscall      0
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-#define __NR_waitpid		  7
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_lchown		 16
-#define __NR_break		 17
-#define __NR_oldstat		 18
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-#define __NR_umount		 22
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-#define __NR_oldfstat		 28
-#define __NR_pause		 29
-#define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
-#define __NR_access		 33
-#define __NR_nice		 34
-#define __NR_ftime		 35
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-#define __NR_prof		 44
-#define __NR_brk		 45
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_signal		 48
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_acct		 51
-#define __NR_umount2		 52
-#define __NR_lock		 53
-#define __NR_ioctl		 54
-#define __NR_fcntl		 55
-#define __NR_mpx		 56
-#define __NR_setpgid		 57
-#define __NR_ulimit		 58
-#define __NR_oldolduname	 59
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-#define __NR_sigaction		 67
-#define __NR_sgetmask		 68
-#define __NR_ssetmask		 69
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_sigsuspend		 72
-#define __NR_sigpending		 73
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-#define __NR_getrlimit		 76
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_select		 82
-#define __NR_symlink		 83
-#define __NR_oldlstat		 84
-#define __NR_readlink		 85
-#define __NR_uselib		 86
-#define __NR_swapon		 87
-#define __NR_reboot		 88
-#define __NR_readdir		 89
-#define __NR_mmap		 90
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-#define __NR_fchown		 95
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-#define __NR_profil		 98
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-#define __NR_ioperm		101
-#define __NR_socketcall		102
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-#define __NR_olduname		109
-#define __NR_iopl		110
-#define __NR_vhangup		111
-#define __NR_idle		112
-#define __NR_vm86old		113
-#define __NR_wait4		114
-#define __NR_swapoff		115
-#define __NR_sysinfo		116
-#define __NR_ipc		117
-#define __NR_fsync		118
-#define __NR_sigreturn		119
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-#define __NR_modify_ldt		123
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-#define __NR_sigprocmask	126
-#define __NR_create_module	127
-#define __NR_init_module	128
-#define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-#define __NR_sysfs		135
-#define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR_getdents		141
-#define __NR__newselect		142
-#define __NR_flock		143
-#define __NR_msync		144
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-#define __NR_mlock		150
-#define __NR_munlock		151
-#define __NR_mlockall		152
-#define __NR_munlockall		153
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-#define __NR_vm86		166
-#define __NR_query_module	167
-#define __NR_poll		168
-#define __NR_nfsservctl		169
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_prctl		172
-#define __NR_rt_sigreturn	173
-#define __NR_rt_sigaction	174
-#define __NR_rt_sigprocmask	175
-#define __NR_rt_sigpending	176
-#define __NR_rt_sigtimedwait	177
-#define __NR_rt_sigqueueinfo	178
-#define __NR_rt_sigsuspend	179
-#define __NR_pread64		180
-#define __NR_pwrite64		181
-#define __NR_chown		182
-#define __NR_getcwd		183
-#define __NR_capget		184
-#define __NR_capset		185
-#define __NR_sigaltstack	186
-#define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
-#define __NR_vfork		190
-#define __NR_ugetrlimit		191
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
-#define __NR_pivot_root		217
-#define __NR_mincore		218
-#define __NR_madvise		219
-#define __NR_madvise1		219
-#define __NR_getdents64		220
-#define __NR_fcntl64		221
-/* 223 is unused */
-#define __NR_gettid		224
-#define __NR_readahead		225
-#define __NR_setxattr		226
-#define __NR_lsetxattr		227
-#define __NR_fsetxattr		228
-#define __NR_getxattr		229
-#define __NR_lgetxattr		230
-#define __NR_fgetxattr		231
-#define __NR_listxattr		232
-#define __NR_llistxattr		233
-#define __NR_flistxattr		234
-#define __NR_removexattr	235
-#define __NR_lremovexattr	236
-#define __NR_fremovexattr	237
-#define __NR_tkill		238
-#define __NR_sendfile64		239
-#define __NR_futex		240
-#define __NR_sched_setaffinity	241
-#define __NR_sched_getaffinity	242
-#define __NR_set_thread_area	243
-#define __NR_get_thread_area	244
-#define __NR_io_setup		245
-#define __NR_io_destroy		246
-#define __NR_io_getevents	247
-#define __NR_io_submit		248
-#define __NR_io_cancel		249
-#define __NR_fadvise64		250
-/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
-#define __NR_exit_group		252
-#define __NR_lookup_dcookie	253
-#define __NR_epoll_create	254
-#define __NR_epoll_ctl		255
-#define __NR_epoll_wait		256
-#define __NR_remap_file_pages	257
-#define __NR_set_tid_address	258
-#define __NR_timer_create	259
-#define __NR_timer_settime	(__NR_timer_create+1)
-#define __NR_timer_gettime	(__NR_timer_create+2)
-#define __NR_timer_getoverrun	(__NR_timer_create+3)
-#define __NR_timer_delete	(__NR_timer_create+4)
-#define __NR_clock_settime	(__NR_timer_create+5)
-#define __NR_clock_gettime	(__NR_timer_create+6)
-#define __NR_clock_getres	(__NR_timer_create+7)
-#define __NR_clock_nanosleep	(__NR_timer_create+8)
-#define __NR_statfs64		268
-#define __NR_fstatfs64		269
-#define __NR_tgkill		270
-#define __NR_utimes		271
-#define __NR_fadvise64_64	272
-#define __NR_vserver		273
-#define __NR_mbind		274
-#define __NR_get_mempolicy	275
-#define __NR_set_mempolicy	276
-#define __NR_mq_open 		277
-#define __NR_mq_unlink		(__NR_mq_open+1)
-#define __NR_mq_timedsend	(__NR_mq_open+2)
-#define __NR_mq_timedreceive	(__NR_mq_open+3)
-#define __NR_mq_notify		(__NR_mq_open+4)
-#define __NR_mq_getsetattr	(__NR_mq_open+5)
-#define __NR_kexec_load		283
-#define __NR_waitid		284
-/* #define __NR_sys_setaltroot	285 */
-#define __NR_add_key		286
-#define __NR_request_key	287
-#define __NR_keyctl		288
-#define __NR_ioprio_set		289
-#define __NR_ioprio_get		290
-#define __NR_inotify_init	291
-#define __NR_inotify_add_watch	292
-#define __NR_inotify_rm_watch	293
-#define __NR_migrate_pages	294
-#define __NR_openat		295
-#define __NR_mkdirat		296
-#define __NR_mknodat		297
-#define __NR_fchownat		298
-#define __NR_futimesat		299
-#define __NR_fstatat64		300
-#define __NR_unlinkat		301
-#define __NR_renameat		302
-#define __NR_linkat		303
-#define __NR_symlinkat		304
-#define __NR_readlinkat		305
-#define __NR_fchmodat		306
-#define __NR_faccessat		307
-#define __NR_pselect6		308
-#define __NR_ppoll		309
-#define __NR_unshare		310
-#define __NR_set_robust_list	311
-#define __NR_get_robust_list	312
-#define __NR_splice		313
-#define __NR_sync_file_range	314
-#define __NR_tee		315
-#define __NR_vmsplice		316
-#define __NR_move_pages		317
-#define __NR_getcpu		318
-#define __NR_epoll_pwait	319
-#define __NR_setns		320
-
-#ifdef __KERNEL__
 
 #define NR_syscalls 321
 
@@ -356,7 +31,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
@@ -368,5 +42,4 @@
   asm (".weak\t_" #name "\n"				\
        ".set\t_" #name ",_sys_ni_syscall");
 
-#endif /* __KERNEL__ */
 #endif /* _ASM_H8300_UNISTD_H_ */
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index baebb3d..040178c 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,3 +1,34 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += kvm_para.h
+header-y += mman.h
+header-y += msgbuf.h
+header-y += param.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += unistd.h
diff --git a/arch/h8300/include/asm/auxvec.h b/arch/h8300/include/uapi/asm/auxvec.h
similarity index 100%
rename from arch/h8300/include/asm/auxvec.h
rename to arch/h8300/include/uapi/asm/auxvec.h
diff --git a/arch/h8300/include/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h
similarity index 100%
rename from arch/h8300/include/asm/bitsperlong.h
rename to arch/h8300/include/uapi/asm/bitsperlong.h
diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/uapi/asm/byteorder.h
similarity index 100%
rename from arch/h8300/include/asm/byteorder.h
rename to arch/h8300/include/uapi/asm/byteorder.h
diff --git a/arch/h8300/include/asm/errno.h b/arch/h8300/include/uapi/asm/errno.h
similarity index 100%
rename from arch/h8300/include/asm/errno.h
rename to arch/h8300/include/uapi/asm/errno.h
diff --git a/arch/h8300/include/asm/fcntl.h b/arch/h8300/include/uapi/asm/fcntl.h
similarity index 100%
rename from arch/h8300/include/asm/fcntl.h
rename to arch/h8300/include/uapi/asm/fcntl.h
diff --git a/arch/h8300/include/asm/ioctl.h b/arch/h8300/include/uapi/asm/ioctl.h
similarity index 100%
rename from arch/h8300/include/asm/ioctl.h
rename to arch/h8300/include/uapi/asm/ioctl.h
diff --git a/arch/h8300/include/asm/ioctls.h b/arch/h8300/include/uapi/asm/ioctls.h
similarity index 100%
rename from arch/h8300/include/asm/ioctls.h
rename to arch/h8300/include/uapi/asm/ioctls.h
diff --git a/arch/h8300/include/asm/ipcbuf.h b/arch/h8300/include/uapi/asm/ipcbuf.h
similarity index 100%
rename from arch/h8300/include/asm/ipcbuf.h
rename to arch/h8300/include/uapi/asm/ipcbuf.h
diff --git a/arch/h8300/include/asm/kvm_para.h b/arch/h8300/include/uapi/asm/kvm_para.h
similarity index 100%
rename from arch/h8300/include/asm/kvm_para.h
rename to arch/h8300/include/uapi/asm/kvm_para.h
diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/uapi/asm/mman.h
similarity index 100%
rename from arch/h8300/include/asm/mman.h
rename to arch/h8300/include/uapi/asm/mman.h
diff --git a/arch/h8300/include/asm/msgbuf.h b/arch/h8300/include/uapi/asm/msgbuf.h
similarity index 100%
rename from arch/h8300/include/asm/msgbuf.h
rename to arch/h8300/include/uapi/asm/msgbuf.h
diff --git a/arch/h8300/include/uapi/asm/param.h b/arch/h8300/include/uapi/asm/param.h
new file mode 100644
index 0000000..3dd18ae
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/param.h
@@ -0,0 +1,16 @@
+#ifndef _UAPI_H8300_PARAM_H
+#define _UAPI_H8300_PARAM_H
+
+#ifndef __KERNEL__
+#define HZ		100
+#endif
+
+#define EXEC_PAGESIZE	4096
+
+#ifndef NOGROUP
+#define NOGROUP		(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+
+#endif /* _UAPI_H8300_PARAM_H */
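The non-kernel HZ above is the tick unit userspace sees from interfaces such as times(2), but portable code should query it via sysconf(_SC_CLK_TCK) rather than hardcoding 100. A small sketch:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
	long hz = sysconf(_SC_CLK_TCK);	/* the exported "user" HZ */
	struct tms t;

	if (times(&t) == (clock_t)-1 || hz <= 0)
		return 1;
	printf("user cpu: %.2fs (at %ld ticks/s)\n",
	       (double)t.tms_utime / hz, hz);
	return 0;
}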
diff --git a/arch/h8300/include/asm/poll.h b/arch/h8300/include/uapi/asm/poll.h
similarity index 100%
rename from arch/h8300/include/asm/poll.h
rename to arch/h8300/include/uapi/asm/poll.h
diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/uapi/asm/posix_types.h
similarity index 100%
rename from arch/h8300/include/asm/posix_types.h
rename to arch/h8300/include/uapi/asm/posix_types.h
diff --git a/arch/h8300/include/uapi/asm/ptrace.h b/arch/h8300/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..ef39ec5
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/ptrace.h
@@ -0,0 +1,44 @@
+#ifndef _UAPI_H8300_PTRACE_H
+#define _UAPI_H8300_PTRACE_H
+
+#ifndef __ASSEMBLY__
+
+#define PT_ER1	   0
+#define PT_ER2	   1
+#define PT_ER3	   2
+#define PT_ER4	   3
+#define PT_ER5	   4
+#define PT_ER6	   5
+#define PT_ER0	   6
+#define PT_ORIG_ER0	   7
+#define PT_CCR	   8
+#define PT_PC	   9
+#define PT_USP	   10
+#define PT_EXR     12
+
+/* this struct defines the way the registers are stored on the
+   stack during a system call. */
+
+struct pt_regs {
+	long     retpc;
+	long     er4;
+	long     er5;
+	long     er6;
+	long     er3;
+	long     er2;
+	long     er1;
+	long     orig_er0;
+	unsigned short ccr;
+	long     er0;
+	long     vector;
+#if defined(CONFIG_CPU_H8S)
+	unsigned short exr;
+#endif
+	unsigned long  pc;
+} __attribute__((aligned(2),packed));
+
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13
+
+#endif /* __ASSEMBLY__ */
+#endif /* _UAPI_H8300_PTRACE_H */
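The PT_* indices above name slots in the saved-register area that a debugger reads with PTRACE_PEEKUSER. A hedged sketch only: whether the address argument is the bare index or index * sizeof(long) is an h8300 implementation detail not visible in this header, so the scaling below is an assumption, and PT_PC is redefined locally since the example will usually be built on another architecture:

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define PT_PC 9		/* value from the header above */

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, 0, 0);
		execlp("true", "true", (char *)NULL);
		_exit(127);
	}
	waitpid(pid, NULL, 0);	/* child stops on exec */
	long pc = ptrace(PTRACE_PEEKUSER, pid,
			 (void *)(PT_PC * sizeof(long)), 0);	/* scaling assumed */
	printf("pc slot: %#lx\n", (unsigned long)pc);
	ptrace(PTRACE_CONT, pid, 0, 0);
	waitpid(pid, NULL, 0);
	return 0;
}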
diff --git a/arch/h8300/include/asm/resource.h b/arch/h8300/include/uapi/asm/resource.h
similarity index 100%
rename from arch/h8300/include/asm/resource.h
rename to arch/h8300/include/uapi/asm/resource.h
diff --git a/arch/h8300/include/asm/sembuf.h b/arch/h8300/include/uapi/asm/sembuf.h
similarity index 100%
rename from arch/h8300/include/asm/sembuf.h
rename to arch/h8300/include/uapi/asm/sembuf.h
diff --git a/arch/h8300/include/asm/setup.h b/arch/h8300/include/uapi/asm/setup.h
similarity index 100%
rename from arch/h8300/include/asm/setup.h
rename to arch/h8300/include/uapi/asm/setup.h
diff --git a/arch/h8300/include/asm/shmbuf.h b/arch/h8300/include/uapi/asm/shmbuf.h
similarity index 100%
rename from arch/h8300/include/asm/shmbuf.h
rename to arch/h8300/include/uapi/asm/shmbuf.h
diff --git a/arch/h8300/include/asm/sigcontext.h b/arch/h8300/include/uapi/asm/sigcontext.h
similarity index 100%
rename from arch/h8300/include/asm/sigcontext.h
rename to arch/h8300/include/uapi/asm/sigcontext.h
diff --git a/arch/h8300/include/asm/siginfo.h b/arch/h8300/include/uapi/asm/siginfo.h
similarity index 100%
rename from arch/h8300/include/asm/siginfo.h
rename to arch/h8300/include/uapi/asm/siginfo.h
diff --git a/arch/h8300/include/uapi/asm/signal.h b/arch/h8300/include/uapi/asm/signal.h
new file mode 100644
index 0000000..af3a6c3
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/signal.h
@@ -0,0 +1,115 @@
+#ifndef _UAPI_H8300_SIGNAL_H
+#define _UAPI_H8300_SIGNAL_H
+
+#include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG		32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	_NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001
+#define SA_NOCLDWAIT	0x00000002 /* not supported yet */
+#define SA_SIGINFO	0x00000004
+#define SA_ONSTACK	0x08000000
+#define SA_RESTART	0x10000000
+#define SA_NODEFER	0x40000000
+#define SA_RESETHAND	0x80000000
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+#define SA_RESTORER	0x04000000
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+	union {
+	  __sighandler_t _sa_handler;
+	  void (*_sa_sigaction)(int, struct siginfo *, void *);
+	} _u;
+	sigset_t sa_mask;
+	unsigned long sa_flags;
+	void (*sa_restorer)(void);
+};
+
+#define sa_handler	_u._sa_handler
+#define sa_sigaction	_u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+	void *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+
+#endif /* _UAPI_H8300_SIGNAL_H */
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
similarity index 100%
rename from arch/h8300/include/asm/socket.h
rename to arch/h8300/include/uapi/asm/socket.h
diff --git a/arch/h8300/include/asm/sockios.h b/arch/h8300/include/uapi/asm/sockios.h
similarity index 100%
rename from arch/h8300/include/asm/sockios.h
rename to arch/h8300/include/uapi/asm/sockios.h
diff --git a/arch/h8300/include/asm/stat.h b/arch/h8300/include/uapi/asm/stat.h
similarity index 100%
rename from arch/h8300/include/asm/stat.h
rename to arch/h8300/include/uapi/asm/stat.h
diff --git a/arch/h8300/include/asm/statfs.h b/arch/h8300/include/uapi/asm/statfs.h
similarity index 100%
rename from arch/h8300/include/asm/statfs.h
rename to arch/h8300/include/uapi/asm/statfs.h
diff --git a/arch/h8300/include/asm/swab.h b/arch/h8300/include/uapi/asm/swab.h
similarity index 100%
rename from arch/h8300/include/asm/swab.h
rename to arch/h8300/include/uapi/asm/swab.h
diff --git a/arch/h8300/include/asm/termbits.h b/arch/h8300/include/uapi/asm/termbits.h
similarity index 100%
rename from arch/h8300/include/asm/termbits.h
rename to arch/h8300/include/uapi/asm/termbits.h
diff --git a/arch/h8300/include/uapi/asm/termios.h b/arch/h8300/include/uapi/asm/termios.h
new file mode 100644
index 0000000..5a67d7e
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/termios.h
@@ -0,0 +1,44 @@
+#ifndef _UAPI_H8300_TERMIOS_H
+#define _UAPI_H8300_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+ 
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+
+#endif /* _UAPI_H8300_TERMIOS_H */
diff --git a/arch/h8300/include/uapi/asm/types.h b/arch/h8300/include/uapi/asm/types.h
new file mode 100644
index 0000000..9ec9d4c
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/types.h
@@ -0,0 +1 @@
+#include <asm-generic/int-ll64.h>
diff --git a/arch/h8300/include/uapi/asm/unistd.h b/arch/h8300/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..8cb5d42
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/unistd.h
@@ -0,0 +1,330 @@
+#ifndef _UAPI_ASM_H8300_UNISTD_H_
+#define _UAPI_ASM_H8300_UNISTD_H_
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_restart_syscall      0
+#define __NR_exit		  1
+#define __NR_fork		  2
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_open		  5
+#define __NR_close		  6
+#define __NR_waitpid		  7
+#define __NR_creat		  8
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_execve		 11
+#define __NR_chdir		 12
+#define __NR_time		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+#define __NR_lchown		 16
+#define __NR_break		 17
+#define __NR_oldstat		 18
+#define __NR_lseek		 19
+#define __NR_getpid		 20
+#define __NR_mount		 21
+#define __NR_umount		 22
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_ptrace		 26
+#define __NR_alarm		 27
+#define __NR_oldfstat		 28
+#define __NR_pause		 29
+#define __NR_utime		 30
+#define __NR_stty		 31
+#define __NR_gtty		 32
+#define __NR_access		 33
+#define __NR_nice		 34
+#define __NR_ftime		 35
+#define __NR_sync		 36
+#define __NR_kill		 37
+#define __NR_rename		 38
+#define __NR_mkdir		 39
+#define __NR_rmdir		 40
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_times		 43
+#define __NR_prof		 44
+#define __NR_brk		 45
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_signal		 48
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_acct		 51
+#define __NR_umount2		 52
+#define __NR_lock		 53
+#define __NR_ioctl		 54
+#define __NR_fcntl		 55
+#define __NR_mpx		 56
+#define __NR_setpgid		 57
+#define __NR_ulimit		 58
+#define __NR_oldolduname	 59
+#define __NR_umask		 60
+#define __NR_chroot		 61
+#define __NR_ustat		 62
+#define __NR_dup2		 63
+#define __NR_getppid		 64
+#define __NR_getpgrp		 65
+#define __NR_setsid		 66
+#define __NR_sigaction		 67
+#define __NR_sgetmask		 68
+#define __NR_ssetmask		 69
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+#define __NR_sigsuspend		 72
+#define __NR_sigpending		 73
+#define __NR_sethostname	 74
+#define __NR_setrlimit		 75
+#define __NR_getrlimit		 76
+#define __NR_getrusage		 77
+#define __NR_gettimeofday	 78
+#define __NR_settimeofday	 79
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_select		 82
+#define __NR_symlink		 83
+#define __NR_oldlstat		 84
+#define __NR_readlink		 85
+#define __NR_uselib		 86
+#define __NR_swapon		 87
+#define __NR_reboot		 88
+#define __NR_readdir		 89
+#define __NR_mmap		 90
+#define __NR_munmap		 91
+#define __NR_truncate		 92
+#define __NR_ftruncate		 93
+#define __NR_fchmod		 94
+#define __NR_fchown		 95
+#define __NR_getpriority	 96
+#define __NR_setpriority	 97
+#define __NR_profil		 98
+#define __NR_statfs		 99
+#define __NR_fstatfs		100
+#define __NR_ioperm		101
+#define __NR_socketcall		102
+#define __NR_syslog		103
+#define __NR_setitimer		104
+#define __NR_getitimer		105
+#define __NR_stat		106
+#define __NR_lstat		107
+#define __NR_fstat		108
+#define __NR_olduname		109
+#define __NR_iopl		110
+#define __NR_vhangup		111
+#define __NR_idle		112
+#define __NR_vm86old		113
+#define __NR_wait4		114
+#define __NR_swapoff		115
+#define __NR_sysinfo		116
+#define __NR_ipc		117
+#define __NR_fsync		118
+#define __NR_sigreturn		119
+#define __NR_clone		120
+#define __NR_setdomainname	121
+#define __NR_uname		122
+#define __NR_modify_ldt		123
+#define __NR_adjtimex		124
+#define __NR_mprotect		125
+#define __NR_sigprocmask	126
+#define __NR_create_module	127
+#define __NR_init_module	128
+#define __NR_delete_module	129
+#define __NR_get_kernel_syms	130
+#define __NR_quotactl		131
+#define __NR_getpgid		132
+#define __NR_fchdir		133
+#define __NR_bdflush		134
+#define __NR_sysfs		135
+#define __NR_personality	136
+#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR_getdents		141
+#define __NR__newselect		142
+#define __NR_flock		143
+#define __NR_msync		144
+#define __NR_readv		145
+#define __NR_writev		146
+#define __NR_getsid		147
+#define __NR_fdatasync		148
+#define __NR__sysctl		149
+#define __NR_mlock		150
+#define __NR_munlock		151
+#define __NR_mlockall		152
+#define __NR_munlockall		153
+#define __NR_sched_setparam		154
+#define __NR_sched_getparam		155
+#define __NR_sched_setscheduler		156
+#define __NR_sched_getscheduler		157
+#define __NR_sched_yield		158
+#define __NR_sched_get_priority_max	159
+#define __NR_sched_get_priority_min	160
+#define __NR_sched_rr_get_interval	161
+#define __NR_nanosleep		162
+#define __NR_mremap		163
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+#define __NR_vm86		166
+#define __NR_query_module	167
+#define __NR_poll		168
+#define __NR_nfsservctl		169
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_prctl		172
+#define __NR_rt_sigreturn	173
+#define __NR_rt_sigaction	174
+#define __NR_rt_sigprocmask	175
+#define __NR_rt_sigpending	176
+#define __NR_rt_sigtimedwait	177
+#define __NR_rt_sigqueueinfo	178
+#define __NR_rt_sigsuspend	179
+#define __NR_pread64		180
+#define __NR_pwrite64		181
+#define __NR_chown		182
+#define __NR_getcwd		183
+#define __NR_capget		184
+#define __NR_capset		185
+#define __NR_sigaltstack	186
+#define __NR_sendfile		187
+#define __NR_getpmsg		188	/* some people actually want streams */
+#define __NR_putpmsg		189	/* some people actually want streams */
+#define __NR_vfork		190
+#define __NR_ugetrlimit		191
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_pivot_root		217
+#define __NR_mincore		218
+#define __NR_madvise		219
+#define __NR_madvise1		219
+#define __NR_getdents64		220
+#define __NR_fcntl64		221
+/* 223 is unused */
+#define __NR_gettid		224
+#define __NR_readahead		225
+#define __NR_setxattr		226
+#define __NR_lsetxattr		227
+#define __NR_fsetxattr		228
+#define __NR_getxattr		229
+#define __NR_lgetxattr		230
+#define __NR_fgetxattr		231
+#define __NR_listxattr		232
+#define __NR_llistxattr		233
+#define __NR_flistxattr		234
+#define __NR_removexattr	235
+#define __NR_lremovexattr	236
+#define __NR_fremovexattr	237
+#define __NR_tkill		238
+#define __NR_sendfile64		239
+#define __NR_futex		240
+#define __NR_sched_setaffinity	241
+#define __NR_sched_getaffinity	242
+#define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
+#define __NR_io_setup		245
+#define __NR_io_destroy		246
+#define __NR_io_getevents	247
+#define __NR_io_submit		248
+#define __NR_io_cancel		249
+#define __NR_fadvise64		250
+/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
+#define __NR_exit_group		252
+#define __NR_lookup_dcookie	253
+#define __NR_epoll_create	254
+#define __NR_epoll_ctl		255
+#define __NR_epoll_wait		256
+#define __NR_remap_file_pages	257
+#define __NR_set_tid_address	258
+#define __NR_timer_create	259
+#define __NR_timer_settime	(__NR_timer_create+1)
+#define __NR_timer_gettime	(__NR_timer_create+2)
+#define __NR_timer_getoverrun	(__NR_timer_create+3)
+#define __NR_timer_delete	(__NR_timer_create+4)
+#define __NR_clock_settime	(__NR_timer_create+5)
+#define __NR_clock_gettime	(__NR_timer_create+6)
+#define __NR_clock_getres	(__NR_timer_create+7)
+#define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_statfs64		268
+#define __NR_fstatfs64		269
+#define __NR_tgkill		270
+#define __NR_utimes		271
+#define __NR_fadvise64_64	272
+#define __NR_vserver		273
+#define __NR_mbind		274
+#define __NR_get_mempolicy	275
+#define __NR_set_mempolicy	276
+#define __NR_mq_open 		277
+#define __NR_mq_unlink		(__NR_mq_open+1)
+#define __NR_mq_timedsend	(__NR_mq_open+2)
+#define __NR_mq_timedreceive	(__NR_mq_open+3)
+#define __NR_mq_notify		(__NR_mq_open+4)
+#define __NR_mq_getsetattr	(__NR_mq_open+5)
+#define __NR_kexec_load		283
+#define __NR_waitid		284
+/* #define __NR_sys_setaltroot	285 */
+#define __NR_add_key		286
+#define __NR_request_key	287
+#define __NR_keyctl		288
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
+#define __NR_inotify_init	291
+#define __NR_inotify_add_watch	292
+#define __NR_inotify_rm_watch	293
+#define __NR_migrate_pages	294
+#define __NR_openat		295
+#define __NR_mkdirat		296
+#define __NR_mknodat		297
+#define __NR_fchownat		298
+#define __NR_futimesat		299
+#define __NR_fstatat64		300
+#define __NR_unlinkat		301
+#define __NR_renameat		302
+#define __NR_linkat		303
+#define __NR_symlinkat		304
+#define __NR_readlinkat		305
+#define __NR_fchmodat		306
+#define __NR_faccessat		307
+#define __NR_pselect6		308
+#define __NR_ppoll		309
+#define __NR_unshare		310
+#define __NR_set_robust_list	311
+#define __NR_get_robust_list	312
+#define __NR_splice		313
+#define __NR_sync_file_range	314
+#define __NR_tee		315
+#define __NR_vmsplice		316
+#define __NR_move_pages		317
+#define __NR_getcpu		318
+#define __NR_epoll_pwait	319
+#define __NR_setns		320
+
+#endif /* _UAPI_ASM_H8300_UNISTD_H_ */
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index e418803..0744f7d 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -31,8 +31,6 @@
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 2af8153..4a87cc4 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -27,7 +27,6 @@
  */
 
 #define sys_mmap2 sys_mmap_pgoff
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 
 #include <asm-generic/unistd.h>
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6706004..3279646 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -42,8 +42,6 @@
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index 4f5e814..cf3ab7e 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -58,6 +58,7 @@
 static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr)
 {
 	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	debug_dma_mapping_error(dev, daddr);
 	return ops->mapping_error(dev, daddr);
 }
 
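
The added debug_dma_mapping_error() call lets CONFIG_DMA_API_DEBUG verify
that drivers actually check their mappings. The driver-side pattern it
tracks is the usual one (a generic sketch, not taken from this patch):

    /* map a buffer for device reads and verify the mapping took */
    dma_addr_t handle = dma_map_single(dev, buf, size, DMA_TO_DEVICE);

    if (dma_mapping_error(dev, handle)) {
        dev_err(dev, "DMA mapping failed\n");
        return -ENOMEM;
    }
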
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index b0e9736..8451439 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -78,6 +78,11 @@
 	unsigned long __ip = instruction_pointer(regs);			\
 	(__ip & ~3UL) + ((__ip & 3UL) << 2);				\
 })
+/*
+ * Why not default?  Because user_stack_pointer() on ia64 gives register
+ * stack backing store instead...
+ */
+#define current_user_stack_pointer() (current_pt_regs()->r12)
 
   /* given a pointer to a task_struct, return the user's pt_regs */
 # define task_pt_regs(t)		(((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
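
For comparison, the generic fallback this override replaces is roughly the
following (from <linux/ptrace.h>), which on ia64 would hand back the
register backing store rather than the memory stack:

    #ifndef current_user_stack_pointer
    #define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
    #endif
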
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 1574bca..8b3ff2f 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -29,7 +29,6 @@
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h
index e531c42..c0ea285 100644
--- a/arch/ia64/include/uapi/asm/signal.h
+++ b/arch/ia64/include/uapi/asm/signal.h
@@ -79,12 +79,6 @@
 #define SA_RESTORER	0x04000000
 
 /*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-/*
  * The minimum stack size needs to be fairly large because we want to
  * be sure that an app compiled for today's CPUs will continue to run
  * on all future CPU models.  The CPU model matters because the signal
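
SS_ONSTACK and SS_DISABLE now come from the generic uapi signal header;
their userspace use is unchanged. A minimal sketch of the sigaltstack(2)
API they belong to:

    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        stack_t ss = {
            .ss_sp = malloc(SIGSTKSZ),
            .ss_size = SIGSTKSZ,
            .ss_flags = 0,
        };

        if (ss.ss_sp == NULL || sigaltstack(&ss, NULL) == -1) {
            perror("sigaltstack");
            return 1;
        }

        /* query it back: ss_flags reads as SS_DISABLE when no stack is
         * set, SS_ONSTACK while a handler is running on it */
        sigaltstack(NULL, &ss);
        printf("ss_flags=%d\n", ss.ss_flags);
        return 0;
    }
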
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 5183f43..f807721 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -15,8 +15,6 @@
 	select GENERIC_ATOMIC64
 	select ARCH_USES_GETTIMEOFFSET
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SBUS
 	bool
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 4bc8ae7..bebdc36 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1,4 +1,3 @@
-include include/asm-generic/Kbuild.asm
 
 generic-y += clkdev.h
 generic-y += exec.h
diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h
index c4432f1..fa58ccf 100644
--- a/arch/m32r/include/asm/ptrace.h
+++ b/arch/m32r/include/asm/ptrace.h
@@ -1,6 +1,3 @@
-#ifndef _ASM_M32R_PTRACE_H
-#define _ASM_M32R_PTRACE_H
-
 /*
  * linux/include/asm-m32r/ptrace.h
  *
@@ -11,111 +8,12 @@
  * M32R version:
  *   Copyright (C) 2001-2002, 2004  Hirokazu Takata <takata at linux-m32r.org>
  */
+#ifndef _ASM_M32R_PTRACE_H
+#define _ASM_M32R_PTRACE_H
 
-/* 0 - 13 are integer registers (general purpose registers).  */
-#define PT_R4		0
-#define PT_R5		1
-#define PT_R6		2
-#define PT_REGS 	3
-#define PT_R0		4
-#define PT_R1		5
-#define PT_R2		6
-#define PT_R3		7
-#define PT_R7		8
-#define PT_R8		9
-#define PT_R9		10
-#define PT_R10		11
-#define PT_R11		12
-#define PT_R12		13
-#define PT_SYSCNR	14
-#define PT_R13		PT_FP
-#define PT_R14		PT_LR
-#define PT_R15		PT_SP
-
-/* processor status and miscellaneous context registers.  */
-#define PT_ACC0H	15
-#define PT_ACC0L	16
-#define PT_ACC1H	17	/* ISA_DSP_LEVEL2 only */
-#define PT_ACC1L	18	/* ISA_DSP_LEVEL2 only */
-#define PT_PSW		19
-#define PT_BPC		20
-#define PT_BBPSW	21
-#define PT_BBPC		22
-#define PT_SPU		23
-#define PT_FP		24
-#define PT_LR		25
-#define PT_SPI		26
-#define PT_ORIGR0	27
-
-/* virtual pt_reg entry for gdb */
-#define PT_PC		30
-#define PT_CBR		31
-#define PT_EVB		32
-
-
-/* Control registers.  */
-#define SPR_CR0 PT_PSW
-#define SPR_CR1 PT_CBR		/* read only */
-#define SPR_CR2 PT_SPI
-#define SPR_CR3 PT_SPU
-#define SPR_CR4
-#define SPR_CR5 PT_EVB		/* part of M32R/E, M32R/I core only */
-#define SPR_CR6 PT_BPC
-#define SPR_CR7
-#define SPR_CR8 PT_BBPSW
-#define SPR_CR9
-#define SPR_CR10
-#define SPR_CR11
-#define SPR_CR12
-#define SPR_CR13 PT_WR
-#define SPR_CR14 PT_BBPC
-#define SPR_CR15
-
-/* this struct defines the way the registers are stored on the
-   stack during a system call. */
-struct pt_regs {
-	/* Saved main processor registers. */
-	unsigned long r4;
-	unsigned long r5;
-	unsigned long r6;
-	struct pt_regs *pt_regs;
-	unsigned long r0;
-	unsigned long r1;
-	unsigned long r2;
-	unsigned long r3;
-	unsigned long r7;
-	unsigned long r8;
-	unsigned long r9;
-	unsigned long r10;
-	unsigned long r11;
-	unsigned long r12;
-	long syscall_nr;
-
-	/* Saved main processor status and miscellaneous context registers. */
-	unsigned long acc0h;
-	unsigned long acc0l;
-	unsigned long acc1h;	/* ISA_DSP_LEVEL2 only */
-	unsigned long acc1l;	/* ISA_DSP_LEVEL2 only */
-	unsigned long psw;
-	unsigned long bpc;		/* saved PC for TRAP syscalls */
-	unsigned long bbpsw;
-	unsigned long bbpc;
-	unsigned long spu;		/* saved user stack */
-	unsigned long fp;
-	unsigned long lr;		/* saved PC for JL syscalls */
-	unsigned long spi;		/* saved kernel stack */
-	unsigned long orig_r0;
-};
-
-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
-#define PTRACE_GETREGS		12
-#define PTRACE_SETREGS		13
-
-#define PTRACE_OLDSETOPTIONS	21
-
-#ifdef __KERNEL__
 
 #include <asm/m32r.h>		/* M32R_PSW_BSM, M32R_PSW_BPM */
+#include <uapi/asm/ptrace.h>
 
 #define arch_has_single_step() (1)
 
@@ -134,6 +32,7 @@
 
 #define instruction_pointer(regs) ((regs)->bpc)
 #define profile_pc(regs) instruction_pointer(regs)
+#define user_stack_pointer(regs) ((regs)->spu)
 
 extern void withdraw_debug_trap(struct pt_regs *regs);
 
@@ -142,6 +41,4 @@
 #define current_pt_regs() ((struct pt_regs *) \
 	((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
 
-#endif /* __KERNEL */
-
 #endif /* _ASM_M32R_PTRACE_H */
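
Every header in this series is split the same way: the exported ABI moves
under include/uapi/, and the kernel-side header shrinks to its
__KERNEL__-only remainder plus an include of the exported half.
Schematically (illustrative names, not literal file contents):

    /* arch/.../include/asm/foo.h -- kernel-only half */
    #ifndef _ASM_FOO_H
    #define _ASM_FOO_H

    #include <uapi/asm/foo.h>   /* the ABI half, exported to userspace */

    /* kernel-internal helpers, formerly wrapped in #ifdef __KERNEL__ */

    #endif /* _ASM_FOO_H */
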
diff --git a/arch/m32r/include/asm/setup.h b/arch/m32r/include/asm/setup.h
index c637ab9..bbe59a9 100644
--- a/arch/m32r/include/asm/setup.h
+++ b/arch/m32r/include/asm/setup.h
@@ -1,13 +1,8 @@
 #ifndef _ASM_M32R_SETUP_H
 #define _ASM_M32R_SETUP_H
 
-/*
- * This is set up by the setup-routine at boot-time
- */
+#include <uapi/asm/setup.h>
 
-#define COMMAND_LINE_SIZE       512
-
-#ifdef __KERNEL__
 
 #define PARAM			((unsigned char *)empty_zero_page)
 
@@ -33,6 +28,4 @@
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
-#endif  /*  __KERNEL__  */
-
 #endif /* _ASM_M32R_SETUP_H */
diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h
index e4d2e2a..a5ba4a2 100644
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -1,14 +1,8 @@
 #ifndef _ASM_M32R_SIGNAL_H
 #define _ASM_M32R_SIGNAL_H
 
-#include <linux/types.h>
-#include <linux/time.h>
-#include <linux/compiler.h>
+#include <uapi/asm/signal.h>
 
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-#ifdef __KERNEL__
 /* Most things should be clean enough to redefine this at will, if care
    is taken to make libc match.  */
 
@@ -22,94 +16,6 @@
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-#define NSIG		32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGBUS		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGUSR1		10
-#define SIGSEGV		11
-#define SIGUSR2		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGSTKFLT	16
-#define SIGCHLD		17
-#define SIGCONT		18
-#define SIGSTOP		19
-#define SIGTSTP		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGURG		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGIO		29
-#define SIGPOLL		SIGIO
-/*
-#define SIGLOST		29
-*/
-#define SIGPWR		30
-#define SIGSYS		31
-#define	SIGUNUSED	31
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	32
-#define SIGRTMAX	_NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001u
-#define SA_NOCLDWAIT	0x00000002u
-#define SA_SIGINFO	0x00000004u
-#define SA_ONSTACK	0x08000000u
-#define SA_RESTART	0x10000000u
-#define SA_NODEFER	0x40000000u
-#define SA_RESETHAND	0x80000000u
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
-
-#include <asm-generic/signal-defs.h>
-
-#ifdef __KERNEL__
 struct sigaction {
 	__sighandler_t sa_handler;
 	unsigned long sa_flags;
@@ -120,35 +26,8 @@
 struct k_sigaction {
 	struct sigaction sa;
 };
-#else
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-struct sigaction {
-	union {
-	  __sighandler_t _sa_handler;
-	  void (*_sa_sigaction)(int, struct siginfo *, void *);
-	} _u;
-	sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-#define sa_handler	_u._sa_handler
-#define sa_sigaction	_u._sa_sigaction
-
-#endif /* __KERNEL__ */
-
-typedef struct sigaltstack {
-	void __user *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
 #include <asm/sigcontext.h>
 
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#endif /* __KERNEL__ */
-
 #endif  /* _ASM_M32R_SIGNAL_H */
diff --git a/arch/m32r/include/asm/termios.h b/arch/m32r/include/asm/termios.h
index 93ce79f..680898f 100644
--- a/arch/m32r/include/asm/termios.h
+++ b/arch/m32r/include/asm/termios.h
@@ -1,46 +1,8 @@
 #ifndef _M32R_TERMIOS_H
 #define _M32R_TERMIOS_H
 
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-#ifdef __KERNEL__
 #include <linux/module.h>
+#include <uapi/asm/termios.h>
 
 /*	intr=^C		quit=^\		erase=del	kill=^U
 	eof=^D		vtime=\0	vmin=\1		sxtc=\0
@@ -86,6 +48,4 @@
 #define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
 #define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
 
-#endif	/* __KERNEL__ */
-
 #endif	/* _M32R_TERMIOS_H */
diff --git a/arch/m32r/include/asm/types.h b/arch/m32r/include/asm/types.h
index bb2eead..04a44c6 100644
--- a/arch/m32r/include/asm/types.h
+++ b/arch/m32r/include/asm/types.h
@@ -1,15 +1,12 @@
 #ifndef _ASM_M32R_TYPES_H
 #define _ASM_M32R_TYPES_H
 
-#include <asm-generic/int-ll64.h>
+#include <uapi/asm/types.h>
 
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
-#ifdef __KERNEL__
 
 #define BITS_PER_LONG 32
 
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_M32R_TYPES_H */
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index d9e7351..79b063c 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -1,338 +1,8 @@
 #ifndef _ASM_M32R_UNISTD_H
 #define _ASM_M32R_UNISTD_H
 
-/*
- * This file contains the system call numbers.
- */
+#include <uapi/asm/unistd.h>
 
-#define __NR_restart_syscall	  0
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-#define __NR_waitpid		  7
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-/* 16 is unused */
-/* 17 is unused */
-/* 18 is unused */
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-#define __NR_umount		 22
-/* 23 is unused */
-/* 24 is unused */
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-/* 28 is unused */
-#define __NR_pause		 29
-#define __NR_utime		 30
-/* 31 is unused */
-#define __NR_cachectl		 32 /* old #define __NR_gtty		 32*/
-#define __NR_access		 33
-/* 34 is unused */
-/* 35 is unused */
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-/* 44 is unused */
-#define __NR_brk		 45
-/* 46 is unused */
-/* 47 is unused (getgid16) */
-/* 48 is unused */
-/* 49 is unused */
-/* 50 is unused */
-#define __NR_acct		 51
-#define __NR_umount2		 52
-/* 53 is unused */
-#define __NR_ioctl		 54
-/* 55 is unused (fcntl) */
-/* 56 is unused */
-#define __NR_setpgid		 57
-/* 58 is unused */
-/* 59 is unused */
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-/* 67 is unused */
-/* 68 is unused*/
-/* 69 is unused*/
-/* 70 is unused */
-/* 71 is unused */
-/* 72 is unused */
-/* 73 is unused */
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-/* 76 is unused (old getrlimit) */
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-/* 80 is unused */
-/* 81 is unused */
-/* 82 is unused */
-#define __NR_symlink		 83
-/* 84 is unused */
-#define __NR_readlink		 85
-#define __NR_uselib		 86
-#define __NR_swapon		 87
-#define __NR_reboot		 88
-/* 89 is unused */
-/* 90 is unused */
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-/* 95 is unused */
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-/* 98 is unused */
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-/* 101 is unused */
-#define __NR_socketcall		102
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-/* 109 is unused */
-/* 110 is unused */
-#define __NR_vhangup		111
-/* 112 is unused */
-/* 113 is unused */
-#define __NR_wait4		114
-#define __NR_swapoff		115
-#define __NR_sysinfo		116
-#define __NR_ipc		117
-#define __NR_fsync		118
-/* 119 is unused */
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-/* 123 is unused */
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-/* 126 is unused */
-/* 127 is unused */
-#define __NR_init_module	128
-#define __NR_delete_module	129
-/* 130 is unused */
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-#define __NR_sysfs		135
-#define __NR_personality	136
-/* 137 is unused */
-/* 138 is unused */
-/* 139 is unused */
-#define __NR__llseek		140
-#define __NR_getdents		141
-#define __NR__newselect		142
-#define __NR_flock		143
-#define __NR_msync		144
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-#define __NR_mlock		150
-#define __NR_munlock		151
-#define __NR_mlockall		152
-#define __NR_munlockall		153
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-/* 164 is unused */
-/* 165 is unused */
-#define __NR_tas		166
-/* 167 is unused */
-#define __NR_poll		168
-#define __NR_nfsservctl		169
-/* 170 is unused */
-/* 171 is unused */
-#define __NR_prctl              172
-#define __NR_rt_sigreturn	173
-#define __NR_rt_sigaction	174
-#define __NR_rt_sigprocmask	175
-#define __NR_rt_sigpending	176
-#define __NR_rt_sigtimedwait	177
-#define __NR_rt_sigqueueinfo	178
-#define __NR_rt_sigsuspend	179
-#define __NR_pread64		180
-#define __NR_pwrite64		181
-/* 182 is unused */
-#define __NR_getcwd		183
-#define __NR_capget		184
-#define __NR_capset		185
-#define __NR_sigaltstack	186
-#define __NR_sendfile		187
-/* 188 is unused */
-/* 189 is unused */
-#define __NR_vfork		190
-#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
-#define __NR_pivot_root		217
-#define __NR_mincore		218
-#define __NR_madvise		219
-#define __NR_getdents64		220
-#define __NR_fcntl64		221
-/* 222 is unused */
-/* 223 is unused */
-#define __NR_gettid		224
-#define __NR_readahead		225
-#define __NR_setxattr		226
-#define __NR_lsetxattr		227
-#define __NR_fsetxattr		228
-#define __NR_getxattr		229
-#define __NR_lgetxattr		230
-#define __NR_fgetxattr		231
-#define __NR_listxattr		232
-#define __NR_llistxattr		233
-#define __NR_flistxattr		234
-#define __NR_removexattr	235
-#define __NR_lremovexattr	236
-#define __NR_fremovexattr	237
-#define __NR_tkill		238
-#define __NR_sendfile64		239
-#define __NR_futex		240
-#define __NR_sched_setaffinity	241
-#define __NR_sched_getaffinity	242
-#define __NR_set_thread_area	243
-#define __NR_get_thread_area	244
-#define __NR_io_setup		245
-#define __NR_io_destroy		246
-#define __NR_io_getevents	247
-#define __NR_io_submit		248
-#define __NR_io_cancel		249
-#define __NR_fadvise64		250
-/* 251 is unused */
-#define __NR_exit_group		252
-#define __NR_lookup_dcookie	253
-#define __NR_epoll_create	254
-#define __NR_epoll_ctl		255
-#define __NR_epoll_wait		256
-#define __NR_remap_file_pages	257
-#define __NR_set_tid_address	258
-#define __NR_timer_create	259
-#define __NR_timer_settime	(__NR_timer_create+1)
-#define __NR_timer_gettime	(__NR_timer_create+2)
-#define __NR_timer_getoverrun	(__NR_timer_create+3)
-#define __NR_timer_delete	(__NR_timer_create+4)
-#define __NR_clock_settime	(__NR_timer_create+5)
-#define __NR_clock_gettime	(__NR_timer_create+6)
-#define __NR_clock_getres	(__NR_timer_create+7)
-#define __NR_clock_nanosleep	(__NR_timer_create+8)
-#define __NR_statfs64		268
-#define __NR_fstatfs64		269
-#define __NR_tgkill		270
-#define __NR_utimes		271
-#define __NR_fadvise64_64	272
-#define __NR_vserver		273
-#define __NR_mbind		274
-#define __NR_get_mempolicy	275
-#define __NR_set_mempolicy	276
-#define __NR_mq_open		277
-#define __NR_mq_unlink		(__NR_mq_open+1)
-#define __NR_mq_timedsend	(__NR_mq_open+2)
-#define __NR_mq_timedreceive	(__NR_mq_open+3)
-#define __NR_mq_notify		(__NR_mq_open+4)
-#define __NR_mq_getsetattr	(__NR_mq_open+5)
-#define __NR_kexec_load		283
-#define __NR_waitid		284
-/* 285 is unused */
-#define __NR_add_key		286
-#define __NR_request_key	287
-#define __NR_keyctl		288
-#define __NR_ioprio_set		289
-#define __NR_ioprio_get		290
-#define __NR_inotify_init	291
-#define __NR_inotify_add_watch	292
-#define __NR_inotify_rm_watch	293
-#define __NR_migrate_pages	294
-#define __NR_openat		295
-#define __NR_mkdirat		296
-#define __NR_mknodat		297
-#define __NR_fchownat		298
-#define __NR_futimesat		299
-#define __NR_fstatat64		300
-#define __NR_unlinkat		301
-#define __NR_renameat		302
-#define __NR_linkat		303
-#define __NR_symlinkat		304
-#define __NR_readlinkat		305
-#define __NR_fchmodat		306
-#define __NR_faccessat		307
-#define __NR_pselect6		308
-#define __NR_ppoll		309
-#define __NR_unshare		310
-#define __NR_set_robust_list	311
-#define __NR_get_robust_list	312
-#define __NR_splice		313
-#define __NR_sync_file_range	314
-#define __NR_tee		315
-#define __NR_vmsplice		316
-#define __NR_move_pages		317
-#define __NR_getcpu		318
-#define __NR_epoll_pwait	319
-#define __NR_utimensat		320
-#define __NR_signalfd		321
-/* #define __NR_timerfd		322 removed */
-#define __NR_eventfd		323
-#define __NR_fallocate		324
-#define __NR_setns		325
-
-#ifdef __KERNEL__
 
 #define NR_syscalls 326
 
@@ -352,7 +22,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
@@ -391,5 +60,4 @@
 #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
 #endif
 
-#endif /* __KERNEL__ */
 #endif /* _ASM_M32R_UNISTD_H */
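
The cond_syscall() definition retained above is what kernel/sys_ni.c uses
to stub out syscalls a configuration doesn't build: each entry becomes a
weak alias of sys_ni_syscall(), which simply returns -ENOSYS, and any real
definition overrides the weak one at link time. For example:

    /* from the kernel/sys_ni.c pattern: with CONFIG_EPOLL=n these
     * resolve to sys_ni_syscall() and fail with -ENOSYS */
    cond_syscall(sys_epoll_create);
    cond_syscall(sys_epoll_ctl);
    cond_syscall(sys_epoll_wait);
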
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index baebb3d..43937a6 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,3 +1,33 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += mman.h
+header-y += msgbuf.h
+header-y += param.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += unistd.h
diff --git a/arch/m32r/include/asm/auxvec.h b/arch/m32r/include/uapi/asm/auxvec.h
similarity index 100%
rename from arch/m32r/include/asm/auxvec.h
rename to arch/m32r/include/uapi/asm/auxvec.h
diff --git a/arch/m32r/include/asm/bitsperlong.h b/arch/m32r/include/uapi/asm/bitsperlong.h
similarity index 100%
rename from arch/m32r/include/asm/bitsperlong.h
rename to arch/m32r/include/uapi/asm/bitsperlong.h
diff --git a/arch/m32r/include/asm/byteorder.h b/arch/m32r/include/uapi/asm/byteorder.h
similarity index 100%
rename from arch/m32r/include/asm/byteorder.h
rename to arch/m32r/include/uapi/asm/byteorder.h
diff --git a/arch/m32r/include/asm/errno.h b/arch/m32r/include/uapi/asm/errno.h
similarity index 100%
rename from arch/m32r/include/asm/errno.h
rename to arch/m32r/include/uapi/asm/errno.h
diff --git a/arch/m32r/include/asm/fcntl.h b/arch/m32r/include/uapi/asm/fcntl.h
similarity index 100%
rename from arch/m32r/include/asm/fcntl.h
rename to arch/m32r/include/uapi/asm/fcntl.h
diff --git a/arch/m32r/include/asm/ioctl.h b/arch/m32r/include/uapi/asm/ioctl.h
similarity index 100%
rename from arch/m32r/include/asm/ioctl.h
rename to arch/m32r/include/uapi/asm/ioctl.h
diff --git a/arch/m32r/include/asm/ioctls.h b/arch/m32r/include/uapi/asm/ioctls.h
similarity index 100%
rename from arch/m32r/include/asm/ioctls.h
rename to arch/m32r/include/uapi/asm/ioctls.h
diff --git a/arch/m32r/include/asm/ipcbuf.h b/arch/m32r/include/uapi/asm/ipcbuf.h
similarity index 100%
rename from arch/m32r/include/asm/ipcbuf.h
rename to arch/m32r/include/uapi/asm/ipcbuf.h
diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/uapi/asm/mman.h
similarity index 100%
rename from arch/m32r/include/asm/mman.h
rename to arch/m32r/include/uapi/asm/mman.h
diff --git a/arch/m32r/include/asm/msgbuf.h b/arch/m32r/include/uapi/asm/msgbuf.h
similarity index 100%
rename from arch/m32r/include/asm/msgbuf.h
rename to arch/m32r/include/uapi/asm/msgbuf.h
diff --git a/arch/m32r/include/asm/param.h b/arch/m32r/include/uapi/asm/param.h
similarity index 100%
rename from arch/m32r/include/asm/param.h
rename to arch/m32r/include/uapi/asm/param.h
diff --git a/arch/m32r/include/asm/poll.h b/arch/m32r/include/uapi/asm/poll.h
similarity index 100%
rename from arch/m32r/include/asm/poll.h
rename to arch/m32r/include/uapi/asm/poll.h
diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/uapi/asm/posix_types.h
similarity index 100%
rename from arch/m32r/include/asm/posix_types.h
rename to arch/m32r/include/uapi/asm/posix_types.h
diff --git a/arch/m32r/include/uapi/asm/ptrace.h b/arch/m32r/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..f6930a8
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/ptrace.h
@@ -0,0 +1,117 @@
+/*
+ * linux/include/asm-m32r/ptrace.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * M32R version:
+ *   Copyright (C) 2001-2002, 2004  Hirokazu Takata <takata at linux-m32r.org>
+ */
+#ifndef _UAPI_ASM_M32R_PTRACE_H
+#define _UAPI_ASM_M32R_PTRACE_H
+
+
+/* 0 - 13 are integer registers (general purpose registers).  */
+#define PT_R4		0
+#define PT_R5		1
+#define PT_R6		2
+#define PT_REGS 	3
+#define PT_R0		4
+#define PT_R1		5
+#define PT_R2		6
+#define PT_R3		7
+#define PT_R7		8
+#define PT_R8		9
+#define PT_R9		10
+#define PT_R10		11
+#define PT_R11		12
+#define PT_R12		13
+#define PT_SYSCNR	14
+#define PT_R13		PT_FP
+#define PT_R14		PT_LR
+#define PT_R15		PT_SP
+
+/* processor status and miscellaneous context registers.  */
+#define PT_ACC0H	15
+#define PT_ACC0L	16
+#define PT_ACC1H	17	/* ISA_DSP_LEVEL2 only */
+#define PT_ACC1L	18	/* ISA_DSP_LEVEL2 only */
+#define PT_PSW		19
+#define PT_BPC		20
+#define PT_BBPSW	21
+#define PT_BBPC		22
+#define PT_SPU		23
+#define PT_FP		24
+#define PT_LR		25
+#define PT_SPI		26
+#define PT_ORIGR0	27
+
+/* virtual pt_reg entry for gdb */
+#define PT_PC		30
+#define PT_CBR		31
+#define PT_EVB		32
+
+
+/* Control registers.  */
+#define SPR_CR0 PT_PSW
+#define SPR_CR1 PT_CBR		/* read only */
+#define SPR_CR2 PT_SPI
+#define SPR_CR3 PT_SPU
+#define SPR_CR4
+#define SPR_CR5 PT_EVB		/* part of M32R/E, M32R/I core only */
+#define SPR_CR6 PT_BPC
+#define SPR_CR7
+#define SPR_CR8 PT_BBPSW
+#define SPR_CR9
+#define SPR_CR10
+#define SPR_CR11
+#define SPR_CR12
+#define SPR_CR13 PT_WR
+#define SPR_CR14 PT_BBPC
+#define SPR_CR15
+
+/* this struct defines the way the registers are stored on the
+   stack during a system call. */
+struct pt_regs {
+	/* Saved main processor registers. */
+	unsigned long r4;
+	unsigned long r5;
+	unsigned long r6;
+	struct pt_regs *pt_regs;
+	unsigned long r0;
+	unsigned long r1;
+	unsigned long r2;
+	unsigned long r3;
+	unsigned long r7;
+	unsigned long r8;
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	long syscall_nr;
+
+	/* Saved main processor status and miscellaneous context registers. */
+	unsigned long acc0h;
+	unsigned long acc0l;
+	unsigned long acc1h;	/* ISA_DSP_LEVEL2 only */
+	unsigned long acc1l;	/* ISA_DSP_LEVEL2 only */
+	unsigned long psw;
+	unsigned long bpc;		/* saved PC for TRAP syscalls */
+	unsigned long bbpsw;
+	unsigned long bbpc;
+	unsigned long spu;		/* saved user stack */
+	unsigned long fp;
+	unsigned long lr;		/* saved PC for JL syscalls */
+	unsigned long spi;		/* saved kernel stack */
+	unsigned long orig_r0;
+};
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS		12
+#define PTRACE_SETREGS		13
+
+#define PTRACE_OLDSETOPTIONS	21
+
+
+#endif /* _UAPI_ASM_M32R_PTRACE_H */
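
With struct pt_regs and PTRACE_GETREGS now exported, an m32r debugger can
fetch a tracee's registers with the usual idiom. A sketch, m32r-only since
the register layout above is m32r-specific:

    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <asm/ptrace.h>     /* exported struct pt_regs */

    int main(void)
    {
        pid_t pid = fork();

        if (pid == 0) {
            ptrace(PTRACE_TRACEME, 0, NULL, NULL);
            execlp("true", "true", (char *)NULL);
            return 1;
        }
        waitpid(pid, NULL, 0);  /* child stops on exec */

        struct pt_regs regs;
        ptrace(PTRACE_GETREGS, pid, NULL, &regs);
        printf("stopped at bpc=%#lx\n", regs.bpc);  /* bpc holds the PC */

        ptrace(PTRACE_CONT, pid, NULL, NULL);
        return 0;
    }
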
diff --git a/arch/m32r/include/asm/resource.h b/arch/m32r/include/uapi/asm/resource.h
similarity index 100%
rename from arch/m32r/include/asm/resource.h
rename to arch/m32r/include/uapi/asm/resource.h
diff --git a/arch/m32r/include/asm/sembuf.h b/arch/m32r/include/uapi/asm/sembuf.h
similarity index 100%
rename from arch/m32r/include/asm/sembuf.h
rename to arch/m32r/include/uapi/asm/sembuf.h
diff --git a/arch/m32r/include/uapi/asm/setup.h b/arch/m32r/include/uapi/asm/setup.h
new file mode 100644
index 0000000..96961a4
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/setup.h
@@ -0,0 +1,11 @@
+#ifndef _UAPI_ASM_M32R_SETUP_H
+#define _UAPI_ASM_M32R_SETUP_H
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+
+#define COMMAND_LINE_SIZE       512
+
+
+#endif /* _UAPI_ASM_M32R_SETUP_H */
diff --git a/arch/m32r/include/asm/shmbuf.h b/arch/m32r/include/uapi/asm/shmbuf.h
similarity index 100%
rename from arch/m32r/include/asm/shmbuf.h
rename to arch/m32r/include/uapi/asm/shmbuf.h
diff --git a/arch/m32r/include/asm/sigcontext.h b/arch/m32r/include/uapi/asm/sigcontext.h
similarity index 100%
rename from arch/m32r/include/asm/sigcontext.h
rename to arch/m32r/include/uapi/asm/sigcontext.h
diff --git a/arch/m32r/include/asm/siginfo.h b/arch/m32r/include/uapi/asm/siginfo.h
similarity index 100%
rename from arch/m32r/include/asm/siginfo.h
rename to arch/m32r/include/uapi/asm/siginfo.h
diff --git a/arch/m32r/include/uapi/asm/signal.h b/arch/m32r/include/uapi/asm/signal.h
new file mode 100644
index 0000000..54acacb
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/signal.h
@@ -0,0 +1,117 @@
+#ifndef _UAPI_ASM_M32R_SIGNAL_H
+#define _UAPI_ASM_M32R_SIGNAL_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/compiler.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG		32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	_NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001u
+#define SA_NOCLDWAIT	0x00000002u
+#define SA_SIGINFO	0x00000004u
+#define SA_ONSTACK	0x08000000u
+#define SA_RESTART	0x10000000u
+#define SA_NODEFER	0x40000000u
+#define SA_RESETHAND	0x80000000u
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+#define SA_RESTORER	0x04000000
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+	union {
+	  __sighandler_t _sa_handler;
+	  void (*_sa_sigaction)(int, struct siginfo *, void *);
+	} _u;
+	sigset_t sa_mask;
+	unsigned long sa_flags;
+	void (*sa_restorer)(void);
+};
+
+#define sa_handler	_u._sa_handler
+#define sa_sigaction	_u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+
+#endif /* _UAPI_ASM_M32R_SIGNAL_H */
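
The SA_* flags and struct sigaction exported here are consumed through the
C library's sigaction(3). A minimal userspace sketch using them:

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static void on_usr1(int sig, siginfo_t *info, void *uctx)
    {
        /* only async-signal-safe calls belong in a handler */
        write(STDOUT_FILENO, "caught SIGUSR1\n", 15);
    }

    int main(void)
    {
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = on_usr1;
        sa.sa_flags = SA_SIGINFO | SA_RESTART;  /* three-arg handler;
                                                   restart interrupted
                                                   syscalls */
        sigemptyset(&sa.sa_mask);
        sigaction(SIGUSR1, &sa, NULL);

        raise(SIGUSR1);
        return 0;
    }
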
diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
similarity index 100%
rename from arch/m32r/include/asm/socket.h
rename to arch/m32r/include/uapi/asm/socket.h
diff --git a/arch/m32r/include/asm/sockios.h b/arch/m32r/include/uapi/asm/sockios.h
similarity index 100%
rename from arch/m32r/include/asm/sockios.h
rename to arch/m32r/include/uapi/asm/sockios.h
diff --git a/arch/m32r/include/asm/stat.h b/arch/m32r/include/uapi/asm/stat.h
similarity index 100%
rename from arch/m32r/include/asm/stat.h
rename to arch/m32r/include/uapi/asm/stat.h
diff --git a/arch/m32r/include/asm/statfs.h b/arch/m32r/include/uapi/asm/statfs.h
similarity index 100%
rename from arch/m32r/include/asm/statfs.h
rename to arch/m32r/include/uapi/asm/statfs.h
diff --git a/arch/m32r/include/asm/swab.h b/arch/m32r/include/uapi/asm/swab.h
similarity index 100%
rename from arch/m32r/include/asm/swab.h
rename to arch/m32r/include/uapi/asm/swab.h
diff --git a/arch/m32r/include/asm/termbits.h b/arch/m32r/include/uapi/asm/termbits.h
similarity index 100%
rename from arch/m32r/include/asm/termbits.h
rename to arch/m32r/include/uapi/asm/termbits.h
diff --git a/arch/m32r/include/uapi/asm/termios.h b/arch/m32r/include/uapi/asm/termios.h
new file mode 100644
index 0000000..07ad27b
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/termios.h
@@ -0,0 +1,43 @@
+#ifndef _UAPI_M32R_TERMIOS_H
+#define _UAPI_M32R_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+
+#endif /* _UAPI_M32R_TERMIOS_H */
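
struct winsize and the TIOCM_* bits above are plain ioctl(2) payloads. A
small sketch of both (the size query works on any tty; the modem-line
query only succeeds on serial devices):

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(void)
    {
        struct winsize ws;
        int bits;

        if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) == 0)
            printf("%hu rows x %hu cols\n", ws.ws_row, ws.ws_col);

        if (ioctl(STDIN_FILENO, TIOCMGET, &bits) == 0)
            printf("DTR=%d CTS=%d\n",
                   !!(bits & TIOCM_DTR), !!(bits & TIOCM_CTS));
        return 0;
    }
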
diff --git a/arch/m32r/include/uapi/asm/types.h b/arch/m32r/include/uapi/asm/types.h
new file mode 100644
index 0000000..9ec9d4c
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/types.h
@@ -0,0 +1 @@
+#include <asm-generic/int-ll64.h>
diff --git a/arch/m32r/include/uapi/asm/unistd.h b/arch/m32r/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..5a54f2a
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/unistd.h
@@ -0,0 +1,335 @@
+#ifndef _UAPI_ASM_M32R_UNISTD_H
+#define _UAPI_ASM_M32R_UNISTD_H
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_restart_syscall	  0
+#define __NR_exit		  1
+#define __NR_fork		  2
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_open		  5
+#define __NR_close		  6
+#define __NR_waitpid		  7
+#define __NR_creat		  8
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_execve		 11
+#define __NR_chdir		 12
+#define __NR_time		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+/* 16 is unused */
+/* 17 is unused */
+/* 18 is unused */
+#define __NR_lseek		 19
+#define __NR_getpid		 20
+#define __NR_mount		 21
+#define __NR_umount		 22
+/* 23 is unused */
+/* 24 is unused */
+#define __NR_stime		 25
+#define __NR_ptrace		 26
+#define __NR_alarm		 27
+/* 28 is unused */
+#define __NR_pause		 29
+#define __NR_utime		 30
+/* 31 is unused */
+#define __NR_cachectl		 32 /* old #define __NR_gtty		 32 */
+#define __NR_access		 33
+/* 34 is unused */
+/* 35 is unused */
+#define __NR_sync		 36
+#define __NR_kill		 37
+#define __NR_rename		 38
+#define __NR_mkdir		 39
+#define __NR_rmdir		 40
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_times		 43
+/* 44 is unused */
+#define __NR_brk		 45
+/* 46 is unused */
+/* 47 is unused (getgid16) */
+/* 48 is unused */
+/* 49 is unused */
+/* 50 is unused */
+#define __NR_acct		 51
+#define __NR_umount2		 52
+/* 53 is unused */
+#define __NR_ioctl		 54
+/* 55 is unused (fcntl) */
+/* 56 is unused */
+#define __NR_setpgid		 57
+/* 58 is unused */
+/* 59 is unused */
+#define __NR_umask		 60
+#define __NR_chroot		 61
+#define __NR_ustat		 62
+#define __NR_dup2		 63
+#define __NR_getppid		 64
+#define __NR_getpgrp		 65
+#define __NR_setsid		 66
+/* 67 is unused */
+/* 68 is unused */
+/* 69 is unused */
+/* 70 is unused */
+/* 71 is unused */
+/* 72 is unused */
+/* 73 is unused */
+#define __NR_sethostname	 74
+#define __NR_setrlimit		 75
+/* 76 is unused (old getrlimit) */
+#define __NR_getrusage		 77
+#define __NR_gettimeofday	 78
+#define __NR_settimeofday	 79
+/* 80 is unused */
+/* 81 is unused */
+/* 82 is unused */
+#define __NR_symlink		 83
+/* 84 is unused */
+#define __NR_readlink		 85
+#define __NR_uselib		 86
+#define __NR_swapon		 87
+#define __NR_reboot		 88
+/* 89 is unused */
+/* 90 is unused */
+#define __NR_munmap		 91
+#define __NR_truncate		 92
+#define __NR_ftruncate		 93
+#define __NR_fchmod		 94
+/* 95 is unused */
+#define __NR_getpriority	 96
+#define __NR_setpriority	 97
+/* 98 is unused */
+#define __NR_statfs		 99
+#define __NR_fstatfs		100
+/* 101 is unused */
+#define __NR_socketcall		102
+#define __NR_syslog		103
+#define __NR_setitimer		104
+#define __NR_getitimer		105
+#define __NR_stat		106
+#define __NR_lstat		107
+#define __NR_fstat		108
+/* 109 is unused */
+/* 110 is unused */
+#define __NR_vhangup		111
+/* 112 is unused */
+/* 113 is unused */
+#define __NR_wait4		114
+#define __NR_swapoff		115
+#define __NR_sysinfo		116
+#define __NR_ipc		117
+#define __NR_fsync		118
+/* 119 is unused */
+#define __NR_clone		120
+#define __NR_setdomainname	121
+#define __NR_uname		122
+/* 123 is unused */
+#define __NR_adjtimex		124
+#define __NR_mprotect		125
+/* 126 is unused */
+/* 127 is unused */
+#define __NR_init_module	128
+#define __NR_delete_module	129
+/* 130 is unused */
+#define __NR_quotactl		131
+#define __NR_getpgid		132
+#define __NR_fchdir		133
+#define __NR_bdflush		134
+#define __NR_sysfs		135
+#define __NR_personality	136
+/* 137 is unused */
+/* 138 is unused */
+/* 139 is unused */
+#define __NR__llseek		140
+#define __NR_getdents		141
+#define __NR__newselect		142
+#define __NR_flock		143
+#define __NR_msync		144
+#define __NR_readv		145
+#define __NR_writev		146
+#define __NR_getsid		147
+#define __NR_fdatasync		148
+#define __NR__sysctl		149
+#define __NR_mlock		150
+#define __NR_munlock		151
+#define __NR_mlockall		152
+#define __NR_munlockall		153
+#define __NR_sched_setparam		154
+#define __NR_sched_getparam		155
+#define __NR_sched_setscheduler		156
+#define __NR_sched_getscheduler		157
+#define __NR_sched_yield		158
+#define __NR_sched_get_priority_max	159
+#define __NR_sched_get_priority_min	160
+#define __NR_sched_rr_get_interval	161
+#define __NR_nanosleep		162
+#define __NR_mremap		163
+/* 164 is unused */
+/* 165 is unused */
+#define __NR_tas		166
+/* 167 is unused */
+#define __NR_poll		168
+#define __NR_nfsservctl		169
+/* 170 is unused */
+/* 171 is unused */
+#define __NR_prctl              172
+#define __NR_rt_sigreturn	173
+#define __NR_rt_sigaction	174
+#define __NR_rt_sigprocmask	175
+#define __NR_rt_sigpending	176
+#define __NR_rt_sigtimedwait	177
+#define __NR_rt_sigqueueinfo	178
+#define __NR_rt_sigsuspend	179
+#define __NR_pread64		180
+#define __NR_pwrite64		181
+/* 182 is unused */
+#define __NR_getcwd		183
+#define __NR_capget		184
+#define __NR_capset		185
+#define __NR_sigaltstack	186
+#define __NR_sendfile		187
+/* 188 is unused */
+/* 189 is unused */
+#define __NR_vfork		190
+#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_pivot_root		217
+#define __NR_mincore		218
+#define __NR_madvise		219
+#define __NR_getdents64		220
+#define __NR_fcntl64		221
+/* 222 is unused */
+/* 223 is unused */
+#define __NR_gettid		224
+#define __NR_readahead		225
+#define __NR_setxattr		226
+#define __NR_lsetxattr		227
+#define __NR_fsetxattr		228
+#define __NR_getxattr		229
+#define __NR_lgetxattr		230
+#define __NR_fgetxattr		231
+#define __NR_listxattr		232
+#define __NR_llistxattr		233
+#define __NR_flistxattr		234
+#define __NR_removexattr	235
+#define __NR_lremovexattr	236
+#define __NR_fremovexattr	237
+#define __NR_tkill		238
+#define __NR_sendfile64		239
+#define __NR_futex		240
+#define __NR_sched_setaffinity	241
+#define __NR_sched_getaffinity	242
+#define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
+#define __NR_io_setup		245
+#define __NR_io_destroy		246
+#define __NR_io_getevents	247
+#define __NR_io_submit		248
+#define __NR_io_cancel		249
+#define __NR_fadvise64		250
+/* 251 is unused */
+#define __NR_exit_group		252
+#define __NR_lookup_dcookie	253
+#define __NR_epoll_create	254
+#define __NR_epoll_ctl		255
+#define __NR_epoll_wait		256
+#define __NR_remap_file_pages	257
+#define __NR_set_tid_address	258
+#define __NR_timer_create	259
+#define __NR_timer_settime	(__NR_timer_create+1)
+#define __NR_timer_gettime	(__NR_timer_create+2)
+#define __NR_timer_getoverrun	(__NR_timer_create+3)
+#define __NR_timer_delete	(__NR_timer_create+4)
+#define __NR_clock_settime	(__NR_timer_create+5)
+#define __NR_clock_gettime	(__NR_timer_create+6)
+#define __NR_clock_getres	(__NR_timer_create+7)
+#define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_statfs64		268
+#define __NR_fstatfs64		269
+#define __NR_tgkill		270
+#define __NR_utimes		271
+#define __NR_fadvise64_64	272
+#define __NR_vserver		273
+#define __NR_mbind		274
+#define __NR_get_mempolicy	275
+#define __NR_set_mempolicy	276
+#define __NR_mq_open		277
+#define __NR_mq_unlink		(__NR_mq_open+1)
+#define __NR_mq_timedsend	(__NR_mq_open+2)
+#define __NR_mq_timedreceive	(__NR_mq_open+3)
+#define __NR_mq_notify		(__NR_mq_open+4)
+#define __NR_mq_getsetattr	(__NR_mq_open+5)
+#define __NR_kexec_load		283
+#define __NR_waitid		284
+/* 285 is unused */
+#define __NR_add_key		286
+#define __NR_request_key	287
+#define __NR_keyctl		288
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
+#define __NR_inotify_init	291
+#define __NR_inotify_add_watch	292
+#define __NR_inotify_rm_watch	293
+#define __NR_migrate_pages	294
+#define __NR_openat		295
+#define __NR_mkdirat		296
+#define __NR_mknodat		297
+#define __NR_fchownat		298
+#define __NR_futimesat		299
+#define __NR_fstatat64		300
+#define __NR_unlinkat		301
+#define __NR_renameat		302
+#define __NR_linkat		303
+#define __NR_symlinkat		304
+#define __NR_readlinkat		305
+#define __NR_fchmodat		306
+#define __NR_faccessat		307
+#define __NR_pselect6		308
+#define __NR_ppoll		309
+#define __NR_unshare		310
+#define __NR_set_robust_list	311
+#define __NR_get_robust_list	312
+#define __NR_splice		313
+#define __NR_sync_file_range	314
+#define __NR_tee		315
+#define __NR_vmsplice		316
+#define __NR_move_pages		317
+#define __NR_getcpu		318
+#define __NR_epoll_pwait	319
+#define __NR_utimensat		320
+#define __NR_signalfd		321
+/* #define __NR_timerfd		322 removed */
+#define __NR_eventfd		323
+#define __NR_fallocate		324
+#define __NR_setns		325
+
+#endif /* _UAPI_ASM_M32R_UNISTD_H */
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 953a7ba..6710084 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -15,8 +15,6 @@
 	select FPU if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 2f2d87b..b1cfff8 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -35,7 +35,8 @@
 if M68KCLASSIC
 
 config M68000
-	bool
+	bool "MC68000"
+	depends on !MMU
 	select CPU_HAS_NO_BITFIELDS
 	select CPU_HAS_NO_MULDIV64
 	select CPU_HAS_NO_UNALIGNED
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index 7636751..2f02acf 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -92,7 +92,7 @@
 head-y				:= arch/m68k/kernel/head.o
 head-$(CONFIG_SUN3)		:= arch/m68k/kernel/sun3-head.o
 head-$(CONFIG_M68360)		:= arch/m68k/platform/68360/head.o
-head-$(CONFIG_M68000)		:= arch/m68k/platform/68328/head.o
+head-$(CONFIG_M68000)		:= arch/m68k/platform/68000/head.o
 head-$(CONFIG_COLDFIRE)		:= arch/m68k/platform/coldfire/head.o
 
 core-y				+= arch/m68k/kernel/	arch/m68k/mm/
@@ -114,9 +114,7 @@
 core-$(CONFIG_M68060)		+= arch/m68k/ifpsp060/
 core-$(CONFIG_M68KFPU_EMU)	+= arch/m68k/math-emu/
 core-$(CONFIG_M68360)		+= arch/m68k/platform/68360/
-core-$(CONFIG_M68000)		+= arch/m68k/platform/68328/
-core-$(CONFIG_M68EZ328)		+= arch/m68k/platform/68EZ328/
-core-$(CONFIG_M68VZ328)		+= arch/m68k/platform/68VZ328/
+core-$(CONFIG_M68000)		+= arch/m68k/platform/68000/
 core-$(CONFIG_COLDFIRE)		+= arch/m68k/platform/coldfire/
 
 
diff --git a/arch/m68k/include/asm/m5249sim.h b/arch/m68k/include/asm/m5249sim.h
deleted file mode 100644
index fdf45e6..0000000
--- a/arch/m68k/include/asm/m5249sim.h
+++ /dev/null
@@ -1,269 +0,0 @@
-/****************************************************************************/
-
-/*
- *	m5249sim.h -- ColdFire 5249 System Integration Module support.
- *
- *	(C) Copyright 2002, Greg Ungerer (gerg@snapgear.com)
- */
-
-/****************************************************************************/
-#ifndef	m5249sim_h
-#define	m5249sim_h
-/****************************************************************************/
-
-#define	CPU_NAME		"COLDFIRE(m5249)"
-#define	CPU_INSTR_PER_JIFFY	3
-#define	MCF_BUSCLK		(MCF_CLK / 2)
-
-#include <asm/m52xxacr.h>
-
-/*
- *	The 5249 has a second MBAR region, define its address.
- */
-#define MCF_MBAR2		0x80000000
-
-/*
- *	Define the 5249 SIM register set addresses.
- */
-#define	MCFSIM_RSR		(MCF_MBAR + 0x00)	/* Reset Status */
-#define	MCFSIM_SYPCR		(MCF_MBAR + 0x01)	/* System Protection */
-#define	MCFSIM_SWIVR		(MCF_MBAR + 0x02)	/* SW Watchdog intr */
-#define	MCFSIM_SWSR		(MCF_MBAR + 0x03)	/* SW Watchdog srv */
-#define	MCFSIM_PAR		(MCF_MBAR + 0x04)	/* Pin Assignment */
-#define	MCFSIM_IRQPAR		(MCF_MBAR + 0x06)	/* Intr Assignment */
-#define	MCFSIM_MPARK		(MCF_MBAR + 0x0C)	/* BUS Master Ctrl */
-#define	MCFSIM_IPR		(MCF_MBAR + 0x40)	/* Interrupt Pending */
-#define	MCFSIM_IMR		(MCF_MBAR + 0x44)	/* Interrupt Mask */
-#define	MCFSIM_AVR		(MCF_MBAR + 0x4b)	/* Autovector Ctrl */
-#define	MCFSIM_ICR0		(MCF_MBAR + 0x4c)	/* Intr Ctrl reg 0 */
-#define	MCFSIM_ICR1		(MCF_MBAR + 0x4d)	/* Intr Ctrl reg 1 */
-#define	MCFSIM_ICR2		(MCF_MBAR + 0x4e)	/* Intr Ctrl reg 2 */
-#define	MCFSIM_ICR3		(MCF_MBAR + 0x4f)	/* Intr Ctrl reg 3 */
-#define	MCFSIM_ICR4		(MCF_MBAR + 0x50)	/* Intr Ctrl reg 4 */
-#define	MCFSIM_ICR5		(MCF_MBAR + 0x51)	/* Intr Ctrl reg 5 */
-#define	MCFSIM_ICR6		(MCF_MBAR + 0x52)	/* Intr Ctrl reg 6 */
-#define	MCFSIM_ICR7		(MCF_MBAR + 0x53)	/* Intr Ctrl reg 7 */
-#define	MCFSIM_ICR8		(MCF_MBAR + 0x54)	/* Intr Ctrl reg 8 */
-#define	MCFSIM_ICR9		(MCF_MBAR + 0x55)	/* Intr Ctrl reg 9 */
-#define	MCFSIM_ICR10		(MCF_MBAR + 0x56)	/* Intr Ctrl reg 10 */
-#define	MCFSIM_ICR11		(MCF_MBAR + 0x57)	/* Intr Ctrl reg 11 */
-
-#define	MCFSIM_CSAR0		(MCF_MBAR + 0x80)	/* CS 0 Address reg */
-#define	MCFSIM_CSMR0		(MCF_MBAR + 0x84)	/* CS 0 Mask reg */
-#define	MCFSIM_CSCR0		(MCF_MBAR + 0x8a)	/* CS 0 Control reg */
-#define	MCFSIM_CSAR1		(MCF_MBAR + 0x8c)	/* CS 1 Address reg */
-#define	MCFSIM_CSMR1		(MCF_MBAR + 0x90)	/* CS 1 Mask reg */
-#define	MCFSIM_CSCR1		(MCF_MBAR + 0x96)	/* CS 1 Control reg */
-#define	MCFSIM_CSAR2		(MCF_MBAR + 0x98)	/* CS 2 Address reg */
-#define	MCFSIM_CSMR2		(MCF_MBAR + 0x9c)	/* CS 2 Mask reg */
-#define	MCFSIM_CSCR2		(MCF_MBAR + 0xa2)	/* CS 2 Control reg */
-#define	MCFSIM_CSAR3		(MCF_MBAR + 0xa4)	/* CS 3 Address reg */
-#define	MCFSIM_CSMR3		(MCF_MBAR + 0xa8)	/* CS 3 Mask reg */
-#define	MCFSIM_CSCR3		(MCF_MBAR + 0xae)	/* CS 3 Control reg */
-
-#define MCFSIM_DCR		(MCF_MBAR + 0x100)	/* DRAM Control */
-#define MCFSIM_DACR0		(MCF_MBAR + 0x108)	/* DRAM 0 Addr/Ctrl */
-#define MCFSIM_DMR0		(MCF_MBAR + 0x10c)	/* DRAM 0 Mask */
-#define MCFSIM_DACR1		(MCF_MBAR + 0x110)	/* DRAM 1 Addr/Ctrl */
-#define MCFSIM_DMR1		(MCF_MBAR + 0x114)	/* DRAM 1 Mask */
-
-/*
- *	Timer module.
- */
-#define MCFTIMER_BASE1		(MCF_MBAR + 0x140)	/* Base of TIMER1 */
-#define MCFTIMER_BASE2		(MCF_MBAR + 0x180)	/* Base of TIMER2 */
-
-/*
- *	UART module.
- */
-#define MCFUART_BASE0		(MCF_MBAR + 0x1c0)	/* Base address UART0 */
-#define MCFUART_BASE1		(MCF_MBAR + 0x200)	/* Base address UART1 */
-
-/*
- *	QSPI module.
- */
-#define	MCFQSPI_BASE		(MCF_MBAR + 0x300)	/* Base address QSPI */
-#define	MCFQSPI_SIZE		0x40			/* Register set size */
-
-#define	MCFQSPI_CS0		29
-#define	MCFQSPI_CS1		24
-#define	MCFQSPI_CS2		21
-#define	MCFQSPI_CS3		22
-
-/*
- *	DMA unit base addresses.
- */
-#define MCFDMA_BASE0		(MCF_MBAR + 0x300)	/* Base address DMA 0 */
-#define MCFDMA_BASE1		(MCF_MBAR + 0x340)	/* Base address DMA 1 */
-#define MCFDMA_BASE2		(MCF_MBAR + 0x380)	/* Base address DMA 2 */
-#define MCFDMA_BASE3		(MCF_MBAR + 0x3C0)	/* Base address DMA 3 */
-
-/*
- *	Some symbol defines for the above...
- */
-#define	MCFSIM_SWDICR		MCFSIM_ICR0	/* Watchdog timer ICR */
-#define	MCFSIM_TIMER1ICR	MCFSIM_ICR1	/* Timer 1 ICR */
-#define	MCFSIM_TIMER2ICR	MCFSIM_ICR2	/* Timer 2 ICR */
-#define	MCFSIM_UART1ICR		MCFSIM_ICR4	/* UART 1 ICR */
-#define	MCFSIM_UART2ICR		MCFSIM_ICR5	/* UART 2 ICR */
-#define	MCFSIM_DMA0ICR		MCFSIM_ICR6	/* DMA 0 ICR */
-#define	MCFSIM_DMA1ICR		MCFSIM_ICR7	/* DMA 1 ICR */
-#define	MCFSIM_DMA2ICR		MCFSIM_ICR8	/* DMA 2 ICR */
-#define	MCFSIM_DMA3ICR		MCFSIM_ICR9	/* DMA 3 ICR */
-#define	MCFSIM_QSPIICR		MCFSIM_ICR10	/* QSPI ICR */
-
-/*
- *	Define system peripheral IRQ usage.
- */
-#define	MCF_IRQ_QSPI		28		/* QSPI, Level 4 */
-#define	MCF_IRQ_TIMER		30		/* Timer0, Level 6 */
-#define	MCF_IRQ_PROFILER	31		/* Timer1, Level 7 */
-
-#define	MCF_IRQ_UART0		73		/* UART0 */
-#define	MCF_IRQ_UART1		74		/* UART1 */
-
-/*
- *	General purpose IO registers (in MBAR2).
- */
-#define	MCFSIM2_GPIOREAD	(MCF_MBAR2 + 0x000)	/* GPIO read values */
-#define	MCFSIM2_GPIOWRITE	(MCF_MBAR2 + 0x004)	/* GPIO write values */
-#define	MCFSIM2_GPIOENABLE	(MCF_MBAR2 + 0x008)	/* GPIO enabled */
-#define	MCFSIM2_GPIOFUNC	(MCF_MBAR2 + 0x00C)	/* GPIO function */
-#define	MCFSIM2_GPIO1READ	(MCF_MBAR2 + 0x0B0)	/* GPIO1 read values */
-#define	MCFSIM2_GPIO1WRITE	(MCF_MBAR2 + 0x0B4)	/* GPIO1 write values */
-#define	MCFSIM2_GPIO1ENABLE	(MCF_MBAR2 + 0x0B8)	/* GPIO1 enabled */
-#define	MCFSIM2_GPIO1FUNC	(MCF_MBAR2 + 0x0BC)	/* GPIO1 function */
-
-#define	MCFSIM2_GPIOINTSTAT	(MCF_MBAR2 + 0xc0)	/* GPIO intr status */
-#define	MCFSIM2_GPIOINTCLEAR	(MCF_MBAR2 + 0xc0)	/* GPIO intr clear */
-#define	MCFSIM2_GPIOINTENABLE	(MCF_MBAR2 + 0xc4)	/* GPIO intr enable */
-
-#define	MCFSIM2_INTLEVEL1	(MCF_MBAR2 + 0x140)	/* Intr level reg 1 */
-#define	MCFSIM2_INTLEVEL2	(MCF_MBAR2 + 0x144)	/* Intr level reg 2 */
-#define	MCFSIM2_INTLEVEL3	(MCF_MBAR2 + 0x148)	/* Intr level reg 3 */
-#define	MCFSIM2_INTLEVEL4	(MCF_MBAR2 + 0x14c)	/* Intr level reg 4 */
-#define	MCFSIM2_INTLEVEL5	(MCF_MBAR2 + 0x150)	/* Intr level reg 5 */
-#define	MCFSIM2_INTLEVEL6	(MCF_MBAR2 + 0x154)	/* Intr level reg 6 */
-#define	MCFSIM2_INTLEVEL7	(MCF_MBAR2 + 0x158)	/* Intr level reg 7 */
-#define	MCFSIM2_INTLEVEL8	(MCF_MBAR2 + 0x15c)	/* Intr level reg 8 */
-
-#define	MCFSIM2_DMAROUTE	(MCF_MBAR2 + 0x188)	/* DMA routing */
-
-#define	MCFSIM2_IDECONFIG1	(MCF_MBAR2 + 0x18c)	/* IDEconfig1 */
-#define	MCFSIM2_IDECONFIG2	(MCF_MBAR2 + 0x190)	/* IDEconfig2 */
-
-/*
- * Define the base interrupt for the second interrupt controller.
- * We set it to 128, out of the way of the base interrupts, and plenty
- * of room for its 64 interrupts.
- */
-#define	MCFINTC2_VECBASE	128
-
-#define	MCFINTC2_GPIOIRQ0	(MCFINTC2_VECBASE + 32)
-#define	MCFINTC2_GPIOIRQ1	(MCFINTC2_VECBASE + 33)
-#define	MCFINTC2_GPIOIRQ2	(MCFINTC2_VECBASE + 34)
-#define	MCFINTC2_GPIOIRQ3	(MCFINTC2_VECBASE + 35)
-#define	MCFINTC2_GPIOIRQ4	(MCFINTC2_VECBASE + 36)
-#define	MCFINTC2_GPIOIRQ5	(MCFINTC2_VECBASE + 37)
-#define	MCFINTC2_GPIOIRQ6	(MCFINTC2_VECBASE + 38)
-#define	MCFINTC2_GPIOIRQ7	(MCFINTC2_VECBASE + 39)
-
-/*
- * Generic GPIO support
- */
-#define MCFGPIO_PIN_MAX		64
-#define MCFGPIO_IRQ_MAX		-1
-#define MCFGPIO_IRQ_VECBASE	-1
-
-/****************************************************************************/
-
-#ifdef __ASSEMBLER__
-
-/*
- *	The M5249C3 board needs a little help getting all its SIM devices
- *	initialized at kernel start time. dBUG doesn't set much up, so
- *	we need to do it manually.
- */
-.macro m5249c3_setup
-	/*
-	 *	Set MBAR1 and MBAR2, just incase they are not set.
-	 */
-	movel	#0x10000001,%a0
-	movec	%a0,%MBAR			/* map MBAR region */
-	subql	#1,%a0				/* get MBAR address in a0 */
-
-	movel	#0x80000001,%a1
-	movec	%a1,#3086			/* map MBAR2 region */
-	subql	#1,%a1				/* get MBAR2 address in a1 */
-
-	/*
-	 *      Move secondary interrupts to their base (128).
-	 */
-	moveb	#MCFINTC2_VECBASE,%d0
-	moveb	%d0,0x16b(%a1)			/* interrupt base register */
-
-	/*
-	 *      Work around broken CSMR0/DRAM vector problem.
-	 */
-	movel	#0x001F0021,%d0			/* disable C/I bit */
-	movel	%d0,0x84(%a0)			/* set CSMR0 */
-
-	/*
-	 *	Disable the PLL firstly. (Who knows what state it is
-	 *	in here!).
-	 */
-	movel	0x180(%a1),%d0			/* get current PLL value */
-	andl	#0xfffffffe,%d0			/* PLL bypass first */
-	movel	%d0,0x180(%a1)			/* set PLL register */
-	nop
-
-#if CONFIG_CLOCK_FREQ == 140000000
-	/*
-	 *	Set initial clock frequency. This assumes M5249C3 board
-	 *	is fitted with 11.2896MHz crystal. It will program the
-	 *	PLL for 140MHz. Lets go fast :-)
-	 */
-	movel	#0x125a40f0,%d0			/* set for 140MHz */
-	movel	%d0,0x180(%a1)			/* set PLL register */
-	orl	#0x1,%d0
-	movel	%d0,0x180(%a1)			/* set PLL register */
-#endif
-
-	/*
-	 *	Setup CS1 for ethernet controller.
-	 *	(Setup as per M5249C3 doco).
-	 */
-	movel  #0xe0000000,%d0			/* CS1 mapped at 0xe0000000 */
-	movel  %d0,0x8c(%a0)
-	movel  #0x001f0021,%d0			/* CS1 size of 1Mb */
-	movel  %d0,0x90(%a0)
-	movew  #0x0080,%d0			/* CS1 = 16bit port, AA */
-	movew  %d0,0x96(%a0)
-
-	/*
-	 *	Setup CS2 for IDE interface.
-	 */
-	movel	#0x50000000,%d0			/* CS2 mapped at 0x50000000 */
-	movel	%d0,0x98(%a0)
-	movel	#0x001f0001,%d0			/* CS2 size of 1MB */
-	movel	%d0,0x9c(%a0)
-	movew	#0x0080,%d0			/* CS2 = 16bit, TA */
-	movew	%d0,0xa2(%a0)
-
-	movel	#0x00107000,%d0			/* IDEconfig1 */
-	movel	%d0,0x18c(%a1)
-	movel	#0x000c0400,%d0			/* IDEconfig2 */
-	movel	%d0,0x190(%a1)
-
-	movel	#0x00080000,%d0			/* GPIO19, IDE reset bit */
-	orl	%d0,0xc(%a1)			/* function GPIO19 */
-	orl	%d0,0x8(%a1)			/* enable GPIO19 as output */
-        orl	%d0,0x4(%a1)			/* de-assert IDE reset */
-.endm
-
-#define	PLATFORM_SETUP	m5249c3_setup
-
-#endif /* __ASSEMBLER__ */
-
-/****************************************************************************/
-#endif	/* m5249sim_h */
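
The fixed MBAR offsets deleted here (and re-created in m525xsim.h below)
are consumed as plain memory-mapped registers; these ColdFire parts run
without an MMU, so access is a volatile load/store at the absolute
address. A self-contained sketch of the pattern, assuming the 0x10000000
MBAR base the setup code above programs (sim_readl/sim_writel are
illustrative helpers, not kernel API):

    #define MCF_MBAR    0x10000000              /* assumed MBAR base */
    #define MCFSIM_IMR  (MCF_MBAR + 0x44)       /* Interrupt Mask reg */

    static inline unsigned long sim_readl(unsigned long addr)
    {
        return *(volatile unsigned long *)addr;
    }

    static inline void sim_writel(unsigned long val, unsigned long addr)
    {
        *(volatile unsigned long *)addr = val;
    }

    /* mask one interrupt source by setting its bit in the SIM IMR */
    static void m5249_irq_mask(int bit)
    {
        sim_writel(sim_readl(MCFSIM_IMR) | (1ul << bit), MCFSIM_IMR);
    }
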
diff --git a/arch/m68k/include/asm/m525xsim.h b/arch/m68k/include/asm/m525xsim.h
index acab61c..e33f5bb 100644
--- a/arch/m68k/include/asm/m525xsim.h
+++ b/arch/m68k/include/asm/m525xsim.h
@@ -12,6 +12,11 @@
 #define m525xsim_h
 /****************************************************************************/
 
+/*
+ *	This header supports ColdFire 5249, 5251 and 5253. There are a few
+ *	little differences between them, but most of the peripheral support
+ *	can be used by all of them.
+ */
 #define CPU_NAME		"COLDFIRE(m525x)"
 #define CPU_INSTR_PER_JIFFY	3
 #define MCF_BUSCLK		(MCF_CLK / 2)
@@ -65,6 +70,8 @@
 #define MCFSIM_DCR		(MCF_MBAR + 0x100)	/* DRAM Control */
 #define MCFSIM_DACR0		(MCF_MBAR + 0x108)	/* DRAM 0 Addr/Ctrl */
 #define MCFSIM_DMR0		(MCF_MBAR + 0x10c)	/* DRAM 0 Mask */
+#define MCFSIM_DACR1		(MCF_MBAR + 0x110)	/* DRAM 1 Addr/Ctrl */
+#define MCFSIM_DMR1		(MCF_MBAR + 0x114)	/* DRAM 1 Mask */
 
 /*
  * Secondary Interrupt Controller (in MBAR2)
@@ -101,11 +108,17 @@
 #define MCFQSPI_BASE		(MCF_MBAR + 0x300)	/* Base address QSPI */
 #define MCFQSPI_SIZE		0x40			/* Register set size */
 
-
+#ifdef CONFIG_M5249
+#define MCFQSPI_CS0		29
+#define MCFQSPI_CS1		24
+#define MCFQSPI_CS2		21
+#define MCFQSPI_CS3		22
+#else
 #define MCFQSPI_CS0		15
 #define MCFQSPI_CS1		16
 #define MCFQSPI_CS2		24
 #define MCFQSPI_CS3		28
+#endif
 
 /*
  *	I2C module.
@@ -115,6 +128,7 @@
 
 #define MCFI2C_BASE1		(MCF_MBAR2 + 0x440)	/* Base address I2C1 */
 #define MCFI2C_SIZE1		0x20			/* Register set size */
+
 /*
  *	DMA unit base addresses.
  */
@@ -163,6 +177,7 @@
 #define MCF_IRQ_GPIO4		(MCFINTC2_VECBASE + 36)
 #define MCF_IRQ_GPIO5		(MCFINTC2_VECBASE + 37)
 #define MCF_IRQ_GPIO6		(MCFINTC2_VECBASE + 38)
+#define MCF_IRQ_GPIO7		(MCFINTC2_VECBASE + 39)
 
 #define MCF_IRQ_USBWUP		(MCFINTC2_VECBASE + 40)
 #define MCF_IRQ_I2C1		(MCFINTC2_VECBASE + 62)
@@ -183,12 +198,111 @@
 #define MCFSIM2_GPIOINTCLEAR	(MCF_MBAR2 + 0xc0)	/* GPIO intr clear */
 #define MCFSIM2_GPIOINTENABLE	(MCF_MBAR2 + 0xc4)	/* GPIO intr enable */
 
+#define MCFSIM2_DMAROUTE	(MCF_MBAR2 + 0x188)     /* DMA routing */
+#define MCFSIM2_IDECONFIG1	(MCF_MBAR2 + 0x18c)	/* IDEconfig1 */
+#define MCFSIM2_IDECONFIG2	(MCF_MBAR2 + 0x190)	/* IDEconfig2 */
+
 /*
  * Generic GPIO support
  */
 #define MCFGPIO_PIN_MAX		64
+#ifdef CONFIG_M5249
+#define MCFGPIO_IRQ_MAX		-1
+#define MCFGPIO_IRQ_VECBASE	-1
+#else
 #define MCFGPIO_IRQ_MAX		7
 #define MCFGPIO_IRQ_VECBASE	MCF_IRQ_GPIO0
+#endif
 
 /****************************************************************************/
+
+#ifdef __ASSEMBLER__
+#ifdef CONFIG_M5249C3
+/*
+ *	The M5249C3 board needs a little help getting all its SIM devices
+ *	initialized at kernel start time. dBUG doesn't set much up, so
+ *	we need to do it manually.
+ */
+.macro m5249c3_setup
+	/*
+	 *	Set MBAR1 and MBAR2, just in case they are not set.
+	 */
+	movel	#0x10000001,%a0
+	movec	%a0,%MBAR			/* map MBAR region */
+	subql	#1,%a0				/* get MBAR address in a0 */
+
+	movel	#0x80000001,%a1
+	movec	%a1,#3086			/* map MBAR2 region */
+	subql	#1,%a1				/* get MBAR2 address in a1 */
+
+	/*
+	 *      Move secondary interrupts to their base (128).
+	 */
+	moveb	#MCFINTC2_VECBASE,%d0
+	moveb	%d0,0x16b(%a1)			/* interrupt base register */
+
+	/*
+	 *      Work around broken CSMR0/DRAM vector problem.
+	 */
+	movel	#0x001F0021,%d0			/* disable C/I bit */
+	movel	%d0,0x84(%a0)			/* set CSMR0 */
+
+	/*
+	 *	Disable the PLL first. (Who knows what state it is
+	 *	in here!).
+	 */
+	movel	0x180(%a1),%d0			/* get current PLL value */
+	andl	#0xfffffffe,%d0			/* PLL bypass first */
+	movel	%d0,0x180(%a1)			/* set PLL register */
+	nop
+
+#if CONFIG_CLOCK_FREQ == 140000000
+	/*
+	 *	Set initial clock frequency. This assumes the M5249C3 board
+	 *	is fitted with an 11.2896MHz crystal. It will program the
+	 *	PLL for 140MHz. Let's go fast :-)
+	 */
+	movel	#0x125a40f0,%d0			/* set for 140MHz */
+	movel	%d0,0x180(%a1)			/* set PLL register */
+	orl	#0x1,%d0
+	movel	%d0,0x180(%a1)			/* set PLL register */
+#endif
+
+	/*
+	 *	Set up CS1 for the Ethernet controller.
+	 *	(Set up as per the M5249C3 documentation.)
+	 */
+	movel  #0xe0000000,%d0			/* CS1 mapped at 0xe0000000 */
+	movel  %d0,0x8c(%a0)
+	movel  #0x001f0021,%d0			/* CS1 size of 1MB */
+	movel  %d0,0x90(%a0)
+	movew  #0x0080,%d0			/* CS1 = 16bit port, AA */
+	movew  %d0,0x96(%a0)
+
+	/*
+	 *	Set up CS2 for the IDE interface.
+	 */
+	movel	#0x50000000,%d0			/* CS2 mapped at 0x50000000 */
+	movel	%d0,0x98(%a0)
+	movel	#0x001f0001,%d0			/* CS2 size of 1MB */
+	movel	%d0,0x9c(%a0)
+	movew	#0x0080,%d0			/* CS2 = 16bit, TA */
+	movew	%d0,0xa2(%a0)
+
+	movel	#0x00107000,%d0			/* IDEconfig1 */
+	movel	%d0,0x18c(%a1)
+	movel	#0x000c0400,%d0			/* IDEconfig2 */
+	movel	%d0,0x190(%a1)
+
+	movel	#0x00080000,%d0			/* GPIO19, IDE reset bit */
+	orl	%d0,0xc(%a1)			/* function GPIO19 */
+	orl	%d0,0x8(%a1)			/* enable GPIO19 as output */
+	orl	%d0,0x4(%a1)			/* de-assert IDE reset */
+.endm
+
+#define	PLATFORM_SETUP	m5249c3_setup
+
+#endif /* CONFIG_M5249C3 */
+#endif /* __ASSEMBLER__ */
+/****************************************************************************/
 #endif	/* m525xsim_h */
diff --git a/arch/m68k/include/asm/mcfclk.h b/arch/m68k/include/asm/mcfclk.h
index b676a02..ea4791e 100644
--- a/arch/m68k/include/asm/mcfclk.h
+++ b/arch/m68k/include/asm/mcfclk.h
@@ -8,7 +8,6 @@
 
 struct clk;
 
-#ifdef MCFPM_PPMCR0
 struct clk_ops {
 	void (*enable)(struct clk *);
 	void (*disable)(struct clk *);
@@ -23,6 +22,8 @@
 };
 
 extern struct clk *mcf_clks[];
+
+#ifdef MCFPM_PPMCR0
 extern struct clk_ops clk_ops0;
 #ifdef MCFPM_PPMCR1
 extern struct clk_ops clk_ops1;
@@ -38,6 +39,12 @@
 
 void __clk_init_enabled(struct clk *);
 void __clk_init_disabled(struct clk *);
+#else
+#define DEFINE_CLK(clk_ref, clk_name, clk_rate) \
+	static struct clk clk_##clk_ref = { \
+		.name = clk_name, \
+		.rate = clk_rate, \
+	}
 #endif /* MCFPM_PPMCR0 */
 
 #endif /* mcfclk_h */
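With this split, ColdFire parts that lack the MCFPM_PPMCR0 power-management registers get a trimmed DEFINE_CLK() recording only a name and a fixed rate, while the clk_get()/clk_enable()/clk_get_rate() entry points stay common to all parts. A rough driver-side sketch of what that buys, assuming a platform that defines an "mcfuart.0" clock as the m5206/m5249 changes further down do (hypothetical function, error handling trimmed):

	#include <linux/clk.h>
	#include <linux/device.h>
	#include <linux/err.h>

	static int example_uart_clk_setup(struct device *dev)
	{
		struct clk *clk;

		/* the lookup matches dev_name(dev) first, then the id string */
		clk = clk_get(dev, "mcfuart.0");
		if (IS_ERR_OR_NULL(clk))
			return -ENODEV;

		clk_enable(clk);	/* a no-op on parts without clock gating */
		dev_info(dev, "uart clock at %lu Hz\n", clk_get_rate(clk));
		clk_put(clk);
		return 0;
	}

The point of registering dummy clocks on the simpler parts is that drivers no longer need to care whether the silicon can gate its clocks at all.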
diff --git a/arch/m68k/include/asm/mcfsim.h b/arch/m68k/include/asm/mcfsim.h
index 7a83e61..a04fd9b 100644
--- a/arch/m68k/include/asm/mcfsim.h
+++ b/arch/m68k/include/asm/mcfsim.h
@@ -24,10 +24,7 @@
 #elif defined(CONFIG_M523x)
 #include <asm/m523xsim.h>
 #include <asm/mcfintc.h>
-#elif defined(CONFIG_M5249)
-#include <asm/m5249sim.h>
-#include <asm/mcfintc.h>
-#elif defined(CONFIG_M525x)
+#elif defined(CONFIG_M5249) || defined(CONFIG_M525x)
 #include <asm/m525xsim.h>
 #include <asm/mcfintc.h>
 #elif defined(CONFIG_M527x)
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index 9059572..ef20916 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -26,7 +26,7 @@
 #define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
 
 #define virt_to_page(addr)	(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
-#define page_to_virt(page)	((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
+#define page_to_virt(page)	__va(((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET))
 
 #define pfn_to_page(pfn)	virt_to_page(pfn_to_virt(pfn))
 #define page_to_pfn(page)	virt_to_pfn(page_to_virt(page))
diff --git a/arch/m68k/include/asm/ptrace.h b/arch/m68k/include/asm/ptrace.h
index 0f71704..a45cb68 100644
--- a/arch/m68k/include/asm/ptrace.h
+++ b/arch/m68k/include/asm/ptrace.h
@@ -15,6 +15,7 @@
 #define profile_pc(regs) instruction_pointer(regs)
 #define current_pt_regs() \
 	(struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1
+#define current_user_stack_pointer() rdusp()
 
 #define arch_has_single_step()	(1)
 
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index a021d67..847994c 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -31,7 +31,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 
diff --git a/arch/m68k/include/uapi/asm/signal.h b/arch/m68k/include/uapi/asm/signal.h
index 2b450f3..cba6f85 100644
--- a/arch/m68k/include/uapi/asm/signal.h
+++ b/arch/m68k/include/uapi/asm/signal.h
@@ -80,12 +80,6 @@
 #define SA_NOMASK	SA_NODEFER
 #define SA_ONESHOT	SA_RESETHAND
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/m68k/lib/memcpy.c b/arch/m68k/lib/memcpy.c
index 10ca051..c1e2dfb 100644
--- a/arch/m68k/lib/memcpy.c
+++ b/arch/m68k/lib/memcpy.c
@@ -10,7 +10,7 @@
 void *memcpy(void *to, const void *from, size_t n)
 {
 	void *xto = to;
-	size_t temp, temp1;
+	size_t temp;
 
 	if (!n)
 		return xto;
@@ -47,6 +47,7 @@
 		for (; temp; temp--)
 			*lto++ = *lfrom++;
 #else
+		size_t temp1;
 		asm volatile (
 			"	movel %2,%3\n"
 			"	andw  #7,%3\n"
diff --git a/arch/m68k/platform/68000/Makefile b/arch/m68k/platform/68000/Makefile
new file mode 100644
index 0000000..1eab70c
--- /dev/null
+++ b/arch/m68k/platform/68000/Makefile
@@ -0,0 +1,18 @@
+##################################################
+#
+# Makefile for 68000 core based cpus
+#
+# 2012.10.21, Luis Alves <ljalvs@gmail.com>
+#             Merged all 68000-based CPU config
+#             files into a single directory.
+#
+
+# 68328, 68EZ328, 68VZ328
+
+obj-y			+= entry.o ints.o timers.o
+obj-$(CONFIG_M68328)	+= m68328.o
+obj-$(CONFIG_M68EZ328)	+= m68EZ328.o
+obj-$(CONFIG_M68VZ328)	+= m68VZ328.o
+obj-$(CONFIG_ROM)	+= romvec.o
+
+extra-y 		:= head.o
diff --git a/arch/m68k/platform/68VZ328/bootlogo.h b/arch/m68k/platform/68000/bootlogo-vz.h
similarity index 100%
rename from arch/m68k/platform/68VZ328/bootlogo.h
rename to arch/m68k/platform/68000/bootlogo-vz.h
diff --git a/arch/m68k/platform/68328/bootlogo.h b/arch/m68k/platform/68000/bootlogo.h
similarity index 100%
rename from arch/m68k/platform/68328/bootlogo.h
rename to arch/m68k/platform/68000/bootlogo.h
diff --git a/arch/m68k/platform/68328/entry.S b/arch/m68k/platform/68000/entry.S
similarity index 100%
rename from arch/m68k/platform/68328/entry.S
rename to arch/m68k/platform/68000/entry.S
diff --git a/arch/m68k/platform/68000/head.S b/arch/m68k/platform/68000/head.S
new file mode 100644
index 0000000..536ef96
--- /dev/null
+++ b/arch/m68k/platform/68000/head.S
@@ -0,0 +1,240 @@
+/*
+ * head.S - Common startup code for 68000 core based CPUs
+ *
+ * 2012.10.21, Luis Alves <ljalvs@gmail.com>, Single head.S file for all
+ *             68000 core based CPUs. Based on the sources from:
+ *             Coldfire by Greg Ungerer <gerg@snapgear.com>
+ *             68328 by D. Jeff Dionne <jeff@ryeham.ee.ryerson.ca>,
+ *                      Kenneth Albanowski <kjahds@kjahds.com>,
+ *                      The Silver Hammer Group, Ltd.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+
+/*****************************************************************************
+ * UCSIMM and UCDIMM use CONFIG_MEMORY_RESERVE to reserve some RAM
+ *****************************************************************************/
+#ifdef CONFIG_MEMORY_RESERVE
+#define RAMEND ((CONFIG_RAMBASE+CONFIG_RAMSIZE)-(CONFIG_MEMORY_RESERVE*0x100000))
+#else
+#define RAMEND (CONFIG_RAMBASE+CONFIG_RAMSIZE)
+#endif
+/*****************************************************************************/
+
+.global _start
+.global _rambase
+.global _ramvec
+.global _ramstart
+.global _ramend
+
+#if defined(CONFIG_PILOT) || defined(CONFIG_INIT_LCD)
+.global bootlogo_bits
+#endif
+
+/* If DEBUG_HEAD_CODE is defined, the serial port in the 68x328 is initialized */
+/* #define DEBUG_HEAD_CODE */
+#undef DEBUG_HEAD_CODE
+
+.data
+
+/*****************************************************************************
+ * RAM setup pointers. Used by the kernel to determine RAM location and size.
+ *****************************************************************************/
+
+_rambase:
+	.long	0
+_ramvec:
+	.long	0
+_ramstart:
+	.long	0
+_ramend:
+	.long	0
+
+__HEAD
+
+/*****************************************************************************
+ * Entry point, where all begins!
+ *****************************************************************************/
+
+_start:
+
+/* The Pilot needs this specific signature at the start of ROM */
+#ifdef CONFIG_PILOT
+	.byte	0x4e, 0xfa, 0x00, 0x0a		/* bra opcode (jmp 10 bytes) */
+	.byte	'b', 'o', 'o', 't'
+	.word	10000
+	nop
+	moveq	#0, %d0
+	movew	%d0, 0xfffff618			/* Watchdog off */
+	movel	#0x00011f07, 0xfffff114		/* CS A1 Mask */
+#endif /* CONFIG_PILOT */
+
+	movew	#0x2700, %sr			/* disable all interrupts */
+
+/*****************************************************************************
+ * Set up the PLL and wait for it to settle (on 68x328 CPUs).
+ * Also, if enabled, init serial port.
+ *****************************************************************************/
+#if defined(CONFIG_M68328) || \
+    defined(CONFIG_M68EZ328) || \
+    defined(CONFIG_M68VZ328)
+
+/* Serial port setup. Should only be needed if debugging this startup code. */
+#ifdef DEBUG_HEAD_CODE
+	movew	#0x0800, 0xfffff906		/* Ignore CTS */
+	movew	#0x010b, 0xfffff902		/* BAUD to 9600 */
+	movew	#0xe100, 0xfffff900		/* enable */
+#endif /* DEBUG_HEAD_CODE */
+
+#ifdef CONFIG_PILOT
+	movew	#0x2410, 0xfffff200		/* PLLCR */
+#else
+	movew	#0x2400, 0xfffff200		/* PLLCR */
+#endif
+	movew	#0x0123, 0xfffff202		/* PLLFSR */
+	moveq	#0, %d0
+	movew	#16384, %d0			/* PLL settle wait loop */
+_pll_settle:
+	subw	#1, %d0
+	bne	_pll_settle
+#endif /* CONFIG_M68x328 */
+
+
+/*****************************************************************************
+ * If running the kernel from ROM, some specific initialization has to be done.
+ * (Everything is assumed to be already initialized when running from RAM.)
+ *****************************************************************************/
+#ifdef CONFIG_ROMKERNEL
+
+/*****************************************************************************
+ * Init chip registers (uCsimm specific)
+ *****************************************************************************/
+#ifdef CONFIG_UCSIMM
+	moveb	#0x00, 0xfffffb0b	/* Watchdog off */
+	moveb	#0x10, 0xfffff000	/* SCR */
+	moveb	#0x00, 0xfffff40b	/* enable chip select */
+	moveb	#0x00, 0xfffff423	/* enable /DWE */
+	moveb	#0x08, 0xfffffd0d	/* disable hardmap */
+	moveb	#0x07, 0xfffffd0e	/* level 7 interrupt clear */
+	movew	#0x8600, 0xfffff100	/* FLASH at 0x10c00000 */
+	movew	#0x018b, 0xfffff110	/* 2Meg, enable, 0ws */
+	movew	#0x8f00, 0xfffffc00	/* DRAM configuration */
+	movew	#0x9667, 0xfffffc02	/* DRAM control */
+	movew	#0x0000, 0xfffff106	/* DRAM at 0x00000000 */
+	movew	#0x068f, 0xfffff116	/* 8Meg, enable, 0ws */
+	moveb	#0x40, 0xfffff300	/* IVR */
+	movel	#0x007FFFFF, %d0	/* IMR */
+	movel	%d0, 0xfffff304
+	moveb	0xfffff42b, %d0
+	andb	#0xe0, %d0
+	moveb	%d0, 0xfffff42b
+#endif
+
+/*****************************************************************************
+ * Init LCD controller.
+ * (The LCD controller is assumed to be already initialized when running from RAM.)
+ *****************************************************************************/
+#ifdef CONFIG_INIT_LCD
+#ifdef CONFIG_PILOT
+	moveb	#0, 0xfffffA27			/* LCKCON */
+	movel	#_start, 0xfffffA00		/* LSSA */
+	moveb	#0xa, 0xfffffA05		/* LVPW */
+	movew	#0x9f, 0xFFFFFa08		/* LXMAX */
+	movew	#0x9f, 0xFFFFFa0a		/* LYMAX */
+	moveb	#9, 0xfffffa29			/* LBAR */
+	moveb	#0, 0xfffffa25			/* LPXCD */
+	moveb	#0x04, 0xFFFFFa20		/* LPICF */
+	moveb	#0x58, 0xfffffA27		/* LCKCON */
+	moveb	#0x85, 0xfffff429		/* PFDATA */
+	moveb	#0xd8, 0xfffffA27		/* LCKCON */
+	moveb	#0xc5, 0xfffff429		/* PFDATA */
+	moveb	#0xd5, 0xfffff429		/* PFDATA */
+	movel	#bootlogo_bits, 0xFFFFFA00	/* LSSA */
+	moveb	#10, 0xFFFFFA05			/* LVPW */
+	movew	#160, 0xFFFFFA08		/* LXMAX */
+	movew	#160, 0xFFFFFA0A		/* LYMAX */
+#else /* CONFIG_PILOT */
+	movel	#bootlogo_bits, 0xfffffA00	/* LSSA */
+	moveb	#0x28, 0xfffffA05		/* LVPW */
+	movew	#0x280, 0xFFFFFa08		/* LXMAX */
+	movew	#0x1df, 0xFFFFFa0a		/* LYMAX */
+	moveb	#0, 0xfffffa29			/* LBAR */
+	moveb	#0, 0xfffffa25			/* LPXCD */
+	moveb	#0x08, 0xFFFFFa20		/* LPICF */
+	moveb	#0x01, 0xFFFFFA21		/* -ve pol */
+	moveb	#0x81, 0xfffffA27		/* LCKCON */
+	movew	#0xff00, 0xfffff412		/* LCD pins */
+#endif /* CONFIG_PILOT */
+#endif /* CONFIG_INIT_LCD */
+
+/*****************************************************************************
+ * Kernel is running from FLASH/ROM (XIP)
+ * Copy init text & data to RAM
+ *****************************************************************************/
+	moveal	#_etext, %a0
+	moveal	#_sdata, %a1
+	moveal	#__bss_start, %a2
+_copy_initmem:
+	movel	%a0@+, %a1@+
+	cmpal	%a1, %a2
+	bhi	_copy_initmem
+#endif /* CONFIG_ROMKERNEL */
+
+/*****************************************************************************
+ * Setup basic memory information for kernel
+ *****************************************************************************/
+	movel	#CONFIG_VECTORBASE,_ramvec	/* set vector base location */
+	movel	#CONFIG_RAMBASE,_rambase	/* set the base of RAM */
+	movel	#RAMEND, _ramend		/* set end ram addr */
+	lea	__bss_stop,%a1
+	movel	%a1,_ramstart
+
+/*****************************************************************************
+ * If the kernel is in RAM, move romfs to right above bss and
+ * adjust _ramstart to where romfs ends.
+ *
+ * (Do this only if CONFIG_MTD_UCLINUX is true)
+ *****************************************************************************/
+
+#if defined(CONFIG_ROMFS_FS) && defined(CONFIG_RAMKERNEL) && \
+    defined(CONFIG_MTD_UCLINUX)
+	lea	__bss_start, %a0		/* get start of bss */
+	lea	__bss_stop, %a1			/* set up destination  */
+	movel	%a0, %a2			/* copy of bss start */
+
+	movel	8(%a0), %d0			/* get size of ROMFS */
+	addql	#8, %d0				/* allow for rounding */
+	andl	#0xfffffffc, %d0		/* whole words */
+
+	addl	%d0, %a0			/* copy from end */
+	addl	%d0, %a1			/* copy from end */
+	movel	%a1, _ramstart			/* set start of ram */
+_copy_romfs:
+	movel	-(%a0), -(%a1)			/* copy dword */
+	cmpl	%a0, %a2			/* check if at end */
+	bne	_copy_romfs
+#endif /* CONFIG_ROMFS_FS && CONFIG_RAMKERNEL && CONFIG_MTD_UCLINUX */
+
+/*****************************************************************************
+ * Clear bss region
+ *****************************************************************************/
+	lea	__bss_start, %a0		/* get start of bss */
+	lea	__bss_stop, %a1			/* get end of bss */
+_clear_bss:
+	movel	#0, (%a0)+			/* clear each word */
+	cmpl	%a0, %a1			/* check if at end */
+	bne	_clear_bss
+
+/*****************************************************************************
+ * Load the current task pointer and stack.
+ *****************************************************************************/
+	lea	init_thread_union,%a0
+	lea	THREAD_SIZE(%a0),%sp
+	jsr	start_kernel			/* start Linux kernel */
+_exit:
+	jmp	_exit				/* should never get here */
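One subtlety in the _copy_romfs loop above: when the kernel runs from RAM, the build appends the ROMFS image right at __bss_start, and it has to be moved past __bss_stop before bss is cleared or the filesystem would be wiped. Source and destination overlap, so the copy runs backwards from the end. A C rendering of the same logic (illustrative only; the real code runs this early with no stack, and relies on m68k being big-endian so the size field can be loaded directly):

	/* Sketch of the ROMFS move; bytes 8..11 of the ROMFS superblock
	 * hold the total image size. */
	static void move_romfs_above_bss(unsigned long *bss_start,
					 unsigned long *bss_stop)
	{
		unsigned long size = bss_start[2];	/* size word at offset 8 */
		unsigned long words = ((size + 8) & ~3UL) / 4;

		/* regions overlap and the destination is higher, so copy
		 * from the last longword down to the first */
		while (words--)
			bss_stop[words] = bss_start[words];
	}

_ramstart is then left pointing at the end of the relocated image, so the moved filesystem survives both the bss clear and the kernel's free-memory setup.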
diff --git a/arch/m68k/platform/68328/ints.c b/arch/m68k/platform/68000/ints.c
similarity index 98%
rename from arch/m68k/platform/68328/ints.c
rename to arch/m68k/platform/68000/ints.c
index b3810fe..cda49b1 100644
--- a/arch/m68k/platform/68328/ints.c
+++ b/arch/m68k/platform/68000/ints.c
@@ -1,5 +1,5 @@
 /*
- * linux/arch/m68knommu/platform/68328/ints.c
+ * ints.c - Generic interrupt controller support
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file COPYING in the main directory of this archive
diff --git a/arch/m68k/platform/68328/config.c b/arch/m68k/platform/68000/m68328.c
similarity index 96%
rename from arch/m68k/platform/68328/config.c
rename to arch/m68k/platform/68000/m68328.c
index 8c20e89..a86eb66 100644
--- a/arch/m68k/platform/68328/config.c
+++ b/arch/m68k/platform/68000/m68328.c
@@ -1,7 +1,7 @@
 /***************************************************************************/
 
 /*
- *  linux/arch/m68knommu/platform/68328/config.c
+ *  m68328.c - 68328 specific config
  *
  *  Copyright (C) 1993 Hamish Macdonald
  *  Copyright (C) 1999 D. Jeff Dionne
diff --git a/arch/m68k/platform/68EZ328/config.c b/arch/m68k/platform/68000/m68EZ328.c
similarity index 96%
rename from arch/m68k/platform/68EZ328/config.c
rename to arch/m68k/platform/68000/m68EZ328.c
index 4f158d5..a6eb72d 100644
--- a/arch/m68k/platform/68EZ328/config.c
+++ b/arch/m68k/platform/68000/m68EZ328.c
@@ -1,7 +1,7 @@
 /***************************************************************************/
 
 /*
- *  linux/arch/m68knommu/platform/68EZ328/config.c
+ *  m68EZ328.c - 68EZ328 specific config
  *
  *  Copyright (C) 1993 Hamish Macdonald
  *  Copyright (C) 1999 D. Jeff Dionne
diff --git a/arch/m68k/platform/68VZ328/config.c b/arch/m68k/platform/68000/m68VZ328.c
similarity index 97%
rename from arch/m68k/platform/68VZ328/config.c
rename to arch/m68k/platform/68000/m68VZ328.c
index 2ed8dc3..eb6964f 100644
--- a/arch/m68k/platform/68VZ328/config.c
+++ b/arch/m68k/platform/68000/m68VZ328.c
@@ -1,7 +1,7 @@
 /***************************************************************************/
 
 /*
- *  linux/arch/m68knommu/platform/68VZ328/config.c
+ *  m68VZ328.c - 68VZ328 specific config
  *
  *  Copyright (C) 1993 Hamish Macdonald
  *  Copyright (C) 1999 D. Jeff Dionne
@@ -28,7 +28,7 @@
 #include <asm/bootstd.h>
 
 #ifdef CONFIG_INIT_LCD
-#include "bootlogo.h"
+#include "bootlogo-vz.h"
 #endif
 
 /***************************************************************************/
diff --git a/arch/m68k/platform/68328/romvec.S b/arch/m68k/platform/68000/romvec.S
similarity index 94%
rename from arch/m68k/platform/68328/romvec.S
rename to arch/m68k/platform/68000/romvec.S
index 3108446..15c70cd 100644
--- a/arch/m68k/platform/68328/romvec.S
+++ b/arch/m68k/platform/68000/romvec.S
@@ -1,5 +1,5 @@
 /*
- * linux/arch/m68knommu/platform/68328/romvec.S
+ * romvec.S - Vector table for 68000 cpus
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file COPYING in the main directory of this archive
diff --git a/arch/m68k/platform/68328/timers.c b/arch/m68k/platform/68000/timers.c
similarity index 98%
rename from arch/m68k/platform/68328/timers.c
rename to arch/m68k/platform/68000/timers.c
index f4dc9b2..ec30acb 100644
--- a/arch/m68k/platform/68328/timers.c
+++ b/arch/m68k/platform/68000/timers.c
@@ -1,7 +1,7 @@
 /***************************************************************************/
 
 /*
- *  linux/arch/m68knommu/platform/68328/timers.c
+ *  timers.c - Generic hardware timer support.
  *
  *  Copyright (C) 1993 Hamish Macdonald
  *  Copyright (C) 1999 D. Jeff Dionne
diff --git a/arch/m68k/platform/68328/Makefile b/arch/m68k/platform/68328/Makefile
deleted file mode 100644
index ee61bf8..0000000
--- a/arch/m68k/platform/68328/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-#
-# Makefile for arch/m68knommu/platform/68328.
-#
-
-model-y			  := ram
-model-$(CONFIG_ROMKERNEL) := rom
-
-head-y			= head-$(model-y).o
-head-$(CONFIG_PILOT)	= head-pilot.o
-head-$(CONFIG_DRAGEN2)	= head-de2.o
-
-obj-y			+= entry.o ints.o timers.o
-obj-$(CONFIG_M68328)	+= config.o
-obj-$(CONFIG_ROM)	+= romvec.o
-
-extra-y			:= head.o
-
-$(obj)/head.o: $(obj)/$(head-y)
-	ln -sf $(head-y) $(obj)/head.o
-
-clean-files := $(obj)/head.o $(head-y)
diff --git a/arch/m68k/platform/68328/head-de2.S b/arch/m68k/platform/68328/head-de2.S
deleted file mode 100644
index 537d324..0000000
--- a/arch/m68k/platform/68328/head-de2.S
+++ /dev/null
@@ -1,128 +0,0 @@
-
-#define	MEM_END	0x00800000	/* Memory size 8Mb */
-
-#undef CRT_DEBUG
-
-.macro	PUTC CHAR
-#ifdef CRT_DEBUG
-	moveq	#\CHAR, %d7
-	jsr	putc
-#endif
-.endm
-
-	.global	_start
-	.global _rambase
-	.global _ramvec
-	.global	_ramstart
-	.global	_ramend
-	
-	.data
-
-/*
- *	Set up the usable RAM pointers
- */
-_rambase:
-	.long	0
-_ramvec:
-	.long	0
-_ramstart:
-	.long	0
-_ramend:
-	.long	0
-
-	.text
-
-_start:
-
-/*
- * Setup initial stack
- */
-	/* disable all interrupts */
-	movew	#0x2700, %sr
-	movel	#-1, 0xfffff304
-	movel	#MEM_END-4, %sp
-
-	PUTC	'\r'
-	PUTC	'\n'
-	PUTC	'A'
-	PUTC	'B'
-
-/*
- *	Determine end of RAM
- */
-
-	movel	#MEM_END, %a0
-	movel	%a0, _ramend
-
-	PUTC	'C'
-
-/*
- *	Move ROM filesystem above bss :-)
- */
-
-	moveal	#__bss_start, %a0		/* romfs at the start of bss */
-	moveal	#__bss_stop, %a1		/* Set up destination  */
-	movel	%a0, %a2			/* Copy of bss start */
-
-	movel	8(%a0), %d1			/* Get size of ROMFS */
-	addql	#8, %d1				/* Allow for rounding */
-	andl	#0xfffffffc, %d1	/* Whole words */
-
-	addl	%d1, %a0			/* Copy from end */
-	addl	%d1, %a1			/* Copy from end */
-	movel	%a1, _ramstart		/* Set start of ram */
-
-1:
-	movel	-(%a0), %d0			/* Copy dword */
-	movel	%d0, -(%a1)
-	cmpl	%a0, %a2			/* Check if at end */
-	bne	1b
-
-	PUTC	'D'
-
-/*
- * Initialize BSS segment to 0
- */
-
-	lea	__bss_start, %a0
-	lea	__bss_stop, %a1
-
-	/* Copy 0 to %a0 until %a0 == %a1 */
-2:	cmpal	%a0, %a1
-	beq	1f
-	clrl	(%a0)+
-	bra	2b
-1:
-
-	PUTC	'E'
-
-/*
- * Load the current task pointer and stack
- */
-
-	lea	init_thread_union, %a0
-	lea	0x2000(%a0), %sp
-
-	PUTC	'F'
-	PUTC	'\r'
-	PUTC	'\n'
-
-/*
- * Go
- */
-
-	jmp	start_kernel
-
-/*
- * Local functions
- */
- 
-#ifdef CRT_DEBUG
-putc:
-	moveb	%d7, 0xfffff907
-1:
-	movew	0xfffff906, %d7
-	andw	#0x2000, %d7
-	beq	1b
-	rts
-#endif
diff --git a/arch/m68k/platform/68328/head-pilot.S b/arch/m68k/platform/68328/head-pilot.S
deleted file mode 100644
index 45a9dad..0000000
--- a/arch/m68k/platform/68328/head-pilot.S
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * linux/arch/m68knommu/platform/68328/head-pilot.S
- * - A startup file for the MC68328
- *
- * Copyright (C) 1998  D. Jeff Dionne <jeff@ryeham.ee.ryerson.ca>,
- *                     Kenneth Albanowski <kjahds@kjahds.com>,
- *                     The Silver Hammer Group, Ltd.
- *
- * (c) 1995, Dionne & Associates
- * (c) 1995, DKG Display Tech.
- */
-
-#define ASSEMBLY
-
-#define IMMED #
-#define	DBG_PUTC(x)	moveb IMMED x, 0xfffff907
-
-
-.global _stext
-.global _start
-
-.global _rambase
-.global _ramvec
-.global _ramstart
-.global _ramend
-
-.global bootlogo_bits
-
-/*****************************************************************************/
-
-.data
-
-/*
- *      Set up the usable RAM pointers. The size of RAM is determined, then
- *      an initial stack is set up at the end.
- */
-.align 4
-_ramvec:
-.long   0
-_rambase:
-.long   0
-_ramstart:
-.long   0
-_ramend:
-.long   0
-
-.text
-	
-_start:
-_stext:
-
-
-#ifdef CONFIG_M68328
-
-#ifdef CONFIG_PILOT
-	.byte 0x4e, 0xfa, 0x00, 0x0a /* Jmp +X bytes */
-	.byte 'b', 'o', 'o', 't'
-	.word 10000
-
-	nop
-#endif
-
-	moveq	#0, %d0
-	movew   %d0, 0xfffff618 /* Watchdog off */
-	movel	#0x00011f07, 0xfffff114 /* CS A1 Mask */
-
-	movew	#0x0800, 0xfffff906 /* Ignore CTS */
-	movew	#0x010b, 0xfffff902 /* BAUD to 9600 */
-
-	movew	#0x2410, 0xfffff200 /* PLLCR */
-	movew	#0x123, 0xfffff202 /* PLLFSR */
-
-#ifdef CONFIG_PILOT
-	moveb	#0, 0xfffffA27 /* LCKCON */
-	movel   #_start, 0xfffffA00 /* LSSA */
-	moveb   #0xa, 0xfffffA05 /* LVPW */
-	movew	#0x9f, 0xFFFFFa08 /* LXMAX */
-	movew	#0x9f, 0xFFFFFa0a /* LYMAX */
-	moveb   #9, 0xfffffa29 /* LBAR */
-	moveb   #0, 0xfffffa25 /* LPXCD */
-	moveb	#0x04, 0xFFFFFa20 /* LPICF */
-	moveb	#0x58, 0xfffffA27 /* LCKCON */
-	moveb	#0x85, 0xfffff429 /* PFDATA */
-	moveb	#0xd8, 0xfffffA27 /* LCKCON */
-	moveb	#0xc5, 0xfffff429 /* PFDATA */
-	moveb	#0xd5, 0xfffff429 /* PFDATA */
-
-	moveal	#0x00100000, %a3
-	moveal	#0x100ffc00, %a4
-#endif /* CONFIG_PILOT */
-
-#endif /* CONFIG_M68328 */
-
-	movew   #0x2700, %sr
-	lea	%a4@(-4), %sp
-
-	DBG_PUTC('\r')
-	DBG_PUTC('\n')
-	DBG_PUTC('A')
-
-   	moveq   #0,%d0
-	movew	#16384, %d0  /* PLL settle wait loop */
-L0:
-	subw	#1, %d0
-	bne	L0
-
-	DBG_PUTC('B')
-
-	/* Copy command line from beginning of RAM (+16) to end of bss */
-	movel	#CONFIG_VECTORBASE, %d7
-	addl	#16, %d7
-	moveal	%d7, %a0
-	moveal	#__bss_stop, %a1
-	lea	%a1@(512), %a2
-
-	DBG_PUTC('C')
-
-	/* Copy %a0 to %a1 until %a1 == %a2 */
-L2:
-	movel	%a0@+, %d0
-	movel	%d0, %a1@+
-	cmpal	%a1, %a2
-	bhi	L2
-
-	/* Copy data+init segment from ROM to RAM */
-	moveal	#_etext, %a0
-	moveal	#_sdata, %a1
-	moveal	#__init_end, %a2
-
-	DBG_PUTC('D')
-
-	/* Copy %a0 to %a1 until %a1 == %a2 */
-LD1:
-	movel	%a0@+, %d0
-	movel	%d0, %a1@+
-	cmpal	%a1, %a2
-	bhi	LD1
-
-	DBG_PUTC('E')
-
-	moveal	#__bss_start, %a0
-	moveal	#__bss_stop, %a1
-
-	/* Copy 0 to %a0 until %a0 == %a1 */
-L1:
-	movel	#0, %a0@+
-	cmpal	%a0, %a1
-	bhi	L1
-
-	DBG_PUTC('F')
-
-	/* Copy command line from end of bss to command line */
-	moveal	#__bss_stop, %a0
-	moveal	#command_line, %a1
-	lea	%a1@(512), %a2
-
-	DBG_PUTC('G')
-
-	/* Copy %a0 to %a1 until %a1 == %a2 */
-L3:
-	movel	%a0@+, %d0
-	movel	%d0, %a1@+
-	cmpal	%a1, %a2
-	bhi	L3
-
-	movel	#_sdata, %d0	
-	movel	%d0, _rambase	
-	movel	#__bss_stop, %d0
-	movel	%d0, _ramstart
-
-	movel	%a4, %d0
-	subl	#4096, %d0	/* Reserve 4K of stack */
-	moveq	#79, %d7
-	movel	%d0, _ramend
-
-	pea	0
-	pea	env
-	pea	%sp@(4)
-	pea	0
-
-	DBG_PUTC('H')
-
-#ifdef CONFIG_PILOT
-	movel	#bootlogo_bits, 0xFFFFFA00
-	moveb	#10, 0xFFFFFA05
-	movew	#160, 0xFFFFFA08
-	movew	#160, 0xFFFFFA0A
-#endif /* CONFIG_PILOT */
-
-	DBG_PUTC('I')
-
-	lea	init_thread_union, %a0
-	lea	0x2000(%a0), %sp
-
-	DBG_PUTC('J')
-	DBG_PUTC('\r')
-	DBG_PUTC('\n')
-
-	jsr	start_kernel
-_exit:
-
-	jmp	_exit
-
-
-	.data
-env:
-	.long	0
diff --git a/arch/m68k/platform/68328/head-ram.S b/arch/m68k/platform/68328/head-ram.S
deleted file mode 100644
index 5189ef9..0000000
--- a/arch/m68k/platform/68328/head-ram.S
+++ /dev/null
@@ -1,141 +0,0 @@
-
-	.global __main
-	.global __rom_start
-
-        .global _rambase
-        .global _ramstart
-	
-	.global splash_bits
-	.global _start
-	.global _stext
-	.global _edata
-
-#define DEBUG
-#define ROM_OFFSET 0x10C00000
-#define STACK_GUARD 0x10
-
-	.text
-	
-_start:
-_stext:
-	movew	#0x2700, %sr            /* Exceptions off! */
-
-#if 0
-	/* Init chip registers.  uCsimm specific */
-	moveb	#0x00,   0xfffffb0b	/* Watchdog off */
-	moveb	#0x10,   0xfffff000	/* SCR */
-
-	movew   #0x2400, 0xfffff200	/* PLLCR */
-	movew   #0x0123, 0xfffff202	/* PLLFSR */
-
-	moveb	#0x00,   0xfffff40b	/* enable chip select */
-	moveb	#0x00,   0xfffff423	/* enable /DWE */
-	moveb	#0x08,   0xfffffd0d	/* disable hardmap */
-	moveb	#0x07,   0xfffffd0e	/* level 7 interrupt clear */
-
-	movew	#0x8600, 0xfffff100	/* FLASH at 0x10c00000 */
-	movew	#0x018b, 0xfffff110	/* 2Meg, enable, 0ws */
-
-	movew	#0x8f00, 0xfffffc00	/* DRAM configuration */
-	movew	#0x9667, 0xfffffc02	/* DRAM control */
-	movew	#0x0000, 0xfffff106	/* DRAM at 0x00000000 */
-	movew	#0x068f, 0xfffff116	/* 8Meg, enable, 0ws */
-
-	moveb	#0x40,   0xfffff300	/* IVR */
-	movel	#0x007FFFFF, %d0	/* IMR */
-	movel	%d0,     0xfffff304
-
-	moveb	0xfffff42b, %d0
-	andb	#0xe0,	 %d0
-	moveb	%d0,	 0xfffff42b
-
-	moveb	#0x08,   0xfffff907	/* Ignore CTS */
-	movew	#0x010b, 0xfffff902	/* BAUD to 9600 */
-	movew	#0xe100, 0xfffff900	/* enable */
-#endif
-
-	movew	#16384, %d0  /* PLL settle wait loop */
-L0:
-	subw	#1, %d0
-	bne	L0
-#ifdef DEBUG
-	moveq	#70, %d7		/* 'F' */
-	moveb	%d7,0xfffff907          /* No absolute addresses */
-pclp1:
-	movew	0xfffff906, %d7
-	andw	#0x2000, %d7
-	beq	pclp1
-#endif /* DEBUG */
-
-#ifdef DEBUG
-	moveq	#82, %d7		/* 'R' */
-	moveb	%d7,0xfffff907          /* No absolute addresses */
-pclp3:
-	movew	0xfffff906, %d7
-	andw	#0x2000, %d7
-	beq	pclp3
-#endif /* DEBUG */
-	moveal	#0x007ffff0, %ssp
-	moveal	#__bss_start, %a0
-	moveal	#__bss_stop, %a1
-
-	/* Copy 0 to %a0 until %a0 >= %a1 */
-L1:
-	movel	#0, %a0@+
-	cmpal	%a0, %a1
-	bhi	L1
-
-#ifdef DEBUG
-	moveq	#67, %d7                /* 'C' */
-	jsr	putc
-#endif /* DEBUG */
-
-	pea	0
-	pea	env
-	pea	%sp@(4)
-	pea	0
-
-#ifdef DEBUG
-	moveq	#70, %d7		/* 'F' */
-	jsr	putc
-#endif /* DEBUG */
-
-lp:
-	jsr	start_kernel
-        jmp lp
-_exit:
-
-	jmp	_exit
-
-__main:
-	/* nothing */
-	rts
-
-#ifdef DEBUG
-putc:
-	moveb	%d7,0xfffff907
-pclp:
-	movew	0xfffff906, %d7
-	andw	#0x2000, %d7
-	beq	pclp
-	rts
-#endif /* DEBUG */
-
-	.data
-
-/*
- *      Set up the usable RAM pointers. The size of RAM is determined, then
- *      an initial stack is set up at the end.
- */
-.align 4
-_ramvec:
-.long   0
-_rambase:
-.long   0
-_ramstart:
-.long   0
-_ramend:
-.long   0
-
-env:
-	.long	0
diff --git a/arch/m68k/platform/68328/head-rom.S b/arch/m68k/platform/68328/head-rom.S
deleted file mode 100644
index 3dff98b..0000000
--- a/arch/m68k/platform/68328/head-rom.S
+++ /dev/null
@@ -1,105 +0,0 @@
-	
-	.global _start
-	.global _stext
-
-	.global _rambase
-	.global _ramvec
-	.global _ramstart
-	.global _ramend
-
-#ifdef CONFIG_INIT_LCD
-	.global bootlogo_bits
-#endif
-
-	.data
-
-/*
- *      Set up the usable RAM pointers. The size of RAM is determined, then
- *      an initial stack is set up at the end.
- */
-.align 4
-_ramvec:
-.long   0
-_rambase:
-.long   0
-_ramstart:
-.long   0
-_ramend:
-.long   0
-
-#define	RAMEND	(CONFIG_RAMBASE + CONFIG_RAMSIZE)
-
-	.text
-_start:
-_stext:	movew	#0x2700,%sr
-#ifdef CONFIG_INIT_LCD
-	movel	#bootlogo_bits, 0xfffffA00 /* LSSA */
-	moveb	#0x28,   0xfffffA05	/* LVPW */
-	movew	#0x280,  0xFFFFFa08	/* LXMAX */
-	movew	#0x1df,  0xFFFFFa0a	/* LYMAX */
-	moveb	#0,      0xfffffa29	/* LBAR */
-	moveb	#0,      0xfffffa25	/* LPXCD */
-	moveb	#0x08,   0xFFFFFa20	/* LPICF */
-	moveb	#0x01,   0xFFFFFA21	/* -ve pol */
-	moveb	#0x81,   0xfffffA27	/* LCKCON */
-	movew	#0xff00, 0xfffff412	/* LCD pins */
-#endif
-	moveal  #RAMEND-CONFIG_MEMORY_RESERVE*0x100000 - 0x10, %sp
-	movew	#32767, %d0  /* PLL settle wait loop */
-1:	subq	#1, %d0
-	bne	1b
-
-	/* Copy data segment from ROM to RAM */
-	moveal	#_etext, %a0
-	moveal	#_sdata, %a1
-	moveal	#_edata, %a2
-
-	/* Copy %a0 to %a1 until %a1 == %a2 */
-1:	movel	%a0@+, %a1@+
-	cmpal	%a1, %a2
-	bhi	1b
-
-	moveal	#__bss_start, %a0
-	moveal	#__bss_stop, %a1
-	/* Copy 0 to %a0 until %a0 == %a1 */
-	
-1:
-	clrl	%a0@+
-	cmpal	%a0, %a1
-	bhi	1b
-
-        movel   #_sdata, %d0    
-        movel   %d0, _rambase        
-        movel   #__bss_stop, %d0
-        movel   %d0, _ramstart
-	movel	#RAMEND-CONFIG_MEMORY_RESERVE*0x100000, %d0
-	movel	%d0, _ramend
-	movel	#CONFIG_VECTORBASE,	%d0
-	movel	%d0, _ramvec
-	
-/*
- * load the current task pointer and stack
- */
-	lea	init_thread_union, %a0
-	lea	0x2000(%a0), %sp
-
-1:	jsr	start_kernel
-        bra 1b
-_exit:
-
-	jmp	_exit
-
-
-putc:
-	moveb	%d7,0xfffff907
-1:
-	movew	0xfffff906, %d7
-	andw	#0x2000, %d7
-	beq	1b
-	rts
-
-	.data
-env:
-	.long	0
-	.text
-
diff --git a/arch/m68k/platform/68EZ328/Makefile b/arch/m68k/platform/68EZ328/Makefile
deleted file mode 100644
index b44d799..0000000
--- a/arch/m68k/platform/68EZ328/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for arch/m68knommu/platform/68EZ328.
-#
-
-obj-y := config.o
diff --git a/arch/m68k/platform/68VZ328/Makefile b/arch/m68k/platform/68VZ328/Makefile
deleted file mode 100644
index 8166741..0000000
--- a/arch/m68k/platform/68VZ328/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for arch/m68k/platform/68VZ328.
-#
-
-obj-y		:= config.o
diff --git a/arch/m68k/platform/coldfire/clk.c b/arch/m68k/platform/coldfire/clk.c
index 9cd13b4..fddfdcc 100644
--- a/arch/m68k/platform/coldfire/clk.c
+++ b/arch/m68k/platform/coldfire/clk.c
@@ -19,38 +19,59 @@
 #include <asm/mcfsim.h>
 #include <asm/mcfclk.h>
 
-/***************************************************************************/
-#ifndef MCFPM_PPMCR0
-struct clk *clk_get(struct device *dev, const char *id)
-{
-	return NULL;
-}
-EXPORT_SYMBOL(clk_get);
-
-int clk_enable(struct clk *clk)
-{
-	return 0;
-}
-EXPORT_SYMBOL(clk_enable);
-
-void clk_disable(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_disable);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_put);
-
-unsigned long clk_get_rate(struct clk *clk)
-{
-	return MCF_CLK;
-}
-EXPORT_SYMBOL(clk_get_rate);
-#else
 static DEFINE_SPINLOCK(clk_lock);
 
+#ifdef MCFPM_PPMCR0
+/*
+ *	For the more advanced ColdFire parts whose clocks can be gated we
+ *	supply enable/disable functions. Such platforms must properly define
+ *	their clocks in their platform-specific code.
+ */
+void __clk_init_enabled(struct clk *clk)
+{
+	clk->enabled = 1;
+	clk->clk_ops->enable(clk);
+}
+
+void __clk_init_disabled(struct clk *clk)
+{
+	clk->enabled = 0;
+	clk->clk_ops->disable(clk);
+}
+
+static void __clk_enable0(struct clk *clk)
+{
+	__raw_writeb(clk->slot, MCFPM_PPMCR0);
+}
+
+static void __clk_disable0(struct clk *clk)
+{
+	__raw_writeb(clk->slot, MCFPM_PPMSR0);
+}
+
+struct clk_ops clk_ops0 = {
+	.enable		= __clk_enable0,
+	.disable	= __clk_disable0,
+};
+
+#ifdef MCFPM_PPMCR1
+static void __clk_enable1(struct clk *clk)
+{
+	__raw_writeb(clk->slot, MCFPM_PPMCR1);
+}
+
+static void __clk_disable1(struct clk *clk)
+{
+	__raw_writeb(clk->slot, MCFPM_PPMSR1);
+}
+
+struct clk_ops clk_ops1 = {
+	.enable		= __clk_enable1,
+	.disable	= __clk_disable1,
+};
+#endif /* MCFPM_PPMCR1 */
+#endif /* MCFPM_PPMCR0 */
+
 struct clk *clk_get(struct device *dev, const char *id)
 {
 	const char *clk_name = dev ? dev_name(dev) : id ? id : NULL;
@@ -101,48 +122,3 @@
 EXPORT_SYMBOL(clk_get_rate);
 
 /***************************************************************************/
-
-void __clk_init_enabled(struct clk *clk)
-{
-	clk->enabled = 1;
-	clk->clk_ops->enable(clk);
-}
-
-void __clk_init_disabled(struct clk *clk)
-{
-	clk->enabled = 0;
-	clk->clk_ops->disable(clk);
-}
-
-static void __clk_enable0(struct clk *clk)
-{
-	__raw_writeb(clk->slot, MCFPM_PPMCR0);
-}
-
-static void __clk_disable0(struct clk *clk)
-{
-	__raw_writeb(clk->slot, MCFPM_PPMSR0);
-}
-
-struct clk_ops clk_ops0 = {
-	.enable		= __clk_enable0,
-	.disable	= __clk_disable0,
-};
-
-#ifdef MCFPM_PPMCR1
-static void __clk_enable1(struct clk *clk)
-{
-	__raw_writeb(clk->slot, MCFPM_PPMCR1);
-}
-
-static void __clk_disable1(struct clk *clk)
-{
-	__raw_writeb(clk->slot, MCFPM_PPMSR1);
-}
-
-struct clk_ops clk_ops1 = {
-	.enable		= __clk_enable1,
-	.disable	= __clk_disable1,
-};
-#endif /* MCFPM_PPMCR1 */
-#endif /* MCFPM_PPMCR0 */
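After this reshuffle clk.c always provides the lookup and enable/disable API, and only the slot-gating helpers stay under #ifdef MCFPM_PPMCR0. A gated clock on a PM-capable part would be described roughly as below, a sketch built only from the struct clk fields the code above touches (.name, .rate, .slot, .clk_ops); the slot number here is made up:

	/* Hypothetical gated clock on a part with MCFPM_PPMCR0 */
	static struct clk clk_example = {
		.name		= "mcfuart.0",	/* what clk_get() matches on */
		.rate		= MCF_BUSCLK,
		.slot		= 12,		/* assumed PPMCR0 slot number */
		.clk_ops	= &clk_ops0,	/* bank-0 gate helpers above */
	};

clk_enable() then lands in __clk_enable0(), which writes the slot number to MCFPM_PPMCR0 to open the clock gate, and __clk_disable0() writes it to MCFPM_PPMSR0 to shut it again.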
diff --git a/arch/m68k/platform/coldfire/intc-5249.c b/arch/m68k/platform/coldfire/intc-5249.c
index 0864b83..b0d1641 100644
--- a/arch/m68k/platform/coldfire/intc-5249.c
+++ b/arch/m68k/platform/coldfire/intc-5249.c
@@ -21,7 +21,7 @@
 {
 	u32 imr;
 	imr = readl(MCFSIM2_GPIOINTENABLE);
-	imr &= ~(0x1 << (d->irq - MCFINTC2_GPIOIRQ0));
+	imr &= ~(0x1 << (d->irq - MCF_IRQ_GPIO0));
 	writel(imr, MCFSIM2_GPIOINTENABLE);
 }
 
@@ -29,13 +29,13 @@
 {
 	u32 imr;
 	imr = readl(MCFSIM2_GPIOINTENABLE);
-	imr |= (0x1 << (d->irq - MCFINTC2_GPIOIRQ0));
+	imr |= (0x1 << (d->irq - MCF_IRQ_GPIO0));
 	writel(imr, MCFSIM2_GPIOINTENABLE);
 }
 
 static void intc2_irq_gpio_ack(struct irq_data *d)
 {
-	writel(0x1 << (d->irq - MCFINTC2_GPIOIRQ0), MCFSIM2_GPIOINTCLEAR);
+	writel(0x1 << (d->irq - MCF_IRQ_GPIO0), MCFSIM2_GPIOINTCLEAR);
 }
 
 static struct irq_chip intc2_irq_gpio_chip = {
@@ -50,7 +50,7 @@
 	int irq;
 
 	/* GPIO interrupt sources */
-	for (irq = MCFINTC2_GPIOIRQ0; (irq <= MCFINTC2_GPIOIRQ7); irq++) {
+	for (irq = MCF_IRQ_GPIO0; (irq <= MCF_IRQ_GPIO7); irq++) {
 		irq_set_chip(irq, &intc2_irq_gpio_chip);
 		irq_set_handler(irq, handle_edge_irq);
 	}
diff --git a/arch/m68k/platform/coldfire/m5206.c b/arch/m68k/platform/coldfire/m5206.c
index 6bfbeeb..0e55f44 100644
--- a/arch/m68k/platform/coldfire/m5206.c
+++ b/arch/m68k/platform/coldfire/m5206.c
@@ -16,6 +16,26 @@
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
+#include <asm/mcfclk.h>
+
+/***************************************************************************/
+
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr0, "mcftmr.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr1, "mcftmr.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcftmr0,
+	&clk_mcftmr1,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	NULL
+};
 
 /***************************************************************************/
 
diff --git a/arch/m68k/platform/coldfire/m523x.c b/arch/m68k/platform/coldfire/m523x.c
index ff37fe9..2b10e9f 100644
--- a/arch/m68k/platform/coldfire/m523x.c
+++ b/arch/m68k/platform/coldfire/m523x.c
@@ -19,6 +19,34 @@
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
+#include <asm/mcfclk.h>
+
+/***************************************************************************/
+
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcfpit0, "mcfpit.0", MCF_CLK);
+DEFINE_CLK(mcfpit1, "mcfpit.1", MCF_CLK);
+DEFINE_CLK(mcfpit2, "mcfpit.2", MCF_CLK);
+DEFINE_CLK(mcfpit3, "mcfpit.3", MCF_CLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart2, "mcfuart.2", MCF_BUSCLK);
+DEFINE_CLK(fec0, "fec.0", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcfpit0,
+	&clk_mcfpit1,
+	&clk_mcfpit2,
+	&clk_mcfpit3,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	&clk_mcfuart2,
+	&clk_fec0,
+	NULL
+};
 
 /***************************************************************************/
 
diff --git a/arch/m68k/platform/coldfire/m5249.c b/arch/m68k/platform/coldfire/m5249.c
index 23b19cb..c80b5e5 100644
--- a/arch/m68k/platform/coldfire/m5249.c
+++ b/arch/m68k/platform/coldfire/m5249.c
@@ -16,6 +16,26 @@
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
+#include <asm/mcfclk.h>
+
+/***************************************************************************/
+
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr0, "mcftmr.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr1, "mcftmr.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcftmr0,
+	&clk_mcftmr1,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	NULL
+};
 
 /***************************************************************************/
 
@@ -28,8 +48,8 @@
 		.flags		= IORESOURCE_MEM,
 	},
 	{
-		.start		= MCFINTC2_GPIOIRQ6,
-		.end		= MCFINTC2_GPIOIRQ6,
+		.start		= MCF_IRQ_GPIO6,
+		.end		= MCF_IRQ_GPIO6,
 		.flags		= IORESOURCE_IRQ,
 	},
 };
@@ -75,8 +95,8 @@
 	gpio = readl(MCFSIM2_GPIOINTENABLE);
 	writel(gpio | 0x40, MCFSIM2_GPIOINTENABLE);
 
-	gpio = readl(MCFSIM2_INTLEVEL5);
-	writel(gpio | 0x04000000, MCFSIM2_INTLEVEL5);
+	gpio = readl(MCFINTC2_INTPRI5);
+	writel(gpio | 0x04000000, MCFINTC2_INTPRI5);
 }
 
 #endif /* CONFIG_M5249C3 */
diff --git a/arch/m68k/platform/coldfire/m525x.c b/arch/m68k/platform/coldfire/m525x.c
index fce8f8a..5b9f657 100644
--- a/arch/m68k/platform/coldfire/m525x.c
+++ b/arch/m68k/platform/coldfire/m525x.c
@@ -16,6 +16,26 @@
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
+#include <asm/mcfclk.h>
+
+/***************************************************************************/
+
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr0, "mcftmr.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr1, "mcftmr.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcftmr0,
+	&clk_mcftmr1,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	NULL
+};
 
 /***************************************************************************/
 
diff --git a/arch/m68k/platform/coldfire/m5272.c b/arch/m68k/platform/coldfire/m5272.c
index 45b246d..a8c5856 100644
--- a/arch/m68k/platform/coldfire/m5272.c
+++ b/arch/m68k/platform/coldfire/m5272.c
@@ -19,6 +19,7 @@
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
 #include <asm/mcfuart.h>
+#include <asm/mcfclk.h>
 
 /***************************************************************************/
 
@@ -30,6 +31,31 @@
 
 /***************************************************************************/
 
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr0, "mcftmr.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr1, "mcftmr.1", MCF_BUSCLK);
+DEFINE_CLK(mcftmr2, "mcftmr.2", MCF_BUSCLK);
+DEFINE_CLK(mcftmr3, "mcftmr.3", MCF_BUSCLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+DEFINE_CLK(fec0, "fec.0", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcftmr0,
+	&clk_mcftmr1,
+	&clk_mcftmr2,
+	&clk_mcftmr3,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	&clk_fec0,
+	NULL
+};
+
+/***************************************************************************/
+
 static void __init m5272_uarts_init(void)
 {
 	u32 v;
diff --git a/arch/m68k/platform/coldfire/m527x.c b/arch/m68k/platform/coldfire/m527x.c
index 1431ba0..6fbfe909 100644
--- a/arch/m68k/platform/coldfire/m527x.c
+++ b/arch/m68k/platform/coldfire/m527x.c
@@ -20,6 +20,36 @@
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
 #include <asm/mcfuart.h>
+#include <asm/mcfclk.h>
+
+/***************************************************************************/
+
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcfpit0, "mcfpit.0", MCF_CLK);
+DEFINE_CLK(mcfpit1, "mcfpit.1", MCF_CLK);
+DEFINE_CLK(mcfpit2, "mcfpit.2", MCF_CLK);
+DEFINE_CLK(mcfpit3, "mcfpit.3", MCF_CLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart2, "mcfuart.2", MCF_BUSCLK);
+DEFINE_CLK(fec0, "fec.0", MCF_BUSCLK);
+DEFINE_CLK(fec1, "fec.1", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcfpit0,
+	&clk_mcfpit1,
+	&clk_mcfpit2,
+	&clk_mcfpit3,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	&clk_mcfuart2,
+	&clk_fec0,
+	&clk_fec1,
+	NULL
+};
 
 /***************************************************************************/
 
diff --git a/arch/m68k/platform/coldfire/m528x.c b/arch/m68k/platform/coldfire/m528x.c
index f9f7e6a..83b7dad 100644
--- a/arch/m68k/platform/coldfire/m528x.c
+++ b/arch/m68k/platform/coldfire/m528x.c
@@ -21,6 +21,34 @@
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
 #include <asm/mcfuart.h>
+#include <asm/mcfclk.h>
+
+/***************************************************************************/
+
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcfpit0, "mcfpit.0", MCF_CLK);
+DEFINE_CLK(mcfpit1, "mcfpit.1", MCF_CLK);
+DEFINE_CLK(mcfpit2, "mcfpit.2", MCF_CLK);
+DEFINE_CLK(mcfpit3, "mcfpit.3", MCF_CLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart2, "mcfuart.2", MCF_BUSCLK);
+DEFINE_CLK(fec0, "fec.0", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcfpit0,
+	&clk_mcfpit1,
+	&clk_mcfpit2,
+	&clk_mcfpit3,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	&clk_mcfuart2,
+	&clk_fec0,
+	NULL
+};
 
 /***************************************************************************/
 
diff --git a/arch/m68k/platform/coldfire/m5307.c b/arch/m68k/platform/coldfire/m5307.c
index a568d28..8874353 100644
--- a/arch/m68k/platform/coldfire/m5307.c
+++ b/arch/m68k/platform/coldfire/m5307.c
@@ -17,6 +17,7 @@
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
 #include <asm/mcfwdebug.h>
+#include <asm/mcfclk.h>
 
 /***************************************************************************/
 
@@ -28,6 +29,25 @@
 
 /***************************************************************************/
 
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr0, "mcftmr.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr1, "mcftmr.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcftmr0,
+	&clk_mcftmr1,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	NULL
+};
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 #if defined(CONFIG_NETtel) || \
diff --git a/arch/m68k/platform/coldfire/m5407.c b/arch/m68k/platform/coldfire/m5407.c
index bb6c746..2fb3cdb 100644
--- a/arch/m68k/platform/coldfire/m5407.c
+++ b/arch/m68k/platform/coldfire/m5407.c
@@ -16,6 +16,26 @@
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
+#include <asm/mcfclk.h>
+
+/***************************************************************************/
+
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr0, "mcftmr.0", MCF_BUSCLK);
+DEFINE_CLK(mcftmr1, "mcftmr.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcftmr0,
+	&clk_mcftmr1,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	NULL
+};
 
 /***************************************************************************/
 
diff --git a/arch/m68k/platform/coldfire/m54xx.c b/arch/m68k/platform/coldfire/m54xx.c
index b587bf3..952da53 100644
--- a/arch/m68k/platform/coldfire/m54xx.c
+++ b/arch/m68k/platform/coldfire/m54xx.c
@@ -14,19 +14,45 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/clk.h>
 #include <linux/bootmem.h>
 #include <asm/pgalloc.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
 #include <asm/m54xxsim.h>
 #include <asm/mcfuart.h>
+#include <asm/mcfclk.h>
 #include <asm/m54xxgpt.h>
 #ifdef CONFIG_MMU
 #include <asm/mmu_context.h>
 #endif
 
 /***************************************************************************/
 
+DEFINE_CLK(pll, "pll.0", MCF_CLK);
+DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
+DEFINE_CLK(mcfslt0, "mcfslt.0", MCF_BUSCLK);
+DEFINE_CLK(mcfslt1, "mcfslt.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart0, "mcfuart.0", MCF_BUSCLK);
+DEFINE_CLK(mcfuart1, "mcfuart.1", MCF_BUSCLK);
+DEFINE_CLK(mcfuart2, "mcfuart.2", MCF_BUSCLK);
+DEFINE_CLK(mcfuart3, "mcfuart.3", MCF_BUSCLK);
+
+struct clk *mcf_clks[] = {
+	&clk_pll,
+	&clk_sys,
+	&clk_mcfslt0,
+	&clk_mcfslt1,
+	&clk_mcfuart0,
+	&clk_mcfuart1,
+	&clk_mcfuart2,
+	&clk_mcfuart3,
+	NULL
+};
+
+/***************************************************************************/
+
 static void __init m54xx_uarts_init(void)
 {
 	/* enable io pins */
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 4bcf891..ba3b7c8 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -26,8 +26,6 @@
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 config SWAP
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index 01d2282..46460f1 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -114,6 +114,8 @@
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	debug_dma_mapping_error(dev, dma_addr);
 	if (ops->mapping_error)
 		return ops->mapping_error(dev, dma_addr);
 
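The debug_dma_mapping_error() call added here (and in the MIPS copy of this helper below) feeds CONFIG_DMA_API_DEBUG: it records that the driver really did check the mapping, letting the debug core flag mappings whose error status is never inspected. The pattern being tracked, as a minimal sketch with made-up names:

	dma_addr_t handle;

	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;	/* nothing was mapped, nothing to unmap */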
diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 3732bcf..5b18ec1 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -16,6 +16,7 @@
 
 #define instruction_pointer(regs)	((regs)->pc)
 #define profile_pc(regs)		instruction_pointer(regs)
+#define user_stack_pointer(regs)	((regs)->r1)
 
 static inline long regs_return_value(struct pt_regs *regs)
 {
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 99e2393..a5f06ac 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -35,7 +35,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
 #ifdef CONFIG_MMU
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d971d15..b7dc39c 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -41,8 +41,6 @@
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA if 64BIT
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 menu "Machine selection"
 
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index be39a12..006b43e 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -40,6 +40,8 @@
 static inline int dma_mapping_error(struct device *dev, u64 mask)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	debug_dma_mapping_error(dev, mask);
 	return ops->mapping_error(dev, mask);
 }
 
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index cec5e12..a3186f2 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -49,6 +49,7 @@
 
 #define instruction_pointer(regs) ((regs)->cp0_epc)
 #define profile_pc(regs) instruction_pointer(regs)
+#define user_stack_pointer(r) ((r)->regs[29])
 
 extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index b306e20..9e47cc1 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -20,7 +20,6 @@
 #define __ARCH_OMIT_COMPAT_SYS_GETDENTS64
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h
index 3f1237c..770732c 100644
--- a/arch/mips/include/uapi/asm/signal.h
+++ b/arch/mips/include/uapi/asm/signal.h
@@ -86,12 +86,6 @@
 
 #define SA_RESTORER	0x04000000	/* Only for o32 */
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK     1
-#define SS_DISABLE     2
-
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
 
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 7247174..aa03f2e 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,8 +8,6 @@
 	select HAVE_ARCH_KGDB
 	select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config AM33_2
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index cabf8ba..e6d2ed4 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -43,7 +43,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/mn10300/include/uapi/asm/signal.h b/arch/mn10300/include/uapi/asm/signal.h
index 08dcd6a..f423a08 100644
--- a/arch/mn10300/include/uapi/asm/signal.h
+++ b/arch/mn10300/include/uapi/asm/signal.h
@@ -92,12 +92,6 @@
 
 #define SA_RESTORER	0x04000000
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index ec37e18..0ac66f6 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -22,8 +22,6 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config MMU
 	def_bool y
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 8971026..f20d01d 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -32,6 +32,7 @@
 generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mman.h
 generic-y += module.h
diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h
index 07f5299..7c69139 100644
--- a/arch/openrisc/include/asm/io.h
+++ b/arch/openrisc/include/asm/io.h
@@ -30,6 +30,7 @@
 #define PIO_MASK		0
 
 #include <asm-generic/io.h>
+#include <asm/pgtable.h>
 
 extern void __iomem *__ioremap(phys_addr_t offset, unsigned long size,
 				pgprot_t prot);
diff --git a/arch/openrisc/include/uapi/asm/kvm_para.h b/arch/openrisc/include/uapi/asm/kvm_para.h
deleted file mode 100644
index 14fab8f..0000000
--- a/arch/openrisc/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 5082b80..ce40b71 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -20,7 +20,6 @@
 
 #define sys_mmap2 sys_mmap_pgoff
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
 
diff --git a/arch/openrisc/kernel/asm-offsets.c b/arch/openrisc/kernel/asm-offsets.c
index 1a242a0..ddb7368 100644
--- a/arch/openrisc/kernel/asm-offsets.c
+++ b/arch/openrisc/kernel/asm-offsets.c
@@ -34,15 +34,11 @@
 #include <linux/mm.h>
 #include <linux/io.h>
 #include <linux/thread_info.h>
+#include <linux/kbuild.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 
-#define DEFINE(sym, val) \
-		asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
 int main(void)
 {
 	/* offsets into the task_struct */
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index e688a2b..b77feff 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -22,8 +22,6 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 	help
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 1efef41..3043194 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -163,7 +163,6 @@
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h
index b1ddaa2..a2fa2971 100644
--- a/arch/parisc/include/uapi/asm/signal.h
+++ b/arch/parisc/include/uapi/asm/signal.h
@@ -71,12 +71,6 @@
 
 #define SA_RESTORER	0x04000000 /* obsolete -- ignored */
 
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 5e34ccf..2a625fb 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -214,8 +214,6 @@
 
 void *module_alloc(unsigned long size)
 {
-	if (size == 0)
-		return NULL;
 	/* using RWX means less protection for modules, but it's
 	 * easier than trying to map the text, data, init_text and
 	 * init_data correctly */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951a517..17903f1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,10 +141,8 @@
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select GENERIC_KERNEL_THREAD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 config EARLY_PRINTK
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 159e94f..b639852 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -181,7 +181,7 @@
 bootwrapper_install:
 	$(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
-%.dtb:
+%.dtb: scripts
 	$(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
 define archhelp
diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
new file mode 100644
index 0000000..877a28c
--- /dev/null
+++ b/arch/powerpc/boot/dts/a3m071.dts
@@ -0,0 +1,144 @@
+/*
+ * a3m071 board Device Tree Source
+ *
+ * Copyright 2012 Stefan Roese <sr@denx.de>
+ *
+ * Copyright (C) 2011 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "mpc5200b.dtsi"
+
+/ {
+	model = "anonymous,a3m071";
+	compatible = "anonymous,a3m071";
+
+	soc5200@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200b-immr";
+		ranges = <0 0xf0000000 0x0000c000>;
+		reg = <0xf0000000 0x00000100>;
+		bus-frequency = <0>; /* From boot loader */
+		system-frequency = <0>; /* From boot loader */
+
+		timer@600 {
+			fsl,has-wdt;
+		};
+
+		spi@f00 {
+			status = "disabled";
+		};
+
+		usb: usb@1000 {
+			status = "disabled";
+		};
+
+		psc@2000 {
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		psc@2200 {
+			status = "disabled";
+		};
+
+		psc@2400 {
+			status = "disabled";
+		};
+
+		psc@2600 {
+			status = "disabled";
+		};
+
+		psc@2800 {
+			status = "disabled";
+		};
+
+		psc@2c00 {		// PSC6
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2c00 0x100>;
+			interrupts = <2 4 0>;
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@3 {
+				reg = <0x03>;
+			};
+		};
+
+		ata@3a00 {
+			status = "disabled";
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+		i2c@3d40 {
+			status = "disabled";
+		};
+	};
+
+	localbus {
+		compatible = "fsl,mpc5200b-lpb","simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0xfc000000 0x02000000
+			  3 0 0xe9000000 0x00080000
+			  5 0 0xe8000000 0x00010000>;
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0 0x0 0x02000000>;
+			compatible = "cfi-flash";
+			bank-width = <2>;
+			partition@0x0 {
+				label = "u-boot";
+				reg = <0x00000000 0x00040000>;
+				read-only;
+			};
+			partition@0x00040000 {
+				label = "env";
+				reg = <0x00040000 0x00020000>;
+			};
+			partition@0x00060000 {
+				label = "dtb";
+				reg = <0x00060000 0x00020000>;
+			};
+			partition@0x00080000 {
+				label = "kernel";
+				reg = <0x00080000 0x00500000>;
+			};
+			partition@0x00580000 {
+				label = "root";
+				reg = <0x00580000 0x00A80000>;
+			};
+		};
+
+		fpga@3,0 {
+			compatible = "anonymous,a3m071-fpga";
+			reg = <3 0x0 0x00080000
+			       5 0x0 0x00010000>;
+			interrupts = <0 0 3>;  /* level low */
+		};
+	};
+
+	pci@f0000d00 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
index 64b6abe..5d7205b 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
@@ -354,4 +354,5 @@
 /include/ "qoriq-sata2-0.dtsi"
 /include/ "qoriq-sata2-1.dtsi"
 /include/ "qoriq-sec4.2-0.dtsi"
+/include/ "qoriq-raid1.0-0.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
index 0a198b0..8df47fc 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
@@ -73,6 +73,12 @@
 		rtic_c = &rtic_c;
 		rtic_d = &rtic_d;
 		sec_mon = &sec_mon;
+
+		raideng = &raideng;
+		raideng_jr0 = &raideng_jr0;
+		raideng_jr1 = &raideng_jr1;
+		raideng_jr2 = &raideng_jr2;
+		raideng_jr3 = &raideng_jr3;
 	};
 
 	cpus {
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi
new file mode 100644
index 0000000..8d2e8aa
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi
@@ -0,0 +1,85 @@
+/*
+ * QorIQ RAID 1.0 device tree stub [ controller @ offset 0x320000 ]
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+raideng: raideng@320000 {
+	compatible = "fsl,raideng-v1.0";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	reg = <0x320000 0x10000>;
+	ranges = <0 0x320000 0x10000>;
+
+	raideng_jq0@1000 {
+		compatible = "fsl,raideng-v1.0-job-queue";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x1000 0x1000>;
+		ranges = <0x0 0x1000 0x1000>;
+
+		raideng_jr0: jr@0 {
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+			reg = <0x0 0x400>;
+			interrupts = <139 2 0 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		raideng_jr1: jr@400 {
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-lp-ring";
+			reg = <0x400 0x400>;
+			interrupts = <140 2 0 0>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	raideng_jq1@2000 {
+		compatible = "fsl,raideng-v1.0-job-queue";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x2000 0x1000>;
+		ranges = <0x0 0x2000 0x1000>;
+
+		raideng_jr2: jr@0 {
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+			reg = <0x0 0x400>;
+			interrupts = <141 2 0 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		raideng_jr3: jr@400 {
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-lp-ring";
+			reg = <0x400 0x400>;
+			interrupts = <142 2 0 0>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+};
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 1f710a3..5b8e1e5 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -2,7 +2,7 @@
 CONFIG_ALTIVEC=y
 CONFIG_VSX=y
 CONFIG_SMP=y
-CONFIG_NR_CPUS=1024
+CONFIG_NR_CPUS=2048
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index dc2cf9c..ef918a2 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -52,8 +52,6 @@
 #define smp_mb__before_clear_bit()	smp_mb()
 #define smp_mb__after_clear_bit()	smp_mb()
 
-#define BITOP_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
 #define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
 
 /* Macro for generating the ***_bits() functions */
@@ -83,22 +81,22 @@
 
 static __inline__ void set_bit(int nr, volatile unsigned long *addr)
 {
-	set_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+	set_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
 
 static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
 {
-	clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+	clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
 
 static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
 {
-	clear_bits_unlock(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+	clear_bits_unlock(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
 
 static __inline__ void change_bit(int nr, volatile unsigned long *addr)
 {
-	change_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+	change_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
 }
 
 /* Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output
@@ -136,26 +134,26 @@
 static __inline__ int test_and_set_bit(unsigned long nr,
 				       volatile unsigned long *addr)
 {
-	return test_and_set_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+	return test_and_set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
 }
 
 static __inline__ int test_and_set_bit_lock(unsigned long nr,
 				       volatile unsigned long *addr)
 {
-	return test_and_set_bits_lock(BITOP_MASK(nr),
-				addr + BITOP_WORD(nr)) != 0;
+	return test_and_set_bits_lock(BIT_MASK(nr),
+				addr + BIT_WORD(nr)) != 0;
 }
 
 static __inline__ int test_and_clear_bit(unsigned long nr,
 					 volatile unsigned long *addr)
 {
-	return test_and_clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+	return test_and_clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
 }
 
 static __inline__ int test_and_change_bit(unsigned long nr,
 					  volatile unsigned long *addr)
 {
-	return test_and_change_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+	return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
 }
 
 #include <asm-generic/bitops/non-atomic.h>
@@ -280,61 +278,8 @@
 #include <asm-generic/bitops/find.h>
 
 /* Little-endian versions */
+#include <asm-generic/bitops/le.h>
 
-static __inline__ int test_bit_le(unsigned long nr,
-				  __const__ void *addr)
-{
-	__const__ unsigned char	*tmp = (__const__ unsigned char *) addr;
-	return (tmp[nr >> 3] >> (nr & 7)) & 1;
-}
-
-static inline void set_bit_le(int nr, void *addr)
-{
-	set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline void clear_bit_le(int nr, void *addr)
-{
-	clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline void __set_bit_le(int nr, void *addr)
-{
-	__set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline void __clear_bit_le(int nr, void *addr)
-{
-	__clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int test_and_set_bit_le(int nr, void *addr)
-{
-	return test_and_set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int test_and_clear_bit_le(int nr, void *addr)
-{
-	return test_and_clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int __test_and_set_bit_le(int nr, void *addr)
-{
-	return __test_and_set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int __test_and_clear_bit_le(int nr, void *addr)
-{
-	return __test_and_clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-#define find_first_zero_bit_le(addr, size) \
-	find_next_zero_bit_le((addr), (size), 0)
-unsigned long find_next_zero_bit_le(const void *addr,
-				    unsigned long size, unsigned long offset);
-
-unsigned long find_next_bit_le(const void *addr,
-				    unsigned long size, unsigned long offset);
 /* Bitmap functions for the ext2 filesystem */
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
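
[Editor's note on the bitops hunk above: the generic BIT_MASK()/BIT_WORD() macros from <linux/bitops.h> compute exactly what the removed local BITOP_MASK()/BITOP_WORD() copies did, which is why the substitution is mechanical. A minimal userspace C sketch of the arithmetic — the demo values are invented, and set_bit() adds atomicity on top of this:]

#include <stdio.h>

#define BITS_PER_LONG  (8 * sizeof(unsigned long))
/* Same shape as the generic BIT_MASK()/BIT_WORD() in <linux/bitops.h> */
#define BIT_MASK(nr)   (1UL << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr)   ((nr) / BITS_PER_LONG)

int main(void)
{
	unsigned long map[2] = { 0, 0 };
	int nr = 70;	/* on 64-bit, bit 70 lands in word 1, bit 6 */

	map[BIT_WORD(nr)] |= BIT_MASK(nr);	/* non-atomic core of set_bit() */
	printf("word=%zu mask=%#lx\n", (size_t)BIT_WORD(nr), BIT_MASK(nr));
	return 0;
}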
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 21a0687..76f81bd 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -401,6 +401,14 @@
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
+#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -421,8 +429,8 @@
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
-	    CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T |		\
-	    CPU_FTR_VSX)
+	    CPU_FTRS_POWER7 | CPU_FTRS_POWER8 | CPU_FTRS_CELL |		\
+	    CPU_FTRS_PA6T | CPU_FTR_VSX)
 #endif
 #else
 enum {
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 154c067..607e4ee 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009 Freescale Semicondutor, Inc.
+ * Copyright 2009 Freescale Semiconductor, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 7816087..e27e9ad 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -172,6 +172,7 @@
 {
 	struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
+	debug_dma_mapping_error(dev, dma_addr);
 	if (dma_ops->mapping_error)
 		return dma_ops->mapping_error(dev, dma_addr);
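
[Editor's note: the hunk above hooks DMA-debug into the powerpc mapping_error path; the driver-facing pattern it instruments is unchanged. A hedged sketch of that canonical pattern — foo_map_buf() and its arguments are invented for illustration:]

/* Sketch only: the usual check a driver must make after mapping.
 * With CONFIG_DMA_API_DEBUG, dma_mapping_error() is now also recorded
 * by debug_dma_mapping_error(), silencing false "unchecked mapping"
 * warnings.
 */
static int foo_map_buf(struct device *dev, void *buf, size_t len,
		       dma_addr_t *handle)
{
	*handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *handle))
		return -ENOMEM;	/* mapping failed; never use *handle */
	return 0;
}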
 
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index a43c147..ad708dd 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -48,6 +48,35 @@
 #define EX_LR		72
 #define EX_CFAR		80
 
+#ifdef CONFIG_RELOCATABLE
+#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)			\
+	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
+	LOAD_HANDLER(r12,label);					\
+	mtlr	r12;							\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	li	r10,MSR_RI;						\
+	mtmsrd 	r10,1;			/* Set RI (EE=0) */		\
+	blr;
+#else
+/* If not relocatable, we can jump directly -- and save messing with LR */
+#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)			\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	li	r10,MSR_RI;						\
+	mtmsrd 	r10,1;			/* Set RI (EE=0) */		\
+	b	label;
+#endif
+
+/*
+ * As EXCEPTION_PROLOG_PSERIES(), except we've already got relocation on
+ * so no need to rfid.  Save lr in case we're CONFIG_RELOCATABLE, in which
+ * case EXCEPTION_RELON_PROLOG_PSERIES_1 will be using lr.
+ */
+#define EXCEPTION_RELON_PROLOG_PSERIES(area, label, h, extra, vec)	\
+	EXCEPTION_PROLOG_1(area, extra, vec);				\
+	EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
+
 /*
  * We're short on space and time in the exception prolog, so we can't
  * use the normal SET_REG_IMMEDIATE macro. Normally we just need the
@@ -55,12 +84,29 @@
  * word.
  */
 #define LOAD_HANDLER(reg, label)					\
-	addi	reg,reg,(label)-_stext;	/* virt addr of handler ... */
+	/* Handlers must be within 64K of kbase, which must be 64K aligned */ \
+	ori	reg,reg,(label)-_stext;	/* virt addr of handler ... */
 
 /* Exception register prefixes */
 #define EXC_HV	H
 #define EXC_STD
 
+#if defined(CONFIG_RELOCATABLE)
+/*
+ * If we support interrupts with relocation on AND we're a relocatable
+ * kernel, we need to use LR to get to the 2nd level handler.  So, save/restore
+ * it when required.
+ */
+#define SAVE_LR(reg, area)	mflr	reg ; 	std	reg,area+EX_LR(r13)
+#define GET_LR(reg, area) 			ld	reg,area+EX_LR(r13)
+#define RESTORE_LR(reg, area)	ld	reg,area+EX_LR(r13) ; mtlr reg
+#else
+/* ...else LR is unused and stays in the register. */
+#define SAVE_LR(reg, area)
+#define GET_LR(reg, area) 	mflr	reg
+#define RESTORE_LR(reg, area)
+#endif
+
 #define __EXCEPTION_PROLOG_1(area, extra, vec)				\
 	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
@@ -69,6 +115,7 @@
 	mfspr	r10,SPRN_CFAR;						\
 	std	r10,area+EX_CFAR(r13);					\
 	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		\
+	SAVE_LR(r10, area);						\
 	mfcr	r9;							\
 	extra(vec);							\
 	std	r11,area+EX_R11(r13);					\
@@ -169,6 +216,7 @@
 	sth	r1,PACA_TRAP_SAVE(r13);					   \
 	std	r3,area+EX_R3(r13);					   \
 	addi	r3,r13,area;		/* r3 -> where regs are saved*/	   \
+	RESTORE_LR(r1, area);						   \
 	b	bad_stack;						   \
 3:	std	r9,_CCR(r1);		/* save CR in stackframe	*/ \
 	std	r11,_NIP(r1);		/* save SRR0 in stackframe	*/ \
@@ -194,8 +242,8 @@
 	ld	r10,area+EX_CFAR(r13);					   \
 	std	r10,ORIG_GPR3(r1);					   \
 	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		   \
+	GET_LR(r9,area);		/* Get LR, later save to stack	*/ \
 	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
-	mflr	r9;			/* save LR in stackframe	*/ \
 	std	r9,_LINK(r1);						   \
 	mfctr	r10;			/* save CTR in stackframe	*/ \
 	std	r10,_CTR(r1);						   \
@@ -232,6 +280,26 @@
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
 				 EXC_HV, KVMTEST, vec)
 
+#define STD_RELON_EXCEPTION_PSERIES(loc, vec, label)	\
+	. = loc;					\
+	.globl label##_relon_pSeries;			\
+label##_relon_pSeries:					\
+	HMT_MEDIUM;					\
+	/* No guest interrupts come through here */	\
+	SET_SCRATCH0(r13);		/* save r13 */	\
+	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
+				       EXC_STD, KVMTEST_PR, vec)
+
+#define STD_RELON_EXCEPTION_HV(loc, vec, label)		\
+	. = loc;					\
+	.globl label##_relon_hv;			\
+label##_relon_hv:					\
+	HMT_MEDIUM;					\
+	/* No guest interrupts come through here */	\
+	SET_SCRATCH0(r13);	/* save r13 */		\
+	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
+				       EXC_HV, KVMTEST, vec)
+
 /* This associates vector numbers with bits in paca->irq_happened */
 #define SOFTEN_VALUE_0x500	PACA_IRQ_EE
 #define SOFTEN_VALUE_0x502	PACA_IRQ_EE
@@ -257,6 +325,9 @@
 	KVMTEST(vec);							\
 	_SOFTEN_TEST(EXC_STD, vec)
 
+#define SOFTEN_NOTEST_PR(vec)		_SOFTEN_TEST(EXC_STD, vec)
+#define SOFTEN_NOTEST_HV(vec)		_SOFTEN_TEST(EXC_HV, vec)
+
 #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)		\
 	HMT_MEDIUM;							\
 	SET_SCRATCH0(r13);    /* save r13 */				\
@@ -279,6 +350,28 @@
 	_MASKABLE_EXCEPTION_PSERIES(vec, label,				\
 				    EXC_HV, SOFTEN_TEST_HV)
 
+#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)	\
+	HMT_MEDIUM;							\
+	SET_SCRATCH0(r13);    /* save r13 */				\
+	__EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec);		\
+	EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, h);
+#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)	\
+	__MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
+
+#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label)		\
+	. = loc;							\
+	.globl label##_relon_pSeries;					\
+label##_relon_pSeries:							\
+	_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label,			\
+					  EXC_STD, SOFTEN_NOTEST_PR)
+
+#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label)			\
+	. = loc;							\
+	.globl label##_relon_hv;					\
+label##_relon_hv:							\
+	_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label,			\
+					  EXC_HV, SOFTEN_NOTEST_HV)
+
 /*
  * Our exception common code can be passed various "additions"
  * to specify the behaviour of interrupts, whether to kick the
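
[Editor's note on the LOAD_HANDLER() change above: addi takes a 16-bit signed immediate, so handlers had to sit within 32K of kbase; ori takes a 16-bit unsigned immediate and simply ORs it into the low bits, doubling the reach to 64K — but only if kbase is 64K-aligned so the OR behaves like an add. A small C model of that arithmetic; the function names are illustrative, not kernel symbols:]

#include <assert.h>
#include <stdint.h>

/* Model of the two LOAD_HANDLER() variants: 'kbase' stands in for
 * PACAKBASE, 'off' for (label)-_stext.
 */
static uint64_t handler_addi(uint64_t kbase, int16_t off)
{
	return kbase + off;		/* signed 16-bit imm: offset < 32K */
}

static uint64_t handler_ori(uint64_t kbase, uint16_t off)
{
	return kbase | off;		/* unsigned 16-bit imm: offset < 64K */
}

int main(void)
{
	uint64_t kbase = 0xc000000000000000ULL;

	assert((kbase & 0xffff) == 0);	/* 64K alignment makes OR == ADD */
	assert(handler_ori(kbase, 0x9000) == kbase + 0x9000);
	/* 0x9000 would not fit addi's signed immediate (max 0x7fff) */
	(void)handler_addi;
	return 0;
}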
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index ad0b751..973cc3b 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -49,6 +49,7 @@
 #define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
 #define FW_FEATURE_OPAL		ASM_CONST(0x0000000010000000)
 #define FW_FEATURE_OPALv2	ASM_CONST(0x0000000020000000)
+#define FW_FEATURE_SET_MODE	ASM_CONST(0x0000000040000000)
 
 #ifndef __ASSEMBLY__
 
@@ -62,7 +63,8 @@
 		FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
 		FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
 		FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
-		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO,
+		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
+		FW_FEATURE_SET_MODE,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/fsl_gtm.h b/arch/powerpc/include/asm/fsl_gtm.h
index 8e8c9b5..3b05808 100644
--- a/arch/powerpc/include/asm/fsl_gtm.h
+++ b/arch/powerpc/include/asm/fsl_gtm.h
@@ -1,7 +1,7 @@
 /*
  * Freescale General-purpose Timers Module
  *
- * Copyright (c) Freescale Semicondutor, Inc. 2006.
+ * Copyright 2006 Freescale Semiconductor, Inc.
  *               Shlomi Gridish <gridish@freescale.com>
  *               Jerry Huang <Chang-Ming.Huang@freescale.com>
  * Copyright (c) MontaVista Software, Inc. 2008.
diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h
index dd5ba2c..77ced0b 100644
--- a/arch/powerpc/include/asm/fsl_guts.h
+++ b/arch/powerpc/include/asm/fsl_guts.h
@@ -71,7 +71,9 @@
 	u8	res0c4[0x224 - 0xc4];
 	__be32  iodelay1;	/* 0x.0224 - IO delay control register 1 */
 	__be32  iodelay2;	/* 0x.0228 - IO delay control register 2 */
-	u8	res22c[0x800 - 0x22c];
+	u8	res22c[0x604 - 0x22c];
+	__be32	pamubypenr; 	/* 0x.604 - PAMU bypass enable register */
+	u8	res608[0x800 - 0x608];
 	__be32	clkdvdr;	/* 0x.0800 - Clock Divide Register */
 	u8	res804[0x900 - 0x804];
 	__be32	ircr;		/* 0x.0900 - Infrared Control Register */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 7a86706..0975e5c 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -267,7 +267,8 @@
 #define H_RANDOM		0x300
 #define H_COP			0x304
 #define H_GET_MPP_X		0x314
-#define MAX_HCALL_OPCODE	H_GET_MPP_X
+#define H_SET_MODE		0x31C
+#define MAX_HCALL_OPCODE	H_SET_MODE
 
 #ifndef __ASSEMBLY__
 
@@ -355,6 +356,26 @@
 
 int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data);
 
+static inline unsigned int get_longbusy_msecs(int longbusy_rc)
+{
+	switch (longbusy_rc) {
+	case H_LONG_BUSY_ORDER_1_MSEC:
+		return 1;
+	case H_LONG_BUSY_ORDER_10_MSEC:
+		return 10;
+	case H_LONG_BUSY_ORDER_100_MSEC:
+		return 100;
+	case H_LONG_BUSY_ORDER_1_SEC:
+		return 1000;
+	case H_LONG_BUSY_ORDER_10_SEC:
+		return 10000;
+	case H_LONG_BUSY_ORDER_100_SEC:
+		return 100000;
+	default:
+		return 1;
+	}
+}
+
 #ifdef CONFIG_PPC_PSERIES
 extern int CMO_PrPSP;
 extern int CMO_SecPSP;
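
[Editor's note: get_longbusy_msecs() above turns the hypervisor's H_LONG_BUSY_* hint into a sleep interval. A hedged sketch of the retry loop callers typically build around it — set_mode_retry() is a made-up name, and H_SET_MODE with these arguments is used purely as an example:]

/* Sketch: retry an hcall while the hypervisor reports "long busy",
 * sleeping for the hinted interval each time.
 */
static int set_mode_retry(unsigned long mflags, unsigned long resource,
			  unsigned long value1, unsigned long value2)
{
	long rc;

	do {
		rc = plpar_hcall_norets(H_SET_MODE, mflags, resource,
					value1, value2);
		if (H_IS_LONG_BUSY(rc))
			msleep(get_longbusy_msecs(rc));
	} while (H_IS_LONG_BUSY(rc));

	return rc == H_SUCCESS ? 0 : -EIO;
}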
diff --git a/arch/powerpc/include/asm/immap_qe.h b/arch/powerpc/include/asm/immap_qe.h
index 61e8490..bedbff8 100644
--- a/arch/powerpc/include/asm/immap_qe.h
+++ b/arch/powerpc/include/asm/immap_qe.h
@@ -3,7 +3,7 @@
  * The Internal Memory Map for devices with QE on them. This
  * is the superset of all QE devices (8360, etc.).
 
- * Copyright (C) 2006. Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006. Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c423197..19d9d96 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -166,9 +166,6 @@
 						unsigned long size,
 						pgprot_t vma_prot);
 
-	/* Idle loop for this platform, leave empty for default idle loop */
-	void		(*idle_loop)(void);
-
 	/*
 	 * Function for waiting for work with reduced power in idle loop;
 	 * called with interrupts disabled.
@@ -320,28 +317,28 @@
 		ppc_md.log_error(buf, err_type, fatal);
 }
 
-#define __define_machine_initcall(mach,level,fn,id) \
+#define __define_machine_initcall(mach, fn, id) \
 	static int __init __machine_initcall_##mach##_##fn(void) { \
 		if (machine_is(mach)) return fn(); \
 		return 0; \
 	} \
-	__define_initcall(level,__machine_initcall_##mach##_##fn,id);
+	__define_initcall(__machine_initcall_##mach##_##fn, id);
 
-#define machine_core_initcall(mach,fn)		__define_machine_initcall(mach,"1",fn,1)
-#define machine_core_initcall_sync(mach,fn)	__define_machine_initcall(mach,"1s",fn,1s)
-#define machine_postcore_initcall(mach,fn)	__define_machine_initcall(mach,"2",fn,2)
-#define machine_postcore_initcall_sync(mach,fn)	__define_machine_initcall(mach,"2s",fn,2s)
-#define machine_arch_initcall(mach,fn)		__define_machine_initcall(mach,"3",fn,3)
-#define machine_arch_initcall_sync(mach,fn)	__define_machine_initcall(mach,"3s",fn,3s)
-#define machine_subsys_initcall(mach,fn)	__define_machine_initcall(mach,"4",fn,4)
-#define machine_subsys_initcall_sync(mach,fn)	__define_machine_initcall(mach,"4s",fn,4s)
-#define machine_fs_initcall(mach,fn)		__define_machine_initcall(mach,"5",fn,5)
-#define machine_fs_initcall_sync(mach,fn)	__define_machine_initcall(mach,"5s",fn,5s)
-#define machine_rootfs_initcall(mach,fn)	__define_machine_initcall(mach,"rootfs",fn,rootfs)
-#define machine_device_initcall(mach,fn)	__define_machine_initcall(mach,"6",fn,6)
-#define machine_device_initcall_sync(mach,fn)	__define_machine_initcall(mach,"6s",fn,6s)
-#define machine_late_initcall(mach,fn)		__define_machine_initcall(mach,"7",fn,7)
-#define machine_late_initcall_sync(mach,fn)	__define_machine_initcall(mach,"7s",fn,7s)
+#define machine_core_initcall(mach, fn)		__define_machine_initcall(mach, fn, 1)
+#define machine_core_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 1s)
+#define machine_postcore_initcall(mach, fn)	__define_machine_initcall(mach, fn, 2)
+#define machine_postcore_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 2s)
+#define machine_arch_initcall(mach, fn)		__define_machine_initcall(mach, fn, 3)
+#define machine_arch_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 3s)
+#define machine_subsys_initcall(mach, fn)	__define_machine_initcall(mach, fn, 4)
+#define machine_subsys_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 4s)
+#define machine_fs_initcall(mach, fn)		__define_machine_initcall(mach, fn, 5)
+#define machine_fs_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 5s)
+#define machine_rootfs_initcall(mach, fn)	__define_machine_initcall(mach, fn, rootfs)
+#define machine_device_initcall(mach, fn)	__define_machine_initcall(mach, fn, 6)
+#define machine_device_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 6s)
+#define machine_late_initcall(mach, fn)		__define_machine_initcall(mach, fn, 7)
+#define machine_late_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 7s)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MACHDEP_H */
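
[Editor's note: the machine_*_initcall() rework above only drops the now-unused level string argument; call sites are unchanged. For reference, a minimal example of the idiom — foo_init() is hypothetical:]

/* Runs at device_initcall time, but only when the kernel booted on a
 * pseries machine; other platforms see a no-op returning 0.
 */
static int __init foo_init(void)
{
	pr_info("foo: pseries-specific setup\n");
	return 0;
}
machine_device_initcall(pseries, foo_init);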
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 5e38eed..691fd8a 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -101,6 +101,7 @@
 #define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
 #define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
 #define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER8		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
 #define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
 				MMU_FTR_CI_LARGE_PAGE
 #define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
deleted file mode 100644
index c07edfe..0000000
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _PPC64_PSERIES_RECONFIG_H
-#define _PPC64_PSERIES_RECONFIG_H
-#ifdef __KERNEL__
-
-#include <linux/notifier.h>
-
-/*
- * Use this API if your code needs to know about OF device nodes being
- * added or removed on pSeries systems.
- */
-
-#define PSERIES_RECONFIG_ADD		0x0001
-#define PSERIES_RECONFIG_REMOVE		0x0002
-#define PSERIES_DRCONF_MEM_ADD		0x0003
-#define PSERIES_DRCONF_MEM_REMOVE	0x0004
-#define PSERIES_UPDATE_PROPERTY		0x0005
-
-/**
- * pSeries_reconfig_notify - Notifier value structure for OFDT property updates
- *
- * @node: Device tree node which owns the property being updated
- * @property: Updated property
- */
-struct pSeries_reconfig_prop_update {
-	struct device_node *node;
-	struct property *property;
-};
-
-#ifdef CONFIG_PPC_PSERIES
-extern int pSeries_reconfig_notifier_register(struct notifier_block *);
-extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
-extern int pSeries_reconfig_notify(unsigned long action, void *p);
-/* Not the best place to put this, will be fixed when we move some
- * of the rtas suspend-me stuff to pseries */
-extern void pSeries_coalesce_init(void);
-#else /* !CONFIG_PPC_PSERIES */
-static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
-static inline void pSeries_coalesce_init(void) { }
-#endif /* CONFIG_PPC_PSERIES */
-
-
-#endif /* __KERNEL__ */
-#endif /* _PPC64_PSERIES_RECONFIG_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 42b1f43..51fb00a 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009 Freescale Semicondutor, Inc.
+ * Copyright 2009 Freescale Semiconductor, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -86,6 +86,7 @@
 #define PPC_INST_DCBA_MASK		0xfc0007fe
 #define PPC_INST_DCBAL			0x7c2005ec
 #define PPC_INST_DCBZL			0x7c2007ec
+#define PPC_INST_ICBT			0x7c00002c
 #define PPC_INST_ISEL			0x7c00001e
 #define PPC_INST_ISEL_MASK		0xfc00003e
 #define PPC_INST_LDARX			0x7c0000a8
@@ -201,6 +202,7 @@
 #define __PPC_MB(s)	(((s) & 0x1f) << 6)
 #define __PPC_ME(s)	(((s) & 0x1f) << 1)
 #define __PPC_BI(s)	(((s) & 0x1f) << 16)
+#define __PPC_CT(t)	(((t) & 0x0f) << 21)
 
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
@@ -263,6 +265,8 @@
 					__PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
 #define PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long PPC_INST_SLBFEE | \
 					__PPC_RT(t) | __PPC_RB(b))
+#define PPC_ICBT(c,a,b)		stringify_in_c(.long PPC_INST_ICBT | \
+				       __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
 /* PASemi instructions */
 #define LBZCIX(t,a,b)		stringify_in_c(.long PPC_INST_LBZCIX | \
 				       __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b))
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index b5c9190..99c92d5 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -58,6 +58,22 @@
 
 extern void of_instantiate_rtc(void);
 
+/* The of_drconf_cell struct defines the layout of the LMB array
+ * specified in the device tree property
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
+ */
+struct of_drconf_cell {
+	u64	base_addr;
+	u32	drc_index;
+	u32	reserved;
+	u32	aa_index;
+	u32	flags;
+};
+
+#define DRCONF_MEM_ASSIGNED	0x00000008
+#define DRCONF_MEM_AI_INVALID	0x00000040
+#define DRCONF_MEM_RESERVED	0x00000080
+
 /* These includes are put at the bottom because they may contain things
  * that are overridden by this file.  Ideally they shouldn't be included
  * by this file, but there are a bunch of .c files that currently depend
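
[Editor's note: struct of_drconf_cell above mirrors the packed encoding of the ibm,dynamic-memory property, which conventionally starts with a u32 entry count followed by one record per LMB (base_addr occupying two cells). A hedged parsing sketch; the function name and pr_debug() output are invented:]

static void walk_drconf_memory(struct device_node *memory)
{
	const __be32 *p = of_get_property(memory, "ibm,dynamic-memory", NULL);
	u32 n;

	if (!p)
		return;

	n = be32_to_cpup(p++);		/* leading cell: entry count */
	while (n--) {
		struct of_drconf_cell lmb;

		lmb.base_addr = of_read_number(p, 2);	p += 2;
		lmb.drc_index = be32_to_cpup(p++);
		lmb.reserved  = be32_to_cpup(p++);
		lmb.aa_index  = be32_to_cpup(p++);
		lmb.flags     = be32_to_cpup(p++);

		if (lmb.flags & DRCONF_MEM_ASSIGNED)
			pr_debug("LMB at %llx owned by this partition\n",
				 (unsigned long long)lmb.base_addr);
	}
}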
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index 229571a..32b9bfa 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h
index f706164..25784cc 100644
--- a/arch/powerpc/include/asm/qe_ic.h
+++ b/arch/powerpc/include/asm/qe_ic.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 97d3727..3d5c9dc 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -249,6 +249,8 @@
 #define   LPCR_RMLS    0x1C000000      /* impl dependent rmo limit sel */
 #define	  LPCR_RMLS_SH	(63-37)
 #define   LPCR_ILE     0x02000000      /* !HV irqs set MSR:LE */
+#define   LPCR_AIL_0	0x00000000	/* MMU off exception offset 0x0 */
+#define   LPCR_AIL_3	0x01800000	/* MMU on exception offset 0xc00...4xxx */
 #define   LPCR_PECE	0x00007000	/* powersave exit cause enable */
 #define     LPCR_PECE0	0x00004000	/* ext. exceptions can cause exit */
 #define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
@@ -1030,6 +1032,7 @@
 #define PVR_970MP	0x0044
 #define PVR_970GX	0x0045
 #define PVR_POWER7p	0x004A
+#define PVR_POWER8	0x004B
 #define PVR_BE		0x0070
 #define PVR_PA6T	0x0090
 
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 557cff8..aef00c6 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -353,8 +353,13 @@
 		return 1;
 	return 0;
 }
+
+/* Not the best place to put pSeries_coalesce_init, will be fixed when we
+ * move some of the rtas suspend-me stuff to pseries */
+extern void pSeries_coalesce_init(void);
 #else
 static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;}
+static inline void pSeries_coalesce_init(void) { }
 #endif
 
 extern int call_rtas(const char *, int, int, unsigned long *, ...);
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
new file mode 100644
index 0000000..d3ca855
--- /dev/null
+++ b/arch/powerpc/include/asm/setup.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_POWERPC_SETUP_H
+#define _ASM_POWERPC_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+#ifndef __ASSEMBLY__
+extern void ppc_printk_progress(char *s, unsigned short hex);
+
+extern unsigned int rtas_data;
+extern int mem_init_done;	/* set on boot once kmalloc can be called */
+extern int init_bootmem_done;	/* set once bootmem is available */
+extern unsigned long long memory_limit;
+extern unsigned long klimit;
+extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
+
+struct device_node;
+extern void note_scsi_host(struct device_node *, void *);
+
+/* Used in very early kernel initialization. */
+extern unsigned long reloc_offset(void);
+extern unsigned long add_reloc_offset(unsigned long);
+extern void reloc_got2(unsigned long);
+
+#define PTRRELOC(x)	((typeof(x)) add_reloc_offset((unsigned long)(x)))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif	/* _ASM_POWERPC_SETUP_H */
+
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 8408387..97909d3b 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -164,7 +164,7 @@
 SYSCALL_SPU(sched_yield)
 COMPAT_SYS_SPU(sched_get_priority_max)
 COMPAT_SYS_SPU(sched_get_priority_min)
-COMPAT_SYS_SPU(sched_rr_get_interval)
+SYSX_SPU(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval_wrapper,sys_sched_rr_get_interval)
 COMPAT_SYS_SPU(nanosleep)
 SYSCALL_SPU(mremap)
 SYSCALL_SPU(setresuid)
@@ -356,3 +356,4 @@
 SYSCALL_SPU(setns)
 COMPAT_SYS(process_vm_readv)
 COMPAT_SYS(process_vm_writev)
+SYSCALL(finit_module)
diff --git a/arch/powerpc/include/asm/ucc.h b/arch/powerpc/include/asm/ucc.h
index 46b09ba..6927ac2 100644
--- a/arch/powerpc/include/asm/ucc.h
+++ b/arch/powerpc/include/asm/ucc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/ucc_fast.h b/arch/powerpc/include/asm/ucc_fast.h
index 4644c84..72ea9ba 100644
--- a/arch/powerpc/include/asm/ucc_fast.h
+++ b/arch/powerpc/include/asm/ucc_fast.h
@@ -1,7 +1,7 @@
 /*
  * Internal header file for UCC FAST unit routines.
  *
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h
index cf131ff..c44131e 100644
--- a/arch/powerpc/include/asm/ucc_slow.h
+++ b/arch/powerpc/include/asm/ucc_slow.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index b303881..5a7510e 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -21,7 +21,6 @@
 
 extern void udbg_puts(const char *s);
 extern int udbg_write(const char *s, int n);
-extern int udbg_read(char *buf, int buflen);
 
 extern void register_early_udbg_console(void);
 extern void udbg_printf(const char *fmt, ...)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 76fe846..1d4864a 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define __NR_syscalls		353
+#define __NR_syscalls		354
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
@@ -54,8 +54,8 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/powerpc/include/uapi/asm/setup.h b/arch/powerpc/include/uapi/asm/setup.h
index 8b9a306..552df83 100644
--- a/arch/powerpc/include/uapi/asm/setup.h
+++ b/arch/powerpc/include/uapi/asm/setup.h
@@ -1,32 +1 @@
-#ifndef _ASM_POWERPC_SETUP_H
-#define _ASM_POWERPC_SETUP_H
-
 #include <asm-generic/setup.h>
-
-#ifndef __ASSEMBLY__
-extern void ppc_printk_progress(char *s, unsigned short hex);
-
-extern unsigned int rtas_data;
-extern int mem_init_done;	/* set on boot once kmalloc can be called */
-extern int init_bootmem_done;	/* set once bootmem is available */
-extern unsigned long long memory_limit;
-extern unsigned long klimit;
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
-
-extern void via_cuda_init(void);
-extern void read_rtc_time(void);
-extern void pmac_find_display(void);
-
-struct device_node;
-extern void note_scsi_host(struct device_node *, void *);
-
-/* Used in very early kernel initialization. */
-extern unsigned long reloc_offset(void);
-extern unsigned long add_reloc_offset(unsigned long);
-extern void reloc_got2(unsigned long);
-
-#define PTRRELOC(x)	((typeof(x)) add_reloc_offset((unsigned long)(x)))
-
-#endif /* !__ASSEMBLY__ */
-
-#endif	/* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h
index 48fa8d3..e079fb3 100644
--- a/arch/powerpc/include/uapi/asm/signal.h
+++ b/arch/powerpc/include/uapi/asm/signal.h
@@ -85,12 +85,6 @@
 
 #define SA_RESTORER	0x04000000U
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 380b5d3..8c478c6 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -375,6 +375,7 @@
 #define __NR_setns		350
 #define __NR_process_vm_readv	351
 #define __NR_process_vm_writev	352
+#define __NR_finit_module	353
 
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cde12f8..8f61934 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,7 +38,7 @@
 				   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power7.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power.S
similarity index 80%
rename from arch/powerpc/kernel/cpu_setup_power7.S
rename to arch/powerpc/kernel/cpu_setup_power.S
index 76797c5..57cf140 100644
--- a/arch/powerpc/kernel/cpu_setup_power7.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -27,6 +27,7 @@
 	beqlr
 	li	r0,0
 	mtspr	SPRN_LPID,r0
+	mfspr	r3,SPRN_LPCR
 	bl	__init_LPCR
 	bl	__init_TLB
 	mtlr	r11
@@ -39,6 +40,35 @@
 	beqlr
 	li	r0,0
 	mtspr	SPRN_LPID,r0
+	mfspr	r3,SPRN_LPCR
+	bl	__init_LPCR
+	bl	__init_TLB
+	mtlr	r11
+	blr
+
+_GLOBAL(__setup_cpu_power8)
+	mflr	r11
+	bl	__init_hvmode_206
+	mtlr	r11
+	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
+	mfspr	r3,SPRN_LPCR
+	oris	r3, r3, LPCR_AIL_3@h
+	bl	__init_LPCR
+	bl	__init_TLB
+	mtlr	r11
+	blr
+
+_GLOBAL(__restore_cpu_power8)
+	mflr	r11
+	mfmsr	r3
+	rldicl.	r0,r3,4,63
+	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
+	mfspr   r3,SPRN_LPCR
+	oris	r3, r3, LPCR_AIL_3@h
 	bl	__init_LPCR
 	bl	__init_TLB
 	mtlr	r11
@@ -57,6 +87,7 @@
 
 __init_LPCR:
 	/* Setup a sane LPCR:
+	 *   Called with initial LPCR in R3
 	 *
 	 *   LPES = 0b01 (HSRR0/1 used for 0x500)
 	 *   PECE = 0b111
@@ -67,7 +98,6 @@
 	 *
 	 * Other bits untouched for now
 	 */
-	mfspr	r3,SPRN_LPCR
 	li	r5,1
 	rldimi	r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
 	ori	r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 0514c21..75a3d71 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -68,6 +68,8 @@
 extern void __restore_cpu_ppc970(void);
 extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power7(void);
+extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power8(void);
 extern void __restore_cpu_a2(void);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
@@ -94,6 +96,10 @@
 				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER8	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
 #define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -429,6 +435,21 @@
 		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7",
 	},
+	{	/* 2.07-compliant processor, i.e. Power8 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000004,
+		.cpu_name		= "POWER8 (architected)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.platform		= "power8",
+	},
 	{	/* Power7 */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x003f0000,
@@ -463,6 +484,23 @@
 		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7+",
 	},
+	{	/* Power8 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004b0000,
+		.cpu_name		= "POWER8 (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.oprofile_cpu_type	= "ppc64/power8",
+		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.platform		= "power8",
+	},
 	{	/* Cell Broadband Engine */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x00700000,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e9a906c..b310a05 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -373,6 +373,8 @@
 _GLOBAL(ret_from_kernel_thread)
 	bl	.schedule_tail
 	REST_NVGPRS(r1)
+	li	r3,0
+	std	r3,0(r1)
 	ld	r14, 0(r14)
 	mtlr	r14
 	mr	r3,r15
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 10b658a..4665e82 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,12 +19,76 @@
 /*
  * We layout physical memory as follows:
  * 0x0000 - 0x00ff : Secondary processor spin code
- * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x5fff : interrupt support common interrupt prologs
- * 0x6000 - 0x6fff : Initial (CPU0) segment table
+ * 0x0100 - 0x17ff : pSeries Interrupt prologs
+ * 0x1800 - 0x4000 : interrupt support common interrupt prologs
+ * 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1
+ * 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1
  * 0x7000 - 0x7fff : FWNMI data area
- * 0x8000 -        : Early init and support code
+ * 0x8000 - 0x8fff : Initial (CPU0) segment table
+ * 0x9000 -        : Early init and support code
  */
+	/* Syscall routine is used twice, in reloc-off and reloc-on paths */
+#define SYSCALL_PSERIES_1 					\
+BEGIN_FTR_SECTION						\
+	cmpdi	r0,0x1ebe ; 					\
+	beq-	1f ;						\
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
+	mr	r9,r13 ;					\
+	GET_PACA(r13) ;						\
+	mfspr	r11,SPRN_SRR0 ;					\
+0:
+
+#define SYSCALL_PSERIES_2_RFID 					\
+	mfspr	r12,SPRN_SRR1 ;					\
+	ld	r10,PACAKBASE(r13) ; 				\
+	LOAD_HANDLER(r10, system_call_entry) ; 			\
+	mtspr	SPRN_SRR0,r10 ; 				\
+	ld	r10,PACAKMSR(r13) ;				\
+	mtspr	SPRN_SRR1,r10 ; 				\
+	rfid ; 							\
+	b	. ;	/* prevent speculative execution */
+
+#define SYSCALL_PSERIES_3					\
+	/* Fast LE/BE switch system call */			\
+1:	mfspr	r12,SPRN_SRR1 ;					\
+	xori	r12,r12,MSR_LE ;				\
+	mtspr	SPRN_SRR1,r12 ;					\
+	rfid ;		/* return to userspace */		\
+	b	. ;						\
+2:	mfspr	r12,SPRN_SRR1 ;					\
+	andi.	r12,r12,MSR_PR ;				\
+	bne	0b ;						\
+	mtspr	SPRN_SRR0,r3 ;					\
+	mtspr	SPRN_SRR1,r4 ;					\
+	mtspr	SPRN_SDR1,r5 ;					\
+	rfid ;							\
+	b	. ;	/* prevent speculative execution */
+
+#if defined(CONFIG_RELOCATABLE)
+	/*
+	 * We can't branch directly; in the direct case we use LR
+	 * and system_call_entry restores LR.  (We thus need to move
+	 * LR to r10 in the RFID case too.)
+	 */
+#define SYSCALL_PSERIES_2_DIRECT				\
+	mflr	r10 ;						\
+	ld	r12,PACAKBASE(r13) ; 				\
+	LOAD_HANDLER(r12, system_call_entry_direct) ;		\
+	mtlr	r12 ;						\
+	mfspr	r12,SPRN_SRR1 ;					\
+	/* Re-use of r13... No spare regs to do this */	\
+	li	r13,MSR_RI ;					\
+	mtmsrd 	r13,1 ;						\
+	GET_PACA(r13) ;	/* get r13 back */			\
+	blr ;
+#else
+	/* We can branch directly */
+#define SYSCALL_PSERIES_2_DIRECT				\
+	mfspr	r12,SPRN_SRR1 ;					\
+	li	r10,MSR_RI ;					\
+	mtmsrd 	r10,1 ;			/* Set RI (EE=0) */	\
+	b	system_call_entry_direct ;
+#endif
 
 /*
  * This is the start of the interrupt handlers for pSeries
@@ -207,31 +271,11 @@
 	KVMTEST(0xc00)
 	GET_SCRATCH0(r13)
 #endif
-BEGIN_FTR_SECTION
-	cmpdi	r0,0x1ebe
-	beq-	1f
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
-	mr	r9,r13
-	GET_PACA(r13)
-	mfspr	r11,SPRN_SRR0
-	mfspr	r12,SPRN_SRR1
-	ld	r10,PACAKBASE(r13)
-	LOAD_HANDLER(r10, system_call_entry)
-	mtspr	SPRN_SRR0,r10
-	ld	r10,PACAKMSR(r13)
-	mtspr	SPRN_SRR1,r10
-	rfid
-	b	.	/* prevent speculative execution */
-
+	SYSCALL_PSERIES_1
+	SYSCALL_PSERIES_2_RFID
+	SYSCALL_PSERIES_3
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
 
-/* Fast LE/BE switch system call */
-1:	mfspr	r12,SPRN_SRR1
-	xori	r12,r12,MSR_LE
-	mtspr	SPRN_SRR1,r12
-	rfid		/* return to userspace */
-	b	.
-
 	STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
 	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
 
@@ -276,7 +320,7 @@
 	KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
 
 	. = 0x1500
-	.global denorm_Hypervisor
+	.global denorm_exception_hv
 denorm_exception_hv:
 	HMT_MEDIUM
 	mtspr	SPRN_SPRG_HSCRATCH0,r13
@@ -311,12 +355,14 @@
 #ifdef CONFIG_CBE_RAS
 	STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
+#else
+	. = 0x1800
 #endif /* CONFIG_CBE_RAS */
 
-	. = 0x3000
 
 /*** Out of line interrupts support ***/
 
+	.align	7
 	/* moved from 0x200 */
 machine_check_pSeries:
 	.globl machine_check_fwnmi
@@ -575,16 +621,12 @@
 	b	.				/* prevent spec. execution */
 #endif /* __DISABLED__ */
 
-	.align	7
-	.globl	__end_interrupts
-__end_interrupts:
-
 /*
  * Code from here down to __end_handlers is invoked from the
  * exception prologs above.  Because the prologs assemble the
  * addresses of these handlers using the LOAD_HANDLER macro,
- * which uses an addi instruction, these handlers must be in
- * the first 32k of the kernel image.
+ * which uses an ori instruction, these handlers must be in
+ * the first 64k of the kernel image.
  */
 
 /*** Common interrupt handlers ***/
@@ -613,8 +655,8 @@
 	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
 	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
-        STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
-        STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
+	STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+	STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
 	STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
 	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
 	STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception)
@@ -629,7 +671,158 @@
 	STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
 #endif /* CONFIG_CBE_RAS */
 
+	/*
+	 * Relocation-on interrupts: A subset of the interrupts can be delivered
+	 * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
+	 * it.  Addresses are the same as the original interrupt addresses, but
+	 * offset by 0xc000000000004000.
+	 * It's impossible to receive interrupts below 0x300 via this mechanism.
+ * KVM: None of these traps are from the guest; anything that escalated
+	 * to HV=1 from HV=0 is delivered via real mode handlers.
+	 */
+
+	/*
+	 * This uses the standard macro, since the original 0x300 vector
+	 * only has extra guff for STAB-based processors -- which never
+	 * come here.
+	 */
+	STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access)
+	. = 0x4380
+	.globl data_access_slb_relon_pSeries
+data_access_slb_relon_pSeries:
+	HMT_MEDIUM
+	SET_SCRATCH0(r13)
+	EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
+	std	r3,PACA_EXSLB+EX_R3(r13)
+	mfspr	r3,SPRN_DAR
+	mfspr	r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+	b	.slb_miss_realmode
+#else
+	/*
+	 * We can't just use a direct branch to .slb_miss_realmode
+	 * because the distance from here to there depends on where
+	 * the kernel ends up being put.
+	 */
+	mfctr	r11
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10, .slb_miss_realmode)
+	mtctr	r10
+	bctr
+#endif
+
+	STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access)
+	. = 0x4480
+	.globl instruction_access_slb_relon_pSeries
+instruction_access_slb_relon_pSeries:
+	HMT_MEDIUM
+	SET_SCRATCH0(r13)
+	EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
+	std	r3,PACA_EXSLB+EX_R3(r13)
+	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
+	mfspr	r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+	b	.slb_miss_realmode
+#else
+	mfctr	r11
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10, .slb_miss_realmode)
+	mtctr	r10
+	bctr
+#endif
+
+	. = 0x4500
+	.globl hardware_interrupt_relon_pSeries;
+	.globl hardware_interrupt_relon_hv;
+hardware_interrupt_relon_pSeries:
+hardware_interrupt_relon_hv:
+	BEGIN_FTR_SECTION
+		_MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV)
+	FTR_SECTION_ELSE
+		_MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR)
+	ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_206)
+	STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment)
+	STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check)
+	STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable)
+	MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer)
+	STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer)
+	STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b)
+
+	. = 0x4c00
+	.globl system_call_relon_pSeries
+system_call_relon_pSeries:
+	HMT_MEDIUM
+	SYSCALL_PSERIES_1
+	SYSCALL_PSERIES_2_DIRECT
+	SYSCALL_PSERIES_3
+
+	STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
+
+	. = 0x4e00
+	b	h_data_storage_relon_hv
+
+	. = 0x4e20
+	b	h_instr_storage_relon_hv
+
+	. = 0x4e40
+	b	emulation_assist_relon_hv
+
+	. = 0x4e50
+	b	hmi_exception_relon_hv
+
+	. = 0x4e60
+	b	hmi_exception_relon_hv
+
+	/* For when we support the doorbell interrupt:
+	STD_RELON_EXCEPTION_HYPERVISOR(0x4e80, 0xe80, doorbell_hyper)
+	*/
+
+performance_monitor_relon_pSeries_1:
+	. = 0x4f00
+	b	performance_monitor_relon_pSeries
+
+altivec_unavailable_relon_pSeries_1:
+	. = 0x4f20
+	b	altivec_unavailable_relon_pSeries
+
+vsx_unavailable_relon_pSeries_1:
+	. = 0x4f40
+	b	vsx_unavailable_relon_pSeries
+
+#ifdef CONFIG_CBE_RAS
+	STD_RELON_EXCEPTION_HV(0x5200, 0x1202, cbe_system_error)
+#endif /* CONFIG_CBE_RAS */
+	STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
+#ifdef CONFIG_PPC_DENORMALISATION
+	. = 0x5500
+	b	denorm_exception_hv
+#endif
+#ifdef CONFIG_CBE_RAS
+	STD_RELON_EXCEPTION_HV(0x5600, 0x1602, cbe_maintenance)
+#else
+#ifdef CONFIG_HVC_SCOM
+	STD_RELON_EXCEPTION_HV(0x5600, 0x1600, maintence_interrupt)
+	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1600)
+#endif /* CONFIG_HVC_SCOM */
+#endif /* CONFIG_CBE_RAS */
+	STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
+#ifdef CONFIG_CBE_RAS
+	STD_RELON_EXCEPTION_HV(0x5800, 0x1802, cbe_thermal)
+#endif /* CONFIG_CBE_RAS */
+
+	/* Other future vectors */
 	.align	7
+	.globl	__end_interrupts
+__end_interrupts:
+
+	.align	7
+system_call_entry_direct:
+#if defined(CONFIG_RELOCATABLE)
+	/* The first-level prologue may have used LR to get here, saving the
+	 * original LR value in r10.  To avoid hacking/ifdeffing the common
+	 * code, restore it here.
+	 */
+	mtlr	r10
+#endif
 system_call_entry:
 	b	system_call_common
 
@@ -714,21 +907,21 @@
 	ld	r3,PACA_EXGEN+EX_DAR(r13)
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
 	li	r5,0x300
-	b	.do_hash_page	 	/* Try to handle as hpte fault */
+	b	.do_hash_page		/* Try to handle as hpte fault */
 
 	.align  7
-        .globl  h_data_storage_common
+	.globl  h_data_storage_common
 h_data_storage_common:
-        mfspr   r10,SPRN_HDAR
-        std     r10,PACA_EXGEN+EX_DAR(r13)
-        mfspr   r10,SPRN_HDSISR
-        stw     r10,PACA_EXGEN+EX_DSISR(r13)
-        EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
-        bl      .save_nvgprs
+	mfspr   r10,SPRN_HDAR
+	std     r10,PACA_EXGEN+EX_DAR(r13)
+	mfspr   r10,SPRN_HDSISR
+	stw     r10,PACA_EXGEN+EX_DSISR(r13)
+	EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+	bl      .save_nvgprs
 	DISABLE_INTS
-        addi    r3,r1,STACK_FRAME_OVERHEAD
-        bl      .unknown_exception
-        b       .ret_from_except
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	bl      .unknown_exception
+	b       .ret_from_except
 
 	.align	7
 	.globl instruction_access_common
@@ -741,7 +934,7 @@
 	li	r5,0x400
 	b	.do_hash_page		/* Try to handle as hpte fault */
 
-        STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+	STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
 
 /*
  * Here is the common SLB miss user that is used when going to virtual
@@ -1152,6 +1345,21 @@
 	rfid
 	b	.	/* prevent speculative execution */
 
+
+	/* Equivalents to the above handlers for relocation-on interrupt vectors */
+	STD_RELON_EXCEPTION_HV(., 0xe00, h_data_storage)
+	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00)
+	STD_RELON_EXCEPTION_HV(., 0xe20, h_instr_storage)
+	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20)
+	STD_RELON_EXCEPTION_HV(., 0xe40, emulation_assist)
+	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40)
+	STD_RELON_EXCEPTION_HV(., 0xe60, hmi_exception)
+	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60)
+
+	STD_RELON_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+	STD_RELON_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+	STD_RELON_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
+
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
 /*
  * Data area reserved for FWNMI option.
@@ -1164,7 +1372,7 @@
 	/* pseries and powernv need to keep the whole page from
 	 * 0x7000 to 0x8000 free for use by the firmware
 	 */
-        . = 0x8000
+	. = 0x8000
 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
 
 /* Space for CPU0's segment table */
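
A note on the address arithmetic behind the relocation-on vectors added
above: with AIL=2 the interrupt is delivered at the original vector offset
plus 0xc000000000004000, which is why the new handlers sit at 0x4300, 0x4380,
0x4500 and so on. A standalone sketch of the mapping (the constant comes from
the comment in the hunk; the helper name is made up):

	#include <stdio.h>
	#include <stdint.h>

	#define AIL2_BASE 0xc000000000004000ULL	/* relocation-on delivery base */

	/* real-mode vector offset -> relocation-on effective address */
	static uint64_t relon_vector(uint64_t vec)
	{
		return AIL2_BASE + vec;
	}

	int main(void)
	{
		/* 0x300 (data access) lands at 0xc000000000004300 */
		printf("0x%llx\n", (unsigned long long)relon_vector(0x300));
		return 0;
	}
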
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 58bddee..116f086 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -422,7 +422,7 @@
 	tovirt(r6,r6)			/* on booke, we already run at PAGE_OFFSET */
 #endif
 
-#ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_RELOCATABLE
 /*
  * Check if the kernel has to be running as relocatable kernel based on the
  * variable __run_at_load, if it is set the kernel is treated as relocatable
@@ -432,7 +432,8 @@
 	cmplwi	cr0,r7,1
 	bne	3f
 
-	li	r5,__end_interrupts - _stext	/* just copy interrupts */
+	/* just copy interrupts */
+	LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext)
 	b	5f
 3:
 #endif
@@ -703,6 +704,7 @@
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
 	/* Setup OPAL entry */
+	LOAD_REG_ADDR(r11, opal)
 	std	r28,0(r11);
 	std	r29,8(r11);
 #endif
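
The li to LOAD_REG_IMMEDIATE change above is about instruction encoding: li
carries only a 16-bit signed immediate, and with the relocation-on vectors
added, __end_interrupts moves far enough that the offset from _stext can no
longer be assumed to fit that range, so the constant has to be synthesized in
multiple instructions. A tiny illustration of the limit (the sample values
are assumptions, not measured offsets):

	#include <stdio.h>

	/* "li rD,SI" holds SI in 16 signed bits: [-32768, 32767] */
	static int fits_li(long v)
	{
		return v >= -32768 && v <= 32767;
	}

	int main(void)
	{
		printf("%d\n", fits_li(0x7000));	/* 28672: fits */
		printf("%d\n", fits_li(0x9000));	/* 36864: does not */
		return 0;
	}
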
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 2099d9a..ea78761 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -55,9 +55,6 @@
  */
 void cpu_idle(void)
 {
-	if (ppc_md.idle_loop)
-		ppc_md.idle_loop();	/* doesn't return */
-
 	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
 		tick_nohz_idle_enter();
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 8226c6c..c862fd7 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -656,7 +656,7 @@
 	struct iommu_pool *p;
 
 	/* number of bytes needed for the bitmap */
-	sz = (tbl->it_size + 7) >> 3;
+	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
 
 	page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
 	if (!page)
@@ -708,7 +708,7 @@
 
 void iommu_free_table(struct iommu_table *tbl, const char *node_name)
 {
-	unsigned long bitmap_sz, i;
+	unsigned long bitmap_sz;
 	unsigned int order;
 
 	if (!tbl || !tbl->it_map) {
@@ -718,17 +718,11 @@
 	}
 
 	/* verify that table contains no entries */
-	/* it_size is in entries, and we're examining 64 at a time */
-	for (i = 0; i < (tbl->it_size/64); i++) {
-		if (tbl->it_map[i] != 0) {
-			printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
-				__func__, node_name);
-			break;
-		}
-	}
+	if (!bitmap_empty(tbl->it_map, tbl->it_size))
+		pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
 
 	/* calculate bitmap size in bytes */
-	bitmap_sz = (tbl->it_size + 7) / 8;
+	bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
 
 	/* free bitmap */
 	order = get_order(bitmap_sz);
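
Both sizing changes in this file replace byte-granular arithmetic with
BITS_TO_LONGS(): helpers such as the bitmap_empty() call above read whole
unsigned longs, so the buffer must be padded to a multiple of
sizeof(unsigned long) rather than to whole bytes. A standalone restatement of
the difference:

	#include <stdio.h>

	/* userspace copies of the kernel macros used by the hunk */
	#define BITS_PER_LONG		(8 * sizeof(unsigned long))
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)

	int main(void)
	{
		unsigned long bits = 100;	/* e.g. tbl->it_size entries */

		/* old: 13 bytes; bitmap ops could read past the end */
		printf("bytes: %lu\n", (bits + 7) >> 3);
		/* new: 16 bytes on 64-bit, safe for bitmap_empty() */
		printf("longs: %lu\n", (unsigned long)
		       (BITS_TO_LONGS(bits) * sizeof(unsigned long)));
		return 0;
	}
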
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index fa9f6c7..e1ec57e 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -218,23 +218,23 @@
 	 * be sure what's in them, so remove them. */
 	prop = of_find_property(node, "linux,crashkernel-base", NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	prop = of_find_property(node, "linux,crashkernel-size", NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	if (crashk_res.start != 0) {
-		prom_add_property(node, &crashk_base_prop);
+		of_add_property(node, &crashk_base_prop);
 		crashk_size = resource_size(&crashk_res);
-		prom_add_property(node, &crashk_size_prop);
+		of_add_property(node, &crashk_size_prop);
 	}
 
 	/*
 	 * memory_limit is required by the kexec-tools to limit the
 	 * crash regions to the actual memory used.
 	 */
-	prom_update_property(node, &memory_limit_prop);
+	of_update_property(node, &memory_limit_prop);
 }
 
 static int __init kexec_setup(void)
@@ -249,11 +249,11 @@
 	/* remove any stale properties so ours can be found */
 	prop = of_find_property(node, kernel_end_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	/* information needed by userspace when using default_machine_kexec */
 	kernel_end = __pa(_end);
-	prom_add_property(node, &kernel_end_prop);
+	of_add_property(node, &kernel_end_prop);
 
 	export_crashk_values(node);
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index d7f6090..7206701 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -389,14 +389,14 @@
 	/* remove any stale properties so ours can be found */
 	prop = of_find_property(node, htab_base_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 	prop = of_find_property(node, htab_size_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	htab_base = __pa(htab_address);
-	prom_add_property(node, &htab_base_prop);
-	prom_add_property(node, &htab_size_prop);
+	of_add_property(node, &htab_base_prop);
+	of_add_property(node, &htab_size_prop);
 
 	of_node_put(node);
 	return 0;
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 4b06ec5..64f526a 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -208,7 +208,7 @@
 		of_prop->name = "pci-OF-bus-map";
 		of_prop->length = 256;
 		of_prop->value = &of_prop[1];
-		prom_add_property(dn, of_prop);
+		of_add_property(dn, of_prop);
 		of_node_put(dn);
 	}
 }
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 37725e8..8b6f7a9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -32,6 +32,7 @@
 #include <linux/debugfs.h>
 #include <linux/irq.h>
 #include <linux/memblock.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -49,11 +50,11 @@
 #include <asm/btext.h>
 #include <asm/sections.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/pci-bridge.h>
 #include <asm/kexec.h>
 #include <asm/opal.h>
 #include <asm/fadump.h>
+#include <asm/debug.h>
 
 #include <mm/mmu_decl.h>
 
@@ -802,7 +803,7 @@
 	int err;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = of_finish_dynamic_node(node);
 		if (err < 0)
 			printk(KERN_ERR "finish_node returned %d\n", err);
@@ -821,7 +822,7 @@
 
 static int __init prom_reconfig_setup(void)
 {
-	return pSeries_reconfig_notifier_register(&prom_reconfig_nb);
+	return of_reconfig_notifier_register(&prom_reconfig_nb);
 }
 __initcall(prom_reconfig_setup);
 #endif
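
For reference, the of_reconfig API that this conversion (and the ones below)
moves to follows the standard kernel notifier pattern. A minimal sketch built
only from the calls and actions visible in these hunks (illustrative, not
part of the patch; error handling elided):

	#include <linux/notifier.h>
	#include <linux/of.h>

	static int my_reconfig_handler(struct notifier_block *nb,
				       unsigned long action, void *node)
	{
		switch (action) {
		case OF_RECONFIG_ATTACH_NODE:
			/* a device_node was added to the live tree */
			break;
		case OF_RECONFIG_DETACH_NODE:
			/* a device_node is being removed */
			break;
		case OF_RECONFIG_UPDATE_PROPERTY:
			/* here "node" is really a struct of_prop_reconfig * */
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block my_nb = {
		.notifier_call = my_reconfig_handler,
	};

	/* registered once, typically from an initcall: */
	/* of_reconfig_notifier_register(&my_nb); */
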
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index cb6c123..779f340 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -671,6 +671,7 @@
 #define OV1_PPC_2_04		0x08	/* set if we support PowerPC 2.04 */
 #define OV1_PPC_2_05		0x04	/* set if we support PowerPC 2.05 */
 #define OV1_PPC_2_06		0x02	/* set if we support PowerPC 2.06 */
+#define OV1_PPC_2_07		0x01	/* set if we support PowerPC 2.07 */
 
 /* Option vector 2: Open Firmware options supported */
 #define OV2_REAL_MODE		0x20	/* set if we want OF in real mode */
@@ -707,6 +708,7 @@
 #define OV5_PFO_HW_RNG		0x80	/* PFO Random Number Generator */
 #define OV5_PFO_HW_842		0x40	/* PFO Compression Accelerator */
 #define OV5_PFO_HW_ENCR		0x20	/* PFO Encryption Accelerator */
+#define OV5_SUB_PROCESSORS	0x01    /* 1, 2, or 4 Sub-Processors supported */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX		0x02	/* Linux is our OS */
@@ -719,6 +721,8 @@
 	W(0xfffe0000), W(0x003a0000),	/* POWER5/POWER5+ */
 	W(0xffff0000), W(0x003e0000),	/* POWER6 */
 	W(0xffff0000), W(0x003f0000),	/* POWER7 */
+	W(0xffff0000), W(0x004b0000),	/* POWER8 */
+	W(0xffffffff), W(0x0f000004),	/* all 2.07-compliant */
 	W(0xffffffff), W(0x0f000003),	/* all 2.06-compliant */
 	W(0xffffffff), W(0x0f000002),	/* all 2.05-compliant */
 	W(0xfffffffe), W(0x0f000001),	/* all 2.04-compliant and earlier */
@@ -728,7 +732,7 @@
 	3 - 2,				/* length */
 	0,				/* don't ignore, don't halt */
 	OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
-	OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06,
+	OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
 
 	/* option vector 2: Open Firmware options supported */
 	34 - 2,				/* length */
@@ -755,7 +759,7 @@
 	OV4_MIN_ENT_CAP,		/* minimum VP entitled capacity */
 
 	/* option vector 5: PAPR/OF options */
-	18 - 2,				/* length */
+	19 - 2,				/* length */
 	0,				/* don't ignore, don't halt */
 	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
 	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
@@ -769,13 +773,14 @@
 	 * must match by the macro below. Update the definition if
 	 * the structure layout changes.
 	 */
-#define IBM_ARCH_VEC_NRCORES_OFFSET	101
+#define IBM_ARCH_VEC_NRCORES_OFFSET	117
 	W(NR_CPUS),			/* number of cores supported */
 	0,
 	0,
 	0,
 	0,
 	OV5_PFO_HW_RNG | OV5_PFO_HW_ENCR | OV5_PFO_HW_842,
+	OV5_SUB_PROCESSORS,
 	/* option vector 6: IBM PAPR hints */
 	4 - 2,				/* length */
 	0,
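
A quick editorial cross-check of the offset bump above: the PVR table gained
two (mask, value) pairs, and each pair is two W() words of four bytes, so the
NR_CPUS field moves by 2 * 2 * 4 = 16 bytes, from offset 101 to 117. (The new
OV5_SUB_PROCESSORS byte is appended after the NR_CPUS field, so it changes
only the option vector 5 length, 18 to 19.) As a compilable check:

	#include <assert.h>

	int main(void)
	{
		/* two new PVR entries, each (mask, value) = 2 words * 4 bytes */
		assert(101 + 2 * 2 * 4 == 117);
		return 0;
	}
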
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 79d8e56..c497000 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -952,6 +952,10 @@
 		arch_bp_generic_fields(data &
 					(DABR_DATA_WRITE | DABR_DATA_READ),
 							&attr.bp_type);
+
+		/* Enable breakpoint */
+		attr.disabled = false;
+
 		ret =  modify_user_hw_breakpoint(bp, &attr);
 		if (ret) {
 			ptrace_put_breakpoints(task);
@@ -1037,7 +1041,7 @@
 }
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static long set_intruction_bp(struct task_struct *child,
+static long set_instruction_bp(struct task_struct *child,
 			      struct ppc_hw_breakpoint *bp_info)
 {
 	int slot;
@@ -1338,6 +1342,12 @@
 static long ppc_set_hwdebug(struct task_struct *child,
 		     struct ppc_hw_breakpoint *bp_info)
 {
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int len = 0;
+	struct thread_struct *thread = &(child->thread);
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #ifndef CONFIG_PPC_ADV_DEBUG_REGS
 	unsigned long dabr;
 #endif
@@ -1365,7 +1375,7 @@
 		if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
 		    (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
 			return -EINVAL;
-		return set_intruction_bp(child, bp_info);
+		return set_instruction_bp(child, bp_info);
 	}
 	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
 		return set_dac(child, bp_info);
@@ -1381,13 +1391,9 @@
 	 */
 	if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
 	    (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
-	    bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT ||
 	    bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
 		return -EINVAL;
 
-	if (child->thread.dabr)
-		return -ENOSPC;
-
 	if ((unsigned long)bp_info->addr >= TASK_SIZE)
 		return -EIO;
 
@@ -1397,6 +1403,50 @@
 		dabr |= DABR_DATA_READ;
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 		dabr |= DABR_DATA_WRITE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	if (ptrace_get_breakpoints(child) < 0)
+		return -ESRCH;
+
+	/*
+	 * Check if the request is for 'range' breakpoints. We can
+	 * support it if range < 8 bytes.
+	 */
+	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) {
+		len = bp_info->addr2 - bp_info->addr;
+	} else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+		ptrace_put_breakpoints(child);
+		return -EINVAL;
+	}
+	bp = thread->ptrace_bps[0];
+	if (bp) {
+		ptrace_put_breakpoints(child);
+		return -ENOSPC;
+	}
+
+	/* Create a new breakpoint request if one doesn't exist already */
+	hw_breakpoint_init(&attr);
+	attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+	attr.bp_len = len;
+	arch_bp_generic_fields(dabr & (DABR_DATA_WRITE | DABR_DATA_READ),
+								&attr.bp_type);
+
+	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+					       ptrace_triggered, NULL, child);
+	if (IS_ERR(bp)) {
+		thread->ptrace_bps[0] = NULL;
+		ptrace_put_breakpoints(child);
+		return PTR_ERR(bp);
+	}
+
+	ptrace_put_breakpoints(child);
+	return 1;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+	if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
+		return -EINVAL;
+
+	if (child->thread.dabr)
+		return -ENOSPC;
 
 	child->thread.dabr = dabr;
 	child->thread.dabrx = DABRX_ALL;
@@ -1405,8 +1455,13 @@
 #endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
 }
 
-static long ppc_del_hwdebug(struct task_struct *child, long addr, long data)
+static long ppc_del_hwdebug(struct task_struct *child, long data)
 {
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int ret = 0;
+	struct thread_struct *thread = &(child->thread);
+	struct perf_event *bp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	int rc;
 
@@ -1426,10 +1481,25 @@
 #else
 	if (data != 1)
 		return -EINVAL;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	if (ptrace_get_breakpoints(child) < 0)
+		return -ESRCH;
+
+	bp = thread->ptrace_bps[0];
+	if (bp) {
+		unregister_hw_breakpoint(bp);
+		thread->ptrace_bps[0] = NULL;
+	} else
+		ret = -ENOENT;
+	ptrace_put_breakpoints(child);
+	return ret;
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
 	if (child->thread.dabr == 0)
 		return -ENOENT;
 
 	child->thread.dabr = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 	return 0;
 #endif
@@ -1536,7 +1606,11 @@
 		dbginfo.data_bp_alignment = 4;
 #endif
 		dbginfo.sizeof_condition = 0;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+		dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
+#else
 		dbginfo.features = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
 
 		if (!access_ok(VERIFY_WRITE, datavp,
@@ -1563,7 +1637,7 @@
 	}
 
 	case PPC_PTRACE_DELHWDEBUG: {
-		ret = ppc_del_hwdebug(child, addr, data);
+		ret = ppc_del_hwdebug(child, data);
 		break;
 	}
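
The DABR path above now funnels through the generic hw-breakpoint layer
backed by perf. A trimmed sketch of that API using only the calls that appear
in this hunk (illustrative; the ptrace_get/put_breakpoints bookkeeping and
error paths are elided, and the names here are made up):

	#include <linux/hw_breakpoint.h>
	#include <linux/perf_event.h>

	static void sample_triggered(struct perf_event *bp,
				     struct perf_sample_data *data,
				     struct pt_regs *regs)
	{
		/* runs when the watched address range is hit */
	}

	static struct perf_event *watch_range(struct task_struct *tsk,
					      unsigned long addr, int len)
	{
		struct perf_event_attr attr;

		hw_breakpoint_init(&attr);
		attr.bp_addr = addr;
		attr.bp_len = len;	/* the new "range" support: len < 8 */
		attr.bp_type = HW_BREAKPOINT_W;

		return register_user_hw_breakpoint(&attr, sample_triggered,
						   NULL, tsk);
	}
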
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index fcec382..1fd6e7b 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -42,7 +42,6 @@
 #include <asm/time.h>
 #include <asm/mmu.h>
 #include <asm/topology.h>
-#include <asm/pSeries_reconfig.h>
 
 struct rtas_t rtas = {
 	.lock = __ARCH_SPIN_LOCK_UNLOCKED
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 20b0120..8329190 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -650,10 +650,8 @@
 	int token;
 
 	dp->data = kzalloc(buf_size, GFP_KERNEL);
-	if (dp->data == NULL) {
-		remove_flash_pde(dp);
+	if (dp->data == NULL)
 		return -ENOMEM;
-	}
 
 	/*
 	 * This code assumes that the status int is the first member of the
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index efb6a41..6da881b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -601,6 +601,11 @@
 
 	kvm_linear_init();
 
+	/* Interrupt code needs to be 64K-aligned */
+	if ((unsigned long)_stext & 0xffff)
+		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
+		      (unsigned long)_stext);
+
 	ppc64_boot_msg(0x15, "Setup Done");
 }
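
The new panic above enforces what its message says: the interrupt code must
start 64K-aligned, presumably for the relocation-on vector placement added in
this series. An address is 64K-aligned exactly when its low 16 bits are zero,
hence the 0xffff mask; restated as standalone C:

	#include <stdio.h>
	#include <stdint.h>

	/* 64K-aligned iff the low 16 bits are clear */
	static int is_64k_aligned(uint64_t addr)
	{
		return (addr & 0xffff) == 0;
	}

	int main(void)
	{
		printf("%d\n", is_64k_aligned(0xc000000000000000ULL));	/* 1 */
		printf("%d\n", is_64k_aligned(0xc000000000001000ULL));	/* 0 */
		return 0;
	}
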
 
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 9c2ed90..8a93778 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -175,19 +175,10 @@
  * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
  * and the register representation of a signed int (msr in 64-bit mode) is performed.
  */
-asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval)
+asmlinkage long compat_sys_sched_rr_get_interval_wrapper(u32 pid,
+							 struct compat_timespec __user *interval)
 {
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs ();
-
-	/* The __user pointer cast is valid because of the set_fs() */
-	set_fs (KERNEL_DS);
-	ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t);
-	set_fs (old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
+	return compat_sys_sched_rr_get_interval((int)pid, interval);
 }
 
 /* Note: it is necessary to treat mode as an unsigned int,
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index c39c1ca..f974849 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -122,29 +122,6 @@
 	return n - remain;
 }
 
-int udbg_read(char *buf, int buflen)
-{
-	char *p = buf;
-	int i, c;
-
-	if (!udbg_getc)
-		return 0;
-
-	for (i = 0; i < buflen; ++i) {
-		do {
-			c = udbg_getc();
-			if (c == -1 && i == 0)
-				return -1;
-
-		} while (c == 0x11 || c == 0x13);
-		if (c == 0 || c == -1)
-			break;
-		*p++ = c;
-	}
-
-	return i;
-}
-
 #define UDBG_BUFSIZE 256
 void udbg_printf(const char *fmt, ...)
 {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 59213cf..bba87ca 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -399,18 +399,6 @@
 	return result;
 }
 
-struct of_drconf_cell {
-	u64	base_addr;
-	u32	drc_index;
-	u32	reserved;
-	u32	aa_index;
-	u32	flags;
-};
-
-#define DRCONF_MEM_ASSIGNED	0x00000008
-#define DRCONF_MEM_AI_INVALID	0x00000040
-#define DRCONF_MEM_RESERVED	0x00000080
-
 /*
  * Read the next memblock list entry from the ibm,dynamic-memory property
  * and return the information in the provided of_drconf_cell structure.
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index fab919f..626ad08 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -191,12 +191,6 @@
 #ifdef CONFIG_PPC_47x
 
 /*
- * 47x variant of icbt
- */
-# define ICBT(CT,RA,RB)	\
-	.long	0x7c00002c | ((CT) << 21) | ((RA) << 16) | ((RB) << 11)
-
-/*
  * _tlbivax_bcast is only on 47x. We don't bother doing a runtime
  * check though, it will blow up soon enough if we mistakenly try
  * to use it on a 440.
@@ -208,8 +202,7 @@
 	wrteei	0
 	mtspr	SPRN_MMUCR,r5
 	isync
-/*	tlbivax	0,r3 - use .long to avoid binutils deps */
-	.long 0x7c000624 | (r3 << 11)
+	PPC_TLBIVAX(0, R3)
 	isync
 	eieio
 	tlbsync
@@ -227,11 +220,11 @@
 	bl	2f
 2:	mflr	r6
 	li	r7,32
-	ICBT(0,r6,r7)		/* touch next cache line */
+	PPC_ICBT(0,R6,R7)		/* touch next cache line */
 	add	r6,r6,r7
-	ICBT(0,r6,r7)		/* touch next cache line */
+	PPC_ICBT(0,R6,R7)		/* touch next cache line */
 	add	r6,r6,r7
-	ICBT(0,r6,r7)		/* touch next cache line */
+	PPC_ICBT(0,R6,R7)		/* touch next cache line */
 	sync
 	nop
 	nop
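
The deleted local ICBT() macro (like the open-coded tlbivax .long above it)
hand-assembled the instruction by OR-ing register fields into a base opcode;
the shared PPC_ICBT()/PPC_TLBIVAX() macros now centralize that packing. A
standalone check of the encoding, using the field layout from the deleted
macro:

	#include <stdio.h>
	#include <stdint.h>

	/* icbt CT,RA,RB per the removed macro: base opcode 0x7c00002c */
	static uint32_t icbt(unsigned ct, unsigned ra, unsigned rb)
	{
		return 0x7c00002cu | (ct << 21) | (ra << 16) | (rb << 11);
	}

	int main(void)
	{
		/* ICBT(0, r6, r7) assembles to 0x7c06382c */
		printf("0x%08x\n", icbt(0, 6, 7));
		return 0;
	}
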
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 441af08..2ee01e3 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -54,8 +54,10 @@
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
- *                                                 [  ><><><><><><>
- *                                                  NC P6P5P4P3P2P1
+ *                                              < ><  ><><><><><><>
+ *                                              L2  NC P6P5P4P3P2P1
+ *
+ * L2 - 16-18 - Required L2SEL value (select field)
  *
  * NC - number of counters
  *     15: NC error 0x8000
@@ -72,7 +74,7 @@
 static int power7_get_constraint(u64 event, unsigned long *maskp,
 				 unsigned long *valp)
 {
-	int pmc, sh;
+	int pmc, sh, unit;
 	unsigned long mask = 0, value = 0;
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
@@ -90,6 +92,15 @@
 		mask  |= 0x8000;
 		value |= 0x1000;
 	}
+
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit == 6) {
+		/* L2SEL must be identical across events */
+		int l2sel = (event >> PM_L2SEL_SH) & PM_L2SEL_MSK;
+		mask  |= 0x7 << 16;
+		value |= l2sel << 16;
+	}
+
 	*maskp = mask;
 	*valp = value;
 	return 0;
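
The new constraint reserves bits 16-18 for the required L2SEL value, so two
events that both target unit 6 can be scheduled together only when their
L2SEL fields agree. A simplified model of how such (mask, value) pairs
conflict (the real scheduler in the powerpc perf core is more involved; this
only shows the pairwise idea):

	#include <stdio.h>

	/* compatible iff the values agree wherever both masks are set */
	static int compatible(unsigned long m1, unsigned long v1,
			      unsigned long m2, unsigned long v2)
	{
		return ((v1 ^ v2) & (m1 & m2)) == 0;
	}

	int main(void)
	{
		unsigned long mask = 0x7ul << 16;	/* L2SEL, bits 16-18 */

		printf("%d\n", compatible(mask, 2ul << 16,
					  mask, 2ul << 16));	/* 1: same L2SEL */
		printf("%d\n", compatible(mask, 2ul << 16,
					  mask, 5ul << 16));	/* 0: conflict */
		return 0;
	}
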
diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig
index b62508b..c169998 100644
--- a/arch/powerpc/platforms/512x/Kconfig
+++ b/arch/powerpc/platforms/512x/Kconfig
@@ -2,7 +2,6 @@
 	bool "512x-based boards"
 	depends on 6xx
 	select FSL_SOC
-	select FB_FSL_DIU
 	select IPIC
 	select PPC_CLOCK
 	select PPC_PCI_CHOICE
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index dcef6ad..0a134e0 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -42,7 +42,10 @@
 	for_each_compatible_node(np, "pci", "fsl,mpc5121-pci")
 		mpc83xx_add_bridge(np);
 #endif
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 	mpc512x_setup_diu();
+#endif
 }
 
 static void __init mpc5121_ads_init_IRQ(void)
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 1ab6d11..c32b399 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -16,6 +16,13 @@
 extern int __init mpc5121_clk_init(void);
 void __init mpc512x_declare_of_platform_devices(void);
 extern void mpc512x_restart(char *cmd);
-extern void mpc512x_init_diu(void);
-extern void mpc512x_setup_diu(void);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+void mpc512x_init_diu(void);
+void mpc512x_setup_diu(void);
+#else
+#define mpc512x_init_diu NULL
+#define mpc512x_setup_diu NULL
+#endif
+
 #endif				/* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 1650e09..35f14fd 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -58,6 +58,8 @@
 		;
 }
 
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
 struct fsl_diu_shared_fb {
 	u8		gamma[0x300];	/* 32-bit aligned! */
 	struct diu_ad	ad0;		/* 32-bit aligned! */
@@ -66,25 +68,6 @@
 	bool		in_use;
 };
 
-u32 mpc512x_get_pixel_format(enum fsl_diu_monitor_port port,
-			     unsigned int bits_per_pixel)
-{
-	switch (bits_per_pixel) {
-	case 32:
-		return 0x88883316;
-	case 24:
-		return 0x88082219;
-	case 16:
-		return 0x65053118;
-	}
-	return 0x00000400;
-}
-
-void mpc512x_set_gamma_table(enum fsl_diu_monitor_port port,
-			     char *gamma_table_base)
-{
-}
-
 void mpc512x_set_monitor_port(enum fsl_diu_monitor_port port)
 {
 }
@@ -320,14 +303,14 @@
 		}
 	}
 
-	diu_ops.get_pixel_format	= mpc512x_get_pixel_format;
-	diu_ops.set_gamma_table		= mpc512x_set_gamma_table;
 	diu_ops.set_monitor_port	= mpc512x_set_monitor_port;
 	diu_ops.set_pixel_clock		= mpc512x_set_pixel_clock;
 	diu_ops.valid_monitor_port	= mpc512x_valid_monitor_port;
 	diu_ops.release_bootmem		= mpc512x_release_bootmem;
 }
 
+#endif
+
 void __init mpc512x_init_IRQ(void)
 {
 	struct device_node *np;
diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c
index 448d862..1843bc9 100644
--- a/arch/powerpc/platforms/52xx/lite5200.c
+++ b/arch/powerpc/platforms/52xx/lite5200.c
@@ -4,7 +4,7 @@
  * Written by: Grant Likely <grant.likely@secretlab.ca>
  *
  * Copyright (C) Secret Lab Technologies Ltd. 2006. All rights reserved.
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Description:
  * This program is free software; you can redistribute  it and/or modify it
diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c
index 9cf3602..792a301 100644
--- a/arch/powerpc/platforms/52xx/mpc5200_simple.c
+++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c
@@ -50,6 +50,7 @@
 
 /* list of the supported boards */
 static const char *board[] __initdata = {
+	"anonymous,a3m071",
 	"anonymous,a4m072",
 	"anon,charon",
 	"ifm,o2d",
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 2351f9e..16150fa 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -578,18 +578,4 @@
 	.probe = mpc52xx_lpbfifo_probe,
 	.remove = __devexit_p(mpc52xx_lpbfifo_remove),
 };
-
-/***********************************************************************
- * Module init/exit
- */
-static int __init mpc52xx_lpbfifo_init(void)
-{
-	return platform_driver_register(&mpc52xx_lpbfifo_driver);
-}
-module_init(mpc52xx_lpbfifo_init);
-
-static void __exit mpc52xx_lpbfifo_exit(void)
-{
-	platform_driver_unregister(&mpc52xx_lpbfifo_driver);
-}
-module_exit(mpc52xx_lpbfifo_exit);
+module_platform_driver(mpc52xx_lpbfifo_driver);
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 328d221..74861a7 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -16,7 +16,6 @@
 #include <linux/spinlock.h>
 #include <linux/irq.h>
 #include <linux/types.h>
-#include <linux/bootmem.h>
 #include <linux/slab.h>
 
 #include <asm/io.h>
@@ -149,7 +148,7 @@
 	priv->regs = of_iomap(np, 0);
 	if (!priv->regs) {
 		printk(KERN_ERR "Cannot map PCI PIC registers.\n");
-		goto out_free_bootmem;
+		goto out_free_kmalloc;
 	}
 
 	/* mask all PCI interrupts */
@@ -171,9 +170,8 @@
 
 out_unmap_regs:
 	iounmap(priv->regs);
-out_free_bootmem:
-	free_bootmem((unsigned long)priv,
-	             sizeof(struct pq2ads_pci_pic));
+out_free_kmalloc:
+	kfree(priv);
 	of_node_put(np);
 out_unmap_irq:
 	irq_dispose_mapping(irq);
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index d440435..8d76220 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Description:
  * MPC832xE MDS board specific routines.
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 1b1f6c8..1a26d2f 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Li Yang <LeoLi@freescale.com>
  *	   Yin Olivia <Hong-hua.Yin@freescale.com>
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index f8769d7..b63b42d 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -1,7 +1,7 @@
 /*
  * MPC8360E-RDK board file.
  *
- * Copyright (c) 2006  Freescale Semicondutor, Inc.
+ * Copyright (c) 2006  Freescale Semiconductor, Inc.
  * Copyright (c) 2007-2008  MontaVista Software, Inc.
  *
  * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index eca1f09..9813c81 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -1,7 +1,7 @@
 /*
  * arch/powerpc/platforms/83xx/mpc837x_rdb.c
  *
- * Copyright (C) 2007 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
  *
  * MPC837x RDB board specific routines
  *
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 8498f73..bd12588 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2010, 2012 Freescale Semicondutor, Inc.
+ * Copyright (C) 2006-2010, 2012 Freescale Semiconductor, Inc.
  * All rights reserved.
  *
  * Author: Andy Fleming <afleming@freescale.com>
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 848a3e9..7328b8d 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -249,7 +249,7 @@
 		goto exit;
 	}
 
-	iprop = of_get_property(law_node, "fsl,num-laws", 0);
+	iprop = of_get_property(law_node, "fsl,num-laws", NULL);
 	if (!iprop) {
 		pr_err("p1022ds: LAW node is missing fsl,num-laws property\n");
 		goto exit;
@@ -539,7 +539,7 @@
 				};
 
 				/*
-				 * prom_update_property() is called before
+				 * of_update_property() is called before
 				 * kmalloc() is available, so the 'new' object
 				 * should be allocated in the global area.
 				 * The easiest way to do that is to
@@ -548,7 +548,7 @@
 				 */
 				pr_info("p1022ds: disabling %s node",
 					np2->full_name);
-				prom_update_property(np2, &nor_status);
+				of_update_property(np2, &nor_status);
 				of_node_put(np2);
 			}
 
@@ -564,7 +564,7 @@
 
 				pr_info("p1022ds: disabling %s node",
 					np2->full_name);
-				prom_update_property(np2, &nand_status);
+				of_update_property(np2, &nand_status);
 				of_node_put(np2);
 			}
 
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 6fcfa12..148c2f2 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -128,6 +128,19 @@
 }
 #endif
 
+static inline void flush_spin_table(void *spin_table)
+{
+	flush_dcache_range((ulong)spin_table,
+		(ulong)spin_table + sizeof(struct epapr_spin_table));
+}
+
+static inline u32 read_spin_table_addr_l(void *spin_table)
+{
+	flush_dcache_range((ulong)spin_table,
+		(ulong)spin_table + sizeof(struct epapr_spin_table));
+	return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
+}
+
 static int __cpuinit smp_85xx_kick_cpu(int nr)
 {
 	unsigned long flags;
@@ -161,8 +174,8 @@
 
 	/* Map the spin table */
 	if (ioremappable)
-		spin_table = ioremap(*cpu_rel_addr,
-				sizeof(struct epapr_spin_table));
+		spin_table = ioremap_prot(*cpu_rel_addr,
+			sizeof(struct epapr_spin_table), _PAGE_COHERENT);
 	else
 		spin_table = phys_to_virt(*cpu_rel_addr);
 
@@ -173,7 +186,16 @@
 	generic_set_cpu_up(nr);
 
 	if (system_state == SYSTEM_RUNNING) {
+		/*
+		 * To keep it compatible with old boot programs which use a
+		 * cache-inhibited spin table, we need to flush the cache
+		 * before accessing the spin table to invalidate any stale
+		 * data.  We also need to flush the cache after writing to
+		 * the spin table to push the data out.
+		 */
+		flush_spin_table(spin_table);
 		out_be32(&spin_table->addr_l, 0);
+		flush_spin_table(spin_table);
 
 		/*
 		 * We don't set the BPTR register here since it already points
@@ -181,9 +203,14 @@
 		 */
 		mpic_reset_core(hw_cpu);
 
-		/* wait until core is ready... */
-		if (!spin_event_timeout(in_be32(&spin_table->addr_l) == 1,
-						10000, 100)) {
+		/*
+		 * wait until core is ready...
+		 * We need to invalidate the stale data, in case the boot
+		 * loader uses a cache-inhibited spin table.
+		 */
+		if (!spin_event_timeout(
+				read_spin_table_addr_l(spin_table) == 1,
+				10000, 100)) {
 			pr_err("%s: timeout waiting for core %d to reset\n",
 							__func__, hw_cpu);
 			ret = -ENOENT;
@@ -194,12 +221,10 @@
 		__secondary_hold_acknowledge = -1;
 	}
 #endif
+	flush_spin_table(spin_table);
 	out_be32(&spin_table->pir, hw_cpu);
 	out_be32(&spin_table->addr_l, __pa(__early_start));
-
-	if (!ioremappable)
-		flush_dcache_range((ulong)spin_table,
-			(ulong)spin_table + sizeof(struct epapr_spin_table));
+	flush_spin_table(spin_table);
 
 	/* Wait a bit for the CPU to ack. */
 	if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu,
@@ -213,13 +238,11 @@
 #else
 	smp_generic_kick_cpu(nr);
 
+	flush_spin_table(spin_table);
 	out_be32(&spin_table->pir, hw_cpu);
 	out_be64((u64 *)(&spin_table->addr_h),
 	  __pa((u64)*((unsigned long long *)generic_secondary_smp_init)));
-
-	if (!ioremappable)
-		flush_dcache_range((ulong)spin_table,
-			(ulong)spin_table + sizeof(struct epapr_spin_table));
+	flush_spin_table(spin_table);
 #endif
 
 	local_irq_restore(flags);
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index a817398..04d9d31 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -353,5 +353,7 @@
 	.time_init		= mpc86xx_time_init,
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= udbg_progress,
+#ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
 };
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 965d381..25db92a 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1094,7 +1094,7 @@
 		LOAD_INT(c), LOAD_FRAC(c),
 		count_active_contexts(),
 		atomic_read(&nr_spu_contexts),
-		current->nsproxy->pid_ns->last_pid);
+		task_active_pid_ns(current)->last_pid);
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 5b7d8ff..baee994 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -66,7 +66,7 @@
 	struct dentry *dentry;
 	int ret;
 
-	dentry = user_path_create(AT_FDCWD, pathname, &path, 1);
+	dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
 	ret = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		ret = spufs_create(&path, dentry, flags, mode, neighbor);
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index 6417119..311b804 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -55,6 +55,7 @@
 static unsigned int hi_freq;
 static unsigned int cur_freq;
 static unsigned int sleep_freq;
+static unsigned long transition_latency;
 
 /*
  * Different models uses different mechanisms to switch the frequency
@@ -403,7 +404,7 @@
 	if (policy->cpu != 0)
 		return -ENODEV;
 
-	policy->cpuinfo.transition_latency	= CPUFREQ_ETERNAL;
+	policy->cpuinfo.transition_latency	= transition_latency;
 	policy->cur = cur_freq;
 
 	cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu);
@@ -658,12 +659,14 @@
 	if (!value)
 		goto out;
 	cur_freq = (*value) / 1000;
+	transition_latency = CPUFREQ_ETERNAL;
 
 	/*  Check for 7447A based MacRISC3 */
 	if (of_machine_is_compatible("MacRISC3") &&
 	    of_get_property(cpunode, "dynamic-power-step", NULL) &&
 	    PVR_VER(mfspr(SPRN_PVR)) == 0x8003) {
 		pmac_cpufreq_init_7447A(cpunode);
+		transition_latency = 8000000;
 	/* Check for other MacRISC3 machines */
 	} else if (of_machine_is_compatible("PowerBook3,4") ||
 		   of_machine_is_compatible("PowerBook3,5") ||
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 471aa3c..53d052e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -34,24 +34,12 @@
 #include "powernv.h"
 #include "pci.h"
 
-static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
-		       struct va_format *vaf)
-{
-	char pfix[32];
-
-	if (pe->pdev)
-		strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
-	else
-		sprintf(pfix, "%04x:%02x     ",
-			pci_domain_nr(pe->pbus), pe->pbus->number);
-	return printk("pci %s%s: [PE# %.3d] %pV", level, pfix, pe->pe_number, vaf);
-}
-
 #define define_pe_printk_level(func, kern_level)		\
 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)	\
 {								\
 	struct va_format vaf;					\
 	va_list args;						\
+	char pfix[32];						\
 	int r;							\
 								\
 	va_start(args, fmt);					\
@@ -59,7 +47,16 @@
 	vaf.fmt = fmt;						\
 	vaf.va = &args;						\
 								\
-	r = __pe_printk(kern_level, pe, &vaf);			\
+	if (pe->pdev)						\
+		strlcpy(pfix, dev_name(&pe->pdev->dev),		\
+			sizeof(pfix));				\
+	else							\
+		sprintf(pfix, "%04x:%02x     ",			\
+			pci_domain_nr(pe->pbus),		\
+			pe->pbus->number);			\
+	r = printk(kern_level "pci %s: [PE# %.3d] %pV",		\
+		   pfix, pe->pe_number, &vaf);			\
+								\
 	va_end(args);						\
 								\
 	return r;						\
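
This reshuffle is more than cosmetic. The KERN_* markers are string literals
that printk() parses off the front of the format string, so a level passed as
a %s argument, and mid-string at that as in the old __pe_printk(), is simply
formatted into the message text and the record falls back to the default log
level. Pasting kern_level onto the format as a literal prefix inside the
macro restores proper tagging; schematically:

	/* level recognized: literal prefix on the format string */
	printk(KERN_ERR "pci %s: [PE# %.3d] %pV", pfix, pe_number, &vaf);

	/* level lost: the marker bytes land in the middle of the text */
	printk("pci %s%s: [PE# %.3d] %pV", KERN_ERR, pfix, pe_number, &vaf);

(pe_number stands in for pe->pe_number; these are fragments for illustration
only, not part of the patch.)
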
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index 56d26bc..0978713 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -280,13 +280,13 @@
 
 	if (tmp) {
 		pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name);
-		prom_remove_property(node, tmp);
+		of_remove_property(node, tmp);
 	}
 
-	result = prom_add_property(node, prop);
+	result = of_add_property(node, prop);
 
 	if (result)
-		pr_debug("%s:%d prom_set_property failed\n", __func__,
+		pr_debug("%s:%d of_add_property failed\n", __func__,
 			__LINE__);
 }
 
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 0f1b706..a1a7b9a 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -13,17 +13,16 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/notifier.h>
-#include <linux/proc_fs.h>
 #include <linux/spinlock.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 #include "offline_states.h"
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/uaccess.h>
 #include <asm/rtas.h>
-#include <asm/pSeries_reconfig.h>
 
 struct cc_workarea {
 	u32	drc_index;
@@ -255,9 +254,6 @@
 
 int dlpar_attach_node(struct device_node *dn)
 {
-#ifdef CONFIG_PROC_DEVICETREE
-	struct proc_dir_entry *ent;
-#endif
 	int rc;
 
 	of_node_set_flag(dn, OF_DYNAMIC);
@@ -266,44 +262,26 @@
 	if (!dn->parent)
 		return -ENOMEM;
 
-	rc = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, dn);
+	rc = of_attach_node(dn);
 	if (rc) {
 		printk(KERN_ERR "Failed to add device node %s\n",
 		       dn->full_name);
 		return rc;
 	}
 
-	of_attach_node(dn);
-
-#ifdef CONFIG_PROC_DEVICETREE
-	ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
-	if (ent)
-		proc_device_tree_add_node(dn, ent);
-#endif
-
 	of_node_put(dn->parent);
 	return 0;
 }
 
 int dlpar_detach_node(struct device_node *dn)
 {
-#ifdef CONFIG_PROC_DEVICETREE
-	struct device_node *parent = dn->parent;
-	struct property *prop = dn->properties;
+	int rc;
 
-	while (prop) {
-		remove_proc_entry(prop->name, dn->pde);
-		prop = prop->next;
-	}
+	rc = of_detach_node(dn);
+	if (rc)
+		return rc;
 
-	if (dn->pde)
-		remove_proc_entry(dn->pde->name, parent->pde);
-#endif
-
-	pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, dn);
-	of_detach_node(dn);
 	of_node_put(dn); /* Must decrement the refcount */
-
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 0b0eff0..7b56118 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -56,6 +56,7 @@
 	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
 	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
 	{FW_FEATURE_VPHN,		"hcall-vphn"},
+	{FW_FEATURE_SET_MODE,		"hcall-set-mode"},
 };
 
 /* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 64c97d8..a389562 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -23,12 +23,12 @@
 #include <linux/delay.h>
 #include <linux/sched.h>	/* for idle_task_exit */
 #include <linux/cpu.h>
+#include <linux/of.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/firmware.h>
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/xics.h>
 #include "plpar_wrappers.h"
 #include "offline_states.h"
@@ -333,10 +333,10 @@
 	int err = 0;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = pseries_add_processor(node);
 		break;
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		pseries_remove_processor(node);
 		break;
 	}
@@ -399,7 +399,7 @@
 
 	/* Processors can be added/removed only on LPAR */
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
-		pSeries_reconfig_notifier_register(&pseries_smp_nb);
+		of_reconfig_notifier_register(&pseries_smp_nb);
 		cpu_maps_update_begin();
 		if (cede_offline_enabled && parse_cede_parameters() == 0) {
 			default_offline_state = CPU_STATE_INACTIVE;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ecdb0a6..2372c60 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -16,7 +16,6 @@
 
 #include <asm/firmware.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/sparsemem.h>
 
 static unsigned long get_memblock_size(void)
@@ -187,42 +186,69 @@
 	return (ret < 0) ? -EINVAL : 0;
 }
 
-static int pseries_drconf_memory(unsigned long *base, unsigned int action)
+static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
 {
+	struct of_drconf_cell *new_drmem, *old_drmem;
 	unsigned long memblock_size;
-	int rc;
+	u32 entries;
+	u32 *p;
+	int i, rc = -EINVAL;
 
 	memblock_size = get_memblock_size();
 	if (!memblock_size)
 		return -EINVAL;
 
-	if (action == PSERIES_DRCONF_MEM_ADD) {
-		rc = memblock_add(*base, memblock_size);
-		rc = (rc < 0) ? -EINVAL : 0;
-	} else if (action == PSERIES_DRCONF_MEM_REMOVE) {
-		rc = pseries_remove_memblock(*base, memblock_size);
-	} else {
-		rc = -EINVAL;
+	p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL);
+	if (!p)
+		return -EINVAL;
+
+	/* The first int of the property is the number of LMBs described
+	 * by the property. This is followed by an array of of_drconf_cell
+	 * entries. Get the number of entries and skip to the array of
+	 * of_drconf_cell entries.
+	 */
+	entries = *p++;
+	old_drmem = (struct of_drconf_cell *)p;
+
+	p = (u32 *)pr->prop->value;
+	p++;
+	new_drmem = (struct of_drconf_cell *)p;
+
+	for (i = 0; i < entries; i++) {
+		if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) &&
+		    (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) {
+			rc = pseries_remove_memblock(old_drmem[i].base_addr,
+						     memblock_size);
+			break;
+		} else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) &&
+			   (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) {
+			rc = memblock_add(old_drmem[i].base_addr,
+					  memblock_size);
+			rc = (rc < 0) ? -EINVAL : 0;
+			break;
+		}
 	}
 
 	return rc;
 }
 
 static int pseries_memory_notifier(struct notifier_block *nb,
-				unsigned long action, void *node)
+				   unsigned long action, void *node)
 {
+	struct of_prop_reconfig *pr;
 	int err = 0;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = pseries_add_memory(node);
 		break;
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		err = pseries_remove_memory(node);
 		break;
-	case PSERIES_DRCONF_MEM_ADD:
-	case PSERIES_DRCONF_MEM_REMOVE:
-		err = pseries_drconf_memory(node, action);
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		pr = (struct of_prop_reconfig *)node;
+		if (!strcmp(pr->prop->name, "ibm,dynamic-memory"))
+			err = pseries_update_drconf_memory(pr);
 		break;
 	}
 	return notifier_from_errno(err);
@@ -235,7 +261,7 @@
 static int __init pseries_memory_hotplug_init(void)
 {
 	if (firmware_has_feature(FW_FEATURE_LPAR))
-		pSeries_reconfig_notifier_register(&pseries_mem_nb);
+		of_reconfig_notifier_register(&pseries_mem_nb);
 
 	return 0;
 }
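
For orientation, the ibm,dynamic-memory property compared above has the
layout that the struct deleted from arch/powerpc/mm/numa.c earlier in this
series spells out (the definition now lives in a shared header): a leading
u32 entry count followed by packed of_drconf_cell records. A sketch of the
walk, mirroring pseries_update_drconf_memory():

	struct of_drconf_cell {
		u64	base_addr;
		u32	drc_index;
		u32	reserved;
		u32	aa_index;
		u32	flags;	/* DRCONF_MEM_ASSIGNED et al. */
	};

	u32 *p = (u32 *)prop->value;
	u32 entries = *p++;	/* leading count */
	struct of_drconf_cell *cells = (struct of_drconf_cell *)p;
	/* compare cells[i].flags & DRCONF_MEM_ASSIGNED across old and new */
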
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6153eea..e2685ba 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -36,13 +36,13 @@
 #include <linux/dma-mapping.h>
 #include <linux/crash_dump.h>
 #include <linux/memory.h>
+#include <linux/of.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/iommu.h>
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/firmware.h>
 #include <asm/tce.h>
 #include <asm/ppc-pci.h>
@@ -760,7 +760,7 @@
 	__remove_ddw(np, ddw_avail, liobn);
 
 delprop:
-	ret = prom_remove_property(np, win64);
+	ret = of_remove_property(np, win64);
 	if (ret)
 		pr_warning("%s: failed to remove direct window property: %d\n",
 			np->full_name, ret);
@@ -1070,7 +1070,7 @@
 		goto out_free_window;
 	}
 
-	ret = prom_add_property(pdn, win64);
+	ret = of_add_property(pdn, win64);
 	if (ret) {
 		dev_err(&dev->dev, "unable to add dma window property for %s: %d",
 			 pdn->full_name, ret);
@@ -1294,7 +1294,7 @@
 	struct direct_window *window;
 
 	switch (action) {
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		if (pci && pci->iommu_table)
 			iommu_free_table(pci->iommu_table, np->full_name);
 
@@ -1357,7 +1357,7 @@
 	}
 
 
-	pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
+	of_reconfig_notifier_register(&iommu_reconfig_nb);
 	register_memory_notifier(&iommu_mem_nb);
 
 	set_pci_dma_ops(&dma_iommu_ops);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index dd30b12..6573808 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -116,7 +116,7 @@
 	}
 
 	if (!more) {
-		prom_update_property(dn, new_prop);
+		of_update_property(dn, new_prop);
 		new_prop = NULL;
 	}
 
@@ -172,7 +172,7 @@
 
 			case 0x80000000:
 				prop = of_find_property(dn, prop_name, NULL);
-				prom_remove_property(dn, prop);
+				of_remove_property(dn, prop);
 				prop = NULL;
 				break;
 
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index 13e8cc4..e6cc34a 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -273,4 +273,35 @@
 			lbuf[1]);
 }
 
+/* Set various resource mode parameters */
+static inline long plpar_set_mode(unsigned long mflags, unsigned long resource,
+		unsigned long value1, unsigned long value2)
+{
+	return plpar_hcall_norets(H_SET_MODE, mflags, resource, value1, value2);
+}
+
+/*
+ * Enable relocation on exceptions on this partition
+ *
+ * Note: this call has partition-wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_reloc_on_exceptions(void)
+{
+	/* mflags = 3: Exceptions at 0xC000000000004000 */
+	return plpar_set_mode(3, 3, 0, 0);
+}
+
+/*
+ * Disable relocation on exceptions on this partition
+ *
+ * Note: this call has partition-wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long disable_reloc_on_exceptions(void)
+{
+	return plpar_set_mode(0, 3, 0, 0);
+}
+
 #endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 2f46681..d6491bd 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -16,55 +16,13 @@
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/uaccess.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/mmu.h>
 
-
-
-/*
- * Routines for "runtime" addition and removal of device tree nodes.
- */
-#ifdef CONFIG_PROC_DEVICETREE
-/*
- * Add a node to /proc/device-tree.
- */
-static void add_node_proc_entries(struct device_node *np)
-{
-	struct proc_dir_entry *ent;
-
-	ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde);
-	if (ent)
-		proc_device_tree_add_node(np, ent);
-}
-
-static void remove_node_proc_entries(struct device_node *np)
-{
-	struct property *pp = np->properties;
-	struct device_node *parent = np->parent;
-
-	while (pp) {
-		remove_proc_entry(pp->name, np->pde);
-		pp = pp->next;
-	}
-	if (np->pde)
-		remove_proc_entry(np->pde->name, parent->pde);
-}
-#else /* !CONFIG_PROC_DEVICETREE */
-static void add_node_proc_entries(struct device_node *np)
-{
-	return;
-}
-
-static void remove_node_proc_entries(struct device_node *np)
-{
-	return;
-}
-#endif /* CONFIG_PROC_DEVICETREE */
-
 /**
  *	derive_parent - basically like dirname(1)
  *	@path:  the full_name of a node to be added to the tree
@@ -97,28 +55,6 @@
 	return parent;
 }
 
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
-
-int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_register);
-
-void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
-{
-	blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_unregister);
-
-int pSeries_reconfig_notify(unsigned long action, void *p)
-{
-	int err = blocking_notifier_call_chain(&pSeries_reconfig_chain,
-						action, p);
-
-	return notifier_to_errno(err);
-}
-
 static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
 {
 	struct device_node *np;
@@ -142,16 +78,12 @@
 		goto out_err;
 	}
 
-	err = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, np);
+	err = of_attach_node(np);
 	if (err) {
 		printk(KERN_ERR "Failed to add device node %s\n", path);
 		goto out_err;
 	}
 
-	of_attach_node(np);
-
-	add_node_proc_entries(np);
-
 	of_node_put(np->parent);
 
 	return 0;
@@ -179,11 +111,7 @@
 		return -EBUSY;
 	}
 
-	remove_node_proc_entries(np);
-
-	pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, np);
 	of_detach_node(np);
-
 	of_node_put(parent);
 	of_node_put(np); /* Must decrement the refcount */
 	return 0;
@@ -397,7 +325,7 @@
 	if (!prop)
 		return -ENOMEM;
 
-	prom_add_property(np, prop);
+	of_add_property(np, prop);
 
 	return 0;
 }
@@ -421,16 +349,15 @@
 
 	prop = of_find_property(np, buf, NULL);
 
-	return prom_remove_property(np, prop);
+	return of_remove_property(np, prop);
 }
 
 static int do_update_property(char *buf, size_t bufsize)
 {
 	struct device_node *np;
-	struct pSeries_reconfig_prop_update upd_value;
 	unsigned char *value;
 	char *name, *end, *next_prop;
-	int rc, length;
+	int length;
 	struct property *newprop;
 	buf = parse_node(buf, bufsize, &np);
 	end = buf + bufsize;
@@ -452,41 +379,7 @@
 	if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
 		slb_set_size(*(int *)value);
 
-	upd_value.node = np;
-	upd_value.property = newprop;
-	pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value);
-
-	rc = prom_update_property(np, newprop);
-	if (rc)
-		return rc;
-
-	/* For memory under the ibm,dynamic-reconfiguration-memory node
-	 * of the device tree, adding and removing memory is just an update
-	 * to the ibm,dynamic-memory property instead of adding/removing a
-	 * memory node in the device tree.  For these cases we still need to
-	 * involve the notifier chain.
-	 */
-	if (!strcmp(name, "ibm,dynamic-memory")) {
-		int action;
-
-		next_prop = parse_next_property(next_prop, end, &name,
-						&length, &value);
-		if (!next_prop)
-			return -EINVAL;
-
-		if (!strcmp(name, "add"))
-			action = PSERIES_DRCONF_MEM_ADD;
-		else
-			action = PSERIES_DRCONF_MEM_REMOVE;
-
-		rc = pSeries_reconfig_notify(action, value);
-		if (rc) {
-			prom_update_property(np, newprop);
-			return rc;
-		}
-	}
-
-	return 0;
+	return of_update_property(np, newprop);
 }
 
 /**
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e3cb7ae..ca55882 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -40,6 +40,8 @@
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
 #include <linux/cpuidle.h>
+#include <linux/of.h>
+#include <linux/kexec.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -63,7 +65,6 @@
 #include <asm/smp.h>
 #include <asm/firmware.h>
 #include <asm/eeh.h>
-#include <asm/pSeries_reconfig.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -258,7 +259,7 @@
 	int err = NOTIFY_OK;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		pci = np->parent->data;
 		if (pci) {
 			update_dn_pci_info(np, pci->phb);
@@ -367,6 +368,65 @@
 	}
 }
 
+/*
+ * Enable relocation on exceptions. This has partition-wide scope and may
+ * take a while to complete; if it takes longer than one second we just give
+ * up rather than waste any more time on it. If that ever turns out to be a
+ * problem in practice we can move this into a kernel thread to finish off
+ * the process later in boot.
+ */
+static int __init pSeries_enable_reloc_on_exc(void)
+{
+	long rc;
+	unsigned int delay, total_delay = 0;
+
+	while (1) {
+		rc = enable_reloc_on_exceptions();
+		if (!H_IS_LONG_BUSY(rc))
+			return rc;
+
+		delay = get_longbusy_msecs(rc);
+		total_delay += delay;
+		if (total_delay > 1000) {
+			pr_warn("Warning: Giving up waiting to enable "
+				"relocation on exceptions (%u msec)!\n",
+				total_delay);
+			return rc;
+		}
+
+		mdelay(delay);
+	}
+}
+
+#ifdef CONFIG_KEXEC
+static long pSeries_disable_reloc_on_exc(void)
+{
+	long rc;
+
+	while (1) {
+		rc = disable_reloc_on_exceptions();
+		if (!H_IS_LONG_BUSY(rc))
+			return rc;
+		mdelay(get_longbusy_msecs(rc));
+	}
+}
+
+static void pSeries_machine_kexec(struct kimage *image)
+{
+	long rc;
+
+	if (firmware_has_feature(FW_FEATURE_SET_MODE) &&
+	    (image->type != KEXEC_TYPE_CRASH)) {
+		rc = pSeries_disable_reloc_on_exc();
+		if (rc != H_SUCCESS)
+			pr_warning("Warning: Failed to disable relocation on "
+				   "exceptions: %ld\n", rc);
+	}
+
+	default_machine_kexec(image);
+}
+#endif
+
 static void __init pSeries_setup_arch(void)
 {
 	panic_timeout = 10;
@@ -389,7 +449,7 @@
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
 	find_and_init_phbs();
-	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+	of_reconfig_notifier_register(&pci_dn_reconfig_nb);
 
 	pSeries_nvram_init();
 
@@ -402,6 +462,14 @@
 		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
 	else
 		ppc_md.enable_pmcs = power4_enable_pmcs;
+
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+		long rc;
+		if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) {
+			pr_warn("Unable to enable relocation on exceptions: "
+				"%ld\n", rc);
+		}
+	}
 }
 
 static int __init pSeries_init_panel(void)
@@ -659,4 +727,7 @@
 	.progress		= rtas_progress,
 	.system_reset_exception = pSeries_system_reset_exception,
 	.machine_check_exception = pSeries_machine_check_exception,
+#ifdef CONFIG_KEXEC
+	.machine_kexec          = pSeries_machine_kexec,
+#endif
 };
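
Note how the enable path above bounds its retries: an H_IS_LONG_BUSY() return
carries a firmware-suggested wait, get_longbusy_msecs() extracts it, and the
loop gives up once the waits sum past one second (the kexec-time disable path
loops unbounded instead, since it must finish before the reboot). A hedged
sketch of the same bounded-retry shape in plain C; the callback names are
illustrative stand-ins, not kernel API:

    /* Hedged sketch of the bounded "long busy" retry pattern; the
     * function pointers are illustrative, not kernel interfaces. */
    #define BUDGET_MS 1000

    long retry_long_busy(long (*op)(void),
                         int (*is_long_busy)(long rc),
                         unsigned int (*hint_ms)(long rc),
                         void (*sleep_ms)(unsigned int ms))
    {
            unsigned int delay, total = 0;

            for (;;) {
                    long rc = op();

                    if (!is_long_busy(rc))
                            return rc;      /* done, or a real error */

                    delay = hint_ms(rc);    /* suggested wait interval */
                    total += delay;
                    if (total > BUDGET_MS)
                            return rc;      /* give up, report last status */
                    sleep_ms(delay);
            }
    }
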
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 71706bc..9fc0a494 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -38,7 +38,6 @@
 #include <asm/cputable.h>
 #include <asm/firmware.h>
 #include <asm/rtas.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/mpic.h>
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index 02cf1e7..0eb871c 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -1,7 +1,7 @@
 /*
  * Freescale General-purpose Timers Module
  *
- * Copyright (c) Freescale Semicondutor, Inc. 2006.
+ * Copyright (c) Freescale Semiconductor, Inc. 2006.
  *               Shlomi Gridish <gridish@freescale.com>
  *               Jerry Huang <Chang-Ming.Huang@freescale.com>
  * Copyright (c) MontaVista Software, Inc. 2008.
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 01b62a6..5ba325b 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -89,7 +89,7 @@
 	return 0;
 }
 
-static int __init setup_one_atmu(struct ccsr_pci __iomem *pci,
+static int setup_one_atmu(struct ccsr_pci __iomem *pci,
 	unsigned int index, const struct resource *res,
 	resource_size_t offset)
 {
@@ -126,7 +126,7 @@
 }
 
 /* atmu setup for fsl pci/pcie controller */
-static void __init setup_pci_atmu(struct pci_controller *hose,
+static void setup_pci_atmu(struct pci_controller *hose,
 				  struct resource *rsrc)
 {
 	struct ccsr_pci __iomem *pci;
@@ -902,9 +902,42 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int fsl_pci_resume(struct device *dev)
+{
+	struct pci_controller *hose;
+	struct resource pci_rsrc;
+
+	hose = pci_find_hose_for_OF_device(dev->of_node);
+	if (!hose)
+		return -ENODEV;
+
+	if (of_address_to_resource(dev->of_node, 0, &pci_rsrc)) {
+		dev_err(dev, "Get pci register base failed.");
+		return -ENODEV;
+	}
+
+	setup_pci_atmu(hose, &pci_rsrc);
+
+	return 0;
+}
+
+static const struct dev_pm_ops pci_pm_ops = {
+	.resume = fsl_pci_resume,
+};
+
+#define PCI_PM_OPS (&pci_pm_ops)
+
+#else
+
+#define PCI_PM_OPS NULL
+
+#endif
+
 static struct platform_driver fsl_pci_driver = {
 	.driver = {
 		.name = "fsl-pci",
+		.pm = PCI_PM_OPS,
 		.of_match_table = pci_ids,
 	},
 	.probe = fsl_pci_probe,
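
The two __init annotations come off setup_one_atmu() and setup_pci_atmu() in
the same patch for a reason: the new resume callback calls setup_pci_atmu()
long after boot, when .init.text has already been freed, so the functions can
no longer live there. A minimal sketch of the pattern, assuming the usual
dev_pm_ops wiring (the foo_* names are hypothetical):

    #include <linux/device.h>
    #include <linux/pm.h>

    /* Hedged sketch: any function reachable from a PM callback must not
     * be __init, because .init.text is discarded once boot finishes. */
    static int foo_resume(struct device *dev)
    {
            /* reprogram volatile controller state lost across suspend */
            return 0;
    }

    static SIMPLE_DEV_PM_OPS(foo_pm_ops, NULL, foo_resume);
    /* wired up as .driver.pm = &foo_pm_ops in the platform_driver */
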
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
index 8f04654..5aaf86c 100644
--- a/arch/powerpc/sysdev/pmi.c
+++ b/arch/powerpc/sysdev/pmi.c
@@ -214,18 +214,7 @@
 		.of_match_table = pmi_match,
 	},
 };
-
-static int __init pmi_module_init(void)
-{
-	return platform_driver_register(&pmi_of_platform_driver);
-}
-module_init(pmi_module_init);
-
-static void __exit pmi_module_exit(void)
-{
-	platform_driver_unregister(&pmi_of_platform_driver);
-}
-module_exit(pmi_module_exit);
+module_platform_driver(pmi_of_platform_driver);
 
 int pmi_send_message(pmi_message_t msg)
 {
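
module_platform_driver() folds the hand-rolled init/exit pair into one line.
Roughly what the helper from include/linux/platform_device.h expands to (a
trimmed sketch, not the exact macro body):

    /* Hedged sketch of the expansion of
     * module_platform_driver(pmi_of_platform_driver): */
    static int __init drv_init(void)
    {
            return platform_driver_register(&pmi_of_platform_driver);
    }
    module_init(drv_init);

    static void __exit drv_exit(void)
    {
            platform_driver_unregister(&pmi_of_platform_driver);
    }
    module_exit(drv_exit);
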
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index b043675..238a07b 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2010 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 2fba6ef2..b2b87c30 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -1,7 +1,7 @@
 /*
  * arch/powerpc/sysdev/qe_lib/qe_ic.c
  *
- * Copyright (C) 2006 Freescale Semicondutor, Inc.  All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc.  All rights reserved.
  *
  * Author: Li Yang <leoli@freescale.com>
  * Based on code from Shlomi Gridish <gridish@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.h b/arch/powerpc/sysdev/qe_lib/qe_ic.h
index c327872..efef7ab 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.h
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.h
@@ -3,7 +3,7 @@
  *
  * QUICC ENGINE Interrupt Controller Header
  *
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Li Yang <leoli@freescale.com>
  * Based on code from Shlomi Gridish <gridish@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c
index fd1a6c3..a88807b 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_io.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_io.c
@@ -3,7 +3,7 @@
  *
  * QE Parallel I/O ports configuration routines
  *
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Li Yang <LeoLi@freescale.com>
  * Based on code from Shlomi Gridish <gridish@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/ucc.c b/arch/powerpc/sysdev/qe_lib/ucc.c
index 0467750..134b07d 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc.c
@@ -3,7 +3,7 @@
  *
  * QE UCC API Set - UCC specific routines implementations.
  *
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_fast.c b/arch/powerpc/sysdev/qe_lib/ucc_fast.c
index fba0244..cceb2e3 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc_fast.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
index 524c0ea..1c062f4 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Authors: 	Shlomi Gridish <gridish@freescale.com>
  * 		Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/usb.c b/arch/powerpc/sysdev/qe_lib/usb.c
index 9162828..27f23bd 100644
--- a/arch/powerpc/sysdev/qe_lib/usb.c
+++ b/arch/powerpc/sysdev/qe_lib/usb.c
@@ -1,7 +1,7 @@
 /*
  * QE USB routines
  *
- * Copyright (c) Freescale Semicondutor, Inc. 2006.
+ * Copyright 2006 Freescale Semiconductor, Inc.
  *               Shlomi Gridish <gridish@freescale.com>
  *               Jerry Huang <Chang-Ming.Huang@freescale.com>
  * Copyright (c) MontaVista Software, Inc. 2008.
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index c168c54e..b49fdbd 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -6,7 +6,7 @@
 
 ccflags-$(CONFIG_PPC64) := -mno-minimal-toc
 
-obj-y			+= xmon.o start.o nonstdio.o
+obj-y			+= xmon.o nonstdio.o
 
 ifdef CONFIG_XMON_DISASSEMBLY
 obj-y			+= ppc-dis.o ppc-opc.o
diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
index bfac84f..bce3dcf 100644
--- a/arch/powerpc/xmon/nonstdio.c
+++ b/arch/powerpc/xmon/nonstdio.c
@@ -7,9 +7,23 @@
  *      2 of the License, or (at your option) any later version.
  */
 #include <linux/string.h>
+#include <asm/udbg.h>
 #include <asm/time.h>
 #include "nonstdio.h"
 
+
+static int xmon_write(const void *ptr, int nb)
+{
+	return udbg_write(ptr, nb);
+}
+
+static int xmon_readchar(void)
+{
+	if (udbg_getc)
+		return udbg_getc();
+	return -1;
+}
+
 int xmon_putchar(int c)
 {
 	char ch = c;
@@ -23,34 +37,7 @@
 static char *lineptr;
 static int lineleft;
 
-int xmon_expect(const char *str, unsigned long timeout)
-{
-	int c;
-	unsigned long t0;
-
-	/* assume 25MHz default timebase if tb_ticks_per_sec not set yet */
-	timeout *= tb_ticks_per_sec? tb_ticks_per_sec: 25000000;
-	t0 = get_tbl();
-	do {
-		lineptr = line;
-		for (;;) {
-			c = xmon_read_poll();
-			if (c == -1) {
-				if (get_tbl() - t0 > timeout)
-					return 0;
-				continue;
-			}
-			if (c == '\n')
-				break;
-			if (c != '\r' && lineptr < &line[sizeof(line) - 1])
-				*lineptr++ = c;
-		}
-		*lineptr = 0;
-	} while (strstr(line, str) == NULL);
-	return 1;
-}
-
-int xmon_getchar(void)
+static int xmon_getchar(void)
 {
 	int c;
 
@@ -124,13 +111,19 @@
 void xmon_printf(const char *format, ...)
 {
 	va_list args;
-	int n;
 	static char xmon_outbuf[1024];
+	int rc, n;
 
 	va_start(args, format);
 	n = vsnprintf(xmon_outbuf, sizeof(xmon_outbuf), format, args);
 	va_end(args);
-	xmon_write(xmon_outbuf, n);
+
+	rc = xmon_write(xmon_outbuf, n);
+
+	if (n && rc == 0) {
+		/* No udbg hooks; fall back to printk() - dangerous */
+		printk(xmon_outbuf);
+	}
 }
 
 void xmon_puts(const char *str)
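
On the printk() fallback just added to xmon_printf(): udbg_write() returns
the byte count it pushed out, so a zero return with output pending means no
udbg backend is registered. The fallback hands the formatted buffer to
printk() as the format string, which the comment rightly calls dangerous: any
'%' produced by the first formatting pass would be parsed again. A hedged
two-line illustration of the safer convention:

    #include <linux/printk.h>

    /* Hedged illustration, not the xmon code itself. */
    static void print_buf(const char *buf)
    {
            printk("%s", buf);      /* safe: constant format string */
            /* printk(buf);            risky: '%' in buf is re-parsed */
    }
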
diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h
index 23dd95f..18a51de 100644
--- a/arch/powerpc/xmon/nonstdio.h
+++ b/arch/powerpc/xmon/nonstdio.h
@@ -4,12 +4,6 @@
 #define putchar	xmon_putchar
 
 extern int xmon_putchar(int c);
-extern int xmon_getchar(void);
 extern void xmon_puts(const char *);
 extern char *xmon_gets(char *, int);
 extern void xmon_printf(const char *, ...);
-extern void xmon_map_scc(void);
-extern int xmon_expect(const char *str, unsigned long timeout);
-extern int xmon_write(const void *ptr, int nb);
-extern int xmon_readchar(void);
-extern int xmon_read_poll(void);
diff --git a/arch/powerpc/xmon/start.c b/arch/powerpc/xmon/start.c
deleted file mode 100644
index 8864de2..0000000
--- a/arch/powerpc/xmon/start.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 1996 Paul Mackerras.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-#include "nonstdio.h"
-
-void xmon_map_scc(void)
-{
-}
-
-int xmon_write(const void *ptr, int nb)
-{
-	return udbg_write(ptr, nb);
-}
-
-int xmon_readchar(void)
-{
-	if (udbg_getc)
-		return udbg_getc();
-	return -1;
-}
-
-int xmon_read_poll(void)
-{
-	if (udbg_getc_poll)
-		return udbg_getc_poll();
-	return -1;
-}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 3a56a63..1f8d2f1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -52,9 +52,6 @@
 #include "nonstdio.h"
 #include "dis-asm.h"
 
-#define scanhex	xmon_scanhex
-#define skipbl	xmon_skipbl
-
 #ifdef CONFIG_SMP
 static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
 static unsigned long xmon_taken = 1;
@@ -169,12 +166,8 @@
 
 #ifdef CONFIG_PPC64
 #define REG		"%.16lx"
-#define REGS_PER_LINE	4
-#define LAST_VOLATILE	13
 #else
 #define REG		"%.8lx"
-#define REGS_PER_LINE	8
-#define LAST_VOLATILE	12
 #endif
 
 #define GETWORD(v)	(((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3])
@@ -1288,27 +1281,19 @@
 	catch_memory_errors = 0;
 }
 
-static int xmon_depth_to_print = 64;
-
 #define LRSAVE_OFFSET		(STACK_FRAME_LR_SAVE * sizeof(unsigned long))
 #define MARKER_OFFSET		(STACK_FRAME_MARKER * sizeof(unsigned long))
 
-#ifdef __powerpc64__
-#define REGS_OFFSET		0x70
-#else
-#define REGS_OFFSET		16
-#endif
-
 static void xmon_show_stack(unsigned long sp, unsigned long lr,
 			    unsigned long pc)
 {
+	int max_to_print = 64;
 	unsigned long ip;
 	unsigned long newsp;
 	unsigned long marker;
-	int count = 0;
 	struct pt_regs regs;
 
-	do {
+	while (max_to_print--) {
 		if (sp < PAGE_OFFSET) {
 			if (sp != 0)
 				printf("SP (%lx) is in userspace\n", sp);
@@ -1362,10 +1347,10 @@
 		   an exception frame. */
 		if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
 		    && marker == STACK_FRAME_REGS_MARKER) {
-			if (mread(sp + REGS_OFFSET, &regs, sizeof(regs))
+			if (mread(sp + STACK_FRAME_OVERHEAD, &regs, sizeof(regs))
 			    != sizeof(regs)) {
 				printf("Couldn't read registers at %lx\n",
-				       sp + REGS_OFFSET);
+				       sp + STACK_FRAME_OVERHEAD);
 				break;
 			}
 			printf("--- Exception: %lx %s at ", regs.trap,
@@ -1379,7 +1364,7 @@
 			break;
 
 		sp = newsp;
-	} while (count++ < xmon_depth_to_print);
+	}
 }
 
 static void backtrace(struct pt_regs *excp)
@@ -2943,7 +2928,6 @@
 		__debugger_dabr_match = NULL;
 		__debugger_fault_handler = NULL;
 	}
-	xmon_map_scc();
 }
 
 #ifdef CONFIG_MAGIC_SYSRQ
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 32425af..b5ea38c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -137,8 +137,6 @@
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS2
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 6d1f357..e606161 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -12,15 +12,13 @@
 #include <linux/mod_devicetable.h>
 #include <asm/fcx.h>
 #include <asm/irq.h>
+#include <asm/schid.h>
 
 /* structs from asm/cio.h */
 struct irb;
 struct ccw1;
 struct ccw_dev_id;
 
-/* from asm/schid.h */
-struct subchannel_id;
-
 /* simplified initializers for struct ccw_device:
  * CCW_DEVICE and CCW_DEVICE_DEVTYPE initialize one
  * entry in your MODULE_DEVICE_TABLE and set the match_flag correctly */
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 18cd6b5..f8c6df6 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -7,6 +7,9 @@
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
+#define __SC_DELOUSE(t,v) (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v))
+
 #define PSW32_MASK_PER		0x40000000UL
 #define PSW32_MASK_DAT		0x04000000UL
 #define PSW32_MASK_IO		0x02000000UL
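
__SC_DELOUSE is the s390 compat-syscall scrubber: 31-bit user pointers must
have their high bits cleared before the kernel dereferences them, while
integer arguments must pass through untouched. __TYPE_IS_PTR tells the two
apart with a conditional-operator trick: in 0 ? (t)0 : 0ULL the result type
is u64 for any integer t (usual arithmetic conversions) but t itself when t
is a pointer type, because 0ULL is a null pointer constant. A runnable
userspace demonstration, with unsigned long long standing in for u64:

    #include <stdio.h>

    /* Userspace demo of the kernel's __TYPE_IS_PTR trick (GNU C). */
    #define TYPE_IS_PTR(t) \
            (!__builtin_types_compatible_p(typeof(0 ? (t)0 : 0ULL), \
                                           unsigned long long))

    int main(void)
    {
            printf("int    -> %d\n", TYPE_IS_PTR(int));     /* prints 0 */
            printf("char * -> %d\n", TYPE_IS_PTR(char *));  /* prints 1 */
            return 0;
    }
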
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a6175ad..b1fa93c 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -9,6 +9,7 @@
 #include <asm-generic/pci.h>
 #include <asm-generic/pci-dma-compat.h>
 #include <asm/pci_clp.h>
+#include <asm/pci_debug.h>
 
 #define PCIBIOS_MIN_IO		0x1000
 #define PCIBIOS_MIN_MEM		0x10000000
@@ -33,6 +34,25 @@
 #define ZPCI_FC_BLOCKED			0x20
 #define ZPCI_FC_DMA_ENABLED		0x10
 
+struct zpci_fmb {
+	u32 format	:  8;
+	u32 dma_valid	:  1;
+	u32		: 23;
+	u32 samples;
+	u64 last_update;
+	/* hardware counters */
+	u64 ld_ops;
+	u64 st_ops;
+	u64 stb_ops;
+	u64 rpcit_ops;
+	u64 dma_rbytes;
+	u64 dma_wbytes;
+	/* software counters */
+	atomic64_t allocated_pages;
+	atomic64_t mapped_pages;
+	atomic64_t unmapped_pages;
+} __packed __aligned(16);
+
 struct msi_map {
 	unsigned long irq;
 	struct msi_desc *msi;
@@ -92,7 +112,15 @@
 	u64		end_dma;	/* End of available DMA addresses */
 	u64		dma_mask;	/* DMA address space mask */
 
+	/* Function measurement block */
+	struct zpci_fmb *fmb;
+	u16		fmb_update;	/* update interval */
+
 	enum pci_bus_speed max_bus_speed;
+
+	struct dentry	*debugfs_dev;
+	struct dentry	*debugfs_perf;
+	struct dentry	*debugfs_debug;
 };
 
 struct pci_hp_callback_ops {
@@ -155,4 +183,15 @@
 extern struct pci_hp_callback_ops hotplug_ops;
 extern unsigned int pci_probe;
 
+/* FMB */
+int zpci_fmb_enable_device(struct zpci_dev *);
+int zpci_fmb_disable_device(struct zpci_dev *);
+
+/* Debug */
+int zpci_debug_init(void);
+void zpci_debug_exit(void);
+void zpci_debug_init_device(struct zpci_dev *);
+void zpci_debug_exit_device(struct zpci_dev *);
+void zpci_debug_info(struct zpci_dev *, struct seq_file *);
+
 #endif
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
new file mode 100644
index 0000000..6bbec42
--- /dev/null
+++ b/arch/s390/include/asm/pci_debug.h
@@ -0,0 +1,36 @@
+#ifndef _S390_ASM_PCI_DEBUG_H
+#define _S390_ASM_PCI_DEBUG_H
+
+#include <asm/debug.h>
+
+extern debug_info_t *pci_debug_msg_id;
+extern debug_info_t *pci_debug_err_id;
+
+#ifdef CONFIG_PCI_DEBUG
+#define zpci_dbg(fmt, args...)							\
+	do {									\
+		if (pci_debug_msg_id->level >= 2)				\
+			debug_sprintf_event(pci_debug_msg_id, 2, fmt , ## args);\
+	} while (0)
+
+#else /* !CONFIG_PCI_DEBUG */
+#define zpci_dbg(fmt, args...) do { } while (0)
+#endif
+
+#define zpci_err(text...)							\
+	do {									\
+		char debug_buffer[16];						\
+		snprintf(debug_buffer, 16, text);				\
+		debug_text_event(pci_debug_err_id, 0, debug_buffer);		\
+	} while (0)
+
+static inline void zpci_err_hex(void *addr, int len)
+{
+	while (len > 0) {
+		debug_event(pci_debug_err_id, 0, (void *) addr, len);
+		len -= pci_debug_err_id->buf_size;
+		addr += pci_debug_err_id->buf_size;
+	}
+}
+
+#endif
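
zpci_err_hex() feeds an arbitrarily long buffer into the s390 debug facility
by slicing it into records sized to the trace area's buf_size; the kernel
helper passes the full remaining length and relies on debug_event() to
truncate each record. The same idiom in a standalone, hedged form (this
sketch clamps explicitly instead, and emit_fn is an assumed sink):

    #include <stddef.h>

    /* Hedged userspace sketch of the chunking idiom: emit a buffer in
     * pieces no larger than the sink's record size. */
    typedef void (*emit_fn)(const void *buf, size_t len);

    static void emit_chunked(const void *addr, size_t len, size_t rec,
                             emit_fn emit)
    {
            const char *p = addr;

            while (len > 0) {
                    size_t n = len < rec ? len : rec;

                    emit(p, n);
                    p += n;
                    len -= n;
            }
    }
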
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 086bb8e..6365308 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -53,7 +53,6 @@
 #   define __ARCH_WANT_COMPAT_SYS_TIME
 #   define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h
index 8c6a49e..2f43cfb 100644
--- a/arch/s390/include/uapi/asm/signal.h
+++ b/arch/s390/include/uapi/asm/signal.h
@@ -90,12 +90,6 @@
 
 #define SA_RESTORER     0x04000000
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK      1
-#define SS_DISABLE      2
-
 #define MINSIGSTKSZ     2048
 #define SIGSTKSZ        8192
 
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index ab0827b..f0f426a 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,4 +3,4 @@
 #
 
 obj-$(CONFIG_PCI)	+= pci.o pci_dma.o pci_clp.o pci_msi.o \
-			   pci_sysfs.o pci_event.o
+			   pci_sysfs.o pci_event.o pci_debug.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7ed38e5..8fa416b 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -98,6 +98,10 @@
 static int __read_mostly aisb_max;
 
 static struct kmem_cache *zdev_irq_cache;
+static struct kmem_cache *zdev_fmb_cache;
+
+debug_info_t *pci_debug_msg_id;
+debug_info_t *pci_debug_err_id;
 
 static inline int irq_to_msi_nr(unsigned int irq)
 {
@@ -216,6 +220,7 @@
 	u64 base;
 	u64 limit;
 	u64 iota;
+	u64 fmb_addr;
 };
 
 static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
@@ -232,6 +237,7 @@
 	fib->pba = args->base;
 	fib->pal = args->limit;
 	fib->iota = args->iota;
+	fib->fmb_addr = args->fmb_addr;
 
 	rc = mpcifc_instr(req, fib);
 	free_page((unsigned long) fib);
@@ -242,7 +248,7 @@
 int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
 		       u64 base, u64 limit, u64 iota)
 {
-	struct mod_pci_args args = { base, limit, iota };
+	struct mod_pci_args args = { base, limit, iota, 0 };
 
 	WARN_ON_ONCE(iota & 0x3fff);
 	args.iota |= ZPCI_IOTA_RTTO_FLAG;
@@ -252,7 +258,7 @@
 /* Modify PCI: Unregister I/O address translation parameters */
 int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
 {
-	struct mod_pci_args args = { 0, 0, 0 };
+	struct mod_pci_args args = { 0, 0, 0, 0 };
 
 	return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args);
 }
@@ -260,11 +266,46 @@
 /* Modify PCI: Unregister adapter interruptions */
 static int zpci_unregister_airq(struct zpci_dev *zdev)
 {
-	struct mod_pci_args args = { 0, 0, 0 };
+	struct mod_pci_args args = { 0, 0, 0, 0 };
 
 	return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args);
 }
 
+/* Modify PCI: Set PCI function measurement parameters */
+int zpci_fmb_enable_device(struct zpci_dev *zdev)
+{
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+
+	if (zdev->fmb)
+		return -EINVAL;
+
+	zdev->fmb = kmem_cache_alloc(zdev_fmb_cache, GFP_KERNEL);
+	if (!zdev->fmb)
+		return -ENOMEM;
+	memset(zdev->fmb, 0, sizeof(*zdev->fmb));
+	WARN_ON((u64) zdev->fmb & 0xf);
+
+	args.fmb_addr = virt_to_phys(zdev->fmb);
+	return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+}
+
+/* Modify PCI: Disable PCI function measurement */
+int zpci_fmb_disable_device(struct zpci_dev *zdev)
+{
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+	int rc;
+
+	if (!zdev->fmb)
+		return -EINVAL;
+
+	/* Function measurement is disabled if fmb address is zero */
+	rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+
+	kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+	zdev->fmb = NULL;
+	return rc;
+}
+
 #define ZPCI_PCIAS_CFGSPC	15
 
 static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
@@ -633,6 +674,7 @@
 	dev_info(&pdev->dev, "Removing device %u\n", zdev->domain);
 	zdev->state = ZPCI_FN_STATE_CONFIGURED;
 	zpci_dma_exit_device(zdev);
+	zpci_fmb_disable_device(zdev);
 	zpci_sysfs_remove_device(&pdev->dev);
 	zpci_unmap_resources(pdev);
 	list_del(&zdev->entry);		/* can be called from init */
@@ -799,6 +841,16 @@
 	kfree(bucket);
 }
 
+void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m)
+{
+	if (!zdev)
+		return;
+
+	seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries));
+	seq_printf(m, "aibv[0]:%016lx  aibv[1]:%016lx  aisb:%016lx\n",
+		   get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb);
+}
+
 static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
 						unsigned long flags, int domain)
 {
@@ -994,6 +1046,8 @@
 		goto out;
 	}
 
+	zpci_debug_init_device(zdev);
+	zpci_fmb_enable_device(zdev);
 	zpci_map_resources(zdev);
 	pci_bus_add_devices(zdev->bus);
 
@@ -1020,6 +1074,11 @@
 	if (!zdev_irq_cache)
 		goto error_zdev;
 
+	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
+				16, 0, NULL);
+	if (!zdev_fmb_cache)
+		goto error_fmb;
+
 	/* TODO: use realloc */
 	zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
 				   GFP_KERNEL);
@@ -1028,6 +1087,8 @@
 	return 0;
 
 error_iomap:
+	kmem_cache_destroy(zdev_fmb_cache);
+error_fmb:
 	kmem_cache_destroy(zdev_irq_cache);
 error_zdev:
 	return -ENOMEM;
@@ -1037,6 +1098,7 @@
 {
 	kfree(zpci_iomap_start);
 	kmem_cache_destroy(zdev_irq_cache);
+	kmem_cache_destroy(zdev_fmb_cache);
 }
 
 unsigned int pci_probe = 1;
@@ -1066,6 +1128,10 @@
 		test_facility(69), test_facility(70),
 		test_facility(71));
 
+	rc = zpci_debug_init();
+	if (rc)
+		return rc;
+
 	rc = zpci_mem_init();
 	if (rc)
 		goto out_mem;
@@ -1098,6 +1164,7 @@
 out_hash:
 	zpci_mem_exit();
 out_mem:
+	zpci_debug_exit();
 	return rc;
 }
 subsys_initcall(pci_base_init);
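
The measurement-block plumbing above follows one rule: firmware owns the
block once its physical address goes out via mod_pci(...,
ZPCI_MOD_FC_SET_MEASURE, ...), a zero fmb_addr turns measurement back off,
and the block must be 16-byte aligned, hence the dedicated slab cache and the
WARN_ON double-check (kmem_cache_zalloc() could fold the
kmem_cache_alloc()/memset() pair). A hedged sketch of the alignment-bearing
cache:

    #include <linux/errno.h>
    #include <linux/slab.h>

    /* Hedged sketch: 16-byte object alignment matching the
     * __aligned(16) on struct zpci_fmb. */
    static struct kmem_cache *fmb_cache;

    static int fmb_cache_init(void)
    {
            fmb_cache = kmem_cache_create("fmb", sizeof(struct zpci_fmb),
                                          16, 0, NULL);
            return fmb_cache ? 0 : -ENOMEM;
    }
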
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 7f4ce8d..2c847143 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -51,6 +51,7 @@
 	zdev->tlb_refresh = response->refresh;
 	zdev->dma_mask = response->dasm;
 	zdev->msi_addr = response->msia;
+	zdev->fmb_update = response->mui;
 
 	pr_debug("Supported number of MSI vectors: %u\n", response->noi);
 	switch (response->version) {
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
new file mode 100644
index 0000000..a303c95
--- /dev/null
+++ b/arch/s390/pci/pci_debug.c
@@ -0,0 +1,193 @@
+/*
+ *  Copyright IBM Corp. 2012
+ *
+ *  Author(s):
+ *    Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define COMPONENT "zPCI"
+#define pr_fmt(fmt) COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <asm/debug.h>
+
+#include <asm/pci_dma.h>
+
+static struct dentry *debugfs_root;
+
+static char *pci_perf_names[] = {
+	/* hardware counters */
+	"Load operations",
+	"Store operations",
+	"Store block operations",
+	"Refresh operations",
+	"DMA read bytes",
+	"DMA write bytes",
+	/* software counters */
+	"Allocated pages",
+	"Mapped pages",
+	"Unmapped pages",
+};
+
+static int pci_perf_show(struct seq_file *m, void *v)
+{
+	struct zpci_dev *zdev = m->private;
+	u64 *stat;
+	int i;
+
+	if (!zdev)
+		return 0;
+	if (!zdev->fmb)
+		return seq_printf(m, "FMB statistics disabled\n");
+
+	/* header */
+	seq_printf(m, "FMB @ %p\n", zdev->fmb);
+	seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update);
+	seq_printf(m, "Samples: %u\n", zdev->fmb->samples);
+	seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update);
+
+	/* hardware counters */
+	stat = (u64 *) &zdev->fmb->ld_ops;
+	for (i = 0; i < 4; i++)
+		seq_printf(m, "%26s:\t%llu\n",
+			   pci_perf_names[i], *(stat + i));
+	if (zdev->fmb->dma_valid)
+		for (i = 4; i < 6; i++)
+			seq_printf(m, "%26s:\t%llu\n",
+				   pci_perf_names[i], *(stat + i));
+	/* software counters */
+	for (i = 6; i < ARRAY_SIZE(pci_perf_names); i++)
+		seq_printf(m, "%26s:\t%llu\n",
+			   pci_perf_names[i],
+			   atomic64_read((atomic64_t *) (stat + i)));
+
+	return 0;
+}
+
+static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
+				  size_t count, loff_t *off)
+{
+	struct zpci_dev *zdev = ((struct seq_file *) file->private_data)->private;
+	unsigned long val;
+	int rc;
+
+	if (!zdev)
+		return 0;
+
+	rc = kstrtoul_from_user(ubuf, count, 10, &val);
+	if (rc)
+		return rc;
+
+	switch (val) {
+	case 0:
+		rc = zpci_fmb_disable_device(zdev);
+		if (rc)
+			return rc;
+		break;
+	case 1:
+		rc = zpci_fmb_enable_device(zdev);
+		if (rc)
+			return rc;
+		break;
+	}
+	return count;
+}
+
+static int pci_perf_seq_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, pci_perf_show,
+			   filp->f_path.dentry->d_inode->i_private);
+}
+
+static const struct file_operations debugfs_pci_perf_fops = {
+	.open	 = pci_perf_seq_open,
+	.read	 = seq_read,
+	.write	 = pci_perf_seq_write,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+static int pci_debug_show(struct seq_file *m, void *v)
+{
+	struct zpci_dev *zdev = m->private;
+
+	zpci_debug_info(zdev, m);
+	return 0;
+}
+
+static int pci_debug_seq_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, pci_debug_show,
+			   filp->f_path.dentry->d_inode->i_private);
+}
+
+static const struct file_operations debugfs_pci_debug_fops = {
+	.open	 = pci_debug_seq_open,
+	.read	 = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+void zpci_debug_init_device(struct zpci_dev *zdev)
+{
+	zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
+					       debugfs_root);
+	if (IS_ERR(zdev->debugfs_dev))
+		zdev->debugfs_dev = NULL;
+
+	zdev->debugfs_perf = debugfs_create_file("statistics",
+				S_IFREG | S_IRUGO | S_IWUSR,
+				zdev->debugfs_dev, zdev,
+				&debugfs_pci_perf_fops);
+	if (IS_ERR(zdev->debugfs_perf))
+		zdev->debugfs_perf = NULL;
+
+	zdev->debugfs_debug = debugfs_create_file("debug",
+				S_IFREG | S_IRUGO | S_IWUSR,
+				zdev->debugfs_dev, zdev,
+				&debugfs_pci_debug_fops);
+	if (IS_ERR(zdev->debugfs_debug))
+		zdev->debugfs_debug = NULL;
+}
+
+void zpci_debug_exit_device(struct zpci_dev *zdev)
+{
+	debugfs_remove(zdev->debugfs_perf);
+	debugfs_remove(zdev->debugfs_debug);
+	debugfs_remove(zdev->debugfs_dev);
+}
+
+int __init zpci_debug_init(void)
+{
+	/* event trace buffer */
+	pci_debug_msg_id = debug_register("pci_msg", 16, 1, 16 * sizeof(long));
+	if (!pci_debug_msg_id)
+		return -EINVAL;
+	debug_register_view(pci_debug_msg_id, &debug_sprintf_view);
+	debug_set_level(pci_debug_msg_id, 3);
+	zpci_dbg("Debug view initialized\n");
+
+	/* error log */
+	pci_debug_err_id = debug_register("pci_error", 2, 1, 16);
+	if (!pci_debug_err_id)
+		return -EINVAL;
+	debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
+	debug_set_level(pci_debug_err_id, 6);
+	zpci_err("Debug view initialized\n");
+
+	debugfs_root = debugfs_create_dir("pci", NULL);
+	return 0;
+}
+
+void zpci_debug_exit(void)
+{
+	if (pci_debug_msg_id)
+		debug_unregister(pci_debug_msg_id);
+	if (pci_debug_err_id)
+		debug_unregister(pci_debug_err_id);
+
+	debugfs_remove(debugfs_root);
+}
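
Each zPCI device ends up with a debugfs directory holding two files:
"statistics" (reads dump the FMB counters via pci_perf_show(); writing 1 or 0
enables or disables measurement through the seq_write handler) and "debug"
(the zpci_debug_info() dump). With the root directory created as "pci", the
files would typically appear under /sys/kernel/debug/pci/<device>/ on a
default debugfs mount (path assumed). The IS_ERR-to-NULL normalization in
zpci_debug_init_device() exists so teardown can call debugfs_remove()
unconditionally, since that function ignores NULL dentries. A hedged sketch
of the normalization as a helper:

    #include <linux/debugfs.h>
    #include <linux/err.h>

    /* Hedged sketch: fold both failure modes (creation error, debugfs
     * compiled out) into NULL so a later debugfs_remove(), which
     * ignores NULL, is always safe. */
    static struct dentry *create_or_null(const char *name, umode_t mode,
                                         struct dentry *parent, void *data,
                                         const struct file_operations *fops)
    {
            struct dentry *d = debugfs_create_file(name, mode, parent,
                                                   data, fops);

            return IS_ERR(d) ? NULL : d;
    }
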
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index c64b4b2..6138468 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -291,8 +291,10 @@
 	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
 		flags |= ZPCI_TABLE_PROTECTED;
 
-	if (!dma_update_trans(zdev, pa, dma_addr, size, flags))
+	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
+		atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages);
 		return dma_addr + offset;
+	}
 
 out_free:
 	dma_free_iommu(zdev, iommu_page_index, nr_pages);
@@ -315,6 +317,7 @@
 			     ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID))
 		dev_err(dev, "Failed to unmap addr: %Lx\n", dma_addr);
 
+	atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages);
 	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 	dma_free_iommu(zdev, iommu_page_index, npages);
 }
@@ -323,6 +326,7 @@
 			    dma_addr_t *dma_handle, gfp_t flag,
 			    struct dma_attrs *attrs)
 {
+	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
 	struct page *page;
 	unsigned long pa;
 	dma_addr_t map;
@@ -331,6 +335,8 @@
 	page = alloc_pages(flag, get_order(size));
 	if (!page)
 		return NULL;
+
+	atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages);
 	pa = page_to_phys(page);
 	memset((void *) pa, 0, size);
 
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index dbed8cd..ec62e3a 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -45,6 +45,8 @@
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 
+	zpci_err("SEI error CCD:\n");
+	zpci_err_hex(ccdf, sizeof(*ccdf));
 	dev_err(&zdev->pdev->dev, "event code: 0x%x\n", ccdf->pec);
 }
 
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index 4589339..3b1482e 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -13,8 +13,6 @@
        select GENERIC_CLOCKEVENTS
        select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 choice
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 16e41fe..cebaff8 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -1,4 +1,3 @@
-include include/asm-generic/Kbuild.asm
 
 header-y +=
 
diff --git a/arch/score/include/asm/ptrace.h b/arch/score/include/asm/ptrace.h
index e89dc9b..abc279d 100644
--- a/arch/score/include/asm/ptrace.h
+++ b/arch/score/include/asm/ptrace.h
@@ -1,78 +1,8 @@
 #ifndef _ASM_SCORE_PTRACE_H
 #define _ASM_SCORE_PTRACE_H
 
-#define PTRACE_GETREGS		12
-#define PTRACE_SETREGS		13
+#include <uapi/asm/ptrace.h>
 
-#define PC		32
-#define CONDITION	33
-#define ECR		34
-#define EMA		35
-#define CEH		36
-#define CEL		37
-#define COUNTER		38
-#define LDCR		39
-#define STCR		40
-#define PSR		41
-
-#define SINGLESTEP16_INSN	0x7006
-#define SINGLESTEP32_INSN	0x840C8000
-#define BREAKPOINT16_INSN	0x7002		/* work on SPG300 */
-#define BREAKPOINT32_INSN	0x84048000	/* work on SPG300 */
-
-/* Define instruction mask */
-#define INSN32_MASK	0x80008000
-
-#define J32	0x88008000	/* 1_00010_0000000000_1_000000000000000 */
-#define J32M	0xFC008000	/* 1_11111_0000000000_1_000000000000000 */
-
-#define B32	0x90008000	/* 1_00100_0000000000_1_000000000000000 */
-#define B32M	0xFC008000
-#define BL32	0x90008001	/* 1_00100_0000000000_1_000000000000001 */
-#define BL32M	B32
-#define BR32	0x80008008	/* 1_00000_0000000000_1_00000000_000100_0 */
-#define BR32M	0xFFE0807E
-#define BRL32	0x80008009	/* 1_00000_0000000000_1_00000000_000100_1 */
-#define BRL32M	BR32M
-
-#define B32_SET	(J32 | B32 | BL32 | BR32 | BRL32)
-
-#define J16	0x3000		/* 0_011_....... */
-#define J16M	0xF000
-#define B16	0x4000		/* 0_100_....... */
-#define B16M	0xF000
-#define BR16	0x0004		/* 0_000.......0100 */
-#define BR16M	0xF00F
-#define B16_SET (J16 | B16 | BR16)
-
-
-/*
- * This struct defines the way the registers are stored on the stack during a
- * system call/exception. As usual the registers k0/k1 aren't being saved.
- */
-struct pt_regs {
-	unsigned long pad0[6];	/* stack arguments */
-	unsigned long orig_r4;
-	unsigned long orig_r7;
-	long is_syscall;
-
-	unsigned long regs[32];
-
-	unsigned long cel;
-	unsigned long ceh;
-
-	unsigned long sr0;	/* cnt */
-	unsigned long sr1;	/* lcr */
-	unsigned long sr2;	/* scr */
-
-	unsigned long cp0_epc;
-	unsigned long cp0_ema;
-	unsigned long cp0_psr;
-	unsigned long cp0_ecr;
-	unsigned long cp0_condition;
-};
-
-#ifdef __KERNEL__
 
 struct task_struct;
 
@@ -83,6 +13,7 @@
 
 #define instruction_pointer(regs)	((unsigned long)(regs)->cp0_epc)
 #define profile_pc(regs)		instruction_pointer(regs)
+#define user_stack_pointer(r)		((unsigned long)(r)->regs[0])
 
 extern void do_syscall_trace(struct pt_regs *regs, int entryexit);
 extern int read_tsk_long(struct task_struct *, unsigned long, unsigned long *);
@@ -91,6 +22,4 @@
 
 #define arch_has_single_step()	(1)
 
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_SCORE_PTRACE_H */
diff --git a/arch/score/include/asm/setup.h b/arch/score/include/asm/setup.h
index 3cb944d..1f3aa72 100644
--- a/arch/score/include/asm/setup.h
+++ b/arch/score/include/asm/setup.h
@@ -1,11 +1,8 @@
 #ifndef _ASM_SCORE_SETUP_H
 #define _ASM_SCORE_SETUP_H
 
-#define COMMAND_LINE_SIZE	256
-#define MEMORY_START		0
-#define MEMORY_SIZE		0x2000000
+#include <uapi/asm/setup.h>
 
-#ifdef __KERNEL__
 
 extern void pagetable_init(void);
 extern void pgd_init(unsigned long page);
@@ -36,6 +33,4 @@
 extern void general_exception_vector(void);
 extern void interrupt_exception_vector(void);
 
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_SCORE_SETUP_H */
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index baebb3d..040178c 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,3 +1,34 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += kvm_para.h
+header-y += mman.h
+header-y += msgbuf.h
+header-y += param.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += unistd.h
diff --git a/arch/score/include/asm/auxvec.h b/arch/score/include/uapi/asm/auxvec.h
similarity index 100%
rename from arch/score/include/asm/auxvec.h
rename to arch/score/include/uapi/asm/auxvec.h
diff --git a/arch/score/include/asm/bitsperlong.h b/arch/score/include/uapi/asm/bitsperlong.h
similarity index 100%
rename from arch/score/include/asm/bitsperlong.h
rename to arch/score/include/uapi/asm/bitsperlong.h
diff --git a/arch/score/include/asm/byteorder.h b/arch/score/include/uapi/asm/byteorder.h
similarity index 100%
rename from arch/score/include/asm/byteorder.h
rename to arch/score/include/uapi/asm/byteorder.h
diff --git a/arch/score/include/asm/errno.h b/arch/score/include/uapi/asm/errno.h
similarity index 100%
rename from arch/score/include/asm/errno.h
rename to arch/score/include/uapi/asm/errno.h
diff --git a/arch/score/include/asm/fcntl.h b/arch/score/include/uapi/asm/fcntl.h
similarity index 100%
rename from arch/score/include/asm/fcntl.h
rename to arch/score/include/uapi/asm/fcntl.h
diff --git a/arch/score/include/asm/ioctl.h b/arch/score/include/uapi/asm/ioctl.h
similarity index 100%
rename from arch/score/include/asm/ioctl.h
rename to arch/score/include/uapi/asm/ioctl.h
diff --git a/arch/score/include/asm/ioctls.h b/arch/score/include/uapi/asm/ioctls.h
similarity index 100%
rename from arch/score/include/asm/ioctls.h
rename to arch/score/include/uapi/asm/ioctls.h
diff --git a/arch/score/include/asm/ipcbuf.h b/arch/score/include/uapi/asm/ipcbuf.h
similarity index 100%
rename from arch/score/include/asm/ipcbuf.h
rename to arch/score/include/uapi/asm/ipcbuf.h
diff --git a/arch/score/include/asm/kvm_para.h b/arch/score/include/uapi/asm/kvm_para.h
similarity index 100%
rename from arch/score/include/asm/kvm_para.h
rename to arch/score/include/uapi/asm/kvm_para.h
diff --git a/arch/score/include/asm/mman.h b/arch/score/include/uapi/asm/mman.h
similarity index 100%
rename from arch/score/include/asm/mman.h
rename to arch/score/include/uapi/asm/mman.h
diff --git a/arch/score/include/asm/msgbuf.h b/arch/score/include/uapi/asm/msgbuf.h
similarity index 100%
rename from arch/score/include/asm/msgbuf.h
rename to arch/score/include/uapi/asm/msgbuf.h
diff --git a/arch/score/include/asm/param.h b/arch/score/include/uapi/asm/param.h
similarity index 100%
rename from arch/score/include/asm/param.h
rename to arch/score/include/uapi/asm/param.h
diff --git a/arch/score/include/asm/poll.h b/arch/score/include/uapi/asm/poll.h
similarity index 100%
rename from arch/score/include/asm/poll.h
rename to arch/score/include/uapi/asm/poll.h
diff --git a/arch/score/include/asm/posix_types.h b/arch/score/include/uapi/asm/posix_types.h
similarity index 100%
rename from arch/score/include/asm/posix_types.h
rename to arch/score/include/uapi/asm/posix_types.h
diff --git a/arch/score/include/uapi/asm/ptrace.h b/arch/score/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..f59771a
--- /dev/null
+++ b/arch/score/include/uapi/asm/ptrace.h
@@ -0,0 +1,76 @@
+#ifndef _UAPI_ASM_SCORE_PTRACE_H
+#define _UAPI_ASM_SCORE_PTRACE_H
+
+#define PTRACE_GETREGS		12
+#define PTRACE_SETREGS		13
+
+#define PC		32
+#define CONDITION	33
+#define ECR		34
+#define EMA		35
+#define CEH		36
+#define CEL		37
+#define COUNTER		38
+#define LDCR		39
+#define STCR		40
+#define PSR		41
+
+#define SINGLESTEP16_INSN	0x7006
+#define SINGLESTEP32_INSN	0x840C8000
+#define BREAKPOINT16_INSN	0x7002		/* work on SPG300 */
+#define BREAKPOINT32_INSN	0x84048000	/* work on SPG300 */
+
+/* Define instruction mask */
+#define INSN32_MASK	0x80008000
+
+#define J32	0x88008000	/* 1_00010_0000000000_1_000000000000000 */
+#define J32M	0xFC008000	/* 1_11111_0000000000_1_000000000000000 */
+
+#define B32	0x90008000	/* 1_00100_0000000000_1_000000000000000 */
+#define B32M	0xFC008000
+#define BL32	0x90008001	/* 1_00100_0000000000_1_000000000000001 */
+#define BL32M	B32
+#define BR32	0x80008008	/* 1_00000_0000000000_1_00000000_000100_0 */
+#define BR32M	0xFFE0807E
+#define BRL32	0x80008009	/* 1_00000_0000000000_1_00000000_000100_1 */
+#define BRL32M	BR32M
+
+#define B32_SET	(J32 | B32 | BL32 | BR32 | BRL32)
+
+#define J16	0x3000		/* 0_011_....... */
+#define J16M	0xF000
+#define B16	0x4000		/* 0_100_....... */
+#define B16M	0xF000
+#define BR16	0x0004		/* 0_000.......0100 */
+#define BR16M	0xF00F
+#define B16_SET (J16 | B16 | BR16)
+
+
+/*
+ * This struct defines the way the registers are stored on the stack during a
+ * system call/exception. As usual the registers k0/k1 aren't being saved.
+ */
+struct pt_regs {
+	unsigned long pad0[6];	/* stack arguments */
+	unsigned long orig_r4;
+	unsigned long orig_r7;
+	long is_syscall;
+
+	unsigned long regs[32];
+
+	unsigned long cel;
+	unsigned long ceh;
+
+	unsigned long sr0;	/* cnt */
+	unsigned long sr1;	/* lcr */
+	unsigned long sr2;	/* scr */
+
+	unsigned long cp0_epc;
+	unsigned long cp0_ema;
+	unsigned long cp0_psr;
+	unsigned long cp0_ecr;
+	unsigned long cp0_condition;
+};
+
+
+#endif /* _UAPI_ASM_SCORE_PTRACE_H */
diff --git a/arch/score/include/asm/resource.h b/arch/score/include/uapi/asm/resource.h
similarity index 100%
rename from arch/score/include/asm/resource.h
rename to arch/score/include/uapi/asm/resource.h
diff --git a/arch/score/include/asm/sembuf.h b/arch/score/include/uapi/asm/sembuf.h
similarity index 100%
rename from arch/score/include/asm/sembuf.h
rename to arch/score/include/uapi/asm/sembuf.h
diff --git a/arch/score/include/uapi/asm/setup.h b/arch/score/include/uapi/asm/setup.h
new file mode 100644
index 0000000..ab9dbdb
--- /dev/null
+++ b/arch/score/include/uapi/asm/setup.h
@@ -0,0 +1,9 @@
+#ifndef _UAPI_ASM_SCORE_SETUP_H
+#define _UAPI_ASM_SCORE_SETUP_H
+
+#define COMMAND_LINE_SIZE	256
+#define MEMORY_START		0
+#define MEMORY_SIZE		0x2000000
+
+
+#endif /* _UAPI_ASM_SCORE_SETUP_H */
diff --git a/arch/score/include/asm/shmbuf.h b/arch/score/include/uapi/asm/shmbuf.h
similarity index 100%
rename from arch/score/include/asm/shmbuf.h
rename to arch/score/include/uapi/asm/shmbuf.h
diff --git a/arch/score/include/asm/sigcontext.h b/arch/score/include/uapi/asm/sigcontext.h
similarity index 100%
rename from arch/score/include/asm/sigcontext.h
rename to arch/score/include/uapi/asm/sigcontext.h
diff --git a/arch/score/include/asm/siginfo.h b/arch/score/include/uapi/asm/siginfo.h
similarity index 100%
rename from arch/score/include/asm/siginfo.h
rename to arch/score/include/uapi/asm/siginfo.h
diff --git a/arch/score/include/asm/signal.h b/arch/score/include/uapi/asm/signal.h
similarity index 100%
rename from arch/score/include/asm/signal.h
rename to arch/score/include/uapi/asm/signal.h
diff --git a/arch/score/include/asm/socket.h b/arch/score/include/uapi/asm/socket.h
similarity index 100%
rename from arch/score/include/asm/socket.h
rename to arch/score/include/uapi/asm/socket.h
diff --git a/arch/score/include/asm/sockios.h b/arch/score/include/uapi/asm/sockios.h
similarity index 100%
rename from arch/score/include/asm/sockios.h
rename to arch/score/include/uapi/asm/sockios.h
diff --git a/arch/score/include/asm/stat.h b/arch/score/include/uapi/asm/stat.h
similarity index 100%
rename from arch/score/include/asm/stat.h
rename to arch/score/include/uapi/asm/stat.h
diff --git a/arch/score/include/asm/statfs.h b/arch/score/include/uapi/asm/statfs.h
similarity index 100%
rename from arch/score/include/asm/statfs.h
rename to arch/score/include/uapi/asm/statfs.h
diff --git a/arch/score/include/asm/swab.h b/arch/score/include/uapi/asm/swab.h
similarity index 100%
rename from arch/score/include/asm/swab.h
rename to arch/score/include/uapi/asm/swab.h
diff --git a/arch/score/include/asm/termbits.h b/arch/score/include/uapi/asm/termbits.h
similarity index 100%
rename from arch/score/include/asm/termbits.h
rename to arch/score/include/uapi/asm/termbits.h
diff --git a/arch/score/include/asm/termios.h b/arch/score/include/uapi/asm/termios.h
similarity index 100%
rename from arch/score/include/asm/termios.h
rename to arch/score/include/uapi/asm/termios.h
diff --git a/arch/score/include/asm/types.h b/arch/score/include/uapi/asm/types.h
similarity index 100%
rename from arch/score/include/asm/types.h
rename to arch/score/include/uapi/asm/types.h
diff --git a/arch/score/include/asm/unistd.h b/arch/score/include/uapi/asm/unistd.h
similarity index 90%
rename from arch/score/include/asm/unistd.h
rename to arch/score/include/uapi/asm/unistd.h
index 56001c9..9cb4260 100644
--- a/arch/score/include/asm/unistd.h
+++ b/arch/score/include/uapi/asm/unistd.h
@@ -4,7 +4,6 @@
 #define __ARCH_WANT_SYSCALL_NO_FLAGS
 #define __ARCH_WANT_SYSCALL_OFF_T
 #define __ARCH_WANT_SYSCALL_DEPRECATED
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 8451317..babc2b8 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -40,8 +40,6 @@
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h
index 8bd965e..b437f2c 100644
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
@@ -46,6 +46,7 @@
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 
+	debug_dma_mapping_error(dev, dma_addr);
 	if (ops->mapping_error)
 		return ops->mapping_error(dev, dma_addr);
 
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index 43d3f26b..012004e 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -28,7 +28,6 @@
 # define __ARCH_WANT_SYS_SIGPENDING
 # define __ARCH_WANT_SYS_SIGPROCMASK
 # define __ARCH_WANT_SYS_RT_SIGACTION
-# define __ARCH_WANT_SYS_EXECVE
 # define __ARCH_WANT_SYS_FORK
 # define __ARCH_WANT_SYS_VFORK
 # define __ARCH_WANT_SYS_CLONE
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index cb8f992..0f7c852 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -111,6 +111,7 @@
 config NUMA
 	bool "Non Uniform Memory Access (NUMA) Support"
 	depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+	select ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	default n
 	help
 	  Some SH systems have many various memories scattered around
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 0c7d365..9f2edb5 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -41,8 +41,6 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
index 23f6cbb..1cda8aa 100644
--- a/arch/sparc/crypto/aes_asm.S
+++ b/arch/sparc/crypto/aes_asm.S
@@ -1024,7 +1024,11 @@
 	 add		%o2, 0x20, %o2
 	brlz,pt		%o3, 11f
 	 nop
-10:	ldx		[%o1 + 0x00], %g3
+10:	ldd		[%o0 + 0xd0], %f56
+	ldd		[%o0 + 0xd8], %f58
+	ldd		[%o0 + 0xe0], %f60
+	ldd		[%o0 + 0xe8], %f62
+	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
@@ -1128,9 +1132,9 @@
 	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
 	ldx		[%o0 - 0x10], %g1
 	subcc		%o3, 0x10, %o3
+	ldx		[%o0 - 0x08], %g2
 	be		10f
-	 ldx		[%o0 - 0x08], %g2
-	sub		%o0, 0xf0, %o0
+	 sub		%o0, 0xf0, %o0
 1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	ldx		[%o1 + 0x10], %o4
@@ -1154,7 +1158,11 @@
 	 add		%o2, 0x20, %o2
 	brlz,pt		%o3, 11f
 	 nop
-10:	ldx		[%o1 + 0x00], %g3
+10:	ldd		[%o0 + 0x18], %f56
+	ldd		[%o0 + 0x10], %f58
+	ldd		[%o0 + 0x08], %f60
+	ldd		[%o0 + 0x00], %f62
+	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
@@ -1511,11 +1519,11 @@
 	 add		%o2, 0x20, %o2
 	brlz,pt		%o3, 11f
 	 nop
-	ldd		[%o0 + 0xd0], %f56
+10:	ldd		[%o0 + 0xd0], %f56
 	ldd		[%o0 + 0xd8], %f58
 	ldd		[%o0 + 0xe0], %f60
 	ldd		[%o0 + 0xe8], %f62
-10:	xor		%g1, %g3, %o5
+	xor		%g1, %g3, %o5
 	MOVXTOD_O5_F0
 	xor		%g2, %g7, %o5
 	MOVXTOD_O5_F2
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 3965d1d..503e6d9 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -222,6 +222,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	ctx->ops->load_encrypt_keys(&ctx->key[0]);
 	while ((nbytes = walk.nbytes)) {
@@ -251,6 +252,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	ctx->ops->load_decrypt_keys(&ctx->key[0]);
 	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
@@ -280,6 +282,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	ctx->ops->load_encrypt_keys(&ctx->key[0]);
 	while ((nbytes = walk.nbytes)) {
@@ -309,6 +312,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	ctx->ops->load_decrypt_keys(&ctx->key[0]);
 	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
@@ -329,6 +333,22 @@
 	return err;
 }
 
+static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
+			    struct blkcipher_walk *walk)
+{
+	u8 *ctrblk = walk->iv;
+	u64 keystream[AES_BLOCK_SIZE / sizeof(u64)];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
+			      keystream, AES_BLOCK_SIZE);
+	crypto_xor((u8 *) keystream, src, nbytes);
+	memcpy(dst, keystream, nbytes);
+	crypto_inc(ctrblk, AES_BLOCK_SIZE);
+}
+
 static int ctr_crypt(struct blkcipher_desc *desc,
 		     struct scatterlist *dst, struct scatterlist *src,
 		     unsigned int nbytes)
@@ -338,10 +358,11 @@
 	int err;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	ctx->ops->load_encrypt_keys(&ctx->key[0]);
-	while ((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
 		unsigned int block_len = nbytes & AES_BLOCK_MASK;
 
 		if (likely(block_len)) {
@@ -353,6 +374,10 @@
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
+	if (walk.nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
 	fprs_write(0);
 	return err;
 }
@@ -418,7 +443,7 @@
 	.cra_driver_name	= "ctr-aes-sparc64",
 	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
 	.cra_alignmask		= 7,
 	.cra_type		= &crypto_blkcipher_type,
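
Three CTR changes above belong together: cra_blocksize drops to 1 because CTR
is a stream cipher and any tail length is valid; blkcipher_walk_virt_block()
keeps at least AES_BLOCK_SIZE bytes in the walk until only the tail remains;
and ctr_crypt_final() encrypts the counter once and XORs just the leftover
bytes. (The CRYPTO_TFM_REQ_MAY_SLEEP clearing here, and in the camellia/DES
glue below, keeps the walk from sleeping while the keys sit in the FPU
registers.) A hedged userspace sketch of the final-block step, with
block_encrypt as an assumed single-block cipher primitive:

    #include <stddef.h>
    #include <stdint.h>

    #define BLK 16  /* AES block size */

    /* Hedged sketch of the ctr_crypt_final() idea: one keystream block
     * from the counter, XOR only the n tail bytes, bump the counter. */
    static void ctr_final(void (*block_encrypt)(uint8_t out[BLK],
                                                const uint8_t in[BLK]),
                          uint8_t ctr[BLK], const uint8_t *src,
                          uint8_t *dst, size_t n)
    {
            uint8_t ks[BLK];
            size_t i;

            block_encrypt(ks, ctr);         /* keystream = E_k(counter) */
            for (i = 0; i < n; i++)
                    dst[i] = src[i] ^ ks[i];
            for (i = BLK; i-- > 0; )        /* big-endian increment */
                    if (++ctr[i])
                            break;
    }
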
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 62c89af..888f6260 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -98,6 +98,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	if (encrypt)
 		key = &ctx->encrypt_key[0];
@@ -160,6 +161,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	key = &ctx->encrypt_key[0];
 	camellia_sparc64_load_keys(key, ctx->key_len);
@@ -198,6 +200,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	key = &ctx->decrypt_key[0];
 	camellia_sparc64_load_keys(key, ctx->key_len);
diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S
index 30b6e90..b5c8fc2 100644
--- a/arch/sparc/crypto/des_asm.S
+++ b/arch/sparc/crypto/des_asm.S
@@ -376,6 +376,7 @@
 1:	ldd	[%o1 + 0x00], %f60
 	DES3_LOOP_BODY(60)
 	std	%f60, [%o2 + 0x00]
+	add	%o1, 0x08, %o1
 	subcc	%o3, 0x08, %o3
 	bne,pt	%icc, 1b
 	 add	%o2, 0x08, %o2
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index 41524ce..3065bc6 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -100,6 +100,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	if (encrypt)
 		des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
@@ -147,6 +148,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
 	while ((nbytes = walk.nbytes)) {
@@ -177,6 +179,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
 	while ((nbytes = walk.nbytes)) {
@@ -266,6 +269,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	if (encrypt)
 		K = &ctx->encrypt_expkey[0];
@@ -317,6 +321,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	K = &ctx->encrypt_expkey[0];
 	des3_ede_sparc64_load_keys(K);
@@ -352,6 +357,7 @@
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	K = &ctx->decrypt_expkey[0];
 	des3_ede_sparc64_load_keys(K);
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index 8493fd3..05fe53f 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -59,6 +59,7 @@
 
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
+	debug_dma_mapping_error(dev, dma_addr);
 	return (dma_addr == DMA_ERROR_CODE);
 }
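
debug_dma_mapping_error(), added to the sparc dma_mapping_error() here and to
the sh and tile copies in the neighbouring hunks, records for
CONFIG_DMA_API_DEBUG that the driver actually checked its mapping, so the
checker only complains about drivers that skip the check. The driver-side
contract, as a hedged sketch:

    #include <linux/dma-mapping.h>

    /* Hedged sketch of the contract dma-debug enforces: every mapping
     * must be checked before the handle is used. */
    static int map_one(struct device *dev, void *buf, size_t len,
                       dma_addr_t *handle)
    {
            dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

            if (dma_mapping_error(dev, addr))
                    return -ENOMEM; /* nothing mapped, nothing to undo */

            *handle = addr;
            return 0;
    }
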
 
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 8c5eed6..9661e9b 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -61,14 +61,20 @@
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	ptep_set_wrprotect(mm, addr, ptep);
+	pte_t old_pte = *ptep;
+	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
 }
 
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
 {
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+	int changed = !pte_same(*ptep, pte);
+	if (changed) {
+		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+		flush_tlb_page(vma, addr);
+	}
+	return changed;
 }
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
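
The sparc hugetlb helpers stop deferring to ptep_set_wrprotect() and
ptep_set_access_flags() because those generic routines touch a single PTE
slot, while a sparc64 huge mapping is backed by a run of consecutive PTEs
that set_huge_pte_at() updates in full (an assumption based on the sparc
implementation of that helper). An illustrative stand-alone shape of such a
multi-slot update, with pte_t reduced to an integer stand-in:

    /* Hedged sketch only; pte_t and the stepping are illustrative. */
    typedef unsigned long pte_t;

    static void set_huge_pte_all(pte_t *ptep, pte_t pte, int nslots,
                                 unsigned long pa_step)
    {
            int i;

            /* apply the new PTE to every slot backing the huge
             * mapping, advancing the physical address each time */
            for (i = 0; i < nslots; i++, pte += pa_step)
                    ptep[i] = pte;
    }
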
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 95515f1..7870be0 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -617,6 +617,12 @@
 	return val;
 }
 
+#define pte_accessible pte_accessible
+static inline unsigned long pte_accessible(pte_t a)
+{
+	return pte_val(a) & _PAGE_VALID;
+}
+
 static inline unsigned long pte_special(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_SPECIAL;
@@ -802,7 +808,7 @@
 	 * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
 	 *             and SUN4V pte layout, so this inline test is fine.
 	 */
-	if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID))
+	if (likely(mm != &init_mm) && pte_accessible(orig))
 		tlb_batch_add(mm, addr, ptep, orig, fullmm);
 }
 
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index c3e5d8b..87ce24c 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -45,8 +45,8 @@
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h
index 1a04189..c4ffd6c 100644
--- a/arch/sparc/include/uapi/asm/signal.h
+++ b/arch/sparc/include/uapi/asm/signal.h
@@ -147,12 +147,6 @@
 #define SIG_UNBLOCK        0x02	/* for unblocking signals */
 #define SIG_SETMASK        0x04	/* for setting the signal mask */
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	4096
 #define SIGSTKSZ	16384
 
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index f1ddc0d..4435488 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -43,10 +43,6 @@
 {
 	void *ret;
 
-	/* We handle the zero case fine, unlike vmalloc */
-	if (size == 0)
-		return NULL;
-
 	ret = module_map(size);
 	if (ret)
 		memset(ret, 0, size);
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index 03c7e92..4a4cdc6 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -211,20 +211,6 @@
 	return sys_sysfs(option, arg1, arg2);
 }
 
-asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs ();
-	
-	set_fs (KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t);
-	set_fs (old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 asmlinkage long compat_sys_rt_sigprocmask(int how,
 					  compat_sigset_t __user *set,
 					  compat_sigset_t __user *oset,
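
With __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL defined (see the
unistd.h hunk above), the open-coded set_fs(KERNEL_DS) wrapper removed
here is replaced by the generic compat implementation; the same wrapper
is dropped from tile further down. From user space the syscall behaves
as before, e.g. this standalone check:

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* pid 0 means "the calling thread"; for tasks that are not
	 * SCHED_RR the reported value is scheduler-dependent. */
	if (sched_rr_get_interval(0, &ts) == 0)
		printf("RR interval: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
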
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index ea7f61e..875d008 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -21,8 +21,6 @@
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index ca61fb4..88f3c22 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -296,8 +296,6 @@
 long compat_sys_fallocate(int fd, int mode,
 			  u32 offset_lo, u32 offset_hi,
 			  u32 len_lo, u32 len_hi);
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-				      struct compat_timespec __user *interval);
 
 /* Assembly trampoline to avoid clobbering r0. */
 long _compat_sys_rt_sigreturn(void);
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index 4b6247d..f2ff191 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -72,6 +72,7 @@
 static inline int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
+	debug_dma_mapping_error(dev, dma_addr);
 	return get_dma_ops(dev)->mapping_error(dev, dma_addr);
 }
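
debug_dma_mapping_error() here (and in the matching sparc and x86
hunks) tells the CONFIG_DMA_API_DEBUG layer that the driver did check
the result of a DMA mapping. The driver-side contract it verifies looks
like the following kernel-context fragment (a sketch, not standalone;
dev, buf and len are assumed to exist):

	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, dma)) {
		/* With CONFIG_DMA_API_DEBUG, the hook above marks this
		 * mapping as checked, silencing the "unchecked DMA
		 * mapping" warning. */
		return -ENOMEM;
	}
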
 
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index b73e103..ff8a934 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -170,4 +170,6 @@
 
 #endif /* CONFIG_COMPAT */
 
+#define CORE_DUMP_USE_REGSET
+
 #endif /* _ASM_TILE_ELF_H */
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index 1a4fd9a..2e83fc1 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -24,8 +24,7 @@
 #include <uapi/asm/ptrace.h>
 
 #define PTRACE_O_MASK_TILE	(PTRACE_O_TRACEMIGRATE)
-#define PT_TRACE_MIGRATE	0x00080000
-#define PT_TRACE_MASK_TILE	(PT_TRACE_MIGRATE)
+#define PT_TRACE_MIGRATE	PT_EVENT_FLAG(PTRACE_EVENT_MIGRATE)
 
 /* Flag bits in pt_regs.flags */
 #define PT_FLAGS_DISABLE_IRQ    1  /* on return to kernel, disable irqs */
@@ -36,6 +35,7 @@
 
 #define instruction_pointer(regs) ((regs)->pc)
 #define profile_pc(regs) instruction_pointer(regs)
+#define user_stack_pointer(regs) ((regs)->sp)
 
 /* Does the process account for user or for system time? */
 #define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index b51c6ee..6ac2103 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -14,8 +14,8 @@
 /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
 #ifdef CONFIG_COMPAT
 #define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_NEWFSTATAT
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/tile/include/uapi/asm/ptrace.h b/arch/tile/include/uapi/asm/ptrace.h
index c717d0f..7757e19 100644
--- a/arch/tile/include/uapi/asm/ptrace.h
+++ b/arch/tile/include/uapi/asm/ptrace.h
@@ -81,8 +81,14 @@
 #define PTRACE_SETFPREGS	15
 
 /* Support TILE-specific ptrace options, with events starting at 16. */
-#define PTRACE_O_TRACEMIGRATE	0x00010000
 #define PTRACE_EVENT_MIGRATE	16
+#define PTRACE_O_TRACEMIGRATE	(1 << PTRACE_EVENT_MIGRATE)
 
+/*
+ * Flag bits in pt_regs.flags that are part of the ptrace API.
+ * We start our numbering higher up to avoid confusion with the
+ * non-ABI kernel-internal values that use the low 16 bits.
+ */
+#define PT_FLAGS_COMPAT		0x10000  /* process is an -m32 compat process */
 
 #endif /* _UAPI_ASM_TILE_PTRACE_H */
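
Deriving the option bit from the event number follows the generic
ptrace convention (PTRACE_O_x == 1 << PTRACE_EVENT_x) and leaves the
ABI value unchanged: 1 << 16 is exactly the old hard-coded 0x00010000.

#include <assert.h>
#include <stdio.h>

#define PTRACE_EVENT_MIGRATE	16
#define PTRACE_O_TRACEMIGRATE	(1 << PTRACE_EVENT_MIGRATE)

int main(void)
{
	assert(PTRACE_O_TRACEMIGRATE == 0x00010000);
	printf("event %d <-> option bit %#x\n",
	       PTRACE_EVENT_MIGRATE, PTRACE_O_TRACEMIGRATE);
	return 0;
}
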
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index 9cd7cb6..7f72401 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -76,24 +76,6 @@
 			     ((loff_t)len_hi << 32) | len_lo);
 }
 
-
-
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-				      struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid,
-					(struct timespec __force __user *)&t);
-	set_fs(old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 /* Provide the compat syscall number to call mapping. */
 #undef __SYSCALL
 #define __SYSCALL(nr, call) [nr] = (call),
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 243ffeb..4918d91 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -42,8 +42,6 @@
 	int i = 0;
 	int npages;
 
-	if (size == 0)
-		return NULL;
 	npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 	pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
 	if (pages == NULL)
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 7598226..aac1cd5 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -245,7 +245,7 @@
 	u16 new_values;
 
 	/* Scan for the smallest maximum payload size. */
-	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+	for_each_pci_dev(dev) {
 		u32 devcap;
 		int max_payload;
 
@@ -260,7 +260,7 @@
 
 	/* Now, set the max_payload_size for all devices to that value. */
 	new_values = (max_read_size << 12) | (smallest_max_payload << 5);
-	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL)
+	for_each_pci_dev(dev)
 		pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
 				PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ,
 				new_values);
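
for_each_pci_dev() is, in this era of the tree, a thin wrapper around
the same pci_get_device() loop, so the conversion above is purely a
readability change:

/* include/linux/pci.h */
#define for_each_pci_dev(d) \
	while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)
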
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 2ba6d05..94810d4 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1047,8 +1047,7 @@
 }
 
 /* Called for each device after PCI setup is done. */
-static void __init
-pcibios_fixup_final(struct pci_dev *pdev)
+static void pcibios_fixup_final(struct pci_dev *pdev)
 {
 	set_dma_ops(&pdev->dev, gx_pci_dma_map_ops);
 	set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
index e92e405..9835312 100644
--- a/arch/tile/kernel/ptrace.c
+++ b/arch/tile/kernel/ptrace.c
@@ -19,7 +19,10 @@
 #include <linux/kprobes.h>
 #include <linux/compat.h>
 #include <linux/uaccess.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
 #include <asm/traps.h>
+#include <arch/chip.h>
 
 void user_enable_single_step(struct task_struct *child)
 {
@@ -45,6 +48,100 @@
 	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 }
 
+/*
+ * Get registers from task and ready the result for userspace.
+ * Note that we localize the API issues to getregs() and putregs() at
+ * some cost in performance, e.g. we need a full pt_regs copy for
+ * PEEKUSR, and two copies for POKEUSR.  But in general we expect
+ * GETREGS/PUTREGS to be the API of choice anyway.
+ */
+static char *getregs(struct task_struct *child, struct pt_regs *uregs)
+{
+	*uregs = *task_pt_regs(child);
+
+	/* Set up flags ABI bits. */
+	uregs->flags = 0;
+#ifdef CONFIG_COMPAT
+	if (task_thread_info(child)->status & TS_COMPAT)
+		uregs->flags |= PT_FLAGS_COMPAT;
+#endif
+
+	return (char *)uregs;
+}
+
+/* Put registers back to task. */
+static void putregs(struct task_struct *child, struct pt_regs *uregs)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+
+	/* Don't allow overwriting the kernel-internal flags word. */
+	uregs->flags = regs->flags;
+
+	/* Only allow setting the ICS bit in the ex1 word. */
+	uregs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(uregs->ex1));
+
+	*regs = *uregs;
+}
+
+enum tile_regset {
+	REGSET_GPR,
+};
+
+static int tile_gpr_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	struct pt_regs regs;
+
+	getregs(target, &regs);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &regs, 0,
+				   sizeof(regs));
+}
+
+static int tile_gpr_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct pt_regs regs;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &regs, 0,
+				 sizeof(regs));
+	if (ret)
+		return ret;
+
+	putregs(target, &regs);
+
+	return 0;
+}
+
+static const struct user_regset tile_user_regset[] = {
+	[REGSET_GPR] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(elf_greg_t),
+		.align = sizeof(elf_greg_t),
+		.get = tile_gpr_get,
+		.set = tile_gpr_set,
+	},
+};
+
+static const struct user_regset_view tile_user_regset_view = {
+	.name = CHIP_ARCH_NAME,
+	.e_machine = ELF_ARCH,
+	.ei_osabi = ELF_OSABI,
+	.regsets = tile_user_regset,
+	.n = ARRAY_SIZE(tile_user_regset),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &tile_user_regset_view;
+}
+
 long arch_ptrace(struct task_struct *child, long request,
 		 unsigned long addr, unsigned long data)
 {
@@ -53,14 +150,13 @@
 	long ret = -EIO;
 	char *childreg;
 	struct pt_regs copyregs;
-	int ex1_offset;
 
 	switch (request) {
 
 	case PTRACE_PEEKUSR:  /* Read register from pt_regs. */
 		if (addr >= PTREGS_SIZE)
 			break;
-		childreg = (char *)task_pt_regs(child) + addr;
+		childreg = getregs(child, &copyregs) + addr;
 #ifdef CONFIG_COMPAT
 		if (is_compat_task()) {
 			if (addr & (sizeof(compat_long_t)-1))
@@ -79,17 +175,7 @@
 	case PTRACE_POKEUSR:  /* Write register in pt_regs. */
 		if (addr >= PTREGS_SIZE)
 			break;
-		childreg = (char *)task_pt_regs(child) + addr;
-
-		/* Guard against overwrites of the privilege level. */
-		ex1_offset = PTREGS_OFFSET_EX1;
-#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN)
-		if (is_compat_task())   /* point at low word */
-			ex1_offset += sizeof(compat_long_t);
-#endif
-		if (addr == ex1_offset)
-			data = PL_ICS_EX1(USER_PL, EX1_ICS(data));
-
+		childreg = getregs(child, &copyregs) + addr;
 #ifdef CONFIG_COMPAT
 		if (is_compat_task()) {
 			if (addr & (sizeof(compat_long_t)-1))
@@ -102,24 +188,20 @@
 				break;
 			*(long *)childreg = data;
 		}
+		putregs(child, &copyregs);
 		ret = 0;
 		break;
 
 	case PTRACE_GETREGS:  /* Get all registers from the child. */
-		if (copy_to_user(datap, task_pt_regs(child),
-				 sizeof(struct pt_regs)) == 0) {
-			ret = 0;
-		}
+		ret = copy_regset_to_user(child, &tile_user_regset_view,
+					  REGSET_GPR, 0,
+					  sizeof(struct pt_regs), datap);
 		break;
 
 	case PTRACE_SETREGS:  /* Set all registers in the child. */
-		if (copy_from_user(&copyregs, datap,
-				   sizeof(struct pt_regs)) == 0) {
-			copyregs.ex1 =
-				PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1));
-			*task_pt_regs(child) = copyregs;
-			ret = 0;
-		}
+		ret = copy_regset_from_user(child, &tile_user_regset_view,
+					    REGSET_GPR, 0,
+					    sizeof(struct pt_regs), datap);
 		break;
 
 	case PTRACE_GETFPREGS:  /* Get the child FPU state. */
@@ -128,12 +210,16 @@
 
 	case PTRACE_SETOPTIONS:
 		/* Support TILE-specific ptrace options. */
-		child->ptrace &= ~PT_TRACE_MASK_TILE;
+		BUILD_BUG_ON(PTRACE_O_MASK_TILE & PTRACE_O_MASK);
 		tmp = data & PTRACE_O_MASK_TILE;
 		data &= ~PTRACE_O_MASK_TILE;
 		ret = ptrace_request(child, request, addr, data);
-		if (tmp & PTRACE_O_TRACEMIGRATE)
-			child->ptrace |= PT_TRACE_MIGRATE;
+		if (ret == 0) {
+			unsigned int flags = child->ptrace;
+			flags &= ~(PTRACE_O_MASK_TILE << PT_OPT_FLAG_SHIFT);
+			flags |= (tmp << PT_OPT_FLAG_SHIFT);
+			child->ptrace = flags;
+		}
 		break;
 
 	default:
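
Besides serving PTRACE_GETREGS/SETREGS, the regset view registered here
is what the generic PTRACE_GETREGSET request and regset-based core
dumps (CORE_DUMP_USE_REGSET, added in elf.h above) consume. A
user-space sketch of the generic request (error handling trimmed; the
pt_regs layout is arch-specific, so the buffer is treated as opaque):

#include <elf.h>		/* NT_PRSTATUS */
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}
	waitpid(pid, NULL, 0);

	unsigned char buf[1024];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

	/* Routed through the .get hook of the NT_PRSTATUS regset. */
	if (ptrace(PTRACE_GETREGSET, pid, (void *)(long)NT_PRSTATUS,
		   &iov) == 0)
		printf("read %zu bytes of GPR state\n", iov.iov_len);

	kill(pid, SIGKILL);
	return 0;
}
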
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 49e3b49..4bd82ac 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -123,7 +123,7 @@
 
 void mconsole_proc(struct mc_request *req)
 {
-	struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
+	struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
 	char *buf;
 	int len;
 	struct file *file;
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index db18eb6..48ccf71 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -132,8 +132,3 @@
 	siginitset(&blocked, mask);
 	return sigsuspend(&blocked);
 }
-
-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
-{
-	return do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs));
-}
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index c4fbb21..60651df 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -16,8 +16,6 @@
 	select ARCH_WANT_FRAME_POINTERS
 	select GENERIC_IOMAP
 	select MODULES_USE_ELF_REL
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
diff --git a/arch/unicore32/include/asm/ptrace.h b/arch/unicore32/include/asm/ptrace.h
index 726749d..9df53d9 100644
--- a/arch/unicore32/include/asm/ptrace.h
+++ b/arch/unicore32/include/asm/ptrace.h
@@ -54,6 +54,7 @@
 }
 
 #define instruction_pointer(regs)	((regs)->UCreg_pc)
+#define user_stack_pointer(regs)	((regs)->UCreg_sp)
 
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index 00cf5e2..d4cc455 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -12,5 +12,4 @@
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/unicore32/kernel/module.c b/arch/unicore32/kernel/module.c
index 8fbe857..16bd149 100644
--- a/arch/unicore32/kernel/module.c
+++ b/arch/unicore32/kernel/module.c
@@ -27,9 +27,6 @@
 	struct vm_struct *area;
 
 	size = PAGE_ALIGN(size);
-	if (!size)
-		return NULL;
-
 	area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END);
 	if (!area)
 		return NULL;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 65a872b..79795af 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,8 @@
 	def_bool y
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
@@ -108,11 +110,10 @@
 	select GENERIC_STRNLEN_USER
 	select HAVE_CONTEXT_TRACKING if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
+	select GENERIC_SIGALTSTACK
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 05afcca..e71fc42 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -123,9 +123,10 @@
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
+avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
 
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index efc6a95..a1daf4a 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -136,52 +136,6 @@
 	return sigsuspend(&blocked);
 }
 
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
-				  stack_ia32_t __user *uoss_ptr,
-				  struct pt_regs *regs)
-{
-	stack_t uss, uoss;
-	int ret, err = 0;
-	mm_segment_t seg;
-
-	if (uss_ptr) {
-		u32 ptr;
-
-		memset(&uss, 0, sizeof(stack_t));
-		if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)))
-			return -EFAULT;
-
-		get_user_try {
-			get_user_ex(ptr, &uss_ptr->ss_sp);
-			get_user_ex(uss.ss_flags, &uss_ptr->ss_flags);
-			get_user_ex(uss.ss_size, &uss_ptr->ss_size);
-		} get_user_catch(err);
-
-		if (err)
-			return -EFAULT;
-		uss.ss_sp = compat_ptr(ptr);
-	}
-	seg = get_fs();
-	set_fs(KERNEL_DS);
-	ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
-			     (stack_t __force __user *) &uoss, regs->sp);
-	set_fs(seg);
-	if (ret >= 0 && uoss_ptr)  {
-		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
-			return -EFAULT;
-
-		put_user_try {
-			put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp);
-			put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags);
-			put_user_ex(uoss.ss_size, &uoss_ptr->ss_size);
-		} put_user_catch(err);
-
-		if (err)
-			ret = -EFAULT;
-	}
-	return ret;
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
@@ -292,7 +246,6 @@
 	struct rt_sigframe_ia32 __user *frame;
 	sigset_t set;
 	unsigned int ax;
-	struct pt_regs tregs;
 
 	frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
@@ -306,8 +259,7 @@
 	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	tregs = *regs;
-	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
@@ -515,10 +467,7 @@
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		if (ka->sa.sa_flags & SA_RESTORER)
 			restorer = ka->sa.sa_restorer;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 32e6f05..102ff7c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -464,7 +464,6 @@
 
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
-	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
 	PTREGSCALL stub32_execve, compat_sys_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f7b4c79..808dae6 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -47,6 +47,7 @@
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
+	debug_dma_mapping_error(dev, dma_addr);
 	if (ops->mapping_error)
 		return ops->mapping_error(dev, dma_addr);
 
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index fd13815..6e8fdf5 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -69,37 +69,23 @@
 	efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5), (u64)(a6))
 
-extern unsigned long efi_call_virt_prelog(void);
-extern void efi_call_virt_epilog(unsigned long);
-
-#define efi_callx(x, func, ...)					\
-	({							\
-		efi_status_t __status;				\
-		unsigned long __pgd;				\
-								\
-		__pgd = efi_call_virt_prelog();			\
-		__status = efi_call##x(func, __VA_ARGS__);	\
-		efi_call_virt_epilog(__pgd);			\
-		__status;					\
-	})
-
 #define efi_call_virt0(f)				\
-	efi_callx(0, (void *)(efi.systab->runtime->f))
+	efi_call0((void *)(efi.systab->runtime->f))
 #define efi_call_virt1(f, a1)					\
-	efi_callx(1, (void *)(efi.systab->runtime->f), (u64)(a1))
+	efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
 #define efi_call_virt2(f, a1, a2)					\
-	efi_callx(2, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+	efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
 #define efi_call_virt3(f, a1, a2, a3)					\
-	efi_callx(3, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3))
 #define efi_call_virt4(f, a1, a2, a3, a4)				\
-	efi_callx(4, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4))
 #define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
-	efi_callx(5, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5))
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_callx(6, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index e623277..4c6da2e 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -29,16 +29,10 @@
 	unsigned int sa_restorer;	/* Another 32 bit pointer */
 };
 
-typedef struct sigaltstack_ia32 {
-	unsigned int	ss_sp;
-	int		ss_flags;
-	unsigned int	ss_size;
-} stack_ia32_t;
-
 struct ucontext_ia32 {
 	unsigned int	  uc_flags;
 	unsigned int 	  uc_link;
-	stack_ia32_t	  uc_stack;
+	compat_stack_t	  uc_stack;
 	struct sigcontext_ia32 uc_mcontext;
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
@@ -46,7 +40,7 @@
 struct ucontext_x32 {
 	unsigned int	  uc_flags;
 	unsigned int 	  uc_link;
-	stack_ia32_t	  uc_stack;
+	compat_stack_t	  uc_stack;
 	unsigned int	  uc__pad0;     /* needed for alignment */
 	struct sigcontext uc_mcontext;  /* the 64-bit sigcontext type */
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a0facf3..5edd174 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -528,7 +528,6 @@
 		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t pmd)
 {
@@ -539,7 +538,6 @@
 		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
 			    native_pmd_val(pmd));
 }
-#endif
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a1f780d..5199db2 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -404,7 +404,14 @@
 
 static inline int pte_present(pte_t a)
 {
-	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
+			       _PAGE_NUMA);
+}
+
+#define pte_accessible pte_accessible
+static inline int pte_accessible(pte_t a)
+{
+	return pte_flags(a) & _PAGE_PRESENT;
 }
 
 static inline int pte_hidden(pte_t pte)
@@ -420,7 +427,8 @@
 	 * the _PAGE_PSE flag will remain set at all times while the
 	 * _PAGE_PRESENT bit is clear).
 	 */
-	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
+	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
+				 _PAGE_NUMA);
 }
 
 static inline int pmd_none(pmd_t pmd)
@@ -479,6 +487,11 @@
 
 static inline int pmd_bad(pmd_t pmd)
 {
+#ifdef CONFIG_NUMA_BALANCING
+	/* pmd_numa check */
+	if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
+		return 0;
+#endif
 	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
 }
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ec8a1fc..3c32db8 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -64,6 +64,26 @@
 #define _PAGE_FILE	(_AT(pteval_t, 1) << _PAGE_BIT_FILE)
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
+/*
+ * _PAGE_NUMA indicates that this page will trigger a numa hinting
+ * minor page fault to gather numa placement statistics (see
+ * pte_numa()). The bit picked (8) is within the range between
+ * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
+ * require changes to the swp entry format because that bit is always
+ * zero when the pte is not present.
+ *
+ * The bit picked must read as zero both when the pmd is present and
+ * when it is not, so that we don't lose information when we set it
+ * while atomically clearing the present bit.
+ *
+ * Because we share the same bit (8) with _PAGE_PROTNONE, this can be
+ * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
+ * couldn't reach, like handle_mm_fault() (see access_error in
+ * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
+ * handle_mm_fault() to be invoked).
+ */
+#define _PAGE_NUMA	_PAGE_PROTNONE
+
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
 			 _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 03ca442..942a086 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -133,6 +133,13 @@
 	return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
 #endif
 }
+
+#define current_user_stack_pointer()	this_cpu_read(old_rsp)
+/* ia32 vs. x32 difference */
+#define compat_user_stack_pointer()	\
+	(test_thread_flag(TIF_IA32) 	\
+	 ? current_pt_regs()->sp 	\
+	 : this_cpu_read(old_rsp))
 #endif
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index c76fae4..31f61f9 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -69,8 +69,6 @@
 
 /* ia32/ia32_signal.c */
 asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
-				  stack_ia32_t __user *, struct pt_regs *);
 asmlinkage long sys32_sigreturn(struct pt_regs *);
 asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
 
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 2f83747..58b7e3e 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -25,9 +25,6 @@
 
 /* kernel/signal.c */
 long sys_rt_sigreturn(struct pt_regs *);
-long sys_sigaltstack(const stack_t __user *, stack_t __user *,
-		     struct pt_regs *);
-
 
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 1003e69a..a0790e0 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -48,7 +48,6 @@
 # define __ARCH_WANT_SYS_TIME
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
-# define __ARCH_WANT_SYS_EXECVE
 # define __ARCH_WANT_SYS_FORK
 # define __ARCH_WANT_SYS_VFORK
 # define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/include/uapi/asm/hw_breakpoint.h b/arch/x86/include/uapi/asm/hw_breakpoint.h
index e69de29..79a9626 100644
--- a/arch/x86/include/uapi/asm/hw_breakpoint.h
+++ b/arch/x86/include/uapi/asm/hw_breakpoint.h
@@ -0,0 +1 @@
+/* */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 6e930b2..433a59f 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -35,11 +35,14 @@
 #define MSR_IA32_PERFCTR0		0x000000c1
 #define MSR_IA32_PERFCTR1		0x000000c2
 #define MSR_FSB_FREQ			0x000000cd
+#define MSR_NHM_PLATFORM_INFO		0x000000ce
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL	0x000000e2
 #define NHM_C3_AUTO_DEMOTE		(1UL << 25)
 #define NHM_C1_AUTO_DEMOTE		(1UL << 26)
 #define ATM_LNC_C6_AUTO_DEMOTE		(1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE		(1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
 
 #define MSR_MTRRcap			0x000000fe
 #define MSR_IA32_BBL_CR_CTL		0x00000119
@@ -55,6 +58,8 @@
 
 #define MSR_OFFCORE_RSP_0		0x000001a6
 #define MSR_OFFCORE_RSP_1		0x000001a7
+#define MSR_NHM_TURBO_RATIO_LIMIT	0x000001ad
+#define MSR_IVT_TURBO_RATIO_LIMIT	0x000001ae
 
 #define MSR_LBR_SELECT			0x000001c8
 #define MSR_LBR_TOS			0x000001c9
@@ -103,6 +108,38 @@
 #define MSR_IA32_MC0_ADDR		0x00000402
 #define MSR_IA32_MC0_MISC		0x00000403
 
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY		0x000003f8
+#define MSR_PKG_C6_RESIDENCY		0x000003f9
+#define MSR_PKG_C7_RESIDENCY		0x000003fa
+#define MSR_CORE_C3_RESIDENCY		0x000003fc
+#define MSR_CORE_C6_RESIDENCY		0x000003fd
+#define MSR_CORE_C7_RESIDENCY		0x000003fe
+#define MSR_PKG_C2_RESIDENCY		0x0000060d
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT		0x00000606
+
+#define MSR_PKG_POWER_LIMIT		0x00000610
+#define MSR_PKG_ENERGY_STATUS		0x00000611
+#define MSR_PKG_PERF_STATUS		0x00000613
+#define MSR_PKG_POWER_INFO		0x00000614
+
+#define MSR_DRAM_POWER_LIMIT		0x00000618
+#define MSR_DRAM_ENERGY_STATUS		0x00000619
+#define MSR_DRAM_PERF_STATUS		0x0000061b
+#define MSR_DRAM_POWER_INFO		0x0000061c
+
+#define MSR_PP0_POWER_LIMIT		0x00000638
+#define MSR_PP0_ENERGY_STATUS		0x00000639
+#define MSR_PP0_POLICY			0x0000063a
+#define MSR_PP0_PERF_STATUS		0x0000063b
+
+#define MSR_PP1_POWER_LIMIT		0x00000640
+#define MSR_PP1_ENERGY_STATUS		0x00000641
+#define MSR_PP1_POLICY			0x00000642
+
 #define MSR_AMD64_MC0_MASK		0xc0010044
 
 #define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
diff --git a/arch/x86/include/uapi/asm/setup.h b/arch/x86/include/uapi/asm/setup.h
index e69de29..79a9626 100644
--- a/arch/x86/include/uapi/asm/setup.h
+++ b/arch/x86/include/uapi/asm/setup.h
@@ -0,0 +1 @@
+/* */
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index 0818f9a..aa7d6ae 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -87,12 +87,6 @@
 
 #define SA_RESTORER	0x04000000
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
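
SS_ONSTACK and SS_DISABLE now come from a shared generic uapi header as
part of the sigaltstack consolidation (see GENERIC_SIGALTSTACK and the
per-arch stub removals elsewhere in this series); the user-visible
semantics do not change. For reference, the classic usage pattern:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void handler(int sig)
{
	stack_t ss;

	(void)sig;
	sigaltstack(NULL, &ss);
	/* printf is not async-signal-safe; demo only. */
	printf("on alt stack: %s\n",
	       (ss.ss_flags & SS_ONSTACK) ? "yes" : "no");
}

int main(void)
{
	stack_t ss = {
		.ss_sp = malloc(SIGSTKSZ),
		.ss_size = SIGSTKSZ,
		.ss_flags = 0,
	};
	struct sigaction sa = {
		.sa_handler = handler,
		.sa_flags = SA_ONSTACK,
	};

	if (ss.ss_sp == NULL || sigaltstack(&ss, NULL) != 0)
		return 1;
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}
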
 
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index fbd8955..3286a92 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -26,11 +26,6 @@
 #ifdef CONFIG_X86_32
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
-	/*
-	 * We use exception 16 if we have hardware math and we've either seen
-	 * it or the CPU claims it is internal
-	 */
-	int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
 	seq_printf(m,
 		   "fdiv_bug\t: %s\n"
 		   "hlt_bug\t\t: %s\n"
@@ -45,7 +40,7 @@
 		   c->f00f_bug ? "yes" : "no",
 		   c->coma_bug ? "yes" : "no",
 		   c->hard_math ? "yes" : "no",
-		   fpu_exception ? "yes" : "no",
+		   c->hard_math ? "yes" : "no",
 		   c->cpuid_level,
 		   c->wp_works_ok ? "yes" : "no");
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c763116..ff84d54 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -739,7 +739,6 @@
 ENDPROC(ptregs_##name)
 
 PTREGSCALL1(iopl)
-PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
 PTREGSCALL2(vm86)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 70641af..07a7a04 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -864,7 +864,6 @@
 	FORK_LIKE  clone
 	FORK_LIKE  fork
 	FORK_LIKE  vfork
-	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
 	PTREGSCALL stub_iopl, sys_iopl, %rsi
 
 ENTRY(ptregscall_common)
@@ -913,8 +912,6 @@
 END(stub_rt_sigreturn)
 
 #ifdef CONFIG_X86_X32_ABI
-	PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx
-
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 6e03b0d..7dc4e45 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -42,39 +42,6 @@
  * (these are usually mapped into the 0x30-0xff vector range)
  */
 
-#ifdef CONFIG_X86_32
-/*
- * Note that on a 486, we don't want to do a SIGFPE on an irq13
- * as the irq is unreliable, and exception 16 works correctly
- * (ie as explained in the intel literature). On a 386, you
- * can't use exception 16 due to bad IBM design, so we have to
- * rely on the less exact irq13.
- *
- * Careful.. Not only is IRQ13 unreliable, but it is also
- * leads to races. IBM designers who came up with it should
- * be shot.
- */
-
-static irqreturn_t math_error_irq(int cpl, void *dev_id)
-{
-	outb(0, 0xF0);
-	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
-		return IRQ_NONE;
-	math_error(get_irq_regs(), 0, X86_TRAP_MF);
-	return IRQ_HANDLED;
-}
-
-/*
- * New motherboards sometimes make IRQ 13 be a PCI interrupt,
- * so allow interrupt sharing.
- */
-static struct irqaction fpu_irq = {
-	.handler = math_error_irq,
-	.name = "fpu",
-	.flags = IRQF_NO_THREAD,
-};
-#endif
-
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
  */
@@ -242,13 +209,6 @@
 		setup_irq(2, &irq2);
 
 #ifdef CONFIG_X86_32
-	/*
-	 * External FPU? Set up irq13 if so, for
-	 * original braindamaged IBM FERR coupling.
-	 */
-	if (boot_cpu_data.hard_math && !cpu_has_fpu)
-		setup_irq(FPU_IRQ, &fpu_irq);
-
 	irq_ctx_init(smp_processor_id());
 #endif
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index fbbb604..d6bf1f3 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -364,10 +364,7 @@
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		/* Set up to return from userspace.  */
 		restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -414,7 +411,6 @@
 	struct rt_sigframe __user *frame;
 	void __user *fp = NULL;
 	int err = 0;
-	struct task_struct *me = current;
 
 	frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
 
@@ -433,10 +429,7 @@
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		/* Set up to return from userspace.  If provided, use a stub
 		   already in userspace.  */
@@ -503,10 +496,7 @@
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
 		put_user_ex(0, &frame->uc.uc__pad0);
 
 		if (ka->sa.sa_flags & SA_RESTORER) {
@@ -603,13 +593,6 @@
 }
 #endif /* CONFIG_X86_32 */
 
-long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
@@ -659,7 +642,7 @@
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
@@ -865,7 +848,6 @@
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
 	unsigned long ax;
-	struct pt_regs tregs;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -879,8 +861,7 @@
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	tregs = *regs;
-	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index d4f460f..f84fe00 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -103,13 +103,71 @@
 	pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
 }
 
+static pgd_t *tboot_pg_dir;
+static struct mm_struct tboot_mm = {
+	.mm_rb          = RB_ROOT,
+	.pgd            = swapper_pg_dir,
+	.mm_users       = ATOMIC_INIT(2),
+	.mm_count       = ATOMIC_INIT(1),
+	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
+};
+
 static inline void switch_to_tboot_pt(void)
 {
-#ifdef CONFIG_X86_32
-	load_cr3(initial_page_table);
-#else
-	write_cr3(real_mode_header->trampoline_pgd);
-#endif
+	write_cr3(virt_to_phys(tboot_pg_dir));
+}
+
+static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+			  pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(&tboot_mm, vaddr);
+	pud = pud_alloc(&tboot_mm, pgd, vaddr);
+	if (!pud)
+		return -1;
+	pmd = pmd_alloc(&tboot_mm, pud, vaddr);
+	if (!pmd)
+		return -1;
+	pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
+	if (!pte)
+		return -1;
+	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
+	pte_unmap(pte);
+	return 0;
+}
+
+static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
+			   unsigned long nr)
+{
+	/* Reuse the original kernel mapping */
+	tboot_pg_dir = pgd_alloc(&tboot_mm);
+	if (!tboot_pg_dir)
+		return -1;
+
+	for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
+		if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
+			return -1;
+	}
+
+	return 0;
+}
+
+static void tboot_create_trampoline(void)
+{
+	u32 map_base, map_size;
+
+	/* Create identity map for tboot shutdown code. */
+	map_base = PFN_DOWN(tboot->tboot_base);
+	map_size = PFN_UP(tboot->tboot_size);
+	if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
+		panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
+		      map_base, map_size);
 }
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -167,6 +225,14 @@
 	if (!tboot_enabled())
 		return;
 
+	/*
+	 * if we're being called before the 1:1 mapping is set up then just
+	 * return and let the normal shutdown happen; this should only be
+	 * due to very early panic()
+	 */
+	if (!tboot_pg_dir)
+		return;
+
 	/* if this is S3 then set regions to MAC */
 	if (shutdown_type == TB_SHUTDOWN_S3)
 		if (tboot_setup_sleep())
@@ -277,6 +343,8 @@
 	if (!tboot_enabled())
 		return 0;
 
+	tboot_create_trampoline();
+
 	atomic_set(&ap_wfs_count, 0);
 	register_hotcpu_notifier(&tboot_cpu_notifier);
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index eb85866..ecffca1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -69,9 +69,6 @@
 
 asmlinkage int system_call(void);
 
-/* Do we ignore FPU interrupts ? */
-char ignore_fpu_irq;
-
 /*
  * The IDT has to be page-aligned to simplify the Pentium
  * F0 0F bug workaround.
@@ -564,9 +561,6 @@
 
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 {
-#ifdef CONFIG_X86_32
-	ignore_fpu_irq = 1;
-#endif
 	exception_enter(regs);
 	math_error(regs, error_code, X86_TRAP_MF);
 	exception_exit(regs);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3a3e8c9..9a907a6 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -145,19 +145,6 @@
 	return nr;
 }
 
-#ifdef CONFIG_SECCOMP
-static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
-{
-	if (!seccomp_mode(&tsk->seccomp))
-		return 0;
-	task_pt_regs(tsk)->orig_ax = syscall_nr;
-	task_pt_regs(tsk)->ax = syscall_nr;
-	return __secure_computing(syscall_nr);
-}
-#else
-#define vsyscall_seccomp(_tsk, _nr) 0
-#endif
-
 static bool write_ok_or_segv(unsigned long ptr, size_t size)
 {
 	/*
@@ -190,10 +177,9 @@
 {
 	struct task_struct *tsk;
 	unsigned long caller;
-	int vsyscall_nr;
+	int vsyscall_nr, syscall_nr, tmp;
 	int prev_sig_on_uaccess_error;
 	long ret;
-	int skip;
 
 	/*
 	 * No point in checking CS -- the only way to get here is a user mode
@@ -225,6 +211,64 @@
 	}
 
 	tsk = current;
+
+	/*
+	 * Check for access_ok violations and find the syscall nr.
+	 *
+	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
+	 * 64-bit, so we don't need to special-case it here.  For all the
+	 * vsyscalls, NULL means "don't write anything" not "write it at
+	 * address 0".
+	 */
+	switch (vsyscall_nr) {
+	case 0:
+		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
+			ret = -EFAULT;
+			goto check_fault;
+		}
+
+		syscall_nr = __NR_gettimeofday;
+		break;
+
+	case 1:
+		if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
+			ret = -EFAULT;
+			goto check_fault;
+		}
+
+		syscall_nr = __NR_time;
+		break;
+
+	case 2:
+		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+		    !write_ok_or_segv(regs->si, sizeof(unsigned))) {
+			ret = -EFAULT;
+			goto check_fault;
+		}
+
+		syscall_nr = __NR_getcpu;
+		break;
+	}
+
+	/*
+	 * Handle seccomp.  regs->ip must be the original value.
+	 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
+	 *
+	 * We could optimize the seccomp disabled case, but performance
+	 * here doesn't matter.
+	 */
+	regs->orig_ax = syscall_nr;
+	regs->ax = -ENOSYS;
+	tmp = secure_computing(syscall_nr);
+	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
+		warn_bad_vsyscall(KERN_DEBUG, regs,
+				  "seccomp tried to change syscall nr or ip");
+		do_exit(SIGSYS);
+	}
+	if (tmp)
+		goto do_ret;  /* skip requested */
+
 	/*
 	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
 	 * preserve that behavior to make writing exploits harder.
@@ -232,49 +276,19 @@
 	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
 	current_thread_info()->sig_on_uaccess_error = 1;
 
-	/*
-	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
-	 * 64-bit, so we don't need to special-case it here.  For all the
-	 * vsyscalls, NULL means "don't write anything" not "write it at
-	 * address 0".
-	 */
 	ret = -EFAULT;
-	skip = 0;
 	switch (vsyscall_nr) {
 	case 0:
-		skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
-		if (skip)
-			break;
-
-		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
-		    !write_ok_or_segv(regs->si, sizeof(struct timezone)))
-			break;
-
 		ret = sys_gettimeofday(
 			(struct timeval __user *)regs->di,
 			(struct timezone __user *)regs->si);
 		break;
 
 	case 1:
-		skip = vsyscall_seccomp(tsk, __NR_time);
-		if (skip)
-			break;
-
-		if (!write_ok_or_segv(regs->di, sizeof(time_t)))
-			break;
-
 		ret = sys_time((time_t __user *)regs->di);
 		break;
 
 	case 2:
-		skip = vsyscall_seccomp(tsk, __NR_getcpu);
-		if (skip)
-			break;
-
-		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
-		    !write_ok_or_segv(regs->si, sizeof(unsigned)))
-			break;
-
 		ret = sys_getcpu((unsigned __user *)regs->di,
 				 (unsigned __user *)regs->si,
 				 NULL);
@@ -283,12 +297,7 @@
 
 	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
 
-	if (skip) {
-		if ((long)regs->ax <= 0L) /* seccomp errno emulation */
-			goto do_ret;
-		goto done; /* seccomp trace/trap */
-	}
-
+check_fault:
 	if (ret == -EFAULT) {
 		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
 		warn_bad_vsyscall(KERN_INFO, regs,
@@ -311,7 +320,6 @@
 	/* Emulate a ret instruction. */
 	regs->ip = caller;
 	regs->sp += 8;
-done:
 	return true;
 
 sigsegv:
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 931930a..a718e0d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,13 +919,11 @@
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
-	 * avoid the wbindv. If the CPU does not support it, in the
-	 * error case, and during early boot (for EFI) we fall back
-	 * to cpa_flush_all (which uses wbinvd):
+	 * avoid the wbinvd. If the CPU does not support it and in the
+	 * error case we fall back to cpa_flush_all (which uses
+	 * wbinvd):
 	 */
-	if (early_boot_irqs_disabled)
-		__cpa_flush_all((void *)(long)cache);
-	else if (!ret && cpu_has_clflush) {
+	if (!ret && cpu_has_clflush) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 217eb70..e27fbf8 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -301,6 +301,13 @@
 	free_page((unsigned long)pgd);
 }
 
+/*
+ * Used to set accessed or dirty bits in the page table entries
+ * on other architectures. On x86, the accessed and dirty bits
+ * are tracked by hardware. However, do_wp_page calls this function
+ * to also make the pte writeable at the same time the dirty bit is
+ * set. In that case we do actually need to write the PTE.
+ */
 int ptep_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pte_t *ptep,
 			  pte_t entry, int dirty)
@@ -310,7 +317,6 @@
 	if (changed && dirty) {
 		*ptep = entry;
 		pte_update_defer(vma->vm_mm, address, ptep);
-		flush_tlb_page(vma, address);
 	}
 
 	return changed;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0a34d9e..ad443914 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -239,7 +239,22 @@
 	return status;
 }
 
-static int efi_set_rtc_mmss(unsigned long nowtime)
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+					     efi_time_cap_t *tc)
+{
+	unsigned long flags;
+	efi_status_t status;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	efi_call_phys_prelog();
+	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+				virt_to_phys(tc));
+	efi_call_phys_epilog();
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
 {
 	int real_seconds, real_minutes;
 	efi_status_t 	status;
@@ -268,7 +283,7 @@
 	return 0;
 }
 
-static unsigned long efi_get_time(void)
+unsigned long efi_get_time(void)
 {
 	efi_status_t status;
 	efi_time_t eft;
@@ -624,13 +639,18 @@
 	}
 	/*
 	 * We will only need *early* access to the following
-	 * EFI runtime service before set_virtual_address_map
+	 * two EFI runtime services before set_virtual_address_map
 	 * is invoked.
 	 */
+	efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
 		runtime->set_virtual_address_map;
-
+	/*
+	 * Make efi_get_time callable before entering
+	 * virtual mode.
+	 */
+	efi.get_time = phys_efi_get_time;
 	early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
 	return 0;
@@ -716,10 +736,12 @@
 		efi_enabled = 0;
 		return;
 	}
+#ifdef CONFIG_X86_32
 	if (efi_is_native()) {
 		x86_platform.get_wallclock = efi_get_time;
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
+#endif
 
 #if EFI_DEBUG
 	print_efi_memmap();
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 06c8b2e..95fd505 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -58,21 +58,6 @@
 	}
 }
 
-unsigned long efi_call_virt_prelog(void)
-{
-	unsigned long saved;
-
-	saved = read_cr3();
-	write_cr3(real_mode_header->trampoline_pgd);
-
-	return saved;
-}
-
-void efi_call_virt_epilog(unsigned long saved)
-{
-	write_cr3(saved);
-}
-
 void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index 5917eb5..e6cb80f 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -23,6 +23,7 @@
 
 #include <linux/moduleparam.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
@@ -62,29 +63,75 @@
  * by reading its input port and seeing whether the read value is
  * meaningful.
  */
-static int iris_init(void)
+static int iris_probe(struct platform_device *pdev)
 {
-	unsigned char status;
-	if (force != 1) {
-		printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
-		return -ENODEV;
-	}
-	status = inb(IRIS_GIO_INPUT);
+	unsigned char status = inb(IRIS_GIO_INPUT);
 	if (status == IRIS_GIO_NODEV) {
-		printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
+		printk(KERN_ERR "This machine does not seem to be an Iris. "
+			"Power off handler not installed.\n");
 		return -ENODEV;
 	}
 	old_pm_power_off = pm_power_off;
 	pm_power_off = &iris_power_off;
 	printk(KERN_INFO "Iris power_off handler installed.\n");
+	return 0;
+}
 
+static int iris_remove(struct platform_device *pdev)
+{
+	pm_power_off = old_pm_power_off;
+	printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+	return 0;
+}
+
+static struct platform_driver iris_driver = {
+	.driver		= {
+		.name   = "iris",
+		.owner  = THIS_MODULE,
+	},
+	.probe          = iris_probe,
+	.remove         = iris_remove,
+};
+
+static struct resource iris_resources[] = {
+	{
+		.start  = IRIS_GIO_BASE,
+		.end    = IRIS_GIO_OUTPUT,
+		.flags  = IORESOURCE_IO,
+		.name   = "address"
+	}
+};
+
+static struct platform_device *iris_device;
+
+static int iris_init(void)
+{
+	int ret;
+	if (force != 1) {
+		printk(KERN_ERR "The force parameter has not been set to 1."
+			" The Iris poweroff handler will not be installed.\n");
+		return -ENODEV;
+	}
+	ret = platform_driver_register(&iris_driver);
+	if (ret < 0) {
+		printk(KERN_ERR "Failed to register iris platform driver: %d\n",
+			ret);
+		return ret;
+	}
+	iris_device = platform_device_register_simple("iris", (-1),
+				iris_resources, ARRAY_SIZE(iris_resources));
+	if (IS_ERR(iris_device)) {
+		printk(KERN_ERR "Failed to register iris platform device\n");
+		platform_driver_unregister(&iris_driver);
+		return PTR_ERR(iris_device);
+	}
 	return 0;
 }
 
 static void iris_exit(void)
 {
-	pm_power_off = old_pm_power_off;
-	printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+	platform_device_unregister(iris_device);
+	platform_driver_unregister(&iris_driver);
 }
 
 module_init(iris_init);
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index ee3c220..28e3fa9 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -192,7 +192,7 @@
 183	i386	getcwd			sys_getcwd
 184	i386	capget			sys_capget
 185	i386	capset			sys_capset
-186	i386	sigaltstack		ptregs_sigaltstack		stub32_sigaltstack
+186	i386	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
 187	i386	sendfile		sys_sendfile			sys32_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
@@ -356,3 +356,4 @@
 347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
+350	i386	finit_module		sys_finit_module
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a582bfe..dc97328 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -137,7 +137,7 @@
 128	64	rt_sigtimedwait		sys_rt_sigtimedwait
 129	64	rt_sigqueueinfo		sys_rt_sigqueueinfo
 130	common	rt_sigsuspend		sys_rt_sigsuspend
-131	64	sigaltstack		stub_sigaltstack
+131	64	sigaltstack		sys_sigaltstack
 132	common	utime			sys_utime
 133	common	mknod			sys_mknod
 134	64	uselib
@@ -319,6 +319,7 @@
 310	64	process_vm_readv	sys_process_vm_readv
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
+313	common	finit_module		sys_finit_module
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
@@ -337,7 +338,7 @@
 522	x32	rt_sigpending		sys32_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
 524	x32	rt_sigqueueinfo		sys32_rt_sigqueueinfo
-525	x32	sigaltstack		stub_x32_sigaltstack
+525	x32	sigaltstack		compat_sys_sigaltstack
 526	x32	timer_create		compat_sys_timer_create
 527	x32	mq_notify		compat_sys_mq_notify
 528	x32	kexec_load		compat_sys_kexec_load
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9839970..53c90fd 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,8 +13,7 @@
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
+	select GENERIC_SIGALTSTACK
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index 75513325..54f8102 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -86,4 +86,5 @@
 		       unsigned long __user *addr);
 
 #endif
+#define user_stack_pointer(regs) PT_REGS_SP(regs)
 #endif /* __UM_X86_PTRACE_H */
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bdaa08c..71cef48 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -342,9 +342,7 @@
 {
 	int err = 0;
 
-	err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
-	err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
-	err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
+	err |= __save_altstack(&uc->uc_stack, sp);
 	err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, 0);
 	err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
 	return err;
@@ -529,10 +527,7 @@
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
 	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
 			       set->sig[0]);
 	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 812e98c..a0c3b0d 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -27,7 +27,6 @@
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
 #define ptregs_vm86 sys_vm86
-#define ptregs_sigaltstack sys_sigaltstack
 
 #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
 #include <asm/syscalls_32.h>
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 170bd92..f2f0723 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -31,7 +31,6 @@
 #define stub_fork sys_fork
 #define stub_vfork sys_vfork
 #define stub_execve sys_execve
-#define stub_sigaltstack sys_sigaltstack
 #define stub_rt_sigreturn sys_rt_sigreturn
 
 #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3aeaa93..138e566 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -193,10 +193,11 @@
 {
 	int cpu;
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		bool other_cpu = (cpu != smp_processor_id());
+		bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
 
-		if (other_cpu &&
+		if (other_cpu && is_up &&
 		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
 			BUG();
 
@@ -205,7 +206,7 @@
 		if (have_vcpu_info_placement)
 			xen_vcpu_setup(cpu);
 
-		if (other_cpu &&
+		if (other_cpu && is_up &&
 		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 			BUG();
 	}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 353c50f..4f7d259 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -254,7 +254,7 @@
 	}
 	xen_init_lock_cpu(0);
 
-	smp_store_cpu_info(0);
+	smp_store_boot_cpu_info();
 	cpu_data(0).x86_max_cores = 1;
 
 	for_each_possible_cpu(i) {
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 2481f26..5aab1ac 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -13,10 +13,9 @@
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
 	select GENERIC_PCI_IOMAP
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select CLONE_BACKWARDS
+	select IRQ_DOMAIN
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
@@ -150,6 +149,15 @@
 	select SERIAL_CONSOLE
 	select NO_IOPORT
 
+config XTENSA_PLATFORM_XTFPGA
+	bool "XTFPGA"
+	select SERIAL_CONSOLE
+	select ETHOC
+	select XTENSA_CALIBRATE_CCOUNT
+	help
+	  XTFPGA is the name of the Tensilica board family (LX60, LX110, LX200, ML605).
+	  This hardware is capable of running a full Linux distribution.
+
 endchoice
 
 
@@ -177,6 +185,17 @@
 	  time by entering them here. As a minimum, you should specify the
 	  memory size and the root device (e.g., mem=64M root=/dev/nfs).
 
+config USE_OF
+	bool "Flattened Device Tree support"
+	select OF
+	select OF_EARLY_FLATTREE
+	help
+	  Include support for flattened device tree machine descriptions.
+
+config BUILTIN_DTB
+	string "DTB to build into the kernel image"
+	depends on OF
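+	help
+	  Base name (without the .dts extension) of the device tree source
+	  under arch/xtensa/boot/dts to compile and link into the kernel
+	  image, e.g. "lx60" or "ml605".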
+
 source "mm/Kconfig"
 
 source "drivers/pcmcia/Kconfig"
diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug
index 11c5852..a34010e 100644
--- a/arch/xtensa/Kconfig.debug
+++ b/arch/xtensa/Kconfig.debug
@@ -2,6 +2,26 @@
 
 source "lib/Kconfig.debug"
 
+config LD_NO_RELAX
+	bool "Disable linker relaxation"
+	default n
+	help
+	  Enable this option to disable link-time optimizations.
+	  The default linker behavior is to combine identical literal
+	  values to reduce code size and remove unnecessary overhead from
+	  assembler-generated 'longcall' sequences.
+	  Enabling this option shortens the link time but increases the
+	  code size, and possibly the execution time.
+
+config S32C1I_SELFTEST
+	bool "Perform S32C1I instruction self-test at boot"
+	default y
+	help
+	  Enable this option to test S32C1I instruction behavior at boot.
+	  Correct operation of this instruction requires some cooperation from
+	  hardware external to the processor (such as a bus bridge, bus fabric,
+	  or memory controller).  It is easy to get this hardware configuration
+	  wrong; this test should catch such problems early.
+
+	  Say 'N' on stable hardware.
+
 endmenu
-
-
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index bb5ba61..0aa7270 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -38,6 +38,7 @@
 platform-$(CONFIG_XTENSA_PLATFORM_XT2000)	:= xt2000
 platform-$(CONFIG_XTENSA_PLATFORM_ISS)		:= iss
 platform-$(CONFIG_XTENSA_PLATFORM_S6105)	:= s6105
+platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA)	:= xtfpga
 
 PLATFORM = $(platform-y)
 export PLATFORM
@@ -49,6 +50,17 @@
 
 KBUILD_CFLAGS += $(call cc-option,-mforce-no-pic,)
 
+ifneq ($(CONFIG_LD_NO_RELAX),)
+LDFLAGS := --no-relax
+endif
+
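+# Find out which endianness the target compiler assumes: pipe the
+# predefined macro name through the preprocessor; it expands to 1 for
+# the matching endianness, and sparse then gets the matching -D flag.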
+ifeq ($(shell echo -e __XTENSA_EB__ | $(CC) -E - | grep -v "\#"),1)
+CHECKFLAGS += -D__XTENSA_EB__
+endif
+ifeq ($(shell echo -e __XTENSA_EL__ | $(CC) -E - | grep -v "\#"),1)
+CHECKFLAGS += -D__XTENSA_EL__
+endif
+
 vardirs := $(patsubst %,arch/xtensa/variants/%/,$(variant-y))
 plfdirs := $(patsubst %,arch/xtensa/platforms/%/,$(platform-y))
 
@@ -75,6 +87,10 @@
 
 libs-y		+= arch/xtensa/lib/ $(LIBGCC)
 
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+core-$(CONFIG_OF) += arch/xtensa/boot/
+endif
+
 boot		:= arch/xtensa/boot
 
 all: zImage
@@ -84,7 +100,9 @@
 zImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $@
 
+%.dtb:
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
 define archhelp
   @echo '* zImage      - Compressed kernel image (arch/xtensa/boot/images/zImage.*)'
 endef
-
diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile
index 4018f89..818647e 100644
--- a/arch/xtensa/boot/Makefile
+++ b/arch/xtensa/boot/Makefile
@@ -22,12 +22,35 @@
 # Subdirs for the boot loader(s)
 
 bootdir-$(CONFIG_XTENSA_PLATFORM_ISS)	 += boot-elf
-bootdir-$(CONFIG_XTENSA_PLATFORM_XT2000) += boot-redboot boot-elf
+bootdir-$(CONFIG_XTENSA_PLATFORM_XT2000) += boot-redboot boot-elf boot-uboot
+bootdir-$(CONFIG_XTENSA_PLATFORM_XTFPGA) += boot-redboot boot-elf boot-uboot
 
 
+BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB)).dtb.o
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+obj-$(CONFIG_OF) += $(BUILTIN_DTB)
+endif
+
+# Rule to build device tree blobs
+$(obj)/%.dtb: $(src)/dts/%.dts FORCE
+	$(call if_changed_dep,dtc)
+
+clean-files := *.dtb.S
+
 zImage Image: $(bootdir-y)
 
 $(bootdir-y): $(addprefix $(obj)/,$(subdir-y)) \
 	      $(addprefix $(obj)/,$(host-progs))
 	$(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS)
 
+OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary
+
+vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+vmlinux.bin.gz: vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+boot-elf: vmlinux.bin
+boot-redboot: vmlinux.bin.gz
+boot-uboot: vmlinux.bin.gz
diff --git a/arch/xtensa/boot/boot-elf/Makefile b/arch/xtensa/boot/boot-elf/Makefile
index f10992b..1fe01b7 100644
--- a/arch/xtensa/boot/boot-elf/Makefile
+++ b/arch/xtensa/boot/boot-elf/Makefile
@@ -4,9 +4,6 @@
 # for more details.
 #
 
-GZIP = gzip
-GZIP_FLAGS = -v9fc
-
 ifeq ($(BIG_ENDIAN),1)
 OBJCOPY_ARGS    := -O elf32-xtensa-be
 else
@@ -20,18 +17,17 @@
 
 OBJS		:= $(addprefix $(obj)/,$(boot-y))
 
-vmlinux.tmp: vmlinux
-	$(OBJCOPY) --strip-all -R .comment -R .note.gnu.build-id -O binary \
-		$^ $@
-
-Image:	vmlinux.tmp $(OBJS) arch/$(ARCH)/boot/boot-elf/boot.lds
-	$(OBJCOPY) $(OBJCOPY_ARGS) -R .comment \
-		--add-section image=vmlinux.tmp \
+$(obj)/Image.o: vmlinux.bin $(OBJS)
+	$(Q)$(OBJCOPY) $(OBJCOPY_ARGS) -R .comment \
+		--add-section image=vmlinux.bin \
 		--set-section-flags image=contents,alloc,load,load,data \
-		$(OBJS) $@.tmp
-	$(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) \
-		-T arch/$(ARCH)/boot/boot-elf/boot.lds \
-		-o arch/$(ARCH)/boot/$@.elf $@.tmp
+		$(OBJS) $@
 
-zImage:	Image
+$(obj)/../Image.elf: $(obj)/Image.o $(obj)/boot.lds
+	$(Q)$(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) \
+		-T $(obj)/boot.lds \
+		--build-id=none \
+		-o $@ $(obj)/Image.o
+	$(Q)$(kecho) '  Kernel: $@ is ready'
 
+zImage:	$(obj)/../Image.elf
diff --git a/arch/xtensa/boot/boot-redboot/Makefile b/arch/xtensa/boot/boot-redboot/Makefile
index 25a78c6..8be8b94 100644
--- a/arch/xtensa/boot/boot-redboot/Makefile
+++ b/arch/xtensa/boot/boot-redboot/Makefile
@@ -4,8 +4,6 @@
 # for more details.
 #
 
-GZIP = gzip
-GZIP_FLAGS = -v9fc
 ifeq ($(BIG_ENDIAN),1)
 OBJCOPY_ARGS 	:= -O elf32-xtensa-be
 else
@@ -21,17 +19,17 @@
 
 LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
-vmlinux.tmp: vmlinux
-	$(OBJCOPY) --strip-all -R .comment -R .note.gnu.build-id -O binary \
-		$^ $@
-
-vmlinux.tmp.gz: vmlinux.tmp
-	$(GZIP) $(GZIP_FLAGS) $^ > $@
-
-zImage: vmlinux.tmp.gz $(OBJS) $(LIBS)
-	$(OBJCOPY) $(OBJCOPY_ARGS) -R .comment \
-		--add-section image=vmlinux.tmp.gz \
+$(obj)/zImage.o: vmlinux.bin.gz $(OBJS)
+	$(Q)$(OBJCOPY) $(OBJCOPY_ARGS) -R .comment \
+		--add-section image=vmlinux.bin.gz \
 		--set-section-flags image=contents,alloc,load,load,data \
-		$(OBJS) $@.tmp
-	$(LD) $(LD_ARGS) -o $@.elf $@.tmp $(LIBS) -L/xtensa-elf/lib $(LIBGCC)
-	$(OBJCOPY) -S -O binary $@.elf arch/$(ARCH)/boot/$@.redboot
+		$(OBJS) $@
+
+$(obj)/zImage.elf: $(obj)/zImage.o $(LIBS)
+	$(Q)$(LD) $(LD_ARGS) -o $@ $^ -L/xtensa-elf/lib $(LIBGCC)
+
+$(obj)/../zImage.redboot: $(obj)/zImage.elf
+	$(Q)$(OBJCOPY) -S -O binary $< $@
+	$(Q)$(kecho) '  Kernel: $@ is ready'
+
+zImage: $(obj)/../zImage.redboot
diff --git a/arch/xtensa/boot/boot-uboot/Makefile b/arch/xtensa/boot/boot-uboot/Makefile
new file mode 100644
index 0000000..bfbf8af
--- /dev/null
+++ b/arch/xtensa/boot/boot-uboot/Makefile
@@ -0,0 +1,14 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+
+UIMAGE_LOADADDR = 0xd0001000
+UIMAGE_COMPRESSION = gzip
+
+$(obj)/../uImage: vmlinux.bin.gz FORCE
+	$(call if_changed,uimage)
+	$(Q)$(kecho) '  Kernel: $@ is ready'
+
+zImage: $(obj)/../uImage
diff --git a/arch/xtensa/boot/dts/lx60.dts b/arch/xtensa/boot/dts/lx60.dts
new file mode 100644
index 0000000..2eab365
--- /dev/null
+++ b/arch/xtensa/boot/dts/lx60.dts
@@ -0,0 +1,11 @@
+/dts-v1/;
+/include/ "xtfpga.dtsi"
+/include/ "xtfpga-flash-4m.dtsi"
+
+/ {
+	compatible = "xtensa,lx60";
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x04000000>;
+	};
+};
diff --git a/arch/xtensa/boot/dts/ml605.dts b/arch/xtensa/boot/dts/ml605.dts
new file mode 100644
index 0000000..6ed51d6
--- /dev/null
+++ b/arch/xtensa/boot/dts/ml605.dts
@@ -0,0 +1,11 @@
+/dts-v1/;
+/include/ "xtfpga.dtsi"
+/include/ "xtfpga-flash-16m.dtsi"
+
+/ {
+	compatible = "xtensa,ml605";
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x08000000>;
+	};
+};
diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
new file mode 100644
index 0000000..e5703c7
--- /dev/null
+++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
@@ -0,0 +1,26 @@
+/ {
+	flash: flash@f8000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0xf8000000 0x01000000>;
+		bank-width = <2>;
+		device-width = <2>;
+		partition@0x0 {
+			label = "boot loader area";
+			reg = <0x00000000 0x00400000>;
+		};
+		partition@0x400000 {
+			label = "kernel image";
+			reg = <0x00400000 0x00600000>;
+		};
+		partition@0xa00000 {
+			label = "data";
+			reg = <0x00a00000 0x005e0000>;
+		};
+		partition@0xfe0000 {
+			label = "boot environment";
+			reg = <0x00fe0000 0x00020000>;
+		};
+	};
+};
diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
new file mode 100644
index 0000000..6f9c10d
--- /dev/null
+++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
@@ -0,0 +1,18 @@
+/ {
+	flash: flash@f8000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0xf8000000 0x00400000>;
+		bank-width = <2>;
+		device-width = <2>;
+		partition@0x0 {
+			label = "boot loader area";
+			reg = <0x00000000 0x003f0000>;
+		};
+		partition@0x3f0000 {
+			label = "boot environment";
+			reg = <0x003f0000 0x00010000>;
+		};
+	};
+};
diff --git a/arch/xtensa/boot/dts/xtfpga.dtsi b/arch/xtensa/boot/dts/xtfpga.dtsi
new file mode 100644
index 0000000..7eda6ec
--- /dev/null
+++ b/arch/xtensa/boot/dts/xtfpga.dtsi
@@ -0,0 +1,56 @@
+/ {
+	compatible = "xtensa,xtfpga";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&pic>;
+
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug";
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x06000000>;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		cpu@0 {
+			compatible = "xtensa,cpu";
+			reg = <0>;
+			/* Filled in by platform_setup from FPGA register
+			 * clock-frequency = <100000000>;
+			 */
+		};
+	};
+
+	pic: pic {
+		compatible = "xtensa,pic";
+		/* one cell: internal irq number,
+		 * two cells: second cell == 0: internal irq number
+		 *            second cell == 1: external irq number
+		 */
+		#interrupt-cells = <2>;
+		interrupt-controller;
+	};
+
+	serial0: serial@fd050020 {
+		device_type = "serial";
+		compatible = "ns16550a";
+		no-loopback-test;
+		reg = <0xfd050020 0x20>;
+		reg-shift = <2>;
+		interrupts = <0 1>; /* external irq 0 */
+		/* Filled in by platform_setup from FPGA register
+		 * clock-frequency = <100000000>;
+		 */
+	};
+
+	enet0: ethoc@fd030000 {
+		compatible = "opencores,ethoc";
+		reg = <0xfd030000 0x4000 0xfd800000 0x4000>;
+		interrupts = <1 1>; /* external irq 1 */
+		local-mac-address = [00 50 c2 13 6f 00];
+	};
+};
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 24f50ca..c3f2891 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -66,19 +66,35 @@
  */
 static inline void atomic_add(int i, atomic_t * v)
 {
-    unsigned int vval;
+#if XCHAL_HAVE_S32C1I
+	unsigned long tmp;
+	int result;
 
-    __asm__ __volatile__(
-	"rsil    a15, "__stringify(LOCKLEVEL)"\n\t"
-	"l32i    %0, %2, 0              \n\t"
-	"add     %0, %0, %1             \n\t"
-	"s32i    %0, %2, 0              \n\t"
-	"wsr     a15, ps                \n\t"
-	"rsync                          \n"
-	: "=&a" (vval)
-	: "a" (i), "a" (v)
-	: "a15", "memory"
-	);
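+	/*
+	 * s32c1i stores %0 to (%3) only if the word there still equals
+	 * SCOMPARE1, and it always returns the previous memory value in
+	 * %0, so the loop below retries until the swap succeeds.
+	 */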
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       add     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (i), "a" (v)
+			: "memory"
+			);
+#else
+	unsigned int vval;
+
+	__asm__ __volatile__(
+			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %2, 0\n"
+			"       add     %0, %0, %1\n"
+			"       s32i    %0, %2, 0\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (vval)
+			: "a" (i), "a" (v)
+			: "a15", "memory"
+			);
+#endif
 }
 
 /**
@@ -90,19 +106,35 @@
  */
 static inline void atomic_sub(int i, atomic_t *v)
 {
-    unsigned int vval;
+#if XCHAL_HAVE_S32C1I
+	unsigned long tmp;
+	int result;
 
-    __asm__ __volatile__(
-	"rsil    a15, "__stringify(LOCKLEVEL)"\n\t"
-	"l32i    %0, %2, 0              \n\t"
-	"sub     %0, %0, %1             \n\t"
-	"s32i    %0, %2, 0              \n\t"
-	"wsr     a15, ps                \n\t"
-	"rsync                          \n"
-	: "=&a" (vval)
-	: "a" (i), "a" (v)
-	: "a15", "memory"
-	);
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       sub     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (i), "a" (v)
+			: "memory"
+			);
+#else
+	unsigned int vval;
+
+	__asm__ __volatile__(
+			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %2, 0\n"
+			"       sub     %0, %0, %1\n"
+			"       s32i    %0, %2, 0\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (vval)
+			: "a" (i), "a" (v)
+			: "a15", "memory"
+			);
+#endif
 }
 
 /*
@@ -111,40 +143,78 @@
 
 static inline int atomic_add_return(int i, atomic_t * v)
 {
-     unsigned int vval;
+#if XCHAL_HAVE_S32C1I
+	unsigned long tmp;
+	int result;
 
-    __asm__ __volatile__(
-	"rsil    a15,"__stringify(LOCKLEVEL)"\n\t"
-	"l32i    %0, %2, 0             \n\t"
-	"add     %0, %0, %1            \n\t"
-	"s32i    %0, %2, 0             \n\t"
-	"wsr     a15, ps               \n\t"
-	"rsync                         \n"
-	: "=&a" (vval)
-	: "a" (i), "a" (v)
-	: "a15", "memory"
-	);
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       add     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			"       add     %0, %0, %2\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (i), "a" (v)
+			: "memory"
+			);
 
-    return vval;
+	return result;
+#else
+	unsigned int vval;
+
+	__asm__ __volatile__(
+			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %2, 0\n"
+			"       add     %0, %0, %1\n"
+			"       s32i    %0, %2, 0\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (vval)
+			: "a" (i), "a" (v)
+			: "a15", "memory"
+			);
+
+	return vval;
+#endif
 }
 
 static inline int atomic_sub_return(int i, atomic_t * v)
 {
-    unsigned int vval;
+#if XCHAL_HAVE_S32C1I
+	unsigned long tmp;
+	int result;
 
-    __asm__ __volatile__(
-	"rsil    a15,"__stringify(LOCKLEVEL)"\n\t"
-	"l32i    %0, %2, 0             \n\t"
-	"sub     %0, %0, %1            \n\t"
-	"s32i    %0, %2, 0             \n\t"
-	"wsr     a15, ps               \n\t"
-	"rsync                         \n"
-	: "=&a" (vval)
-	: "a" (i), "a" (v)
-	: "a15", "memory"
-	);
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       sub     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			"       sub     %0, %0, %2\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (i), "a" (v)
+			: "memory"
+			);
 
-    return vval;
+	return result;
+#else
+	unsigned int vval;
+
+	__asm__ __volatile__(
+			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %2, 0\n"
+			"       sub     %0, %0, %1\n"
+			"       s32i    %0, %2, 0\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (vval)
+			: "a" (i), "a" (v)
+			: "a15", "memory"
+			);
+
+	return vval;
+#endif
 }
 
 /**
@@ -251,38 +321,70 @@
 
 static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-    unsigned int all_f = -1;
-    unsigned int vval;
+#if XCHAL_HAVE_S32C1I
+	unsigned long tmp;
+	int result;
 
-    __asm__ __volatile__(
-	"rsil    a15,"__stringify(LOCKLEVEL)"\n\t"
-	"l32i    %0, %2, 0             \n\t"
-	"xor     %1, %4, %3            \n\t"
-	"and     %0, %0, %4            \n\t"
-	"s32i    %0, %2, 0             \n\t"
-	"wsr     a15, ps               \n\t"
-	"rsync                         \n"
-	: "=&a" (vval), "=a" (mask)
-	: "a" (v), "a" (all_f), "1" (mask)
-	: "a15", "memory"
-	);
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       and     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (~mask), "a" (v)
+			: "memory"
+			);
+#else
+	unsigned int all_f = -1;
+	unsigned int vval;
+
+	__asm__ __volatile__(
+			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %2, 0\n"
+			"       xor     %1, %4, %3\n"
+			"       and     %0, %0, %4\n"
+			"       s32i    %0, %2, 0\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (vval), "=a" (mask)
+			: "a" (v), "a" (all_f), "1" (mask)
+			: "a15", "memory"
+			);
+#endif
 }
 
 static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 {
-    unsigned int vval;
+#if XCHAL_HAVE_S32C1I
+	unsigned long tmp;
+	int result;
 
-    __asm__ __volatile__(
-	"rsil    a15,"__stringify(LOCKLEVEL)"\n\t"
-	"l32i    %0, %2, 0             \n\t"
-	"or      %0, %0, %1            \n\t"
-	"s32i    %0, %2, 0             \n\t"
-	"wsr     a15, ps               \n\t"
-	"rsync                         \n"
-	: "=&a" (vval)
-	: "a" (mask), "a" (v)
-	: "a15", "memory"
-	);
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       or      %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (mask), "a" (v)
+			: "memory"
+			);
+#else
+	unsigned int vval;
+
+	__asm__ __volatile__(
+			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %2, 0\n"
+			"       or      %0, %0, %1\n"
+			"       s32i    %0, %2, 0\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (vval)
+			: "a" (mask), "a" (v)
+			: "a15", "memory"
+			);
+#endif
 }
 
 /* Atomic operations are already serializing */
@@ -294,4 +396,3 @@
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
-
diff --git a/arch/xtensa/include/asm/barrier.h b/arch/xtensa/include/asm/barrier.h
index 55707a8..ef02167 100644
--- a/arch/xtensa/include/asm/barrier.h
+++ b/arch/xtensa/include/asm/barrier.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
+ * Copyright (C) 2001 - 2012 Tensilica Inc.
  */
 
 #ifndef _XTENSA_SYSTEM_H
@@ -12,8 +12,8 @@
 #define smp_read_barrier_depends() do { } while(0)
 #define read_barrier_depends() do { } while(0)
 
-#define mb()  barrier()
-#define rmb() mb()
+#define mb()  ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define rmb() barrier()
 #define wmb() mb()
 
 #ifdef CONFIG_SMP
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index 5270197..84afe58d 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -29,7 +29,6 @@
 #define smp_mb__before_clear_bit()	barrier()
 #define smp_mb__after_clear_bit()	barrier()
 
-#include <asm-generic/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
 
 #if XCHAL_HAVE_NSA
@@ -104,6 +103,132 @@
 #endif
 
 #include <asm-generic/bitops/fls64.h>
+
+#if XCHAL_HAVE_S32C1I
+
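+/*
+ * All six atomic bit operations below follow the same s32c1i pattern:
+ * load the word, arm SCOMPARE1 with the old value, apply the mask
+ * (or/and/xor), and conditionally store the result, retrying if another
+ * agent changed the word in the meantime.
+ */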
+static inline void set_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long tmp, value;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       or      %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (tmp), "=&a" (value)
+			: "a" (mask), "a" (p)
+			: "memory");
+}
+
+static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long tmp, value;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       and     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (tmp), "=&a" (value)
+			: "a" (~mask), "a" (p)
+			: "memory");
+}
+
+static inline void change_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long tmp, value;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       xor     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (tmp), "=&a" (value)
+			: "a" (mask), "a" (p)
+			: "memory");
+}
+
+static inline int
+test_and_set_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long tmp, value;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       or      %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (tmp), "=&a" (value)
+			: "a" (mask), "a" (p)
+			: "memory");
+
+	return tmp & mask;
+}
+
+static inline int
+test_and_clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long tmp, value;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       and     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (tmp), "=&a" (value)
+			: "a" (~mask), "a" (p)
+			: "memory");
+
+	return tmp & mask;
+}
+
+static inline int
+test_and_change_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long tmp, value;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %3, 0\n"
+			"       wsr     %1, scompare1\n"
+			"       xor     %0, %1, %2\n"
+			"       s32c1i  %0, %3, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (tmp), "=&a" (value)
+			: "a" (mask), "a" (p)
+			: "memory");
+
+	return tmp & mask;
+}
+
+#else
+
+#include <asm-generic/bitops/atomic.h>
+
+#endif /* XCHAL_HAVE_S32C1I */
+
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/le.h>
 
diff --git a/arch/xtensa/include/asm/bootparam.h b/arch/xtensa/include/asm/bootparam.h
index 9983f2c..0c25799 100644
--- a/arch/xtensa/include/asm/bootparam.h
+++ b/arch/xtensa/include/asm/bootparam.h
@@ -22,6 +22,7 @@
 #define BP_TAG_MEMORY		0x1003	/* memory addr and size (bp_meminfo) */
 #define BP_TAG_SERIAL_BAUSRATE	0x1004	/* baud rate of current console. */
 #define BP_TAG_SERIAL_PORT	0x1005	/* serial device of current console */
+#define BP_TAG_FDT		0x1006	/* flat device tree addr */
 
 #define BP_TAG_FIRST		0x7B0B  /* first tag with a version number */
 #define BP_TAG_LAST 		0x7E0B	/* last tag */
@@ -31,15 +32,15 @@
 /* All records are aligned to 4 bytes */
 
 typedef struct bp_tag {
-  unsigned short id;		/* tag id */
-  unsigned short size;		/* size of this record excluding the structure*/
-  unsigned long data[0];	/* data */
+	unsigned short id;	/* tag id */
+	unsigned short size;	/* size of this record excluding the structure*/
+	unsigned long data[0];	/* data */
 } bp_tag_t;
 
 typedef struct meminfo {
-  unsigned long type;
-  unsigned long start;
-  unsigned long end;
+	unsigned long type;
+	unsigned long start;
+	unsigned long end;
 } meminfo_t;
 
 #define SYSMEM_BANKS_MAX 5
@@ -48,14 +49,11 @@
 #define MEMORY_TYPE_NONE		0x2000
 
 typedef struct sysmem_info {
-  int nr_banks;
-  meminfo_t bank[SYSMEM_BANKS_MAX];
+	int nr_banks;
+	meminfo_t bank[SYSMEM_BANKS_MAX];
 } sysmem_info_t;
 
 extern sysmem_info_t sysmem;
 
 #endif
 #endif
-
-
-
diff --git a/arch/xtensa/include/asm/cacheasm.h b/arch/xtensa/include/asm/cacheasm.h
index 2c20a58..60e1877 100644
--- a/arch/xtensa/include/asm/cacheasm.h
+++ b/arch/xtensa/include/asm/cacheasm.h
@@ -174,4 +174,3 @@
 	__loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH
 
 	.endm
-
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index 569fec4..127cd48 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -104,7 +104,8 @@
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page*);
 extern void flush_cache_range(struct vm_area_struct*, ulong, ulong);
-extern void flush_cache_page(struct vm_area_struct*, unsigned long, unsigned long);
+extern void flush_cache_page(struct vm_area_struct*,
+			     unsigned long, unsigned long);
 
 #else
 
diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h
index e4d831a..aed7ad6 100644
--- a/arch/xtensa/include/asm/checksum.h
+++ b/arch/xtensa/include/asm/checksum.h
@@ -36,8 +36,9 @@
  * better 64-bit) boundary
  */
 
-asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum,
-						   int *src_err_ptr, int *dst_err_ptr);
+asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
+					    int len, __wsum sum,
+					    int *src_err_ptr, int *dst_err_ptr);
 
 /*
  *	Note: when you get a NULL pointer exception here this means someone
@@ -54,7 +55,7 @@
 
 static inline
 __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
-						int len, __wsum sum, int *err_ptr)
+				   int len, __wsum sum, int *err_ptr)
 {
 	return csum_partial_copy_generic((__force const void *)src, dst,
 					len, sum, err_ptr, NULL);
@@ -112,7 +113,8 @@
 	/* Since the input registers which are loaded with iph and ihl
 	   are modified, we must also specify them as outputs, or gcc
 	   will assume they contain their original values. */
-		: "=r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmp), "=&r" (endaddr)
+		: "=r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmp),
+		  "=&r" (endaddr)
 		: "1" (iph), "2" (ihl)
 		: "memory");
 
@@ -168,7 +170,7 @@
 
 static __inline__ __sum16 ip_compute_csum(const void *buff, int len)
 {
-    return csum_fold (csum_partial(buff, len, 0));
+	return csum_fold (csum_partial(buff, len, 0));
 }
 
 #define _HAVE_ARCH_IPV6_CSUM
@@ -238,11 +240,12 @@
  *	Copy and checksum to user
  */
 #define HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user(const void *src, void __user *dst,
-				    int len, __wsum sum, int *err_ptr)
+static __inline__ __wsum csum_and_copy_to_user(const void *src,
+					       void __user *dst, int len,
+					       __wsum sum, int *err_ptr)
 {
 	if (access_ok(VERIFY_WRITE, dst, len))
-		return csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr);
+		return csum_partial_copy_generic(src,dst,len,sum,NULL,err_ptr);
 
 	if (len)
 		*err_ptr = -EFAULT;
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 64dad04..d9ab131 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -22,17 +22,30 @@
 static inline unsigned long
 __cmpxchg_u32(volatile int *p, int old, int new)
 {
-  __asm__ __volatile__("rsil    a15, "__stringify(LOCKLEVEL)"\n\t"
-		       "l32i    %0, %1, 0              \n\t"
-		       "bne	%0, %2, 1f             \n\t"
-		       "s32i    %3, %1, 0              \n\t"
-		       "1:                             \n\t"
-		       "wsr     a15, ps                \n\t"
-		       "rsync                          \n\t"
-		       : "=&a" (old)
-		       : "a" (p), "a" (old), "r" (new)
-		       : "a15", "memory");
-  return old;
+#if XCHAL_HAVE_S32C1I
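+	/*
+	 * cmpxchg maps directly onto s32c1i: arm SCOMPARE1 with 'old',
+	 * conditionally store 'new', and return the value the memory word
+	 * actually held (equal to 'old' exactly when the swap succeeded).
+	 */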
+	__asm__ __volatile__(
+			"       wsr     %2, scompare1\n"
+			"       s32c1i  %0, %1, 0\n"
+			: "+a" (new)
+			: "a" (p), "a" (old)
+			: "memory"
+			);
+
+	return new;
+#else
+	__asm__ __volatile__(
+			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %1, 0\n"
+			"       bne     %0, %2, 1f\n"
+			"       s32i    %3, %1, 0\n"
+			"1:\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (old)
+			: "a" (p), "a" (old), "r" (new)
+			: "a15", "memory");
+	return old;
+#endif
 }
 /* This function doesn't exist, so you'll get a linker error
  * if something tries to do an invalid cmpxchg(). */
@@ -93,19 +106,36 @@
 
 static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 {
-  unsigned long tmp;
-  __asm__ __volatile__("rsil    a15, "__stringify(LOCKLEVEL)"\n\t"
-		       "l32i    %0, %1, 0              \n\t"
-		       "s32i    %2, %1, 0              \n\t"
-		       "wsr     a15, ps                \n\t"
-		       "rsync                          \n\t"
-		       : "=&a" (tmp)
-		       : "a" (m), "a" (val)
-		       : "a15", "memory");
-  return tmp;
+#if XCHAL_HAVE_S32C1I
+	unsigned long tmp, result;
+	__asm__ __volatile__(
+			"1:     l32i    %1, %2, 0\n"
+			"       mov     %0, %3\n"
+			"       wsr     %1, scompare1\n"
+			"       s32c1i  %0, %2, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (m), "a" (val)
+			: "memory"
+			);
+	return result;
+#else
+	unsigned long tmp;
+	__asm__ __volatile__(
+			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       l32i    %0, %1, 0\n"
+			"       s32i    %2, %1, 0\n"
+			"       wsr     a15, ps\n"
+			"       rsync\n"
+			: "=&a" (tmp)
+			: "a" (m), "a" (val)
+			: "a15", "memory");
+	return tmp;
+#endif
 }
 
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+#define xchg(ptr,x) \
+	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 
 /*
  * This only works if the compiler isn't horribly bad at optimizing.
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 8d1eb5d..47e46dc 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -30,7 +30,7 @@
 
 #define GET_CURRENT(reg,sp)		\
 	GET_THREAD_INFO(reg,sp);	\
-  	l32i reg, reg, TI_TASK		\
+	l32i reg, reg, TI_TASK		\
 
 #endif
 
diff --git a/arch/xtensa/include/asm/delay.h b/arch/xtensa/include/asm/delay.h
index 58c0a4f..61fc5fa 100644
--- a/arch/xtensa/include/asm/delay.h
+++ b/arch/xtensa/include/asm/delay.h
@@ -19,9 +19,9 @@
 
 static inline void __delay(unsigned long loops)
 {
-  /* 2 cycles per loop. */
-  __asm__ __volatile__ ("1: addi %0, %0, -2; bgeui %0, 2, 1b"
-			: "=r" (loops) : "0" (loops));
+	/* 2 cycles per loop. */
+	__asm__ __volatile__ ("1: addi %0, %0, -2; bgeui %0, 2, 1b"
+			      : "=r" (loops) : "0" (loops));
 }
 
 static __inline__ u32 xtensa_get_ccount(void)
@@ -46,4 +46,3 @@
 }
 
 #endif
-
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
index 492c957..4acb5feb 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -16,6 +16,8 @@
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
 
+#define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
+
 /*
  * DMA-consistent mapping functions.
  */
@@ -98,8 +100,8 @@
 }
 
 static inline void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+		           size_t size, enum dma_data_direction direction)
 {
 	consistent_sync((void *)bus_to_virt(dma_handle), size, direction);
 }
diff --git a/arch/xtensa/include/asm/elf.h b/arch/xtensa/include/asm/elf.h
index 5293312..264d5fa 100644
--- a/arch/xtensa/include/asm/elf.h
+++ b/arch/xtensa/include/asm/elf.h
@@ -168,11 +168,11 @@
  */
 
 #define ELF_PLAT_INIT(_r, load_addr) \
-  do { _r->areg[0]=0; /*_r->areg[1]=0;*/ _r->areg[2]=0;  _r->areg[3]=0;  \
-       _r->areg[4]=0;  _r->areg[5]=0;    _r->areg[6]=0;  _r->areg[7]=0;  \
-       _r->areg[8]=0;  _r->areg[9]=0;    _r->areg[10]=0; _r->areg[11]=0; \
-       _r->areg[12]=0; _r->areg[13]=0;   _r->areg[14]=0; _r->areg[15]=0; \
-  } while (0)
+	do { _r->areg[0]=0; /*_r->areg[1]=0;*/ _r->areg[2]=0;  _r->areg[3]=0;  \
+	     _r->areg[4]=0;  _r->areg[5]=0;    _r->areg[6]=0;  _r->areg[7]=0;  \
+	     _r->areg[8]=0;  _r->areg[9]=0;    _r->areg[10]=0; _r->areg[11]=0; \
+	     _r->areg[12]=0; _r->areg[13]=0;   _r->areg[14]=0; _r->areg[15]=0; \
+	} while (0)
 
 typedef struct {
 	xtregs_opt_t	opt;
diff --git a/arch/xtensa/include/asm/highmem.h b/arch/xtensa/include/asm/highmem.h
index 0a046ca..80be151 100644
--- a/arch/xtensa/include/asm/highmem.h
+++ b/arch/xtensa/include/asm/highmem.h
@@ -14,4 +14,3 @@
 extern void flush_cache_kmaps(void);
 
 #endif
-
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
new file mode 100644
index 0000000..e1f8ba4
--- /dev/null
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -0,0 +1,55 @@
+/*
+ * arch/xtensa/include/asm/initialize_mmu.h
+ *
+ * Initializes MMU:
+ *
+ *      For the new V3 MMU we remap the TLB from virtual == physical
 *      to the standard Linux mapping used in earlier MMUs.
+ *
 *      For the MMU we also support a new configuration register that
+ *      specifies how the S32C1I instruction operates with the cache
+ *      controller.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file "COPYING" in the main directory of
+ * this archive for more details.
+ *
+ * Copyright (C) 2008 - 2012 Tensilica, Inc.
+ *
+ *   Marc Gauthier <marc@tensilica.com>
+ *   Pete Delaney <piet@tensilica.com>
+ */
+
+#ifndef _XTENSA_INITIALIZE_MMU_H
+#define _XTENSA_INITIALIZE_MMU_H
+
+#ifdef __ASSEMBLY__
+
+#define XTENSA_HWVERSION_RC_2009_0 230000
+
+	.macro	initialize_mmu
+
+#if XCHAL_HAVE_S32C1I && (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RC_2009_0)
+/*
+ * We Have Atomic Operation Control (ATOMCTL) Register; Initialize it.
+ * For details see Documentation/xtensa/atomctl.txt
+ */
+#if XCHAL_DCACHE_IS_COHERENT
+	movi	a3, 0x25	/* For SMP/MX -- internal for writeback,
+				 * RCW otherwise
+				 */
+#else
+	movi	a3, 0x29	/* non-MX -- Most cores use Std Memory
+				 * Controllers which usually can't use RCW
+				 */
+#endif
+	wsr	a3, atomctl
+#endif  /* XCHAL_HAVE_S32C1I &&
+	 * (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RC_2009_0)
+	 */
+
+	.endm
+
+#endif /*__ASSEMBLY__*/
+
+#endif /* _XTENSA_INITIALIZE_MMU_H */
diff --git a/arch/xtensa/include/asm/mmu.h b/arch/xtensa/include/asm/mmu.h
index 04890d6..8554b2c 100644
--- a/arch/xtensa/include/asm/mmu.h
+++ b/arch/xtensa/include/asm/mmu.h
@@ -12,7 +12,7 @@
 #define _XTENSA_MMU_H
 
 #ifndef CONFIG_MMU
-#include <asm/nommu.h>
+#include <asm-generic/mmu.h>
 #else
 
 /* Default "unsigned long" context */
diff --git a/arch/xtensa/include/asm/mmu_context.h b/arch/xtensa/include/asm/mmu_context.h
index feb10af..d43525a 100644
--- a/arch/xtensa/include/asm/mmu_context.h
+++ b/arch/xtensa/include/asm/mmu_context.h
@@ -107,7 +107,7 @@
 
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-                             struct task_struct *tsk)
+			     struct task_struct *tsk)
 {
 	unsigned long asid = asid_cache;
 
diff --git a/arch/xtensa/include/asm/nommu.h b/arch/xtensa/include/asm/nommu.h
deleted file mode 100644
index dce2c43..0000000
--- a/arch/xtensa/include/asm/nommu.h
+++ /dev/null
@@ -1,3 +0,0 @@
-typedef struct {
-	unsigned long end_brk;
-} mm_context_t;
diff --git a/arch/xtensa/include/asm/nommu_context.h b/arch/xtensa/include/asm/nommu_context.h
index 599e7a2..3407cf7 100644
--- a/arch/xtensa/include/asm/nommu_context.h
+++ b/arch/xtensa/include/asm/nommu_context.h
@@ -2,7 +2,7 @@
 {
 }
 
-static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+static inline int init_new_context(struct task_struct *tsk,struct mm_struct *mm)
 {
 	return 0;
 }
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 7a5591a7..47f5823 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -29,19 +29,19 @@
  * PAGE_SHIFT determines the page size
  */
 
-#define PAGE_SHIFT		12
-#define PAGE_SIZE		(__XTENSA_UL_CONST(1) << PAGE_SHIFT)
-#define PAGE_MASK		(~(PAGE_SIZE-1))
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(__XTENSA_UL_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
 
 #ifdef CONFIG_MMU
-#define PAGE_OFFSET		XCHAL_KSEG_CACHED_VADDR
-#define MAX_MEM_PFN		XCHAL_KSEG_SIZE
+#define PAGE_OFFSET	XCHAL_KSEG_CACHED_VADDR
+#define MAX_MEM_PFN	XCHAL_KSEG_SIZE
 #else
-#define PAGE_OFFSET		0
-#define MAX_MEM_PFN		(PLATFORM_DEFAULT_MEM_START + PLATFORM_DEFAULT_MEM_SIZE)
+#define PAGE_OFFSET	0
+#define MAX_MEM_PFN	(PLATFORM_DEFAULT_MEM_START + PLATFORM_DEFAULT_MEM_SIZE)
 #endif
 
-#define PGTABLE_START		0x80000000
+#define PGTABLE_START	0x80000000
 
 /*
  * Cache aliasing:
@@ -161,7 +161,9 @@
 
 #define __pa(x)			((unsigned long) (x) - PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long) (x) + PAGE_OFFSET))
-#define pfn_valid(pfn)		((pfn) >= ARCH_PFN_OFFSET && ((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
+#define pfn_valid(pfn) \
+	((pfn) >= ARCH_PFN_OFFSET && ((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
+
 #ifdef CONFIG_DISCONTIGMEM
 # error CONFIG_DISCONTIGMEM not supported
 #endif
diff --git a/arch/xtensa/include/asm/pci-bridge.h b/arch/xtensa/include/asm/pci-bridge.h
index 00fcbd7..0b68c76 100644
--- a/arch/xtensa/include/asm/pci-bridge.h
+++ b/arch/xtensa/include/asm/pci-bridge.h
@@ -35,7 +35,7 @@
 struct pci_controller {
 	int index;			/* used for pci_controller_num */
 	struct pci_controller *next;
-        struct pci_bus *bus;
+	struct pci_bus *bus;
 	void *arch_data;
 
 	int first_busno;
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index 05244f0..614be03 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -53,7 +53,7 @@
 
 /* Map a range of PCI memory or I/O space for a device into user space */
 int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
-                        enum pci_mmap_state mmap_state, int write_combine);
+			enum pci_mmap_state mmap_state, int write_combine);
 
 /* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
 #define HAVE_PCI_MMAP	1
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 40cf9bc..cf914c8 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -42,7 +42,7 @@
 
 extern struct kmem_cache *pgtable_cache;
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					 unsigned long address)
 {
 	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index b03c043..c90ea5b 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -284,7 +284,7 @@
 
 static inline int
 ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr,
-    			  pte_t *ptep)
+			  pte_t *ptep)
 {
 	pte_t pte = *ptep;
 	if (!pte_young(pte))
@@ -304,8 +304,8 @@
 static inline void
 ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-  	pte_t pte = *ptep;
-  	update_pte(ptep, pte_wrprotect(pte));
+	pte_t pte = *ptep;
+	update_pte(ptep, pte_wrprotect(pte));
 }
 
 /* to find an entry in a kernel page-table-directory */
@@ -399,7 +399,7 @@
  */
 
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
-                remap_pfn_range(vma, from, pfn, size, prot)
+	remap_pfn_range(vma, from, pfn, size, prot)
 
 typedef pte_t *pte_addr_t;
 
diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h
index 7d936e5..ec098b6 100644
--- a/arch/xtensa/include/asm/platform.h
+++ b/arch/xtensa/include/asm/platform.h
@@ -75,4 +75,3 @@
 extern void platform_calibrate_ccount (void);
 
 #endif	/* _XTENSA_PLATFORM_H */
-
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 2d630e7..e5fb6b0a 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -89,7 +89,7 @@
 #define MAKE_PC_FROM_RA(ra,sp)    (((ra) & 0x3fffffff) | ((sp) & 0xc0000000))
 
 typedef struct {
-    unsigned long seg;
+	unsigned long seg;
 } mm_segment_t;
 
 struct thread_struct {
@@ -145,10 +145,10 @@
  *       set_thread_state in signal.c depends on it.
  */
 #define USER_PS_VALUE ((1 << PS_WOE_BIT) |				\
-                       (1 << PS_CALLINC_SHIFT) |			\
-                       (USER_RING << PS_RING_SHIFT) |			\
-                       (1 << PS_UM_BIT) |				\
-                       (1 << PS_EXCM_BIT))
+		       (1 << PS_CALLINC_SHIFT) |			\
+		       (USER_RING << PS_RING_SHIFT) |			\
+		       (1 << PS_UM_BIT) |				\
+		       (1 << PS_EXCM_BIT))
 
 /* Clearing a0 terminates the backtrace. */
 #define start_thread(regs, new_pc, new_sp) \
diff --git a/arch/xtensa/include/asm/prom.h b/arch/xtensa/include/asm/prom.h
new file mode 100644
index 0000000..f3d7cd2
--- /dev/null
+++ b/arch/xtensa/include/asm/prom.h
@@ -0,0 +1,6 @@
+#ifndef _XTENSA_ASM_PROM_H
+#define _XTENSA_ASM_PROM_H
+
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+#endif /* _XTENSA_ASM_PROM_H */
diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h
index da21c17..682b1de 100644
--- a/arch/xtensa/include/asm/ptrace.h
+++ b/arch/xtensa/include/asm/ptrace.h
@@ -37,7 +37,7 @@
 	unsigned long windowstart;	/*  52 */
 	unsigned long syscall;		/*  56 */
 	unsigned long icountlevel;	/*  60 */
-	int reserved[1];		/*  64 */
+	unsigned long scompare1;	/*  64 */
 
 	/* Additional configurable registers that are used by the compiler. */
 	xtregs_opt_t xtregs_opt;
@@ -55,7 +55,7 @@
 
 # define arch_has_single_step()	(1)
 # define task_pt_regs(tsk) ((struct pt_regs*) \
-  (task_stack_page(tsk) + KERNEL_STACK_SIZE - (XCHAL_NUM_AREGS-16)*4) - 1)
+	(task_stack_page(tsk) + KERNEL_STACK_SIZE - (XCHAL_NUM_AREGS-16)*4) - 1)
 # define user_mode(regs) (((regs)->ps & 0x00000020)!=0)
 # define instruction_pointer(regs) ((regs)->pc)
 
@@ -63,6 +63,8 @@
 #  define profile_pc(regs) instruction_pointer(regs)
 # endif
 
+#define user_stack_pointer(regs) ((regs)->areg[1])
+
 #else	/* __ASSEMBLY__ */
 
 # include <asm/asm-offsets.h>
diff --git a/arch/xtensa/include/asm/regs.h b/arch/xtensa/include/asm/regs.h
index 8a8aa61..76096a4 100644
--- a/arch/xtensa/include/asm/regs.h
+++ b/arch/xtensa/include/asm/regs.h
@@ -52,6 +52,10 @@
 #define EXCCAUSE_SPECULATION			7
 #define EXCCAUSE_PRIVILEGED			8
 #define EXCCAUSE_UNALIGNED			9
+#define EXCCAUSE_INSTR_DATA_ERROR		12
+#define EXCCAUSE_LOAD_STORE_DATA_ERROR		13
+#define EXCCAUSE_INSTR_ADDR_ERROR		14
+#define EXCCAUSE_LOAD_STORE_ADDR_ERROR		15
 #define EXCCAUSE_ITLB_MISS			16
 #define EXCCAUSE_ITLB_MULTIHIT			17
 #define EXCCAUSE_ITLB_PRIVILEGE			18
@@ -105,4 +109,3 @@
 #define DEBUGCAUSE_ICOUNT_BIT		0	/* ICOUNT would incr. to zero */
 
 #endif /* _XTENSA_SPECREG_H */
-
diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h
index 8ff2364..0397590 100644
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@@ -11,6 +11,192 @@
 #ifndef _XTENSA_SPINLOCK_H
 #define _XTENSA_SPINLOCK_H
 
-#include <linux/spinlock.h>
+/*
+ * spinlock
+ *
+ * There is at most one owner of a spinlock.  There are not different
+ * types of spinlock owners like there are for rwlocks (see below).
+ *
+ * When trying to obtain a spinlock, the function "spins" forever, or busy-
+ * waits, until the lock is obtained.  While it spins, presumably the current
+ * owner will soon give up the spinlock, making it available to others.  Use
+ * the trylock functions to avoid spinning forever.
+ *
+ * possible values:
+ *
+ *    0         nobody owns the spinlock
+ *    1         somebody owns the spinlock
+ */
+
+#define __raw_spin_is_locked(x) ((x)->slock != 0)
+#define __raw_spin_unlock_wait(lock) \
+	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
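+/*
+ * The lock word is claimed by compare-and-swapping 0 -> 1: SCOMPARE1 is
+ * armed with 0 (unlocked), and the s32c1i of 1 only lands while the lock
+ * is free; on failure the old nonzero value comes back in %0 and we spin.
+ */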
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+			"       movi    %0, 0\n"
+			"       wsr     %0, scompare1\n"
+			"1:     movi    %0, 1\n"
+			"       s32c1i  %0, %1, 0\n"
+			"       bnez    %0, 1b\n"
+			: "=&a" (tmp)
+			: "a" (&lock->slock)
+			: "memory");
+}
+
+/* Returns 1 if the lock is obtained, 0 otherwise. */
+
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+			"       movi    %0, 0\n"
+			"       wsr     %0, scompare1\n"
+			"       movi    %0, 1\n"
+			"       s32c1i  %0, %1, 0\n"
+			: "=&a" (tmp)
+			: "a" (&lock->slock)
+			: "memory");
+
+	return tmp == 0 ? 1 : 0;
+}
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+			"       movi    %0, 0\n"
+			"       s32ri   %0, %1, 0\n"
+			: "=&a" (tmp)
+			: "a" (&lock->slock)
+			: "memory");
+}
+
+/*
+ * rwlock
+ *
+ * Read-write locks are really a more flexible spinlock.  They allow
+ * multiple readers but only one writer.  Write ownership is exclusive
+ * (i.e., all other readers and writers are blocked from ownership while
+ * there is a write owner).  These rwlocks are unfair to writers.  Writers
+ * can be starved for an indefinite time by readers.
+ *
+ * possible values:
+ *
+ *   0          nobody owns the rwlock
+ *  >0          one or more readers own the rwlock
+ *                (the positive value is the actual number of readers)
+ *  0x80000000  one writer owns the rwlock, no other writers, no readers
+ */
+
+#define __raw_write_can_lock(x)  ((x)->lock == 0)
+
+static inline void __raw_write_lock(raw_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+			"       movi    %0, 0\n"
+			"       wsr     %0, scompare1\n"
+			"1:     movi    %0, 1\n"
+			"       slli    %0, %0, 31\n"
+			"       s32c1i  %0, %1, 0\n"
+			"       bnez    %0, 1b\n"
+			: "=&a" (tmp)
+			: "a" (&rw->lock)
+			: "memory");
+}
+
+/* Returns 1 if the lock is obtained, 0 otherwise. */
+
+static inline int __raw_write_trylock(raw_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+			"       movi    %0, 0\n"
+			"       wsr     %0, scompare1\n"
+			"       movi    %0, 1\n"
+			"       slli    %0, %0, 31\n"
+			"       s32c1i  %0, %1, 0\n"
+			: "=&a" (tmp)
+			: "a" (&rw->lock)
+			: "memory");
+
+	return tmp == 0 ? 1 : 0;
+}
+
+static inline void __raw_write_unlock(raw_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+			"       movi    %0, 0\n"
+			"       s32ri   %0, %1, 0\n"
+			: "=&a" (tmp)
+			: "a" (&rw->lock)
+			: "memory");
+}
+
+static inline void __raw_read_lock(raw_rwlock_t *rw)
+{
+	unsigned long tmp;
+	unsigned long result;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %2, 0\n"
+			"       bltz    %1, 1b\n"
+			"       wsr     %1, scompare1\n"
+			"       addi    %0, %1, 1\n"
+			"       s32c1i  %0, %2, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (&rw->lock)
+			: "memory");
+}
+
+/* Returns 1 if the lock is obtained, 0 otherwise. */
+
+static inline int __raw_read_trylock(raw_rwlock_t *rw)
+{
+	unsigned long result;
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+			"       l32i    %1, %2, 0\n"
+			"       addi    %0, %1, 1\n"
+			"       bltz    %0, 1f\n"
+			"       wsr     %1, scompare1\n"
+			"       s32c1i  %0, %2, 0\n"
+			"       sub     %0, %0, %1\n"
+			"1:\n"
+			: "=&a" (result), "=&a" (tmp)
+			: "a" (&rw->lock)
+			: "memory");
+
+	return result == 0;
+}
+
+static inline void __raw_read_unlock(raw_rwlock_t *rw)
+{
+	unsigned long tmp1, tmp2;
+
+	__asm__ __volatile__(
+			"1:     l32i    %1, %2, 0\n"
+			"       addi    %0, %1, -1\n"
+			"       wsr     %1, scompare1\n"
+			"       s32c1i  %0, %2, 0\n"
+			"       bne     %0, %1, 1b\n"
+			: "=&a" (tmp1), "=&a" (tmp2)
+			: "a" (&rw->lock)
+			: "memory");
+}
 
 #endif	/* _XTENSA_SPINLOCK_H */
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index b00c928..8d5e47f 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -25,9 +25,10 @@
 /* Should probably move to linux/syscalls.h */
 struct pollfd;
 asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
-	fd_set __user *exp, struct timespec __user *tsp, void __user *sig);
+			     fd_set __user *exp, struct timespec __user *tsp,
+			     void __user *sig);
 asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
-	struct timespec __user *tsp, const sigset_t __user *sigmask,
-	size_t sigsetsize);
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset,
-		size_t sigsetsize);
+			  struct timespec __user *tsp,
+			  const sigset_t __user *sigmask,
+			  size_t sigsetsize);
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
new file mode 100644
index 0000000..54f7044
--- /dev/null
+++ b/arch/xtensa/include/asm/traps.h
@@ -0,0 +1,23 @@
+/*
+ * arch/xtensa/include/asm/traps.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Tensilica Inc.
+ */
+#ifndef _XTENSA_TRAPS_H
+#define _XTENSA_TRAPS_H
+
+#include <asm/ptrace.h>
+
+/*
+ * handler must be either of the following:
+ *  void (*)(struct pt_regs *regs);
+ *  void (*)(struct pt_regs *regs, unsigned long exccause);
+ */
+extern void * __init trap_set_handler(int cause, void *handler);
+extern void do_unhandled(struct pt_regs *regs, unsigned long exccause);
+
+#endif /* _XTENSA_TRAPS_H */
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index 6e4bb3b..fd686dc 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -180,7 +180,8 @@
 #define segment_eq(a,b)	((a).seg == (b).seg)
 
 #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS))
-#define __user_ok(addr,size) (((size) <= TASK_SIZE)&&((addr) <= TASK_SIZE-(size)))
+#define __user_ok(addr,size) \
+		(((size) <= TASK_SIZE)&&((addr) <= TASK_SIZE-(size)))
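+/*
+ * Checking size first keeps TASK_SIZE-(size) from underflowing, so the
+ * pair of comparisons rejects addr+size overflow without wider arithmetic.
+ */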
 #define __access_ok(addr,size) (__kernel_ok || __user_ok((addr),(size)))
 #define access_ok(type,addr,size) __access_ok((unsigned long)(addr),(size))
 
@@ -234,10 +235,10 @@
 	int __cb;							\
 	retval = 0;							\
 	switch (size) {							\
-        case 1: __put_user_asm(x,ptr,retval,1,"s8i",__cb);  break;	\
-        case 2: __put_user_asm(x,ptr,retval,2,"s16i",__cb); break;	\
-        case 4: __put_user_asm(x,ptr,retval,4,"s32i",__cb); break;	\
-        case 8: {							\
+	case 1: __put_user_asm(x,ptr,retval,1,"s8i",__cb);  break;	\
+	case 2: __put_user_asm(x,ptr,retval,2,"s16i",__cb); break;	\
+	case 4: __put_user_asm(x,ptr,retval,4,"s32i",__cb); break;	\
+	case 8: {							\
 		     __typeof__(*ptr) __v64 = x;			\
 		     retval = __copy_to_user(ptr,&__v64,8);		\
 		     break;						\
@@ -291,7 +292,7 @@
  * __check_align_* macros still work.
  */
 #define __put_user_asm(x, addr, err, align, insn, cb)	\
-   __asm__ __volatile__(				\
+__asm__ __volatile__(					\
 	__check_align_##align				\
 	"1: "insn"  %2, %3, 0		\n"		\
 	"2:				\n"		\
@@ -301,8 +302,8 @@
 	"   .long  2b			\n"		\
 	"5:				\n"		\
 	"   l32r   %1, 4b		\n"		\
-        "   movi   %0, %4		\n"		\
-        "   jx     %1			\n"		\
+	"   movi   %0, %4		\n"		\
+	"   jx     %1			\n"		\
 	"   .previous			\n"		\
 	"   .section  __ex_table,\"a\"	\n"		\
 	"   .long	1b, 5b		\n"		\
@@ -334,13 +335,13 @@
 do {									\
 	int __cb;							\
 	retval = 0;							\
-        switch (size) {							\
-          case 1: __get_user_asm(x,ptr,retval,1,"l8ui",__cb);  break;	\
-          case 2: __get_user_asm(x,ptr,retval,2,"l16ui",__cb); break;	\
-          case 4: __get_user_asm(x,ptr,retval,4,"l32i",__cb);  break;	\
-          case 8: retval = __copy_from_user(&x,ptr,8);    break;	\
-          default: (x) = __get_user_bad();				\
-        }								\
+	switch (size) {							\
+	case 1: __get_user_asm(x,ptr,retval,1,"l8ui",__cb);  break;	\
+	case 2: __get_user_asm(x,ptr,retval,2,"l16ui",__cb); break;	\
+	case 4: __get_user_asm(x,ptr,retval,4,"l32i",__cb);  break;	\
+	case 8: retval = __copy_from_user(&x,ptr,8);    break;	\
+	default: (x) = __get_user_bad();				\
+	}								\
 } while (0)
 
 
@@ -349,7 +350,7 @@
  * __check_align_* macros still work.
  */
 #define __get_user_asm(x, addr, err, align, insn, cb) \
-   __asm__ __volatile__(			\
+__asm__ __volatile__(			\
 	__check_align_##align			\
 	"1: "insn"  %2, %3, 0		\n"	\
 	"2:				\n"	\
@@ -360,8 +361,8 @@
 	"5:				\n"	\
 	"   l32r   %1, 4b		\n"	\
 	"   movi   %2, 0		\n"	\
-        "   movi   %0, %4		\n"	\
-        "   jx     %1			\n"	\
+	"   movi   %0, %4		\n"	\
+	"   jx     %1			\n"	\
 	"   .previous			\n"	\
 	"   .section  __ex_table,\"a\"	\n"	\
 	"   .long	1b, 5b		\n"	\
@@ -421,8 +422,10 @@
 
 #define copy_to_user(to,from,n) __generic_copy_to_user((to),(from),(n))
 #define copy_from_user(to,from,n) __generic_copy_from_user((to),(from),(n))
-#define __copy_to_user(to,from,n) __generic_copy_to_user_nocheck((to),(from),(n))
-#define __copy_from_user(to,from,n) __generic_copy_from_user_nocheck((to),(from),(n))
+#define __copy_to_user(to,from,n) \
+	__generic_copy_to_user_nocheck((to),(from),(n))
+#define __copy_from_user(to,from,n) \
+	__generic_copy_from_user_nocheck((to),(from),(n))
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index e002dbc..eb63ea8 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -1,7 +1,6 @@
 #ifndef _XTENSA_UNISTD_H
 #define _XTENSA_UNISTD_H
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
 
diff --git a/arch/xtensa/include/uapi/asm/signal.h b/arch/xtensa/include/uapi/asm/signal.h
index b88ce96..dacf716 100644
--- a/arch/xtensa/include/uapi/asm/signal.h
+++ b/arch/xtensa/include/uapi/asm/signal.h
@@ -97,12 +97,6 @@
 
 #define SA_RESTORER	0x04000000
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index f36cef5..c3a59d9 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -23,13 +23,13 @@
 #
 # Replicate rules in scripts/Makefile.build
 
-sed-y = -e 's/\*(\(\.[a-z]*it\|\.ref\|\)\.text)/*(\1.literal \1.text)/g'    \
-	-e 's/\.text\.unlikely/.literal.unlikely .text.unlikely/g' \
+sed-y = -e 's/\*(\(\.[a-z]*it\|\.ref\|\)\.text)/*(\1.literal \1.text)/g' \
+	-e 's/\.text\.unlikely/.literal.unlikely .text.unlikely/g'	 \
 	-e 's/\*(\(\.text\.[a-z]*\))/*(\1.literal \1)/g'
 
 quiet_cmd__cpp_lds_S = LDS     $@
-      cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $< \
-                       | sed $(sed-y) >$@
+cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $<    \
+                 | sed $(sed-y) >$@
 
 $(obj)/vmlinux.lds: $(src)/vmlinux.lds.S FORCE
 	$(call if_changed_dep,_cpp_lds_S)
diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S
index 934ae58..aa2e87b 100644
--- a/arch/xtensa/kernel/align.S
+++ b/arch/xtensa/kernel/align.S
@@ -442,7 +442,7 @@
 	mov	a1, a2
 
 	rsr	a0, ps
-        bbsi.l  a2, PS_UM_BIT, 1f     # jump if user mode
+	bbsi.l  a2, PS_UM_BIT, 1f     # jump if user mode
 
 	movi	a0, _kernel_exception
 	jx	a0
@@ -450,6 +450,6 @@
 1:	movi	a0, _user_exception
 	jx	a0
 
+ENDPROC(fast_unaligned)
 
 #endif /* XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION */
-
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 7dc3f91..0701fad 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -41,6 +41,7 @@
 	DEFINE(PT_SAR, offsetof (struct pt_regs, sar));
 	DEFINE(PT_ICOUNTLEVEL, offsetof (struct pt_regs, icountlevel));
 	DEFINE(PT_SYSCALL, offsetof (struct pt_regs, syscall));
+	DEFINE(PT_SCOMPARE1, offsetof(struct pt_regs, scompare1));
 	DEFINE(PT_AREG, offsetof (struct pt_regs, areg[0]));
 	DEFINE(PT_AREG0, offsetof (struct pt_regs, areg[0]));
 	DEFINE(PT_AREG1, offsetof (struct pt_regs, areg[1]));
@@ -91,7 +92,8 @@
 #endif
 	DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
 	DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));
-	DEFINE(THREAD_CURRENT_DS, offsetof (struct task_struct, thread.current_ds));
+	DEFINE(THREAD_CURRENT_DS, offsetof (struct task_struct, \
+	       thread.current_ds));
 
 	/* struct mm_struct */
 	DEFINE(MM_USERS, offsetof(struct mm_struct, mm_users));
@@ -108,4 +110,3 @@
 
 	return 0;
 }
-
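asm-offsets.c is compiled but never linked: Kbuild scrapes the generated assembly and turns each DEFINE() into a #define in include/generated/asm-offsets.h, so assembly sources can address C structure members symbolically. The new constant is what lets entry.S (further below) save and restore the register by name:

	/* asm-offsets.c (added above): */
	DEFINE(PT_SCOMPARE1, offsetof(struct pt_regs, scompare1));

	/* generated asm-offsets.h, illustrative -- the actual value
	 * depends on the pt_regs layout: */
	#define PT_SCOMPARE1 68
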
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 54c3be3..6476574 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -43,10 +43,13 @@
 /* IO protection is currently unsupported. */
 
 ENTRY(fast_io_protect)
+
 	wsr	a0, excsave1
 	movi	a0, unrecoverable_exception
 	callx0	a0
 
+ENDPROC(fast_io_protect)
+
 #if XTENSA_HAVE_COPROCESSORS
 
 /*
@@ -139,6 +142,7 @@
  */
 
 ENTRY(coprocessor_save)
+
 	entry	a1, 32
 	s32i	a0, a1, 0
 	movi	a0, .Lsave_cp_regs_jump_table
@@ -150,7 +154,10 @@
 1:	l32i	a0, a1, 0
 	retw
 
+ENDPROC(coprocessor_save)
+
 ENTRY(coprocessor_load)
+
 	entry	a1, 32
 	s32i	a0, a1, 0
 	movi	a0, .Lload_cp_regs_jump_table
@@ -162,8 +169,10 @@
 1:	l32i	a0, a1, 0
 	retw
 
+ENDPROC(coprocessor_load)
+
 /*
- * coprocessor_flush(struct task_info*, index) 
+ * coprocessor_flush(struct task_info*, index)
  *                             a2        a3
  * coprocessor_restore(struct task_info*, index)
  *                              a2         a3
@@ -178,6 +187,7 @@
 
 
 ENTRY(coprocessor_flush)
+
 	entry	a1, 32
 	s32i	a0, a1, 0
 	movi	a0, .Lsave_cp_regs_jump_table
@@ -191,6 +201,8 @@
 1:	l32i	a0, a1, 0
 	retw
 
+ENDPROC(coprocessor_flush)
+
 ENTRY(coprocessor_restore)
 	entry	a1, 32
 	s32i	a0, a1, 0
@@ -205,6 +217,8 @@
 1:	l32i	a0, a1, 0
 	retw
 
+ENDPROC(coprocessor_restore)
+
 /*
  * Entry condition:
  *
@@ -220,10 +234,12 @@
  */
 
 ENTRY(fast_coprocessor_double)
+
 	wsr	a0, excsave1
 	movi	a0, unrecoverable_exception
 	callx0	a0
 
+ENDPROC(fast_coprocessor_double)
 
 ENTRY(fast_coprocessor)
 
@@ -327,9 +343,14 @@
 
 	rfe
 
+ENDPROC(fast_coprocessor)
+
 	.data
+
 ENTRY(coprocessor_owner)
+
 	.fill XCHAL_CP_MAX, 4, 0
 
-#endif /* XTENSA_HAVE_COPROCESSORS */
+END(coprocessor_owner)
 
+#endif /* XTENSA_HAVE_COPROCESSORS */
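Beyond the ENTRY/ENDPROC bracketing, this file implements the lazy coprocessor switch that fast_coprocessor drives. The idea, as a C-level sketch (save_cp_regs/load_cp_regs are hypothetical names; the real logic is the assembly above):

	/* CPENABLE is cleared at task switch, so the new task's first
	 * coprocessor instruction traps; the handler migrates ownership
	 * and retries the instruction. */
	static void lazy_cp_switch(int cp, struct thread_info *me)
	{
		struct thread_info *owner = coprocessor_owner[cp];

		if (owner && owner != me)
			save_cp_regs(owner, cp);	/* hypothetical */
		load_cp_regs(me, cp);			/* hypothetical */
		coprocessor_owner[cp] = me;
		/* set the CPENABLE bit and return to the faulting insn */
	}
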
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 90bfc1d..3777fec 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -219,6 +219,7 @@
 
 	j	common_exception
 
+ENDPROC(user_exception)
 
 /*
  * First-level exit handler for kernel exceptions
@@ -371,6 +372,13 @@
 	s32i	a2, a1, PT_LBEG
 	s32i	a3, a1, PT_LEND
 
+	/* Save SCOMPARE1 */
+
+#if XCHAL_HAVE_S32C1I
+	rsr     a2, scompare1
+	s32i    a2, a1, PT_SCOMPARE1
+#endif
+
 	/* Save optional registers. */
 
 	save_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
@@ -432,6 +440,12 @@
 
 	load_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
 
+	/* Restore SCOMPARE1 */
+
+#if XCHAL_HAVE_S32C1I
+	l32i    a2, a1, PT_SCOMPARE1
+	wsr     a2, scompare1
+#endif
 	wsr	a3, ps		/* disable interrupts */
 
 	_bbci.l	a3, PS_UM_BIT, kernel_exception_exit
@@ -641,6 +655,8 @@
 	l32i	a1, a1, PT_AREG1
 	rfde
 
+ENDPROC(kernel_exception)
+
 /*
  * Debug exception handler.
  *
@@ -701,6 +717,7 @@
 	/* Debug exception while in exception mode. */
 1:	j	1b	// FIXME!!
 
+ENDPROC(debug_exception)
 
 /*
  * We get here in case of an unrecoverable exception.
@@ -751,6 +768,7 @@
 
 1:	j	1b
 
+ENDPROC(unrecoverable_exception)
 
 /* -------------------------- FAST EXCEPTION HANDLERS ----------------------- */
 
@@ -856,7 +874,7 @@
 
 	_bnei	a0, 1, 1f		# no 'movsp a1, ax': jump
 
-        /* Move the save area. This implies the use of the L32E
+	/* Move the save area. This implies the use of the L32E
 	 * and S32E instructions, because this move must be done with
 	 * the user's PS.RING privilege levels, not with ring 0
 	 * (kernel's) privileges currently active with PS.EXCM
@@ -929,6 +947,7 @@
 	l32i	a2, a2, PT_AREG2
 	rfe
 
+ENDPROC(fast_alloca)
 
 /*
  * fast system calls.
@@ -966,6 +985,8 @@
 
 	j	kernel_exception
 
+ENDPROC(fast_syscall_kernel)
+
 ENTRY(fast_syscall_user)
 
 	/* Skip syscall. */
@@ -983,19 +1004,21 @@
 
 	j	user_exception
 
+ENDPROC(fast_syscall_user)
+
 ENTRY(fast_syscall_unrecoverable)
 
-        /* Restore all states. */
+	/* Restore all states. */
 
-        l32i    a0, a2, PT_AREG0        # restore a0
-        xsr     a2, depc                # restore a2, depc
-        rsr     a3, excsave1
+	l32i    a0, a2, PT_AREG0        # restore a0
+	xsr     a2, depc                # restore a2, depc
+	rsr     a3, excsave1
 
-        wsr     a0, excsave1
-        movi    a0, unrecoverable_exception
-        callx0  a0
+	wsr     a0, excsave1
+	movi    a0, unrecoverable_exception
+	callx0  a0
 
-
+ENDPROC(fast_syscall_unrecoverable)
 
 /*
  * sysxtensa syscall handler
@@ -1101,7 +1124,7 @@
 	movi	a2, -EINVAL
 	rfe
 
-
+ENDPROC(fast_syscall_xtensa)
 
 
 /* fast_syscall_spill_registers.
@@ -1160,6 +1183,8 @@
 	movi	a2, 0
 	rfe
 
+ENDPROC(fast_syscall_spill_registers)
+
 /* Fixup handler.
  *
  * We get here if the spill routine causes an exception, e.g. tlb miss.
@@ -1228,9 +1253,9 @@
 
 	movi	a3, exc_table
 	rsr	a0, exccause
-        addx4	a0, a0, a3              	# find entry in table
-        l32i	a0, a0, EXC_TABLE_FAST_USER     # load handler
-        jx	a0
+	addx4	a0, a0, a3              	# find entry in table
+	l32i	a0, a0, EXC_TABLE_FAST_USER     # load handler
+	jx	a0
 
 fast_syscall_spill_registers_fixup_return:
 
@@ -1432,7 +1457,7 @@
 	rsr	a0, ps
 	_bbci.l	a0, PS_UM_BIT, 1f
 
- 	/* User space: Setup a dummy frame and kill application.
+	/* User space: Setup a dummy frame and kill application.
 	 * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
 	 */
 
@@ -1464,6 +1489,8 @@
 	callx0	a0		# should not return
 1:	j	1b
 
+ENDPROC(_spill_registers)
+
 #ifdef CONFIG_MMU
 /*
  * We should never get here. Bail out!
@@ -1475,6 +1502,8 @@
 	callx0	a0		# should not return
 1:	j	1b
 
+ENDPROC(fast_second_level_miss_double_kernel)
+
 /* First-level entry handler for user, kernel, and double 2nd-level
  * TLB miss exceptions.  Note that for now, user and kernel miss
  * exceptions share the same entry point and are handled identically.
@@ -1682,6 +1711,7 @@
 	j	_kernel_exception
 1:	j	_user_exception
 
+ENDPROC(fast_second_level_miss)
 
 /*
  * StoreProhibitedException
@@ -1777,6 +1807,9 @@
 	bbsi.l	a2, PS_UM_BIT, 1f
 	j	_kernel_exception
 1:	j	_user_exception
+
+ENDPROC(fast_store_prohibited)
+
 #endif /* CONFIG_MMU */
 
 /*
@@ -1787,6 +1820,7 @@
  */
 
 ENTRY(system_call)
+
 	entry	a1, 32
 
 	/* regs->syscall = regs->areg[2] */
@@ -1831,6 +1865,8 @@
 	callx4	a4
 	retw
 
+ENDPROC(system_call)
+
 
 /*
  * Task switch.
@@ -1899,6 +1935,7 @@
 
 	retw
 
+ENDPROC(_switch_to)
 
 ENTRY(ret_from_fork)
 
@@ -1914,6 +1951,8 @@
 
 	j	common_exception_return
 
+ENDPROC(ret_from_fork)
+
 /*
  * Kernel thread creation helper
  * On entry, set up by copy_thread: a2 = thread_fn, a3 = thread_fn arg
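Most of this hunk series is ENDPROC() bracketing; for reference, the generic linkage macros behind it are roughly (include/linux/linkage.h of this vintage):

	#define ENTRY(name)	.globl name; ALIGN; name:
	#define END(name)	.size name, .-name
	#define ENDPROC(name)	.type name, @function; END(name)

Giving each routine an ELF type and size improves backtraces and symbol-aware tooling. The functional change in entry.S is the SCOMPARE1 save/restore: on S32C1I-capable cores, an exception taken between the wsr to SCOMPARE1 and the s32c1i itself would otherwise let kernel code clobber the comparison value.
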
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index bdc5078..91d9095 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -18,6 +18,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cacheasm.h>
+#include <asm/initialize_mmu.h>
 
 #include <linux/init.h>
 #include <linux/linkage.h>
@@ -47,16 +48,19 @@
 	 */
 
 	__HEAD
-	.globl _start
-_start:	_j	2f
+ENTRY(_start)
+
+	_j	2f
 	.align	4
 1:	.word	_startup
 2:	l32r	a0, 1b
 	jx	a0
 
+ENDPROC(_start)
+
 	.section .init.text, "ax"
-	.align 4
-_startup:
+
+ENTRY(_startup)
 
 	/* Disable interrupts and exceptions. */
 
@@ -107,7 +111,7 @@
 	/* Disable all timers. */
 
 	.set	_index, 0
-	.rept	XCHAL_NUM_TIMERS - 1
+	.rept	XCHAL_NUM_TIMERS
 	wsr	a0, SREG_CCOMPARE + _index
 	.set	_index, _index + 1
 	.endr
@@ -120,7 +124,7 @@
 
 	/* Disable coprocessors. */
 
-#if XCHAL_CP_NUM > 0
+#if XCHAL_HAVE_CP
 	wsr	a0, cpenable
 #endif
 
@@ -152,6 +156,8 @@
 
 	isync
 
+	initialize_mmu
+
 	/* Unpack data sections
 	 *
 	 * The linker script used to build the Linux kernel image
@@ -230,6 +236,7 @@
 should_never_return:
 	j	should_never_return
 
+ENDPROC(_startup)
 
 /*
  * BSS section
@@ -239,6 +246,8 @@
 #ifdef CONFIG_MMU
 ENTRY(swapper_pg_dir)
 	.fill	PAGE_SIZE, 1, 0
+END(swapper_pg_dir)
 #endif
 ENTRY(empty_zero_page)
 	.fill	PAGE_SIZE, 1, 0
+END(empty_zero_page)
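Two functional fixes hide among these cleanups: the .rept now iterates XCHAL_NUM_TIMERS times, so every CCOMPARE register is cleared (the old loop stopped one short), and the reset path now runs initialize_mmu. The off-by-one restated in C, with set_ccompare() as a hypothetical stand-in for the wsr:

	/* before: for (i = 0; i < XCHAL_NUM_TIMERS - 1; i++)
	 *         -- the last timer kept a stale compare value */
	for (i = 0; i < XCHAL_NUM_TIMERS; i++)
		set_ccompare(i, 0);	/* hypothetical helper */
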
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index a6ce3e5..6f4f974 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -18,6 +18,8 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kernel_stat.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
 
 #include <asm/uaccess.h>
 #include <asm/platform.h>
@@ -26,19 +28,22 @@
 
 atomic_t irq_err_count;
 
+static struct irq_domain *root_domain;
+
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
 
-asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
+asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
+	int irq = irq_find_mapping(root_domain, hwirq);
 
-	if (irq >= NR_IRQS) {
+	if (hwirq >= NR_IRQS) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-				__func__, irq);
+				__func__, hwirq);
 	}
 
 	irq_enter();
@@ -71,40 +76,39 @@
 
 static void xtensa_irq_mask(struct irq_data *d)
 {
-	cached_irq_mask &= ~(1 << d->irq);
+	cached_irq_mask &= ~(1 << d->hwirq);
 	set_sr (cached_irq_mask, intenable);
 }
 
 static void xtensa_irq_unmask(struct irq_data *d)
 {
-	cached_irq_mask |= 1 << d->irq;
+	cached_irq_mask |= 1 << d->hwirq;
 	set_sr (cached_irq_mask, intenable);
 }
 
 static void xtensa_irq_enable(struct irq_data *d)
 {
-	variant_irq_enable(d->irq);
+	variant_irq_enable(d->hwirq);
 	xtensa_irq_unmask(d);
 }
 
 static void xtensa_irq_disable(struct irq_data *d)
 {
 	xtensa_irq_mask(d);
-	variant_irq_disable(d->irq);
+	variant_irq_disable(d->hwirq);
 }
 
 static void xtensa_irq_ack(struct irq_data *d)
 {
-	set_sr(1 << d->irq, intclear);
+	set_sr(1 << d->hwirq, intclear);
 }
 
 static int xtensa_irq_retrigger(struct irq_data *d)
 {
-	set_sr (1 << d->irq, INTSET);
+	set_sr(1 << d->hwirq, intset);
 	return 1;
 }
 
-
 static struct irq_chip xtensa_irq_chip = {
 	.name		= "xtensa",
 	.irq_enable	= xtensa_irq_enable,
@@ -115,37 +119,99 @@
 	.irq_retrigger	= xtensa_irq_retrigger,
 };
 
+static int xtensa_irq_map(struct irq_domain *d, unsigned int irq,
+		irq_hw_number_t hw)
+{
+	u32 mask = 1 << hw;
+
+	if (mask & XCHAL_INTTYPE_MASK_SOFTWARE) {
+		irq_set_chip_and_handler_name(irq, &xtensa_irq_chip,
+				handle_simple_irq, "level");
+		irq_set_status_flags(irq, IRQ_LEVEL);
+	} else if (mask & XCHAL_INTTYPE_MASK_EXTERN_EDGE) {
+		irq_set_chip_and_handler_name(irq, &xtensa_irq_chip,
+				handle_edge_irq, "edge");
+		irq_clear_status_flags(irq, IRQ_LEVEL);
+	} else if (mask & XCHAL_INTTYPE_MASK_EXTERN_LEVEL) {
+		irq_set_chip_and_handler_name(irq, &xtensa_irq_chip,
+				handle_level_irq, "level");
+		irq_set_status_flags(irq, IRQ_LEVEL);
+	} else if (mask & XCHAL_INTTYPE_MASK_TIMER) {
+		irq_set_chip_and_handler_name(irq, &xtensa_irq_chip,
+				handle_edge_irq, "edge");
+		irq_clear_status_flags(irq, IRQ_LEVEL);
+	} else {/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
+		/* XCHAL_INTTYPE_MASK_NMI */
+
+		irq_set_chip_and_handler_name(irq, &xtensa_irq_chip,
+				handle_level_irq, "level");
+		irq_set_status_flags(irq, IRQ_LEVEL);
+	}
+	return 0;
+}
+
+static unsigned map_ext_irq(unsigned ext_irq)
+{
+	unsigned mask = XCHAL_INTTYPE_MASK_EXTERN_EDGE |
+		XCHAL_INTTYPE_MASK_EXTERN_LEVEL;
+	unsigned i;
+
+	for (i = 0; mask; ++i, mask >>= 1) {
+		if ((mask & 1) && ext_irq-- == 0)
+			return i;
+	}
+	return XCHAL_NUM_INTERRUPTS;
+}
+
+/*
+ * Device Tree IRQ specifier translation function which works with one or
+ * two cell bindings. First cell value maps directly to the hwirq number.
+ * Second cell if present specifies whether hwirq number is external (1) or
+ * internal (0).
+ */
+int xtensa_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+		const u32 *intspec, unsigned int intsize,
+		unsigned long *out_hwirq, unsigned int *out_type)
+{
+	if (WARN_ON(intsize < 1 || intsize > 2))
+		return -EINVAL;
+	if (intsize == 2 && intspec[1] == 1) {
+		unsigned int_irq = map_ext_irq(intspec[0]);
+		if (int_irq < XCHAL_NUM_INTERRUPTS)
+			*out_hwirq = int_irq;
+		else
+			return -EINVAL;
+	} else {
+		*out_hwirq = intspec[0];
+	}
+	*out_type = IRQ_TYPE_NONE;
+	return 0;
+}
+
+static const struct irq_domain_ops xtensa_irq_domain_ops = {
+	.xlate = xtensa_irq_domain_xlate,
+	.map = xtensa_irq_map,
+};
+
 void __init init_IRQ(void)
 {
-	int index;
-
-	for (index = 0; index < XTENSA_NR_IRQS; index++) {
-		int mask = 1 << index;
-
-		if (mask & XCHAL_INTTYPE_MASK_SOFTWARE)
-			irq_set_chip_and_handler(index, &xtensa_irq_chip,
-						 handle_simple_irq);
-
-		else if (mask & XCHAL_INTTYPE_MASK_EXTERN_EDGE)
-			irq_set_chip_and_handler(index, &xtensa_irq_chip,
-						 handle_edge_irq);
-
-		else if (mask & XCHAL_INTTYPE_MASK_EXTERN_LEVEL)
-			irq_set_chip_and_handler(index, &xtensa_irq_chip,
-						 handle_level_irq);
-
-		else if (mask & XCHAL_INTTYPE_MASK_TIMER)
-			irq_set_chip_and_handler(index, &xtensa_irq_chip,
-						 handle_edge_irq);
-
-		else	/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
-			/* XCHAL_INTTYPE_MASK_NMI */
-
-			irq_set_chip_and_handler(index, &xtensa_irq_chip,
-						 handle_level_irq);
-	}
+	struct device_node *intc = NULL;
 
 	cached_irq_mask = 0;
+	set_sr(~0, intclear);
+
+#ifdef CONFIG_OF
+	/* The interrupt controller device node is mandatory */
+	intc = of_find_compatible_node(NULL, NULL, "xtensa,pic");
+	BUG_ON(!intc);
+
+	root_domain = irq_domain_add_linear(intc, NR_IRQS,
+			&xtensa_irq_domain_ops, NULL);
+#else
+	root_domain = irq_domain_add_legacy(intc, NR_IRQS, 0, 0,
+			&xtensa_irq_domain_ops, NULL);
+#endif
+	irq_set_default_host(root_domain);
 
 	variant_init_irq();
 }
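With the irqdomain conversion, hardware interrupt lines (hwirq) are decoupled from Linux irq numbers: do_IRQ() translates through irq_find_mapping(), and the chip callbacks key off d->hwirq instead of d->irq. Consumers now create a mapping before requesting the line, as time.c does later in this series; a usage sketch (my_handler, "mydev" and dev are placeholders):

	static int claim_hw_line(unsigned int hwirq, irq_handler_t my_handler,
				 void *dev)
	{
		/* NULL selects the default domain registered above */
		unsigned int virq = irq_create_mapping(NULL, hwirq);

		if (!virq)
			return -ENODEV;
		return request_irq(virq, my_handler, 0, "mydev", dev);
	}
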
diff --git a/arch/xtensa/kernel/module.c b/arch/xtensa/kernel/module.c
index 451dda92..b715237 100644
--- a/arch/xtensa/kernel/module.c
+++ b/arch/xtensa/kernel/module.c
@@ -53,7 +53,7 @@
 		       struct module *mod)
 {
 	unsigned int i;
-        Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
 	Elf32_Sym *sym;
 	unsigned char *location;
 	uint32_t value;
diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c
index 97230e4..44bf21c 100644
--- a/arch/xtensa/kernel/platform.c
+++ b/arch/xtensa/kernel/platform.c
@@ -44,4 +44,3 @@
 	ccount_per_jiffy = 10 * (1000000UL/HZ);
 });
 #endif
-
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 1accf28..0dd5784 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -108,7 +108,7 @@
 
 void cpu_idle(void)
 {
-  	local_irq_enable();
+	local_irq_enable();
 
 	/* endless idle loop with no priority at all */
 	while (1) {
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 33eea4c..61fb2e9 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -154,7 +154,7 @@
 	coprocessor_flush_all(ti);
 	coprocessor_release_all(ti);
 
-	ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0, 
+	ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
 				sizeof(xtregs_coprocessor_t));
 #endif
 	ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
@@ -343,4 +343,3 @@
 			&& (current->ptrace & PT_PTRACED))
 		do_syscall_trace();
 }
-
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index b237988..24c1a57 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -22,6 +22,11 @@
 #include <linux/bootmem.h>
 #include <linux/kernel.h>
 
+#ifdef CONFIG_OF
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#endif
+
 #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
 # include <linux/console.h>
 #endif
@@ -42,6 +47,7 @@
 #include <asm/page.h>
 #include <asm/setup.h>
 #include <asm/param.h>
+#include <asm/traps.h>
 
 #include <platform/hardware.h>
 
@@ -64,6 +70,11 @@
 extern int initrd_below_start_ok;
 #endif
 
+#ifdef CONFIG_OF
+extern u32 __dtb_start[];
+void *dtb_start = __dtb_start;
+#endif
+
 unsigned char aux_device_present;
 extern unsigned long loops_per_jiffy;
 
@@ -83,6 +94,8 @@
 static inline void init_mmu(void) { }
 #endif
 
+extern int mem_reserve(unsigned long, unsigned long, int);
+extern void bootmem_init(void);
 extern void zones_init(void);
 
 /*
@@ -104,26 +117,31 @@
 
 /* parse current tag */
 
+static int __init add_sysmem_bank(unsigned long type, unsigned long start,
+		unsigned long end)
+{
+	if (sysmem.nr_banks >= SYSMEM_BANKS_MAX) {
+		printk(KERN_WARNING
+				"Ignoring memory bank 0x%08lx size %ldKB\n",
+				start, end - start);
+		return -EINVAL;
+	}
+	sysmem.bank[sysmem.nr_banks].type  = type;
+	sysmem.bank[sysmem.nr_banks].start = PAGE_ALIGN(start);
+	sysmem.bank[sysmem.nr_banks].end   = end & PAGE_MASK;
+	sysmem.nr_banks++;
+
+	return 0;
+}
+
 static int __init parse_tag_mem(const bp_tag_t *tag)
 {
-	meminfo_t *mi = (meminfo_t*)(tag->data);
+	meminfo_t *mi = (meminfo_t *)(tag->data);
 
 	if (mi->type != MEMORY_TYPE_CONVENTIONAL)
 		return -1;
 
-	if (sysmem.nr_banks >= SYSMEM_BANKS_MAX) {
-		printk(KERN_WARNING
-		       "Ignoring memory bank 0x%08lx size %ldKB\n",
-		       (unsigned long)mi->start,
-		       (unsigned long)mi->end - (unsigned long)mi->start);
-		return -EINVAL;
-	}
-	sysmem.bank[sysmem.nr_banks].type  = mi->type;
-	sysmem.bank[sysmem.nr_banks].start = PAGE_ALIGN(mi->start);
-	sysmem.bank[sysmem.nr_banks].end   = mi->end & PAGE_MASK;
-	sysmem.nr_banks++;
-
-	return 0;
+	return add_sysmem_bank(mi->type, mi->start, mi->end);
 }
 
 __tagtable(BP_TAG_MEMORY, parse_tag_mem);
@@ -142,12 +160,31 @@
 
 __tagtable(BP_TAG_INITRD, parse_tag_initrd);
 
+#ifdef CONFIG_OF
+
+static int __init parse_tag_fdt(const bp_tag_t *tag)
+{
+	dtb_start = (void *)(tag->data[0]);
+	return 0;
+}
+
+__tagtable(BP_TAG_FDT, parse_tag_fdt);
+
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+		unsigned long end)
+{
+	initrd_start = (void *)__va(start);
+	initrd_end = (void *)__va(end);
+	initrd_below_start_ok = 1;
+}
+
+#endif /* CONFIG_OF */
+
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 static int __init parse_tag_cmdline(const bp_tag_t* tag)
 {
-	strncpy(command_line, (char*)(tag->data), COMMAND_LINE_SIZE);
-	command_line[COMMAND_LINE_SIZE - 1] = '\0';
+	strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE);
 	return 0;
 }
 
@@ -185,6 +222,58 @@
 	return 0;
 }
 
+#ifdef CONFIG_OF
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+	size &= PAGE_MASK;
+	add_sysmem_bank(MEMORY_TYPE_CONVENTIONAL, base, base + size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return __alloc_bootmem(size, align, 0);
+}
+
+void __init early_init_devtree(void *params)
+{
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+
+	/* Retrieve various information from the /chosen node of the
+	 * device-tree, including the platform type, initrd location and
+	 * size, TCE reserve, and more ...
+	 */
+	if (!command_line[0])
+		of_scan_flat_dt(early_init_dt_scan_chosen, command_line);
+
+	/* Scan memory nodes and rebuild MEMBLOCKs */
+	of_scan_flat_dt(early_init_dt_scan_root, NULL);
+	if (sysmem.nr_banks == 0)
+		of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+}
+
+static void __init copy_devtree(void)
+{
+	void *alloc = early_init_dt_alloc_memory_arch(
+			be32_to_cpu(initial_boot_params->totalsize), 0);
+	if (alloc) {
+		memcpy(alloc, initial_boot_params,
+				be32_to_cpu(initial_boot_params->totalsize));
+		initial_boot_params = alloc;
+	}
+}
+
+static int __init xtensa_device_probe(void)
+{
+	of_platform_populate(NULL, NULL, NULL, NULL);
+	return 0;
+}
+
+device_initcall(xtensa_device_probe);
+
+#endif /* CONFIG_OF */
+
 /*
  * Initialize architecture. (Early stage)
  */
@@ -193,14 +282,14 @@
 {
 	sysmem.nr_banks = 0;
 
-#ifdef CONFIG_CMDLINE_BOOL
-	strcpy(command_line, default_command_line);
-#endif
-
 	/* Parse boot parameters */
 
-        if (bp_start)
-	  parse_bootparam(bp_start);
+	if (bp_start)
+		parse_bootparam(bp_start);
+
+#ifdef CONFIG_OF
+	early_init_devtree(dtb_start);
+#endif
 
 	if (sysmem.nr_banks == 0) {
 		sysmem.nr_banks = 1;
@@ -209,6 +298,11 @@
 				     + PLATFORM_DEFAULT_MEM_SIZE;
 	}
 
+#ifdef CONFIG_CMDLINE_BOOL
+	if (!command_line[0])
+		strlcpy(command_line, default_command_line, COMMAND_LINE_SIZE);
+#endif
+
 	/* Early hook for platforms */
 
 	platform_init(bp_start);
@@ -235,15 +329,130 @@
 extern char _DoubleExceptionVector_literal_start;
 extern char _DoubleExceptionVector_text_end;
 
+
+#ifdef CONFIG_S32C1I_SELFTEST
+#if XCHAL_HAVE_S32C1I
+
+static int __initdata rcw_word, rcw_probe_pc, rcw_exc;
+
+/*
+ * Basic atomic compare-and-swap, that records PC of S32C1I for probing.
+ *
+ * If *v == cmp, set *v = set.  Return previous *v.
+ */
+static inline int probed_compare_swap(int *v, int cmp, int set)
+{
+	int tmp;
+
+	__asm__ __volatile__(
+			"	movi	%1, 1f\n"
+			"	s32i	%1, %4, 0\n"
+			"	wsr	%2, scompare1\n"
+			"1:	s32c1i	%0, %3, 0\n"
+			: "=a" (set), "=&a" (tmp)
+			: "a" (cmp), "a" (v), "a" (&rcw_probe_pc), "0" (set)
+			: "memory"
+			);
+	return set;
+}
+
+/* Handle probed exception */
+
+void __init do_probed_exception(struct pt_regs *regs, unsigned long exccause)
+{
+	if (regs->pc == rcw_probe_pc) {	/* exception on s32c1i ? */
+		regs->pc += 3;		/* skip the s32c1i instruction */
+		rcw_exc = exccause;
+	} else {
+		do_unhandled(regs, exccause);
+	}
+}
+
+/* Simple test of S32C1I (soc bringup assist) */
+
+void __init check_s32c1i(void)
+{
+	int n, cause1, cause2;
+	void *handbus, *handdata, *handaddr; /* temporarily saved handlers */
+
+	rcw_probe_pc = 0;
+	handbus  = trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR,
+			do_probed_exception);
+	handdata = trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR,
+			do_probed_exception);
+	handaddr = trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR,
+			do_probed_exception);
+
+	/* First try an S32C1I that does not store: */
+	rcw_exc = 0;
+	rcw_word = 1;
+	n = probed_compare_swap(&rcw_word, 0, 2);
+	cause1 = rcw_exc;
+
+	/* took exception? */
+	if (cause1 != 0) {
+		/* unclean exception? */
+		if (n != 2 || rcw_word != 1)
+			panic("S32C1I exception error");
+	} else if (rcw_word != 1 || n != 1) {
+		panic("S32C1I compare error");
+	}
+
+	/* Then an S32C1I that stores: */
+	rcw_exc = 0;
+	rcw_word = 0x1234567;
+	n = probed_compare_swap(&rcw_word, 0x1234567, 0xabcde);
+	cause2 = rcw_exc;
+
+	if (cause2 != 0) {
+		/* unclean exception? */
+		if (n != 0xabcde || rcw_word != 0x1234567)
+			panic("S32C1I exception error (b)");
+	} else if (rcw_word != 0xabcde || n != 0x1234567) {
+		panic("S32C1I store error");
+	}
+
+	/* Verify consistency of exceptions: */
+	if (cause1 || cause2) {
+		pr_warn("S32C1I took exception %d, %d\n", cause1, cause2);
+		/* If emulation of S32C1I upon bus error gets implemented,
+		   we can get rid of this panic for single core (not SMP) */
+		panic("S32C1I exceptions not currently supported");
+	}
+	if (cause1 != cause2)
+		panic("inconsistent S32C1I exceptions");
+
+	trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, handbus);
+	trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR, handdata);
+	trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR, handaddr);
+}
+
+#else /* XCHAL_HAVE_S32C1I */
+
+/* This condition should not occur with a commercially deployed processor.
+   Display reminder for early engr test or demo chips / FPGA bitstreams */
+void __init check_s32c1i(void)
+{
+	pr_warn("Processor configuration lacks atomic compare-and-swap support!\n");
+}
+
+#endif /* XCHAL_HAVE_S32C1I */
+#else /* CONFIG_S32C1I_SELFTEST */
+
+void __init check_s32c1i(void)
+{
+}
+
+#endif /* CONFIG_S32C1I_SELFTEST */
+
+
 void __init setup_arch(char **cmdline_p)
 {
-	extern int mem_reserve(unsigned long, unsigned long, int);
-	extern void bootmem_init(void);
-
-	memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
-	boot_command_line[COMMAND_LINE_SIZE-1] = '\0';
+	strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
+	check_s32c1i();
+
 	/* Reserve some memory regions */
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -251,7 +460,7 @@
 		initrd_is_mapped = mem_reserve(__pa(initrd_start),
 					       __pa(initrd_end), 0);
 		initrd_below_start_ok = 1;
- 	} else {
+	} else {
 		initrd_start = 0;
 	}
 #endif
@@ -275,8 +484,12 @@
 
 	bootmem_init();
 
-	platform_setup(cmdline_p);
+#ifdef CONFIG_OF
+	copy_devtree();
+	unflatten_device_tree();
+#endif
 
+	platform_setup(cmdline_p);
 
 	paging_init();
 	zones_init();
@@ -326,7 +539,7 @@
 		     "core ID\t\t: " XCHAL_CORE_ID "\n"
 		     "build ID\t: 0x%x\n"
 		     "byte order\t: %s\n"
- 		     "cpu MHz\t\t: %lu.%02lu\n"
+		     "cpu MHz\t\t: %lu.%02lu\n"
 		     "bogomips\t: %lu.%02lu\n",
 		     XCHAL_BUILD_UNIQUE_ID,
 		     XCHAL_HAVE_BE ?  "big" : "little",
@@ -381,6 +594,9 @@
 #if XCHAL_HAVE_FP
 		     "fpu "
 #endif
+#if XCHAL_HAVE_S32C1I
+		     "s32c1i "
+#endif
 		     "\n");
 
 	/* Registers. */
@@ -412,7 +628,7 @@
 		     "icache size\t: %d\n"
 		     "icache flags\t: "
 #if XCHAL_ICACHE_LINE_LOCKABLE
-		     "lock"
+		     "lock "
 #endif
 		     "\n"
 		     "dcache line size: %d\n"
@@ -420,10 +636,10 @@
 		     "dcache size\t: %d\n"
 		     "dcache flags\t: "
 #if XCHAL_DCACHE_IS_WRITEBACK
-		     "writeback"
+		     "writeback "
 #endif
 #if XCHAL_DCACHE_LINE_LOCKABLE
-		     "lock"
+		     "lock "
 #endif
 		     "\n",
 		     XCHAL_ICACHE_LINESIZE,
@@ -465,4 +681,3 @@
 };
 
 #endif /* CONFIG_PROC_FS */
-
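Besides the device-tree plumbing, setup.c gains a boot-time self-test for the S32C1I instruction. probed_compare_swap() records the instruction's PC so that do_probed_exception() can recognize a fault on exactly that instruction and skip its 3-byte opcode. What the test verifies, sketched in C (the hardware performs this atomically):

	/* SCOMPARE1 holds the expected value; s32c1i stores 'set' only
	 * when *v matches it, and always returns the previous *v. */
	static int s32c1i_semantics(int *v, int cmp, int set)
	{
		int old = *v;

		if (old == cmp)
			*v = set;
		return old;	/* caller checks old == cmp for success */
	}
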
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 63c566f..de34d6b 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -212,7 +212,7 @@
 	if (err)
 		return err;
 
- 	/* The signal handler may have used coprocessors in which
+	/* The signal handler may have used coprocessors in which
 	 * case they are still enabled.  We disable them to force a
 	 * reloading of the original task's CP state by the lazy
 	 * context-switching mechanisms of CP exception handling.
@@ -396,7 +396,7 @@
 	 */
 
 	/* Set up registers for signal handler */
-	start_thread(regs, (unsigned long) ka->sa.sa_handler, 
+	start_thread(regs, (unsigned long) ka->sa.sa_handler,
 		     (unsigned long) frame);
 
 	/* Set up a stack frame for a call4
@@ -424,9 +424,9 @@
 	return -EFAULT;
 }
 
-asmlinkage long xtensa_sigaltstack(const stack_t __user *uss, 
+asmlinkage long xtensa_sigaltstack(const stack_t __user *uss,
 				   stack_t __user *uoss,
-    				   long a2, long a3, long a4, long a5,
+				   long a2, long a3, long a4, long a5,
 				   struct pt_regs *regs)
 {
 	return do_sigaltstack(uss, uoss, regs->areg[1]);
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index 5702065..54fa842 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -52,4 +52,3 @@
 {
 	return sys_fadvise64_64(fd, offset, len, advice);
 }
-
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index ac62f9c..ffb4741 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -22,6 +22,7 @@
 #include <linux/irq.h>
 #include <linux/profile.h>
 #include <linux/delay.h>
+#include <linux/irqdomain.h>
 
 #include <asm/timex.h>
 #include <asm/platform.h>
@@ -31,7 +32,7 @@
 unsigned long nsec_per_ccount;		/* nsec per ccount increment */
 #endif
 
-static cycle_t ccount_read(void)
+static cycle_t ccount_read(struct clocksource *cs)
 {
 	return (cycle_t)get_ccount();
 }
@@ -52,6 +53,7 @@
 
 void __init time_init(void)
 {
+	unsigned int irq;
 #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
 	printk("Calibrating CPU frequency ");
 	platform_calibrate_ccount();
@@ -62,7 +64,8 @@
 
 	/* Initialize the linux timer interrupt. */
 
-	setup_irq(LINUX_TIMER_INT, &timer_irqaction);
+	irq = irq_create_mapping(NULL, LINUX_TIMER_INT);
+	setup_irq(irq, &timer_irqaction);
 	set_linux_timer(get_ccount() + CCOUNT_PER_JIFFY);
 }
 
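Two independent fixes here: ccount_read() now takes the struct clocksource pointer that the clocksource core passes to every ->read hook, and the timer line is translated through the irq domain before setup_irq(). For context, the hook is wired up along these lines (a sketch; the rating is illustrative):

	static struct clocksource ccount_clocksource = {
		.name	= "ccount",
		.rating	= 200,			/* illustrative */
		.read	= ccount_read,		/* takes struct clocksource * */
		.mask	= CLOCKSOURCE_MASK(32),	/* CCOUNT is 32 bits wide */
	};
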
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 5caf2b6..01e0111 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -293,6 +293,17 @@
 }
 
 
+/* Set exception C handler - for temporary use when probing exceptions */
+
+void * __init trap_set_handler(int cause, void *handler)
+{
+	unsigned long *entry = &exc_table[EXC_TABLE_DEFAULT / 4 + cause];
+	void *previous = (void *)*entry;
+	*entry = (unsigned long)handler;
+	return previous;
+}
+
+
 /*
  * Initialize dispatch tables.
  *
@@ -397,7 +408,8 @@
 		"wsr	a13, sar\n\t"
 		"wsr	a14, ps\n\t"
 		:: "a" (&a0), "a" (&ps)
-		: "a2", "a3", "a4", "a7", "a11", "a12", "a13", "a14", "a15", "memory");
+		: "a2", "a3", "a4", "a7", "a11", "a12", "a13", "a14", "a15",
+		  "memory");
 }
 
 void show_trace(struct task_struct *task, unsigned long *sp)
@@ -452,7 +464,7 @@
 
 	if (!sp)
 		sp = stack_pointer(task);
- 	stack = sp;
+	stack = sp;
 
 	printk("\nStack: ");
 
@@ -523,5 +535,3 @@
 
 	do_exit(err);
 }
-
-
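trap_set_handler() returns the handler it replaces, which is what lets check_s32c1i() in setup.c divert an exception cause around a potentially faulting instruction and put everything back afterwards:

	/* usage pattern, as in the selftest earlier in this series */
	void *old = trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR,
				     do_probed_exception);
	/* ... execute the probed instruction ... */
	trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, old);
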
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 4462c1e..68df35f 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -79,6 +79,8 @@
 	l32i	a0, a0, EXC_TABLE_FAST_USER	# load handler
 	jx	a0
 
+ENDPROC(_UserExceptionVector)
+
 /*
  * Kernel exception vector. (Exceptions with PS.UM == 0, PS.EXCM == 0)
  *
@@ -103,6 +105,7 @@
 	l32i	a0, a0, EXC_TABLE_FAST_KERNEL	# load handler address
 	jx	a0
 
+ENDPROC(_KernelExceptionVector)
 
 /*
  * Double exception vector (Exceptions with PS.EXCM == 1)
@@ -225,7 +228,13 @@
 	/* Window overflow/underflow exception. Get stack pointer. */
 
 	mov	a3, a2
-	movi	a2, exc_table
+	/* This explicit literal and the following references to it are made
+	 * in order to fit DoubleExceptionVector.literals into the available
+	 * 16-byte gap before DoubleExceptionVector.text in the absence of
+	 * link time relaxation. See kernel/vmlinux.lds.S
+	 */
+	.literal .Lexc_table, exc_table
+	l32r	a2, .Lexc_table
 	l32i	a2, a2, EXC_TABLE_KSTK
 
 	/* Check for overflow/underflow exception, jump if overflow. */
@@ -255,7 +264,7 @@
 	s32i	a0, a2, PT_AREG0
 
 	wsr	a3, excsave1		# save a3
-	movi	a3, exc_table
+	l32r	a3, .Lexc_table
 
 	rsr	a0, exccause
 	s32i	a0, a2, PT_DEPC		# mark it as a regular exception
@@ -267,7 +276,7 @@
 
 	/* a0: depc, a1: a1, a2: a2, a3: trashed, depc: a0, excsave1: a3 */
 
-	movi	a3, exc_table
+	l32r	a3, .Lexc_table
 	s32i	a2, a3, EXC_TABLE_DOUBLE_SAVE	# temporary variable
 
 	/* Enter critical section. */
@@ -296,7 +305,7 @@
 
 	/* a0: avail, a1: a1, a2: kstk, a3: avail, depc: a2, excsave: a3 */
 
-	movi	a3, exc_table
+	l32r	a3, .Lexc_table
 	rsr	a0, exccause
 	addx4	a0, a0, a3
 	l32i	a0, a0, EXC_TABLE_FAST_USER
@@ -338,6 +347,7 @@
 
 	.end literal_prefix
 
+ENDPROC(_DoubleExceptionVector)
 
 /*
  * Debug interrupt vector
@@ -349,9 +359,11 @@
 	.section .DebugInterruptVector.text, "ax"
 
 ENTRY(_DebugInterruptVector)
+
 	xsr	a0, SREG_EXCSAVE + XCHAL_DEBUGLEVEL
 	jx	a0
 
+ENDPROC(_DebugInterruptVector)
 
 
 /* Window overflow and underflow handlers.
@@ -363,38 +375,43 @@
  *	 we try to access any page that would cause a page fault early.
  */
 
+#define ENTRY_ALIGN64(name)	\
+	.globl name;		\
+	.align 64;		\
+	name:
+
 	.section		.WindowVectors.text, "ax"
 
 
 /* 4-Register Window Overflow Vector (Handler) */
 
-	.align 64
-.global _WindowOverflow4
-_WindowOverflow4:
+ENTRY_ALIGN64(_WindowOverflow4)
+
 	s32e	a0, a5, -16
 	s32e	a1, a5, -12
 	s32e	a2, a5,  -8
 	s32e	a3, a5,  -4
 	rfwo
 
+ENDPROC(_WindowOverflow4)
+
 
 /* 4-Register Window Underflow Vector (Handler) */
 
-	.align 64
-.global _WindowUnderflow4
-_WindowUnderflow4:
+ENTRY_ALIGN64(_WindowUnderflow4)
+
 	l32e	a0, a5, -16
 	l32e	a1, a5, -12
 	l32e	a2, a5,  -8
 	l32e	a3, a5,  -4
 	rfwu
 
+ENDPROC(_WindowUnderflow4)
 
 /* 8-Register Window Overflow Vector (Handler) */
 
-	.align 64
-.global _WindowOverflow8
-_WindowOverflow8:
+ENTRY_ALIGN64(_WindowOverflow8)
+
 	s32e	a0, a9, -16
 	l32e	a0, a1, -12
 	s32e	a2, a9,  -8
@@ -406,11 +423,12 @@
 	s32e	a7, a0, -20
 	rfwo
 
+ENDPROC(_WindowOverflow8)
+
 /* 8-Register Window Underflow Vector (Handler) */
 
-	.align 64
-.global _WindowUnderflow8
-_WindowUnderflow8:
+ENTRY_ALIGN64(_WindowUnderflow8)
+
 	l32e	a1, a9, -12
 	l32e	a0, a9, -16
 	l32e	a7, a1, -12
@@ -422,12 +440,12 @@
 	l32e	a7, a7, -20
 	rfwu
 
+ENDPROC(_WindowUnderflow8)
 
 /* 12-Register Window Overflow Vector (Handler) */
 
-	.align 64
-.global _WindowOverflow12
-_WindowOverflow12:
+ENTRY_ALIGN64(_WindowOverflow12)
+
 	s32e	a0,  a13, -16
 	l32e	a0,  a1,  -12
 	s32e	a1,  a13, -12
@@ -443,11 +461,12 @@
 	s32e	a11, a0,  -20
 	rfwo
 
+ENDPROC(_WindowOverflow12)
+
 /* 12-Register Window Underflow Vector (Handler) */
 
-	.align 64
-.global _WindowUnderflow12
-_WindowUnderflow12:
+ENTRY_ALIGN64(_WindowUnderflow12)
+
 	l32e	a1,  a13, -12
 	l32e	a0,  a13, -16
 	l32e	a11, a1,  -12
@@ -463,6 +482,6 @@
 	l32e	a11, a11, -20
 	rfwu
 
+ENDPROC(_WindowUnderflow12)
+
 	.text
-
-
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S
index df397f9..4eb573d 100644
--- a/arch/xtensa/lib/checksum.S
+++ b/arch/xtensa/lib/checksum.S
@@ -41,10 +41,11 @@
 
 .text
 ENTRY(csum_partial)
-	  /*
-	   * Experiments with Ethernet and SLIP connections show that buf
-	   * is aligned on either a 2-byte or 4-byte boundary.
-	   */
+
+	/*
+	 * Experiments with Ethernet and SLIP connections show that buf
+	 * is aligned on either a 2-byte or 4-byte boundary.
+	 */
 	entry	sp, 32
 	extui	a5, a2, 0, 2
 	bnez	a5, 8f		/* branch if 2-byte aligned */
@@ -170,7 +171,7 @@
 3:
 	j	5b		/* branch to handle the remaining byte */
 
-
+ENDPROC(csum_partial)
 
 /*
  * Copy from ds while checksumming, otherwise like csum_partial
@@ -211,6 +212,7 @@
  */
 
 ENTRY(csum_partial_copy_generic)
+
 	entry	sp, 32
 	mov	a12, a3
 	mov	a11, a4
@@ -367,6 +369,8 @@
 6:
 	j	4b		/* process the possible trailing odd byte */
 
+ENDPROC(csum_partial_copy_generic)
+
 
 # Exception handler:
 .section .fixup, "ax"
@@ -406,4 +410,3 @@
 	retw
 
 .previous
-
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index c48b80a..b1c219a 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -210,8 +210,10 @@
 	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies
 	# copy 16 bytes per iteration for word-aligned dst and unaligned src
 	ssa8	a3		# set shift amount from byte offset
-#define SIM_CHECKS_ALIGNMENT	1	/* set to 1 when running on ISS (simulator) with the
-					   lint or ferret client, or 0 to save a few cycles */
+
+/* set to 1 when running on ISS (simulator) with the
+   lint or ferret client, or 0 to save a few cycles */
+#define SIM_CHECKS_ALIGNMENT	1
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
 	and	a11, a3, a8	# save unalignment offset for below
 	sub	a3, a3, a11	# align a3
diff --git a/arch/xtensa/lib/pci-auto.c b/arch/xtensa/lib/pci-auto.c
index a71733a..34d05ab 100644
--- a/arch/xtensa/lib/pci-auto.c
+++ b/arch/xtensa/lib/pci-auto.c
@@ -241,8 +241,8 @@
 	unsigned char header_type;
 	struct pci_dev *dev = &pciauto_dev;
 
-        pciauto_dev.bus = &pciauto_bus;
-        pciauto_dev.sysdata = pci_ctrl;
+	pciauto_dev.bus = &pciauto_bus;
+	pciauto_dev.sysdata = pci_ctrl;
 	pciauto_bus.ops = pci_ctrl->ops;
 
 	/*
@@ -345,8 +345,3 @@
 	}
 	return sub_bus;
 }
-
-
-
-
-
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
index 9f603cd..1ad0ecf 100644
--- a/arch/xtensa/lib/strncpy_user.S
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -166,7 +166,7 @@
 	retw
 .Lz1:	# byte 1 is zero
 #ifdef __XTENSA_EB__
-        extui   a9, a9, 16, 16
+	extui   a9, a9, 16, 16
 #endif /* __XTENSA_EB__ */
 	EX(s16i, a9, a11, 0, fixup_s)
 	addi	a11, a11, 1		# advance dst pointer
@@ -174,7 +174,7 @@
 	retw
 .Lz2:	# byte 2 is zero
 #ifdef __XTENSA_EB__
-        extui   a9, a9, 16, 16
+	extui   a9, a9, 16, 16
 #endif /* __XTENSA_EB__ */
 	EX(s16i, a9, a11, 0, fixup_s)
 	movi	a9, 0
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S
index 23f2a89..4c03b1e 100644
--- a/arch/xtensa/lib/strnlen_user.S
+++ b/arch/xtensa/lib/strnlen_user.S
@@ -145,4 +145,3 @@
 lenfixup:
 	movi	a2, 0
 	retw
-
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index 46d6031..ace1892 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -318,4 +318,3 @@
 	/* Ignore memset return value in a6. */
 	/* a2 still contains bytes not copied. */
 	retw
-
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 85df465..81edeab 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -118,7 +118,7 @@
  * For now, flush the whole cache. FIXME??
  */
 
-void flush_cache_range(struct vm_area_struct* vma, 
+void flush_cache_range(struct vm_area_struct* vma,
 		       unsigned long start, unsigned long end)
 {
 	__flush_invalidate_dcache_all();
@@ -133,7 +133,7 @@
  */
 
 void flush_cache_page(struct vm_area_struct* vma, unsigned long address,
-    		      unsigned long pfn)
+		      unsigned long pfn)
 {
 	/* Note that we have to use the 'alias' address to avoid multi-hit */
 
@@ -166,14 +166,14 @@
 
 	if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) {
 
-		unsigned long vaddr = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
 		unsigned long paddr = (unsigned long) page_address(page);
 		unsigned long phys = page_to_phys(page);
+		unsigned long tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
 
 		__flush_invalidate_dcache_page(paddr);
 
-		__flush_invalidate_dcache_page_alias(vaddr, phys);
-		__invalidate_icache_page_alias(vaddr, phys);
+		__flush_invalidate_dcache_page_alias(tmp, phys);
+		__invalidate_icache_page_alias(tmp, phys);
 
 		clear_bit(PG_arch_1, &page->flags);
 	}
@@ -195,7 +195,7 @@
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
 
-void copy_to_user_page(struct vm_area_struct *vma, struct page *page, 
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		unsigned long vaddr, void *dst, const void *src,
 		unsigned long len)
 {
@@ -205,8 +205,8 @@
 	/* Flush and invalidate user page if aliased. */
 
 	if (alias) {
-		unsigned long temp = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
-		__flush_invalidate_dcache_page_alias(temp, phys);
+		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+		__flush_invalidate_dcache_page_alias(t, phys);
 	}
 
 	/* Copy data */
@@ -219,12 +219,11 @@
 	 */
 
 	if (alias) {
-		unsigned long temp = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
 
 		__flush_invalidate_dcache_range((unsigned long) dst, len);
-		if ((vma->vm_flags & VM_EXEC) != 0) {
-			__invalidate_icache_page_alias(temp, phys);
-		}
+		if ((vma->vm_flags & VM_EXEC) != 0)
+			__invalidate_icache_page_alias(t, phys);
 
 	} else if ((vma->vm_flags & VM_EXEC) != 0) {
 		__flush_dcache_range((unsigned long)dst,len);
@@ -245,8 +244,8 @@
 	 */
 
 	if (alias) {
-		unsigned long temp = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
-		__flush_invalidate_dcache_page_alias(temp, phys);
+		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+		__flush_invalidate_dcache_page_alias(t, phys);
 	}
 
 	memcpy(dst, src, len);
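The temp/tmp renames are cosmetic, but the idiom deserves a note: when a data-cache way is larger than a page, the cache index includes virtual-address bits above PAGE_SHIFT, so one physical page can occupy different cache lines depending on which virtual mapping touched it. The fixed formula reproduces the user mapping's index bits inside a reserved kernel window:

	/* alias with the same cache-index bits as the user address, so
	 * the flush hits the lines the user mapping actually populated */
	unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);

	__flush_invalidate_dcache_page_alias(t, phys);
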
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 245b08f..4b7bc8d 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -254,4 +254,3 @@
 	die("Oops", regs, sig);
 	do_exit(sig);
 }
-
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index db955179..7a5156f 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -75,15 +75,15 @@
 			sysmem.nr_banks++;
 		}
 		sysmem.bank[i].end = start;
+
+	} else if (end < sysmem.bank[i].end) {
+		sysmem.bank[i].start = end;
+
 	} else {
-		if (end < sysmem.bank[i].end)
-			sysmem.bank[i].start = end;
-		else {
-			/* remove entry */
-			sysmem.nr_banks--;
-			sysmem.bank[i].start = sysmem.bank[sysmem.nr_banks].start;
-			sysmem.bank[i].end   = sysmem.bank[sysmem.nr_banks].end;
-		}
+		/* remove entry */
+		sysmem.nr_banks--;
+		sysmem.bank[i].start = sysmem.bank[sysmem.nr_banks].start;
+		sysmem.bank[i].end   = sysmem.bank[sysmem.nr_banks].end;
 	}
 	return -1;
 }
diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S
index b048406..d97ed1b 100644
--- a/arch/xtensa/mm/misc.S
+++ b/arch/xtensa/mm/misc.S
@@ -29,6 +29,7 @@
  */
 
 ENTRY(clear_page)
+
 	entry	a1, 16
 
 	movi	a3, 0
@@ -45,6 +46,8 @@
 
 	retw
 
+ENDPROC(clear_page)
+
 /*
  * copy_page and copy_user_page are the same for non-cache-aliased configs.
  *
@@ -53,6 +56,7 @@
  */
 
 ENTRY(copy_page)
+
 	entry	a1, 16
 
 	__loopi a2, a4, PAGE_SIZE, 32
@@ -84,6 +88,8 @@
 
 	retw
 
+ENDPROC(copy_page)
+
 #ifdef CONFIG_MMU
 /*
  * If we have to deal with cache aliasing, we use temporary memory mappings
@@ -109,6 +115,7 @@
  */
 
 ENTRY(clear_user_page)
+
 	entry	a1, 32
 
 	/* Mark page dirty and determine alias. */
@@ -164,6 +171,8 @@
 
 	retw
 
+ENDPROC(clear_user_page)
+
 /*
  * copy_page_user (void *to, void *from, unsigned long vaddr, struct page *page)
  *                    a2          a3	        a4		    a5
@@ -171,7 +180,7 @@
 
 ENTRY(copy_user_page)
 
-	entry	a1, 32 
+	entry	a1, 32
 
 	/* Mark page dirty and determine alias for destination. */
 
@@ -262,6 +271,8 @@
 
 	retw
 
+ENDPROC(copy_user_page)
+
 #endif
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
@@ -272,6 +283,7 @@
  */
 
 ENTRY(__flush_invalidate_dcache_page_alias)
+
 	entry	sp, 16
 
 	movi	a7, 0			# required for exception handler
@@ -287,6 +299,7 @@
 
 	retw
 
+ENDPROC(__flush_invalidate_dcache_page_alias)
 #endif
 
 ENTRY(__tlbtemp_mapping_itlb)
@@ -294,6 +307,7 @@
 #if (ICACHE_WAY_SIZE > PAGE_SIZE)
 	
 ENTRY(__invalidate_icache_page_alias)
+
 	entry	sp, 16
 
 	addi	a6, a3, (PAGE_KERNEL_EXEC | _PAGE_HW_WRITE)
@@ -307,11 +321,14 @@
 	isync
 	retw
 
+ENDPROC(__invalidate_icache_page_alias)
+
 #endif
 
 /* End of special treatment in tlb miss exception */
 
 ENTRY(__tlbtemp_mapping_end)
+
 #endif /* CONFIG_MMU
 
 /*
@@ -319,6 +336,7 @@
  */
 
 ENTRY(__invalidate_icache_page)
+
 	entry	sp, 16
 
 	___invalidate_icache_page a2 a3
@@ -326,11 +344,14 @@
 
 	retw
 
+ENDPROC(__invalidate_icache_page)
+
 /*
  * void __invalidate_dcache_page(ulong start)
  */
 
 ENTRY(__invalidate_dcache_page)
+
 	entry	sp, 16
 
 	___invalidate_dcache_page a2 a3
@@ -338,11 +359,14 @@
 
 	retw
 
+ENDPROC(__invalidate_dcache_page)
+
 /*
  * void __flush_invalidate_dcache_page(ulong start)
  */
 
 ENTRY(__flush_invalidate_dcache_page)
+
 	entry	sp, 16
 
 	___flush_invalidate_dcache_page a2 a3
@@ -350,11 +374,14 @@
 	dsync
 	retw
 
+ENDPROC(__flush_invalidate_dcache_page)
+
 /*
  * void __flush_dcache_page(ulong start)
  */
 
 ENTRY(__flush_dcache_page)
+
 	entry	sp, 16
 
 	___flush_dcache_page a2 a3
@@ -362,11 +389,14 @@
 	dsync
 	retw
 
+ENDPROC(__flush_dcache_page)
+
 /*
  * void __invalidate_icache_range(ulong start, ulong size)
  */
 
 ENTRY(__invalidate_icache_range)
+
 	entry	sp, 16
 
 	___invalidate_icache_range a2 a3 a4
@@ -374,11 +404,14 @@
 
 	retw
 
+ENDPROC(__invalidate_icache_range)
+
 /*
  * void __flush_invalidate_dcache_range(ulong start, ulong size)
  */
 
 ENTRY(__flush_invalidate_dcache_range)
+
 	entry	sp, 16
 
 	___flush_invalidate_dcache_range a2 a3 a4
@@ -386,11 +419,14 @@
 
 	retw
 
+ENDPROC(__flush_invalidate_dcache_range)
+
 /*
  * void _flush_dcache_range(ulong start, ulong size)
  */
 
 ENTRY(__flush_dcache_range)
+
 	entry	sp, 16
 
 	___flush_dcache_range a2 a3 a4
@@ -398,22 +434,28 @@
 
 	retw
 
+ENDPROC(__flush_dcache_range)
+
 /*
  * void _invalidate_dcache_range(ulong start, ulong size)
  */
 
 ENTRY(__invalidate_dcache_range)
+
 	entry	sp, 16
 
 	___invalidate_dcache_range a2 a3 a4
 
 	retw
 
+ENDPROC(__invalidate_dcache_range)
+
 /*
  * void _invalidate_icache_all(void)
  */
 
 ENTRY(__invalidate_icache_all)
+
 	entry	sp, 16
 
 	___invalidate_icache_all a2 a3
@@ -421,11 +463,14 @@
 
 	retw
 
+ENDPROC(__invalidate_icache_all)
+
 /*
  * void _flush_invalidate_dcache_all(void)
  */
 
 ENTRY(__flush_invalidate_dcache_all)
+
 	entry	sp, 16
 
 	___flush_invalidate_dcache_all a2 a3
@@ -433,11 +478,14 @@
 
 	retw
 
+ENDPROC(__flush_invalidate_dcache_all)
+
 /*
  * void _invalidate_dcache_all(void)
  */
 
 ENTRY(__invalidate_dcache_all)
+
 	entry	sp, 16
 
 	___invalidate_dcache_all a2 a3
@@ -445,3 +493,4 @@
 
 	retw
 
+ENDPROC(__invalidate_dcache_all)
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index ca81654..0f77f9d 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -37,7 +37,7 @@
 
 	/* Set rasid register to a known value. */
 
-	set_rasid_register(ASID_USER_FIRST);
+	set_rasid_register(ASID_INSERT(ASID_USER_FIRST));
 
 	/* Set PTEVADDR special register to the start of the page
 	 * table, which is in kernel mappable space (ie. not
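RASID is a packed register carrying one ASID per ring, so writing the bare first user ASID left the other ring fields invalid. ASID_INSERT() builds the full packed value; its definition in this era's asm/mmu_context.h is roughly:

	/* ring 1 carries the user ASID; the other rings get fixed
	 * reserved values (sketch of the era's definition) */
	#define ASID_INSERT(x)	(0x03020001 | (((x) & 0xff) << 8))
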
diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c
index e2700b2..5411aa6 100644
--- a/arch/xtensa/mm/tlb.c
+++ b/arch/xtensa/mm/tlb.c
@@ -63,7 +63,7 @@
 void flush_tlb_mm(struct mm_struct *mm)
 {
 	if (mm == current->active_mm) {
-		int flags;
+		unsigned long flags;
 		local_save_flags(flags);
 		__get_new_mmu_context(mm);
 		__load_mmu_context(mm);
@@ -82,7 +82,7 @@
 #endif
 
 void flush_tlb_range (struct vm_area_struct *vma,
-    		      unsigned long start, unsigned long end)
+		      unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long flags;
@@ -100,7 +100,7 @@
 		int oldpid = get_rasid_register();
 		set_rasid_register (ASID_INSERT(mm->context));
 		start &= PAGE_MASK;
- 		if (vma->vm_flags & VM_EXEC)
+		if (vma->vm_flags & VM_EXEC)
 			while(start < end) {
 				invalidate_itlb_mapping(start);
 				invalidate_dtlb_mapping(start);
@@ -130,7 +130,7 @@
 
 	local_save_flags(flags);
 
-       	oldpid = get_rasid_register();
+	oldpid = get_rasid_register();
 
 	if (vma->vm_flags & VM_EXEC)
 		invalidate_itlb_mapping(page);
@@ -140,4 +140,3 @@
 
 	local_irq_restore(flags);
 }
-
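The flags-type fix is the substantive change: the irqflags helpers are defined on unsigned long, not int. The canonical pattern, matching flush_tlb_mm() above:

	unsigned long flags;	/* never int */

	local_save_flags(flags);
	/* ... critical section; see flush_tlb_mm() above ... */
	local_irq_restore(flags);
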
diff --git a/arch/xtensa/platforms/iss/include/platform/serial.h b/arch/xtensa/platforms/iss/include/platform/serial.h
index e69de29..16aec54 100644
--- a/arch/xtensa/platforms/iss/include/platform/serial.h
+++ b/arch/xtensa/platforms/iss/include/platform/serial.h
@@ -0,0 +1,15 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Tensilica Inc.
+ */
+
+#ifndef __ASM_XTENSA_ISS_SERIAL_H
+#define __ASM_XTENSA_ISS_SERIAL_H
+
+/* Have no meaning on ISS, but needed for 8250_early.c */
+#define BASE_BAUD 0
+
+#endif /* __ASM_XTENSA_ISS_SERIAL_H */
diff --git a/arch/xtensa/platforms/iss/include/platform/simcall.h b/arch/xtensa/platforms/iss/include/platform/simcall.h
index bd78192..b5a4edf 100644
--- a/arch/xtensa/platforms/iss/include/platform/simcall.h
+++ b/arch/xtensa/platforms/iss/include/platform/simcall.h
@@ -74,13 +74,12 @@
 			"mov %1, a3\n"
 			: "=a" (ret), "=a" (errno), "+r"(a1), "+r"(b1)
 			: "r"(c1), "r"(d1), "r"(e1), "r"(f1)
-			: );
+			: "memory");
 	return ret;
 }
 
 static inline int simc_open(const char *file, int flags, int mode)
 {
-	wmb();
 	return __simc(SYS_open, (int) file, flags, mode, 0, 0);
 }
 
@@ -91,19 +90,16 @@
 
 static inline int simc_ioctl(int fd, int request, void *arg)
 {
-	wmb();
 	return __simc(SYS_ioctl, fd, request, (int) arg, 0, 0);
 }
 
 static inline int simc_read(int fd, void *buf, size_t count)
 {
-	rmb();
 	return __simc(SYS_read, fd, (int) buf, count, 0, 0);
 }
 
 static inline int simc_write(int fd, const void *buf, size_t count)
 {
-	wmb();
 	return __simc(SYS_write, fd, (int) buf, count, 0, 0);
 }
 
@@ -111,7 +107,6 @@
 {
 	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 
-	wmb();
 	return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&tv,
 			0, 0);
 }
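Dropping the explicit rmb()/wmb() calls works because the "memory" clobber added to __simc() is a full compiler barrier: pending stores must be emitted before the asm, and values cached in registers must be reloaded after it. A generic illustration (this is also how the kernel's barrier() is defined):

	int ready;

	void publish(int *data)
	{
		*data = 42;
		/* compiler barrier: *data is committed before this point
		 * and nothing stays cached in registers across it */
		__asm__ __volatile__("" : : : "memory");
		ready = 1;
	}
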
diff --git a/arch/xtensa/platforms/xtfpga/Makefile b/arch/xtensa/platforms/xtfpga/Makefile
new file mode 100644
index 0000000..b9ae206
--- /dev/null
+++ b/arch/xtensa/platforms/xtfpga/Makefile
@@ -0,0 +1,9 @@
+# Makefile for the Tensilica xtavnet Emulation Board
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are in the main makefile...
+
+obj-y			= setup.o lcd.o
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
new file mode 100644
index 0000000..4416773
--- /dev/null
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -0,0 +1,69 @@
+/*
+ * arch/xtensa/platform/xtavnet/include/platform/hardware.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Tensilica Inc.
+ */
+
+/*
+ * This file contains the hardware configuration of the XTAVNET boards.
+ */
+
+#ifndef __XTENSA_XTAVNET_HARDWARE_H
+#define __XTENSA_XTAVNET_HARDWARE_H
+
+/* By default NO_IRQ is defined to 0 in Linux, but we use the
+   interrupt 0 for UART... */
+#define NO_IRQ                 -1
+
+/* Memory configuration. */
+
+#define PLATFORM_DEFAULT_MEM_START 0x00000000
+#define PLATFORM_DEFAULT_MEM_SIZE  0x04000000
+
+/* Interrupt configuration. */
+
+#define PLATFORM_NR_IRQS	10
+
+/* Default assignment of LX60 devices to external interrupts. */
+
+#ifdef CONFIG_ARCH_HAS_SMP
+#define DUART16552_INTNUM	XCHAL_EXTINT3_NUM
+#define OETH_IRQ		XCHAL_EXTINT4_NUM
+#else
+#define DUART16552_INTNUM	XCHAL_EXTINT0_NUM
+#define OETH_IRQ		XCHAL_EXTINT1_NUM
+#endif
+
+/*
+ *  Device addresses and parameters.
+ */
+
+/* UART */
+#define DUART16552_PADDR	(XCHAL_KIO_PADDR + 0x0D050020)
+/* LCD instruction and data addresses. */
+#define LCD_INSTR_ADDR		((char *)IOADDR(0x0D040000))
+#define LCD_DATA_ADDR		((char *)IOADDR(0x0D040004))
+
+/* Misc. */
+#define XTFPGA_FPGAREGS_VADDR	IOADDR(0x0D020000)
+/* Clock frequency in Hz (read-only):  */
+#define XTFPGA_CLKFRQ_VADDR	(XTFPGA_FPGAREGS_VADDR + 0x04)
+/* Setting of 8 DIP switches:  */
+#define DIP_SWITCHES_VADDR	(XTFPGA_FPGAREGS_VADDR + 0x0C)
+/* Software reset (write 0xdead):  */
+#define XTFPGA_SWRST_VADDR	(XTFPGA_FPGAREGS_VADDR + 0x10)
+
+/*  OpenCores Ethernet controller:  */
+				/* regs + RX/TX descriptors */
+#define OETH_REGS_PADDR		(XCHAL_KIO_PADDR + 0x0D030000)
+#define OETH_REGS_SIZE		0x1000
+#define OETH_SRAMBUFF_PADDR	(XCHAL_KIO_PADDR + 0x0D800000)
+
+				/* 5*rx buffs + 5*tx buffs */
+#define OETH_SRAMBUFF_SIZE	(5 * 0x600 + 5 * 0x600)
+
+#endif /* __XTENSA_XTAVNET_HARDWARE_H */
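These are fixed FPGA registers inside the KIO window, read by plain dereference elsewhere in this series; for instance the clock-frequency and DIP-switch registers:

	long hz = *(long *)XTFPGA_CLKFRQ_VADDR;		/* board clock in Hz */
	u8 dip6 = (*(u32 *)DIP_SWITCHES_VADDR) & 0x3f;	/* low 6 switches */

Both usages appear verbatim in xtfpga/setup.c below.
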
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/lcd.h b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
new file mode 100644
index 0000000..0e43564
--- /dev/null
+++ b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
@@ -0,0 +1,20 @@
+/*
+ * arch/xtensa/platform/xtavnet/include/platform/lcd.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001, 2006 Tensilica Inc.
+ */
+
+#ifndef __XTENSA_XTAVNET_LCD_H
+#define __XTENSA_XTAVNET_LCD_H
+
+/* Display string STR at position POS on the LCD. */
+void lcd_disp_at_pos(char *str, unsigned char pos);
+
+/* Shift the contents of the LCD display left or right. */
+void lcd_shiftleft(void);
+void lcd_shiftright(void);
+#endif
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/serial.h b/arch/xtensa/platforms/xtfpga/include/platform/serial.h
new file mode 100644
index 0000000..14d8f7b
--- /dev/null
+++ b/arch/xtensa/platforms/xtfpga/include/platform/serial.h
@@ -0,0 +1,18 @@
+/*
+ * arch/xtensa/platform/xtavnet/include/platform/serial.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001, 2006 Tensilica Inc.
+ */
+
+#ifndef __ASM_XTENSA_XTAVNET_SERIAL_H
+#define __ASM_XTENSA_XTAVNET_SERIAL_H
+
+#include <platform/hardware.h>
+
+#define BASE_BAUD (*(long *)XTFPGA_CLKFRQ_VADDR / 16)
+
+#endif /* __ASM_XTENSA_XTAVNET_SERIAL_H */
diff --git a/arch/xtensa/platforms/xtfpga/lcd.c b/arch/xtensa/platforms/xtfpga/lcd.c
new file mode 100644
index 0000000..2872301
--- /dev/null
+++ b/arch/xtensa/platforms/xtfpga/lcd.c
@@ -0,0 +1,76 @@
+/*
+ * Driver for the LCD display on the Tensilica LX60 Board.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001, 2006 Tensilica Inc.
+ */
+
+/*
+ *
+ * FIXME: this code comes from the examples in the LX60 user guide.
+ *
+ * The lcd_pause function does busy waiting, which is probably not
+ * great. Maybe the code could be changed to use kernel timers, or
+ * change the hardware to not need to wait.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+
+#include <platform/hardware.h>
+#include <platform/lcd.h>
+#include <linux/delay.h>
+
+#define LCD_PAUSE_ITERATIONS	4000
+#define LCD_CLEAR		0x1
+#define LCD_DISPLAY_ON		0xc
+
+/* 8bit and 2 lines display */
+#define LCD_DISPLAY_MODE8BIT	0x38
+#define LCD_DISPLAY_POS		0x80
+#define LCD_SHIFT_LEFT		0x18
+#define LCD_SHIFT_RIGHT		0x1c
+
+static int __init lcd_init(void)
+{
+	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	mdelay(5);
+	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	udelay(200);
+	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	udelay(50);
+	*LCD_INSTR_ADDR = LCD_DISPLAY_ON;
+	udelay(50);
+	*LCD_INSTR_ADDR = LCD_CLEAR;
+	mdelay(10);
+	lcd_disp_at_pos("XTENSA LINUX", 0);
+	return 0;
+}
+
+void lcd_disp_at_pos(char *str, unsigned char pos)
+{
+	*LCD_INSTR_ADDR = LCD_DISPLAY_POS | pos;
+	udelay(100);
+	while (*str != 0) {
+		*LCD_DATA_ADDR = *str;
+		udelay(200);
+		str++;
+	}
+}
+
+void lcd_shiftleft(void)
+{
+	*LCD_INSTR_ADDR = LCD_SHIFT_LEFT;
+	udelay(50);
+}
+
+void lcd_shiftright(void)
+{
+	*LCD_INSTR_ADDR = LCD_SHIFT_RIGHT;
+	udelay(50);
+}
+
+arch_initcall(lcd_init);
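On the FIXME above: lcd_init() runs as an initcall, so where a caller may sleep the short busy-waits could become sleeping waits. A minimal sketch, assuming the controller's timing requirements are still met:

	usleep_range(200, 500);		/* instead of udelay(200) */
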
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
new file mode 100644
index 0000000..4b9951a
--- /dev/null
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -0,0 +1,301 @@
+/*
+ *
+ * arch/xtensa/platform/xtavnet/setup.c
+ *
+ * ...
+ *
+ * Authors:	Chris Zankel <chris@zankel.net>
+ *		Joe Taylor <joe@tensilica.com>
+ *
+ * Copyright 2001 - 2006 Tensilica Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/kdev_t.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+
+#include <asm/timex.h>
+#include <asm/processor.h>
+#include <asm/platform.h>
+#include <asm/bootparam.h>
+#include <platform/lcd.h>
+#include <platform/hardware.h>
+
+void platform_halt(void)
+{
+	lcd_disp_at_pos(" HALT ", 0);
+	local_irq_disable();
+	while (1)
+		cpu_relax();
+}
+
+void platform_power_off(void)
+{
+	lcd_disp_at_pos("POWEROFF", 0);
+	local_irq_disable();
+	while (1)
+		cpu_relax();
+}
+
+void platform_restart(void)
+{
+	/* Flush and reset the mmu, simulate a processor reset, and
+	 * jump to the reset vector. */
+
+
+	__asm__ __volatile__ ("movi	a2, 15\n\t"
+			      "wsr	a2, icountlevel\n\t"
+			      "movi	a2, 0\n\t"
+			      "wsr	a2, icount\n\t"
+			      "wsr	a2, ibreakenable\n\t"
+			      "wsr	a2, lcount\n\t"
+			      "movi	a2, 0x1f\n\t"
+			      "wsr	a2, ps\n\t"
+			      "isync\n\t"
+			      "jx	%0\n\t"
+			      :
+			      : "a" (XCHAL_RESET_VECTOR_VADDR)
+			      : "a2"
+			      );
+
+	/* control never gets here */
+}
+
+void __init platform_setup(char **cmdline)
+{
+}
+
+#ifdef CONFIG_OF
+
+static void __init update_clock_frequency(struct device_node *node)
+{
+	struct property *newfreq;
+	u32 freq;
+
+	if (!of_property_read_u32(node, "clock-frequency", &freq) && freq != 0)
+		return;
+
+	newfreq = kzalloc(sizeof(*newfreq) + sizeof(u32), GFP_KERNEL);
+	if (!newfreq)
+		return;
+	newfreq->value = newfreq + 1;
+	newfreq->length = sizeof(freq);
+	newfreq->name = kstrdup("clock-frequency", GFP_KERNEL);
+	if (!newfreq->name) {
+		kfree(newfreq);
+		return;
+	}
+
+	*(u32 *)newfreq->value = cpu_to_be32(*(u32 *)XTFPGA_CLKFRQ_VADDR);
+	prom_update_property(node, newfreq);
+}
+
+#define MAC_LEN 6
+static void __init update_local_mac(struct device_node *node)
+{
+	struct property *newmac;
+	const u8 *macaddr;
+	int prop_len;
+
+	macaddr = of_get_property(node, "local-mac-address", &prop_len);
+	if (macaddr == NULL || prop_len != MAC_LEN)
+		return;
+
+	newmac = kzalloc(sizeof(*newmac) + MAC_LEN, GFP_KERNEL);
+	if (newmac == NULL)
+		return;
+
+	newmac->value = newmac + 1;
+	newmac->length = MAC_LEN;
+	newmac->name = kstrdup("local-mac-address", GFP_KERNEL);
+	if (newmac->name == NULL) {
+		kfree(newmac);
+		return;
+	}
+
+	memcpy(newmac->value, macaddr, MAC_LEN);
+	((u8 *)newmac->value)[5] = (*(u32 *)DIP_SWITCHES_VADDR) & 0x3f;
+	prom_update_property(node, newmac);
+}
+
+static int __init machine_setup(void)
+{
+	struct device_node *serial;
+	struct device_node *eth = NULL;
+
+	for_each_compatible_node(serial, NULL, "ns16550a")
+		update_clock_frequency(serial);
+
+	if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc")))
+		update_local_mac(eth);
+	return 0;
+}
+arch_initcall(machine_setup);
+
+#endif
+
+/* early initialization */
+
+void __init platform_init(bp_tag_t *first)
+{
+}
+
+/* Heartbeat. */
+
+void platform_heartbeat(void)
+{
+}
+
+#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
+
+void platform_calibrate_ccount(void)
+{
+	long clk_freq = 0;
+#ifdef CONFIG_OF
+	struct device_node *cpu =
+		of_find_compatible_node(NULL, NULL, "xtensa,cpu");
+	if (cpu) {
+		u32 freq;
+		update_clock_frequency(cpu);
+		if (!of_property_read_u32(cpu, "clock-frequency", &freq))
+			clk_freq = freq;
+	}
+#endif
+	if (!clk_freq)
+		clk_freq = *(long *)XTFPGA_CLKFRQ_VADDR;
+
+	ccount_per_jiffy = clk_freq / HZ;
+	nsec_per_ccount = 1000000000UL / clk_freq;
+}
+
+#endif
+
+#ifndef CONFIG_OF
+
+#include <linux/serial_8250.h>
+#include <linux/if.h>
+#include <net/ethoc.h>
+
+/*----------------------------------------------------------------------------
+ *  Ethernet -- OpenCores Ethernet MAC (ethoc driver)
+ */
+
+static struct resource ethoc_res[] __initdata = {
+	[0] = { /* register space */
+		.start = OETH_REGS_PADDR,
+		.end   = OETH_REGS_PADDR + OETH_REGS_SIZE - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = { /* buffer space */
+		.start = OETH_SRAMBUFF_PADDR,
+		.end   = OETH_SRAMBUFF_PADDR + OETH_SRAMBUFF_SIZE - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[2] = { /* IRQ number */
+		.start = OETH_IRQ,
+		.end   = OETH_IRQ,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct ethoc_platform_data ethoc_pdata __initdata = {
+	/*
+	 * The MAC address for these boards is 00:50:c2:13:6f:xx.
+	 * The last byte (here as zero) is read from the DIP switches on the
+	 * board.
+	 */
+	.hwaddr = { 0x00, 0x50, 0xc2, 0x13, 0x6f, 0 },
+	.phy_id = -1,
+};
+
+static struct platform_device ethoc_device __initdata = {
+	.name = "ethoc",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(ethoc_res),
+	.resource = ethoc_res,
+	.dev = {
+		.platform_data = &ethoc_pdata,
+	},
+};
+
+/*----------------------------------------------------------------------------
+ *  UART
+ */
+
+static struct resource serial_resource __initdata = {
+	.start	= DUART16552_PADDR,
+	.end	= DUART16552_PADDR + 0x1f,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct plat_serial8250_port serial_platform_data[] __initdata = {
+	[0] = {
+		.mapbase	= DUART16552_PADDR,
+		.irq		= DUART16552_INTNUM,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM32,
+		.regshift	= 2,
+		.uartclk	= 0,    /* set in xtavnet_init() */
+	},
+	{ },
+};
+
+static struct platform_device xtavnet_uart __initdata = {
+	.name		= "serial8250",
+	.id		= PLAT8250_DEV_PLATFORM,
+	.dev		= {
+		.platform_data	= serial_platform_data,
+	},
+	.num_resources	= 1,
+	.resource	= &serial_resource,
+};
+
+/* platform devices */
+static struct platform_device *platform_devices[] __initdata = {
+	&ethoc_device,
+	&xtavnet_uart,
+};
+
+
+static int __init xtavnet_init(void)
+{
+	/* Ethernet MAC address.  */
+	ethoc_pdata.hwaddr[5] = *(u32 *)DIP_SWITCHES_VADDR;
+
+	/* The clock rate varies among FPGA bitstreams; a board-specific
+	 * FPGA register reports the actual clock rate.
+	 */
+	serial_platform_data[0].uartclk = *(long *)XTFPGA_CLKFRQ_VADDR;
+
+
+	/* register platform devices */
+	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
+
+	/* The ETHOC driver is a bit quiet; at least display the Ethernet
+	 * MAC so the user knows whether it was set correctly on the DIP
+	 * switches.
+	 */
+	pr_info("XTFPGA: Ethernet MAC %pM\n", ethoc_pdata.hwaddr);
+
+	return 0;
+}
+
+/*
+ * Register to be done during do_initcalls().
+ */
+arch_initcall(xtavnet_init);
+
+#endif /* CONFIG_OF */
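
Both update_clock_frequency() and update_local_mac() above use the same
single-allocation pattern for device-tree properties: the payload is
allocated together with struct property, and prop->value points just past
the struct, so struct and payload share one lifetime. A hedged sketch of
the pattern as a stand-alone helper (alloc_prop() is hypothetical, not a
kernel API):

	#include <linux/of.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	static struct property *alloc_prop(const char *name,
					   const void *data, int len)
	{
		struct property *prop = kzalloc(sizeof(*prop) + len, GFP_KERNEL);

		if (!prop)
			return NULL;
		prop->value = prop + 1;		/* payload follows the struct */
		prop->length = len;
		prop->name = kstrdup(name, GFP_KERNEL);
		if (!prop->name) {
			kfree(prop);
			return NULL;
		}
		memcpy(prop->value, data, len);
		return prop;
	}
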
diff --git a/arch/xtensa/variants/s6000/gpio.c b/arch/xtensa/variants/s6000/gpio.c
index b89541b..da9e85c 100644
--- a/arch/xtensa/variants/s6000/gpio.c
+++ b/arch/xtensa/variants/s6000/gpio.c
@@ -164,7 +164,7 @@
 	int cirq;
 
 	chip->irq_mask(&desc->irq_data);
-	chip->irq_ack(&desc->irq_data));
+	chip->irq_ack(&desc->irq_data);
 	pending = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_MIS) & *mask;
 	cirq = IRQ_BASE - 1;
 	while (pending) {
@@ -173,7 +173,7 @@
 		pending >>= n;
 		generic_handle_irq(cirq);
 	}
-	chip->irq_unmask(&desc->irq_data));
+	chip->irq_unmask(&desc->irq_data);
 }
 
 extern const signed char *platform_irq_mappings[XTENSA_NR_IRQS];
diff --git a/block/Kconfig b/block/Kconfig
index a7e40a7..4a85ccf8 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -4,6 +4,7 @@
 menuconfig BLOCK
        bool "Enable the block layer" if EXPERT
        default y
+       select PERCPU_RWSEM
        help
 	 Provide block layer support for the kernel.
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 3f6d39d..b8858fb 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -231,7 +231,7 @@
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
-		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
 	return __blkg_lookup_create(blkcg, q, NULL);
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c95c4d..c973249 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -40,6 +40,7 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
 
@@ -219,12 +220,13 @@
  * Description:
  *   Sometimes queueing needs to be postponed for a little while, to allow
  *   resources to come back. This function will make sure that queueing is
- *   restarted around the specified time.
+ *   restarted around the specified time. Queue lock must be held.
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-	queue_delayed_work(kblockd_workqueue, &q->delay_work,
-				msecs_to_jiffies(msecs));
+	if (likely(!blk_queue_dead(q)))
+		queue_delayed_work(kblockd_workqueue, &q->delay_work,
+				   msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
@@ -293,6 +295,34 @@
 EXPORT_SYMBOL(blk_sync_queue);
 
 /**
+ * __blk_run_queue_uncond - run a queue whether or not it has been stopped
+ * @q:	The queue to run
+ *
+ * Description:
+ *    Invoke request handling on a queue if there are any pending requests.
+ *    May be used to restart request handling after a request has completed.
+ *    This variant runs the queue whether or not the queue has been
+ *    stopped. Must be called with the queue lock held and interrupts
+ *    disabled. See also @blk_run_queue.
+ */
+inline void __blk_run_queue_uncond(struct request_queue *q)
+{
+	if (unlikely(blk_queue_dead(q)))
+		return;
+
+	/*
+	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
+	 * the queue lock internally. As a result multiple threads may be
+	 * running such a request function concurrently. Keep track of the
+	 * number of active request_fn invocations such that blk_drain_queue()
+	 * can wait until all these request_fn calls have finished.
+	 */
+	q->request_fn_active++;
+	q->request_fn(q);
+	q->request_fn_active--;
+}
+
+/**
  * __blk_run_queue - run a single device queue
  * @q:	The queue to run
  *
@@ -305,7 +335,7 @@
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	q->request_fn(q);
+	__blk_run_queue_uncond(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -315,11 +345,11 @@
  *
  * Description:
  *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- *    of us.
+ *    of us. The caller must hold the queue lock.
  */
 void blk_run_queue_async(struct request_queue *q)
 {
-	if (likely(!blk_queue_stopped(q)))
+	if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
 		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
 EXPORT_SYMBOL(blk_run_queue_async);
@@ -349,7 +379,7 @@
 EXPORT_SYMBOL(blk_put_queue);
 
 /**
- * blk_drain_queue - drain requests from request_queue
+ * __blk_drain_queue - drain requests from request_queue
  * @q: queue to drain
  * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
  *
@@ -357,15 +387,17 @@
  * If not, only ELVPRIV requests are drained.  The caller is responsible
  * for ensuring that no new requests which need to be drained are queued.
  */
-void blk_drain_queue(struct request_queue *q, bool drain_all)
+static void __blk_drain_queue(struct request_queue *q, bool drain_all)
+	__releases(q->queue_lock)
+	__acquires(q->queue_lock)
 {
 	int i;
 
+	lockdep_assert_held(q->queue_lock);
+
 	while (true) {
 		bool drain = false;
 
-		spin_lock_irq(q->queue_lock);
-
 		/*
 		 * The caller might be trying to drain @q before its
 		 * elevator is initialized.
@@ -386,6 +418,7 @@
 			__blk_run_queue(q);
 
 		drain |= q->nr_rqs_elvpriv;
+		drain |= q->request_fn_active;
 
 		/*
 		 * Unfortunately, requests are queued at and tracked from
@@ -401,11 +434,14 @@
 			}
 		}
 
-		spin_unlock_irq(q->queue_lock);
-
 		if (!drain)
 			break;
+
+		spin_unlock_irq(q->queue_lock);
+
 		msleep(10);
+
+		spin_lock_irq(q->queue_lock);
 	}
 
 	/*
@@ -416,13 +452,9 @@
 	if (q->request_fn) {
 		struct request_list *rl;
 
-		spin_lock_irq(q->queue_lock);
-
 		blk_queue_for_each_rl(rl, q)
 			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
 				wake_up_all(&rl->wait[i]);
-
-		spin_unlock_irq(q->queue_lock);
 	}
 }
 
@@ -446,7 +478,10 @@
 	spin_unlock_irq(q->queue_lock);
 
 	if (drain) {
-		blk_drain_queue(q, false);
+		spin_lock_irq(q->queue_lock);
+		__blk_drain_queue(q, false);
+		spin_unlock_irq(q->queue_lock);
+
 		/* ensure blk_queue_bypass() is %true inside RCU read lock */
 		synchronize_rcu();
 	}
@@ -473,20 +508,20 @@
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DEAD, drain all pending requests, destroy and put it.  All
- * future requests will be failed immediately with -ENODEV.
+ * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
+ * put it.  All future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
 	spinlock_t *lock = q->queue_lock;
 
-	/* mark @q DEAD, no new request or merges will be allowed afterwards */
+	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
 	spin_lock_irq(lock);
 
 	/*
-	 * Dead queue is permanently in bypass mode till released.  Note
+	 * A dying queue is permanently in bypass mode till released.  Note
 	 * that, unlike blk_queue_bypass_start(), we aren't performing
 	 * synchronize_rcu() after entering bypass mode to avoid the delay
 	 * as some drivers create and destroy a lot of queues while
@@ -499,12 +534,18 @@
 
 	queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
-	queue_flag_set(QUEUE_FLAG_DEAD, q);
+	queue_flag_set(QUEUE_FLAG_DYING, q);
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
-	/* drain all requests queued before DEAD marking */
-	blk_drain_queue(q, true);
+	/*
+	 * Drain all requests queued before DYING marking. Set the DEAD flag
+	 * to prevent q->request_fn() from being invoked after draining has
+	 * finished.
+	 */
+	spin_lock_irq(lock);
+	__blk_drain_queue(q, true);
+	queue_flag_set(QUEUE_FLAG_DEAD, q);
+	spin_unlock_irq(lock);
 
 	/* @q won't process any more request, flush async actions */
 	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
@@ -549,7 +590,7 @@
 
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
-	return blk_alloc_queue_node(gfp_mask, -1);
+	return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
@@ -660,7 +701,7 @@
 
 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
 {
-	return blk_init_queue_node(rfn, lock, -1);
+	return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(blk_init_queue);
 
@@ -716,7 +757,7 @@
 
 bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!blk_queue_dead(q))) {
+	if (likely(!blk_queue_dying(q))) {
 		__blk_get_queue(q);
 		return true;
 	}
@@ -870,7 +911,7 @@
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue;
 
-	if (unlikely(blk_queue_dead(q)))
+	if (unlikely(blk_queue_dying(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -1050,7 +1091,7 @@
 	if (rq)
 		return rq;
 
-	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return NULL;
 	}
@@ -1910,7 +1951,7 @@
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		return -ENODEV;
 	}
@@ -2884,27 +2925,11 @@
 {
 	trace_block_unplug(q, depth, !from_schedule);
 
-	/*
-	 * Don't mess with dead queue.
-	 */
-	if (unlikely(blk_queue_dead(q))) {
-		spin_unlock(q->queue_lock);
-		return;
-	}
-
-	/*
-	 * If we are punting this to kblockd, then we can safely drop
-	 * the queue_lock before waking kblockd (which needs to take
-	 * this lock).
-	 */
-	if (from_schedule) {
-		spin_unlock(q->queue_lock);
+	if (from_schedule)
 		blk_run_queue_async(q);
-	} else {
+	else
 		__blk_run_queue(q);
-		spin_unlock(q->queue_lock);
-	}
-
+	spin_unlock(q->queue_lock);
 }
 
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -2996,7 +3021,7 @@
 		/*
 		 * Short-circuit if @q is dead
 		 */
-		if (unlikely(blk_queue_dead(q))) {
+		if (unlikely(blk_queue_dying(q))) {
 			__blk_end_request_all(rq, -ENODEV);
 			continue;
 		}
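
The blk-core changes above split queue teardown into two stages: DYING is
set first so that no new requests enter the queue, and DEAD is set only
after draining, which guarantees q->request_fn() is never invoked again.
The new request_fn_active counter is what makes that guarantee testable;
the two halves of the mechanism, restated side by side from the hunks
above (illustrative only):

	/* run side: count invocations, since request_fn may drop and
	 * reacquire the queue lock, allowing several to be in flight */
	q->request_fn_active++;
	q->request_fn(q);
	q->request_fn_active--;

	/* drain side: keep looping until queued requests *and* in-flight
	 * request_fn invocations are both gone, then set QUEUE_FLAG_DEAD */
	drain |= q->nr_rqs_elvpriv;
	drain |= q->request_fn_active;
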
diff --git a/block/blk-exec.c b/block/blk-exec.c
index f71eac3..74638ec 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -66,7 +66,7 @@
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		rq->errors = -ENXIO;
 		if (rq->end_io)
 			rq->end_io(rq, rq->errors);
@@ -78,7 +78,7 @@
 	__blk_run_queue(q);
 	/* the queue is stopped so it won't be run */
 	if (is_pm_resume)
-		q->request_fn(q);
+		__blk_run_queue_uncond(q);
 	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9373b58..b3a1f2b7 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -43,11 +43,12 @@
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
-	unsigned int max_discard_sectors;
-	unsigned int granularity, alignment, mask;
+	sector_t max_discard_sectors;
+	sector_t granularity, alignment;
 	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
+	struct blk_plug plug;
 
 	if (!q)
 		return -ENXIO;
@@ -57,15 +58,16 @@
 
 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	mask = granularity - 1;
-	alignment = (bdev_discard_alignment(bdev) >> 9) & mask;
+	alignment = bdev_discard_alignment(bdev) >> 9;
+	alignment = sector_div(alignment, granularity);
 
 	/*
 	 * Ensure that max_discard_sectors is of the proper
 	 * granularity, so that requests stay aligned after a split.
 	 */
 	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	max_discard_sectors = round_down(max_discard_sectors, granularity);
+	sector_div(max_discard_sectors, granularity);
+	max_discard_sectors *= granularity;
 	if (unlikely(!max_discard_sectors)) {
 		/* Avoid infinite loop below. Being cautious never hurts. */
 		return -EOPNOTSUPP;
@@ -81,9 +83,10 @@
 	bb.flags = 1 << BIO_UPTODATE;
 	bb.wait = &wait;
 
+	blk_start_plug(&plug);
 	while (nr_sects) {
 		unsigned int req_sects;
-		sector_t end_sect;
+		sector_t end_sect, tmp;
 
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
@@ -98,10 +101,12 @@
 		 * misaligned, stop the discard at the previous aligned sector.
 		 */
 		end_sect = sector + req_sects;
-		if (req_sects < nr_sects && (end_sect & mask) != alignment) {
-			end_sect =
-				round_down(end_sect - alignment, granularity)
-				+ alignment;
+		tmp = end_sect;
+		if (req_sects < nr_sects &&
+		    sector_div(tmp, granularity) != alignment) {
+			end_sect = end_sect - alignment;
+			sector_div(end_sect, granularity);
+			end_sect = end_sect * granularity + alignment;
 			req_sects = end_sect - sector;
 		}
 
@@ -117,6 +122,7 @@
 		atomic_inc(&bb.done);
 		submit_bio(type, bio);
 	}
+	blk_finish_plug(&plug);
 
 	/* Wait for bios in-flight */
 	if (!atomic_dec_and_test(&bb.done))
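
The blkdev_issue_discard() rework above switches from mask arithmetic to
sector_div() for two reasons: discard granularity need not be a power of
two, and sector_t may be 64 bits wide on a 32-bit machine, where plain
64-bit division is unavailable. sector_div(n, base) divides n in place
and returns the remainder. A hedged sketch of the rounding step with
illustrative numbers (round_to_alignment() is hypothetical):

	/* Round end_sect down to the nearest sector that is 'alignment'
	 * past a granularity boundary, e.g. end_sect = 11, granularity = 3,
	 * alignment = 1:  (11 - 1) / 3 = 3,  3 * 3 + 1 = 10.
	 */
	static sector_t round_to_alignment(sector_t end_sect, u32 granularity,
					   u32 alignment)
	{
		end_sect -= alignment;
		sector_div(end_sect, granularity);	/* quotient left in end_sect */
		return end_sect * granularity + alignment;
	}
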
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 779bb76..c50ecf0 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -611,7 +611,7 @@
 			bottom = b->discard_granularity + alignment;
 
 			/* Verify that top and bottom intervals line up */
-			if (max(top, bottom) & (min(top, bottom) - 1))
+			if ((max(top, bottom) % min(top, bottom)) != 0)
 				t->discard_misaligned = 1;
 		}
 
@@ -619,8 +619,8 @@
 						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
-		t->discard_alignment = lcm(t->discard_alignment, alignment) &
-			(t->discard_granularity - 1);
+		t->discard_alignment = lcm(t->discard_alignment, alignment) %
+			t->discard_granularity;
 	}
 
 	return ret;
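
The blk-settings change relies on the fact that "x & (y - 1)" equals
"x % y" only when y is a power of two, and discard granularity carries no
such guarantee. A worked example:

	/* top = 1536, bottom = 512 (a power of two):
	 *	1536 & (512 - 1) == 0	and	1536 % 512 == 0	-- tests agree
	 * top = 1536, bottom = 768 (not a power of two):
	 *	1536 & (768 - 1) == 512	-- falsely flagged as misaligned
	 *	1536 % 768       == 0	-- correctly reported as aligned
	 */
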
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ce62046..7881477 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -466,7 +466,7 @@
 	if (!entry->show)
 		return -EIO;
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
@@ -488,7 +488,7 @@
 
 	q = container_of(kobj, struct request_queue, kobj);
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a9664fa..3114622 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -302,7 +302,7 @@
 		/* if %NULL and @q is alive, fall back to root_tg */
 		if (!IS_ERR(blkg))
 			tg = blkg_to_tg(blkg);
-		else if (!blk_queue_dead(q))
+		else if (!blk_queue_dying(q))
 			tg = td_root_tg(td);
 	}
 
diff --git a/block/blk.h b/block/blk.h
index ca51543..47fdfdd 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -96,7 +96,7 @@
 			q->flush_queue_delayed = 1;
 			return NULL;
 		}
-		if (unlikely(blk_queue_dead(q)) ||
+		if (unlikely(blk_queue_dying(q)) ||
 		    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
@@ -145,6 +145,8 @@
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
+void __blk_run_queue_uncond(struct request_queue *q);
+
 int blk_dev_init(void);
 
 
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index deee61f..650f427 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -151,19 +151,6 @@
 	return -ENOMEM;
 }
 
-/*
- * bsg_goose_queue - restart queue in case it was stopped
- * @q: request q to be restarted
- */
-void bsg_goose_queue(struct request_queue *q)
-{
-	if (!q)
-		return;
-
-	blk_run_queue_async(q);
-}
-EXPORT_SYMBOL_GPL(bsg_goose_queue);
-
 /**
  * bsg_request_fn - generic handler for bsg requests
  * @q: request queue to manage
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fb52df97..e62e920 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1973,7 +1973,8 @@
 	 * reposition in fifo if next is older than rq
 	 */
 	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
-	    time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
+	    time_before(rq_fifo_time(next), rq_fifo_time(rq)) &&
+	    cfqq == RQ_CFQQ(next)) {
 		list_move(&rq->queuelist, &next->queuelist);
 		rq_set_fifo_time(rq, rq_fifo_time(next));
 	}
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 599b12e..90037b5 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -230,7 +230,7 @@
 	/*
 	 * rq is expired!
 	 */
-	if (time_after(jiffies, rq_fifo_time(rq)))
+	if (time_after_eq(jiffies, rq_fifo_time(rq)))
 		return 1;
 
 	return 0;
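
The one-character change above matters exactly at the deadline: with
rq_fifo_time(rq) equal to the current jiffies value, time_after() still
reports "not expired" while time_after_eq() reports expiry, so a request
no longer waits one extra tick:

	/* at j == rq_fifo_time(rq):
	 *	time_after(j, rq_fifo_time(rq))    -> false (old behavior)
	 *	time_after_eq(j, rq_fifo_time(rq)) -> true  (expired now)
	 */
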
diff --git a/block/elevator.c b/block/elevator.c
index 9b1d42b..9edba1b 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -458,6 +458,7 @@
 				     struct request *rq)
 {
 	struct request *__rq;
+	bool ret;
 
 	if (blk_queue_nomerges(q))
 		return false;
@@ -471,14 +472,21 @@
 	if (blk_queue_noxmerges(q))
 		return false;
 
+	ret = false;
 	/*
 	 * See if our hash lookup can find a potential backmerge.
 	 */
-	__rq = elv_rqhash_find(q, blk_rq_pos(rq));
-	if (__rq && blk_attempt_req_merge(q, __rq, rq))
-		return true;
+	while (1) {
+		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
+		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
+			break;
 
-	return false;
+		/* The merged request could be merged with others, try again */
+		ret = true;
+		rq = __rq;
+	}
+
+	return ret;
 }
 
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
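
The loop above makes back-merging transitive: after rq merges into __rq,
the combined request may itself back-merge with an earlier neighbor, so
the hash lookup is retried from the merged request. A worked example with
three contiguous requests (positions are start sectors, length 8 each):

	/* hash holds A (pos 0) and B (pos 8); C (pos 16) is inserted:
	 *   lookup end == 16 finds B; merge C into B -> B spans 8..23
	 *   retry with rq = B: lookup end == 8 finds A; merge -> A spans 0..23
	 *   retry with rq = A: lookup end == 0 finds nothing; done
	 * The old single-shot code stopped after the first merge.
	 */
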
diff --git a/block/genhd.c b/block/genhd.c
index 6cace66..9a289d7 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -743,7 +743,6 @@
 		struct hd_struct *part;
 		char name_buf[BDEVNAME_SIZE];
 		char devt_buf[BDEVT_SIZE];
-		char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
 
 		/*
 		 * Don't show empty devices or things that have been
@@ -762,16 +761,11 @@
 		while ((part = disk_part_iter_next(&piter))) {
 			bool is_part0 = part == &disk->part0;
 
-			uuid_buf[0] = '\0';
-			if (part->info)
-				snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
-					 part->info->uuid);
-
 			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
 			       bdevt_str(part_devt(part), devt_buf),
 			       (unsigned long long)part_nr_sects_read(part) >> 1
 			       , disk_name(disk, part->partno, name_buf),
-			       uuid_buf);
+			       part->info ? part->info->uuid : "");
 			if (is_part0) {
 				if (disk->driverfs_dev != NULL &&
 				    disk->driverfs_dev->driver != NULL)
@@ -1245,7 +1239,7 @@
 
 struct gendisk *alloc_disk(int minors)
 {
-	return alloc_disk_node(minors, -1);
+	return alloc_disk_node(minors, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(alloc_disk);
 
diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig
index cb5f0a3..75a54e1 100644
--- a/block/partitions/Kconfig
+++ b/block/partitions/Kconfig
@@ -234,8 +234,8 @@
 	  uses a proprietary partition table.
 
 config EFI_PARTITION
-	bool "EFI GUID Partition support"
-	depends on PARTITION_ADVANCED
+	bool "EFI GUID Partition support" if PARTITION_ADVANCED
+	default y
 	select CRC32
 	help
 	  Say Y here if you would like to use hard disks under Linux which
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index 6296b40..b62fb88 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -620,7 +620,6 @@
 	gpt_entry *ptes = NULL;
 	u32 i;
 	unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
-	u8 unparsed_guid[37];
 
 	if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
 		kfree(gpt);
@@ -649,11 +648,7 @@
 			state->parts[i + 1].flags = ADDPART_FLAG_RAID;
 
 		info = &state->parts[i + 1].info;
-		/* Instead of doing a manual swap to big endian, reuse the
-		 * common ASCII hex format as the interim.
-		 */
-		efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
-		part_pack_uuid(unparsed_guid, info->uuid);
+		efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
 
 		/* Naively convert UTF16-LE to 7 bits. */
 		label_max = min(sizeof(info->volname) - 1,
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 5f79a66..8752a5d 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -94,6 +94,17 @@
 	return ret;
 }
 
+static void set_info(struct parsed_partitions *state, int slot,
+		     u32 disksig)
+{
+	struct partition_meta_info *info = &state->parts[slot].info;
+
+	snprintf(info->uuid, sizeof(info->uuid), "%08x-%02x", disksig,
+		 slot);
+	info->volname[0] = 0;
+	state->parts[slot].has_info = true;
+}
+
 /*
  * Create devices for each logical partition in an extended partition.
  * The logical partitions form a linked list, with each entry being
@@ -106,7 +117,8 @@
  */
 
 static void parse_extended(struct parsed_partitions *state,
-			   sector_t first_sector, sector_t first_size)
+			   sector_t first_sector, sector_t first_size,
+			   u32 disksig)
 {
 	struct partition *p;
 	Sector sect;
@@ -166,6 +178,7 @@
 			}
 
 			put_partition(state, state->next, next, size);
+			set_info(state, state->next, disksig);
 			if (SYS_IND(p) == LINUX_RAID_PARTITION)
 				state->parts[state->next].flags = ADDPART_FLAG_RAID;
 			loopct = 0;
@@ -437,6 +450,7 @@
 	struct partition *p;
 	struct fat_boot_sector *fb;
 	int slot;
+	u32 disksig;
 
 	data = read_part_sector(state, 0, &sect);
 	if (!data)
@@ -491,6 +505,8 @@
 #endif
 	p = (struct partition *) (data + 0x1be);
 
+	disksig = le32_to_cpup((__le32 *)(data + 0x1b8));
+
 	/*
 	 * Look for partitions in two passes:
 	 * First find the primary and DOS-type extended partitions.
@@ -515,11 +531,12 @@
 			put_partition(state, slot, start, n);
 
 			strlcat(state->pp_buf, " <", PAGE_SIZE);
-			parse_extended(state, start, size);
+			parse_extended(state, start, size, disksig);
 			strlcat(state->pp_buf, " >", PAGE_SIZE);
 			continue;
 		}
 		put_partition(state, slot, start, size);
+		set_info(state, slot, disksig);
 		if (SYS_IND(p) == LINUX_RAID_PARTITION)
 			state->parts[slot].flags = ADDPART_FLAG_RAID;
 		if (SYS_IND(p) == DM6_PARTITION)
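
set_info() above gives MBR partitions a pseudo-UUID built from the 32-bit
NT disk signature (read little-endian from offset 0x1b8 of sector 0) plus
the partition number. A worked example:

	/* MBR bytes at 0x1b8..0x1bb: 12 34 56 78 (little-endian)
	 *	disksig = 0x78563412
	 *	partition 1 -> info->uuid = "78563412-01"
	 * which is the format a root=PARTUUID=78563412-01 boot parameter
	 * can then be matched against.
	 */
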
diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c
index bd5de08..0576a7d 100644
--- a/drivers/amba/tegra-ahb.c
+++ b/drivers/amba/tegra-ahb.c
@@ -157,6 +157,7 @@
 EXPORT_SYMBOL(tegra_ahb_enable_smmu);
 #endif
 
+#ifdef CONFIG_PM_SLEEP
 static int tegra_ahb_suspend(struct device *dev)
 {
 	int i;
@@ -176,6 +177,7 @@
 		gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]);
 	return 0;
 }
+#endif
 
 static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm,
 			    tegra_ahb_suspend,
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index c909b7b..d70abe7 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -42,7 +42,8 @@
 #include <linux/swab.h>
 #include <linux/slab.h>
 
-#define VERSION "0.07"
+#define VERSION "1.04"
+#define DRIVER_VERSION 0x01
 #define PTAG "solos-pci"
 
 #define CONFIG_RAM_SIZE	128
@@ -56,16 +57,21 @@
 #define FLASH_BUSY	0x60
 #define FPGA_MODE	0x5C
 #define FLASH_MODE	0x58
+#define GPIO_STATUS	0x54
+#define DRIVER_VER	0x50
 #define TX_DMA_ADDR(port)	(0x40 + (4 * (port)))
 #define RX_DMA_ADDR(port)	(0x30 + (4 * (port)))
 
 #define DATA_RAM_SIZE	32768
 #define BUF_SIZE	2048
 #define OLD_BUF_SIZE	4096 /* For FPGA versions <= 2*/
-#define FPGA_PAGE	528 /* FPGA flash page size*/
-#define SOLOS_PAGE	512 /* Solos flash page size*/
-#define FPGA_BLOCK	(FPGA_PAGE * 8) /* FPGA flash block size*/
-#define SOLOS_BLOCK	(SOLOS_PAGE * 8) /* Solos flash block size*/
+/* Old boards use ATMEL AD45DB161D flash */
+#define ATMEL_FPGA_PAGE	528 /* FPGA flash page size */
+#define ATMEL_SOLOS_PAGE	512 /* Solos flash page size */
+#define ATMEL_FPGA_BLOCK	(ATMEL_FPGA_PAGE * 8) /* FPGA block size */
+#define ATMEL_SOLOS_BLOCK	(ATMEL_SOLOS_PAGE * 8) /* Solos block size */
+/* Current boards use M25P/M25PE SPI flash */
+#define SPI_FLASH_BLOCK	(256 * 64)
 
 #define RX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2)
 #define TX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2 + (card->buffer_size))
@@ -122,11 +128,14 @@
 	struct sk_buff_head cli_queue[4];
 	struct sk_buff *tx_skb[4];
 	struct sk_buff *rx_skb[4];
+	unsigned char *dma_bounce;
 	wait_queue_head_t param_wq;
 	wait_queue_head_t fw_wq;
 	int using_dma;
+	int dma_alignment;
 	int fpga_version;
 	int buffer_size;
+	int atmel_flash;
 };
 
 
@@ -451,7 +460,6 @@
 
 	len = skb->len;
 	memcpy(buf, skb->data, len);
-	dev_dbg(&card->dev->dev, "len: %d\n", len);
 
 	kfree_skb(skb);
 	return len;
@@ -498,6 +506,78 @@
 	return err?:count;
 }
 
+struct geos_gpio_attr {
+	struct device_attribute attr;
+	int offset;
+};
+
+#define SOLOS_GPIO_ATTR(_name, _mode, _show, _store, _offset)	\
+	struct geos_gpio_attr gpio_attr_##_name = {		\
+		.attr = __ATTR(_name, _mode, _show, _store),	\
+		.offset = _offset }
+
+static ssize_t geos_gpio_store(struct device *dev, struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr);
+	struct solos_card *card = pci_get_drvdata(pdev);
+	uint32_t data32;
+
+	if (count != 1 && (count != 2 || buf[1] != '\n'))
+		return -EINVAL;
+
+	spin_lock_irq(&card->param_queue_lock);
+	data32 = ioread32(card->config_regs + GPIO_STATUS);
+	if (buf[0] == '1') {
+		data32 |= 1 << gattr->offset;
+		iowrite32(data32, card->config_regs + GPIO_STATUS);
+	} else if (buf[0] == '0') {
+		data32 &= ~(1 << gattr->offset);
+		iowrite32(data32, card->config_regs + GPIO_STATUS);
+	} else {
+		count = -EINVAL;
+	}
+	spin_unlock_irq(&card->param_queue_lock);
+	return count;
+}
+
+static ssize_t geos_gpio_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr);
+	struct solos_card *card = pci_get_drvdata(pdev);
+	uint32_t data32;
+
+	data32 = ioread32(card->config_regs + GPIO_STATUS);
+	data32 = (data32 >> gattr->offset) & 1;
+
+	return sprintf(buf, "%d\n", data32);
+}
+
+static ssize_t hardware_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr);
+	struct solos_card *card = pci_get_drvdata(pdev);
+	uint32_t data32;
+
+	data32 = ioread32(card->config_regs + GPIO_STATUS);
+	switch (gattr->offset) {
+	case 0:
+		/* HardwareVersion */
+		data32 = data32 & 0x1F;
+		break;
+	case 1:
+		/* HardwareVariant */
+		data32 = (data32 >> 5) & 0x0F;
+		break;
+	}
+	return sprintf(buf, "%d\n", data32);
+}
+
 static DEVICE_ATTR(console, 0644, console_show, console_store);
 
 
@@ -506,6 +586,14 @@
 
 #include "solos-attrlist.c"
 
+static SOLOS_GPIO_ATTR(GPIO1, 0644, geos_gpio_show, geos_gpio_store, 9);
+static SOLOS_GPIO_ATTR(GPIO2, 0644, geos_gpio_show, geos_gpio_store, 10);
+static SOLOS_GPIO_ATTR(GPIO3, 0644, geos_gpio_show, geos_gpio_store, 11);
+static SOLOS_GPIO_ATTR(GPIO4, 0644, geos_gpio_show, geos_gpio_store, 12);
+static SOLOS_GPIO_ATTR(GPIO5, 0644, geos_gpio_show, geos_gpio_store, 13);
+static SOLOS_GPIO_ATTR(PushButton, 0444, geos_gpio_show, NULL, 14);
+static SOLOS_GPIO_ATTR(HardwareVersion, 0444, hardware_show, NULL, 0);
+static SOLOS_GPIO_ATTR(HardwareVariant, 0444, hardware_show, NULL, 1);
 #undef SOLOS_ATTR_RO
 #undef SOLOS_ATTR_RW
 
@@ -522,6 +610,23 @@
 	.name = "parameters",
 };
 
+static struct attribute *gpio_attrs[] = {
+	&gpio_attr_GPIO1.attr.attr,
+	&gpio_attr_GPIO2.attr.attr,
+	&gpio_attr_GPIO3.attr.attr,
+	&gpio_attr_GPIO4.attr.attr,
+	&gpio_attr_GPIO5.attr.attr,
+	&gpio_attr_PushButton.attr.attr,
+	&gpio_attr_HardwareVersion.attr.attr,
+	&gpio_attr_HardwareVariant.attr.attr,
+	NULL
+};
+
+static struct attribute_group gpio_attr_group = {
+	.attrs = gpio_attrs,
+	.name = "gpio",
+};
+
 static int flash_upgrade(struct solos_card *card, int chip)
 {
 	const struct firmware *fw;
@@ -533,16 +638,25 @@
 	switch (chip) {
 	case 0:
 		fw_name = "solos-FPGA.bin";
-		blocksize = FPGA_BLOCK;
+		if (card->atmel_flash)
+			blocksize = ATMEL_FPGA_BLOCK;
+		else
+			blocksize = SPI_FLASH_BLOCK;
 		break;
 	case 1:
 		fw_name = "solos-Firmware.bin";
-		blocksize = SOLOS_BLOCK;
+		if (card->atmel_flash)
+			blocksize = ATMEL_SOLOS_BLOCK;
+		else
+			blocksize = SPI_FLASH_BLOCK;
 		break;
 	case 2:
 		if (card->fpga_version > LEGACY_BUFFERS){
 			fw_name = "solos-db-FPGA.bin";
-			blocksize = FPGA_BLOCK;
+			if (card->atmel_flash)
+				blocksize = ATMEL_FPGA_BLOCK;
+			else
+				blocksize = SPI_FLASH_BLOCK;
 		} else {
 			dev_info(&card->dev->dev, "FPGA version doesn't support"
 					" daughter board upgrades\n");
@@ -552,7 +666,10 @@
 	case 3:
 		if (card->fpga_version > LEGACY_BUFFERS){
 			fw_name = "solos-Firmware.bin";
-			blocksize = SOLOS_BLOCK;
+			if (card->atmel_flash)
+				blocksize = ATMEL_SOLOS_BLOCK;
+			else
+				blocksize = SPI_FLASH_BLOCK;
 		} else {
 			dev_info(&card->dev->dev, "FPGA version doesn't support"
 					" daughter board upgrades\n");
@@ -568,6 +685,9 @@
 
 	dev_info(&card->dev->dev, "Flash upgrade starting\n");
 
+	/* New FPGAs require driver version before permitting flash upgrades */
+	iowrite32(DRIVER_VERSION, card->config_regs + DRIVER_VER);
+
 	numblocks = fw->size / blocksize;
 	dev_info(&card->dev->dev, "Firmware size: %zd\n", fw->size);
 	dev_info(&card->dev->dev, "Number of blocks: %d\n", numblocks);
@@ -597,9 +717,13 @@
 		/* dev_info(&card->dev->dev, "Set FPGA Flash mode to Block Write\n"); */
 		iowrite32(((chip * 2) + 1), card->config_regs + FLASH_MODE);
 
-		/* Copy block to buffer, swapping each 16 bits */
+		/* Copy block to buffer, swapping each 16 bits for Atmel flash */
 		for(i = 0; i < blocksize; i += 4) {
-			uint32_t word = swahb32p((uint32_t *)(fw->data + offset + i));
+			uint32_t word;
+			if (card->atmel_flash)
+				word = swahb32p((uint32_t *)(fw->data + offset + i));
+			else
+				word = *(uint32_t *)(fw->data + offset + i);
 			if(card->fpga_version > LEGACY_BUFFERS)
 				iowrite32(word, FLASH_BUF + i);
 			else
@@ -961,7 +1085,12 @@
 				tx_started |= 1 << port;
 				oldskb = skb; /* We're done with this skb already */
 			} else if (skb && card->using_dma) {
-				SKB_CB(skb)->dma_addr = pci_map_single(card->dev, skb->data,
+				unsigned char *data = skb->data;
+				if ((unsigned long)data & card->dma_alignment) {
+					data = card->dma_bounce + (BUF_SIZE * port);
+					memcpy(data, skb->data, skb->len);
+				}
+				SKB_CB(skb)->dma_addr = pci_map_single(card->dev, data,
 								       skb->len, PCI_DMA_TODEVICE);
 				card->tx_skb[port] = skb;
 				iowrite32(SKB_CB(skb)->dma_addr,
@@ -1133,18 +1262,33 @@
 		db_fpga_upgrade = db_firmware_upgrade = 0;
 	}
 
+	/* Stopped using Atmel flash after 0.03-38 */
+	if (fpga_ver < 39)
+		card->atmel_flash = 1;
+	else
+		card->atmel_flash = 0;
+
+	data32 = ioread32(card->config_regs + PORTS);
+	card->nr_ports = (data32 & 0x000000FF);
+
 	if (card->fpga_version >= DMA_SUPPORTED) {
 		pci_set_master(dev);
 		card->using_dma = 1;
+		if (1) { /* All known FPGA versions so far */
+			card->dma_alignment = 3;
+			card->dma_bounce = kmalloc(card->nr_ports * BUF_SIZE, GFP_KERNEL);
+			if (!card->dma_bounce) {
+				dev_warn(&card->dev->dev, "Failed to allocate DMA bounce buffers\n");
+				/* Fallback to MMIO doesn't work */
+				goto out_unmap_both;
+			}
+		}
 	} else {
 		card->using_dma = 0;
 		/* Set RX empty flag for all ports */
 		iowrite32(0xF0, card->config_regs + FLAGS_ADDR);
 	}
 
-	data32 = ioread32(card->config_regs + PORTS);
-	card->nr_ports = (data32 & 0x000000FF);
-
 	pci_set_drvdata(dev, card);
 
 	tasklet_init(&card->tlet, solos_bh, (unsigned long)card);
@@ -1179,6 +1323,10 @@
 	if (err)
 		goto out_free_irq;
 
+	if (card->fpga_version >= DMA_SUPPORTED &&
+	    sysfs_create_group(&card->dev->dev.kobj, &gpio_attr_group))
+		dev_err(&card->dev->dev, "Could not register parameter group for GPIOs\n");
+
 	return 0;
 
  out_free_irq:
@@ -1187,6 +1335,7 @@
 	tasklet_kill(&card->tlet);
 	
  out_unmap_both:
+	kfree(card->dma_bounce);
 	pci_set_drvdata(dev, NULL);
 	pci_iounmap(dev, card->buffers);
  out_unmap_config:
@@ -1289,11 +1438,16 @@
 	iowrite32(1, card->config_regs + FPGA_MODE);
 	(void)ioread32(card->config_regs + FPGA_MODE); 
 
+	if (card->fpga_version >= DMA_SUPPORTED)
+		sysfs_remove_group(&card->dev->dev.kobj, &gpio_attr_group);
+
 	atm_remove(card);
 
 	free_irq(dev->irq, card);
 	tasklet_kill(&card->tlet);
 
+	kfree(card->dma_bounce);
+
 	/* Release device from reset */
 	iowrite32(0, card->config_regs + FPGA_MODE);
 	(void)ioread32(card->config_regs + FPGA_MODE); 
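
The bounce-buffer logic added to the transmit path handles skbs whose
data is not suitably aligned for the card's DMA engine; dma_alignment is
a mask of low address bits that must be zero (3 means 4-byte alignment).
The check, restated as a hypothetical helper (dma_safe_buf() is not in
the driver):

	static unsigned char *dma_safe_buf(struct solos_card *card,
					   struct sk_buff *skb, int port)
	{
		unsigned char *data = skb->data;

		/* e.g. data == ...0x1002: 0x1002 & 3 == 2 -> misaligned */
		if ((unsigned long)data & card->dma_alignment) {
			data = card->dma_bounce + (BUF_SIZE * port);
			memcpy(data, skb->data, skb->len);	/* DMA from bounce */
		}
		return data;
	}
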
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 147d1a4..17cf7ca 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -148,7 +148,7 @@
 	struct path path;
 	int err;
 
-	dentry = kern_path_create(AT_FDCWD, name, &path, 1);
+	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
index 460e22d..a3f79c4 100644
--- a/drivers/base/dma-buf.c
+++ b/drivers/base/dma-buf.c
@@ -298,6 +298,8 @@
 				struct sg_table *sg_table,
 				enum dma_data_direction direction)
 {
+	might_sleep();
+
 	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
 		return;
 
diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index e162999..c62c788 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -13,12 +13,13 @@
 #include <linux/export.h>
 #include <linux/bcma/bcma.h>
 
-static u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset)
+u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset)
 {
 	bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset);
 	bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR);
 	return bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA);
 }
+EXPORT_SYMBOL_GPL(bcma_chipco_pll_read);
 
 void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value)
 {
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index d2ed7f1..1756494 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
-#define VERSION "50"
+#define VERSION "81"
 #define AOE_MAJOR 152
 #define DEVICE_NAME "aoe"
 
@@ -10,7 +10,7 @@
 #define AOE_PARTITIONS (16)
 #endif
 
-#define WHITESPACE " \t\v\f\n"
+#define WHITESPACE " \t\v\f\n,"
 
 enum {
 	AOECMD_ATA,
@@ -73,21 +73,29 @@
 	DEVFL_TKILL = (1<<1),	/* flag for timer to know when to kill self */
 	DEVFL_EXT = (1<<2),	/* device accepts lba48 commands */
 	DEVFL_GDALLOC = (1<<3),	/* need to alloc gendisk */
-	DEVFL_KICKME = (1<<4),	/* slow polling network card catch */
-	DEVFL_NEWSIZE = (1<<5),	/* need to update dev size in block layer */
+	DEVFL_GD_NOW = (1<<4),	/* allocating gendisk */
+	DEVFL_KICKME = (1<<5),	/* slow polling network card catch */
+	DEVFL_NEWSIZE = (1<<6),	/* need to update dev size in block layer */
+	DEVFL_FREEING = (1<<7),	/* set when device is being cleaned up */
+	DEVFL_FREED = (1<<8),	/* device has been cleaned up */
 };
 
 enum {
 	DEFAULTBCNT = 2 * 512,	/* 2 sectors */
 	MIN_BUFS = 16,
-	NTARGETS = 8,
+	NTARGETS = 4,
 	NAOEIFS = 8,
 	NSKBPOOLMAX = 256,
 	NFACTIVE = 61,
 
 	TIMERTICK = HZ / 10,
-	MINTIMER = HZ >> 2,
-	MAXTIMER = HZ << 1,
+	RTTSCALE = 8,
+	RTTDSCALE = 3,
+	RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
+	RTTDEV_INIT = RTTAVG_INIT / 4,
+
+	HARD_SCORN_SECS = 10,	/* try another remote port after this */
+	MAX_TAINT = 1000,	/* cap on aoetgt taint */
 };
 
 struct buf {
@@ -100,10 +108,17 @@
 	struct request *rq;
 };
 
+enum frame_flags {
+	FFL_PROBE = 1,
+};
+
 struct frame {
 	struct list_head head;
 	u32 tag;
+	struct timeval sent;	/* high-res time packet was sent */
+	u32 sent_jiffs;		/* low-res jiffies-based sent time */
 	ulong waited;
+	ulong waited_total;
 	struct aoetgt *t;		/* parent target I belong to */
 	sector_t lba;
 	struct sk_buff *skb;		/* command skb freed on module exit */
@@ -112,6 +127,7 @@
 	struct bio_vec *bv;
 	ulong bcnt;
 	ulong bv_off;
+	char flags;
 };
 
 struct aoeif {
@@ -122,28 +138,31 @@
 
 struct aoetgt {
 	unsigned char addr[6];
-	ushort nframes;
+	ushort nframes;		/* cap on frames to use */
 	struct aoedev *d;			/* parent device I belong to */
 	struct list_head ffree;			/* list of free frames */
 	struct aoeif ifs[NAOEIFS];
 	struct aoeif *ifp;	/* current aoeif in use */
-	ushort nout;
-	ushort maxout;
-	ulong falloc;
-	ulong lastwadj;		/* last window adjustment */
+	ushort nout;		/* number of AoE commands outstanding */
+	ushort maxout;		/* current value for max outstanding */
+	ushort next_cwnd;	/* incr maxout after decrementing to zero */
+	ushort ssthresh;	/* slow start threshold */
+	ulong falloc;		/* number of allocated frames */
+	int taint;		/* how much we want to avoid this aoetgt */
 	int minbcnt;
 	int wpkts, rpkts;
+	char nout_probes;
 };
 
 struct aoedev {
 	struct aoedev *next;
 	ulong sysminor;
 	ulong aoemajor;
+	u32 rttavg;		/* scaled AoE round trip time average */
+	u32 rttdev;		/* scaled round trip time mean deviation */
 	u16 aoeminor;
 	u16 flags;
 	u16 nopen;		/* (bd_openers isn't available without sleeping) */
-	u16 rttavg;		/* round trip average of requests/responses */
-	u16 mintimer;
 	u16 fw_ver;		/* version of blade's firmware */
 	u16 lasttag;		/* last tag sent */
 	u16 useme;
@@ -151,7 +170,7 @@
 	struct work_struct work;/* disk create work struct */
 	struct gendisk *gd;
 	struct request_queue *blkq;
-	struct hd_geometry geo; 
+	struct hd_geometry geo;
 	sector_t ssize;
 	struct timer_list timer;
 	spinlock_t lock;
@@ -164,11 +183,12 @@
 	} ip;
 	ulong maxbcnt;
 	struct list_head factive[NFACTIVE];	/* hash of active frames */
-	struct aoetgt *targets[NTARGETS];
+	struct list_head rexmitq; /* deferred retransmissions */
+	struct aoetgt **targets;
+	ulong ntargets;		/* number of allocated aoetgt pointers */
 	struct aoetgt **tgt;	/* target in use when working */
-	struct aoetgt *htgt;	/* target needing rexmit assistance */
-	ulong ntargets;
 	ulong kicked;
+	char ident[512];
 };
 
 /* kthread tracking */
@@ -195,6 +215,7 @@
 struct sk_buff *aoecmd_ata_rsp(struct sk_buff *);
 void aoecmd_cfg_rsp(struct sk_buff *);
 void aoecmd_sleepwork(struct work_struct *);
+void aoecmd_wreset(struct aoetgt *t);
 void aoecmd_cleanslate(struct aoedev *);
 void aoecmd_exit(void);
 int aoecmd_init(void);
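
The new rttavg/rttdev fields are scaled fixed point: the average is kept
shifted left by RTTSCALE and the mean deviation by RTTDSCALE, which lets
integer arithmetic apply the usual exponentially weighted smoothing. A
sketch in the style of Jacobson/Karels RTT estimation, consistent with
the constants above (the actual update lives in aoecmd.c and may differ
in detail; update_rtt() is a hypothetical name):

	static void update_rtt(struct aoedev *d, int rtt_usec)
	{
		int n = rtt_usec;

		n -= d->rttavg >> RTTSCALE;	/* error vs. current average */
		d->rttavg += n;			/* avg += error / 2^RTTSCALE */
		if (n < 0)
			n = -n;
		n -= d->rttdev >> RTTDSCALE;
		d->rttdev += n;			/* same smoothing for deviation */
	}
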
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 00dfc50..a129f8c 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -16,11 +16,19 @@
 #include <linux/netdevice.h>
 #include <linux/mutex.h>
 #include <linux/export.h>
+#include <linux/moduleparam.h>
+#include <scsi/sg.h>
 #include "aoe.h"
 
 static DEFINE_MUTEX(aoeblk_mutex);
 static struct kmem_cache *buf_pool_cache;
 
+/* GPFS needs a larger value than the default. */
+static int aoe_maxsectors;
+module_param(aoe_maxsectors, int, 0644);
+MODULE_PARM_DESC(aoe_maxsectors,
+	"When nonzero, set the maximum number of sectors per I/O request");
+
 static ssize_t aoedisk_show_state(struct device *dev,
 				  struct device_attribute *attr, char *page)
 {
@@ -59,7 +67,7 @@
 	nd = nds;
 	ne = nd + ARRAY_SIZE(nds);
 	t = d->targets;
-	te = t + NTARGETS;
+	te = t + d->ntargets;
 	for (; t < te && *t; t++) {
 		ifp = (*t)->ifs;
 		e = ifp + NAOEIFS;
@@ -91,6 +99,14 @@
 
 	return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
 }
+static ssize_t aoedisk_show_payload(struct device *dev,
+				    struct device_attribute *attr, char *page)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	struct aoedev *d = disk->private_data;
+
+	return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
+}
 
 static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
 static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
@@ -99,12 +115,14 @@
 	.attr = { .name = "firmware-version", .mode = S_IRUGO },
 	.show = aoedisk_show_fwver,
 };
+static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
 
 static struct attribute *aoe_attrs[] = {
 	&dev_attr_state.attr,
 	&dev_attr_mac.attr,
 	&dev_attr_netif.attr,
 	&dev_attr_firmware_version.attr,
+	&dev_attr_payload.attr,
 	NULL,
 };
 
@@ -129,9 +147,18 @@
 	struct aoedev *d = bdev->bd_disk->private_data;
 	ulong flags;
 
+	if (!virt_addr_valid(d)) {
+		pr_crit("aoe: invalid device pointer in %s\n",
+			__func__);
+		WARN_ON(1);
+		return -ENODEV;
+	}
+	if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
+		return -ENODEV;
+
 	mutex_lock(&aoeblk_mutex);
 	spin_lock_irqsave(&d->lock, flags);
-	if (d->flags & DEVFL_UP) {
+	if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
 		d->nopen++;
 		spin_unlock_irqrestore(&d->lock, flags);
 		mutex_unlock(&aoeblk_mutex);
@@ -195,9 +222,38 @@
 	return 0;
 }
 
+static int
+aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
+{
+	struct aoedev *d;
+
+	if (!arg)
+		return -EINVAL;
+
+	d = bdev->bd_disk->private_data;
+	if ((d->flags & DEVFL_UP) == 0) {
+		pr_err("aoe: disk not up\n");
+		return -ENODEV;
+	}
+
+	if (cmd == HDIO_GET_IDENTITY) {
+		if (!copy_to_user((void __user *) arg, &d->ident,
+			sizeof(d->ident)))
+			return 0;
+		return -EFAULT;
+	}
+
+	/* udev calls scsi_id, which uses SG_IO, resulting in noise */
+	if (cmd != SG_IO)
+		pr_info("aoe: unknown ioctl 0x%x\n", cmd);
+
+	return -ENOTTY;
+}
+
 static const struct block_device_operations aoe_bdops = {
 	.open = aoeblk_open,
 	.release = aoeblk_release,
+	.ioctl = aoeblk_ioctl,
 	.getgeo = aoeblk_getgeo,
 	.owner = THIS_MODULE,
 };
@@ -212,6 +268,18 @@
 	struct request_queue *q;
 	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
 	ulong flags;
+	int late = 0;
+
+	spin_lock_irqsave(&d->lock, flags);
+	if (d->flags & DEVFL_GDALLOC
+	&& !(d->flags & DEVFL_TKILL)
+	&& !(d->flags & DEVFL_GD_NOW))
+		d->flags |= DEVFL_GD_NOW;
+	else
+		late = 1;
+	spin_unlock_irqrestore(&d->lock, flags);
+	if (late)
+		return;
 
 	gd = alloc_disk(AOE_PARTITIONS);
 	if (gd == NULL) {
@@ -231,23 +299,24 @@
 	if (q == NULL) {
 		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
-		mempool_destroy(mp);
-		goto err_disk;
+		goto err_mempool;
 	}
 
-	d->blkq = blk_alloc_queue(GFP_KERNEL);
-	if (!d->blkq)
-		goto err_mempool;
-	d->blkq->backing_dev_info.name = "aoe";
-	if (bdi_init(&d->blkq->backing_dev_info))
-		goto err_blkq;
 	spin_lock_irqsave(&d->lock, flags);
-	blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS);
+	WARN_ON(!(d->flags & DEVFL_GD_NOW));
+	WARN_ON(!(d->flags & DEVFL_GDALLOC));
+	WARN_ON(d->flags & DEVFL_TKILL);
+	WARN_ON(d->gd);
+	WARN_ON(d->flags & DEVFL_UP);
+	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
+	q->backing_dev_info.name = "aoe";
 	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
 	d->bufpool = mp;
 	d->blkq = gd->queue = q;
 	q->queuedata = d;
 	d->gd = gd;
+	if (aoe_maxsectors)
+		blk_queue_max_hw_sectors(q, aoe_maxsectors);
 	gd->major = AOE_MAJOR;
 	gd->first_minor = d->sysminor;
 	gd->fops = &aoe_bdops;
@@ -263,18 +332,21 @@
 
 	add_disk(gd);
 	aoedisk_add_sysfs(d);
+
+	spin_lock_irqsave(&d->lock, flags);
+	WARN_ON(!(d->flags & DEVFL_GD_NOW));
+	d->flags &= ~DEVFL_GD_NOW;
+	spin_unlock_irqrestore(&d->lock, flags);
 	return;
 
-err_blkq:
-	blk_cleanup_queue(d->blkq);
-	d->blkq = NULL;
 err_mempool:
-	mempool_destroy(d->bufpool);
+	mempool_destroy(mp);
 err_disk:
 	put_disk(gd);
 err:
 	spin_lock_irqsave(&d->lock, flags);
-	d->flags &= ~DEVFL_GDALLOC;
+	d->flags &= ~DEVFL_GD_NOW;
+	schedule_work(&d->work);
 	spin_unlock_irqrestore(&d->lock, flags);
 }
 
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index ed57a89..42e67ad 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -39,6 +39,11 @@
 };
 
 static DEFINE_MUTEX(aoechr_mutex);
+
+/* A ring buffer of error messages, to be read through
+ * "/dev/etherd/err".  When no messages are present,
+ * readers will block waiting for messages to appear.
+ */
 static struct ErrMsg emsgs[NMSG];
 static int emsgs_head_idx, emsgs_tail_idx;
 static struct completion emsgs_comp;
@@ -282,7 +287,7 @@
 	int n, i;
 
 	n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
-	if (n < 0) { 
+	if (n < 0) {
 		printk(KERN_ERR "aoe: can't register char device\n");
 		return n;
 	}
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 9fe4f18..25ef5c0 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -22,6 +22,7 @@
 #define MAXIOC (8192)	/* default meant to avoid most soft lockups */
 
 static void ktcomplete(struct frame *, struct sk_buff *);
+static int count_targets(struct aoedev *d, int *untainted);
 
 static struct buf *nextbuf(struct aoedev *);
 
@@ -29,7 +30,7 @@
 module_param(aoe_deadsecs, int, 0644);
 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
 
-static int aoe_maxout = 16;
+static int aoe_maxout = 64;
 module_param(aoe_maxout, int, 0644);
 MODULE_PARM_DESC(aoe_maxout,
 	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
@@ -43,6 +44,8 @@
 	spinlock_t lock;
 } iocq;
 
+static struct page *empty_page;
+
 static struct sk_buff *
 new_skb(ulong len)
 {
@@ -59,6 +62,23 @@
 }
 
 static struct frame *
+getframe_deferred(struct aoedev *d, u32 tag)
+{
+	struct list_head *head, *pos, *nx;
+	struct frame *f;
+
+	head = &d->rexmitq;
+	list_for_each_safe(pos, nx, head) {
+		f = list_entry(pos, struct frame, head);
+		if (f->tag == tag) {
+			list_del(pos);
+			return f;
+		}
+	}
+	return NULL;
+}
+
+static struct frame *
 getframe(struct aoedev *d, u32 tag)
 {
 	struct frame *f;
@@ -162,8 +182,10 @@
 
 	t = f->t;
 	f->buf = NULL;
+	f->lba = 0;
 	f->bv = NULL;
 	f->r_skb = NULL;
+	f->flags = 0;
 	list_add(&f->head, &t->ffree);
 }
 
@@ -217,20 +239,25 @@
 	struct frame *f;
 	struct aoetgt *t, **tt;
 	int totout = 0;
+	int use_tainted;
+	int has_untainted;
 
-	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
+	if (!d->targets || !d->targets[0]) {
 		printk(KERN_ERR "aoe: NULL TARGETS!\n");
 		return NULL;
 	}
 	tt = d->tgt;	/* last used target */
-	for (;;) {
+	for (use_tainted = 0, has_untainted = 0;;) {
 		tt++;
-		if (tt >= &d->targets[NTARGETS] || !*tt)
+		if (tt >= &d->targets[d->ntargets] || !*tt)
 			tt = d->targets;
 		t = *tt;
-		totout += t->nout;
+		if (!t->taint) {
+			has_untainted = 1;
+			totout += t->nout;
+		}
 		if (t->nout < t->maxout
-		&& t != d->htgt
+		&& (use_tainted || !t->taint)
 		&& t->ifp->nd) {
 			f = newtframe(d, t);
 			if (f) {
@@ -239,8 +266,12 @@
 				return f;
 			}
 		}
-		if (tt == d->tgt)	/* we've looped and found nada */
-			break;
+		if (tt == d->tgt) {	/* we've looped and found nada */
+			if (!use_tainted && !has_untainted)
+				use_tainted = 1;
+			else
+				break;
+		}
 	}
 	if (totout == 0) {
 		d->kicked++;
@@ -277,21 +308,68 @@
 	list_add_tail(&f->head, &d->factive[n]);
 }
 
+static void
+ata_rw_frameinit(struct frame *f)
+{
+	struct aoetgt *t;
+	struct aoe_hdr *h;
+	struct aoe_atahdr *ah;
+	struct sk_buff *skb;
+	char writebit, extbit;
+
+	skb = f->skb;
+	h = (struct aoe_hdr *) skb_mac_header(skb);
+	ah = (struct aoe_atahdr *) (h + 1);
+	skb_put(skb, sizeof(*h) + sizeof(*ah));
+	memset(h, 0, skb->len);
+
+	writebit = 0x10;
+	extbit = 0x4;
+
+	t = f->t;
+	f->tag = aoehdr_atainit(t->d, t, h);
+	fhash(f);
+	t->nout++;
+	f->waited = 0;
+	f->waited_total = 0;
+	if (f->buf)
+		f->lba = f->buf->sector;
+
+	/* set up ata header */
+	ah->scnt = f->bcnt >> 9;
+	put_lba(ah, f->lba);
+	if (t->d->flags & DEVFL_EXT) {
+		ah->aflags |= AOEAFL_EXT;
+	} else {
+		extbit = 0;
+		ah->lba3 &= 0x0f;
+		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
+	}
+	if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
+		skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
+		ah->aflags |= AOEAFL_WRITE;
+		skb->len += f->bcnt;
+		skb->data_len = f->bcnt;
+		skb->truesize += f->bcnt;
+		t->wpkts++;
+	} else {
+		t->rpkts++;
+		writebit = 0;
+	}
+
+	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+	skb->dev = t->ifp->nd;
+}
+
 static int
 aoecmd_ata_rw(struct aoedev *d)
 {
 	struct frame *f;
-	struct aoe_hdr *h;
-	struct aoe_atahdr *ah;
 	struct buf *buf;
 	struct aoetgt *t;
 	struct sk_buff *skb;
 	struct sk_buff_head queue;
 	ulong bcnt, fbcnt;
-	char writebit, extbit;
-
-	writebit = 0x10;
-	extbit = 0x4;
 
 	buf = nextbuf(d);
 	if (buf == NULL)
@@ -326,50 +404,18 @@
 	} while (fbcnt);
 
 	/* initialize the headers & frame */
-	skb = f->skb;
-	h = (struct aoe_hdr *) skb_mac_header(skb);
-	ah = (struct aoe_atahdr *) (h+1);
-	skb_put(skb, sizeof *h + sizeof *ah);
-	memset(h, 0, skb->len);
-	f->tag = aoehdr_atainit(d, t, h);
-	fhash(f);
-	t->nout++;
-	f->waited = 0;
 	f->buf = buf;
 	f->bcnt = bcnt;
-	f->lba = buf->sector;
-
-	/* set up ata header */
-	ah->scnt = bcnt >> 9;
-	put_lba(ah, buf->sector);
-	if (d->flags & DEVFL_EXT) {
-		ah->aflags |= AOEAFL_EXT;
-	} else {
-		extbit = 0;
-		ah->lba3 &= 0x0f;
-		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
-	}
-	if (bio_data_dir(buf->bio) == WRITE) {
-		skb_fillup(skb, f->bv, f->bv_off, bcnt);
-		ah->aflags |= AOEAFL_WRITE;
-		skb->len += bcnt;
-		skb->data_len = bcnt;
-		skb->truesize += bcnt;
-		t->wpkts++;
-	} else {
-		t->rpkts++;
-		writebit = 0;
-	}
-
-	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+	ata_rw_frameinit(f);
 
 	/* mark all tracking fields and load out */
 	buf->nframesout += 1;
 	buf->sector += bcnt >> 9;
 
-	skb->dev = t->ifp->nd;
-	skb = skb_clone(skb, GFP_ATOMIC);
+	skb = skb_clone(f->skb, GFP_ATOMIC);
 	if (skb) {
+		do_gettimeofday(&f->sent);
+		f->sent_jiffs = (u32) jiffies;
 		__skb_queue_head_init(&queue);
 		__skb_queue_tail(&queue, skb);
 		aoenet_xmit(&queue);
@@ -442,11 +488,14 @@
 	h = (struct aoe_hdr *) skb_mac_header(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 
-	snprintf(buf, sizeof buf,
-		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
-		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
-		h->src, h->dst, t->nout);
-	aoechr_error(buf);
+	if (!(f->flags & FFL_PROBE)) {
+		snprintf(buf, sizeof(buf),
+			"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
+			"retransmit", d->aoemajor, d->aoeminor,
+			f->tag, jiffies, n,
+			h->src, h->dst, t->nout);
+		aoechr_error(buf);
+	}
 
 	f->tag = n;
 	fhash(f);
@@ -458,12 +507,46 @@
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (skb == NULL)
 		return;
+	do_gettimeofday(&f->sent);
+	f->sent_jiffs = (u32) jiffies;
 	__skb_queue_head_init(&queue);
 	__skb_queue_tail(&queue, skb);
 	aoenet_xmit(&queue);
 }
 
 static int
+tsince_hr(struct frame *f)
+{
+	struct timeval now;
+	int n;
+
+	do_gettimeofday(&now);
+	n = now.tv_usec - f->sent.tv_usec;
+	n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;
+
+	if (n < 0)
+		n = -n;
+
+	/* For relatively long periods, use jiffies to avoid
+	 * discrepancies caused by updates to the system time.
+	 *
+	 * On a system with HZ of 1000, 32 bits is over 49 days'
+	 * worth of jiffies, or over 71 minutes' worth of usecs.
+	 *
+	 * Jiffies overflow is handled by subtraction of unsigned ints:
+	 * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
+	 * $3 = 4
+	 * (gdb)
+	 */
+	if (n > USEC_PER_SEC / 4) {
+		n = ((u32) jiffies) - f->sent_jiffs;
+		n *= USEC_PER_SEC / HZ;
+	}
+
+	return n;
+}
+
+static int
 tsince(u32 tag)
 {
 	int n;
@@ -472,7 +555,7 @@
 	n -= tag & 0xffff;
 	if (n < 0)
 		n += 1<<16;
-	return n;
+	return jiffies_to_usecs(n + 1);
 }
 
 static struct aoeif *
@@ -503,70 +586,189 @@
 	dev_put(nd);
 }
 
-static int
-sthtith(struct aoedev *d)
+static struct frame *
+reassign_frame(struct frame *f)
 {
-	struct frame *f, *nf;
-	struct list_head *nx, *pos, *head;
+	struct frame *nf;
 	struct sk_buff *skb;
-	struct aoetgt *ht = d->htgt;
-	int i;
 
-	for (i = 0; i < NFACTIVE; i++) {
-		head = &d->factive[i];
-		list_for_each_safe(pos, nx, head) {
-			f = list_entry(pos, struct frame, head);
-			if (f->t != ht)
-				continue;
-
-			nf = newframe(d);
-			if (!nf)
-				return 0;
-
-			/* remove frame from active list */
-			list_del(pos);
-
-			/* reassign all pertinent bits to new outbound frame */
-			skb = nf->skb;
-			nf->skb = f->skb;
-			nf->buf = f->buf;
-			nf->bcnt = f->bcnt;
-			nf->lba = f->lba;
-			nf->bv = f->bv;
-			nf->bv_off = f->bv_off;
-			nf->waited = 0;
-			f->skb = skb;
-			aoe_freetframe(f);
-			ht->nout--;
-			nf->t->nout++;
-			resend(d, nf);
-		}
+	nf = newframe(f->t->d);
+	if (!nf)
+		return NULL;
+	if (nf->t == f->t) {
+		aoe_freetframe(nf);
+		return NULL;
 	}
-	/* We've cleaned up the outstanding so take away his
-	 * interfaces so he won't be used.  We should remove him from
-	 * the target array here, but cleaning up a target is
-	 * involved.  PUNT!
-	 */
-	memset(ht->ifs, 0, sizeof ht->ifs);
-	d->htgt = NULL;
-	return 1;
+
+	skb = nf->skb;
+	nf->skb = f->skb;
+	nf->buf = f->buf;
+	nf->bcnt = f->bcnt;
+	nf->lba = f->lba;
+	nf->bv = f->bv;
+	nf->bv_off = f->bv_off;
+	nf->waited = 0;
+	nf->waited_total = f->waited_total;
+	nf->sent = f->sent;
+	nf->sent_jiffs = f->sent_jiffs;
+	f->skb = skb;
+
+	return nf;
 }
 
-static inline unsigned char
-ata_scnt(unsigned char *packet) {
-	struct aoe_hdr *h;
-	struct aoe_atahdr *ah;
+static void
+probe(struct aoetgt *t)
+{
+	struct aoedev *d;
+	struct frame *f;
+	struct sk_buff *skb;
+	struct sk_buff_head queue;
+	size_t n, m;
+	int frag;
 
-	h = (struct aoe_hdr *) packet;
-	ah = (struct aoe_atahdr *) (h+1);
-	return ah->scnt;
+	d = t->d;
+	f = newtframe(d, t);
+	if (!f) {
+		pr_err("%s %pm for e%ld.%d: %s\n",
+			"aoe: cannot probe remote address",
+			t->addr,
+			(long) d->aoemajor, d->aoeminor,
+			"no frame available");
+		return;
+	}
+	f->flags |= FFL_PROBE;
+	ifrotate(t);
+	f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
+	ata_rw_frameinit(f);
+	skb = f->skb;
+	for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
+		if (n < PAGE_SIZE)
+			m = n;
+		else
+			m = PAGE_SIZE;
+		skb_fill_page_desc(skb, frag, empty_page, 0, m);
+	}
+	skb->len += f->bcnt;
+	skb->data_len = f->bcnt;
+	skb->truesize += f->bcnt;
+
+	skb = skb_clone(f->skb, GFP_ATOMIC);
+	if (skb) {
+		do_gettimeofday(&f->sent);
+		f->sent_jiffs = (u32) jiffies;
+		__skb_queue_head_init(&queue);
+		__skb_queue_tail(&queue, skb);
+		aoenet_xmit(&queue);
+	}
+}
+
+static long
+rto(struct aoedev *d)
+{
+	long t;
+
+	t = 2 * d->rttavg >> RTTSCALE;
+	t += 8 * d->rttdev >> RTTDSCALE;
+	if (t == 0)
+		t = 1;
+
+	return t;
+}
+
+static void
+rexmit_deferred(struct aoedev *d)
+{
+	struct aoetgt *t;
+	struct frame *f;
+	struct frame *nf;
+	struct list_head *pos, *nx, *head;
+	int since;
+	int untainted;
+
+	count_targets(d, &untainted);
+
+	head = &d->rexmitq;
+	list_for_each_safe(pos, nx, head) {
+		f = list_entry(pos, struct frame, head);
+		t = f->t;
+		if (t->taint) {
+			if (!(f->flags & FFL_PROBE)) {
+				nf = reassign_frame(f);
+				if (nf) {
+					if (t->nout_probes == 0
+					&& untainted > 0) {
+						probe(t);
+						t->nout_probes++;
+					}
+					list_replace(&f->head, &nf->head);
+					pos = &nf->head;
+					aoe_freetframe(f);
+					f = nf;
+					t = f->t;
+				}
+			} else if (untainted < 1) {
+				/* don't probe w/o other untainted aoetgts */
+				goto stop_probe;
+			} else if (tsince_hr(f) < t->taint * rto(d)) {
+				/* reprobe slowly when taint is high */
+				continue;
+			}
+		} else if (f->flags & FFL_PROBE) {
+stop_probe:		/* don't probe untainted aoetgts */
+			list_del(pos);
+			aoe_freetframe(f);
+			/* leaving d->kicked, because this is routine */
+			f->t->d->flags |= DEVFL_KICKME;
+			continue;
+		}
+		if (t->nout >= t->maxout)
+			continue;
+		list_del(pos);
+		t->nout++;
+		if (f->flags & FFL_PROBE)
+			t->nout_probes++;
+		since = tsince_hr(f);
+		f->waited += since;
+		f->waited_total += since;
+		resend(d, f);
+	}
+}
+
+/* An aoetgt accumulates demerits quickly, and successful
+ * probing redeems the aoetgt slowly.
+ */
+static void
+scorn(struct aoetgt *t)
+{
+	int n;
+
+	n = t->taint++;
+	t->taint += t->taint * 2;
+	if (n > t->taint)
+		t->taint = n;
+	if (t->taint > MAX_TAINT)
+		t->taint = MAX_TAINT;
+}
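
In other words, each call to scorn() roughly triples the demerit count (new taint = 3 * (old + 1)), with a guard against signed overflow and a hard cap, while the completion path decrements taint one step per good response. A quick sketch of the growth, assuming a hypothetical MAX_TAINT of 1000 (the real cap is defined in aoe.h):

#include <stdio.h>

#define MAX_TAINT 1000	/* illustrative; the real cap lives in aoe.h */

static int scorned(int taint)
{
	int n = taint++;

	taint += taint * 2;	/* taint = 3 * (old + 1) */
	if (n > taint)		/* guard against signed overflow */
		taint = n;
	if (taint > MAX_TAINT)
		taint = MAX_TAINT;
	return taint;
}

int main(void)
{
	int t = 0, i;

	for (i = 0; i < 7; i++) {
		t = scorned(t);
		printf("%d ", t);	/* 3 12 39 120 363 1000 1000 */
	}
	printf("\n");
	return 0;
}
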
+
+static int
+count_targets(struct aoedev *d, int *untainted)
+{
+	int i, good;
+
+	for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
+		if (d->targets[i]->taint == 0)
+			good++;
+
+	if (untainted)
+		*untainted = good;
+	return i;
 }
 
 static void
 rexmit_timer(ulong vp)
 {
 	struct aoedev *d;
-	struct aoetgt *t, **tt, **te;
+	struct aoetgt *t;
 	struct aoeif *ifp;
 	struct frame *f;
 	struct list_head *head, *pos, *nx;
@@ -574,15 +776,18 @@
 	register long timeout;
 	ulong flags, n;
 	int i;
+	int utgts;	/* number of aoetgt descriptors (not slots) */
+	int since;
 
 	d = (struct aoedev *) vp;
 
-	/* timeout is always ~150% of the moving average */
-	timeout = d->rttavg;
-	timeout += timeout >> 1;
-
 	spin_lock_irqsave(&d->lock, flags);
 
+	/* timeout based on observed timings and variations */
+	timeout = rto(d);
+
+	utgts = count_targets(d, NULL);
+
 	if (d->flags & DEVFL_TKILL) {
 		spin_unlock_irqrestore(&d->lock, flags);
 		return;
@@ -593,67 +798,61 @@
 		head = &d->factive[i];
 		list_for_each_safe(pos, nx, head) {
 			f = list_entry(pos, struct frame, head);
-			if (tsince(f->tag) < timeout)
+			if (tsince_hr(f) < timeout)
 				break;	/* end of expired frames */
 			/* move to flist for later processing */
 			list_move_tail(pos, &flist);
 		}
 	}
-	/* window check */
-	tt = d->targets;
-	te = tt + d->ntargets;
-	for (; tt < te && (t = *tt); tt++) {
-		if (t->nout == t->maxout
-		&& t->maxout < t->nframes
-		&& (jiffies - t->lastwadj)/HZ > 10) {
-			t->maxout++;
-			t->lastwadj = jiffies;
-		}
-	}
-
-	if (!list_empty(&flist)) {	/* retransmissions necessary */
-		n = d->rttavg <<= 1;
-		if (n > MAXTIMER)
-			d->rttavg = MAXTIMER;
-	}
 
 	/* process expired frames */
 	while (!list_empty(&flist)) {
 		pos = flist.next;
 		f = list_entry(pos, struct frame, head);
-		n = f->waited += timeout;
-		n /= HZ;
-		if (n > aoe_deadsecs) {
+		since = tsince_hr(f);
+		n = f->waited_total + since;
+		n /= USEC_PER_SEC;
+		if (aoe_deadsecs
+		&& n > aoe_deadsecs
+		&& !(f->flags & FFL_PROBE)) {
 			/* Waited too long.  Device failure.
 			 * Hang all frames on first hash bucket for downdev
 			 * to clean up.
 			 */
 			list_splice(&flist, &d->factive[0]);
 			aoedev_downdev(d);
-			break;
+			goto out;
 		}
-		list_del(pos);
 
 		t = f->t;
-		if (n > aoe_deadsecs/2)
-			d->htgt = t; /* see if another target can help */
+		n = f->waited + since;
+		n /= USEC_PER_SEC;
+		if (aoe_deadsecs && utgts > 0
+		&& (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
+			scorn(t); /* avoid this target */
 
-		if (t->nout == t->maxout) {
-			if (t->maxout > 1)
-				t->maxout--;
-			t->lastwadj = jiffies;
+		if (t->maxout != 1) {
+			t->ssthresh = t->maxout / 2;
+			t->maxout = 1;
 		}
 
-		ifp = getif(t, f->skb->dev);
-		if (ifp && ++ifp->lost > (t->nframes << 1)
-		&& (ifp != t->ifs || t->ifs[1].nd)) {
-			ejectif(t, ifp);
-			ifp = NULL;
+		if (f->flags & FFL_PROBE) {
+			t->nout_probes--;
+		} else {
+			ifp = getif(t, f->skb->dev);
+			if (ifp && ++ifp->lost > (t->nframes << 1)
+			&& (ifp != t->ifs || t->ifs[1].nd)) {
+				ejectif(t, ifp);
+				ifp = NULL;
+			}
 		}
-		resend(d, f);
+		list_move_tail(pos, &d->rexmitq);
+		t->nout--;
 	}
+	rexmit_deferred(d);
 
-	if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
+out:
+	if ((d->flags & DEVFL_KICKME) && d->blkq) {
 		d->flags &= ~DEVFL_KICKME;
 		d->blkq->request_fn(d->blkq);
 	}
@@ -774,8 +973,7 @@
 void
 aoecmd_work(struct aoedev *d)
 {
-	if (d->htgt && !sthtith(d))
-		return;
+	rexmit_deferred(d);
 	while (aoecmd_ata_rw(d))
 		;
 }
@@ -809,6 +1007,17 @@
 }
 
 static void
+ata_ident_fixstring(u16 *id, int ns)
+{
+	u16 s;
+
+	while (ns-- > 0) {
+		s = *id;
+		*id++ = s >> 8 | s << 8;
+	}
+}
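
ATA IDENTIFY strings store two ASCII characters per 16-bit word, swapped relative to reading order, which is why the serial, firmware, and model fields get this word-wise byte swap before being copied into d->ident. A user-space sketch of the same fix-up (the sample buffer contents are made up):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static void fixstring(uint16_t *id, int nwords)
{
	while (nwords-- > 0) {
		uint16_t s = *id;

		*id++ = s >> 8 | s << 8;	/* swap the word's two bytes */
	}
}

int main(void)
{
	uint16_t buf[4];

	memcpy(buf, "oA Eidks", 8);	/* "AoE disk" with each pair swapped */
	fixstring(buf, 4);
	printf("%.8s\n", (char *)buf);	/* prints "AoE disk" */
	return 0;
}
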
+
+static void
 ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 {
 	u64 ssize;
@@ -843,6 +1052,11 @@
 		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
 	}
 
+	ata_ident_fixstring((u16 *) &id[10<<1], 10);	/* serial */
+	ata_ident_fixstring((u16 *) &id[23<<1], 4);	/* firmware */
+	ata_ident_fixstring((u16 *) &id[27<<1], 20);	/* model */
+	memcpy(d->ident, id, sizeof(d->ident));
+
 	if (d->ssize != ssize)
 		printk(KERN_INFO
 			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
@@ -862,26 +1076,28 @@
 }
 
 static void
-calc_rttavg(struct aoedev *d, int rtt)
+calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
 {
 	register long n;
 
 	n = rtt;
-	if (n < 0) {
-		n = -rtt;
-		if (n < MINTIMER)
-			n = MINTIMER;
-		else if (n > MAXTIMER)
-			n = MAXTIMER;
-		d->mintimer += (n - d->mintimer) >> 1;
-	} else if (n < d->mintimer)
-		n = d->mintimer;
-	else if (n > MAXTIMER)
-		n = MAXTIMER;
 
-	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
-	n -= d->rttavg;
-	d->rttavg += n >> 2;
+	/* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
+	n -= d->rttavg >> RTTSCALE;
+	d->rttavg += n;
+	if (n < 0)
+		n = -n;
+	n -= d->rttdev >> RTTDSCALE;
+	d->rttdev += n;
+
+	if (!t || t->maxout >= t->nframes)
+		return;
+	if (t->maxout < t->ssthresh)
+		t->maxout += 1;
+	else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
+		t->maxout += 1;
+		t->next_cwnd = t->maxout;
+	}
 }
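
Two things happen in calc_rttavg() now. The first half is the scaled-integer Jacobson & Karels estimator: rttavg carries the smoothed RTT scaled by 2^RTTSCALE, rttdev the mean deviation scaled by 2^RTTDSCALE, and rto() turns them into roughly 2*srtt + 8*dev. The second half is TCP-style window growth: below ssthresh, maxout climbs by one per completed command (slow start); at or above it, by one per full window (congestion avoidance). A user-space sketch of the estimator, with assumed scale constants (the driver's real RTTSCALE/RTTDSCALE are defined in aoe.h, not shown in this diff):

#include <stdio.h>

enum { RTTSCALE = 3, RTTDSCALE = 2 };	/* assumed values, for illustration */

struct est {
	long avg;	/* smoothed rtt, scaled by 2^RTTSCALE */
	long dev;	/* mean deviation, scaled by 2^RTTDSCALE */
};

static void sample(struct est *e, long rtt)
{
	long n = rtt - (e->avg >> RTTSCALE);	/* prediction error */

	e->avg += n;				/* srtt += err / 8 */
	if (n < 0)
		n = -n;
	n -= e->dev >> RTTDSCALE;
	e->dev += n;				/* dev += (|err| - dev) / 4 */
}

static long rto(struct est *e)
{
	long t = 2 * e->avg >> RTTSCALE;	/* ~ 2 * srtt */

	t += 8 * e->dev >> RTTDSCALE;		/* + 8 * dev */
	return t ? t : 1;
}

int main(void)
{
	struct est e = { 0, 0 };
	long rtts[] = { 500, 520, 480, 900, 510 };	/* usecs */
	unsigned i;

	for (i = 0; i < sizeof(rtts) / sizeof(rtts[0]); i++) {
		sample(&e, rtts[i]);
		printf("rtt=%ld rto=%ld\n", rtts[i], rto(&e));
	}
	return 0;
}
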
 
 static struct aoetgt *
@@ -890,7 +1106,7 @@
 	struct aoetgt **t, **e;
 
 	t = d->targets;
-	e = t + NTARGETS;
+	e = t + d->ntargets;
 	for (; t < e && *t; t++)
 		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
 			return *t;
@@ -966,19 +1182,22 @@
 	struct aoeif *ifp;
 	struct aoedev *d;
 	long n;
+	int untainted;
 
 	if (f == NULL)
 		return;
 
 	t = f->t;
 	d = t->d;
+	skb = f->r_skb;
+	buf = f->buf;
+	if (f->flags & FFL_PROBE)
+		goto out;
+	if (!skb)		/* just fail the buf. */
+		goto noskb;
 
 	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
 	ahout = (struct aoe_atahdr *) (hout+1);
-	buf = f->buf;
-	skb = f->r_skb;
-	if (skb == NULL)
-		goto noskb;	/* just fail the buf. */
 
 	hin = (struct aoe_hdr *) skb->data;
 	skb_pull(skb, sizeof(*hin));
@@ -988,9 +1207,9 @@
 		pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
 			ahout->cmdstat, ahin->cmdstat,
 			d->aoemajor, d->aoeminor);
-noskb:	if (buf)
+noskb:		if (buf)
 			clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
-		goto badrsp;
+		goto out;
 	}
 
 	n = ahout->scnt << 9;
@@ -998,8 +1217,10 @@
 	case ATA_CMD_PIO_READ:
 	case ATA_CMD_PIO_READ_EXT:
 		if (skb->len < n) {
-			pr_err("aoe: runt data size in read.  skb->len=%d need=%ld\n",
-				skb->len, n);
+			pr_err("%s e%ld.%d.  skb->len=%d need=%ld\n",
+				"aoe: runt data size in read from",
+				(long) d->aoemajor, d->aoeminor,
+			       skb->len, n);
 			clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
 			break;
 		}
@@ -1010,13 +1231,13 @@
 		ifp = getif(t, skb->dev);
 		if (ifp)
 			ifp->lost = 0;
-		if (d->htgt == t) /* I'll help myself, thank you. */
-			d->htgt = NULL;
 		spin_unlock_irq(&d->lock);
 		break;
 	case ATA_CMD_ID_ATA:
 		if (skb->len < 512) {
-			pr_info("aoe: runt data size in ataid.  skb->len=%d\n",
+			pr_info("%s e%ld.%d.  skb->len=%d need=512\n",
+				"aoe: runt data size in ataid from",
+				(long) d->aoemajor, d->aoeminor,
 				skb->len);
 			break;
 		}
@@ -1032,16 +1253,23 @@
 			be16_to_cpu(get_unaligned(&hin->major)),
 			hin->minor);
 	}
-badrsp:
+out:
 	spin_lock_irq(&d->lock);
+	if (t->taint > 0
+	&& --t->taint > 0
+	&& t->nout_probes == 0) {
+		count_targets(d, &untainted);
+		if (untainted > 0) {
+			probe(t);
+			t->nout_probes++;
+		}
+	}
 
 	aoe_freetframe(f);
 
 	if (buf && --buf->nframesout == 0 && buf->resid == 0)
 		aoe_end_buf(d, buf);
 
-	aoecmd_work(d);
-
 	spin_unlock_irq(&d->lock);
 	aoedev_put(d);
 	dev_kfree_skb(skb);
@@ -1141,7 +1369,6 @@
 	struct aoedev *d;
 	struct aoe_hdr *h;
 	struct frame *f;
-	struct aoetgt *t;
 	u32 n;
 	ulong flags;
 	char ebuf[128];
@@ -1162,23 +1389,32 @@
 
 	n = be32_to_cpu(get_unaligned(&h->tag));
 	f = getframe(d, n);
-	if (f == NULL) {
-		calc_rttavg(d, -tsince(n));
-		spin_unlock_irqrestore(&d->lock, flags);
-		aoedev_put(d);
-		snprintf(ebuf, sizeof ebuf,
-			"%15s e%d.%d    tag=%08x@%08lx\n",
-			"unexpected rsp",
-			get_unaligned_be16(&h->major),
-			h->minor,
-			get_unaligned_be32(&h->tag),
-			jiffies);
-		aoechr_error(ebuf);
-		return skb;
+	if (f) {
+		calc_rttavg(d, f->t, tsince_hr(f));
+		f->t->nout--;
+		if (f->flags & FFL_PROBE)
+			f->t->nout_probes--;
+	} else {
+		f = getframe_deferred(d, n);
+		if (f) {
+			calc_rttavg(d, NULL, tsince_hr(f));
+		} else {
+			calc_rttavg(d, NULL, tsince(n));
+			spin_unlock_irqrestore(&d->lock, flags);
+			aoedev_put(d);
+			snprintf(ebuf, sizeof(ebuf),
+				 "%15s e%d.%d    tag=%08x@%08lx s=%pm d=%pm\n",
+				 "unexpected rsp",
+				 get_unaligned_be16(&h->major),
+				 h->minor,
+				 get_unaligned_be32(&h->tag),
+				 jiffies,
+				 h->src,
+				 h->dst);
+			aoechr_error(ebuf);
+			return skb;
+		}
 	}
-	t = f->t;
-	calc_rttavg(d, tsince(f->tag));
-	t->nout--;
 	aoecmd_work(d);
 
 	spin_unlock_irqrestore(&d->lock, flags);
@@ -1201,7 +1437,7 @@
 	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
 	aoenet_xmit(&queue);
 }
- 
+
 struct sk_buff *
 aoecmd_ata_id(struct aoedev *d)
 {
@@ -1227,6 +1463,7 @@
 	fhash(f);
 	t->nout++;
 	f->waited = 0;
+	f->waited_total = 0;
 
 	/* set up ata header */
 	ah->scnt = 1;
@@ -1235,41 +1472,69 @@
 
 	skb->dev = t->ifp->nd;
 
-	d->rttavg = MAXTIMER;
+	d->rttavg = RTTAVG_INIT;
+	d->rttdev = RTTDEV_INIT;
 	d->timer.function = rexmit_timer;
 
-	return skb_clone(skb, GFP_ATOMIC);
+	skb = skb_clone(skb, GFP_ATOMIC);
+	if (skb) {
+		do_gettimeofday(&f->sent);
+		f->sent_jiffs = (u32) jiffies;
+	}
+
+	return skb;
 }
- 
+
+static struct aoetgt **
+grow_targets(struct aoedev *d)
+{
+	ulong oldn, newn;
+	struct aoetgt **tt;
+
+	oldn = d->ntargets;
+	newn = oldn * 2;
+	tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
+	if (!tt)
+		return NULL;
+	memmove(tt, d->targets, sizeof(*d->targets) * oldn);
+	d->tgt = tt + (d->tgt - d->targets);
+	kfree(d->targets);
+	d->targets = tt;
+	d->ntargets = newn;
+
+	return &d->targets[oldn];
+}
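
grow_targets() doubles the target array rather than extending it by one slot, so repeated additions cost amortized O(1) copying; the GFP_ATOMIC allocation is forced by the spinlock held around addtgt(). A plain user-space sketch of the doubling pattern (the names here are illustrative):

#include <stdlib.h>
#include <string.h>

struct vec {
	void **slots;
	unsigned long n;	/* allocated slot count */
};

/* double the slot array; returns the first new slot, or NULL on failure */
static void **grow(struct vec *v)
{
	unsigned long oldn = v->n, newn = oldn * 2;
	void **tt = calloc(newn, sizeof(*tt));

	if (!tt)
		return NULL;
	memcpy(tt, v->slots, sizeof(*tt) * oldn);	/* keep old entries */
	free(v->slots);
	v->slots = tt;
	v->n = newn;
	return &v->slots[oldn];
}

int main(void)
{
	struct vec v = { calloc(4, sizeof(void *)), 4 };
	void **slot = grow(&v);	/* now 8 slots; slot points at index 4 */

	return slot ? 0 : 1;
}
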
+
 static struct aoetgt *
 addtgt(struct aoedev *d, char *addr, ulong nframes)
 {
 	struct aoetgt *t, **tt, **te;
 
 	tt = d->targets;
-	te = tt + NTARGETS;
+	te = tt + d->ntargets;
 	for (; tt < te && *tt; tt++)
 		;
 
 	if (tt == te) {
-		printk(KERN_INFO
-			"aoe: device addtgt failure; too many targets\n");
-		return NULL;
+		tt = grow_targets(d);
+		if (!tt)
+			goto nomem;
 	}
 	t = kzalloc(sizeof(*t), GFP_ATOMIC);
-	if (!t) {
-		printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
-		return NULL;
-	}
-
-	d->ntargets++;
+	if (!t)
+		goto nomem;
 	t->nframes = nframes;
 	t->d = d;
 	memcpy(t->addr, addr, sizeof t->addr);
 	t->ifp = t->ifs;
-	t->maxout = t->nframes;
+	aoecmd_wreset(t);
+	t->maxout = t->nframes / 2;
 	INIT_LIST_HEAD(&t->ffree);
 	return *tt = t;
+
+ nomem:
+	pr_info("aoe: cannot allocate memory to add target\n");
+	return NULL;
 }
 
 static void
@@ -1279,7 +1544,7 @@
 	int bcnt = 0;
 
 	t = d->targets;
-	e = t + NTARGETS;
+	e = t + d->ntargets;
 	for (; t < e && *t; t++)
 		if (bcnt == 0 || bcnt > (*t)->minbcnt)
 			bcnt = (*t)->minbcnt;
@@ -1373,7 +1638,11 @@
 	spin_lock_irqsave(&d->lock, flags);
 
 	t = gettgt(d, h->src);
-	if (!t) {
+	if (t) {
+		t->nframes = n;
+		if (n < t->maxout)
+			aoecmd_wreset(t);
+	} else {
 		t = addtgt(d, h->src, n);
 		if (!t)
 			goto bail;
@@ -1402,17 +1671,26 @@
 }
 
 void
+aoecmd_wreset(struct aoetgt *t)
+{
+	t->maxout = 1;
+	t->ssthresh = t->nframes / 2;
+	t->next_cwnd = t->nframes;
+}
+
+void
 aoecmd_cleanslate(struct aoedev *d)
 {
 	struct aoetgt **t, **te;
 
-	d->mintimer = MINTIMER;
+	d->rttavg = RTTAVG_INIT;
+	d->rttdev = RTTDEV_INIT;
 	d->maxbcnt = 0;
 
 	t = d->targets;
-	te = t + NTARGETS;
+	te = t + d->ntargets;
 	for (; t < te && *t; t++)
-		(*t)->maxout = (*t)->nframes;
+		aoecmd_wreset(*t);
 }
 
 void
@@ -1460,6 +1738,14 @@
 int __init
 aoecmd_init(void)
 {
+	void *p;
+
+	/* get_zeroed_page returns page with ref count 1 */
+	p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	if (!p)
+		return -ENOMEM;
+	empty_page = virt_to_page(p);
+
 	INIT_LIST_HEAD(&iocq.head);
 	spin_lock_init(&iocq.lock);
 	init_waitqueue_head(&ktiowq);
@@ -1475,4 +1761,7 @@
 {
 	aoe_ktstop(&kts);
 	aoe_flush_iocq();
+
+	free_page((unsigned long) page_address(empty_page));
+	empty_page = NULL;
 }
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 90e5b53..98f2965 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -15,7 +15,6 @@
 #include "aoe.h"
 
 static void dummy_timer(ulong);
-static void aoedev_freedev(struct aoedev *);
 static void freetgt(struct aoedev *d, struct aoetgt *t);
 static void skbpoolfree(struct aoedev *d);
 
@@ -69,25 +68,34 @@
 		NPERSHELF = 16,
 	};
 
+	if (aoemin >= NPERSHELF) {
+		pr_err("aoe: %s %d slots per shelf\n",
+			"static minor device numbers support only",
+			NPERSHELF);
+		error = -1;
+		goto out;
+	}
+
 	n = aoemaj * NPERSHELF + aoemin;
-	if (aoemin >= NPERSHELF || n >= N_DEVS) {
+	if (n >= N_DEVS) {
 		pr_err("aoe: %s with e%ld.%d\n",
 			"cannot use static minor device numbers",
 			aoemaj, aoemin);
 		error = -1;
-	} else {
-		spin_lock_irqsave(&used_minors_lock, flags);
-		if (test_bit(n, used_minors)) {
-			pr_err("aoe: %s %lu\n",
-				"existing device already has static minor number",
-				n);
-			error = -1;
-		} else
-			set_bit(n, used_minors);
-		spin_unlock_irqrestore(&used_minors_lock, flags);
+		goto out;
 	}
 
-	*sysminor = n;
+	spin_lock_irqsave(&used_minors_lock, flags);
+	if (test_bit(n, used_minors)) {
+		pr_err("aoe: %s %lu\n",
+			"existing device already has static minor number",
+			n);
+		error = -1;
+	} else
+		set_bit(n, used_minors);
+	spin_unlock_irqrestore(&used_minors_lock, flags);
+	*sysminor = n * AOE_PARTITIONS;
+out:
 	return error;
 }
 
@@ -170,41 +178,50 @@
 		aoe_end_request(d, rq, 0);
 }
 
+static void
+downdev_frame(struct list_head *pos)
+{
+	struct frame *f;
+
+	f = list_entry(pos, struct frame, head);
+	list_del(pos);
+	if (f->buf) {
+		f->buf->nframesout--;
+		aoe_failbuf(f->t->d, f->buf);
+	}
+	aoe_freetframe(f);
+}
+
 void
 aoedev_downdev(struct aoedev *d)
 {
 	struct aoetgt *t, **tt, **te;
-	struct frame *f;
 	struct list_head *head, *pos, *nx;
 	struct request *rq;
 	int i;
 
 	d->flags &= ~DEVFL_UP;
 
-	/* clean out active buffers */
+	/* clean out active and to-be-retransmitted buffers */
 	for (i = 0; i < NFACTIVE; i++) {
 		head = &d->factive[i];
-		list_for_each_safe(pos, nx, head) {
-			f = list_entry(pos, struct frame, head);
-			list_del(pos);
-			if (f->buf) {
-				f->buf->nframesout--;
-				aoe_failbuf(d, f->buf);
-			}
-			aoe_freetframe(f);
-		}
+		list_for_each_safe(pos, nx, head)
+			downdev_frame(pos);
 	}
+	head = &d->rexmitq;
+	list_for_each_safe(pos, nx, head)
+		downdev_frame(pos);
+
 	/* reset window dressings */
 	tt = d->targets;
-	te = tt + NTARGETS;
+	te = tt + d->ntargets;
 	for (; tt < te && (t = *tt); tt++) {
-		t->maxout = t->nframes;
+		aoecmd_wreset(t);
 		t->nout = 0;
 	}
 
 	/* clean out the in-process request (if any) */
 	aoe_failip(d);
-	d->htgt = NULL;
 
 	/* fast fail all pending I/O */
 	if (d->blkq) {
@@ -218,12 +235,48 @@
 		set_capacity(d->gd, 0);
 }
 
+/* return whether the user asked for this particular
+ * device to be flushed
+ */
+static int
+user_req(char *s, size_t slen, struct aoedev *d)
+{
+	char *p;
+	size_t lim;
+
+	if (!d->gd)
+		return 0;
+	p = strrchr(d->gd->disk_name, '/');
+	if (!p)
+		p = d->gd->disk_name;
+	else
+		p += 1;
+	lim = sizeof(d->gd->disk_name);
+	lim -= p - d->gd->disk_name;
+	if (slen < lim)
+		lim = slen;
+
+	return !strncmp(s, p, lim);
+}
+
 static void
-aoedev_freedev(struct aoedev *d)
+freedev(struct aoedev *d)
 {
 	struct aoetgt **t, **e;
+	int freeing = 0;
+	unsigned long flags;
 
-	cancel_work_sync(&d->work);
+	spin_lock_irqsave(&d->lock, flags);
+	if (d->flags & DEVFL_TKILL
+	&& !(d->flags & DEVFL_FREEING)) {
+		d->flags |= DEVFL_FREEING;
+		freeing = 1;
+	}
+	spin_unlock_irqrestore(&d->lock, flags);
+	if (!freeing)
+		return;
+
+	del_timer_sync(&d->timer);
 	if (d->gd) {
 		aoedisk_rm_sysfs(d);
 		del_gendisk(d->gd);
@@ -231,61 +284,113 @@
 		blk_cleanup_queue(d->blkq);
 	}
 	t = d->targets;
-	e = t + NTARGETS;
+	e = t + d->ntargets;
 	for (; t < e && *t; t++)
 		freetgt(d, *t);
 	if (d->bufpool)
 		mempool_destroy(d->bufpool);
 	skbpoolfree(d);
 	minor_free(d->sysminor);
-	kfree(d);
+
+	spin_lock_irqsave(&d->lock, flags);
+	d->flags |= DEVFL_FREED;
+	spin_unlock_irqrestore(&d->lock, flags);
 }
 
-int
-aoedev_flush(const char __user *str, size_t cnt)
+enum flush_parms {
+	NOT_EXITING = 0,
+	EXITING = 1,
+};
+
+static int
+flush(const char __user *str, size_t cnt, int exiting)
 {
 	ulong flags;
 	struct aoedev *d, **dd;
-	struct aoedev *rmd = NULL;
 	char buf[16];
 	int all = 0;
+	int specified = 0;	/* flush a specific device */
+	unsigned int skipflags;
 
-	if (cnt >= 3) {
+	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;
+
+	if (!exiting && cnt >= 3) {
 		if (cnt > sizeof buf)
 			cnt = sizeof buf;
 		if (copy_from_user(buf, str, cnt))
 			return -EFAULT;
 		all = !strncmp(buf, "all", 3);
+		if (!all)
+			specified = 1;
 	}
 
+	flush_scheduled_work();
+	/* pass one: without sleeping, do aoedev_downdev */
 	spin_lock_irqsave(&devlist_lock, flags);
-	dd = &devlist;
-	while ((d = *dd)) {
+	for (d = devlist; d; d = d->next) {
 		spin_lock(&d->lock);
-		if ((!all && (d->flags & DEVFL_UP))
-		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+		if (exiting) {
+			/* unconditionally take each device down */
+		} else if (specified) {
+			if (!user_req(buf, cnt, d))
+				goto cont;
+		} else if ((!all && (d->flags & DEVFL_UP))
+		|| d->flags & skipflags
 		|| d->nopen
-		|| d->ref) {
-			spin_unlock(&d->lock);
-			dd = &d->next;
-			continue;
-		}
-		*dd = d->next;
+		|| d->ref)
+			goto cont;
+
 		aoedev_downdev(d);
 		d->flags |= DEVFL_TKILL;
+cont:
 		spin_unlock(&d->lock);
-		d->next = rmd;
-		rmd = d;
 	}
 	spin_unlock_irqrestore(&devlist_lock, flags);
-	while ((d = rmd)) {
-		rmd = d->next;
-		del_timer_sync(&d->timer);
-		aoedev_freedev(d);	/* must be able to sleep */
+
+	/* pass two: call freedev, which might sleep,
+	 * for aoedevs marked with DEVFL_TKILL
+	 */
+restart:
+	spin_lock_irqsave(&devlist_lock, flags);
+	for (d = devlist; d; d = d->next) {
+		spin_lock(&d->lock);
+		if (d->flags & DEVFL_TKILL
+		&& !(d->flags & DEVFL_FREEING)) {
+			spin_unlock(&d->lock);
+			spin_unlock_irqrestore(&devlist_lock, flags);
+			freedev(d);
+			goto restart;
+		}
+		spin_unlock(&d->lock);
 	}
+
+	/* pass three: remove aoedevs marked with DEVFL_FREED */
+	for (dd = &devlist, d = *dd; d; d = *dd) {
+		struct aoedev *doomed = NULL;
+
+		spin_lock(&d->lock);
+		if (d->flags & DEVFL_FREED) {
+			*dd = d->next;
+			doomed = d;
+		} else {
+			dd = &d->next;
+		}
+		spin_unlock(&d->lock);
+		if (doomed) {
+			kfree(doomed->targets);
+			kfree(doomed);
+		}
+	}
+	spin_unlock_irqrestore(&devlist_lock, flags);
+
 	return 0;
 }
 
+int
+aoedev_flush(const char __user *str, size_t cnt)
+{
+	return flush(str, cnt, NOT_EXITING);
+}
+
 /* This has been confirmed to occur once with Tms=3*1000 due to the
  * driver changing link and not processing its transmit ring.  The
  * problem is hard enough to solve by returning an error that I'm
@@ -332,13 +437,20 @@
 	struct aoedev *d;
 	int i;
 	ulong flags;
-	ulong sysminor;
+	ulong sysminor = 0;
 
 	spin_lock_irqsave(&devlist_lock, flags);
 
 	for (d=devlist; d; d=d->next)
 		if (d->aoemajor == maj && d->aoeminor == min) {
+			spin_lock(&d->lock);
+			if (d->flags & DEVFL_TKILL) {
+				spin_unlock(&d->lock);
+				d = NULL;
+				goto out;
+			}
 			d->ref++;
+			spin_unlock(&d->lock);
 			break;
 		}
 	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
@@ -346,6 +458,13 @@
 	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
 	if (!d)
 		goto out;
+	d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
+	if (!d->targets) {
+		kfree(d);
+		d = NULL;
+		goto out;
+	}
+	d->ntargets = NTARGETS;
 	INIT_WORK(&d->work, aoecmd_sleepwork);
 	spin_lock_init(&d->lock);
 	skb_queue_head_init(&d->skbpool);
@@ -359,10 +478,12 @@
 	d->ref = 1;
 	for (i = 0; i < NFACTIVE; i++)
 		INIT_LIST_HEAD(&d->factive[i]);
+	INIT_LIST_HEAD(&d->rexmitq);
 	d->sysminor = sysminor;
 	d->aoemajor = maj;
 	d->aoeminor = min;
-	d->mintimer = MINTIMER;
+	d->rttavg = RTTAVG_INIT;
+	d->rttdev = RTTDEV_INIT;
 	d->next = devlist;
 	devlist = d;
  out:
@@ -396,21 +517,9 @@
 void
 aoedev_exit(void)
 {
-	struct aoedev *d;
-	ulong flags;
-
+	flush_scheduled_work();
 	aoe_flush_iocq();
-	while ((d = devlist)) {
-		devlist = d->next;
-
-		spin_lock_irqsave(&d->lock, flags);
-		aoedev_downdev(d);
-		d->flags |= DEVFL_TKILL;
-		spin_unlock_irqrestore(&d->lock, flags);
-
-		del_timer_sync(&d->timer);
-		aoedev_freedev(d);
-	}
+	flush(NULL, 0, EXITING);
 }
 
 int __init
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 04793c2..4b987c2 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -105,7 +105,7 @@
 	aoechr_exit();
  chr_fail:
 	aoedev_exit();
-	
+
 	printk(KERN_INFO "aoe: initialisation failure.\n");
 	return ret;
 }
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 162c647..71d3ea8 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -31,7 +31,7 @@
 
 static char aoe_iflist[IFLISTSZ];
 module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600);
-MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\"");
+MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=dev1[,dev2...]");
 
 static wait_queue_head_t txwq;
 static struct ktstate kts;
@@ -52,13 +52,18 @@
 
 /* enters with txlock held */
 static int
-tx(void)
+tx(void) __must_hold(&txlock)
 {
 	struct sk_buff *skb;
+	struct net_device *ifp;
 
 	while ((skb = skb_dequeue(&skbtxq))) {
 		spin_unlock_irq(&txlock);
-		dev_queue_xmit(skb);
+		ifp = skb->dev;
+		if (dev_queue_xmit(skb) == NET_XMIT_DROP && net_ratelimit())
+			pr_warn("aoe: packet could not be sent on %s.  %s\n",
+				ifp ? ifp->name : "netif",
+				"consider increasing tx_queue_len");
 		spin_lock_irq(&txlock);
 	}
 	return 0;
@@ -119,8 +124,8 @@
 	}
 }
 
-/* 
- * (1) len doesn't include the header by default.  I want this. 
+/*
+ * (1) len doesn't include the header by default.  I want this.
  */
 static int
 aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev)
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ca83f96..6526157 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -41,8 +41,9 @@
 #include <linux/spinlock.h>
 #include <linux/compat.h>
 #include <linux/mutex.h>
+#include <linux/bitmap.h>
+#include <linux/io.h>
 #include <asm/uaccess.h>
-#include <asm/io.h>
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
@@ -978,8 +979,7 @@
 		i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
 		if (i == h->nr_cmds)
 			return NULL;
-	} while (test_and_set_bit(i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
+	} while (test_and_set_bit(i, h->cmd_pool_bits) != 0);
 	c = h->cmd_pool + i;
 	memset(c, 0, sizeof(CommandList_struct));
 	cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct);
@@ -1046,8 +1046,7 @@
 	int i;
 
 	i = c - h->cmd_pool;
-	clear_bit(i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG));
+	clear_bit(i, h->cmd_pool_bits);
 	h->nr_frees++;
 }
 
@@ -4268,10 +4267,7 @@
 
 static inline bool CISS_signature_present(ctlr_info_t *h)
 {
-	if ((readb(&h->cfgtable->Signature[0]) != 'C') ||
-	    (readb(&h->cfgtable->Signature[1]) != 'I') ||
-	    (readb(&h->cfgtable->Signature[2]) != 'S') ||
-	    (readb(&h->cfgtable->Signature[3]) != 'S')) {
+	if (!check_signature(h->cfgtable->Signature, "CISS", 4)) {
 		dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
 		return false;
 	}
@@ -4812,8 +4808,7 @@
 
 static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h)
 {
-	h->cmd_pool_bits = kmalloc(
-		DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
+	h->cmd_pool_bits = kmalloc(BITS_TO_LONGS(h->nr_cmds) *
 		sizeof(unsigned long), GFP_KERNEL);
 	h->cmd_pool = pci_alloc_consistent(h->pdev,
 		h->nr_cmds * sizeof(CommandList_struct),
@@ -5068,9 +5063,7 @@
 	pci_set_drvdata(pdev, h);
 	/* command and error info recs zeroed out before
 	   they are used */
-	memset(h->cmd_pool_bits, 0,
-	       DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
-			* sizeof(unsigned long));
+	bitmap_zero(h->cmd_pool_bits, h->nr_cmds);
 
 	h->num_luns = 0;
 	h->highest_lun = -1;
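
The cciss hunks above swap hand-rolled word-and-bit arithmetic for the kernel's flat bitmap API: BITS_TO_LONGS() sizes the allocation, test_and_set_bit() and clear_bit() take plain bit indices, and bitmap_zero() replaces the open-coded memset. A non-atomic user-space sketch of the same command-slot shape (the kernel primitives are atomic):

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG	(CHAR_BIT * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* non-atomic stand-in for the kernel's test_and_set_bit() */
static int test_and_set(unsigned long *map, unsigned int i)
{
	unsigned long mask = 1UL << (i % BITS_PER_LONG);
	unsigned long *w = map + i / BITS_PER_LONG;
	int old = (*w & mask) != 0;

	*w |= mask;
	return old;
}

int main(void)
{
	unsigned int nr_cmds = 100, i;
	unsigned long *bits = calloc(BITS_TO_LONGS(nr_cmds), sizeof(*bits));

	if (!bits)
		return 1;
	/* claim the first three command slots */
	for (i = 0; i < 3; i++)
		if (test_and_set(bits, i))
			printf("slot %u was already busy\n", i);
	free(bits);
	return 0;
}
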
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index df09837..7845bd6 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -2,13 +2,14 @@
 # DRBD device driver configuration
 #
 
-comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
-	depends on PROC_FS='n' || INET='n' || CONNECTOR='n'
+comment "DRBD disabled because PROC_FS or INET not selected"
+	depends on PROC_FS='n' || INET='n'
 
 config BLK_DEV_DRBD
 	tristate "DRBD Distributed Replicated Block Device support"
-	depends on PROC_FS && INET && CONNECTOR
+	depends on PROC_FS && INET
 	select LRU_CACHE
+	select LIBCRC32C
 	default n
 	help
 
@@ -58,7 +59,8 @@
 	  32	data read
 	  64	read ahead
 	  128	kmalloc of bitmap
-	  256	allocation of EE (epoch_entries)
+	  256	allocation of peer_requests
+	  512	insert data corruption on receiving side
 
 	  fault_devs: bitmask of minor numbers
 	  fault_rate: frequency in percent
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 0d3f337..8b45033 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -1,5 +1,7 @@
 drbd-y := drbd_bitmap.o drbd_proc.o
 drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
 drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
+drbd-y += drbd_interval.o drbd_state.o
+drbd-y += drbd_nla.o
 
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 3fbef01..92510f8 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -24,21 +24,73 @@
  */
 
 #include <linux/slab.h>
+#include <linux/crc32c.h>
 #include <linux/drbd.h>
+#include <linux/drbd_limits.h>
+#include <linux/dynamic_debug.h>
 #include "drbd_int.h"
 #include "drbd_wrappers.h"
 
-/* We maintain a trivial checksum in our on disk activity log.
- * With that we can ensure correct operation even when the storage
- * device might do a partial (last) sector write while losing power.
- */
-struct __packed al_transaction {
-	u32       magic;
-	u32       tr_number;
-	struct __packed {
-		u32 pos;
-		u32 extent; } updates[1 + AL_EXTENTS_PT];
-	u32       xor_sum;
+
+enum al_transaction_types {
+	AL_TR_UPDATE = 0,
+	AL_TR_INITIALIZED = 0xffff
+};
+/* all fields on disk in big endian */
+struct __packed al_transaction_on_disk {
+	/* don't we all like magic */
+	__be32	magic;
+
+	/* to identify the most recent transaction block
+	 * in the on disk ring buffer */
+	__be32	tr_number;
+
+	/* checksum on the full 4k block, with this field set to 0. */
+	__be32	crc32c;
+
+	/* type of transaction, special transaction types like:
+	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
+	 * see also enum al_transaction_types */
+	__be16	transaction_type;
+
+	/* we currently allow only a few thousand extents,
+	 * so 16bit will be enough for the slot number. */
+
+	/* how many updates in this transaction */
+	__be16	n_updates;
+
+	/* maximum slot number, "al-extents" in drbd.conf speak.
+	 * Having this in each transaction should make reconfiguration
+	 * of that parameter easier. */
+	__be16	context_size;
+
+	/* slot number the context starts with */
+	__be16	context_start_slot_nr;
+
+	/* Some reserved bytes.  Expected usage is a 64bit counter of
+	 * sectors written since device creation, and other data supporting
+	 * generation-tag usage */
+	__be32	__reserved[4];
+
+	/* --- 36 bytes used --- */
+
+	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
+	 * in one transaction, then use the remaining bytes in the 4k block for
+	 * context information.  A "flexible" number of updates per transaction
+	 * does not help, as we have to account for the case when all update
+	 * slots are used anyway, so it would only complicate the code without
+	 * additional benefit.
+	 */
+	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* but the extent number is 32bit, which at an extent size of 4 MiB
+	 * allows covering device sizes of up to 2**54 bytes (16 PiB) */
+	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* --- 420 bytes used (36 + 64*6) --- */
+
+	/* 4096 - 420 = 3676 = 919 * 4 */
+	__be32	context[AL_CONTEXT_PER_TRANSACTION];
 };
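
The crc32c field is defined as a checksum over the full 4k block computed with the field itself zeroed, so a reader must mirror that convention when verifying. A sketch of such a read-side check (this helper is illustrative, not part of the patch; crc32c() here is the kernel routine with the usual (seed, buf, len) signature):

#include <linux/crc32c.h>
#include <linux/types.h>

/* sketch: returns true if the on-disk transaction block checksums out */
static bool al_tr_block_valid(struct al_transaction_on_disk *b)
{
	u32 stored = be32_to_cpu(b->crc32c);
	bool ok;

	b->crc32c = 0;				/* zero the field, as on write */
	ok = crc32c(0, b, 4096) == stored;
	b->crc32c = cpu_to_be32(stored);	/* put it back */
	return ok;
}
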
 
 struct update_odbm_work {
@@ -48,22 +100,11 @@
 
 struct update_al_work {
 	struct drbd_work w;
-	struct lc_element *al_ext;
 	struct completion event;
-	unsigned int enr;
-	/* if old_enr != LC_FREE, write corresponding bitmap sector, too */
-	unsigned int old_enr;
+	int err;
 };
 
-struct drbd_atodb_wait {
-	atomic_t           count;
-	struct completion  io_done;
-	struct drbd_conf   *mdev;
-	int                error;
-};
-
-
-int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
+static int al_write_transaction(struct drbd_conf *mdev);
 
 void *drbd_md_get_buffer(struct drbd_conf *mdev)
 {
@@ -82,22 +123,24 @@
 		wake_up(&mdev->misc_wait);
 }
 
-static bool md_io_allowed(struct drbd_conf *mdev)
-{
-	enum drbd_disk_state ds = mdev->state.disk;
-	return ds >= D_NEGOTIATING || ds == D_ATTACHING;
-}
-
-void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 				     unsigned int *done)
 {
-	long dt = bdev->dc.disk_timeout * HZ / 10;
+	long dt;
+
+	rcu_read_lock();
+	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
+	rcu_read_unlock();
+	dt = dt * HZ / 10;
 	if (dt == 0)
 		dt = MAX_SCHEDULE_TIMEOUT;
 
-	dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt);
-	if (dt == 0)
+	dt = wait_event_timeout(mdev->misc_wait,
+			*done || test_bit(FORCE_DETACH, &mdev->flags), dt);
+	if (dt == 0) {
 		dev_err(DEV, "meta-data IO operation timed out\n");
+		drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH);
+	}
 }
 
 static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
@@ -106,7 +149,7 @@
 				 int rw, int size)
 {
 	struct bio *bio;
-	int ok;
+	int err;
 
 	mdev->md_io.done = 0;
 	mdev->md_io.error = -ENODEV;
@@ -118,8 +161,8 @@
 	bio = bio_alloc_drbd(GFP_NOIO);
 	bio->bi_bdev = bdev->md_bdev;
 	bio->bi_sector = sector;
-	ok = (bio_add_page(bio, page, size, 0) == size);
-	if (!ok)
+	err = -EIO;
+	if (bio_add_page(bio, page, size, 0) != size)
 		goto out;
 	bio->bi_private = &mdev->md_io;
 	bio->bi_end_io = drbd_md_io_complete;
@@ -127,7 +170,7 @@
 
 	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
 		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
-		ok = 0;
+		err = -ENODEV;
 		goto out;
 	}
 
@@ -137,86 +180,47 @@
 		bio_endio(bio, -EIO);
 	else
 		submit_bio(rw, bio);
-	wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
-	ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;
+	wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done);
+	if (bio_flagged(bio, BIO_UPTODATE))
+		err = mdev->md_io.error;
 
  out:
 	bio_put(bio);
-	return ok;
+	return err;
 }
 
 int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 			 sector_t sector, int rw)
 {
-	int logical_block_size, mask, ok;
-	int offset = 0;
+	int err;
 	struct page *iop = mdev->md_io_page;
 
 	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);
 
 	BUG_ON(!bdev->md_bdev);
 
-	logical_block_size = bdev_logical_block_size(bdev->md_bdev);
-	if (logical_block_size == 0)
-		logical_block_size = MD_SECTOR_SIZE;
-
-	/* in case logical_block_size != 512 [ s390 only? ] */
-	if (logical_block_size != MD_SECTOR_SIZE) {
-		mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
-		D_ASSERT(mask == 1 || mask == 3 || mask == 7);
-		D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
-		offset = sector & mask;
-		sector = sector & ~mask;
-		iop = mdev->md_io_tmpp;
-
-		if (rw & WRITE) {
-			/* these are GFP_KERNEL pages, pre-allocated
-			 * on device initialization */
-			void *p = page_address(mdev->md_io_page);
-			void *hp = page_address(mdev->md_io_tmpp);
-
-			ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
-					READ, logical_block_size);
-
-			if (unlikely(!ok)) {
-				dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
-				    "READ [logical_block_size!=512]) failed!\n",
-				    (unsigned long long)sector);
-				return 0;
-			}
-
-			memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
-		}
-	}
+	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
+	     current->comm, current->pid, __func__,
+	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 
 	if (sector < drbd_md_first_sector(bdev) ||
-	    sector > drbd_md_last_sector(bdev))
+	    sector + 7 > drbd_md_last_sector(bdev))
 		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
 		     current->comm, current->pid, __func__,
 		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 
-	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
-	if (unlikely(!ok)) {
-		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
-		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
-		return 0;
+	err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
+	if (err) {
+		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
+		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
 	}
-
-	if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
-		void *p = page_address(mdev->md_io_page);
-		void *hp = page_address(mdev->md_io_tmpp);
-
-		memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
-	}
-
-	return ok;
+	return err;
 }
 
 static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 {
 	struct lc_element *al_ext;
 	struct lc_element *tmp;
-	unsigned long     al_flags = 0;
 	int wake;
 
 	spin_lock_irq(&mdev->al_lock);
@@ -231,76 +235,92 @@
 			return NULL;
 		}
 	}
-	al_ext   = lc_get(mdev->act_log, enr);
-	al_flags = mdev->act_log->flags;
+	al_ext = lc_get(mdev->act_log, enr);
 	spin_unlock_irq(&mdev->al_lock);
-
-	/*
-	if (!al_ext) {
-		if (al_flags & LC_STARVING)
-			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
-		if (al_flags & LC_DIRTY)
-			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
-	}
-	*/
-
 	return al_ext;
 }
 
-void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
+void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
 {
-	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
-	struct lc_element *al_ext;
-	struct update_al_work al_work;
+	/* for bios crossing activity log extent boundaries,
+	 * we may need to activate two extents in one go */
+	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
+	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
+	unsigned enr;
+	bool locked = false;
 
+
+	D_ASSERT(first <= last);
 	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
 
-	wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));
+	for (enr = first; enr <= last; enr++)
+		wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL);
 
-	if (al_ext->lc_number != enr) {
+	/* Serialize multiple transactions.
+	 * This uses test_and_set_bit, memory barrier is implicit.
+	 */
+	wait_event(mdev->al_wait,
+			mdev->act_log->pending_changes == 0 ||
+			(locked = lc_try_lock_for_transaction(mdev->act_log)));
+
+	if (locked) {
 		/* drbd_al_write_transaction(mdev,al_ext,enr);
 		 * recurses into generic_make_request(), which
 		 * disallows recursion, bios being serialized on the
 		 * current->bio_tail list now.
 		 * we have to delegate updates to the activity log
 		 * to the worker thread. */
-		init_completion(&al_work.event);
-		al_work.al_ext = al_ext;
-		al_work.enr = enr;
-		al_work.old_enr = al_ext->lc_number;
-		al_work.w.cb = w_al_write_transaction;
-		drbd_queue_work_front(&mdev->data.work, &al_work.w);
-		wait_for_completion(&al_work.event);
 
-		mdev->al_writ_cnt++;
+		/* Double check: it may have been committed by someone else,
+		 * while we have been waiting for the lock. */
+		if (mdev->act_log->pending_changes) {
+			bool write_al_updates;
 
-		spin_lock_irq(&mdev->al_lock);
-		lc_changed(mdev->act_log, al_ext);
-		spin_unlock_irq(&mdev->al_lock);
+			rcu_read_lock();
+			write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates;
+			rcu_read_unlock();
+
+			if (write_al_updates) {
+				al_write_transaction(mdev);
+				mdev->al_writ_cnt++;
+			}
+
+			spin_lock_irq(&mdev->al_lock);
+			/* FIXME
+			if (err)
+				we need an "lc_cancel" here;
+			*/
+			lc_committed(mdev->act_log);
+			spin_unlock_irq(&mdev->al_lock);
+		}
+		lc_unlock(mdev->act_log);
 		wake_up(&mdev->al_wait);
 	}
 }
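
The first/last arithmetic at the top of drbd_al_begin_io() is plain sector-to-extent mapping: shifting a 512-byte sector number right by AL_EXTENT_SHIFT - 9 yields the activity-log extent number, and a request straddling an extent boundary activates two extents. A worked user-space example, assuming 4 MiB extents (AL_EXTENT_SHIFT of 22; the real constant lives in the drbd headers):

#include <stdio.h>

#define AL_EXTENT_SHIFT 22	/* assumed: 4 MiB extents */

int main(void)
{
	unsigned long long sector = 8191;	/* request start, in 512B sectors */
	unsigned int size = 4096;		/* request length in bytes */
	unsigned first = sector >> (AL_EXTENT_SHIFT - 9);
	unsigned last = size == 0 ? first :
		(sector + (size >> 9) - 1) >> (AL_EXTENT_SHIFT - 9);

	/* a 4 KiB write starting at sector 8191 crosses extents 0 and 1 */
	printf("extents %u..%u\n", first, last);
	return 0;
}
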
 
-void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
+void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i)
 {
-	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+	/* for bios crossing activity log extent boundaries,
+	 * we may need to activate two extents in one go */
+	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
+	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
+	unsigned enr;
 	struct lc_element *extent;
 	unsigned long flags;
 
+	D_ASSERT(first <= last);
 	spin_lock_irqsave(&mdev->al_lock, flags);
 
-	extent = lc_find(mdev->act_log, enr);
-
-	if (!extent) {
-		spin_unlock_irqrestore(&mdev->al_lock, flags);
-		dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
-		return;
+	for (enr = first; enr <= last; enr++) {
+		extent = lc_find(mdev->act_log, enr);
+		if (!extent) {
+			dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
+			continue;
+		}
+		lc_put(mdev->act_log, extent);
 	}
-
-	if (lc_put(mdev->act_log, extent) == 0)
-		wake_up(&mdev->al_wait);
-
 	spin_unlock_irqrestore(&mdev->al_lock, flags);
+	wake_up(&mdev->al_wait);
 }
 
 #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
@@ -326,296 +346,148 @@
 	return rs_enr >>
 		/* bit to page */
 		((PAGE_SHIFT + 3) -
-		/* al extent number to bit */
+		/* resync extent number to bit */
 		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
 }
 
-int
-w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+static int
+_al_write_transaction(struct drbd_conf *mdev)
 {
-	struct update_al_work *aw = container_of(w, struct update_al_work, w);
-	struct lc_element *updated = aw->al_ext;
-	const unsigned int new_enr = aw->enr;
-	const unsigned int evicted = aw->old_enr;
-	struct al_transaction *buffer;
+	struct al_transaction_on_disk *buffer;
+	struct lc_element *e;
 	sector_t sector;
-	int i, n, mx;
-	unsigned int extent_nr;
-	u32 xor_sum = 0;
+	int i, mx;
+	unsigned extent_nr;
+	unsigned crc = 0;
+	int err = 0;
 
 	if (!get_ldev(mdev)) {
-		dev_err(DEV,
-			"disk is %s, cannot start al transaction (-%d +%d)\n",
-			drbd_disk_str(mdev->state.disk), evicted, new_enr);
-		complete(&((struct update_al_work *)w)->event);
-		return 1;
+		dev_err(DEV, "disk is %s, cannot start al transaction\n",
+			drbd_disk_str(mdev->state.disk));
+		return -EIO;
 	}
-	/* do we have to do a bitmap write, first?
-	 * TODO reduce maximum latency:
-	 * submit both bios, then wait for both,
-	 * instead of doing two synchronous sector writes.
-	 * For now, we must not write the transaction,
-	 * if we cannot write out the bitmap of the evicted extent. */
-	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
-		drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
 
 	/* The bitmap write may have failed, causing a state change. */
 	if (mdev->state.disk < D_INCONSISTENT) {
 		dev_err(DEV,
-			"disk is %s, cannot write al transaction (-%d +%d)\n",
-			drbd_disk_str(mdev->state.disk), evicted, new_enr);
-		complete(&((struct update_al_work *)w)->event);
+			"disk is %s, cannot write al transaction\n",
+			drbd_disk_str(mdev->state.disk));
 		put_ldev(mdev);
-		return 1;
+		return -EIO;
 	}
 
 	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
 	if (!buffer) {
 		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
-		complete(&((struct update_al_work *)w)->event);
 		put_ldev(mdev);
-		return 1;
+		return -ENODEV;
 	}
 
-	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
+	memset(buffer, 0, sizeof(*buffer));
+	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
 	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
 
-	n = lc_index_of(mdev->act_log, updated);
+	i = 0;
 
-	buffer->updates[0].pos = cpu_to_be32(n);
-	buffer->updates[0].extent = cpu_to_be32(new_enr);
+	/* Even though no one can start to change this list once
+	 * we set LC_LOCKED (from drbd_al_begin_io() via
+	 * lc_try_lock_for_transaction()), someone may still be
+	 * in the process of changing it. */
+	spin_lock_irq(&mdev->al_lock);
+	list_for_each_entry(e, &mdev->act_log->to_be_changed, list) {
+		if (i == AL_UPDATES_PER_TRANSACTION) {
+			i++;
+			break;
+		}
+		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
+		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
+		if (e->lc_number != LC_FREE)
+			drbd_bm_mark_for_writeout(mdev,
+					al_extent_to_bm_page(e->lc_number));
+		i++;
+	}
+	spin_unlock_irq(&mdev->al_lock);
+	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
 
-	xor_sum ^= new_enr;
+	buffer->n_updates = cpu_to_be16(i);
+	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
+		buffer->update_slot_nr[i] = cpu_to_be16(-1);
+		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
+	}
 
-	mx = min_t(int, AL_EXTENTS_PT,
+	buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements);
+	buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle);
+
+	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
 		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
 	for (i = 0; i < mx; i++) {
 		unsigned idx = mdev->al_tr_cycle + i;
 		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
-		buffer->updates[i+1].pos = cpu_to_be32(idx);
-		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
-		xor_sum ^= extent_nr;
+		buffer->context[i] = cpu_to_be32(extent_nr);
 	}
-	for (; i < AL_EXTENTS_PT; i++) {
-		buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
-		buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
-		xor_sum ^= LC_FREE;
-	}
-	mdev->al_tr_cycle += AL_EXTENTS_PT;
+	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
+		buffer->context[i] = cpu_to_be32(LC_FREE);
+
+	mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
 	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
 		mdev->al_tr_cycle = 0;
 
-	buffer->xor_sum = cpu_to_be32(xor_sum);
-
 	sector =  mdev->ldev->md.md_offset
-		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
+		+ mdev->ldev->md.al_offset
+		+ mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);
 
-	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
+	crc = crc32c(0, buffer, 4096);
+	buffer->crc32c = cpu_to_be32(crc);
+
+	if (drbd_bm_write_hinted(mdev))
+		err = -EIO;
+		/* drbd_chk_io_error done already */
+	else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
+		err = -EIO;
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
-
-	if (++mdev->al_tr_pos >
-	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
-		mdev->al_tr_pos = 0;
-
-	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
-	mdev->al_tr_number++;
+	} else {
+		/* advance ringbuffer position and transaction counter */
+		mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
+		mdev->al_tr_number++;
+	}
 
 	drbd_md_put_buffer(mdev);
-
-	complete(&((struct update_al_work *)w)->event);
 	put_ldev(mdev);
 
-	return 1;
+	return err;
 }
 
-/**
- * drbd_al_read_tr() - Read a single transaction from the on disk activity log
- * @mdev:	DRBD device.
- * @bdev:	Block device to read form.
- * @b:		pointer to an al_transaction.
- * @index:	On disk slot of the transaction to read.
- *
- * Returns -1 on IO error, 0 on checksum error and 1 upon success.
- */
-static int drbd_al_read_tr(struct drbd_conf *mdev,
-			   struct drbd_backing_dev *bdev,
-			   struct al_transaction *b,
-			   int index)
+
+static int w_al_write_transaction(struct drbd_work *w, int unused)
 {
-	sector_t sector;
-	int rv, i;
-	u32 xor_sum = 0;
+	struct update_al_work *aw = container_of(w, struct update_al_work, w);
+	struct drbd_conf *mdev = w->mdev;
+	int err;
 
-	sector = bdev->md.md_offset + bdev->md.al_offset + index;
+	err = _al_write_transaction(mdev);
+	aw->err = err;
+	complete(&aw->event);
 
-	/* Dont process error normally,
-	 * as this is done before disk is attached! */
-	if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
-		return -1;
-
-	rv = (be32_to_cpu(b->magic) == DRBD_MAGIC);
-
-	for (i = 0; i < AL_EXTENTS_PT + 1; i++)
-		xor_sum ^= be32_to_cpu(b->updates[i].extent);
-	rv &= (xor_sum == be32_to_cpu(b->xor_sum));
-
-	return rv;
+	return err != -EIO ? err : 0;
 }
 
-/**
- * drbd_al_read_log() - Restores the activity log from its on disk representation.
- * @mdev:	DRBD device.
- * @bdev:	Block device to read form.
- *
- * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
- */
-int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+/* Calls from worker context (see w_restart_disk_io()) need to write the
+   transaction directly.  Others come through generic_make_request() and
+   must delegate the write to the worker. */
+static int al_write_transaction(struct drbd_conf *mdev)
 {
-	struct al_transaction *buffer;
-	int i;
-	int rv;
-	int mx;
-	int active_extents = 0;
-	int transactions = 0;
-	int found_valid = 0;
-	int from = 0;
-	int to = 0;
-	u32 from_tnr = 0;
-	u32 to_tnr = 0;
-	u32 cnr;
+	struct update_al_work al_work;
 
-	mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
+	if (current == mdev->tconn->worker.task)
+		return _al_write_transaction(mdev);
 
-	/* lock out all other meta data io for now,
-	 * and make sure the page is mapped.
-	 */
-	buffer = drbd_md_get_buffer(mdev);
-	if (!buffer)
-		return 0;
+	init_completion(&al_work.event);
+	al_work.w.cb = w_al_write_transaction;
+	al_work.w.mdev = mdev;
+	drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w);
+	wait_for_completion(&al_work.event);
 
-	/* Find the valid transaction in the log */
-	for (i = 0; i <= mx; i++) {
-		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
-		if (rv == 0)
-			continue;
-		if (rv == -1) {
-			drbd_md_put_buffer(mdev);
-			return 0;
-		}
-		cnr = be32_to_cpu(buffer->tr_number);
-
-		if (++found_valid == 1) {
-			from = i;
-			to = i;
-			from_tnr = cnr;
-			to_tnr = cnr;
-			continue;
-		}
-		if ((int)cnr - (int)from_tnr < 0) {
-			D_ASSERT(from_tnr - cnr + i - from == mx+1);
-			from = i;
-			from_tnr = cnr;
-		}
-		if ((int)cnr - (int)to_tnr > 0) {
-			D_ASSERT(cnr - to_tnr == i - to);
-			to = i;
-			to_tnr = cnr;
-		}
-	}
-
-	if (!found_valid) {
-		dev_warn(DEV, "No usable activity log found.\n");
-		drbd_md_put_buffer(mdev);
-		return 1;
-	}
-
-	/* Read the valid transactions.
-	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
-	i = from;
-	while (1) {
-		int j, pos;
-		unsigned int extent_nr;
-		unsigned int trn;
-
-		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
-		ERR_IF(rv == 0) goto cancel;
-		if (rv == -1) {
-			drbd_md_put_buffer(mdev);
-			return 0;
-		}
-
-		trn = be32_to_cpu(buffer->tr_number);
-
-		spin_lock_irq(&mdev->al_lock);
-
-		/* This loop runs backwards because in the cyclic
-		   elements there might be an old version of the
-		   updated element (in slot 0). So the element in slot 0
-		   can overwrite old versions. */
-		for (j = AL_EXTENTS_PT; j >= 0; j--) {
-			pos = be32_to_cpu(buffer->updates[j].pos);
-			extent_nr = be32_to_cpu(buffer->updates[j].extent);
-
-			if (extent_nr == LC_FREE)
-				continue;
-
-			lc_set(mdev->act_log, extent_nr, pos);
-			active_extents++;
-		}
-		spin_unlock_irq(&mdev->al_lock);
-
-		transactions++;
-
-cancel:
-		if (i == to)
-			break;
-		i++;
-		if (i > mx)
-			i = 0;
-	}
-
-	mdev->al_tr_number = to_tnr+1;
-	mdev->al_tr_pos = to;
-	if (++mdev->al_tr_pos >
-	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
-		mdev->al_tr_pos = 0;
-
-	/* ok, we are done with it */
-	drbd_md_put_buffer(mdev);
-
-	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
-	     transactions, active_extents);
-
-	return 1;
-}
-
-/**
- * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
- * @mdev:	DRBD device.
- */
-void drbd_al_apply_to_bm(struct drbd_conf *mdev)
-{
-	unsigned int enr;
-	unsigned long add = 0;
-	char ppb[10];
-	int i, tmp;
-
-	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-
-	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-		if (enr == LC_FREE)
-			continue;
-		tmp = drbd_bm_ALe_set_all(mdev, enr);
-		dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
-		add += tmp;
-	}
-
-	lc_unlock(mdev->act_log);
-	wake_up(&mdev->al_wait);
-
-	dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
-	     ppsize(ppb, Bit2KB(add)));
+	return al_work.err;
 }
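
al_write_transaction() dodges a self-deadlock here: if the worker thread itself needs the transaction written, queueing the work and sleeping would block the only thread that could ever run it, so the worker calls _al_write_transaction() directly and everyone else delegates. A condensed kernel-style sketch of the pattern (all names below are illustrative, not drbd's):

#include <linux/completion.h>
#include <linux/sched.h>

struct op_work {
	struct completion event;
	int err;
};

/* sketch: the queueing machinery is assumed to exist elsewhere,
 * as tconn->sender_work does in drbd */
static int op_synchronously(struct task_struct *worker_task,
			    int (*op)(void),
			    void (*queue)(struct op_work *))
{
	struct op_work w;

	if (current == worker_task)	/* worker must not wait on itself */
		return op();

	init_completion(&w.event);
	queue(&w);	/* worker runs op(), stores err, completes event */
	wait_for_completion(&w.event);
	return w.err;
}
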
 
 static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
@@ -645,7 +517,7 @@
 	struct lc_element *al_ext;
 	int i;
 
-	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
+	D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));
 
 	for (i = 0; i < mdev->act_log->nr_elements; i++) {
 		al_ext = lc_element_by_index(mdev->act_log, i);
@@ -657,15 +529,17 @@
 	wake_up(&mdev->al_wait);
 }
 
-static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
+	struct drbd_conf *mdev = w->mdev;
+	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
 
 	if (!get_ldev(mdev)) {
 		if (__ratelimit(&drbd_ratelimit_state))
 			dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
 		kfree(udw);
-		return 1;
+		return 0;
 	}
 
 	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
@@ -683,9 +557,9 @@
 			break;
 		}
 	}
-	drbd_bcast_sync_progress(mdev);
+	drbd_bcast_event(mdev, &sib);
 
-	return 1;
+	return 0;
 }
 
 
@@ -755,7 +629,9 @@
 			}
 			ext->rs_left = rs_left;
 			ext->rs_failed = success ? 0 : count;
-			lc_changed(mdev->resync, &ext->lce);
+			/* we don't keep a persistent log of the resync lru,
+			 * we can commit any change right away. */
+			lc_committed(mdev->resync);
 		}
 		lc_put(mdev->resync, &ext->lce);
 		/* no race, we are within the al_lock! */
@@ -767,7 +643,8 @@
 			if (udw) {
 				udw->enr = ext->lce.lc_number;
 				udw->w.cb = w_update_odbm;
-				drbd_queue_work_front(&mdev->data.work, &udw->w);
+				udw->w.mdev = mdev;
+				drbd_queue_work_front(&mdev->tconn->sender_work, &udw->w);
 			} else {
 				dev_warn(DEV, "Could not kmalloc an udw\n");
 			}
@@ -813,16 +690,22 @@
 	int wake_up = 0;
 	unsigned long flags;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
 	}
+
+	if (!get_ldev(mdev))
+		return; /* no disk, no metadata, no bitmap to clear bits in */
+
 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
 	esector = sector + (size >> 9) - 1;
 
-	ERR_IF(sector >= nr_sectors) return;
-	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+	if (!expect(sector < nr_sectors))
+		goto out;
+	if (!expect(esector < nr_sectors))
+		esector = nr_sectors - 1;
 
 	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
 
@@ -830,7 +713,7 @@
 	 * round up start sector, round down end sector.  we make sure we only
 	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
 	if (unlikely(esector < BM_SECT_PER_BIT-1))
-		return;
+		goto out;
 	if (unlikely(esector == (nr_sectors-1)))
 		ebnr = lbnr;
 	else
@@ -838,14 +721,14 @@
 	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
 
 	if (sbnr > ebnr)
-		return;
+		goto out;
 
 	/*
 	 * ok, (capacity & 7) != 0 sometimes, but who cares...
 	 * we count rs_{total,left} in bits, not sectors.
 	 */
 	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
-	if (count && get_ldev(mdev)) {
+	if (count) {
 		drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
 		spin_lock_irqsave(&mdev->al_lock, flags);
 		drbd_try_clear_on_disk_bm(mdev, sector, count, true);
@@ -854,8 +737,9 @@
 		/* just wake_up unconditionally now, various lc_changed(),
 		 * lc_put() in drbd_try_clear_on_disk_bm(). */
 		wake_up = 1;
-		put_ldev(mdev);
 	}
+out:
+	put_ldev(mdev);
 	if (wake_up)
 		wake_up(&mdev->al_wait);
 }
@@ -871,7 +755,7 @@
 int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
 			    const char *file, const unsigned int line)
 {
-	unsigned long sbnr, ebnr, lbnr, flags;
+	unsigned long sbnr, ebnr, flags;
 	sector_t esector, nr_sectors;
 	unsigned int enr, count = 0;
 	struct lc_element *e;
@@ -880,7 +764,7 @@
 	if (size == 0)
 		return 0;
 
-	if (size < 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "sector: %llus, size: %d\n",
 			(unsigned long long)sector, size);
 		return 0;
@@ -892,12 +776,10 @@
 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
 	esector = sector + (size >> 9) - 1;
 
-	ERR_IF(sector >= nr_sectors)
+	if (!expect(sector < nr_sectors))
 		goto out;
-	ERR_IF(esector >= nr_sectors)
-		esector = (nr_sectors-1);
-
-	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+	if (!expect(esector < nr_sectors))
+		esector = nr_sectors - 1;
 
 	/* we set it out of sync,
 	 * we do not need to round anything here */
@@ -940,7 +822,7 @@
 		if (bm_ext->lce.lc_number != enr) {
 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
 			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
 			wakeup = 1;
 		}
 		if (bm_ext->lce.refcnt == 1)
@@ -956,7 +838,7 @@
 		if (rs_flags & LC_STARVING)
 			dev_warn(DEV, "Have to wait for element"
 			     " (resync LRU too small?)\n");
-		BUG_ON(rs_flags & LC_DIRTY);
+		BUG_ON(rs_flags & LC_LOCKED);
 	}
 
 	return bm_ext;
@@ -964,26 +846,12 @@
 
 static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
 {
-	struct lc_element *al_ext;
-	int rv = 0;
+	int rv;
 
 	spin_lock_irq(&mdev->al_lock);
-	if (unlikely(enr == mdev->act_log->new_number))
-		rv = 1;
-	else {
-		al_ext = lc_find(mdev->act_log, enr);
-		if (al_ext) {
-			if (al_ext->refcnt)
-				rv = 1;
-		}
-	}
+	rv = lc_is_used(mdev->act_log, enr);
 	spin_unlock_irq(&mdev->al_lock);
 
-	/*
-	if (unlikely(rv)) {
-		dev_info(DEV, "Delaying sync read until app's write is done\n");
-	}
-	*/
 	return rv;
 }
 
@@ -1113,13 +981,13 @@
 			if (rs_flags & LC_STARVING)
 				dev_warn(DEV, "Have to wait for element"
 				     " (resync LRU too small?)\n");
-			BUG_ON(rs_flags & LC_DIRTY);
+			BUG_ON(rs_flags & LC_LOCKED);
 			goto try_again;
 		}
 		if (bm_ext->lce.lc_number != enr) {
 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
 			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
 			wake_up(&mdev->al_wait);
 			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
 		}
@@ -1130,8 +998,6 @@
 	}
 check_al:
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
-		if (unlikely(al_enr+i == mdev->act_log->new_number))
-			goto try_again;
 		if (lc_is_used(mdev->act_log, al_enr+i))
 			goto try_again;
 	}
@@ -1266,7 +1132,7 @@
 	sector_t esector, nr_sectors;
 	int wake_up = 0;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -1274,8 +1140,10 @@
 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
 	esector = sector + (size >> 9) - 1;
 
-	ERR_IF(sector >= nr_sectors) return;
-	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+	if (!expect(sector < nr_sectors))
+		return;
+	if (!expect(esector < nr_sectors))
+		esector = nr_sectors - 1;
 
 	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
 
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index d845664..8dc2950 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -119,13 +119,9 @@
 	if (!__ratelimit(&drbd_ratelimit_state))
 		return;
 	dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
-	    current == mdev->receiver.task ? "receiver" :
-	    current == mdev->asender.task  ? "asender"  :
-	    current == mdev->worker.task   ? "worker"   : current->comm,
-	    func, b->bm_why ?: "?",
-	    b->bm_task == mdev->receiver.task ? "receiver" :
-	    b->bm_task == mdev->asender.task  ? "asender"  :
-	    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+		drbd_task_to_thread_name(mdev->tconn, current),
+		func, b->bm_why ?: "?",
+		drbd_task_to_thread_name(mdev->tconn, b->bm_task));
 }
 
 void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
@@ -142,13 +138,9 @@
 
 	if (trylock_failed) {
 		dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
-		    current == mdev->receiver.task ? "receiver" :
-		    current == mdev->asender.task  ? "asender"  :
-		    current == mdev->worker.task   ? "worker"   : current->comm,
-		    why, b->bm_why ?: "?",
-		    b->bm_task == mdev->receiver.task ? "receiver" :
-		    b->bm_task == mdev->asender.task  ? "asender"  :
-		    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+			 drbd_task_to_thread_name(mdev->tconn, current),
+			 why, b->bm_why ?: "?",
+			 drbd_task_to_thread_name(mdev->tconn, b->bm_task));
 		mutex_lock(&b->bm_change);
 	}
 	if (BM_LOCKED_MASK & b->bm_flags)
@@ -196,6 +188,9 @@
 /* to mark for lazy writeout once syncer cleared all clearable bits,
  * we check whether bits have been cleared since last IO. */
 #define BM_PAGE_LAZY_WRITEOUT	28
+/* pages marked with this "HINT" will be considered for writeout
+ * on activity log transactions */
+#define BM_PAGE_HINT_WRITEOUT	27
 
 /* store_page_idx uses non-atomic assignment. It is only used directly after
  * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
@@ -227,8 +222,7 @@
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	void *addr = &page_private(b->bm_pages[page_nr]);
-	clear_bit(BM_PAGE_IO_LOCK, addr);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
 	wake_up(&mdev->bitmap->bm_io_wait);
 }
 
@@ -246,6 +240,27 @@
 	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
 }
 
+/**
+ * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
+ * @mdev:	DRBD device.
+ * @page_nr:	the bitmap page to mark with the "hint" flag
+ *
+ * From within an activity log transaction, we mark a few pages with these
+ * hints, then call drbd_bm_write_hinted(), which will only write out changed
+ * pages which are flagged with this mark.
+ */
+void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
+{
+	struct page *page;
+	if (page_nr >= mdev->bitmap->bm_number_of_pages) {
+		dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
+			 page_nr, (int)mdev->bitmap->bm_number_of_pages);
+		return;
+	}
+	page = mdev->bitmap->bm_pages[page_nr];
+	set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
+}
+
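As the kernel-doc above describes, the hint mechanism pairs with drbd_bm_write_hinted() (declared further down in this patch). A minimal usage sketch, assuming a hypothetical helper and page range:

/* Sketch: hint the bitmap pages touched by an AL transaction, then
 * write out only those that were hinted and actually changed. */
static int example_write_hinted(struct drbd_conf *mdev,
				int first_page, int nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++)
		drbd_bm_mark_for_writeout(mdev, first_page + i);
	return drbd_bm_write_hinted(mdev);
}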
 static int bm_test_page_unchanged(struct page *page)
 {
 	volatile const unsigned long *addr = &page_private(page);
@@ -373,14 +388,16 @@
 		return old_pages;
 
 	/* Trying kmalloc first, falling back to vmalloc.
-	 * GFP_KERNEL is ok, as this is done when a lower level disk is
-	 * "attached" to the drbd.  Context is receiver thread or cqueue
-	 * thread.  As we have no disk yet, we are not in the IO path,
-	 * not even the IO path of the peer. */
+	 * GFP_NOIO, as this is called while drbd IO is "suspended",
+	 * and during resize or attach on diskless Primary,
+	 * we must not block on IO to ourselves.
+	 * Context is receiver thread or drbdsetup. */
 	bytes = sizeof(struct page *)*want;
-	new_pages = kzalloc(bytes, GFP_KERNEL);
+	new_pages = kzalloc(bytes, GFP_NOIO);
 	if (!new_pages) {
-		new_pages = vzalloc(bytes);
+		new_pages = __vmalloc(bytes,
+				GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
+				PAGE_KERNEL);
 		if (!new_pages)
 			return NULL;
 		vmalloced = 1;
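
The switch to GFP_NOIO matters because these allocations can now happen while DRBD IO is suspended; blocking on writeback IO to ourselves would deadlock. The kmalloc-then-vmalloc fallback in the hunk above can be summarized as a standalone sketch (the helper name is hypothetical):

static void *vk_alloc_noio(size_t bytes, int *vmalloced)
{
	void *p = kzalloc(bytes, GFP_NOIO);

	if (p) {
		*vmalloced = 0;
		return p;
	}
	*vmalloced = 1;	/* caller must free with vfree() instead of kfree() */
	return __vmalloc(bytes, GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
			 PAGE_KERNEL);
}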
@@ -390,7 +407,7 @@
 		for (i = 0; i < have; i++)
 			new_pages[i] = old_pages[i];
 		for (; i < want; i++) {
-			page = alloc_page(GFP_HIGHUSER);
+			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
 			if (!page) {
 				bm_free_pages(new_pages + have, i - have);
 				bm_vk_free(new_pages, vmalloced);
@@ -439,7 +456,8 @@
 
 sector_t drbd_bm_capacity(struct drbd_conf *mdev)
 {
-	ERR_IF(!mdev->bitmap) return 0;
+	if (!expect(mdev->bitmap))
+		return 0;
 	return mdev->bitmap->bm_dev_capacity;
 }
 
@@ -447,7 +465,8 @@
  */
 void drbd_bm_cleanup(struct drbd_conf *mdev)
 {
-	ERR_IF (!mdev->bitmap) return;
+	if (!expect(mdev->bitmap))
+		return;
 	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
 	bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
 	kfree(mdev->bitmap);
@@ -610,7 +629,8 @@
 	int err = 0, growing;
 	int opages_vmalloced;
 
-	ERR_IF(!b) return -ENOMEM;
+	if (!expect(b))
+		return -ENOMEM;
 
 	drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
 
@@ -732,8 +752,10 @@
 	unsigned long s;
 	unsigned long flags;
 
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	s = b->bm_set;
@@ -756,8 +778,10 @@
 size_t drbd_bm_words(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	return b->bm_words;
 }
@@ -765,7 +789,8 @@
 unsigned long drbd_bm_bits(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return 0;
+	if (!expect(b))
+		return 0;
 
 	return b->bm_bits;
 }
@@ -786,8 +811,10 @@
 
 	end = offset + number;
 
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 	if (number == 0)
 		return;
 	WARN_ON(offset >= b->bm_words);
@@ -831,8 +858,10 @@
 
 	end = offset + number;
 
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 
 	spin_lock_irq(&b->bm_lock);
 	if ((offset >= b->bm_words) ||
@@ -860,8 +889,10 @@
 void drbd_bm_set_all(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 
 	spin_lock_irq(&b->bm_lock);
 	bm_memset(b, 0, 0xff, b->bm_words);
@@ -874,8 +905,10 @@
 void drbd_bm_clear_all(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 
 	spin_lock_irq(&b->bm_lock);
 	bm_memset(b, 0, 0, b->bm_words);
@@ -889,7 +922,8 @@
 	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
-#define BM_WRITE_ALL_PAGES	2
+#define BM_AIO_WRITE_HINTED	2
+#define BM_WRITE_ALL_PAGES	4
 	int error;
 	struct kref kref;
 };
@@ -977,17 +1011,11 @@
 	bm_set_page_unchanged(b->bm_pages[page_nr]);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES) {
-		void *src, *dest;
 		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
-		dest = kmap_atomic(page);
-		src = kmap_atomic(b->bm_pages[page_nr]);
-		memcpy(dest, src, PAGE_SIZE);
-		kunmap_atomic(src);
-		kunmap_atomic(dest);
+		copy_highpage(page, b->bm_pages[page_nr]);
 		bm_store_page_idx(page, page_nr);
 	} else
 		page = b->bm_pages[page_nr];
-
 	bio->bi_bdev = mdev->ldev->md_bdev;
 	bio->bi_sector = on_disk_sector;
 	/* bio_add_page of a single page to an empty bio will always succeed,
@@ -1060,6 +1088,11 @@
 		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
 			break;
 		if (rw & WRITE) {
+			if ((flags & BM_AIO_WRITE_HINTED) &&
+			    !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
+				    &page_private(b->bm_pages[i])))
+				continue;
+
 			if (!(flags & BM_WRITE_ALL_PAGES) &&
 			    bm_test_page_unchanged(b->bm_pages[i])) {
 				dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
@@ -1088,13 +1121,15 @@
 	 * "in_flight reached zero, all done" event.
 	 */
 	if (!atomic_dec_and_test(&ctx->in_flight))
-		wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+		wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
 	else
 		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
-	dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
-			rw == WRITE ? "WRITE" : "READ",
-			count, jiffies - now);
+	/* summary for global bitmap IO */
+	if (flags == 0)
+		dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
+			 rw == WRITE ? "WRITE" : "READ",
+			 count, jiffies - now);
 
 	if (ctx->error) {
 		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
@@ -1103,7 +1138,7 @@
 	}
 
 	if (atomic_read(&ctx->in_flight))
-		err = -EIO; /* Disk failed during IO... */
+		err = -EIO; /* Disk timeout/force-detach during IO... */
 
 	now = jiffies;
 	if (rw == WRITE) {
@@ -1115,8 +1150,9 @@
 	}
 	now = b->bm_set;
 
-	dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
-	     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+	if (flags == 0)
+		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
 	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 	return err;
@@ -1179,9 +1215,17 @@
 	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
 }
 
+/**
+ * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
+ * @mdev:	DRBD device.
+ */
+int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
+}
 
 /**
- * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
+ * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
  * @mdev:	DRBD device.
  * @idx:	bitmap page index
  *
@@ -1222,11 +1266,11 @@
 	}
 
 	bm_page_io_async(ctx, idx, WRITE_SYNC);
-	wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+	wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
 
 	if (ctx->error)
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
-		/* that should force detach, so the in memory bitmap will be
+		/* that causes us to detach, so the in memory bitmap will be
 		 * gone in a moment as well. */
 
 	mdev->bm_writ_cnt++;
@@ -1289,8 +1333,10 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long i = DRBD_END_OF_BITMAP;
 
-	ERR_IF(!b) return i;
-	ERR_IF(!b->bm_pages) return i;
+	if (!expect(b))
+		return i;
+	if (!expect(b->bm_pages))
+		return i;
 
 	spin_lock_irq(&b->bm_lock);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1391,8 +1437,10 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 	int c = 0;
 
-	ERR_IF(!b) return 1;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 1;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
@@ -1423,13 +1471,21 @@
 {
 	int i;
 	int bits;
+	int changed = 0;
 	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
 	for (i = first_word; i < last_word; i++) {
 		bits = hweight_long(paddr[i]);
 		paddr[i] = ~0UL;
-		b->bm_set += BITS_PER_LONG - bits;
+		changed += BITS_PER_LONG - bits;
 	}
 	kunmap_atomic(paddr);
+	if (changed) {
+		/* We only need lazy writeout, the information is still in the
+		 * remote bitmap as well, and is reconstructed during the next
+		 * bitmap exchange, if lost locally due to a crash. */
+		bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
+		b->bm_set += changed;
+	}
 }
 
 /* Same thing as drbd_bm_set_bits,
@@ -1524,8 +1580,10 @@
 	unsigned long *p_addr;
 	int i;
 
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1559,8 +1617,10 @@
 	 * robust in case we screwed up elsewhere, in that case pretend there
 	 * was one dirty bit in the requested area, so we won't try to do a
 	 * local read there (no bitmap probably implies no disk) */
-	ERR_IF(!b) return 1;
-	ERR_IF(!b->bm_pages) return 1;
+	if (!expect(b))
+		return 1;
+	if (!expect(b->bm_pages))
+		return 1;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1573,11 +1633,10 @@
 				bm_unmap(p_addr);
 			p_addr = bm_map_pidx(b, idx);
 		}
-		ERR_IF (bitnr >= b->bm_bits) {
-			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
-		} else {
+		if (expect(bitnr < b->bm_bits))
 			c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
-		}
+		else
+			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
 	}
 	if (p_addr)
 		bm_unmap(p_addr);
@@ -1607,8 +1666,10 @@
 	unsigned long flags;
 	unsigned long *p_addr, *bm;
 
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1630,47 +1691,3 @@
 	spin_unlock_irqrestore(&b->bm_lock, flags);
 	return count;
 }
-
-/* Set all bits covered by the AL-extent al_enr.
- * Returns number of bits changed. */
-unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
-{
-	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr, *bm;
-	unsigned long weight;
-	unsigned long s, e;
-	int count, i, do_now;
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
-
-	spin_lock_irq(&b->bm_lock);
-	if (BM_DONT_SET & b->bm_flags)
-		bm_print_lock_info(mdev);
-	weight = b->bm_set;
-
-	s = al_enr * BM_WORDS_PER_AL_EXT;
-	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
-	/* assert that s and e are on the same page */
-	D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
-	      ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
-	count = 0;
-	if (s < b->bm_words) {
-		i = do_now = e-s;
-		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
-		bm = p_addr + MLPP(s);
-		while (i--) {
-			count += hweight_long(*bm);
-			*bm = -1UL;
-			bm++;
-		}
-		bm_unmap(p_addr);
-		b->bm_set += do_now*BITS_PER_LONG - count;
-		if (e == b->bm_words)
-			b->bm_set -= bm_clear_surplus(b);
-	} else {
-		dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
-	}
-	weight = b->bm_set - weight;
-	spin_unlock_irq(&b->bm_lock);
-	return weight;
-}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b953cc7..6b51afa 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -39,9 +39,13 @@
 #include <linux/major.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
+#include <linux/idr.h>
 #include <net/tcp.h>
 #include <linux/lru_cache.h>
 #include <linux/prefetch.h>
+#include <linux/drbd_genl_api.h>
+#include <linux/drbd.h>
+#include "drbd_state.h"
 
 #ifdef __CHECKER__
 # define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
@@ -61,7 +65,6 @@
 extern unsigned int minor_count;
 extern bool disable_sendpage;
 extern bool allow_oos;
-extern unsigned int cn_idx;
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
 extern int enable_faults;
@@ -86,34 +89,44 @@
  */
 #define DRBD_SIGKILL SIGHUP
 
-/* All EEs on the free list should have ID_VACANT (== 0)
- * freshly allocated EEs get !ID_VACANT (== 1)
- * so if it says "cannot dereference null pointer at address 0x00000001",
- * it is most likely one of these :( */
-
 #define ID_IN_SYNC      (4711ULL)
 #define ID_OUT_OF_SYNC  (4712ULL)
-
 #define ID_SYNCER (-1ULL)
-#define ID_VACANT 0
-#define is_syncer_block_id(id) ((id) == ID_SYNCER)
+
 #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
 
 struct drbd_conf;
+struct drbd_tconn;
 
 
 /* to shorten dev_warn(DEV, "msg"); and related statements */
 #define DEV (disk_to_dev(mdev->vdisk))
 
+#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \
+	printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS)
+#define conn_alert(TCONN, FMT, ARGS...)  conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS)
+#define conn_crit(TCONN, FMT, ARGS...)   conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS)
+#define conn_err(TCONN, FMT, ARGS...)    conn_printk(KERN_ERR, TCONN, FMT, ## ARGS)
+#define conn_warn(TCONN, FMT, ARGS...)   conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS)
+#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS)
+#define conn_info(TCONN, FMT, ARGS...)   conn_printk(KERN_INFO, TCONN, FMT, ## ARGS)
+#define conn_dbg(TCONN, FMT, ARGS...)    conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS)
+
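The conn_* macros mirror the dev_* helpers but prefix messages with the resource name instead of the device; for example:

	conn_err(tconn, "connection setup failed\n");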
 #define D_ASSERT(exp)	if (!(exp)) \
 	 dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
 
-#define ERR_IF(exp) if (({						\
-	int _b = (exp) != 0;						\
-	if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n",	\
-			__func__, #exp, __FILE__, __LINE__);		\
-	_b;								\
-	}))
+/**
+ * expect  -  Make an assertion
+ *
+ * Unlike the assert macro, this macro returns a boolean result.
+ */
+#define expect(exp) ({								\
+		bool _bool = (exp);						\
+		if (!_bool)							\
+			dev_err(DEV, "ASSERTION %s FAILED in %s\n",		\
+			        #exp, __func__);				\
+		_bool;								\
+		})
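Call sites convert from the old statement-like ERR_IF to boolean tests, as the drbd_actlog.c hunks earlier in this patch already show:

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;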
 
 /* Defines to control fault insertion */
 enum {
@@ -150,15 +163,12 @@
 /* usual integer division */
 #define div_floor(A, B) ((A)/(B))
 
-/* drbd_meta-data.c (still in drbd_main.c) */
-/* 4th incarnation of the disk layout. */
-#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
-
-extern struct drbd_conf **minor_table;
 extern struct ratelimit_state drbd_ratelimit_state;
+extern struct idr minors; /* RCU, updates: genl_lock() */
+extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */
 
 /* on the wire */
-enum drbd_packets {
+enum drbd_packet {
 	/* receiver (data socket) */
 	P_DATA		      = 0x00,
 	P_DATA_REPLY	      = 0x01, /* Response to P_DATA_REQUEST */
@@ -186,7 +196,7 @@
 	P_RECV_ACK	      = 0x15, /* Used in protocol B */
 	P_WRITE_ACK	      = 0x16, /* Used in protocol C */
 	P_RS_WRITE_ACK	      = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
-	P_DISCARD_ACK	      = 0x18, /* Used in proto C, two-primaries conflict detection */
+	P_SUPERSEDED	      = 0x18, /* Used in proto C, two-primaries conflict detection */
 	P_NEG_ACK	      = 0x19, /* Sent if local disk is unusable */
 	P_NEG_DREPLY	      = 0x1a, /* Local disk is broken... */
 	P_NEG_RS_DREPLY	      = 0x1b, /* Local disk is broken... */
@@ -207,77 +217,23 @@
 	P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
 	P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
 	P_RS_CANCEL           = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
+	P_CONN_ST_CHG_REQ     = 0x2a, /* data sock: Connection wide state request */
+	P_CONN_ST_CHG_REPLY   = 0x2b, /* meta sock: Connection side state req reply */
+	P_RETRY_WRITE	      = 0x2c, /* Protocol C: retry conflicting write request */
+	P_PROTOCOL_UPDATE     = 0x2d, /* data sock: is used in established connections */
 
-	P_MAX_CMD	      = 0x2A,
 	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
 	P_MAX_OPT_CMD	      = 0x101,
 
 	/* special command ids for handshake */
 
-	P_HAND_SHAKE_M	      = 0xfff1, /* First Packet on the MetaSock */
-	P_HAND_SHAKE_S	      = 0xfff2, /* First Packet on the Socket */
+	P_INITIAL_META	      = 0xfff1, /* First Packet on the MetaSock */
+	P_INITIAL_DATA	      = 0xfff2, /* First Packet on the Socket */
 
-	P_HAND_SHAKE	      = 0xfffe	/* FIXED for the next century! */
+	P_CONNECTION_FEATURES = 0xfffe	/* FIXED for the next century! */
 };
 
-static inline const char *cmdname(enum drbd_packets cmd)
-{
-	/* THINK may need to become several global tables
-	 * when we want to support more than
-	 * one PRO_VERSION */
-	static const char *cmdnames[] = {
-		[P_DATA]	        = "Data",
-		[P_DATA_REPLY]	        = "DataReply",
-		[P_RS_DATA_REPLY]	= "RSDataReply",
-		[P_BARRIER]	        = "Barrier",
-		[P_BITMAP]	        = "ReportBitMap",
-		[P_BECOME_SYNC_TARGET]  = "BecomeSyncTarget",
-		[P_BECOME_SYNC_SOURCE]  = "BecomeSyncSource",
-		[P_UNPLUG_REMOTE]	= "UnplugRemote",
-		[P_DATA_REQUEST]	= "DataRequest",
-		[P_RS_DATA_REQUEST]     = "RSDataRequest",
-		[P_SYNC_PARAM]	        = "SyncParam",
-		[P_SYNC_PARAM89]	= "SyncParam89",
-		[P_PROTOCOL]            = "ReportProtocol",
-		[P_UUIDS]	        = "ReportUUIDs",
-		[P_SIZES]	        = "ReportSizes",
-		[P_STATE]	        = "ReportState",
-		[P_SYNC_UUID]           = "ReportSyncUUID",
-		[P_AUTH_CHALLENGE]      = "AuthChallenge",
-		[P_AUTH_RESPONSE]	= "AuthResponse",
-		[P_PING]		= "Ping",
-		[P_PING_ACK]	        = "PingAck",
-		[P_RECV_ACK]	        = "RecvAck",
-		[P_WRITE_ACK]	        = "WriteAck",
-		[P_RS_WRITE_ACK]	= "RSWriteAck",
-		[P_DISCARD_ACK]	        = "DiscardAck",
-		[P_NEG_ACK]	        = "NegAck",
-		[P_NEG_DREPLY]	        = "NegDReply",
-		[P_NEG_RS_DREPLY]	= "NegRSDReply",
-		[P_BARRIER_ACK]	        = "BarrierAck",
-		[P_STATE_CHG_REQ]       = "StateChgRequest",
-		[P_STATE_CHG_REPLY]     = "StateChgReply",
-		[P_OV_REQUEST]          = "OVRequest",
-		[P_OV_REPLY]            = "OVReply",
-		[P_OV_RESULT]           = "OVResult",
-		[P_CSUM_RS_REQUEST]     = "CsumRSRequest",
-		[P_RS_IS_IN_SYNC]	= "CsumRSIsInSync",
-		[P_COMPRESSED_BITMAP]   = "CBitmap",
-		[P_DELAY_PROBE]         = "DelayProbe",
-		[P_OUT_OF_SYNC]		= "OutOfSync",
-		[P_MAX_CMD]	        = NULL,
-	};
-
-	if (cmd == P_HAND_SHAKE_M)
-		return "HandShakeM";
-	if (cmd == P_HAND_SHAKE_S)
-		return "HandShakeS";
-	if (cmd == P_HAND_SHAKE)
-		return "HandShake";
-	if (cmd >= P_MAX_CMD)
-		return "Unknown";
-	return cmdnames[cmd];
-}
+extern const char *cmdname(enum drbd_packet cmd);
 
 /* for sending/receiving the bitmap,
  * possibly in some encoding scheme */
@@ -337,37 +293,24 @@
 	u32	  magic;
 	u16	  command;
 	u16	  length;	/* bytes of data after this header */
-	u8	  payload[0];
 } __packed;
 
 /* Header for big packets, Used for data packets exceeding 64kB */
 struct p_header95 {
 	u16	  magic;	/* use DRBD_MAGIC_BIG here */
 	u16	  command;
-	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
-	u8	  payload[0];
+	u32	  length;
 } __packed;
 
-union p_header {
-	struct p_header80 h80;
-	struct p_header95 h95;
-};
+struct p_header100 {
+	u32	  magic;
+	u16	  volume;
+	u16	  command;
+	u32	  length;
+	u32	  pad;
+} __packed;
 
-/*
- * short commands, packets without payload, plain p_header:
- *   P_PING
- *   P_PING_ACK
- *   P_BECOME_SYNC_TARGET
- *   P_BECOME_SYNC_SOURCE
- *   P_UNPLUG_REMOTE
- */
-
-/*
- * commands with out-of-struct payload:
- *   P_BITMAP    (no additional fields)
- *   P_DATA, P_DATA_REPLY (see p_data)
- *   P_COMPRESSED_BITMAP (see receive_compressed_bitmap)
- */
+extern unsigned int drbd_header_size(struct drbd_tconn *tconn);
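With three on-the-wire header formats (p_header80 and p_header95 are 8 bytes, p_header100 is 16), the header size now depends on the negotiated protocol version. drbd_header_size() presumably selects between them along these lines; this is a sketch assuming protocol 100 introduces the new header, not necessarily the exact implementation:

unsigned int drbd_header_size(struct drbd_tconn *tconn)
{
	if (tconn->agreed_pro_version >= 100) {
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
		return sizeof(struct p_header100);	/* 16 bytes */
	} else {
		BUILD_BUG_ON(sizeof(struct p_header80) !=
			     sizeof(struct p_header95));
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
		return sizeof(struct p_header80);	/* 8 bytes */
	}
}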
 
 /* these defines must not be changed without changing the protocol version */
 #define DP_HARDBARRIER	      1 /* deprecated */
@@ -377,9 +320,10 @@
 #define DP_FUA               16 /* equals REQ_FUA     */
 #define DP_FLUSH             32 /* equals REQ_FLUSH   */
 #define DP_DISCARD           64 /* equals REQ_DISCARD */
+#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
+#define DP_SEND_WRITE_ACK   256 /* This is a proto C write request */
 
 struct p_data {
-	union p_header head;
 	u64	    sector;    /* 64 bits sector number */
 	u64	    block_id;  /* to identify the request in protocol B&C */
 	u32	    seq_num;
@@ -390,21 +334,18 @@
  * commands which share a struct:
  *  p_block_ack:
  *   P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
- *   P_DISCARD_ACK (proto C, two-primaries conflict detection)
+ *   P_SUPERSEDED (proto C, two-primaries conflict detection)
  *  p_block_req:
  *   P_DATA_REQUEST, P_RS_DATA_REQUEST
  */
 struct p_block_ack {
-	struct p_header80 head;
 	u64	    sector;
 	u64	    block_id;
 	u32	    blksize;
 	u32	    seq_num;
 } __packed;
 
-
 struct p_block_req {
-	struct p_header80 head;
 	u64 sector;
 	u64 block_id;
 	u32 blksize;
@@ -413,59 +354,52 @@
 
 /*
  * commands with their own struct for additional fields:
- *   P_HAND_SHAKE
+ *   P_CONNECTION_FEATURES
  *   P_BARRIER
  *   P_BARRIER_ACK
  *   P_SYNC_PARAM
  *   ReportParams
  */
 
-struct p_handshake {
-	struct p_header80 head;	/* 8 bytes */
+struct p_connection_features {
 	u32 protocol_min;
 	u32 feature_flags;
 	u32 protocol_max;
 
 	/* should be more than enough for future enhancements
-	 * for now, feature_flags and the reserverd array shall be zero.
+	 * for now, feature_flags and the reserved array shall be zero.
 	 */
 
 	u32 _pad;
-	u64 reserverd[7];
+	u64 reserved[7];
 } __packed;
-/* 80 bytes, FIXED for the next century */
 
 struct p_barrier {
-	struct p_header80 head;
 	u32 barrier;	/* barrier number _handle_ only */
 	u32 pad;	/* to multiple of 8 Byte */
 } __packed;
 
 struct p_barrier_ack {
-	struct p_header80 head;
 	u32 barrier;
 	u32 set_size;
 } __packed;
 
 struct p_rs_param {
-	struct p_header80 head;
-	u32 rate;
+	u32 resync_rate;
 
 	      /* Since protocol version 88 and higher. */
 	char verify_alg[0];
 } __packed;
 
 struct p_rs_param_89 {
-	struct p_header80 head;
-	u32 rate;
+	u32 resync_rate;
         /* protocol version 89: */
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 } __packed;
 
 struct p_rs_param_95 {
-	struct p_header80 head;
-	u32 rate;
+	u32 resync_rate;
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 	u32 c_plan_ahead;
@@ -475,12 +409,11 @@
 } __packed;
 
 enum drbd_conn_flags {
-	CF_WANT_LOSE = 1,
+	CF_DISCARD_MY_DATA = 1,
 	CF_DRY_RUN = 2,
 };
 
 struct p_protocol {
-	struct p_header80 head;
 	u32 protocol;
 	u32 after_sb_0p;
 	u32 after_sb_1p;
@@ -494,17 +427,14 @@
 } __packed;
 
 struct p_uuids {
-	struct p_header80 head;
 	u64 uuid[UI_EXTENDED_SIZE];
 } __packed;
 
 struct p_rs_uuid {
-	struct p_header80 head;
 	u64	    uuid;
 } __packed;
 
 struct p_sizes {
-	struct p_header80 head;
 	u64	    d_size;  /* size of disk */
 	u64	    u_size;  /* user requested size */
 	u64	    c_size;  /* current exported size */
@@ -514,18 +444,15 @@
 } __packed;
 
 struct p_state {
-	struct p_header80 head;
 	u32	    state;
 } __packed;
 
 struct p_req_state {
-	struct p_header80 head;
 	u32	    mask;
 	u32	    val;
 } __packed;
 
 struct p_req_state_reply {
-	struct p_header80 head;
 	u32	    retcode;
 } __packed;
 
@@ -539,15 +466,7 @@
 	u32	  bit_map_gen[5];
 } __packed;
 
-struct p_discard {
-	struct p_header80 head;
-	u64	    block_id;
-	u32	    seq_num;
-	u32	    pad;
-} __packed;
-
 struct p_block_desc {
-	struct p_header80 head;
 	u64 sector;
 	u32 blksize;
 	u32 pad;	/* to multiple of 8 Byte */
@@ -563,7 +482,6 @@
 };
 
 struct p_compressed_bm {
-	struct p_header80 head;
 	/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
 	 * (encoding & 0x80): polarity (set/unset) of first runlength
 	 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@@ -575,90 +493,22 @@
 } __packed;
 
 struct p_delay_probe93 {
-	struct p_header80 head;
 	u32     seq_num; /* sequence number to match the two probe packets */
 	u32     offset;  /* usecs the probe got sent after the reference time point */
 } __packed;
 
-/* DCBP: Drbd Compressed Bitmap Packet ... */
-static inline enum drbd_bitmap_code
-DCBP_get_code(struct p_compressed_bm *p)
-{
-	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
-}
-
-static inline void
-DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
-{
-	BUG_ON(code & ~0xf);
-	p->encoding = (p->encoding & ~0xf) | code;
-}
-
-static inline int
-DCBP_get_start(struct p_compressed_bm *p)
-{
-	return (p->encoding & 0x80) != 0;
-}
-
-static inline void
-DCBP_set_start(struct p_compressed_bm *p, int set)
-{
-	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
-}
-
-static inline int
-DCBP_get_pad_bits(struct p_compressed_bm *p)
-{
-	return (p->encoding >> 4) & 0x7;
-}
-
-static inline void
-DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
-{
-	BUG_ON(n & ~0x7);
-	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
-}
-
-/* one bitmap packet, including the p_header,
- * should fit within one _architecture independend_ page.
- * so we need to use the fixed size 4KiB page size
- * most architectures have used for a long time.
+/*
+ * Bitmap packets need to fit within a single page on the sender and receiver,
+ * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger).
  */
-#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
-#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
-#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
-#if (PAGE_SIZE < 4096)
-/* drbd_send_bitmap / receive_bitmap would break horribly */
-#error "PAGE_SIZE too small"
-#endif
-
-union p_polymorph {
-        union p_header           header;
-        struct p_handshake       handshake;
-        struct p_data            data;
-        struct p_block_ack       block_ack;
-        struct p_barrier         barrier;
-        struct p_barrier_ack     barrier_ack;
-        struct p_rs_param_89     rs_param_89;
-        struct p_rs_param_95     rs_param_95;
-        struct p_protocol        protocol;
-        struct p_sizes           sizes;
-        struct p_uuids           uuids;
-        struct p_state           state;
-        struct p_req_state       req_state;
-        struct p_req_state_reply req_state_reply;
-        struct p_block_req       block_req;
-	struct p_delay_probe93   delay_probe93;
-	struct p_rs_uuid         rs_uuid;
-	struct p_block_desc      block_desc;
-} __packed;
+#define DRBD_SOCKET_BUFFER_SIZE 4096
 
 /**********************************************************************/
 enum drbd_thread_state {
-	None,
-	Running,
-	Exiting,
-	Restarting
+	NONE,
+	RUNNING,
+	EXITING,
+	RESTARTING
 };
 
 struct drbd_thread {
@@ -667,8 +517,9 @@
 	struct completion stop;
 	enum drbd_thread_state t_state;
 	int (*function) (struct drbd_thread *);
-	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn;
 	int reset_cpu_mask;
+	char name[9];
 };
 
 static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
@@ -681,58 +532,54 @@
 	return thi->t_state;
 }
 
-struct drbd_work;
-typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
 struct drbd_work {
 	struct list_head list;
-	drbd_work_cb cb;
+	int (*cb)(struct drbd_work *, int cancel);
+	union {
+		struct drbd_conf *mdev;
+		struct drbd_tconn *tconn;
+	};
 };
 
-struct drbd_tl_epoch;
+#include "drbd_interval.h"
+
+extern int drbd_wait_misc(struct drbd_conf *, struct drbd_interval *);
+
 struct drbd_request {
 	struct drbd_work w;
-	struct drbd_conf *mdev;
 
 	/* if local IO is not allowed, will be NULL.
 	 * if local IO _is_ allowed, holds the locally submitted bio clone,
 	 * or, after local IO completion, the ERR_PTR(error).
-	 * see drbd_endio_pri(). */
+	 * see drbd_request_endio(). */
 	struct bio *private_bio;
 
-	struct hlist_node collision;
-	sector_t sector;
-	unsigned int size;
-	unsigned int epoch; /* barrier_nr */
+	struct drbd_interval i;
 
-	/* barrier_nr: used to check on "completion" whether this req was in
+	/* epoch: used to check on "completion" whether this req was in
 	 * the current epoch, and we therefore have to close it,
-	 * starting a new epoch...
+	 * causing a p_barrier packet to be sent, starting a new epoch.
+	 *
+	 * This corresponds to "barrier" in struct p_barrier[_ack],
+	 * and to "barrier_nr" in struct drbd_epoch (and various
+	 * comments/function parameters/local variable names).
 	 */
+	unsigned int epoch;
 
 	struct list_head tl_requests; /* ring list in the transfer log */
 	struct bio *master_bio;       /* master bio pointer */
-	unsigned long rq_state; /* see comments above _req_mod() */
 	unsigned long start_time;
+
+	/* once it hits 0, we may complete the master_bio */
+	atomic_t completion_ref;
+	/* once it hits 0, we may destroy this drbd_request object */
+	struct kref kref;
+
+	unsigned rq_state; /* see comments above _req_mod() */
 };
 
-struct drbd_tl_epoch {
-	struct drbd_work w;
-	struct list_head requests; /* requests before */
-	struct drbd_tl_epoch *next; /* pointer to the next barrier */
-	unsigned int br_number;  /* the barriers identifier. */
-	int n_writes;	/* number of requests attached before this barrier */
-};
-
-struct drbd_request;
-
-/* These Tl_epoch_entries may be in one of 6 lists:
-   active_ee .. data packet being written
-   sync_ee   .. syncer block being written
-   done_ee   .. block written, need to send P_WRITE_ACK
-   read_ee   .. [RS]P_DATA_REQUEST being read
-*/
-
 struct drbd_epoch {
+	struct drbd_tconn *tconn;
 	struct list_head list;
 	unsigned int barrier_nr;
 	atomic_t epoch_size; /* increased on every request added. */
@@ -762,17 +609,14 @@
 	void *digest;
 };
 
-struct drbd_epoch_entry {
+struct drbd_peer_request {
 	struct drbd_work w;
-	struct hlist_node collision;
 	struct drbd_epoch *epoch; /* for writes */
-	struct drbd_conf *mdev;
 	struct page *pages;
 	atomic_t pending_bios;
-	unsigned int size;
+	struct drbd_interval i;
 	/* see comments on ee flag bits below */
 	unsigned long flags;
-	sector_t sector;
 	union {
 		u64 block_id;
 		struct digest_info *digest;
@@ -793,31 +637,37 @@
 	 * we need to resubmit without the barrier flag. */
 	__EE_RESUBMITTED,
 
-	/* we may have several bios per epoch entry.
+	/* we may have several bios per peer request.
 	 * if any of those fail, we set this flag atomically
 	 * from the endio callback */
 	__EE_WAS_ERROR,
 
 	/* This ee has a pointer to a digest instead of a block id */
 	__EE_HAS_DIGEST,
+
+	/* Conflicting local requests need to be restarted after this request */
+	__EE_RESTART_REQUESTS,
+
+	/* The peer wants a write ACK for this (wire proto C) */
+	__EE_SEND_WRITE_ACK,
+
+	/* Is set when net_conf had two_primaries set while creating this peer_req */
+	__EE_IN_INTERVAL_TREE,
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 #define	EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
+#define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
+#define EE_SEND_WRITE_ACK	(1<<__EE_SEND_WRITE_ACK)
+#define EE_IN_INTERVAL_TREE	(1<<__EE_IN_INTERVAL_TREE)
 
-/* global flag bits */
+/* flag bits per mdev */
 enum {
-	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
-	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
-	SEND_PING,		/* whether asender should send a ping asap */
-
 	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
 	MD_DIRTY,		/* current uuids and flags not yet on disk */
-	DISCARD_CONCURRENT,	/* Set on one node, cleared on the peer! */
 	USE_DEGR_WFC_T,		/* degr-wfc-timeout instead of wfc-timeout. */
-	CLUSTER_ST_CHANGE,	/* Cluster wide state change going on... */
 	CL_ST_CHG_SUCCESS,
 	CL_ST_CHG_FAIL,
 	CRASHED_PRIMARY,	/* This node was a crashed primary.
@@ -831,32 +681,18 @@
 				   once no more io in flight, start bitmap io */
 	BITMAP_IO_QUEUED,       /* Started bitmap IO */
 	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
-	WAS_IO_ERROR,		/* Local disk failed returned IO error */
+	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
+	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
 	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
 	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
-	NET_CONGESTED,		/* The data socket is congested */
-
-	CONFIG_PENDING,		/* serialization of (re)configuration requests.
-				 * if set, also prevents the device from dying */
-	DEVICE_DYING,		/* device became unconfigured,
-				 * but worker thread is still handling the cleanup.
-				 * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed,
-				 * while this is set. */
 	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
 				 * the peer, if it changed there as well. */
-	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
-	GOT_PING_ACK,		/* set when we receive a ping_ack packet, misc wait gets woken */
 	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
 	AL_SUSPENDED,		/* Activity logging is currently suspended. */
 	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
-	STATE_SENT,		/* Do not change state/UUIDs while this is set */
-
-	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
-				 * pending, from drbd worker context.
-				 * If set, bdi_write_congested() returns true,
-				 * so shrink_page_list() would not recurse into,
-				 * and potentially deadlock on, this drbd worker.
-				 */
+	B_RS_H_DONE,		/* Before resync handler done (already executed) */
+	DISCARD_MY_DATA,	/* discard_my_data flag per volume */
+	READ_BALANCE_RR,
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
@@ -894,24 +730,24 @@
 
 struct drbd_work_queue {
 	struct list_head q;
-	struct semaphore s; /* producers up it, worker down()s it */
 	spinlock_t q_lock;  /* to protect the list. */
+	wait_queue_head_t q_wait;
 };
 
 struct drbd_socket {
-	struct drbd_work_queue work;
 	struct mutex mutex;
 	struct socket    *socket;
 	/* this way we get our
 	 * send/receive buffers off the stack */
-	union p_polymorph sbuf;
-	union p_polymorph rbuf;
+	void *sbuf;
+	void *rbuf;
 };
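
With union p_polymorph gone, sbuf and rbuf become untyped buffers sized by DRBD_SOCKET_BUFFER_SIZE (4 KiB, defined earlier in this patch). One plausible way to back them, assuming the caller frees both buffers on error; the function name is illustrative:

static int drbd_alloc_socket(struct drbd_socket *socket)
{
	/* DRBD_SOCKET_BUFFER_SIZE is 4096, which fits in one page
	 * on all supported architectures */
	socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
	if (!socket->rbuf)
		return -ENOMEM;
	socket->sbuf = (void *) __get_free_page(GFP_KERNEL);
	if (!socket->sbuf)
		return -ENOMEM;
	return 0;
}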
 
 struct drbd_md {
 	u64 md_offset;		/* sector offset to 'super' block */
 
 	u64 la_size_sect;	/* last agreed size, unit sectors */
+	spinlock_t uuid_lock;
 	u64 uuid[UI_SIZE];
 	u64 device_uuid;
 	u32 flags;
@@ -921,24 +757,16 @@
 	s32 bm_offset;	/* signed relative sector offset to bitmap */
 
 	/* u32 al_nr_extents;	   important for restoring the AL
-	 * is stored into  sync_conf.al_extents, which in turn
+	 * is stored into ldev->disk_conf->al_extents, which in turn
 	 * gets applied to act_log->nr_elements
 	 */
 };
 
-/* for sync_conf and other types... */
-#define NL_PACKET(name, number, fields) struct name { fields };
-#define NL_INTEGER(pn,pr,member) int member;
-#define NL_INT64(pn,pr,member) __u64 member;
-#define NL_BIT(pn,pr,member)   unsigned member:1;
-#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
-#include <linux/drbd_nl.h>
-
 struct drbd_backing_dev {
 	struct block_device *backing_bdev;
 	struct block_device *md_bdev;
 	struct drbd_md md;
-	struct disk_conf dc; /* The user provided config... */
+	struct disk_conf *disk_conf; /* RCU, for updates: mdev->tconn->conf_update */
 	sector_t known_size; /* last known size of that backing device */
 };
 
@@ -962,18 +790,116 @@
 };
 
 struct fifo_buffer {
-	int *values;
 	unsigned int head_index;
 	unsigned int size;
+	int total; /* sum of all values */
+	int values[0];
+};
+extern struct fifo_buffer *fifo_alloc(int fifo_size);
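The fifo_buffer now embeds its values as a flexible array member, so a single allocation covers both header and data. A plausible fifo_alloc() under that layout:

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	/* one allocation: struct header plus fifo_size ints */
	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size,
		     GFP_NOIO);
	if (!fb)
		return NULL;
	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;
	return fb;
}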
+
+/* flag bits per tconn */
+enum {
+	NET_CONGESTED,		/* The data socket is congested */
+	RESOLVE_CONFLICTS,	/* Set on one node, cleared on the peer! */
+	SEND_PING,		/* whether asender should send a ping asap */
+	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
+	GOT_PING_ACK,		/* set when we receive a ping_ack packet, ping_wait gets woken */
+	CONN_WD_ST_CHG_REQ,	/* A cluster wide state change on the connection is active */
+	CONN_WD_ST_CHG_OKAY,
+	CONN_WD_ST_CHG_FAIL,
+	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
+	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
+	STATE_SENT,		/* Do not change state/UUIDs while this is set */
+	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
+				 * pending, from drbd worker context.
+				 * If set, bdi_write_congested() returns true,
+				 * so shrink_page_list() would not recurse into,
+				 * and potentially deadlock on, this drbd worker.
+				 */
+	DISCONNECT_SENT,
+};
+
+struct drbd_tconn {			/* is a resource from the config file */
+	char *name;			/* Resource name */
+	struct list_head all_tconn;	/* linked on global drbd_tconns */
+	struct kref kref;
+	struct idr volumes;		/* <tconn, vnr> to mdev mapping */
+	enum drbd_conns cstate;		/* Only C_STANDALONE to C_WF_REPORT_PARAMS */
+	unsigned susp:1;		/* IO suspended by user */
+	unsigned susp_nod:1;		/* IO suspended because no data */
+	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
+	struct mutex cstate_mutex;	/* Protects graceful disconnects */
+
+	unsigned long flags;
+	struct net_conf *net_conf;	/* content protected by rcu */
+	struct mutex conf_update;	/* mutex for read-copy-update of net_conf and disk_conf */
+	wait_queue_head_t ping_wait;	/* Woken upon reception of a ping, and a state change */
+	struct res_opts res_opts;
+
+	struct sockaddr_storage my_addr;
+	int my_addr_len;
+	struct sockaddr_storage peer_addr;
+	int peer_addr_len;
+
+	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
+	struct drbd_socket meta;	/* ping/ack (metadata) packets */
+	int agreed_pro_version;		/* actually used protocol version */
+	unsigned long last_received;	/* in jiffies, either socket */
+	unsigned int ko_count;
+
+	spinlock_t req_lock;
+
+	struct list_head transfer_log;	/* all requests not yet fully processed */
+
+	struct crypto_hash *cram_hmac_tfm;
+	struct crypto_hash *integrity_tfm;  /* checksums we compute, updates protected by tconn->data->mutex */
+	struct crypto_hash *peer_integrity_tfm;  /* checksums we verify, only accessed from receiver thread  */
+	struct crypto_hash *csums_tfm;
+	struct crypto_hash *verify_tfm;
+	void *int_dig_in;
+	void *int_dig_vv;
+
+	/* receiver side */
+	struct drbd_epoch *current_epoch;
+	spinlock_t epoch_lock;
+	unsigned int epochs;
+	enum write_ordering_e write_ordering;
+	atomic_t current_tle_nr;	/* transfer log epoch number */
+	unsigned current_tle_writes;	/* writes seen within this tl epoch */
+
+	unsigned long last_reconnect_jif;
+	struct drbd_thread receiver;
+	struct drbd_thread worker;
+	struct drbd_thread asender;
+	cpumask_var_t cpu_mask;
+
+	/* sender side */
+	struct drbd_work_queue sender_work;
+
+	struct {
+		/* whether this sender thread
+		 * has processed a single write yet. */
+		bool seen_any_write_yet;
+
+		/* Which barrier number to send with the next P_BARRIER */
+		int current_epoch_nr;
+
+		/* how many write requests have been sent
+		 * with req->epoch == current_epoch_nr.
+		 * If none, no P_BARRIER will be sent. */
+		unsigned current_epoch_writes;
+	} send;
 };
 
 struct drbd_conf {
+	struct drbd_tconn *tconn;
+	int vnr;			/* volume number within the connection */
+	struct kref kref;
+
 	/* things that are stored as / read from meta data on disk */
 	unsigned long flags;
 
 	/* configured by drbdsetup */
-	struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
-	struct syncer_conf sync_conf;
 	struct drbd_backing_dev *ldev __protected_by(local);
 
 	sector_t p_size;     /* partner's disk size */
@@ -981,11 +907,7 @@
 	struct block_device *this_bdev;
 	struct gendisk	    *vdisk;
 
-	struct drbd_socket data; /* data/barrier/cstate/parameter packets */
-	struct drbd_socket meta; /* ping/ack (metadata) packets */
-	int agreed_pro_version;  /* actually used protocol version */
-	unsigned long last_received; /* in jiffies, either socket */
-	unsigned int ko_count;
+	unsigned long last_reattach_jif;
 	struct drbd_work  resync_work,
 			  unplug_work,
 			  go_diskless,
@@ -1005,10 +927,9 @@
 	/* Used after attach while negotiating new disk state. */
 	union drbd_state new_state_tmp;
 
-	union drbd_state state;
+	union drbd_dev_state state;
 	wait_queue_head_t misc_wait;
 	wait_queue_head_t state_wait;  /* upon each state change. */
-	wait_queue_head_t net_cnt_wait;
 	unsigned int send_cnt;
 	unsigned int recv_cnt;
 	unsigned int read_cnt;
@@ -1018,17 +939,12 @@
 	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
 	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
 	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
-	atomic_t unacked_cnt;	 /* Need to send replys for */
+	atomic_t unacked_cnt;	 /* Need to send replies for */
 	atomic_t local_cnt;	 /* Waiting for local completion */
-	atomic_t net_cnt;	 /* Users of net_conf */
-	spinlock_t req_lock;
-	struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */
-	struct drbd_tl_epoch *newest_tle;
-	struct drbd_tl_epoch *oldest_tle;
-	struct list_head out_of_sequence_requests;
-	struct list_head barrier_acked_requests;
-	struct hlist_head *tl_hash;
-	unsigned int tl_hash_s;
+
+	/* Interval tree of pending local requests */
+	struct rb_root read_requests;
+	struct rb_root write_requests;
 
 	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 	unsigned long rs_total;
@@ -1048,9 +964,11 @@
 	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
 	/* current index into rs_mark_{left,time} */
 	int rs_last_mark;
+	unsigned long rs_last_bcast; /* [unit jiffies] */
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
+	sector_t ov_stop_sector;
 	/* where are we now? (sector) */
 	sector_t ov_position;
 	/* Start sector of out of sync range (to merge printk reporting). */
@@ -1058,14 +976,7 @@
 	/* size of out-of-sync range in sectors. */
 	sector_t ov_last_oos_size;
 	unsigned long ov_left; /* in bits */
-	struct crypto_hash *csums_tfm;
-	struct crypto_hash *verify_tfm;
 
-	unsigned long last_reattach_jif;
-	unsigned long last_reconnect_jif;
-	struct drbd_thread receiver;
-	struct drbd_thread worker;
-	struct drbd_thread asender;
 	struct drbd_bitmap *bitmap;
 	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
 
@@ -1078,29 +989,19 @@
 
 	int open_cnt;
 	u64 *p_uuid;
-	struct drbd_epoch *current_epoch;
-	spinlock_t epoch_lock;
-	unsigned int epochs;
-	enum write_ordering_e write_ordering;
+
 	struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
 	struct list_head sync_ee;   /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
-	struct list_head done_ee;   /* send ack */
-	struct list_head read_ee;   /* IO in progress (any read) */
+	struct list_head done_ee;   /* need to send P_WRITE_ACK */
+	struct list_head read_ee;   /* [RS]P_DATA_REQUEST being read */
 	struct list_head net_ee;    /* zero-copy network send in progress */
-	struct hlist_head *ee_hash; /* is proteced by req_lock! */
-	unsigned int ee_hash_s;
-
-	/* this one is protected by ee_lock, single thread */
-	struct drbd_epoch_entry *last_write_w_barrier;
 
 	int next_barrier_nr;
-	struct hlist_head *app_reads_hash; /* is proteced by req_lock */
 	struct list_head resync_reads;
 	atomic_t pp_in_use;		/* allocated from page pool */
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
-	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
 	struct drbd_md_io md_io;
 	atomic_t md_io_in_use;		/* protects the md_io and md_io_page */
 	spinlock_t al_lock;
@@ -1109,22 +1010,16 @@
 	unsigned int al_tr_number;
 	int al_tr_cycle;
 	int al_tr_pos;   /* position of the next transaction in the journal */
-	struct crypto_hash *cram_hmac_tfm;
-	struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
-	struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
-	void *int_dig_out;
-	void *int_dig_in;
-	void *int_dig_vv;
 	wait_queue_head_t seq_wait;
 	atomic_t packet_seq;
 	unsigned int peer_seq;
 	spinlock_t peer_seq_lock;
 	unsigned int minor;
 	unsigned long comm_bm_set; /* communicated number of set bits. */
-	cpumask_var_t cpu_mask;
 	struct bm_io_work bm_io_work;
 	u64 ed_uuid; /* UUID of the exposed data */
-	struct mutex state_mutex;
+	struct mutex own_state_mutex;
+	struct mutex *state_mutex; /* either own_state_mutex or mdev->tconn->cstate_mutex */
 	char congestion_reason;  /* Why we were congested... */
 	atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
 	atomic_t rs_sect_ev; /* for submitted resync data rate, both */
@@ -1132,9 +1027,8 @@
 	int rs_last_events;  /* counter of read or write "events" (unit sectors)
 			      * on the lower level device when we last looked. */
 	int c_sync_rate; /* current resync rate after syncer throttle magic */
-	struct fifo_buffer rs_plan_s; /* correction values of resync planer */
+	struct fifo_buffer *rs_plan_s; /* correction values of resync planner (RCU, tconn->conf_update) */
 	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
-	int rs_planed;    /* resync sectors already planned */
 	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
 	unsigned int peer_max_bio_size;
 	unsigned int local_max_bio_size;
@@ -1142,11 +1036,7 @@
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
 {
-	struct drbd_conf *mdev;
-
-	mdev = minor < minor_count ? minor_table[minor] : NULL;
-
-	return mdev;
+	return (struct drbd_conf *)idr_find(&minors, minor);
 }
 
 static inline unsigned int mdev_to_minor(struct drbd_conf *mdev)
@@ -1154,29 +1044,9 @@
 	return mdev->minor;
 }
 
-/* returns 1 if it was successful,
- * returns 0 if there was no data socket.
- * so wherever you are going to use the data.socket, e.g. do
- * if (!drbd_get_data_sock(mdev))
- *	return 0;
- *	CODE();
- * drbd_put_data_sock(mdev);
- */
-static inline int drbd_get_data_sock(struct drbd_conf *mdev)
+static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr)
 {
-	mutex_lock(&mdev->data.mutex);
-	/* drbd_disconnect() could have called drbd_free_sock()
-	 * while we were waiting in down()... */
-	if (unlikely(mdev->data.socket == NULL)) {
-		mutex_unlock(&mdev->data.mutex);
-		return 0;
-	}
-	return 1;
-}
-
-static inline void drbd_put_data_sock(struct drbd_conf *mdev)
-{
-	mutex_unlock(&mdev->data.mutex);
+	return (struct drbd_conf *)idr_find(&tconn->volumes, vnr);
 }
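
Both lookups now go through an idr instead of the old flat minor_table; iterating a connection's volumes follows the usual idr pattern. Illustrative only, using just the fields shown above:

	struct drbd_conf *mdev;
	int vnr;

	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		dev_info(DEV, "volume %d is configured\n", vnr);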
 
 /*
@@ -1185,106 +1055,77 @@
 
 /* drbd_main.c */
 
-enum chg_state_flags {
-	CS_HARD	= 1,
-	CS_VERBOSE = 2,
-	CS_WAIT_COMPLETE = 4,
-	CS_SERIALIZE    = 8,
-	CS_ORDERED      = CS_WAIT_COMPLETE + CS_SERIALIZE,
-};
-
 enum dds_flags {
 	DDSF_FORCED    = 1,
 	DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */
 };
 
 extern void drbd_init_set_defaults(struct drbd_conf *mdev);
-extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
-					    enum chg_state_flags f,
-					    union drbd_state mask,
-					    union drbd_state val);
-extern void drbd_force_state(struct drbd_conf *, union drbd_state,
-			union drbd_state);
-extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
-					      union drbd_state,
-					      union drbd_state,
-					      enum chg_state_flags);
-extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
-					   enum chg_state_flags,
-					   struct completion *done);
-extern void print_st_err(struct drbd_conf *, union drbd_state,
-			union drbd_state, int);
 extern int  drbd_thread_start(struct drbd_thread *thi);
 extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
+extern char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task);
 #ifdef CONFIG_SMP
-extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev);
-extern void drbd_calc_cpu_mask(struct drbd_conf *mdev);
+extern void drbd_thread_current_set_cpu(struct drbd_thread *thi);
+extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn);
 #else
 #define drbd_thread_current_set_cpu(A) ({})
 #define drbd_calc_cpu_mask(A) ({})
 #endif
-extern void drbd_free_resources(struct drbd_conf *mdev);
-extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
+extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr,
 		       unsigned int set_size);
-extern void tl_clear(struct drbd_conf *mdev);
-extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
-extern void drbd_free_sock(struct drbd_conf *mdev);
-extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
-			void *buf, size_t size, unsigned msg_flags);
-extern int drbd_send_protocol(struct drbd_conf *mdev);
+extern void tl_clear(struct drbd_tconn *);
+extern void drbd_free_sock(struct drbd_tconn *tconn);
+extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
+		     void *buf, size_t size, unsigned msg_flags);
+extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t,
+			 unsigned);
+
+extern int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd);
+extern int drbd_send_protocol(struct drbd_tconn *tconn);
 extern int drbd_send_uuids(struct drbd_conf *mdev);
 extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
-extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
+extern void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
 extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
 extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s);
 extern int drbd_send_current_state(struct drbd_conf *mdev);
-extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
-			enum drbd_packets cmd, struct p_header80 *h,
-			size_t size, unsigned msg_flags);
-#define USE_DATA_SOCKET 1
-#define USE_META_SOCKET 0
-extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
-			enum drbd_packets cmd, struct p_header80 *h,
-			size_t size);
-extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
-			char *data, size_t size);
-extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
-extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
-			u32 set_size);
-extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
-			struct drbd_epoch_entry *e);
-extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
-			struct p_block_req *rp);
-extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
-			struct p_data *dp, int data_size);
-extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+extern int drbd_send_sync_param(struct drbd_conf *mdev);
+extern void drbd_send_b_ack(struct drbd_tconn *tconn, u32 barrier_nr,
+			    u32 set_size);
+extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet,
+			 struct drbd_peer_request *);
+extern void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
+			     struct p_block_req *rp);
+extern void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
+			     struct p_data *dp, int data_size);
+extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
 			    sector_t sector, int blksize, u64 block_id);
-extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
-extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
-			   struct drbd_epoch_entry *e);
+extern int drbd_send_out_of_sync(struct drbd_conf *, struct drbd_request *);
+extern int drbd_send_block(struct drbd_conf *, enum drbd_packet,
+			   struct drbd_peer_request *);
 extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
 extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
 			      sector_t sector, int size, u64 block_id);
-extern int drbd_send_drequest_csum(struct drbd_conf *mdev,
-				   sector_t sector,int size,
-				   void *digest, int digest_size,
-				   enum drbd_packets cmd);
+extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector,
+				   int size, void *digest, int digest_size,
+				   enum drbd_packet cmd);
 extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size);
 
 extern int drbd_send_bitmap(struct drbd_conf *mdev);
-extern int _drbd_send_bitmap(struct drbd_conf *mdev);
-extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode);
+extern void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode);
+extern void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode);
 extern void drbd_free_bc(struct drbd_backing_dev *ldev);
 extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
+extern void conn_md_sync(struct drbd_tconn *tconn);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
 extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
 extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
-extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
 extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local);
+extern void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local);
+extern void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
 extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
 extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
 extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
@@ -1302,33 +1143,52 @@
 extern int drbd_bitmap_io(struct drbd_conf *mdev,
 		int (*io_fn)(struct drbd_conf *),
 		char *why, enum bm_flag flags);
+extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags);
 extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
 extern void drbd_go_diskless(struct drbd_conf *mdev);
 extern void drbd_ldev_destroy(struct drbd_conf *mdev);
 
-
 /* Meta data layout
    We reserve a 128MB Block (4k aligned)
    * either at the end of the backing device
    * or on a separate meta data device. */
 
-#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
 /* The following numbers are sectors */
-#define MD_AL_OFFSET 8	    /* 8 Sectors after start of meta area */
-#define MD_AL_MAX_SIZE 64   /* = 32 kb LOG  ~ 3776 extents ~ 14 GB Storage */
-/* Allows up to about 3.8TB */
-#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE)
+/* Allows up to about 3.8TB, so if you want more,
+ * you need to use the "flexible" meta data format. */
+#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
+#define MD_AL_OFFSET	8    /* 8 Sectors after start of meta area */
+#define MD_AL_SECTORS	64   /* = 32 kB on disk activity log ring buffer */
+#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS)
 
-/* Since the smalles IO unit is usually 512 byte */
-#define MD_SECTOR_SHIFT	 9
-#define MD_SECTOR_SIZE	 (1<<MD_SECTOR_SHIFT)
+/* we do all meta data IO in 4k blocks */
+#define MD_BLOCK_SHIFT	12
+#define MD_BLOCK_SIZE	(1<<MD_BLOCK_SHIFT)
 
-/* activity log */
-#define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */
-#define AL_EXTENT_SHIFT 22		 /* One extent represents 4M Storage */
+/* One activity log extent represents 4M of storage */
+#define AL_EXTENT_SHIFT 22
 #define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
 
+/* We could make these currently hardcoded constants configurable
+ * variables at create-md time (or even re-configurable at runtime?).
+ * That would require some more changes to the DRBD "super block"
+ * and attach code.
+ *
+ * updates per transaction:
+ *   This many changes to the active set can be logged with one transaction.
+ *   This number is arbitrary.
+ * context per transaction:
+ *   This many context extent numbers are logged with each transaction.
+ *   This number results from the transaction block size (4k), the layout
+ *   of the transaction header, and the number of updates per transaction.
+ *   See drbd_actlog.c:struct al_transaction_on_disk
+ */
+#define AL_UPDATES_PER_TRANSACTION	 64	// arbitrary
+#define AL_CONTEXT_PER_TRANSACTION	919	// (4096 - 36 - 6*64)/4
+
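Cross-checking the arithmetic in the comment (taking the 36-byte header and 6 bytes per update slot from the comment above; see struct al_transaction_on_disk for the authoritative layout): 4096 - 36 - 6*64 = 3676, and 3676/4 = 919. A compile-time restatement could look like:

/* Sketch: compile-time cross-check of the layout arithmetic above. */
BUILD_BUG_ON(AL_CONTEXT_PER_TRANSACTION !=
	     (4096 - 36 - 6 * AL_UPDATES_PER_TRANSACTION) / 4);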
 #if BITS_PER_LONG == 32
 #define LN2_BPL 5
 #define cpu_to_lel(A) cpu_to_le32(A)
@@ -1364,11 +1224,14 @@
 
 #define SLEEP_TIME (HZ/10)
 
-#define BM_BLOCK_SHIFT  12			 /* 4k per bit */
+/* We do bitmap IO in units of 4k blocks.
+ * We also still have a hardcoded 4k per bit relation. */
+#define BM_BLOCK_SHIFT	12			 /* 4k per bit */
 #define BM_BLOCK_SIZE	 (1<<BM_BLOCK_SHIFT)
-/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
- * per sector of on disk bitmap */
-#define BM_EXT_SHIFT	 (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3)  /* = 24 */
+/* mostly arbitrarily set the represented size of one bitmap extent,
+ * aka resync extent, to 16 MiB (which is also 512 Byte worth of bitmap
+ * at 4k per bit resolution) */
+#define BM_EXT_SHIFT	 24	/* 16 MiB per resync extent */
 #define BM_EXT_SIZE	 (1<<BM_EXT_SHIFT)
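The numbers in this comment are consistent: one 16 MiB extent at 4k per bit is (1<<24)/(1<<12) = 4096 bits, i.e. 512 bytes of bitmap, matching the 512-byte figure the removed MD_SECTOR_SHIFT-based formula encoded. As a sketch, complementing the sanity check below:

/* Sketch: one resync extent corresponds to 512 bytes of bitmap. */
BUILD_BUG_ON((BM_EXT_SIZE / BM_BLOCK_SIZE) / 8 != 512);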
 
 #if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
@@ -1436,17 +1299,20 @@
 #endif
 #endif
 
-/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
- * With a value of 8 all IO in one 128K block make it to the same slot of the
- * hash table. */
-#define HT_SHIFT 8
-#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
+/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
+ * so for a typical PAGE_CACHE_SIZE of 4k, that is (1<<20) bytes.
+ * Since we may live in a mixed-platform cluster,
+ * we limit ourselves to a platform-agnostic constant here for now.
+ * A follow-up commit may allow even bigger BIO sizes,
+ * once we have thought that through. */
+#define DRBD_MAX_BIO_SIZE (1U << 20)
+#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#endif
 #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12)       /* Works always = 4k */
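For the arithmetic behind the cap: BIO_MAX_SIZE is 256 pages, so with 4k pages it is 256 * 4096 = (1<<20) bytes, while a 64k-page architecture would get 16 MiB, which is why a fixed 1 MiB constant keeps mixed-platform clusters interoperable. A sketch of that relation, assuming 4k pages:

/* Sketch, assuming PAGE_CACHE_SIZE == 4096: the cap equals BIO_MAX_SIZE. */
BUILD_BUG_ON(DRBD_MAX_BIO_SIZE != 256 * 4096);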
 
-#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* The old header only allows packets up to 32Kib data */
-
-/* Number of elements in the app_reads_hash */
-#define APP_R_HSIZE 15
+#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
+#define DRBD_MAX_BIO_SIZE_P95    (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
 
 extern int  drbd_bm_init(struct drbd_conf *mdev);
 extern int  drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits);
@@ -1468,11 +1334,11 @@
 extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
 extern int  drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
 extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
+extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr);
 extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern int  drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local);
 extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local);
 extern int  drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
-extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
-		unsigned long al_enr);
 extern size_t	     drbd_bm_words(struct drbd_conf *mdev);
 extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
 extern sector_t      drbd_bm_capacity(struct drbd_conf *mdev);
@@ -1497,7 +1363,7 @@
 /* drbd_main.c */
 
 extern struct kmem_cache *drbd_request_cache;
-extern struct kmem_cache *drbd_ee_cache;	/* epoch entries */
+extern struct kmem_cache *drbd_ee_cache;	/* peer requests */
 extern struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
 extern mempool_t *drbd_request_mempool;
@@ -1537,12 +1403,22 @@
 
 extern rwlock_t global_state_lock;
 
-extern struct drbd_conf *drbd_new_device(unsigned int minor);
-extern void drbd_free_mdev(struct drbd_conf *mdev);
+extern int conn_lowest_minor(struct drbd_tconn *tconn);
+enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr);
+extern void drbd_minor_destroy(struct kref *kref);
+
+extern int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts);
+extern struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts);
+extern void conn_destroy(struct kref *kref);
+struct drbd_tconn *conn_get_by_name(const char *name);
+extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
+					    void *peer_addr, int peer_addr_len);
+extern void conn_free_crypto(struct drbd_tconn *tconn);
 
 extern int proc_details;
 
 /* drbd_req */
+extern void __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long);
 extern void drbd_make_request(struct request_queue *q, struct bio *bio);
 extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
 extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
@@ -1550,10 +1426,11 @@
 
 
 /* drbd_nl.c */
+extern int drbd_msg_put_info(const char *info);
 extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
-extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
+extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
 enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
 extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
@@ -1561,13 +1438,14 @@
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
 					enum drbd_role new_role,
 					int force);
-extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
-extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
+extern bool conn_try_outdate_peer(struct drbd_tconn *tconn);
+extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
 /* drbd_worker.c */
 extern int drbd_worker(struct drbd_thread *thi);
-extern int drbd_alter_sa(struct drbd_conf *mdev, int na);
+enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor);
+void drbd_resync_after_changed(struct drbd_conf *mdev);
 extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
 extern void resume_next_sg(struct drbd_conf *mdev);
 extern void suspend_other_sg(struct drbd_conf *mdev);
@@ -1576,13 +1454,13 @@
 extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
 extern void drbd_md_put_buffer(struct drbd_conf *mdev);
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
-				struct drbd_backing_dev *bdev, sector_t sector, int rw);
-extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
-					    unsigned int *done);
-extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
+		struct drbd_backing_dev *bdev, sector_t sector, int rw);
+extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int);
+extern void wait_until_done_or_force_detached(struct drbd_conf *mdev,
+		struct drbd_backing_dev *bdev, unsigned int *done);
 extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
 
-static inline void ov_oos_print(struct drbd_conf *mdev)
+static inline void ov_out_of_sync_print(struct drbd_conf *mdev)
 {
 	if (mdev->ov_last_oos_size) {
 		dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n",
@@ -1594,97 +1472,102 @@
 
 
 extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
-extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_epoch_entry *, void *);
+extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *,
+			 struct drbd_peer_request *, void *);
 /* worker callbacks */
-extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
-extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
-extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int);
-extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int);
-extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int);
-extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int);
-extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
-extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
-extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int);
-extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
-extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
-extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
-extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
-extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
-extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
-extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
-extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
-extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
-extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_data_req(struct drbd_work *, int);
+extern int w_e_end_rsdata_req(struct drbd_work *, int);
+extern int w_e_end_csum_rs_req(struct drbd_work *, int);
+extern int w_e_end_ov_reply(struct drbd_work *, int);
+extern int w_e_end_ov_req(struct drbd_work *, int);
+extern int w_ov_finished(struct drbd_work *, int);
+extern int w_resync_timer(struct drbd_work *, int);
+extern int w_send_write_hint(struct drbd_work *, int);
+extern int w_make_resync_request(struct drbd_work *, int);
+extern int w_send_dblock(struct drbd_work *, int);
+extern int w_send_read_req(struct drbd_work *, int);
+extern int w_prev_work_done(struct drbd_work *, int);
+extern int w_e_reissue(struct drbd_work *, int);
+extern int w_restart_disk_io(struct drbd_work *, int);
+extern int w_send_out_of_sync(struct drbd_work *, int);
+extern int w_start_resync(struct drbd_work *, int);
 
 extern void resync_timer_fn(unsigned long data);
 extern void start_resync_timer_fn(unsigned long data);
 
 /* drbd_receiver.c */
 extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
-extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
-		const unsigned rw, const int fault_type);
-extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
-extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
-					    u64 id,
-					    sector_t sector,
-					    unsigned int data_size,
-					    gfp_t gfp_mask) __must_hold(local);
-extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
-		int is_net);
-#define drbd_free_ee(m,e)	drbd_free_some_ee(m, e, 0)
-#define drbd_free_net_ee(m,e)	drbd_free_some_ee(m, e, 1)
-extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
-		struct list_head *head);
-extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
-		struct list_head *head);
+extern int drbd_submit_peer_request(struct drbd_conf *,
+				    struct drbd_peer_request *, const unsigned,
+				    const int);
+extern int drbd_free_peer_reqs(struct drbd_conf *, struct list_head *);
+extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_conf *, u64,
+						     sector_t, unsigned int,
+						     gfp_t) __must_hold(local);
+extern void __drbd_free_peer_req(struct drbd_conf *, struct drbd_peer_request *,
+				 int);
+#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0)
+#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1)
+extern struct page *drbd_alloc_pages(struct drbd_conf *, unsigned int, bool);
 extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
-extern void drbd_flush_workqueue(struct drbd_conf *mdev);
-extern void drbd_free_tl_hash(struct drbd_conf *mdev);
-
-/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
- * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
-static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
-			char __user *optval, int optlen)
+extern void conn_flush_workqueue(struct drbd_tconn *tconn);
+extern int drbd_connected(struct drbd_conf *mdev);
+static inline void drbd_flush_workqueue(struct drbd_conf *mdev)
 {
+	conn_flush_workqueue(mdev->tconn);
+}
+
+/* Yes, there is kernel_setsockopt, but only since 2.6.18.
+ * So we have our own copy of it here. */
+static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
+				  char *optval, int optlen)
+{
+	mm_segment_t oldfs = get_fs();
+	char __user *uoptval;
 	int err;
+
+	uoptval = (char __user __force *)optval;
+
+	set_fs(KERNEL_DS);
 	if (level == SOL_SOCKET)
-		err = sock_setsockopt(sock, level, optname, optval, optlen);
+		err = sock_setsockopt(sock, level, optname, uoptval, optlen);
 	else
-		err = sock->ops->setsockopt(sock, level, optname, optval,
+		err = sock->ops->setsockopt(sock, level, optname, uoptval,
 					    optlen);
+	set_fs(oldfs);
 	return err;
 }
 
 static inline void drbd_tcp_cork(struct socket *sock)
 {
-	int __user val = 1;
+	int val = 1;
 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
-			(char __user *)&val, sizeof(val));
+			(char *)&val, sizeof(val));
 }
 
 static inline void drbd_tcp_uncork(struct socket *sock)
 {
-	int __user val = 0;
+	int val = 0;
 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
-			(char __user *)&val, sizeof(val));
+			(char *)&val, sizeof(val));
 }
 
 static inline void drbd_tcp_nodelay(struct socket *sock)
 {
-	int __user val = 1;
+	int val = 1;
 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-			(char __user *)&val, sizeof(val));
+			(char *)&val, sizeof(val));
 }
 
 static inline void drbd_tcp_quickack(struct socket *sock)
 {
-	int __user val = 2;
+	int val = 2;
 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
-			(char __user *)&val, sizeof(val));
+			(char *)&val, sizeof(val));
 }
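These socket helpers are typically used in a cork/send/uncork pattern so several small packets leave as one TCP burst. A usage sketch (the function and its body are illustrative, not from this patch):

/* Illustrative: batch several small sends into one TCP burst. */
static void example_send_burst(struct drbd_tconn *tconn, struct socket *sock,
			       void *buf1, size_t len1, void *buf2, size_t len2)
{
	drbd_tcp_cork(sock);
	drbd_send_all(tconn, sock, buf1, len1, 0);
	drbd_send_all(tconn, sock, buf2, len2, 0);
	drbd_tcp_uncork(sock);	/* flush the corked segments */
}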
 
-void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo);
+void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo);
 
 /* drbd_proc.c */
 extern struct proc_dir_entry *drbd_proc;
@@ -1693,8 +1576,8 @@
 extern const char *drbd_role_str(enum drbd_role s);
 
 /* drbd_actlog.c */
-extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector);
-extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i);
+extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i);
 extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector);
 extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
 extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
@@ -1702,7 +1585,6 @@
 extern int drbd_rs_del_all(struct drbd_conf *mdev);
 extern void drbd_rs_failed_io(struct drbd_conf *mdev,
 		sector_t sector, int size);
-extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
 extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
 extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
@@ -1712,73 +1594,24 @@
 		int size, const char *file, const unsigned int line);
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
-extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
 extern void drbd_al_shrink(struct drbd_conf *mdev);
 
-
 /* drbd_nl.c */
-
-void drbd_nl_cleanup(void);
-int __init drbd_nl_init(void);
-void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state);
-void drbd_bcast_sync_progress(struct drbd_conf *mdev);
-void drbd_bcast_ee(struct drbd_conf *mdev,
-		const char *reason, const int dgs,
-		const char* seen_hash, const char* calc_hash,
-		const struct drbd_epoch_entry* e);
-
-
-/**
- * DOC: DRBD State macros
- *
- * These macros are used to express state changes in easily readable form.
- *
- * The NS macros expand to a mask and a value, that can be bit ored onto the
- * current state as soon as the spinlock (req_lock) was taken.
- *
- * The _NS macros are used for state functions that get called with the
- * spinlock. These macros expand directly to the new state value.
- *
- * Besides the basic forms NS() and _NS() additional _?NS[23] are defined
- * to express state changes that affect more than one aspect of the state.
- *
- * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
- * Means that the network connection was established and that the peer
- * is in secondary role.
- */
-#define role_MASK R_MASK
-#define peer_MASK R_MASK
-#define disk_MASK D_MASK
-#define pdsk_MASK D_MASK
-#define conn_MASK C_MASK
-#define susp_MASK 1
-#define user_isp_MASK 1
-#define aftr_isp_MASK 1
-#define susp_nod_MASK 1
-#define susp_fen_MASK 1
-
-#define NS(T, S) \
-	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
-	({ union drbd_state val; val.i = 0; val.T = (S); val; })
-#define NS2(T1, S1, T2, S2) \
-	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
-	  mask.T2 = T2##_MASK; mask; }), \
-	({ union drbd_state val; val.i = 0; val.T1 = (S1); \
-	  val.T2 = (S2); val; })
-#define NS3(T1, S1, T2, S2, T3, S3) \
-	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
-	  mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
-	({ union drbd_state val;  val.i = 0; val.T1 = (S1); \
-	  val.T2 = (S2); val.T3 = (S3); val; })
-
-#define _NS(D, T, S) \
-	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; })
-#define _NS2(D, T1, S1, T2, S2) \
-	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
-	__ns.T2 = (S2); __ns; })
-#define _NS3(D, T1, S1, T2, S2, T3, S3) \
-	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
-	__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
+/* state info broadcast */
+struct sib_info {
+	enum drbd_state_info_bcast_reason sib_reason;
+	union {
+		struct {
+			char *helper_name;
+			unsigned helper_exit_code;
+		};
+		struct {
+			union drbd_state os;
+			union drbd_state ns;
+		};
+	};
+};
+void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib);
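A sketch of filling struct sib_info for a state-transition broadcast; SIB_STATE_CHANGE is assumed to be one of the drbd_state_info_bcast_reason values this series introduces, and the function itself is hypothetical:

/* Illustrative: broadcast an old-state/new-state transition. */
static void example_bcast_state_change(struct drbd_conf *mdev,
				       union drbd_state os,
				       union drbd_state ns)
{
	struct sib_info sib;

	sib.sib_reason = SIB_STATE_CHANGE;	/* assumed enum value */
	sib.os = os;
	sib.ns = ns;
	drbd_bcast_event(mdev, &sib);
}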
 
 /*
  * inline helper functions
@@ -1795,9 +1628,10 @@
 #define page_chain_for_each_safe(page, n) \
 	for (; page && ({ n = page_chain_next(page); 1; }); page = n)
 
-static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
+
+static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req)
 {
-	struct page *page = e->pages;
+	struct page *page = peer_req->pages;
 	page_chain_for_each(page) {
 		if (page_count(page) > 1)
 			return 1;
@@ -1805,18 +1639,6 @@
 	return 0;
 }
 
-static inline void drbd_state_lock(struct drbd_conf *mdev)
-{
-	wait_event(mdev->misc_wait,
-		   !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags));
-}
-
-static inline void drbd_state_unlock(struct drbd_conf *mdev)
-{
-	clear_bit(CLUSTER_ST_CHANGE, &mdev->flags);
-	wake_up(&mdev->misc_wait);
-}
-
 static inline enum drbd_state_rv
 _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		enum chg_state_flags flags, struct completion *done)
@@ -1830,48 +1652,71 @@
 	return rv;
 }
 
-/**
- * drbd_request_state() - Reqest a state change
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- *
- * This is the most graceful way of requesting a state change. It is verbose
- * quite verbose in case the state change is not possible, and all those
- * state changes are globally serialized.
- */
-static inline int drbd_request_state(struct drbd_conf *mdev,
-				     union drbd_state mask,
-				     union drbd_state val)
+static inline union drbd_state drbd_read_state(struct drbd_conf *mdev)
 {
-	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
+	union drbd_state rv;
+
+	rv.i = mdev->state.i;
+	rv.susp = mdev->tconn->susp;
+	rv.susp_nod = mdev->tconn->susp_nod;
+	rv.susp_fen = mdev->tconn->susp_fen;
+
+	return rv;
 }
 
 enum drbd_force_detach_flags {
-	DRBD_IO_ERROR,
+	DRBD_READ_ERROR,
+	DRBD_WRITE_ERROR,
 	DRBD_META_IO_ERROR,
 	DRBD_FORCE_DETACH,
 };
 
 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
 static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
-		enum drbd_force_detach_flags forcedetach,
+		enum drbd_force_detach_flags df,
 		const char *where)
 {
-	switch (mdev->ldev->dc.on_io_error) {
-	case EP_PASS_ON:
-		if (forcedetach == DRBD_IO_ERROR) {
+	enum drbd_io_error_p ep;
+
+	rcu_read_lock();
+	ep = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
+	rcu_read_unlock();
+	switch (ep) {
+	case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */
+		if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
 			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s.\n", where);
 			if (mdev->state.disk > D_INCONSISTENT)
 				_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
 			break;
 		}
-		/* NOTE fall through to detach case if forcedetach set */
+		/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
 	case EP_DETACH:
 	case EP_CALL_HELPER:
+		/* Remember whether we saw a READ or WRITE error.
+		 *
+		 * Recovery of the affected area for WRITE failure is covered
+		 * by the activity log.
+		 * READ errors may fall outside that area though. Certain READ
+		 * errors can be "healed" by writing good data to the affected
+		 * blocks, which triggers block re-allocation in lower layers.
+		 *
+		 * If we cannot write the bitmap after a READ error,
+		 * we may need to trigger a full sync (see w_go_diskless()).
+		 *
+		 * Force-detach is not really an IO error, but rather a
+		 * desperate measure to try to deal with a completely
+		 * unresponsive lower level IO stack.
+		 * Still it should be treated as a WRITE error.
+		 *
+		 * Meta IO error is always WRITE error:
+		 * we read meta data only once during attach,
+		 * which will fail in case of errors.
+		 */
 		set_bit(WAS_IO_ERROR, &mdev->flags);
-		if (forcedetach == DRBD_FORCE_DETACH)
+		if (df == DRBD_READ_ERROR)
+			set_bit(WAS_READ_ERROR, &mdev->flags);
+		if (df == DRBD_FORCE_DETACH)
 			set_bit(FORCE_DETACH, &mdev->flags);
 		if (mdev->state.disk > D_FAILED) {
 			_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
@@ -1896,9 +1741,9 @@
 {
 	if (error) {
 		unsigned long flags;
-		spin_lock_irqsave(&mdev->req_lock, flags);
+		spin_lock_irqsave(&mdev->tconn->req_lock, flags);
 		__drbd_chk_io_error_(mdev, forcedetach, where);
-		spin_unlock_irqrestore(&mdev->req_lock, flags);
+		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 	}
 }
 
@@ -1910,9 +1755,9 @@
  * BTW, for internal meta data, this happens to be the maximum capacity
  * we could agree upon with our peer node.
  */
-static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct drbd_backing_dev *bdev)
 {
-	switch (bdev->dc.meta_dev_idx) {
+	switch (meta_dev_idx) {
 	case DRBD_MD_INDEX_INTERNAL:
 	case DRBD_MD_INDEX_FLEX_INT:
 		return bdev->md.md_offset + bdev->md.bm_offset;
@@ -1922,13 +1767,30 @@
 	}
 }
 
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+{
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	return _drbd_md_first_sector(meta_dev_idx, bdev);
+}
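The embedded bdev->dc configuration gives way to an RCU-protected disk_conf pointer; the read-side pattern used here (and repeatedly below) is: take rcu_read_lock(), snapshot the needed field through rcu_dereference(), drop the lock, then work with the snapshot. A generic sketch of the same pattern:

/* Illustrative RCU read-side snapshot, as used by the helpers here. */
static int example_snapshot_meta_dev_idx(struct drbd_backing_dev *bdev)
{
	int idx;

	rcu_read_lock();
	idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
	rcu_read_unlock();

	return idx;	/* safe to use after unlock: plain int copy */
}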
+
 /**
  * drbd_md_last_sector() - Return the last sector number of the meta data area
  * @bdev:	Meta data block device.
  */
 static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
 {
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	switch (meta_dev_idx) {
 	case DRBD_MD_INDEX_INTERNAL:
 	case DRBD_MD_INDEX_FLEX_INT:
 		return bdev->md.md_offset + MD_AL_OFFSET - 1;
@@ -1956,12 +1818,18 @@
 static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
 {
 	sector_t s;
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	switch (meta_dev_idx) {
 	case DRBD_MD_INDEX_INTERNAL:
 	case DRBD_MD_INDEX_FLEX_INT:
 		s = drbd_get_capacity(bdev->backing_bdev)
 			? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
-					drbd_md_first_sector(bdev))
+				_drbd_md_first_sector(meta_dev_idx, bdev))
 			: 0;
 		break;
 	case DRBD_MD_INDEX_FLEX_EXT:
@@ -1987,9 +1855,15 @@
 static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
 				    struct drbd_backing_dev *bdev)
 {
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	switch (meta_dev_idx) {
 	default: /* external, some index */
-		return MD_RESERVED_SECT * bdev->dc.meta_dev_idx;
+		return MD_RESERVED_SECT * meta_dev_idx;
 	case DRBD_MD_INDEX_INTERNAL:
 		/* with drbd08, internal meta data is always "flexible" */
 	case DRBD_MD_INDEX_FLEX_INT:
@@ -2015,9 +1889,8 @@
 	unsigned long flags;
 	spin_lock_irqsave(&q->q_lock, flags);
 	list_add(&w->list, &q->q);
-	up(&q->s); /* within the spinlock,
-		      see comment near end of drbd_worker() */
 	spin_unlock_irqrestore(&q->q_lock, flags);
+	wake_up(&q->q_wait);
 }
 
 static inline void
@@ -2026,41 +1899,35 @@
 	unsigned long flags;
 	spin_lock_irqsave(&q->q_lock, flags);
 	list_add_tail(&w->list, &q->q);
-	up(&q->s); /* within the spinlock,
-		      see comment near end of drbd_worker() */
 	spin_unlock_irqrestore(&q->q_lock, flags);
+	wake_up(&q->q_wait);
 }
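With the queue semaphore gone, the producer side above does the list_add under the spinlock and then a plain wake_up(); the consumer pairs with it roughly as below (a sketch only — the real loop in drbd_worker.c re-checks the list under the lock and also handles signals):

/* Illustrative consumer for the wake_up() above. */
static void example_dequeue_work(struct drbd_work_queue *q,
				 struct list_head *work_list)
{
	wait_event(q->q_wait, !list_empty(&q->q));
	spin_lock_irq(&q->q_lock);
	/* another consumer may have raced us; splicing an
	 * empty list is harmless */
	list_splice_init(&q->q, work_list);
	spin_unlock_irq(&q->q_lock);
}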
 
-static inline void wake_asender(struct drbd_conf *mdev)
+static inline void wake_asender(struct drbd_tconn *tconn)
 {
-	if (test_bit(SIGNAL_ASENDER, &mdev->flags))
-		force_sig(DRBD_SIG, mdev->asender.task);
+	if (test_bit(SIGNAL_ASENDER, &tconn->flags))
+		force_sig(DRBD_SIG, tconn->asender.task);
 }
 
-static inline void request_ping(struct drbd_conf *mdev)
+static inline void request_ping(struct drbd_tconn *tconn)
 {
-	set_bit(SEND_PING, &mdev->flags);
-	wake_asender(mdev);
+	set_bit(SEND_PING, &tconn->flags);
+	wake_asender(tconn);
 }
 
-static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
-	enum drbd_packets cmd)
-{
-	struct p_header80 h;
-	return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
-}
+extern void *conn_prepare_command(struct drbd_tconn *, struct drbd_socket *);
+extern void *drbd_prepare_command(struct drbd_conf *, struct drbd_socket *);
+extern int conn_send_command(struct drbd_tconn *, struct drbd_socket *,
+			     enum drbd_packet, unsigned int, void *,
+			     unsigned int);
+extern int drbd_send_command(struct drbd_conf *, struct drbd_socket *,
+			     enum drbd_packet, unsigned int, void *,
+			     unsigned int);
 
-static inline int drbd_send_ping(struct drbd_conf *mdev)
-{
-	struct p_header80 h;
-	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
-}
-
-static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
-{
-	struct p_header80 h;
-	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
-}
+extern int drbd_send_ping(struct drbd_tconn *tconn);
+extern int drbd_send_ping_ack(struct drbd_tconn *tconn);
+extern int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state);
+extern int conn_send_state_req(struct drbd_tconn *, union drbd_state, union drbd_state);
 
 static inline void drbd_thread_stop(struct drbd_thread *thi)
 {
@@ -2082,21 +1949,21 @@
  * or implicit barrier packets as necessary.
  * increased:
  *  w_send_barrier
- *  _req_mod(req, queue_for_net_write or queue_for_net_read);
+ *  _req_mod(req, QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ);
  *    it is much easier and equally valid to count what we queue for the
  *    worker, even before it actually was queued or send.
  *    (drbd_make_request_common; recovery path on read io-error)
  * decreased:
  *  got_BarrierAck (respective tl_clear, tl_clear_barrier)
- *  _req_mod(req, data_received)
+ *  _req_mod(req, DATA_RECEIVED)
  *     [from receive_DataReply]
- *  _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked)
+ *  _req_mod(req, WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED)
  *     [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
  *     for some reason it is NOT decreased in got_NegAck,
  *     but in the resulting cleanup code from report_params.
  *     we should try to remember the reason for that...
- *  _req_mod(req, send_failed or send_canceled)
- *  _req_mod(req, connection_lost_while_pending)
+ *  _req_mod(req, SEND_FAILED or SEND_CANCELED)
+ *  _req_mod(req, CONNECTION_LOST_WHILE_PENDING)
  *     [from tl_clear_barrier]
  */
 static inline void inc_ap_pending(struct drbd_conf *mdev)
@@ -2104,17 +1971,19 @@
 	atomic_inc(&mdev->ap_pending_cnt);
 }
 
-#define ERR_IF_CNT_IS_NEGATIVE(which)				\
-	if (atomic_read(&mdev->which) < 0)			\
+#define ERR_IF_CNT_IS_NEGATIVE(which, func, line)			\
+	if (atomic_read(&mdev->which) < 0)				\
 		dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n",	\
-		    __func__ , __LINE__ ,			\
-		    atomic_read(&mdev->which))
+			func, line,					\
+			atomic_read(&mdev->which))
 
-#define dec_ap_pending(mdev)	do {				\
-	typecheck(struct drbd_conf *, mdev);			\
-	if (atomic_dec_and_test(&mdev->ap_pending_cnt))		\
-		wake_up(&mdev->misc_wait);			\
-	ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0)
+#define dec_ap_pending(mdev) _dec_ap_pending(mdev, __func__, __LINE__)
+static inline void _dec_ap_pending(struct drbd_conf *mdev, const char *func, int line)
+{
+	if (atomic_dec_and_test(&mdev->ap_pending_cnt))
+		wake_up(&mdev->misc_wait);
+	ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line);
+}
 
 /* counts how many resync-related answers we still expect from the peer
  *		     increase			decrease
@@ -2127,10 +1996,12 @@
 	atomic_inc(&mdev->rs_pending_cnt);
 }
 
-#define dec_rs_pending(mdev)	do {				\
-	typecheck(struct drbd_conf *, mdev);			\
-	atomic_dec(&mdev->rs_pending_cnt);			\
-	ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0)
+#define dec_rs_pending(mdev) _dec_rs_pending(mdev, __func__, __LINE__)
+static inline void _dec_rs_pending(struct drbd_conf *mdev, const char *func, int line)
+{
+	atomic_dec(&mdev->rs_pending_cnt);
+	ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line);
+}
 
 /* counts how many answers we still need to send to the peer.
  * increased on
@@ -2146,38 +2017,18 @@
 	atomic_inc(&mdev->unacked_cnt);
 }
 
-#define dec_unacked(mdev)	do {				\
-	typecheck(struct drbd_conf *, mdev);			\
-	atomic_dec(&mdev->unacked_cnt);				\
-	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
-
-#define sub_unacked(mdev, n)	do {				\
-	typecheck(struct drbd_conf *, mdev);			\
-	atomic_sub(n, &mdev->unacked_cnt);			\
-	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
-
-
-static inline void put_net_conf(struct drbd_conf *mdev)
+#define dec_unacked(mdev) _dec_unacked(mdev, __func__, __LINE__)
+static inline void _dec_unacked(struct drbd_conf *mdev, const char *func, int line)
 {
-	if (atomic_dec_and_test(&mdev->net_cnt))
-		wake_up(&mdev->net_cnt_wait);
+	atomic_dec(&mdev->unacked_cnt);
+	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
 }
 
-/**
- * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there
- * @mdev:	DRBD device.
- *
- * You have to call put_net_conf() when finished working with mdev->net_conf.
- */
-static inline int get_net_conf(struct drbd_conf *mdev)
+#define sub_unacked(mdev, n) _sub_unacked(mdev, n, __func__, __LINE__)
+static inline void _sub_unacked(struct drbd_conf *mdev, int n, const char *func, int line)
 {
-	int have_net_conf;
-
-	atomic_inc(&mdev->net_cnt);
-	have_net_conf = mdev->state.conn >= C_UNCONNECTED;
-	if (!have_net_conf)
-		put_net_conf(mdev);
-	return have_net_conf;
+	atomic_sub(n, &mdev->unacked_cnt);
+	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
 }
 
 /**
@@ -2281,17 +2132,20 @@
  * maybe re-implement using semaphores? */
 static inline int drbd_get_max_buffers(struct drbd_conf *mdev)
 {
-	int mxb = 1000000; /* arbitrary limit on open requests */
-	if (get_net_conf(mdev)) {
-		mxb = mdev->net_conf->max_buffers;
-		put_net_conf(mdev);
-	}
+	struct net_conf *nc;
+	int mxb;
+
+	rcu_read_lock();
+	nc = rcu_dereference(mdev->tconn->net_conf);
+	mxb = nc ? nc->max_buffers : 1000000;  /* arbitrary limit on open requests */
+	rcu_read_unlock();
+
 	return mxb;
 }
 
 static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 {
-	union drbd_state s = mdev->state;
+	union drbd_dev_state s = mdev->state;
 
 	/* DO NOT add a default clause, we want the compiler to warn us
 	 * for any newly introduced state we may have forgotten to add here */
@@ -2325,7 +2179,7 @@
 
 		/* Allow IO in BM exchange states with new protocols */
 	case C_WF_BITMAP_S:
-		if (mdev->agreed_pro_version < 96)
+		if (mdev->tconn->agreed_pro_version < 96)
 			return 0;
 		break;
 
@@ -2347,7 +2201,7 @@
 		/* disk state is stable as well. */
 		break;
 
-	/* no new io accepted during tansitional states */
+	/* no new io accepted during transitional states */
 	case D_ATTACHING:
 	case D_NEGOTIATING:
 	case D_UNKNOWN:
@@ -2359,16 +2213,18 @@
 	return 1;
 }
 
-static inline int is_susp(union drbd_state s)
+static inline int drbd_suspended(struct drbd_conf *mdev)
 {
-	return s.susp || s.susp_nod || s.susp_fen;
+	struct drbd_tconn *tconn = mdev->tconn;
+
+	return tconn->susp || tconn->susp_fen || tconn->susp_nod;
 }
 
 static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
 {
 	int mxb = drbd_get_max_buffers(mdev);
 
-	if (is_susp(mdev->state))
+	if (drbd_suspended(mdev))
 		return false;
 	if (test_bit(SUSPEND_IO, &mdev->flags))
 		return false;
@@ -2390,30 +2246,30 @@
 	return true;
 }
 
-static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count)
+static inline bool inc_ap_bio_cond(struct drbd_conf *mdev)
 {
 	bool rv = false;
 
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
 	rv = may_inc_ap_bio(mdev);
 	if (rv)
-		atomic_add(count, &mdev->ap_bio_cnt);
-	spin_unlock_irq(&mdev->req_lock);
+		atomic_inc(&mdev->ap_bio_cnt);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	return rv;
 }
 
-static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
+static inline void inc_ap_bio(struct drbd_conf *mdev)
 {
 	/* we wait here
 	 *    as long as the device is suspended
 	 *    until the bitmap is no longer on the fly during connection
-	 *    handshake as long as we would exeed the max_buffer limit.
+	 *    handshake as long as we would exceed the max_buffer limit.
 	 *
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
 
-	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count));
+	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev));
 }
 
 static inline void dec_ap_bio(struct drbd_conf *mdev)
@@ -2425,7 +2281,7 @@
 
 	if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
 		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
-			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+			drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w);
 	}
 
 	/* this currently does wake_up for every dec_ap_bio!
@@ -2435,6 +2291,12 @@
 		wake_up(&mdev->misc_wait);
 }
 
+static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev)
+{
+	return mdev->tconn->agreed_pro_version >= 97 &&
+		mdev->tconn->agreed_pro_version != 100;
+}
+
 static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
 {
 	int changed = mdev->ed_uuid != val;
@@ -2442,40 +2304,6 @@
 	return changed;
 }
 
-static inline int seq_cmp(u32 a, u32 b)
-{
-	/* we assume wrap around at 32bit.
-	 * for wrap around at 24bit (old atomic_t),
-	 * we'd have to
-	 *  a <<= 8; b <<= 8;
-	 */
-	return (s32)(a) - (s32)(b);
-}
-#define seq_lt(a, b) (seq_cmp((a), (b)) < 0)
-#define seq_gt(a, b) (seq_cmp((a), (b)) > 0)
-#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0)
-#define seq_le(a, b) (seq_cmp((a), (b)) <= 0)
-/* CAUTION: please no side effects in arguments! */
-#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b)))
-
-static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq)
-{
-	unsigned int m;
-	spin_lock(&mdev->peer_seq_lock);
-	m = seq_max(mdev->peer_seq, new_seq);
-	mdev->peer_seq = m;
-	spin_unlock(&mdev->peer_seq_lock);
-	if (m == new_seq)
-		wake_up(&mdev->seq_wait);
-}
-
-static inline void drbd_update_congested(struct drbd_conf *mdev)
-{
-	struct sock *sk = mdev->data.socket->sk;
-	if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
-		set_bit(NET_CONGESTED, &mdev->flags);
-}
-
 static inline int drbd_queue_order_type(struct drbd_conf *mdev)
 {
 	/* sorry, we currently have no working implementation
@@ -2490,10 +2318,15 @@
 {
 	int r;
 
+	if (mdev->ldev == NULL) {
+		dev_warn(DEV, "mdev->ldev == NULL in drbd_md_flush\n");
+		return;
+	}
+
 	if (test_bit(MD_NO_FUA, &mdev->flags))
 		return;
 
-	r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL);
+	r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_NOIO, NULL);
 	if (r) {
 		set_bit(MD_NO_FUA, &mdev->flags);
 		dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c
new file mode 100644
index 0000000..89c497c
--- /dev/null
+++ b/drivers/block/drbd/drbd_interval.c
@@ -0,0 +1,207 @@
+#include <asm/bug.h>
+#include <linux/rbtree_augmented.h>
+#include "drbd_interval.h"
+
+/**
+ * interval_end  -  return end of @node
+ */
+static inline
+sector_t interval_end(struct rb_node *node)
+{
+	struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb);
+	return this->end;
+}
+
+/**
+ * compute_subtree_last  -  compute end of @node
+ *
+ * The end of an interval is the highest (start + (size >> 9)) value of this
+ * node and of its children.  Called for @node and its parents whenever the end
+ * may have changed.
+ */
+static inline sector_t
+compute_subtree_last(struct drbd_interval *node)
+{
+	sector_t max = node->sector + (node->size >> 9);
+
+	if (node->rb.rb_left) {
+		sector_t left = interval_end(node->rb.rb_left);
+		if (left > max)
+			max = left;
+	}
+	if (node->rb.rb_right) {
+		sector_t right = interval_end(node->rb.rb_right);
+		if (right > max)
+			max = right;
+	}
+	return max;
+}
+
+static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+{
+	while (rb != stop) {
+		struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb);
+		sector_t subtree_last = compute_subtree_last(node);
+		if (node->end == subtree_last)
+			break;
+		node->end = subtree_last;
+		rb = rb_parent(&node->rb);
+	}
+}
+
+static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
+	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
+
+	new->end = old->end;
+}
+
+static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
+	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
+
+	new->end = old->end;
+	old->end = compute_subtree_last(old);
+}
+
+static const struct rb_augment_callbacks augment_callbacks = {
+	augment_propagate,
+	augment_copy,
+	augment_rotate,
+};
+
+/**
+ * drbd_insert_interval  -  insert a new interval into a tree
+ */
+bool
+drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
+{
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+
+	BUG_ON(!IS_ALIGNED(this->size, 512));
+
+	while (*new) {
+		struct drbd_interval *here =
+			rb_entry(*new, struct drbd_interval, rb);
+
+		parent = *new;
+		if (this->sector < here->sector)
+			new = &(*new)->rb_left;
+		else if (this->sector > here->sector)
+			new = &(*new)->rb_right;
+		else if (this < here)
+			new = &(*new)->rb_left;
+		else if (this > here)
+			new = &(*new)->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&this->rb, parent, new);
+	rb_insert_augmented(&this->rb, root, &augment_callbacks);
+	return true;
+}
+
+/**
+ * drbd_contains_interval  -  check if a tree contains a given interval
+ * @sector:	start sector of @interval
+ * @interval:	need not be a valid pointer
+ *
+ * Returns whether the tree contains the node @interval with start sector
+ * @sector.
+ * Does not dereference @interval until @interval is known to be a valid object
+ * in @tree.  Returns %false if @interval is in the tree but with a different
+ * sector number.
+ */
+bool
+drbd_contains_interval(struct rb_root *root, sector_t sector,
+		       struct drbd_interval *interval)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct drbd_interval *here =
+			rb_entry(node, struct drbd_interval, rb);
+
+		if (sector < here->sector)
+			node = node->rb_left;
+		else if (sector > here->sector)
+			node = node->rb_right;
+		else if (interval < here)
+			node = node->rb_left;
+		else if (interval > here)
+			node = node->rb_right;
+		else
+			return true;
+	}
+	return false;
+}
+
+/**
+ * drbd_remove_interval  -  remove an interval from a tree
+ */
+void
+drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
+{
+	rb_erase_augmented(&this->rb, root, &augment_callbacks);
+}
+
+/**
+ * drbd_find_overlap  - search for an interval overlapping with [sector, sector + size)
+ * @sector:	start sector
+ * @size:	size, aligned to 512 bytes
+ *
+ * Returns an interval overlapping with [sector, sector + size), or NULL if
+ * there is none.  When there is more than one overlapping interval in the
+ * tree, the interval with the lowest start sector is returned, and all other
+ * overlapping intervals will be on the right side of the tree, reachable with
+ * rb_next().
+ */
+struct drbd_interval *
+drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size)
+{
+	struct rb_node *node = root->rb_node;
+	struct drbd_interval *overlap = NULL;
+	sector_t end = sector + (size >> 9);
+
+	BUG_ON(!IS_ALIGNED(size, 512));
+
+	while (node) {
+		struct drbd_interval *here =
+			rb_entry(node, struct drbd_interval, rb);
+
+		if (node->rb_left &&
+		    sector < interval_end(node->rb_left)) {
+			/* Overlap if any must be on left side */
+			node = node->rb_left;
+		} else if (here->sector < end &&
+			   sector < here->sector + (here->size >> 9)) {
+			overlap = here;
+			break;
+		} else if (sector >= here->sector) {
+			/* Overlap if any must be on right side */
+			node = node->rb_right;
+		} else
+			break;
+	}
+	return overlap;
+}
+
+struct drbd_interval *
+drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size)
+{
+	sector_t end = sector + (size >> 9);
+	struct rb_node *node;
+
+	for (;;) {
+		node = rb_next(&i->rb);
+		if (!node)
+			return NULL;
+		i = rb_entry(node, struct drbd_interval, rb);
+		if (i->sector >= end)
+			return NULL;
+		if (sector < i->sector + (i->size >> 9))
+			return i;
+	}
+}
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
new file mode 100644
index 0000000..f38fcb0
--- /dev/null
+++ b/drivers/block/drbd/drbd_interval.h
@@ -0,0 +1,40 @@
+#ifndef __DRBD_INTERVAL_H
+#define __DRBD_INTERVAL_H
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+struct drbd_interval {
+	struct rb_node rb;
+	sector_t sector;	/* start sector of the interval */
+	unsigned int size;	/* size in bytes */
+	sector_t end;		/* highest interval end in subtree */
+	int local:1;		/* local or remote request? */
+	int waiting:1;
+};
+
+static inline void drbd_clear_interval(struct drbd_interval *i)
+{
+	RB_CLEAR_NODE(&i->rb);
+}
+
+static inline bool drbd_interval_empty(struct drbd_interval *i)
+{
+	return RB_EMPTY_NODE(&i->rb);
+}
+
+extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *);
+extern bool drbd_contains_interval(struct rb_root *, sector_t,
+				   struct drbd_interval *);
+extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *);
+extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t,
+					unsigned int);
+extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t,
+					unsigned int);
+
+#define drbd_for_each_overlap(i, root, sector, size)		\
+	for (i = drbd_find_overlap(root, sector, size);		\
+	     i;							\
+	     i = drbd_next_overlap(i, sector, size))
+
+#endif  /* __DRBD_INTERVAL_H */
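A usage sketch tying the new interval-tree API together (hypothetical caller; the sector and size values are arbitrary):

/* Illustrative only: insert, scan for overlaps, remove. */
static void example_interval_usage(struct rb_root *root,
				   struct drbd_interval *i)
{
	struct drbd_interval *o;

	drbd_clear_interval(i);
	i->sector = 2048;	/* arbitrary start sector */
	i->size = 4096;		/* bytes; must be 512-aligned */

	if (!drbd_insert_interval(root, i))
		return;		/* was already in the tree */

	/* visit every interval overlapping [1024, 1024 + (8192 >> 9)) */
	drbd_for_each_overlap(o, root, 1024, 8192) {
		/* conflict handling would go here */
	}

	drbd_remove_interval(root, i);
	drbd_clear_interval(i);
}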
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index f55683a..8c13eeb 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -56,14 +56,6 @@
 
 #include "drbd_vli.h"
 
-struct after_state_chg_work {
-	struct drbd_work w;
-	union drbd_state os;
-	union drbd_state ns;
-	enum chg_state_flags flags;
-	struct completion *done;
-};
-
 static DEFINE_MUTEX(drbd_main_mutex);
 int drbdd_init(struct drbd_thread *);
 int drbd_worker(struct drbd_thread *);
@@ -72,21 +64,17 @@
 int drbd_init(void);
 static int drbd_open(struct block_device *bdev, fmode_t mode);
 static int drbd_release(struct gendisk *gd, fmode_t mode);
-static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
-			   union drbd_state ns, enum chg_state_flags flags);
-static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static int w_md_sync(struct drbd_work *w, int unused);
 static void md_sync_timer_fn(unsigned long data);
-static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static void _tl_clear(struct drbd_conf *mdev);
+static int w_bitmap_io(struct drbd_work *w, int unused);
+static int w_go_diskless(struct drbd_work *w, int unused);
 
 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 	      "Lars Ellenberg <lars@linbit.com>");
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_VERSION(REL_VERSION);
 MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
 		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
 MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
 
@@ -98,7 +86,6 @@
 module_param(minor_count, uint, 0444);
 module_param(disable_sendpage, bool, 0644);
 module_param(allow_oos, bool, 0);
-module_param(cn_idx, uint, 0444);
 module_param(proc_details, int, 0644);
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
@@ -120,7 +107,6 @@
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 bool disable_sendpage;
 bool allow_oos;
-unsigned int cn_idx = CN_IDX_DRBD;
 int proc_details;       /* Detail level in proc drbd*/
 
 /* Module parameter for setting the user mode helper program
@@ -132,10 +118,11 @@
 /* in 2.6.x, our device mapping and config info contains our virtual gendisks
  * as member "struct gendisk *vdisk;"
  */
-struct drbd_conf **minor_table;
+struct idr minors;
+struct list_head drbd_tconns;  /* list of struct drbd_tconn */
 
 struct kmem_cache *drbd_request_cache;
-struct kmem_cache *drbd_ee_cache;	/* epoch entries */
+struct kmem_cache *drbd_ee_cache;	/* peer requests */
 struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
 mempool_t *drbd_request_mempool;
@@ -164,10 +151,15 @@
 
 struct bio *bio_alloc_drbd(gfp_t gfp_mask)
 {
+	struct bio *bio;
+
 	if (!drbd_md_io_bio_set)
 		return bio_alloc(gfp_mask, 1);
 
-	return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+	if (!bio)
+		return NULL;
+	return bio;
 }
 
 #ifdef __CHECKER__
@@ -190,158 +182,87 @@
 #endif
 
 /**
- * DOC: The transfer log
- *
- * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
- * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
- * of the list. There is always at least one &struct drbd_tl_epoch object.
- *
- * Each &struct drbd_tl_epoch has a circular double linked list of requests
- * attached.
- */
-static int tl_init(struct drbd_conf *mdev)
-{
-	struct drbd_tl_epoch *b;
-
-	/* during device minor initialization, we may well use GFP_KERNEL */
-	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
-	if (!b)
-		return 0;
-	INIT_LIST_HEAD(&b->requests);
-	INIT_LIST_HEAD(&b->w.list);
-	b->next = NULL;
-	b->br_number = 4711;
-	b->n_writes = 0;
-	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-
-	mdev->oldest_tle = b;
-	mdev->newest_tle = b;
-	INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
-	INIT_LIST_HEAD(&mdev->barrier_acked_requests);
-
-	mdev->tl_hash = NULL;
-	mdev->tl_hash_s = 0;
-
-	return 1;
-}
-
-static void tl_cleanup(struct drbd_conf *mdev)
-{
-	D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
-	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
-	kfree(mdev->oldest_tle);
-	mdev->oldest_tle = NULL;
-	kfree(mdev->unused_spare_tle);
-	mdev->unused_spare_tle = NULL;
-	kfree(mdev->tl_hash);
-	mdev->tl_hash = NULL;
-	mdev->tl_hash_s = 0;
-}
-
-/**
- * _tl_add_barrier() - Adds a barrier to the transfer log
- * @mdev:	DRBD device.
- * @new:	Barrier to be added before the current head of the TL.
- *
- * The caller must hold the req_lock.
- */
-void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
-{
-	struct drbd_tl_epoch *newest_before;
-
-	INIT_LIST_HEAD(&new->requests);
-	INIT_LIST_HEAD(&new->w.list);
-	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-	new->next = NULL;
-	new->n_writes = 0;
-
-	newest_before = mdev->newest_tle;
-	new->br_number = newest_before->br_number+1;
-	if (mdev->newest_tle != new) {
-		mdev->newest_tle->next = new;
-		mdev->newest_tle = new;
-	}
-}
-
-/**
- * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
- * @mdev:	DRBD device.
+ * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch
+ * @tconn:	DRBD connection.
  * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
  * @set_size:	Expected number of requests before that barrier.
  *
  * In case the passed barrier_nr or set_size does not match the oldest
- * &struct drbd_tl_epoch objects this function will cause a termination
- * of the connection.
+ * epoch of not yet barrier-acked requests, this function will cause a
+ * termination of the connection.
  */
-void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
-		       unsigned int set_size)
+void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
+		unsigned int set_size)
 {
-	struct drbd_tl_epoch *b, *nob; /* next old barrier */
-	struct list_head *le, *tle;
 	struct drbd_request *r;
+	struct drbd_request *req = NULL;
+	int expect_epoch = 0;
+	int expect_size = 0;
 
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&tconn->req_lock);
 
-	b = mdev->oldest_tle;
+	/* find oldest not yet barrier-acked write request,
+	 * count writes in its epoch. */
+	list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
+		const unsigned s = r->rq_state;
+		if (!req) {
+			if (!(s & RQ_WRITE))
+				continue;
+			if (!(s & RQ_NET_MASK))
+				continue;
+			if (s & RQ_NET_DONE)
+				continue;
+			req = r;
+			expect_epoch = req->epoch;
+			expect_size++;
+		} else {
+			if (r->epoch != expect_epoch)
+				break;
+			if (!(s & RQ_WRITE))
+				continue;
+			/* if (s & RQ_DONE): not expected */
+			/* if (!(s & RQ_NET_MASK)): not expected */
+			expect_size++;
+		}
+	}
 
 	/* first some paranoia code */
-	if (b == NULL) {
-		dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
-			barrier_nr);
+	if (req == NULL) {
+		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
+			 barrier_nr);
 		goto bail;
 	}
-	if (b->br_number != barrier_nr) {
-		dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
-			barrier_nr, b->br_number);
-		goto bail;
-	}
-	if (b->n_writes != set_size) {
-		dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
-			barrier_nr, set_size, b->n_writes);
+	if (expect_epoch != barrier_nr) {
+		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
+			 barrier_nr, expect_epoch);
 		goto bail;
 	}
 
-	/* Clean up list of requests processed during current epoch */
-	list_for_each_safe(le, tle, &b->requests) {
-		r = list_entry(le, struct drbd_request, tl_requests);
-		_req_mod(r, barrier_acked);
-	}
-	/* There could be requests on the list waiting for completion
-	   of the write to the local disk. To avoid corruptions of
-	   slab's data structures we have to remove the lists head.
-
-	   Also there could have been a barrier ack out of sequence, overtaking
-	   the write acks - which would be a bug and violating write ordering.
-	   To not deadlock in case we lose connection while such requests are
-	   still pending, we need some way to find them for the
-	   _req_mode(connection_lost_while_pending).
-
-	   These have been list_move'd to the out_of_sequence_requests list in
-	   _req_mod(, barrier_acked) above.
-	   */
-	list_splice_init(&b->requests, &mdev->barrier_acked_requests);
-
-	nob = b->next;
-	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
-		_tl_add_barrier(mdev, b);
-		if (nob)
-			mdev->oldest_tle = nob;
-		/* if nob == NULL b was the only barrier, and becomes the new
-		   barrier. Therefore mdev->oldest_tle points already to b */
-	} else {
-		D_ASSERT(nob != NULL);
-		mdev->oldest_tle = nob;
-		kfree(b);
+	if (expect_size != set_size) {
+		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
+			 barrier_nr, set_size, expect_size);
+		goto bail;
 	}
 
-	spin_unlock_irq(&mdev->req_lock);
-	dec_ap_pending(mdev);
+	/* Clean up list of requests processed during current epoch. */
+	/* this extra list walk restart is paranoia,
+	 * to catch requests being barrier-acked "unexpectedly".
+	 * It usually should find the same req again, or some READ preceding it. */
+	list_for_each_entry(req, &tconn->transfer_log, tl_requests)
+		if (req->epoch == expect_epoch)
+			break;
+	list_for_each_entry_safe_from(req, r, &tconn->transfer_log, tl_requests) {
+		if (req->epoch != expect_epoch)
+			break;
+		_req_mod(req, BARRIER_ACKED);
+	}
+	spin_unlock_irq(&tconn->req_lock);
 
 	return;
 
 bail:
-	spin_unlock_irq(&mdev->req_lock);
-	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+	spin_unlock_irq(&tconn->req_lock);
+	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
 }
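
The rewritten tl_release() above no longer consults per-epoch bookkeeping; it derives the expected epoch number and write count from a single walk over the flat transfer log. A compilable userspace sketch of that first counting pass, with flag values and struct layout chosen for illustration only:

    #include <stdio.h>

    #define RQ_WRITE	(1 << 0)
    #define RQ_NET_MASK	(1 << 1)
    #define RQ_NET_DONE	(1 << 2)

    struct req { unsigned state; int epoch; struct req *next; };

    /* First pass of the tl_release() logic: find the oldest write that is
     * not yet barrier-acked and count the writes in its epoch. */
    static int count_epoch_writes(struct req *log, int *epoch_out)
    {
    	struct req *first = NULL;
    	int n = 0;

    	for (struct req *r = log; r; r = r->next) {
    		if (!first) {
    			if (!(r->state & RQ_WRITE) ||
    			    !(r->state & RQ_NET_MASK) ||
    			    (r->state & RQ_NET_DONE))
    				continue;
    			first = r;
    			*epoch_out = r->epoch;
    			n = 1;
    		} else {
    			if (r->epoch != *epoch_out)
    				break;	/* epochs are contiguous in the log */
    			if (r->state & RQ_WRITE)
    				n++;
    		}
    	}
    	return first ? n : -1;
    }

    int main(void)
    {
    	struct req c = { RQ_WRITE | RQ_NET_MASK, 8, NULL };
    	struct req b = { RQ_WRITE | RQ_NET_MASK, 8, &c };
    	struct req a = { RQ_WRITE | RQ_NET_MASK | RQ_NET_DONE, 7, &b };
    	int epoch, n = count_epoch_writes(&a, &epoch);

    	printf("epoch %d has %d pending writes\n", epoch, n);
    	return 0;
    }
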
 
 
@@ -350,85 +271,24 @@
  * @mdev:	DRBD device.
  * @what:       The action/event to perform with all request objects
  *
- * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
- * restart_frozen_disk_io.
+ * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
+ * RESTART_FROZEN_DISK_IO.
  */
-static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+/* must hold tconn->req_lock */
+void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 {
-	struct drbd_tl_epoch *b, *tmp, **pn;
-	struct list_head *le, *tle, carry_reads;
-	struct drbd_request *req;
-	int rv, n_writes, n_reads;
+	struct drbd_request *req, *r;
 
-	b = mdev->oldest_tle;
-	pn = &mdev->oldest_tle;
-	while (b) {
-		n_writes = 0;
-		n_reads = 0;
-		INIT_LIST_HEAD(&carry_reads);
-		list_for_each_safe(le, tle, &b->requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			rv = _req_mod(req, what);
-
-			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
-			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
-		}
-		tmp = b->next;
-
-		if (n_writes) {
-			if (what == resend) {
-				b->n_writes = n_writes;
-				if (b->w.cb == NULL) {
-					b->w.cb = w_send_barrier;
-					inc_ap_pending(mdev);
-					set_bit(CREATE_BARRIER, &mdev->flags);
-				}
-
-				drbd_queue_work(&mdev->data.work, &b->w);
-			}
-			pn = &b->next;
-		} else {
-			if (n_reads)
-				list_add(&carry_reads, &b->requests);
-			/* there could still be requests on that ring list,
-			 * in case local io is still pending */
-			list_del(&b->requests);
-
-			/* dec_ap_pending corresponding to queue_barrier.
-			 * the newest barrier may not have been queued yet,
-			 * in which case w.cb is still NULL. */
-			if (b->w.cb != NULL)
-				dec_ap_pending(mdev);
-
-			if (b == mdev->newest_tle) {
-				/* recycle, but reinit! */
-				D_ASSERT(tmp == NULL);
-				INIT_LIST_HEAD(&b->requests);
-				list_splice(&carry_reads, &b->requests);
-				INIT_LIST_HEAD(&b->w.list);
-				b->w.cb = NULL;
-				b->br_number = net_random();
-				b->n_writes = 0;
-
-				*pn = b;
-				break;
-			}
-			*pn = tmp;
-			kfree(b);
-		}
-		b = tmp;
-		list_splice(&carry_reads, &b->requests);
-	}
-
-	/* Actions operating on the disk state, also want to work on
-	   requests that got barrier acked. */
-
-	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
-		req = list_entry(le, struct drbd_request, tl_requests);
+	list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests)
 		_req_mod(req, what);
-	}
 }
 
+void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
+{
+	spin_lock_irq(&tconn->req_lock);
+	_tl_restart(tconn, what);
+	spin_unlock_irq(&tconn->req_lock);
+}
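
tl_restart()/_tl_restart() follow the usual kernel convention in which the underscore-prefixed variant assumes the caller already holds the lock, and the plain variant is a take-the-lock wrapper. A tiny model of that convention, with a pthread mutex standing in for tconn->req_lock:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
    static int restarted;

    /* _tl_restart() analogue: caller must hold req_lock. */
    static void _restart(void)
    {
    	restarted++;
    }

    /* tl_restart() analogue: takes the lock, delegates to the locked variant. */
    static void restart(void)
    {
    	pthread_mutex_lock(&req_lock);
    	_restart();
    	pthread_mutex_unlock(&req_lock);
    }

    int main(void)
    {
    	restart();
    	printf("restarted %d time(s)\n", restarted);
    	return 0;
    }
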
 
 /**
  * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -438,43 +298,9 @@
 * by the requests on the transfer log gets marked as out of sync. Called from the
  * receiver thread and the worker thread.
  */
-void tl_clear(struct drbd_conf *mdev)
+void tl_clear(struct drbd_tconn *tconn)
 {
-	spin_lock_irq(&mdev->req_lock);
-	_tl_clear(mdev);
-	spin_unlock_irq(&mdev->req_lock);
-}
-
-static void _tl_clear(struct drbd_conf *mdev)
-{
-	struct list_head *le, *tle;
-	struct drbd_request *r;
-
-	_tl_restart(mdev, connection_lost_while_pending);
-
-	/* we expect this list to be empty. */
-	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
-
-	/* but just in case, clean it up anyways! */
-	list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
-		r = list_entry(le, struct drbd_request, tl_requests);
-		/* It would be nice to complete outside of spinlock.
-		 * But this is easier for now. */
-		_req_mod(r, connection_lost_while_pending);
-	}
-
-	/* ensure bit indicating barrier is required is clear */
-	clear_bit(CREATE_BARRIER, &mdev->flags);
-
-	memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
-
-}
-
-void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
-{
-	spin_lock_irq(&mdev->req_lock);
-	_tl_restart(mdev, what);
-	spin_unlock_irq(&mdev->req_lock);
+	tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
 }
 
 /**
@@ -483,1377 +309,131 @@
  */
 void tl_abort_disk_io(struct drbd_conf *mdev)
 {
-	struct drbd_tl_epoch *b;
-	struct list_head *le, *tle;
-	struct drbd_request *req;
+	struct drbd_tconn *tconn = mdev->tconn;
+	struct drbd_request *req, *r;
 
-	spin_lock_irq(&mdev->req_lock);
-	b = mdev->oldest_tle;
-	while (b) {
-		list_for_each_safe(le, tle, &b->requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			if (!(req->rq_state & RQ_LOCAL_PENDING))
-				continue;
-			_req_mod(req, abort_disk_io);
-		}
-		b = b->next;
-	}
-
-	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
-		req = list_entry(le, struct drbd_request, tl_requests);
+	spin_lock_irq(&tconn->req_lock);
+	list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
 		if (!(req->rq_state & RQ_LOCAL_PENDING))
 			continue;
-		_req_mod(req, abort_disk_io);
+		if (req->w.mdev != mdev)
+			continue;
+		_req_mod(req, ABORT_DISK_IO);
 	}
-
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&tconn->req_lock);
 }
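
Since the transfer log is now per connection rather than per device, tl_abort_disk_io() has to skip requests owned by other volumes; that is what the req->w.mdev != mdev test above does. A userspace sketch of aborting only one owner's locally pending requests on a shared list (field names are illustrative):

    #include <stdio.h>

    struct req { int owner; int local_pending; struct req *next; };

    /* Abort only the locally pending requests of one volume, leaving the
     * rest of the connection-wide list untouched. */
    static int abort_for_owner(struct req *log, int owner)
    {
    	int aborted = 0;

    	for (struct req *r = log; r; r = r->next) {
    		if (!r->local_pending)
    			continue;
    		if (r->owner != owner)
    			continue;
    		r->local_pending = 0;	/* stands in for _req_mod(req, ABORT_DISK_IO) */
    		aborted++;
    	}
    	return aborted;
    }

    int main(void)
    {
    	struct req c = { 1, 1, NULL };
    	struct req b = { 2, 1, &c };
    	struct req a = { 1, 1, &b };

    	printf("aborted %d requests of volume 1\n", abort_for_owner(&a, 1));
    	return 0;
    }
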
 
-/**
- * cl_wide_st_chg() - true if the state change is a cluster wide one
- * @mdev:	DRBD device.
- * @os:		old (current) state.
- * @ns:		new (wanted) state.
- */
-static int cl_wide_st_chg(struct drbd_conf *mdev,
-			  union drbd_state os, union drbd_state ns)
-{
-	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
-		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
-		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
-		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
-		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
-		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
-		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
-}
-
-enum drbd_state_rv
-drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-		  union drbd_state mask, union drbd_state val)
-{
-	unsigned long flags;
-	union drbd_state os, ns;
-	enum drbd_state_rv rv;
-
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	rv = _drbd_set_state(mdev, ns, f, NULL);
-	ns = mdev->state;
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	return rv;
-}
-
-/**
- * drbd_force_state() - Impose a change which happens outside our control on our state
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- */
-void drbd_force_state(struct drbd_conf *mdev,
-	union drbd_state mask, union drbd_state val)
-{
-	drbd_change_state(mdev, CS_HARD, mask, val);
-}
-
-static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
-static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
-						    union drbd_state,
-						    union drbd_state);
-enum sanitize_state_warnings {
-	NO_WARNING,
-	ABORTED_ONLINE_VERIFY,
-	ABORTED_RESYNC,
-	CONNECTION_LOST_NEGOTIATING,
-	IMPLICITLY_UPGRADED_DISK,
-	IMPLICITLY_UPGRADED_PDSK,
-};
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, enum sanitize_state_warnings *warn);
-int drbd_send_state_req(struct drbd_conf *,
-			union drbd_state, union drbd_state);
-
-static enum drbd_state_rv
-_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
-	     union drbd_state val)
-{
-	union drbd_state os, ns;
-	unsigned long flags;
-	enum drbd_state_rv rv;
-
-	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
-		return SS_CW_SUCCESS;
-
-	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
-		return SS_CW_FAILED_BY_PEER;
-
-	rv = 0;
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	ns = sanitize_state(mdev, os, ns, NULL);
-
-	if (!cl_wide_st_chg(mdev, os, ns))
-		rv = SS_CW_NO_NEED;
-	if (!rv) {
-		rv = is_valid_state(mdev, ns);
-		if (rv == SS_SUCCESS) {
-			rv = is_valid_state_transition(mdev, ns, os);
-			if (rv == SS_SUCCESS)
-				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
-		}
-	}
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	return rv;
-}
-
-/**
- * drbd_req_state() - Perform an eventually cluster wide state change
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- * @f:		flags
- *
- * Should not be called directly, use drbd_request_state() or
- * _drbd_request_state().
- */
-static enum drbd_state_rv
-drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
-	       union drbd_state val, enum chg_state_flags f)
-{
-	struct completion done;
-	unsigned long flags;
-	union drbd_state os, ns;
-	enum drbd_state_rv rv;
-
-	init_completion(&done);
-
-	if (f & CS_SERIALIZE)
-		mutex_lock(&mdev->state_mutex);
-
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	ns = sanitize_state(mdev, os, ns, NULL);
-
-	if (cl_wide_st_chg(mdev, os, ns)) {
-		rv = is_valid_state(mdev, ns);
-		if (rv == SS_SUCCESS)
-			rv = is_valid_state_transition(mdev, ns, os);
-		spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-		if (rv < SS_SUCCESS) {
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-
-		drbd_state_lock(mdev);
-		if (!drbd_send_state_req(mdev, mask, val)) {
-			drbd_state_unlock(mdev);
-			rv = SS_CW_FAILED_BY_PEER;
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-
-		wait_event(mdev->state_wait,
-			(rv = _req_st_cond(mdev, mask, val)));
-
-		if (rv < SS_SUCCESS) {
-			drbd_state_unlock(mdev);
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-		spin_lock_irqsave(&mdev->req_lock, flags);
-		os = mdev->state;
-		ns.i = (os.i & ~mask.i) | val.i;
-		rv = _drbd_set_state(mdev, ns, f, &done);
-		drbd_state_unlock(mdev);
-	} else {
-		rv = _drbd_set_state(mdev, ns, f, &done);
-	}
-
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
-		D_ASSERT(current != mdev->worker.task);
-		wait_for_completion(&done);
-	}
-
-abort:
-	if (f & CS_SERIALIZE)
-		mutex_unlock(&mdev->state_mutex);
-
-	return rv;
-}
-
-/**
- * _drbd_request_state() - Request a state change (with flags)
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- * @f:		flags
- *
- * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
- * flag, or when logging of failed state change requests is not desired.
- */
-enum drbd_state_rv
-_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
-		    union drbd_state val, enum chg_state_flags f)
-{
-	enum drbd_state_rv rv;
-
-	wait_event(mdev->state_wait,
-		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
-
-	return rv;
-}
-
-static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
-{
-	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
-	    name,
-	    drbd_conn_str(ns.conn),
-	    drbd_role_str(ns.role),
-	    drbd_role_str(ns.peer),
-	    drbd_disk_str(ns.disk),
-	    drbd_disk_str(ns.pdsk),
-	    is_susp(ns) ? 's' : 'r',
-	    ns.aftr_isp ? 'a' : '-',
-	    ns.peer_isp ? 'p' : '-',
-	    ns.user_isp ? 'u' : '-'
-	    );
-}
-
-void print_st_err(struct drbd_conf *mdev, union drbd_state os,
-	          union drbd_state ns, enum drbd_state_rv err)
-{
-	if (err == SS_IN_TRANSIENT_STATE)
-		return;
-	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
-	print_st(mdev, " state", os);
-	print_st(mdev, "wanted", ns);
-}
-
-
-/**
- * is_valid_state() - Returns an SS_ error code if ns is not valid
- * @mdev:	DRBD device.
- * @ns:		State to consider.
- */
-static enum drbd_state_rv
-is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
-{
-	/* See drbd_state_sw_errors in drbd_strings.c */
-
-	enum drbd_fencing_p fp;
-	enum drbd_state_rv rv = SS_SUCCESS;
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	if (get_net_conf(mdev)) {
-		if (!mdev->net_conf->two_primaries &&
-		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
-			rv = SS_TWO_PRIMARIES;
-		put_net_conf(mdev);
-	}
-
-	if (rv <= 0)
-		/* already found a reason to abort */;
-	else if (ns.role == R_SECONDARY && mdev->open_cnt)
-		rv = SS_DEVICE_IN_USE;
-
-	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if (fp >= FP_RESOURCE &&
-		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
-		rv = SS_PRIMARY_NOP;
-
-	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
-		rv = SS_NO_LOCAL_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
-		rv = SS_NO_REMOTE_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if ((ns.conn == C_CONNECTED ||
-		  ns.conn == C_WF_BITMAP_S ||
-		  ns.conn == C_SYNC_SOURCE ||
-		  ns.conn == C_PAUSED_SYNC_S) &&
-		  ns.disk == D_OUTDATED)
-		rv = SS_CONNECTED_OUTDATES;
-
-	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		 (mdev->sync_conf.verify_alg[0] == 0))
-		rv = SS_NO_VERIFY_ALG;
-
-	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		  mdev->agreed_pro_version < 88)
-		rv = SS_NOT_SUPPORTED;
-
-	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
-		rv = SS_CONNECTED_OUTDATES;
-
-	return rv;
-}
-
-/**
- * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
- * @mdev:	DRBD device.
- * @ns:		new state.
- * @os:		old state.
- */
-static enum drbd_state_rv
-is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
-			  union drbd_state os)
-{
-	enum drbd_state_rv rv = SS_SUCCESS;
-
-	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
-	    os.conn > C_CONNECTED)
-		rv = SS_RESYNC_RUNNING;
-
-	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
-		rv = SS_ALREADY_STANDALONE;
-
-	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
-		rv = SS_IS_DISKLESS;
-
-	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
-		rv = SS_NO_NET_CONFIG;
-
-	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
-		rv = SS_LOWER_THAN_OUTDATED;
-
-	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
-		rv = SS_IN_TRANSIENT_STATE;
-
-	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
-		rv = SS_IN_TRANSIENT_STATE;
-
-	/* While establishing a connection only allow cstate to change.
-	   Delay/refuse role changes, detach attach etc... */
-	if (test_bit(STATE_SENT, &mdev->flags) &&
-	    !(os.conn == C_WF_REPORT_PARAMS ||
-	      (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
-		rv = SS_IN_TRANSIENT_STATE;
-
-	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
-		rv = SS_NEED_CONNECTION;
-
-	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-	    ns.conn != os.conn && os.conn > C_CONNECTED)
-		rv = SS_RESYNC_RUNNING;
-
-	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
-	    os.conn < C_CONNECTED)
-		rv = SS_NEED_CONNECTION;
-
-	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
-	    && os.conn < C_WF_REPORT_PARAMS)
-		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
-
-	return rv;
-}
-
-static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
-{
-	static const char *msg_table[] = {
-		[NO_WARNING] = "",
-		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
-		[ABORTED_RESYNC] = "Resync aborted.",
-		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
-		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
-		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
-	};
-
-	if (warn != NO_WARNING)
-		dev_warn(DEV, "%s\n", msg_table[warn]);
-}
-
-/**
- * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
- * @mdev:	DRBD device.
- * @os:		old state.
- * @ns:		new state.
- * @warn_sync_abort:
- *
- * When we loose connection, we have to set the state of the peers disk (pdsk)
- * to D_UNKNOWN. This rule and many more along those lines are in this function.
- */
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, enum sanitize_state_warnings *warn)
-{
-	enum drbd_fencing_p fp;
-	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
-
-	if (warn)
-		*warn = NO_WARNING;
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	/* Disallow Network errors to configure a device's network part */
-	if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
-	    os.conn <= C_DISCONNECTING)
-		ns.conn = os.conn;
-
-	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
-	 * If you try to go into some Sync* state, that shall fail (elsewhere). */
-	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
-	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
-		ns.conn = os.conn;
-
-	/* we cannot fail (again) if we already detached */
-	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
-		ns.disk = D_DISKLESS;
-
-	/* After C_DISCONNECTING only C_STANDALONE may follow */
-	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
-		ns.conn = os.conn;
-
-	if (ns.conn < C_CONNECTED) {
-		ns.peer_isp = 0;
-		ns.peer = R_UNKNOWN;
-		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
-			ns.pdsk = D_UNKNOWN;
-	}
-
-	/* Clear the aftr_isp when becoming unconfigured */
-	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
-		ns.aftr_isp = 0;
-
-	/* Abort resync if a disk fails/detaches */
-	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
-	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
-		if (warn)
-			*warn =	os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
-				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
-		ns.conn = C_CONNECTED;
-	}
-
-	/* Connection breaks down before we finished "Negotiating" */
-	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
-	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
-		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
-			ns.disk = mdev->new_state_tmp.disk;
-			ns.pdsk = mdev->new_state_tmp.pdsk;
-		} else {
-			if (warn)
-				*warn = CONNECTION_LOST_NEGOTIATING;
-			ns.disk = D_DISKLESS;
-			ns.pdsk = D_UNKNOWN;
-		}
-		put_ldev(mdev);
-	}
-
-	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
-	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
-		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
-			ns.disk = D_UP_TO_DATE;
-		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
-			ns.pdsk = D_UP_TO_DATE;
-	}
-
-	/* Implications of the connection stat on the disk states */
-	disk_min = D_DISKLESS;
-	disk_max = D_UP_TO_DATE;
-	pdsk_min = D_INCONSISTENT;
-	pdsk_max = D_UNKNOWN;
-	switch ((enum drbd_conns)ns.conn) {
-	case C_WF_BITMAP_T:
-	case C_PAUSED_SYNC_T:
-	case C_STARTING_SYNC_T:
-	case C_WF_SYNC_UUID:
-	case C_BEHIND:
-		disk_min = D_INCONSISTENT;
-		disk_max = D_OUTDATED;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_VERIFY_S:
-	case C_VERIFY_T:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_CONNECTED:
-		disk_min = D_DISKLESS;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_DISKLESS;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_WF_BITMAP_S:
-	case C_PAUSED_SYNC_S:
-	case C_STARTING_SYNC_S:
-	case C_AHEAD:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_INCONSISTENT;
-		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
-		break;
-	case C_SYNC_TARGET:
-		disk_min = D_INCONSISTENT;
-		disk_max = D_INCONSISTENT;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_SYNC_SOURCE:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_INCONSISTENT;
-		pdsk_max = D_INCONSISTENT;
-		break;
-	case C_STANDALONE:
-	case C_DISCONNECTING:
-	case C_UNCONNECTED:
-	case C_TIMEOUT:
-	case C_BROKEN_PIPE:
-	case C_NETWORK_FAILURE:
-	case C_PROTOCOL_ERROR:
-	case C_TEAR_DOWN:
-	case C_WF_CONNECTION:
-	case C_WF_REPORT_PARAMS:
-	case C_MASK:
-		break;
-	}
-	if (ns.disk > disk_max)
-		ns.disk = disk_max;
-
-	if (ns.disk < disk_min) {
-		if (warn)
-			*warn = IMPLICITLY_UPGRADED_DISK;
-		ns.disk = disk_min;
-	}
-	if (ns.pdsk > pdsk_max)
-		ns.pdsk = pdsk_max;
-
-	if (ns.pdsk < pdsk_min) {
-		if (warn)
-			*warn = IMPLICITLY_UPGRADED_PDSK;
-		ns.pdsk = pdsk_min;
-	}
-
-	if (fp == FP_STONITH &&
-	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
-	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
-		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
-
-	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
-	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
-	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
-		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
-
-	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
-		if (ns.conn == C_SYNC_SOURCE)
-			ns.conn = C_PAUSED_SYNC_S;
-		if (ns.conn == C_SYNC_TARGET)
-			ns.conn = C_PAUSED_SYNC_T;
-	} else {
-		if (ns.conn == C_PAUSED_SYNC_S)
-			ns.conn = C_SYNC_SOURCE;
-		if (ns.conn == C_PAUSED_SYNC_T)
-			ns.conn = C_SYNC_TARGET;
-	}
-
-	return ns;
-}
-
-/* helper for __drbd_set_state */
-static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
-{
-	if (mdev->agreed_pro_version < 90)
-		mdev->ov_start_sector = 0;
-	mdev->rs_total = drbd_bm_bits(mdev);
-	mdev->ov_position = 0;
-	if (cs == C_VERIFY_T) {
-		/* starting online verify from an arbitrary position
-		 * does not fit well into the existing protocol.
-		 * on C_VERIFY_T, we initialize ov_left and friends
-		 * implicitly in receive_DataRequest once the
-		 * first P_OV_REQUEST is received */
-		mdev->ov_start_sector = ~(sector_t)0;
-	} else {
-		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
-		if (bit >= mdev->rs_total) {
-			mdev->ov_start_sector =
-				BM_BIT_TO_SECT(mdev->rs_total - 1);
-			mdev->rs_total = 1;
-		} else
-			mdev->rs_total -= bit;
-		mdev->ov_position = mdev->ov_start_sector;
-	}
-	mdev->ov_left = mdev->rs_total;
-}
-
-static void drbd_resume_al(struct drbd_conf *mdev)
-{
-	if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
-		dev_info(DEV, "Resumed AL updates\n");
-}
-
-/**
- * __drbd_set_state() - Set a new DRBD state
- * @mdev:	DRBD device.
- * @ns:		new state.
- * @flags:	Flags
- * @done:	Optional completion, that will get completed after the after_state_ch() finished
- *
- * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
- */
-enum drbd_state_rv
-__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
-	         enum chg_state_flags flags, struct completion *done)
-{
-	union drbd_state os;
-	enum drbd_state_rv rv = SS_SUCCESS;
-	enum sanitize_state_warnings ssw;
-	struct after_state_chg_work *ascw;
-
-	os = mdev->state;
-
-	ns = sanitize_state(mdev, os, ns, &ssw);
-
-	if (ns.i == os.i)
-		return SS_NOTHING_TO_DO;
-
-	if (!(flags & CS_HARD)) {
-		/*  pre-state-change checks ; only look at ns  */
-		/* See drbd_state_sw_errors in drbd_strings.c */
-
-		rv = is_valid_state(mdev, ns);
-		if (rv < SS_SUCCESS) {
-			/* If the old state was illegal as well, then let
-			   this happen...*/
-
-			if (is_valid_state(mdev, os) == rv)
-				rv = is_valid_state_transition(mdev, ns, os);
-		} else
-			rv = is_valid_state_transition(mdev, ns, os);
-	}
-
-	if (rv < SS_SUCCESS) {
-		if (flags & CS_VERBOSE)
-			print_st_err(mdev, os, ns, rv);
-		return rv;
-	}
-
-	print_sanitize_warnings(mdev, ssw);
-
-	{
-	char *pbp, pb[300];
-	pbp = pb;
-	*pbp = 0;
-	if (ns.role != os.role)
-		pbp += sprintf(pbp, "role( %s -> %s ) ",
-			       drbd_role_str(os.role),
-			       drbd_role_str(ns.role));
-	if (ns.peer != os.peer)
-		pbp += sprintf(pbp, "peer( %s -> %s ) ",
-			       drbd_role_str(os.peer),
-			       drbd_role_str(ns.peer));
-	if (ns.conn != os.conn)
-		pbp += sprintf(pbp, "conn( %s -> %s ) ",
-			       drbd_conn_str(os.conn),
-			       drbd_conn_str(ns.conn));
-	if (ns.disk != os.disk)
-		pbp += sprintf(pbp, "disk( %s -> %s ) ",
-			       drbd_disk_str(os.disk),
-			       drbd_disk_str(ns.disk));
-	if (ns.pdsk != os.pdsk)
-		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
-			       drbd_disk_str(os.pdsk),
-			       drbd_disk_str(ns.pdsk));
-	if (is_susp(ns) != is_susp(os))
-		pbp += sprintf(pbp, "susp( %d -> %d ) ",
-			       is_susp(os),
-			       is_susp(ns));
-	if (ns.aftr_isp != os.aftr_isp)
-		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
-			       os.aftr_isp,
-			       ns.aftr_isp);
-	if (ns.peer_isp != os.peer_isp)
-		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
-			       os.peer_isp,
-			       ns.peer_isp);
-	if (ns.user_isp != os.user_isp)
-		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
-			       os.user_isp,
-			       ns.user_isp);
-	dev_info(DEV, "%s\n", pb);
-	}
-
-	/* solve the race between becoming unconfigured,
-	 * worker doing the cleanup, and
-	 * admin reconfiguring us:
-	 * on (re)configure, first set CONFIG_PENDING,
-	 * then wait for a potentially exiting worker,
-	 * start the worker, and schedule one no_op.
-	 * then proceed with configuration.
-	 */
-	if (ns.disk == D_DISKLESS &&
-	    ns.conn == C_STANDALONE &&
-	    ns.role == R_SECONDARY &&
-	    !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
-		set_bit(DEVICE_DYING, &mdev->flags);
-
-	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
-	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
-	 * drbd_ldev_destroy() won't happen before our corresponding
-	 * after_state_ch works run, where we put_ldev again. */
-	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
-	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
-		atomic_inc(&mdev->local_cnt);
-
-	mdev->state = ns;
-
-	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
-		drbd_print_uuids(mdev, "attached to UUIDs");
-
-	wake_up(&mdev->misc_wait);
-	wake_up(&mdev->state_wait);
-
-	/* aborted verify run. log the last position */
-	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
-	    ns.conn < C_CONNECTED) {
-		mdev->ov_start_sector =
-			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
-		dev_info(DEV, "Online Verify reached sector %llu\n",
-			(unsigned long long)mdev->ov_start_sector);
-	}
-
-	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
-	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
-		dev_info(DEV, "Syncer continues.\n");
-		mdev->rs_paused += (long)jiffies
-				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
-		if (ns.conn == C_SYNC_TARGET)
-			mod_timer(&mdev->resync_timer, jiffies);
-	}
-
-	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
-	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
-		dev_info(DEV, "Resync suspended\n");
-		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
-	}
-
-	if (os.conn == C_CONNECTED &&
-	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
-		unsigned long now = jiffies;
-		int i;
-
-		set_ov_position(mdev, ns.conn);
-		mdev->rs_start = now;
-		mdev->rs_last_events = 0;
-		mdev->rs_last_sect_ev = 0;
-		mdev->ov_last_oos_size = 0;
-		mdev->ov_last_oos_start = 0;
-
-		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
-			mdev->rs_mark_left[i] = mdev->ov_left;
-			mdev->rs_mark_time[i] = now;
-		}
-
-		drbd_rs_controller_reset(mdev);
-
-		if (ns.conn == C_VERIFY_S) {
-			dev_info(DEV, "Starting Online Verify from sector %llu\n",
-					(unsigned long long)mdev->ov_position);
-			mod_timer(&mdev->resync_timer, jiffies);
-		}
-	}
-
-	if (get_ldev(mdev)) {
-		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
-						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
-						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
-
-		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
-			mdf |= MDF_CRASHED_PRIMARY;
-		if (mdev->state.role == R_PRIMARY ||
-		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
-			mdf |= MDF_PRIMARY_IND;
-		if (mdev->state.conn > C_WF_REPORT_PARAMS)
-			mdf |= MDF_CONNECTED_IND;
-		if (mdev->state.disk > D_INCONSISTENT)
-			mdf |= MDF_CONSISTENT;
-		if (mdev->state.disk > D_OUTDATED)
-			mdf |= MDF_WAS_UP_TO_DATE;
-		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
-			mdf |= MDF_PEER_OUT_DATED;
-		if (mdf != mdev->ldev->md.flags) {
-			mdev->ldev->md.flags = mdf;
-			drbd_md_mark_dirty(mdev);
-		}
-		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
-			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
-		put_ldev(mdev);
-	}
-
-	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
-	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
-	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
-		set_bit(CONSIDER_RESYNC, &mdev->flags);
-
-	/* Receiver should clean up itself */
-	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
-		drbd_thread_stop_nowait(&mdev->receiver);
-
-	/* Now the receiver finished cleaning up itself, it should die */
-	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
-		drbd_thread_stop_nowait(&mdev->receiver);
-
-	/* Upon network failure, we need to restart the receiver. */
-	if (os.conn > C_WF_CONNECTION &&
-	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
-		drbd_thread_restart_nowait(&mdev->receiver);
-
-	/* Resume AL writing if we get a connection */
-	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
-		drbd_resume_al(mdev);
-
-	/* remember last connect and attach times so request_timer_fn() won't
-	 * kill newly established sessions while we are still trying to thaw
-	 * previously frozen IO */
-	if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS)
-		mdev->last_reconnect_jif = jiffies;
-	if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
-	    ns.disk > D_NEGOTIATING)
-		mdev->last_reattach_jif = jiffies;
-
-	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
-	if (ascw) {
-		ascw->os = os;
-		ascw->ns = ns;
-		ascw->flags = flags;
-		ascw->w.cb = w_after_state_ch;
-		ascw->done = done;
-		drbd_queue_work(&mdev->data.work, &ascw->w);
-	} else {
-		dev_warn(DEV, "Could not kmalloc an ascw\n");
-	}
-
-	return rv;
-}
-
-static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
-{
-	struct after_state_chg_work *ascw =
-		container_of(w, struct after_state_chg_work, w);
-	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
-	if (ascw->flags & CS_WAIT_COMPLETE) {
-		D_ASSERT(ascw->done != NULL);
-		complete(ascw->done);
-	}
-	kfree(ascw);
-
-	return 1;
-}
-
-static void abw_start_sync(struct drbd_conf *mdev, int rv)
-{
-	if (rv) {
-		dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
-		_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
-		return;
-	}
-
-	switch (mdev->state.conn) {
-	case C_STARTING_SYNC_T:
-		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
-		break;
-	case C_STARTING_SYNC_S:
-		drbd_start_resync(mdev, C_SYNC_SOURCE);
-		break;
-	}
-}
-
-int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
-		int (*io_fn)(struct drbd_conf *),
-		char *why, enum bm_flag flags)
-{
-	int rv;
-
-	D_ASSERT(current == mdev->worker.task);
-
-	/* open coded non-blocking drbd_suspend_io(mdev); */
-	set_bit(SUSPEND_IO, &mdev->flags);
-
-	drbd_bm_lock(mdev, why, flags);
-	rv = io_fn(mdev);
-	drbd_bm_unlock(mdev);
-
-	drbd_resume_io(mdev);
-
-	return rv;
-}
-
-/**
- * after_state_ch() - Perform after state change actions that may sleep
- * @mdev:	DRBD device.
- * @os:		old state.
- * @ns:		new state.
- * @flags:	Flags
- */
-static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
-			   union drbd_state ns, enum chg_state_flags flags)
-{
-	enum drbd_fencing_p fp;
-	enum drbd_req_event what = nothing;
-	union drbd_state nsm = (union drbd_state){ .i = -1 };
-
-	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
-		clear_bit(CRASHED_PRIMARY, &mdev->flags);
-		if (mdev->p_uuid)
-			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
-	}
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	/* Inform userspace about the change... */
-	drbd_bcast_state(mdev, ns);
-
-	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
-	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
-		drbd_khelper(mdev, "pri-on-incon-degr");
-
-	/* Here we have the actions that are performed after a
-	   state change. This function might sleep */
-
-	if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
-		mod_timer(&mdev->request_timer, jiffies + HZ);
-
-	nsm.i = -1;
-	if (ns.susp_nod) {
-		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
-			what = resend;
-
-		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
-		    ns.disk > D_NEGOTIATING)
-			what = restart_frozen_disk_io;
-
-		if (what != nothing)
-			nsm.susp_nod = 0;
-	}
-
-	if (ns.susp_fen) {
-		/* case1: The outdate peer handler is successful: */
-		if (os.pdsk > D_OUTDATED  && ns.pdsk <= D_OUTDATED) {
-			if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
-				drbd_uuid_new_current(mdev);
-				clear_bit(NEW_CUR_UUID, &mdev->flags);
-			}
-			spin_lock_irq(&mdev->req_lock);
-			_tl_clear(mdev);
-			_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
-			spin_unlock_irq(&mdev->req_lock);
-		}
-		/* case2: The connection was established again: */
-		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
-			clear_bit(NEW_CUR_UUID, &mdev->flags);
-			what = resend;
-			nsm.susp_fen = 0;
-		}
-	}
-
-	if (what != nothing) {
-		spin_lock_irq(&mdev->req_lock);
-		_tl_restart(mdev, what);
-		nsm.i &= mdev->state.i;
-		_drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
-		spin_unlock_irq(&mdev->req_lock);
-	}
-
-	/* Became sync source.  With protocol >= 96, we still need to send out
-	 * the sync uuid now. Need to do that before any drbd_send_state, or
-	 * the other side may go "paused sync" before receiving the sync uuids,
-	 * which is unexpected. */
-	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
-	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
-	    mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
-		drbd_gen_and_send_sync_uuid(mdev);
-		put_ldev(mdev);
-	}
-
-	/* Do not change the order of the if above and the two below... */
-	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
-		/* we probably will start a resync soon.
-		 * make sure those things are properly reset. */
-		mdev->rs_total = 0;
-		mdev->rs_failed = 0;
-		atomic_set(&mdev->rs_pending_cnt, 0);
-		drbd_rs_cancel_all(mdev);
-
-		drbd_send_uuids(mdev);
-		drbd_send_state(mdev, ns);
-	}
-	/* No point in queuing send_bitmap if we don't have a connection
-	 * anymore, so check also the _current_ state, not only the new state
-	 * at the time this work was queued. */
-	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
-	    mdev->state.conn == C_WF_BITMAP_S)
-		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
-				"send_bitmap (WFBitMapS)",
-				BM_LOCKED_TEST_ALLOWED);
-
-	/* Lost contact to peer's copy of the data */
-	if ((os.pdsk >= D_INCONSISTENT &&
-	     os.pdsk != D_UNKNOWN &&
-	     os.pdsk != D_OUTDATED)
-	&&  (ns.pdsk < D_INCONSISTENT ||
-	     ns.pdsk == D_UNKNOWN ||
-	     ns.pdsk == D_OUTDATED)) {
-		if (get_ldev(mdev)) {
-			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
-				if (is_susp(mdev->state)) {
-					set_bit(NEW_CUR_UUID, &mdev->flags);
-				} else {
-					drbd_uuid_new_current(mdev);
-					drbd_send_uuids(mdev);
-				}
-			}
-			put_ldev(mdev);
-		}
-	}
-
-	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
-		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
-			drbd_uuid_new_current(mdev);
-			drbd_send_uuids(mdev);
-		}
-		/* D_DISKLESS Peer becomes secondary */
-		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
-			/* We may still be Primary ourselves.
-			 * No harm done if the bitmap still changes,
-			 * redirtied pages will follow later. */
-			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
-				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
-		put_ldev(mdev);
-	}
-
-	/* Write out all changed bits on demote.
-	 * Though, no need to da that just yet
-	 * if there is a resync going on still */
-	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
-		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
-		/* No changes to the bitmap expected this time, so assert that,
-		 * even though no harm was done if it did change. */
-		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
-				"demote", BM_LOCKED_TEST_ALLOWED);
-		put_ldev(mdev);
-	}
-
-	/* Last part of the attaching process ... */
-	if (ns.conn >= C_CONNECTED &&
-	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
-		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
-		drbd_send_uuids(mdev);
-		drbd_send_state(mdev, ns);
-	}
-
-	/* We want to pause/continue resync, tell peer. */
-	if (ns.conn >= C_CONNECTED &&
-	     ((os.aftr_isp != ns.aftr_isp) ||
-	      (os.user_isp != ns.user_isp)))
-		drbd_send_state(mdev, ns);
-
-	/* In case one of the isp bits got set, suspend other devices. */
-	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
-	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
-		suspend_other_sg(mdev);
-
-	/* Make sure the peer gets informed about eventual state
-	   changes (ISP bits) while we were in WFReportParams. */
-	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
-		drbd_send_state(mdev, ns);
-
-	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
-		drbd_send_state(mdev, ns);
-
-	/* We are in the progress to start a full sync... */
-	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
-	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
-		/* no other bitmap changes expected during this phase */
-		drbd_queue_bitmap_io(mdev,
-			&drbd_bmio_set_n_write, &abw_start_sync,
-			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
-
-	/* We are invalidating our self... */
-	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
-	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
-		/* other bitmap operation expected during this phase */
-		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
-			"set_n_write from invalidate", BM_LOCKED_MASK);
-
-	/* first half of local IO error, failure to attach,
-	 * or administrative detach */
-	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-		enum drbd_io_error_p eh = EP_PASS_ON;
-		int was_io_error = 0;
-		/* corresponding get_ldev was in __drbd_set_state, to serialize
-		 * our cleanup here with the transition to D_DISKLESS.
-		 * But is is still not save to dreference ldev here, since
-		 * we might come from an failed Attach before ldev was set. */
-		if (mdev->ldev) {
-			eh = mdev->ldev->dc.on_io_error;
-			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
-
-			if (was_io_error && eh == EP_CALL_HELPER)
-				drbd_khelper(mdev, "local-io-error");
-
-			/* Immediately allow completion of all application IO,
-			 * that waits for completion from the local disk,
-			 * if this was a force-detach due to disk_timeout
-			 * or administrator request (drbdsetup detach --force).
-			 * Do NOT abort otherwise.
-			 * Aborting local requests may cause serious problems,
-			 * if requests are completed to upper layers already,
-			 * and then later the already submitted local bio completes.
-			 * This can cause DMA into former bio pages that meanwhile
-			 * have been re-used for other things.
-			 * So aborting local requests may cause crashes,
-			 * or even worse, silent data corruption.
-			 */
-			if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
-				tl_abort_disk_io(mdev);
-
-			/* current state still has to be D_FAILED,
-			 * there is only one way out: to D_DISKLESS,
-			 * and that may only happen after our put_ldev below. */
-			if (mdev->state.disk != D_FAILED)
-				dev_err(DEV,
-					"ASSERT FAILED: disk is %s during detach\n",
-					drbd_disk_str(mdev->state.disk));
-
-			if (ns.conn >= C_CONNECTED)
-				drbd_send_state(mdev, ns);
-
-			drbd_rs_cancel_all(mdev);
-
-			/* In case we want to get something to stable storage still,
-			 * this may be the last chance.
-			 * Following put_ldev may transition to D_DISKLESS. */
-			drbd_md_sync(mdev);
-		}
-		put_ldev(mdev);
-	}
-
-        /* second half of local IO error, failure to attach,
-         * or administrative detach,
-         * after local_cnt references have reached zero again */
-        if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
-                /* We must still be diskless,
-                 * re-attach has to be serialized with this! */
-                if (mdev->state.disk != D_DISKLESS)
-                        dev_err(DEV,
-                                "ASSERT FAILED: disk is %s while going diskless\n",
-                                drbd_disk_str(mdev->state.disk));
-
-		if (ns.conn >= C_CONNECTED)
-			drbd_send_state(mdev, ns);
-
-		/* corresponding get_ldev in __drbd_set_state
-		 * this may finally trigger drbd_ldev_destroy. */
-		put_ldev(mdev);
-	}
-
-	/* Notify peer that I had a local IO error, and did not detached.. */
-	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
-		drbd_send_state(mdev, ns);
-
-	/* Disks got bigger while they were detached */
-	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
-	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
-		if (ns.conn == C_CONNECTED)
-			resync_after_online_grow(mdev);
-	}
-
-	/* A resync finished or aborted, wake paused devices... */
-	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
-	    (os.peer_isp && !ns.peer_isp) ||
-	    (os.user_isp && !ns.user_isp))
-		resume_next_sg(mdev);
-
-	/* sync target done with resync.  Explicitly notify peer, even though
-	 * it should (at least for non-empty resyncs) already know itself. */
-	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
-		drbd_send_state(mdev, ns);
-
-	/* Wake up role changes, that were delayed because of connection establishing */
-	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
-		clear_bit(STATE_SENT, &mdev->flags);
-		wake_up(&mdev->state_wait);
-	}
-
-	/* This triggers bitmap writeout of potentially still unwritten pages
-	 * if the resync finished cleanly, or aborted because of peer disk
-	 * failure, or because of connection loss.
-	 * For resync aborted because of local disk failure, we cannot do
-	 * any bitmap writeout anymore.
-	 * No harm done if some bits change during this phase.
-	 */
-	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
-		drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
-			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
-		put_ldev(mdev);
-	}
-
-	/* free tl_hash if we Got thawed and are C_STANDALONE */
-	if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
-		drbd_free_tl_hash(mdev);
-
-	/* Upon network connection, we need to start the receiver */
-	if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
-		drbd_thread_start(&mdev->receiver);
-
-	/* Terminate worker thread if we are unconfigured - it will be
-	   restarted as needed... */
-	if (ns.disk == D_DISKLESS &&
-	    ns.conn == C_STANDALONE &&
-	    ns.role == R_SECONDARY) {
-		if (os.aftr_isp != ns.aftr_isp)
-			resume_next_sg(mdev);
-		/* set in __drbd_set_state, unless CONFIG_PENDING was set */
-		if (test_bit(DEVICE_DYING, &mdev->flags))
-			drbd_thread_stop_nowait(&mdev->worker);
-	}
-
-	drbd_md_sync(mdev);
-}
-
-
 static int drbd_thread_setup(void *arg)
 {
 	struct drbd_thread *thi = (struct drbd_thread *) arg;
-	struct drbd_conf *mdev = thi->mdev;
+	struct drbd_tconn *tconn = thi->tconn;
 	unsigned long flags;
 	int retval;
 
+	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
+		 thi->name[0], thi->tconn->name);
+
 restart:
 	retval = thi->function(thi);
 
 	spin_lock_irqsave(&thi->t_lock, flags);
 
-	/* if the receiver has been "Exiting", the last thing it did
+	/* if the receiver has been "EXITING", the last thing it did
 	 * was set the conn state to "StandAlone",
 	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
 	 * and receiver thread will be "started".
-	 * drbd_thread_start needs to set "Restarting" in that case.
+	 * drbd_thread_start needs to set "RESTARTING" in that case.
 	 * t_state check and assignment needs to be within the same spinlock,
-	 * so either thread_start sees Exiting, and can remap to Restarting,
-	 * or thread_start see None, and can proceed as normal.
+	 * so either thread_start sees EXITING, and can remap to RESTARTING,
+	 * or thread_start sees NONE, and can proceed as normal.
 	 */
 
-	if (thi->t_state == Restarting) {
-		dev_info(DEV, "Restarting %s\n", current->comm);
-		thi->t_state = Running;
+	if (thi->t_state == RESTARTING) {
+		conn_info(tconn, "Restarting %s thread\n", thi->name);
+		thi->t_state = RUNNING;
 		spin_unlock_irqrestore(&thi->t_lock, flags);
 		goto restart;
 	}
 
 	thi->task = NULL;
-	thi->t_state = None;
+	thi->t_state = NONE;
 	smp_mb();
-	complete(&thi->stop);
+	complete_all(&thi->stop);
 	spin_unlock_irqrestore(&thi->t_lock, flags);
 
-	dev_info(DEV, "Terminating %s\n", current->comm);
+	conn_info(tconn, "Terminating %s\n", current->comm);
 
 	/* Release mod reference taken when thread was started */
+
+	kref_put(&tconn->kref, &conn_destroy);
 	module_put(THIS_MODULE);
 	return retval;
 }
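
The tail of drbd_thread_setup() re-enters the thread function when t_state was flipped to RESTARTING under t_lock, which is how drbd_thread_start() can restart an EXITING thread without a stop/start race. A simplified, lock-free model of just that exit-path decision (the real code holds t_lock and runs as a kthread):

    #include <stdio.h>

    enum t_state { NONE, RUNNING, EXITING, RESTARTING };

    /* Models the exit path of drbd_thread_setup(): loop back into the
     * thread function on RESTARTING, otherwise terminate to NONE. */
    static enum t_state thread_exit_path(enum t_state s, int *restarts)
    {
    	if (s == RESTARTING) {
    		(*restarts)++;
    		return RUNNING;	/* "goto restart" in the kernel code */
    	}
    	return NONE;		/* thread really terminates */
    }

    int main(void)
    {
    	int restarts = 0;
    	enum t_state s = RESTARTING;	/* as set by drbd_thread_start() */

    	while ((s = thread_exit_path(s, &restarts)) == RUNNING)
    		s = EXITING;	/* the re-run function exits again */

    	printf("restarted %d time(s), ended in NONE: %s\n",
    	       restarts, s == NONE ? "yes" : "no");
    	return 0;
    }
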
 
-static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
-		      int (*func) (struct drbd_thread *))
+static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi,
+			     int (*func) (struct drbd_thread *), char *name)
 {
 	spin_lock_init(&thi->t_lock);
 	thi->task    = NULL;
-	thi->t_state = None;
+	thi->t_state = NONE;
 	thi->function = func;
-	thi->mdev = mdev;
+	thi->tconn = tconn;
+	strncpy(thi->name, name, ARRAY_SIZE(thi->name));
 }
 
 int drbd_thread_start(struct drbd_thread *thi)
 {
-	struct drbd_conf *mdev = thi->mdev;
+	struct drbd_tconn *tconn = thi->tconn;
 	struct task_struct *nt;
 	unsigned long flags;
 
-	const char *me =
-		thi == &mdev->receiver ? "receiver" :
-		thi == &mdev->asender  ? "asender"  :
-		thi == &mdev->worker   ? "worker"   : "NONSENSE";
-
 	/* is used from state engine doing drbd_thread_stop_nowait,
 	 * while holding the req lock irqsave */
 	spin_lock_irqsave(&thi->t_lock, flags);
 
 	switch (thi->t_state) {
-	case None:
-		dev_info(DEV, "Starting %s thread (from %s [%d])\n",
-				me, current->comm, current->pid);
+	case NONE:
+		conn_info(tconn, "Starting %s thread (from %s [%d])\n",
+			 thi->name, current->comm, current->pid);
 
 		/* Get ref on module for thread - this is released when thread exits */
 		if (!try_module_get(THIS_MODULE)) {
-			dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
+			conn_err(tconn, "Failed to get module reference in drbd_thread_start\n");
 			spin_unlock_irqrestore(&thi->t_lock, flags);
 			return false;
 		}
 
+		kref_get(&thi->tconn->kref);
+
 		init_completion(&thi->stop);
-		D_ASSERT(thi->task == NULL);
 		thi->reset_cpu_mask = 1;
-		thi->t_state = Running;
+		thi->t_state = RUNNING;
 		spin_unlock_irqrestore(&thi->t_lock, flags);
 		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */
 
 		nt = kthread_create(drbd_thread_setup, (void *) thi,
-				    "drbd%d_%s", mdev_to_minor(mdev), me);
+				    "drbd_%c_%s", thi->name[0], thi->tconn->name);
 
 		if (IS_ERR(nt)) {
-			dev_err(DEV, "Couldn't start thread\n");
+			conn_err(tconn, "Couldn't start thread\n");
 
+			kref_put(&tconn->kref, &conn_destroy);
 			module_put(THIS_MODULE);
 			return false;
 		}
 		spin_lock_irqsave(&thi->t_lock, flags);
 		thi->task = nt;
-		thi->t_state = Running;
+		thi->t_state = RUNNING;
 		spin_unlock_irqrestore(&thi->t_lock, flags);
 		wake_up_process(nt);
 		break;
-	case Exiting:
-		thi->t_state = Restarting;
-		dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
-				me, current->comm, current->pid);
+	case EXITING:
+		thi->t_state = RESTARTING;
+		conn_info(tconn, "Restarting %s thread (from %s [%d])\n",
+				thi->name, current->comm, current->pid);
 		/* fall through */
-	case Running:
-	case Restarting:
+	case RUNNING:
+	case RESTARTING:
 	default:
 		spin_unlock_irqrestore(&thi->t_lock, flags);
 		break;
@@ -1867,12 +447,12 @@
 {
 	unsigned long flags;
 
-	enum drbd_thread_state ns = restart ? Restarting : Exiting;
+	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;
 
 	/* may be called from state engine, holding the req lock irqsave */
 	spin_lock_irqsave(&thi->t_lock, flags);
 
-	if (thi->t_state == None) {
+	if (thi->t_state == NONE) {
 		spin_unlock_irqrestore(&thi->t_lock, flags);
 		if (restart)
 			drbd_thread_start(thi);
@@ -1890,7 +470,6 @@
 		init_completion(&thi->stop);
 		if (thi->task != current)
 			force_sig(DRBD_SIGKILL, thi->task);
-
 	}
 
 	spin_unlock_irqrestore(&thi->t_lock, flags);
@@ -1899,6 +478,35 @@
 		wait_for_completion(&thi->stop);
 }
 
+static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task)
+{
+	struct drbd_thread *thi =
+		task == tconn->receiver.task ? &tconn->receiver :
+		task == tconn->asender.task  ? &tconn->asender :
+		task == tconn->worker.task   ? &tconn->worker : NULL;
+
+	return thi;
+}
+
+char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task)
+{
+	struct drbd_thread *thi = drbd_task_to_thread(tconn, task);
+	return thi ? thi->name : task->comm;
+}
+
+int conn_lowest_minor(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr = 0, m;
+
+	rcu_read_lock();
+	mdev = idr_get_next(&tconn->volumes, &vnr);
+	m = mdev ? mdev_to_minor(mdev) : -1;
+	rcu_read_unlock();
+
+	return m;
+}
+
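
conn_lowest_minor() above asks the volumes idr for the first device at or after volume number 0 and reports its minor, or -1 for an empty connection. A userspace model of that lookup, with a fixed-size array standing in for idr_get_next():

    #include <stdio.h>

    #define MAX_VOLS 8

    /* Sparse "idr": index is the volume number, value is the minor (-1 = empty). */
    static const int vols[MAX_VOLS] = { -1, 10, -1, 12, -1, -1, -1, -1 };

    /* Models conn_lowest_minor(): minor of the first populated volume slot. */
    static int lowest_minor(void)
    {
    	for (int vnr = 0; vnr < MAX_VOLS; vnr++)
    		if (vols[vnr] != -1)
    			return vols[vnr];
    	return -1;
    }

    int main(void)
    {
    	printf("lowest minor: %d\n", lowest_minor());
    	return 0;
    }
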
 #ifdef CONFIG_SMP
 /**
  * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
@@ -1907,238 +515,345 @@
  * Forces all threads of a device onto the same CPU. This is beneficial for
 * DRBD's performance. May be overridden by the user's configuration.
  */
-void drbd_calc_cpu_mask(struct drbd_conf *mdev)
+void drbd_calc_cpu_mask(struct drbd_tconn *tconn)
 {
 	int ord, cpu;
 
 	/* user override. */
-	if (cpumask_weight(mdev->cpu_mask))
+	if (cpumask_weight(tconn->cpu_mask))
 		return;
 
-	ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
+	ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask);
 	for_each_online_cpu(cpu) {
 		if (ord-- == 0) {
-			cpumask_set_cpu(cpu, mdev->cpu_mask);
+			cpumask_set_cpu(cpu, tconn->cpu_mask);
 			return;
 		}
 	}
 	/* should not be reached */
-	cpumask_setall(mdev->cpu_mask);
+	cpumask_setall(tconn->cpu_mask);
 }
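
drbd_calc_cpu_mask() above pins all of a connection's threads to one CPU, chosen as the (lowest minor modulo number of online CPUs)-th entry of the online set. A small sketch of that selection over a hypothetical online-CPU list:

    #include <stdio.h>

    /* Models drbd_calc_cpu_mask(): spread connections over CPUs by taking
     * the lowest minor modulo the number of online CPUs, then walking the
     * online set to the ord-th entry (for_each_online_cpu() in the kernel). */
    static int pick_cpu(int lowest_minor, const int *online, int n_online)
    {
    	int ord = lowest_minor % n_online;

    	return online[ord];
    }

    int main(void)
    {
    	int online[] = { 0, 2, 3, 5 };	/* hypothetical online CPU ids */

    	printf("minor 6 -> cpu %d\n", pick_cpu(6, online, 4));
    	return 0;
    }
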
 
 /**
  * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
  * @mdev:	DRBD device.
+ * @thi:	drbd_thread object
  *
  * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
  * prematurely.
  */
-void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
+void drbd_thread_current_set_cpu(struct drbd_thread *thi)
 {
 	struct task_struct *p = current;
-	struct drbd_thread *thi =
-		p == mdev->asender.task  ? &mdev->asender  :
-		p == mdev->receiver.task ? &mdev->receiver :
-		p == mdev->worker.task   ? &mdev->worker   :
-		NULL;
-	ERR_IF(thi == NULL)
-		return;
+
 	if (!thi->reset_cpu_mask)
 		return;
 	thi->reset_cpu_mask = 0;
-	set_cpus_allowed_ptr(p, mdev->cpu_mask);
+	set_cpus_allowed_ptr(p, thi->tconn->cpu_mask);
 }
 #endif
 
-/* the appropriate socket mutex must be held already */
-int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
-			  enum drbd_packets cmd, struct p_header80 *h,
-			  size_t size, unsigned msg_flags)
-{
-	int sent, ok;
-
-	ERR_IF(!h) return false;
-	ERR_IF(!size) return false;
-
-	h->magic   = BE_DRBD_MAGIC;
-	h->command = cpu_to_be16(cmd);
-	h->length  = cpu_to_be16(size-sizeof(struct p_header80));
-
-	sent = drbd_send(mdev, sock, h, size, msg_flags);
-
-	ok = (sent == size);
-	if (!ok && !signal_pending(current))
-		dev_warn(DEV, "short sent %s size=%d sent=%d\n",
-		    cmdname(cmd), (int)size, sent);
-	return ok;
-}
-
-/* don't pass the socket. we may only look at it
- * when we hold the appropriate socket mutex.
+/**
+ * drbd_header_size  -  size of a packet header
+ *
+ * The header size is a multiple of 8, so any payload following the header is
+ * word aligned on 64-bit architectures.  (The bitmap send and receive code
+ * relies on this.)
  */
-int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
-		  enum drbd_packets cmd, struct p_header80 *h, size_t size)
+unsigned int drbd_header_size(struct drbd_tconn *tconn)
 {
-	int ok = 0;
-	struct socket *sock;
-
-	if (use_data_socket) {
-		mutex_lock(&mdev->data.mutex);
-		sock = mdev->data.socket;
+	if (tconn->agreed_pro_version >= 100) {
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
+		return sizeof(struct p_header100);
 	} else {
-		mutex_lock(&mdev->meta.mutex);
-		sock = mdev->meta.socket;
+		BUILD_BUG_ON(sizeof(struct p_header80) !=
+			     sizeof(struct p_header95));
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
+		return sizeof(struct p_header80);
 	}
+}
 
-	/* drbd_disconnect() could have called drbd_free_sock()
-	 * while we were waiting in down()... */
-	if (likely(sock != NULL))
-		ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
+static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
+{
+	h->magic   = cpu_to_be32(DRBD_MAGIC);
+	h->command = cpu_to_be16(cmd);
+	h->length  = cpu_to_be16(size);
+	return sizeof(struct p_header80);
+}
 
-	if (use_data_socket)
-		mutex_unlock(&mdev->data.mutex);
+static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
+{
+	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
+	h->command = cpu_to_be16(cmd);
+	h->length = cpu_to_be32(size);
+	return sizeof(struct p_header95);
+}
+
+static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
+				      int size, int vnr)
+{
+	h->magic = cpu_to_be32(DRBD_MAGIC_100);
+	h->volume = cpu_to_be16(vnr);
+	h->command = cpu_to_be16(cmd);
+	h->length = cpu_to_be32(size);
+	h->pad = 0;
+	return sizeof(struct p_header100);
+}
+
+static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr,
+				   void *buffer, enum drbd_packet cmd, int size)
+{
+	if (tconn->agreed_pro_version >= 100)
+		return prepare_header100(buffer, cmd, size, vnr);
+	else if (tconn->agreed_pro_version >= 95 &&
+		 size > DRBD_MAX_SIZE_H80_PACKET)
+		return prepare_header95(buffer, cmd, size);
 	else
-		mutex_unlock(&mdev->meta.mutex);
-	return ok;
+		return prepare_header80(buffer, cmd, size);
 }
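
For readers tracking the header rework, a sketch of the three on-wire layouts prepare_header() dispatches between, inferred from the prepare_header*() helpers above (the authoritative struct definitions live in drbd_int.h):

	/* protocol <  95, or small packets:  8-byte p_header80
	 *   u32 magic; u16 command; u16 length;
	 * protocol >= 95 and size > DRBD_MAX_SIZE_H80_PACKET: 8-byte p_header95
	 *   u16 magic; u16 command; u32 length;
	 * protocol >= 100: 16-byte p_header100, adds a volume number
	 *   u32 magic; u16 volume; u16 command; u32 length; u32 pad;
	 */
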
 
-int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
-		   size_t size)
+static void *__conn_prepare_command(struct drbd_tconn *tconn,
+				    struct drbd_socket *sock)
 {
-	struct p_header80 h;
-	int ok;
-
-	h.magic   = BE_DRBD_MAGIC;
-	h.command = cpu_to_be16(cmd);
-	h.length  = cpu_to_be16(size);
-
-	if (!drbd_get_data_sock(mdev))
-		return 0;
-
-	ok = (sizeof(h) ==
-		drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
-	ok = ok && (size ==
-		drbd_send(mdev, mdev->data.socket, data, size, 0));
-
-	drbd_put_data_sock(mdev);
-
-	return ok;
+	if (!sock->socket)
+		return NULL;
+	return sock->sbuf + drbd_header_size(tconn);
 }
 
-int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
+void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock)
 {
+	void *p;
+
+	mutex_lock(&sock->mutex);
+	p = __conn_prepare_command(tconn, sock);
+	if (!p)
+		mutex_unlock(&sock->mutex);
+
+	return p;
+}
+
+void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock)
+{
+	return conn_prepare_command(mdev->tconn, sock);
+}
+
+static int __send_command(struct drbd_tconn *tconn, int vnr,
+			  struct drbd_socket *sock, enum drbd_packet cmd,
+			  unsigned int header_size, void *data,
+			  unsigned int size)
+{
+	int msg_flags;
+	int err;
+
+	/*
+	 * Called with @data == NULL and the size of the data blocks in @size
+	 * for commands that send data blocks.  For those commands, omit the
+	 * MSG_MORE flag: this will increase the likelihood that data blocks
+	 * which are page aligned on the sender will end up page aligned on the
+	 * receiver.
+	 */
+	msg_flags = data ? MSG_MORE : 0;
+
+	header_size += prepare_header(tconn, vnr, sock->sbuf, cmd,
+				      header_size + size);
+	err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size,
+			    msg_flags);
+	if (data && !err)
+		err = drbd_send_all(tconn, sock->socket, data, size, 0);
+	return err;
+}
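
The calling conventions the comment describes look like this in practice (a sketch; the P_SIZES and digest calls mirror drbd_send_sizes() and drbd_send_drequest_csum() below, the P_DATA call anticipates drbd_send_dblock()):

	/* header-only packet, payload already in the sbuf after the header: */
	__send_command(tconn, vnr, sock, P_SIZES, sizeof(struct p_sizes), NULL, 0);
	/* extra buffer (e.g. a digest): header goes out with MSG_MORE, data follows: */
	__send_command(tconn, vnr, sock, cmd, sizeof(struct p_block_req), digest, digest_size);
	/* data blocks: @data is NULL, @size counts the bio pages the caller
	 * streams afterwards; MSG_MORE is omitted to keep those pages aligned: */
	__send_command(tconn, vnr, sock, P_DATA, sizeof(struct p_data) + dgs, NULL, req->i.size);
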
+
+static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
+			       enum drbd_packet cmd, unsigned int header_size,
+			       void *data, unsigned int size)
+{
+	return __send_command(tconn, 0, sock, cmd, header_size, data, size);
+}
+
+int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
+		      enum drbd_packet cmd, unsigned int header_size,
+		      void *data, unsigned int size)
+{
+	int err;
+
+	err = __conn_send_command(tconn, sock, cmd, header_size, data, size);
+	mutex_unlock(&sock->mutex);
+	return err;
+}
+
+int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock,
+		      enum drbd_packet cmd, unsigned int header_size,
+		      void *data, unsigned int size)
+{
+	int err;
+
+	err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size,
+			     data, size);
+	mutex_unlock(&sock->mutex);
+	return err;
+}
+
+int drbd_send_ping(struct drbd_tconn *tconn)
+{
+	struct drbd_socket *sock;
+
+	sock = &tconn->meta;
+	if (!conn_prepare_command(tconn, sock))
+		return -EIO;
+	return conn_send_command(tconn, sock, P_PING, 0, NULL, 0);
+}
+
+int drbd_send_ping_ack(struct drbd_tconn *tconn)
+{
+	struct drbd_socket *sock;
+
+	sock = &tconn->meta;
+	if (!conn_prepare_command(tconn, sock))
+		return -EIO;
+	return conn_send_command(tconn, sock, P_PING_ACK, 0, NULL, 0);
+}
+
+int drbd_send_sync_param(struct drbd_conf *mdev)
+{
+	struct drbd_socket *sock;
 	struct p_rs_param_95 *p;
-	struct socket *sock;
-	int size, rv;
-	const int apv = mdev->agreed_pro_version;
+	int size;
+	const int apv = mdev->tconn->agreed_pro_version;
+	enum drbd_packet cmd;
+	struct net_conf *nc;
+	struct disk_conf *dc;
+
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+
+	rcu_read_lock();
+	nc = rcu_dereference(mdev->tconn->net_conf);
 
 	size = apv <= 87 ? sizeof(struct p_rs_param)
 		: apv == 88 ? sizeof(struct p_rs_param)
-			+ strlen(mdev->sync_conf.verify_alg) + 1
+			+ strlen(nc->verify_alg) + 1
 		: apv <= 94 ? sizeof(struct p_rs_param_89)
 		: /* apv >= 95 */ sizeof(struct p_rs_param_95);
 
-	/* used from admin command context and receiver/worker context.
-	 * to avoid kmalloc, grab the socket right here,
-	 * then use the pre-allocated sbuf there */
-	mutex_lock(&mdev->data.mutex);
-	sock = mdev->data.socket;
+	cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
 
-	if (likely(sock != NULL)) {
-		enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
+	/* initialize verify_alg and csums_alg */
+	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
-		p = &mdev->data.sbuf.rs_param_95;
+	if (get_ldev(mdev)) {
+		dc = rcu_dereference(mdev->ldev->disk_conf);
+		p->resync_rate = cpu_to_be32(dc->resync_rate);
+		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
+		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
+		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
+		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
+		put_ldev(mdev);
+	} else {
+		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
+		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
+		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
+		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
+		p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
+	}
 
-		/* initialize verify_alg and csums_alg */
-		memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+	if (apv >= 88)
+		strcpy(p->verify_alg, nc->verify_alg);
+	if (apv >= 89)
+		strcpy(p->csums_alg, nc->csums_alg);
+	rcu_read_unlock();
 
-		p->rate = cpu_to_be32(sc->rate);
-		p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
-		p->c_delay_target = cpu_to_be32(sc->c_delay_target);
-		p->c_fill_target = cpu_to_be32(sc->c_fill_target);
-		p->c_max_rate = cpu_to_be32(sc->c_max_rate);
-
-		if (apv >= 88)
-			strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
-		if (apv >= 89)
-			strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
-
-		rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
-	} else
-		rv = 0; /* not ok */
-
-	mutex_unlock(&mdev->data.mutex);
-
-	return rv;
+	return drbd_send_command(mdev, sock, cmd, size, NULL, 0);
 }
 
-int drbd_send_protocol(struct drbd_conf *mdev)
+int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
 {
+	struct drbd_socket *sock;
 	struct p_protocol *p;
-	int size, cf, rv;
+	struct net_conf *nc;
+	int size, cf;
 
-	size = sizeof(struct p_protocol);
+	sock = &tconn->data;
+	p = __conn_prepare_command(tconn, sock);
+	if (!p)
+		return -EIO;
 
-	if (mdev->agreed_pro_version >= 87)
-		size += strlen(mdev->net_conf->integrity_alg) + 1;
+	rcu_read_lock();
+	nc = rcu_dereference(tconn->net_conf);
 
-	/* we must not recurse into our own queue,
-	 * as that is blocked during handshake */
-	p = kmalloc(size, GFP_NOIO);
-	if (p == NULL)
-		return 0;
-
-	p->protocol      = cpu_to_be32(mdev->net_conf->wire_protocol);
-	p->after_sb_0p   = cpu_to_be32(mdev->net_conf->after_sb_0p);
-	p->after_sb_1p   = cpu_to_be32(mdev->net_conf->after_sb_1p);
-	p->after_sb_2p   = cpu_to_be32(mdev->net_conf->after_sb_2p);
-	p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
-
-	cf = 0;
-	if (mdev->net_conf->want_lose)
-		cf |= CF_WANT_LOSE;
-	if (mdev->net_conf->dry_run) {
-		if (mdev->agreed_pro_version >= 92)
-			cf |= CF_DRY_RUN;
-		else {
-			dev_err(DEV, "--dry-run is not supported by peer");
-			kfree(p);
-			return -1;
-		}
+	if (nc->tentative && tconn->agreed_pro_version < 92) {
+		rcu_read_unlock();
+		mutex_unlock(&sock->mutex);
+		conn_err(tconn, "--dry-run is not supported by peer");
+		return -EOPNOTSUPP;
 	}
+
+	size = sizeof(*p);
+	if (tconn->agreed_pro_version >= 87)
+		size += strlen(nc->integrity_alg) + 1;
+
+	p->protocol      = cpu_to_be32(nc->wire_protocol);
+	p->after_sb_0p   = cpu_to_be32(nc->after_sb_0p);
+	p->after_sb_1p   = cpu_to_be32(nc->after_sb_1p);
+	p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
+	p->two_primaries = cpu_to_be32(nc->two_primaries);
+	cf = 0;
+	if (nc->discard_my_data)
+		cf |= CF_DISCARD_MY_DATA;
+	if (nc->tentative)
+		cf |= CF_DRY_RUN;
 	p->conn_flags    = cpu_to_be32(cf);
 
-	if (mdev->agreed_pro_version >= 87)
-		strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
+	if (tconn->agreed_pro_version >= 87)
+		strcpy(p->integrity_alg, nc->integrity_alg);
+	rcu_read_unlock();
 
-	rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
-			   (struct p_header80 *)p, size);
-	kfree(p);
-	return rv;
+	return __conn_send_command(tconn, sock, cmd, size, NULL, 0);
+}
+
+int drbd_send_protocol(struct drbd_tconn *tconn)
+{
+	int err;
+
+	mutex_lock(&tconn->data.mutex);
+	err = __drbd_send_protocol(tconn, P_PROTOCOL);
+	mutex_unlock(&tconn->data.mutex);
+
+	return err;
 }
 
 int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
 {
-	struct p_uuids p;
+	struct drbd_socket *sock;
+	struct p_uuids *p;
 	int i;
 
 	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
-		return 1;
+		return 0;
 
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p) {
+		put_ldev(mdev);
+		return -EIO;
+	}
+	spin_lock_irq(&mdev->ldev->md.uuid_lock);
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
-		p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
+		p->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
+	spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 
 	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
-	p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
-	uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
+	p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
+	rcu_read_lock();
+	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0;
+	rcu_read_unlock();
 	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
 	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
-	p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
+	p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
 
 	put_ldev(mdev);
-
-	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
-			     (struct p_header80 *)&p, sizeof(p));
+	return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0);
 }
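
The uuid_flags word assembled above packs three independent bits; in summary (derived from the assignments in this function):

	/* uuid_flags bit 0 (1): net_conf->discard_my_data is set (was want_lose)
	 * uuid_flags bit 1 (2): this node was a crashed primary
	 * uuid_flags bit 2 (4): disk was D_INCONSISTENT while negotiating */
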
 
 int drbd_send_uuids(struct drbd_conf *mdev)
@@ -2169,9 +884,10 @@
 	}
 }
 
-int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
+void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
 {
-	struct p_rs_uuid p;
+	struct drbd_socket *sock;
+	struct p_rs_uuid *p;
 	u64 uuid;
 
 	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
@@ -2184,24 +900,29 @@
 	drbd_uuid_set(mdev, UI_BITMAP, uuid);
 	drbd_print_uuids(mdev, "updated sync UUID");
 	drbd_md_sync(mdev);
-	p.uuid = cpu_to_be64(uuid);
 
-	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
-			     (struct p_header80 *)&p, sizeof(p));
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (p) {
+		p->uuid = cpu_to_be64(uuid);
+		drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0);
+	}
 }
 
 int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
 {
-	struct p_sizes p;
+	struct drbd_socket *sock;
+	struct p_sizes *p;
 	sector_t d_size, u_size;
 	int q_order_type;
 	unsigned int max_bio_size;
-	int ok;
 
 	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
 		D_ASSERT(mdev->ldev->backing_bdev);
 		d_size = drbd_get_max_capacity(mdev->ldev);
-		u_size = mdev->ldev->dc.disk_size;
+		rcu_read_lock();
+		u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+		rcu_read_unlock();
 		q_order_type = drbd_queue_order_type(mdev);
 		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
 		max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
@@ -2213,20 +934,23 @@
 		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
 	}
 
-	/* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
-	if (mdev->agreed_pro_version <= 94)
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+
+	if (mdev->tconn->agreed_pro_version <= 94)
 		max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+	else if (mdev->tconn->agreed_pro_version < 100)
+		max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE_P95);
 
-	p.d_size = cpu_to_be64(d_size);
-	p.u_size = cpu_to_be64(u_size);
-	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
-	p.max_bio_size = cpu_to_be32(max_bio_size);
-	p.queue_order_type = cpu_to_be16(q_order_type);
-	p.dds_flags = cpu_to_be16(flags);
-
-	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
-			   (struct p_header80 *)&p, sizeof(p));
-	return ok;
+	p->d_size = cpu_to_be64(d_size);
+	p->u_size = cpu_to_be64(u_size);
+	p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
+	p->max_bio_size = cpu_to_be32(max_bio_size);
+	p->queue_order_type = cpu_to_be16(q_order_type);
+	p->dds_flags = cpu_to_be16(flags);
+	return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0);
 }
 
 /**
@@ -2235,34 +959,21 @@
  */
 int drbd_send_current_state(struct drbd_conf *mdev)
 {
-	struct socket *sock;
-	struct p_state p;
-	int ok = 0;
+	struct drbd_socket *sock;
+	struct p_state *p;
 
-	/* Grab state lock so we wont send state if we're in the middle
-	 * of a cluster wide state change on another thread */
-	drbd_state_lock(mdev);
-
-	mutex_lock(&mdev->data.mutex);
-
-	p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
-	sock = mdev->data.socket;
-
-	if (likely(sock != NULL)) {
-		ok = _drbd_send_cmd(mdev, sock, P_STATE,
-				    (struct p_header80 *)&p, sizeof(p), 0);
-	}
-
-	mutex_unlock(&mdev->data.mutex);
-
-	drbd_state_unlock(mdev);
-	return ok;
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
+	return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
 }
 
 /**
  * drbd_send_state() - After a state change, sends the new state to the peer
- * @mdev:	DRBD device.
- * @state:	the state to send, not necessarily the current state.
+ * @mdev:      DRBD device.
+ * @state:     the state to send, not necessarily the current state.
  *
  * Each state change queues an "after_state_ch" work, which will eventually
  * send the resulting new state to the peer. If more state changes happen
@@ -2271,50 +982,95 @@
  */
 int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
 {
-	struct socket *sock;
-	struct p_state p;
-	int ok = 0;
+	struct drbd_socket *sock;
+	struct p_state *p;
 
-	mutex_lock(&mdev->data.mutex);
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->state = cpu_to_be32(state.i); /* Within the send mutex */
+	return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
+}
 
-	p.state = cpu_to_be32(state.i);
-	sock = mdev->data.socket;
+int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val)
+{
+	struct drbd_socket *sock;
+	struct p_req_state *p;
 
-	if (likely(sock != NULL)) {
-		ok = _drbd_send_cmd(mdev, sock, P_STATE,
-				    (struct p_header80 *)&p, sizeof(p), 0);
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->mask = cpu_to_be32(mask.i);
+	p->val = cpu_to_be32(val.i);
+	return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0);
+}
+
+int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
+{
+	enum drbd_packet cmd;
+	struct drbd_socket *sock;
+	struct p_req_state *p;
+
+	cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ;
+	sock = &tconn->data;
+	p = conn_prepare_command(tconn, sock);
+	if (!p)
+		return -EIO;
+	p->mask = cpu_to_be32(mask.i);
+	p->val = cpu_to_be32(val.i);
+	return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
+}
+
+void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
+{
+	struct drbd_socket *sock;
+	struct p_req_state_reply *p;
+
+	sock = &mdev->tconn->meta;
+	p = drbd_prepare_command(mdev, sock);
+	if (p) {
+		p->retcode = cpu_to_be32(retcode);
+		drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0);
 	}
-
-	mutex_unlock(&mdev->data.mutex);
-
-	return ok;
 }
 
-int drbd_send_state_req(struct drbd_conf *mdev,
-	union drbd_state mask, union drbd_state val)
+void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode)
 {
-	struct p_req_state p;
+	struct drbd_socket *sock;
+	struct p_req_state_reply *p;
+	enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY;
 
-	p.mask    = cpu_to_be32(mask.i);
-	p.val     = cpu_to_be32(val.i);
-
-	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
-			     (struct p_header80 *)&p, sizeof(p));
+	sock = &tconn->meta;
+	p = conn_prepare_command(tconn, sock);
+	if (p) {
+		p->retcode = cpu_to_be32(retcode);
+		conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
+	}
 }
 
-int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
+static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
 {
-	struct p_req_state_reply p;
+	BUG_ON(code & ~0xf);
+	p->encoding = (p->encoding & ~0xf) | code;
+}
 
-	p.retcode    = cpu_to_be32(retcode);
+static void dcbp_set_start(struct p_compressed_bm *p, int set)
+{
+	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
+}
 
-	return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
-			     (struct p_header80 *)&p, sizeof(p));
+static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n)
+{
+	BUG_ON(n & ~0x7);
+	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
 }
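
Taken together, the three dcbp_set_*() helpers define the layout of the single encoding byte in struct p_compressed_bm:

	/* p_compressed_bm.encoding:
	 *   bits 3..0  encoding code (e.g. RLE_VLI_Bits), dcbp_set_code()
	 *   bits 6..4  pad bits at the end of the stream, dcbp_set_pad_bits()
	 *   bit  7     value of the first run,            dcbp_set_start()
	 * Note the code nibble is set last: dcbp_set_pad_bits()'s mask also
	 * clears the low nibble, and send_bitmap_rle_or_plain() below only
	 * calls dcbp_set_code() after fill_bitmap_rle_bits() has finished. */
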
 
 int fill_bitmap_rle_bits(struct drbd_conf *mdev,
-	struct p_compressed_bm *p,
-	struct bm_xfer_ctx *c)
+			 struct p_compressed_bm *p,
+			 unsigned int size,
+			 struct bm_xfer_ctx *c)
 {
 	struct bitstream bs;
 	unsigned long plain_bits;
@@ -2322,19 +1078,21 @@
 	unsigned long rl;
 	unsigned len;
 	unsigned toggle;
-	int bits;
+	int bits, use_rle;
 
 	/* may we use this feature? */
-	if ((mdev->sync_conf.use_rle == 0) ||
-		(mdev->agreed_pro_version < 90))
-			return 0;
+	rcu_read_lock();
+	use_rle = rcu_dereference(mdev->tconn->net_conf)->use_rle;
+	rcu_read_unlock();
+	if (!use_rle || mdev->tconn->agreed_pro_version < 90)
+		return 0;
 
 	if (c->bit_offset >= c->bm_bits)
 		return 0; /* nothing to do. */
 
 	/* use at most thus many bytes */
-	bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
-	memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
+	bitstream_init(&bs, p->code, size, 0);
+	memset(p->code, 0, size);
 	/* plain bits covered in this code string */
 	plain_bits = 0;
 
@@ -2356,12 +1114,12 @@
 			if (rl == 0) {
 				/* the first checked bit was set,
 				 * store start value, */
-				DCBP_set_start(p, 1);
+				dcbp_set_start(p, 1);
 				/* but skip encoding of zero run length */
 				toggle = !toggle;
 				continue;
 			}
-			DCBP_set_start(p, 0);
+			dcbp_set_start(p, 0);
 		}
 
 		/* paranoia: catch zero runlength.
@@ -2401,7 +1159,7 @@
 	bm_xfer_ctx_bit_to_word_offset(c);
 
 	/* store pad_bits */
-	DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
+	dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
 
 	return len;
 }
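
A worked example of the toggle-run encoding may help (illustrative only; the real encoder emits variable-length integer codes through the bitstream helpers):

	/* bitmap bits 0000 1111 01  ->  toggle runs of lengths 4, 4, 1, 1.
	 * The first checked bit is 0, so dcbp_set_start(p, 0) is recorded and
	 * each run length is emitted as a VLI code.  Had the first bit been
	 * set, the leading zero-length run would be skipped via the
	 * "toggle = !toggle; continue;" path above, with dcbp_set_start(p, 1). */
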
@@ -2413,48 +1171,52 @@
  * code upon failure.
  */
 static int
-send_bitmap_rle_or_plain(struct drbd_conf *mdev,
-			 struct p_header80 *h, struct bm_xfer_ctx *c)
+send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c)
 {
-	struct p_compressed_bm *p = (void*)h;
-	unsigned long num_words;
-	int len;
-	int ok;
+	struct drbd_socket *sock = &mdev->tconn->data;
+	unsigned int header_size = drbd_header_size(mdev->tconn);
+	struct p_compressed_bm *p = sock->sbuf + header_size;
+	int len, err;
 
-	len = fill_bitmap_rle_bits(mdev, p, c);
-
+	len = fill_bitmap_rle_bits(mdev, p,
+			DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c);
 	if (len < 0)
 		return -EIO;
 
 	if (len) {
-		DCBP_set_code(p, RLE_VLI_Bits);
-		ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
-			sizeof(*p) + len, 0);
-
+		dcbp_set_code(p, RLE_VLI_Bits);
+		err = __send_command(mdev->tconn, mdev->vnr, sock,
+				     P_COMPRESSED_BITMAP, sizeof(*p) + len,
+				     NULL, 0);
 		c->packets[0]++;
-		c->bytes[0] += sizeof(*p) + len;
+		c->bytes[0] += header_size + sizeof(*p) + len;
 
 		if (c->bit_offset >= c->bm_bits)
 			len = 0; /* DONE */
 	} else {
 		/* was not compressible.
 		 * send a buffer full of plain text bits instead. */
-		num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
-		len = num_words * sizeof(long);
+		unsigned int data_size;
+		unsigned long num_words;
+		unsigned long *p = sock->sbuf + header_size;
+
+		data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
+		num_words = min_t(size_t, data_size / sizeof(*p),
+				  c->bm_words - c->word_offset);
+		len = num_words * sizeof(*p);
 		if (len)
-			drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
-		ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
-				   h, sizeof(struct p_header80) + len, 0);
+			drbd_bm_get_lel(mdev, c->word_offset, num_words, p);
+		err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, len, NULL, 0);
 		c->word_offset += num_words;
 		c->bit_offset = c->word_offset * BITS_PER_LONG;
 
 		c->packets[1]++;
-		c->bytes[1] += sizeof(struct p_header80) + len;
+		c->bytes[1] += header_size + len;
 
 		if (c->bit_offset > c->bm_bits)
 			c->bit_offset = c->bm_bits;
 	}
-	if (ok) {
+	if (!err) {
 		if (len == 0) {
 			INFO_bm_xfer_stats(mdev, "send", c);
 			return 0;
@@ -2465,21 +1227,13 @@
 }
 
 /* See the comment at receive_bitmap() */
-int _drbd_send_bitmap(struct drbd_conf *mdev)
+static int _drbd_send_bitmap(struct drbd_conf *mdev)
 {
 	struct bm_xfer_ctx c;
-	struct p_header80 *p;
 	int err;
 
-	ERR_IF(!mdev->bitmap) return false;
-
-	/* maybe we should use some per thread scratch page,
-	 * and allocate that during initial device creation? */
-	p = (struct p_header80 *) __get_free_page(GFP_NOIO);
-	if (!p) {
-		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
+	if (!expect(mdev->bitmap))
 		return false;
-	}
 
 	if (get_ldev(mdev)) {
 		if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
@@ -2504,37 +1258,39 @@
 	};
 
 	do {
-		err = send_bitmap_rle_or_plain(mdev, p, &c);
+		err = send_bitmap_rle_or_plain(mdev, &c);
 	} while (err > 0);
 
-	free_page((unsigned long) p);
 	return err == 0;
 }
 
 int drbd_send_bitmap(struct drbd_conf *mdev)
 {
-	int err;
+	struct drbd_socket *sock = &mdev->tconn->data;
+	int err = -1;
 
-	if (!drbd_get_data_sock(mdev))
-		return -1;
-	err = !_drbd_send_bitmap(mdev);
-	drbd_put_data_sock(mdev);
+	mutex_lock(&sock->mutex);
+	if (sock->socket)
+		err = !_drbd_send_bitmap(mdev);
+	mutex_unlock(&sock->mutex);
 	return err;
 }
 
-int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
+void drbd_send_b_ack(struct drbd_tconn *tconn, u32 barrier_nr, u32 set_size)
 {
-	int ok;
-	struct p_barrier_ack p;
+	struct drbd_socket *sock;
+	struct p_barrier_ack *p;
 
-	p.barrier  = barrier_nr;
-	p.set_size = cpu_to_be32(set_size);
+	if (tconn->cstate < C_WF_REPORT_PARAMS)
+		return;
 
-	if (mdev->state.conn < C_CONNECTED)
-		return false;
-	ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
-			(struct p_header80 *)&p, sizeof(p));
-	return ok;
+	sock = &tconn->meta;
+	p = conn_prepare_command(tconn, sock);
+	if (!p)
+		return;
+	p->barrier = barrier_nr;
+	p->set_size = cpu_to_be32(set_size);
+	conn_send_command(tconn, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0);
 }
 
 /**
@@ -2545,62 +1301,62 @@
- * @blksize:	size in byte, needs to be in big endian byte order
+ * @blksize:	size in bytes, needs to be in big endian byte order
  * @block_id:	Id, big endian byte order
  */
-static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
-			  u64 sector,
-			  u32 blksize,
-			  u64 block_id)
+static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
+			  u64 sector, u32 blksize, u64 block_id)
 {
-	int ok;
-	struct p_block_ack p;
+	struct drbd_socket *sock;
+	struct p_block_ack *p;
 
-	p.sector   = sector;
-	p.block_id = block_id;
-	p.blksize  = blksize;
-	p.seq_num  = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
+	if (mdev->state.conn < C_CONNECTED)
+		return -EIO;
 
-	if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
-		return false;
-	ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
-				(struct p_header80 *)&p, sizeof(p));
-	return ok;
+	sock = &mdev->tconn->meta;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->sector = sector;
+	p->block_id = block_id;
+	p->blksize = blksize;
+	p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq));
+	return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
 }
 
 /* dp->sector and dp->block_id already/still in network byte order,
  * data_size is payload size according to dp->head,
  * and may need to be corrected for digest size. */
-int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
-		     struct p_data *dp, int data_size)
+void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
+		      struct p_data *dp, int data_size)
 {
-	data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
-		crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
-	return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
-			      dp->block_id);
+	if (mdev->tconn->peer_integrity_tfm)
+		data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
+	_drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
+		       dp->block_id);
 }
 
-int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
-		     struct p_block_req *rp)
+void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
+		      struct p_block_req *rp)
 {
-	return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
+	_drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
 }
 
 /**
  * drbd_send_ack() - Sends an ack packet
- * @mdev:	DRBD device.
- * @cmd:	Packet command code.
- * @e:		Epoch entry.
+ * @mdev:	DRBD device
+ * @cmd:	packet command code
+ * @peer_req:	peer request
  */
-int drbd_send_ack(struct drbd_conf *mdev,
-	enum drbd_packets cmd, struct drbd_epoch_entry *e)
+int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
+		  struct drbd_peer_request *peer_req)
 {
 	return _drbd_send_ack(mdev, cmd,
-			      cpu_to_be64(e->sector),
-			      cpu_to_be32(e->size),
-			      e->block_id);
+			      cpu_to_be64(peer_req->i.sector),
+			      cpu_to_be32(peer_req->i.size),
+			      peer_req->block_id);
 }
 
 /* This function misuses the block_id field to signal if the blocks
- * are is sync or not. */
+ * are in sync or not. */
-int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
 		     sector_t sector, int blksize, u64 block_id)
 {
 	return _drbd_send_ack(mdev, cmd,
@@ -2612,85 +1368,87 @@
 int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
 		       sector_t sector, int size, u64 block_id)
 {
-	int ok;
-	struct p_block_req p;
+	struct drbd_socket *sock;
+	struct p_block_req *p;
 
-	p.sector   = cpu_to_be64(sector);
-	p.block_id = block_id;
-	p.blksize  = cpu_to_be32(size);
-
-	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
-				(struct p_header80 *)&p, sizeof(p));
-	return ok;
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->sector = cpu_to_be64(sector);
+	p->block_id = block_id;
+	p->blksize = cpu_to_be32(size);
+	return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
 }
 
-int drbd_send_drequest_csum(struct drbd_conf *mdev,
-			    sector_t sector, int size,
-			    void *digest, int digest_size,
-			    enum drbd_packets cmd)
+int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size,
+			    void *digest, int digest_size, enum drbd_packet cmd)
 {
-	int ok;
-	struct p_block_req p;
+	struct drbd_socket *sock;
+	struct p_block_req *p;
 
-	p.sector   = cpu_to_be64(sector);
-	p.block_id = BE_DRBD_MAGIC + 0xbeef;
-	p.blksize  = cpu_to_be32(size);
+	/* FIXME: Put the digest into the preallocated socket buffer.  */
 
-	p.head.magic   = BE_DRBD_MAGIC;
-	p.head.command = cpu_to_be16(cmd);
-	p.head.length  = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
-
-	mutex_lock(&mdev->data.mutex);
-
-	ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
-	ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
-
-	mutex_unlock(&mdev->data.mutex);
-
-	return ok;
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->sector = cpu_to_be64(sector);
+	p->block_id = ID_SYNCER /* unused */;
+	p->blksize = cpu_to_be32(size);
+	return drbd_send_command(mdev, sock, cmd, sizeof(*p),
+				 digest, digest_size);
 }
 
 int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
 {
-	int ok;
-	struct p_block_req p;
+	struct drbd_socket *sock;
+	struct p_block_req *p;
 
-	p.sector   = cpu_to_be64(sector);
-	p.block_id = BE_DRBD_MAGIC + 0xbabe;
-	p.blksize  = cpu_to_be32(size);
-
-	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
-			   (struct p_header80 *)&p, sizeof(p));
-	return ok;
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->sector = cpu_to_be64(sector);
+	p->block_id = ID_SYNCER /* unused */;
+	p->blksize = cpu_to_be32(size);
+	return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0);
 }
 
 /* called on sndtimeo
  * returns false if we should retry,
  * true if we think connection is dead
  */
-static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
+static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock)
 {
 	int drop_it;
 	/* long elapsed = (long)(jiffies - mdev->last_received); */
 
-	drop_it =   mdev->meta.socket == sock
-		|| !mdev->asender.task
-		|| get_t_state(&mdev->asender) != Running
-		|| mdev->state.conn < C_CONNECTED;
+	drop_it =   tconn->meta.socket == sock
+		|| !tconn->asender.task
+		|| get_t_state(&tconn->asender) != RUNNING
+		|| tconn->cstate < C_WF_REPORT_PARAMS;
 
 	if (drop_it)
 		return true;
 
-	drop_it = !--mdev->ko_count;
+	drop_it = !--tconn->ko_count;
 	if (!drop_it) {
-		dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
-		       current->comm, current->pid, mdev->ko_count);
-		request_ping(mdev);
+		conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
+			 current->comm, current->pid, tconn->ko_count);
+		request_ping(tconn);
 	}
 
 	return drop_it; /* && (mdev->state == R_PRIMARY) */;
 }
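
The ko-count mechanism this implements, in short: drbd_send() below reloads tconn->ko_count from net_conf->ko_count before each send on the data socket; every send timeout that is not otherwise fatal decrements it here, logging and requesting a ping while it stays above zero, and giving up the connection once it reaches zero.
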
 
+static void drbd_update_congested(struct drbd_tconn *tconn)
+{
+	struct sock *sk = tconn->data.socket->sk;
+	if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
+		set_bit(NET_CONGESTED, &tconn->flags);
+}
+
 /* The idea of sendpage seems to be to put some kind of reference
  * to the page into the skb, and to hand it over to the NIC. In
  * this process get_page() gets called.
@@ -2713,21 +1471,28 @@
  * with page_count == 0 or PageSlab.
  */
 static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
-		   int offset, size_t size, unsigned msg_flags)
+			      int offset, size_t size, unsigned msg_flags)
 {
-	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
+	struct socket *socket;
+	void *addr;
+	int err;
+
+	socket = mdev->tconn->data.socket;
+	addr = kmap(page) + offset;
+	err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags);
 	kunmap(page);
-	if (sent == size)
-		mdev->send_cnt += size>>9;
-	return sent == size;
+	if (!err)
+		mdev->send_cnt += size >> 9;
+	return err;
 }
 
 static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
 		    int offset, size_t size, unsigned msg_flags)
 {
+	struct socket *socket = mdev->tconn->data.socket;
 	mm_segment_t oldfs = get_fs();
-	int sent, ok;
 	int len = size;
+	int err = -EIO;
 
 	/* e.g. XFS meta- & log-data is in slab pages, which have a
 	 * page_count of 0 and/or have PageSlab() set.
@@ -2739,34 +1504,35 @@
 		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
 
 	msg_flags |= MSG_NOSIGNAL;
-	drbd_update_congested(mdev);
+	drbd_update_congested(mdev->tconn);
 	set_fs(KERNEL_DS);
 	do {
-		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
-							offset, len,
-							msg_flags);
-		if (sent == -EAGAIN) {
-			if (we_should_drop_the_connection(mdev,
-							  mdev->data.socket))
-				break;
-			else
-				continue;
-		}
+		int sent;
+
+		sent = socket->ops->sendpage(socket, page, offset, len, msg_flags);
 		if (sent <= 0) {
+			if (sent == -EAGAIN) {
+				if (we_should_drop_the_connection(mdev->tconn, socket))
+					break;
+				continue;
+			}
 			dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
 			     __func__, (int)size, len, sent);
+			if (sent < 0)
+				err = sent;
 			break;
 		}
 		len    -= sent;
 		offset += sent;
 	} while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
 	set_fs(oldfs);
-	clear_bit(NET_CONGESTED, &mdev->flags);
+	clear_bit(NET_CONGESTED, &mdev->tconn->flags);
 
-	ok = (len == 0);
-	if (likely(ok))
-		mdev->send_cnt += size>>9;
-	return ok;
+	if (len == 0) {
+		err = 0;
+		mdev->send_cnt += size >> 9;
+	}
+	return err;
 }
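
The guard hinted at by the XFS comment above is the usual drbd sendpage fallback; a sketch of the condition elided by the hunk boundary (assuming the long-standing disable_sendpage module parameter):

	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
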
 
 static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
@@ -2775,12 +1541,15 @@
 	int i;
 	/* hint all but last page with MSG_MORE */
 	bio_for_each_segment(bvec, bio, i) {
-		if (!_drbd_no_send_page(mdev, bvec->bv_page,
-				     bvec->bv_offset, bvec->bv_len,
-				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
-			return 0;
+		int err;
+
+		err = _drbd_no_send_page(mdev, bvec->bv_page,
+					 bvec->bv_offset, bvec->bv_len,
+					 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+		if (err)
+			return err;
 	}
-	return 1;
+	return 0;
 }
 
 static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
@@ -2789,32 +1558,40 @@
 	int i;
 	/* hint all but last page with MSG_MORE */
 	bio_for_each_segment(bvec, bio, i) {
-		if (!_drbd_send_page(mdev, bvec->bv_page,
-				     bvec->bv_offset, bvec->bv_len,
-				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
-			return 0;
+		int err;
+
+		err = _drbd_send_page(mdev, bvec->bv_page,
+				      bvec->bv_offset, bvec->bv_len,
+				      i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+		if (err)
+			return err;
 	}
-	return 1;
+	return 0;
 }
 
-static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+static int _drbd_send_zc_ee(struct drbd_conf *mdev,
+			    struct drbd_peer_request *peer_req)
 {
-	struct page *page = e->pages;
-	unsigned len = e->size;
+	struct page *page = peer_req->pages;
+	unsigned len = peer_req->i.size;
+	int err;
+
 	/* hint all but last page with MSG_MORE */
 	page_chain_for_each(page) {
 		unsigned l = min_t(unsigned, len, PAGE_SIZE);
-		if (!_drbd_send_page(mdev, page, 0, l,
-				page_chain_next(page) ? MSG_MORE : 0))
-			return 0;
+
+		err = _drbd_send_page(mdev, page, 0, l,
+				      page_chain_next(page) ? MSG_MORE : 0);
+		if (err)
+			return err;
 		len -= l;
 	}
-	return 1;
+	return 0;
 }
 
 static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
 {
-	if (mdev->agreed_pro_version >= 95)
+	if (mdev->tconn->agreed_pro_version >= 95)
 		return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
 			(bi_rw & REQ_FUA ? DP_FUA : 0) |
 			(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
@@ -2828,50 +1605,36 @@
  */
 int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 {
-	int ok = 1;
-	struct p_data p;
+	struct drbd_socket *sock;
+	struct p_data *p;
 	unsigned int dp_flags = 0;
-	void *dgb;
 	int dgs;
+	int err;
 
-	if (!drbd_get_data_sock(mdev))
-		return 0;
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
 
-	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
-		crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
-
-	if (req->size <= DRBD_MAX_SIZE_H80_PACKET) {
-		p.head.h80.magic   = BE_DRBD_MAGIC;
-		p.head.h80.command = cpu_to_be16(P_DATA);
-		p.head.h80.length  =
-			cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
-	} else {
-		p.head.h95.magic   = BE_DRBD_MAGIC_BIG;
-		p.head.h95.command = cpu_to_be16(P_DATA);
-		p.head.h95.length  =
-			cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
-	}
-
-	p.sector   = cpu_to_be64(req->sector);
-	p.block_id = (unsigned long)req;
-	p.seq_num  = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
-
+	if (!p)
+		return -EIO;
+	p->sector = cpu_to_be64(req->i.sector);
+	p->block_id = (unsigned long)req;
+	p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq));
 	dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
-
 	if (mdev->state.conn >= C_SYNC_SOURCE &&
 	    mdev->state.conn <= C_PAUSED_SYNC_T)
 		dp_flags |= DP_MAY_SET_IN_SYNC;
-
-	p.dp_flags = cpu_to_be32(dp_flags);
-	set_bit(UNPLUG_REMOTE, &mdev->flags);
-	ok = (sizeof(p) ==
-		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
-	if (ok && dgs) {
-		dgb = mdev->int_dig_out;
-		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
-		ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+	if (mdev->tconn->agreed_pro_version >= 100) {
+		if (req->rq_state & RQ_EXP_RECEIVE_ACK)
+			dp_flags |= DP_SEND_RECEIVE_ACK;
+		if (req->rq_state & RQ_EXP_WRITE_ACK)
+			dp_flags |= DP_SEND_WRITE_ACK;
 	}
-	if (ok) {
+	p->dp_flags = cpu_to_be32(dp_flags);
+	if (dgs)
+		drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1);
+	err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
+	if (!err) {
 		/* For protocol A, we have to memcpy the payload into
 		 * socket buffers, as we may complete right away
 		 * as soon as we handed it over to tcp, at which point the data
@@ -2883,92 +1646,76 @@
 		 * out ok after sending on this side, but does not fit on the
 		 * receiving side, we sure have detected corruption elsewhere.
 		 */
-		if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
-			ok = _drbd_send_bio(mdev, req->master_bio);
+		if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs)
+			err = _drbd_send_bio(mdev, req->master_bio);
 		else
-			ok = _drbd_send_zc_bio(mdev, req->master_bio);
+			err = _drbd_send_zc_bio(mdev, req->master_bio);
 
 		/* double check digest, sometimes buffers have been modified in flight. */
 		if (dgs > 0 && dgs <= 64) {
 			/* 64 byte, 512 bit, is the largest digest size
 			 * currently supported in kernel crypto. */
 			unsigned char digest[64];
-			drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
-			if (memcmp(mdev->int_dig_out, digest, dgs)) {
+			drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest);
+			if (memcmp(p + 1, digest, dgs)) {
 				dev_warn(DEV,
 					"Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
-					(unsigned long long)req->sector, req->size);
+					(unsigned long long)req->i.sector, req->i.size);
 			}
 		} /* else if (dgs > 64) {
 		     ... Be noisy about digest too large ...
 		} */
 	}
+	mutex_unlock(&sock->mutex);  /* locked by drbd_prepare_command() */
 
-	drbd_put_data_sock(mdev);
-
-	return ok;
+	return err;
 }
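
The copy-versus-zero-copy decision at the end of drbd_send_dblock() condenses to the following (a summary of the branch above, not new logic):

	/* no ack expected (protocol A) or a digest was sent:
	 *   copy the payload (_drbd_send_bio), since the pages may be
	 *   reused or modified before TCP is done with them;
	 * otherwise (RQ_EXP_RECEIVE_ACK / RQ_EXP_WRITE_ACK set):
	 *   hand the pages over zero-copy (_drbd_send_zc_bio). */
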
 
 /* answer packet, used to send data back for read requests:
  *  Peer       -> (diskless) R_PRIMARY   (P_DATA_REPLY)
  *  C_SYNC_SOURCE -> C_SYNC_TARGET         (P_RS_DATA_REPLY)
  */
-int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
-		    struct drbd_epoch_entry *e)
+int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
+		    struct drbd_peer_request *peer_req)
 {
-	int ok;
-	struct p_data p;
-	void *dgb;
+	struct drbd_socket *sock;
+	struct p_data *p;
+	int err;
 	int dgs;
 
-	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
-		crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
 
-	if (e->size <= DRBD_MAX_SIZE_H80_PACKET) {
-		p.head.h80.magic   = BE_DRBD_MAGIC;
-		p.head.h80.command = cpu_to_be16(cmd);
-		p.head.h80.length  =
-			cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
-	} else {
-		p.head.h95.magic   = BE_DRBD_MAGIC_BIG;
-		p.head.h95.command = cpu_to_be16(cmd);
-		p.head.h95.length  =
-			cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
-	}
+	dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
 
-	p.sector   = cpu_to_be64(e->sector);
-	p.block_id = e->block_id;
-	/* p.seq_num  = 0;    No sequence numbers here.. */
+	if (!p)
+		return -EIO;
+	p->sector = cpu_to_be64(peer_req->i.sector);
+	p->block_id = peer_req->block_id;
+	p->seq_num = 0;  /* unused */
+	p->dp_flags = 0;
+	if (dgs)
+		drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1);
+	err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size);
+	if (!err)
+		err = _drbd_send_zc_ee(mdev, peer_req);
+	mutex_unlock(&sock->mutex);  /* locked by drbd_prepare_command() */
 
-	/* Only called by our kernel thread.
-	 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
-	 * in response to admin command or module unload.
-	 */
-	if (!drbd_get_data_sock(mdev))
-		return 0;
-
-	ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
-	if (ok && dgs) {
-		dgb = mdev->int_dig_out;
-		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
-		ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
-	}
-	if (ok)
-		ok = _drbd_send_zc_ee(mdev, e);
-
-	drbd_put_data_sock(mdev);
-
-	return ok;
+	return err;
 }
 
-int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req)
 {
-	struct p_block_desc p;
+	struct drbd_socket *sock;
+	struct p_block_desc *p;
 
-	p.sector  = cpu_to_be64(req->sector);
-	p.blksize = cpu_to_be32(req->size);
-
-	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+	sock = &mdev->tconn->data;
+	p = drbd_prepare_command(mdev, sock);
+	if (!p)
+		return -EIO;
+	p->sector = cpu_to_be64(req->i.sector);
+	p->blksize = cpu_to_be32(req->i.size);
+	return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0);
 }
 
 /*
@@ -2987,7 +1734,7 @@
 /*
  * you must have down()ed the appropriate [m]sock_mutex elsewhere!
  */
-int drbd_send(struct drbd_conf *mdev, struct socket *sock,
+int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
 	      void *buf, size_t size, unsigned msg_flags)
 {
 	struct kvec iov;
@@ -2995,7 +1742,7 @@
 	int rv, sent = 0;
 
 	if (!sock)
-		return -1000;
+		return -EBADR;
 
 	/* THINK  if (signal_pending) return ... ? */
 
@@ -3008,9 +1755,11 @@
 	msg.msg_controllen = 0;
 	msg.msg_flags      = msg_flags | MSG_NOSIGNAL;
 
-	if (sock == mdev->data.socket) {
-		mdev->ko_count = mdev->net_conf->ko_count;
-		drbd_update_congested(mdev);
+	if (sock == tconn->data.socket) {
+		rcu_read_lock();
+		tconn->ko_count = rcu_dereference(tconn->net_conf)->ko_count;
+		rcu_read_unlock();
+		drbd_update_congested(tconn);
 	}
 	do {
 		/* STRANGE
@@ -3024,12 +1773,11 @@
  */
 		rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
 		if (rv == -EAGAIN) {
-			if (we_should_drop_the_connection(mdev, sock))
+			if (we_should_drop_the_connection(tconn, sock))
 				break;
 			else
 				continue;
 		}
-		D_ASSERT(rv != 0);
 		if (rv == -EINTR) {
 			flush_signals(current);
 			rv = 0;
@@ -3041,22 +1789,40 @@
 		iov.iov_len  -= rv;
 	} while (sent < size);
 
-	if (sock == mdev->data.socket)
-		clear_bit(NET_CONGESTED, &mdev->flags);
+	if (sock == tconn->data.socket)
+		clear_bit(NET_CONGESTED, &tconn->flags);
 
 	if (rv <= 0) {
 		if (rv != -EAGAIN) {
-			dev_err(DEV, "%s_sendmsg returned %d\n",
-			    sock == mdev->meta.socket ? "msock" : "sock",
-			    rv);
-			drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+			conn_err(tconn, "%s_sendmsg returned %d\n",
+				 sock == tconn->meta.socket ? "msock" : "sock",
+				 rv);
+			conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
 		} else
-			drbd_force_state(mdev, NS(conn, C_TIMEOUT));
+			conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD);
 	}
 
 	return sent;
 }
 
+/**
+ * drbd_send_all  -  Send an entire buffer
+ *
+ * Returns 0 upon success and a negative error value otherwise.
+ */
+int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer,
+		  size_t size, unsigned msg_flags)
+{
+	int err;
+
+	err = drbd_send(tconn, sock, buffer, size, msg_flags);
+	if (err < 0)
+		return err;
+	if (err != size)
+		return -EIO;
+	return 0;
+}
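
drbd_send_all() is the 0/-errno wrapper most of the converted senders in this patch rely on; the typical caller shape (a sketch):

	err = drbd_send_all(tconn, sock, buf, len, 0);
	if (err)	/* 0 on full send, -EIO on short send, or sendmsg's error */
		goto out;
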
+
 static int drbd_open(struct block_device *bdev, fmode_t mode)
 {
 	struct drbd_conf *mdev = bdev->bd_disk->private_data;
@@ -3064,7 +1830,7 @@
 	int rv = 0;
 
 	mutex_lock(&drbd_main_mutex);
-	spin_lock_irqsave(&mdev->req_lock, flags);
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
 	/* to have a stable mdev->state.role
 	 * and no race with updating open_cnt */
 
@@ -3077,7 +1843,7 @@
 
 	if (!rv)
 		mdev->open_cnt++;
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 	mutex_unlock(&drbd_main_mutex);
 
 	return rv;
@@ -3094,35 +1860,14 @@
 
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
-	/* This way we get a compile error when sync_conf grows,
-	   and we forgot to initialize it here */
-	mdev->sync_conf = (struct syncer_conf) {
-		/* .rate = */		DRBD_RATE_DEF,
-		/* .after = */		DRBD_AFTER_DEF,
-		/* .al_extents = */	DRBD_AL_EXTENTS_DEF,
-		/* .verify_alg = */	{}, 0,
-		/* .cpu_mask = */	{}, 0,
-		/* .csums_alg = */	{}, 0,
-		/* .use_rle = */	0,
-		/* .on_no_data = */	DRBD_ON_NO_DATA_DEF,
-		/* .c_plan_ahead = */	DRBD_C_PLAN_AHEAD_DEF,
-		/* .c_delay_target = */	DRBD_C_DELAY_TARGET_DEF,
-		/* .c_fill_target = */	DRBD_C_FILL_TARGET_DEF,
-		/* .c_max_rate = */	DRBD_C_MAX_RATE_DEF,
-		/* .c_min_rate = */	DRBD_C_MIN_RATE_DEF
-	};
-
-	/* Have to use that way, because the layout differs between
-	   big endian and little endian */
-	mdev->state = (union drbd_state) {
+	/* Beware! The actual layout differs
+	 * between big endian and little endian */
+	mdev->state = (union drbd_dev_state) {
 		{ .role = R_SECONDARY,
 		  .peer = R_UNKNOWN,
 		  .conn = C_STANDALONE,
 		  .disk = D_DISKLESS,
 		  .pdsk = D_UNKNOWN,
-		  .susp = 0,
-		  .susp_nod = 0,
-		  .susp_fen = 0
 		} };
 }
 
@@ -3138,28 +1883,17 @@
 	atomic_set(&mdev->rs_pending_cnt, 0);
 	atomic_set(&mdev->unacked_cnt, 0);
 	atomic_set(&mdev->local_cnt, 0);
-	atomic_set(&mdev->net_cnt, 0);
-	atomic_set(&mdev->packet_seq, 0);
-	atomic_set(&mdev->pp_in_use, 0);
 	atomic_set(&mdev->pp_in_use_by_net, 0);
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
 	atomic_set(&mdev->ap_in_flight, 0);
 	atomic_set(&mdev->md_io_in_use, 0);
 
-	mutex_init(&mdev->data.mutex);
-	mutex_init(&mdev->meta.mutex);
-	sema_init(&mdev->data.work.s, 0);
-	sema_init(&mdev->meta.work.s, 0);
-	mutex_init(&mdev->state_mutex);
-
-	spin_lock_init(&mdev->data.work.q_lock);
-	spin_lock_init(&mdev->meta.work.q_lock);
+	mutex_init(&mdev->own_state_mutex);
+	mdev->state_mutex = &mdev->own_state_mutex;
 
 	spin_lock_init(&mdev->al_lock);
-	spin_lock_init(&mdev->req_lock);
 	spin_lock_init(&mdev->peer_seq_lock);
-	spin_lock_init(&mdev->epoch_lock);
 
 	INIT_LIST_HEAD(&mdev->active_ee);
 	INIT_LIST_HEAD(&mdev->sync_ee);
@@ -3167,8 +1901,6 @@
 	INIT_LIST_HEAD(&mdev->read_ee);
 	INIT_LIST_HEAD(&mdev->net_ee);
 	INIT_LIST_HEAD(&mdev->resync_reads);
-	INIT_LIST_HEAD(&mdev->data.work.q);
-	INIT_LIST_HEAD(&mdev->meta.work.q);
 	INIT_LIST_HEAD(&mdev->resync_work.list);
 	INIT_LIST_HEAD(&mdev->unplug_work.list);
 	INIT_LIST_HEAD(&mdev->go_diskless.list);
@@ -3182,6 +1914,14 @@
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
 	mdev->start_resync_work.cb = w_start_resync;
+
+	mdev->resync_work.mdev  = mdev;
+	mdev->unplug_work.mdev  = mdev;
+	mdev->go_diskless.mdev  = mdev;
+	mdev->md_sync_work.mdev = mdev;
+	mdev->bm_io_work.w.mdev = mdev;
+	mdev->start_resync_work.mdev = mdev;
+
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
 	init_timer(&mdev->start_resync_timer);
@@ -3197,17 +1937,10 @@
 
 	init_waitqueue_head(&mdev->misc_wait);
 	init_waitqueue_head(&mdev->state_wait);
-	init_waitqueue_head(&mdev->net_cnt_wait);
 	init_waitqueue_head(&mdev->ee_wait);
 	init_waitqueue_head(&mdev->al_wait);
 	init_waitqueue_head(&mdev->seq_wait);
 
-	drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
-	drbd_thread_init(mdev, &mdev->worker, drbd_worker);
-	drbd_thread_init(mdev, &mdev->asender, drbd_asender);
-
-	mdev->agreed_pro_version = PRO_VERSION_MAX;
-	mdev->write_ordering = WO_bdev_flush;
 	mdev->resync_wenr = LC_FREE;
 	mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
 	mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
@@ -3216,13 +1949,10 @@
 void drbd_mdev_cleanup(struct drbd_conf *mdev)
 {
 	int i;
-	if (mdev->receiver.t_state != None)
+	if (mdev->tconn->receiver.t_state != NONE)
 		dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
-				mdev->receiver.t_state);
+				mdev->tconn->receiver.t_state);
 
-	/* no need to lock it, I'm the only thread alive */
-	if (atomic_read(&mdev->current_epoch->epoch_size) !=  0)
-		dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
 	mdev->al_writ_cnt  =
 	mdev->bm_writ_cnt  =
 	mdev->read_cnt     =
@@ -3239,7 +1969,7 @@
 		mdev->rs_mark_left[i] = 0;
 		mdev->rs_mark_time[i] = 0;
 	}
-	D_ASSERT(mdev->net_conf == NULL);
+	D_ASSERT(mdev->tconn->net_conf == NULL);
 
 	drbd_set_my_capacity(mdev, 0);
 	if (mdev->bitmap) {
@@ -3248,21 +1978,18 @@
 		drbd_bm_cleanup(mdev);
 	}
 
-	drbd_free_resources(mdev);
+	drbd_free_bc(mdev->ldev);
+	mdev->ldev = NULL;
+
 	clear_bit(AL_SUSPENDED, &mdev->flags);
 
-	/*
-	 * currently we drbd_init_ee only on module load, so
-	 * we may do drbd_release_ee only on module unload!
-	 */
 	D_ASSERT(list_empty(&mdev->active_ee));
 	D_ASSERT(list_empty(&mdev->sync_ee));
 	D_ASSERT(list_empty(&mdev->done_ee));
 	D_ASSERT(list_empty(&mdev->read_ee));
 	D_ASSERT(list_empty(&mdev->net_ee));
 	D_ASSERT(list_empty(&mdev->resync_reads));
-	D_ASSERT(list_empty(&mdev->data.work.q));
-	D_ASSERT(list_empty(&mdev->meta.work.q));
+	D_ASSERT(list_empty(&mdev->tconn->sender_work.q));
 	D_ASSERT(list_empty(&mdev->resync_work.list));
 	D_ASSERT(list_empty(&mdev->unplug_work.list));
 	D_ASSERT(list_empty(&mdev->go_diskless.list));
@@ -3336,7 +2063,7 @@
 		goto Enomem;
 
 	drbd_ee_cache = kmem_cache_create(
-		"drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
+		"drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
 	if (drbd_ee_cache == NULL)
 		goto Enomem;
 
@@ -3351,11 +2078,9 @@
 		goto Enomem;
 
 	/* mempools */
-#ifdef COMPAT_HAVE_BIOSET_CREATE
 	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
 	if (drbd_md_io_bio_set == NULL)
 		goto Enomem;
-#endif
 
 	drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
 	if (drbd_md_io_page_pool == NULL)
@@ -3404,73 +2129,53 @@
 	.notifier_call = drbd_notify_sys,
 };
 
-static void drbd_release_ee_lists(struct drbd_conf *mdev)
+static void drbd_release_all_peer_reqs(struct drbd_conf *mdev)
 {
 	int rr;
 
-	rr = drbd_release_ee(mdev, &mdev->active_ee);
+	rr = drbd_free_peer_reqs(mdev, &mdev->active_ee);
 	if (rr)
 		dev_err(DEV, "%d EEs in active list found!\n", rr);
 
-	rr = drbd_release_ee(mdev, &mdev->sync_ee);
+	rr = drbd_free_peer_reqs(mdev, &mdev->sync_ee);
 	if (rr)
 		dev_err(DEV, "%d EEs in sync list found!\n", rr);
 
-	rr = drbd_release_ee(mdev, &mdev->read_ee);
+	rr = drbd_free_peer_reqs(mdev, &mdev->read_ee);
 	if (rr)
 		dev_err(DEV, "%d EEs in read list found!\n", rr);
 
-	rr = drbd_release_ee(mdev, &mdev->done_ee);
+	rr = drbd_free_peer_reqs(mdev, &mdev->done_ee);
 	if (rr)
 		dev_err(DEV, "%d EEs in done list found!\n", rr);
 
-	rr = drbd_release_ee(mdev, &mdev->net_ee);
+	rr = drbd_free_peer_reqs(mdev, &mdev->net_ee);
 	if (rr)
 		dev_err(DEV, "%d EEs in net list found!\n", rr);
 }
 
-/* caution. no locking.
- * currently only used from module cleanup code. */
-static void drbd_delete_device(unsigned int minor)
+/* caution. no locking. */
+void drbd_minor_destroy(struct kref *kref)
 {
-	struct drbd_conf *mdev = minor_to_mdev(minor);
-
-	if (!mdev)
-		return;
+	struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
+	struct drbd_tconn *tconn = mdev->tconn;
 
 	del_timer_sync(&mdev->request_timer);
 
 	/* paranoia asserts */
-	if (mdev->open_cnt != 0)
-		dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
-				__FILE__ , __LINE__);
-
-	ERR_IF (!list_empty(&mdev->data.work.q)) {
-		struct list_head *lp;
-		list_for_each(lp, &mdev->data.work.q) {
-			dev_err(DEV, "lp = %p\n", lp);
-		}
-	};
+	D_ASSERT(mdev->open_cnt == 0);
 	/* end paranoia asserts */
 
-	del_gendisk(mdev->vdisk);
-
 	/* cleanup stuff that may have been allocated during
 	 * device (re-)configuration or state changes */
 
 	if (mdev->this_bdev)
 		bdput(mdev->this_bdev);
 
-	drbd_free_resources(mdev);
+	drbd_free_bc(mdev->ldev);
+	mdev->ldev = NULL;
 
-	drbd_release_ee_lists(mdev);
-
-	/* should be freed on disconnect? */
-	kfree(mdev->ee_hash);
-	/*
-	mdev->ee_hash_s = 0;
-	mdev->ee_hash = NULL;
-	*/
+	drbd_release_all_peer_reqs(mdev);
 
 	lc_destroy(mdev->act_log);
 	lc_destroy(mdev->resync);
@@ -3478,19 +2183,101 @@
 	kfree(mdev->p_uuid);
 	/* mdev->p_uuid = NULL; */
 
-	kfree(mdev->int_dig_out);
-	kfree(mdev->int_dig_in);
-	kfree(mdev->int_dig_vv);
+	if (mdev->bitmap) /* should no longer be there. */
+		drbd_bm_cleanup(mdev);
+	__free_page(mdev->md_io_page);
+	put_disk(mdev->vdisk);
+	blk_cleanup_queue(mdev->rq_queue);
+	kfree(mdev->rs_plan_s);
+	kfree(mdev);
 
-	/* cleanup the rest that has been
-	 * allocated from drbd_new_device
-	 * and actually free the mdev itself */
-	drbd_free_mdev(mdev);
+	kref_put(&tconn->kref, &conn_destroy);
 }
 
+/* One global retry thread, used when we need to push back a bio and have it
+ * reinserted through our make request function.
+ */
+static struct retry_worker {
+	struct workqueue_struct *wq;
+	struct work_struct worker;
+
+	spinlock_t lock;
+	struct list_head writes;
+} retry;
+
+static void do_retry(struct work_struct *ws)
+{
+	struct retry_worker *retry = container_of(ws, struct retry_worker, worker);
+	LIST_HEAD(writes);
+	struct drbd_request *req, *tmp;
+
+	spin_lock_irq(&retry->lock);
+	list_splice_init(&retry->writes, &writes);
+	spin_unlock_irq(&retry->lock);
+
+	list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
+		struct drbd_conf *mdev = req->w.mdev;
+		struct bio *bio = req->master_bio;
+		unsigned long start_time = req->start_time;
+		bool expected;
+
+		expected =
+			expect(atomic_read(&req->completion_ref) == 0) &&
+			expect(req->rq_state & RQ_POSTPONED) &&
+			expect((req->rq_state & RQ_LOCAL_PENDING) == 0 ||
+				(req->rq_state & RQ_LOCAL_ABORTED) != 0);
+
+		if (!expected)
+			dev_err(DEV, "req=%p completion_ref=%d rq_state=%x\n",
+				req, atomic_read(&req->completion_ref),
+				req->rq_state);
+
+		/* We still need to put one kref associated with the
+		 * "completion_ref" going zero in the code path that queued it
+		 * here.  The request object may still be referenced by a
+		 * frozen local req->private_bio, in case we force-detached.
+		 */
+		kref_put(&req->kref, drbd_req_destroy);
+
+		/* A single suspended or otherwise blocking device may stall
+		 * all others as well.  Fortunately, this code path exists to
+		 * recover from a situation that "should not happen":
+		 * concurrent writes in multi-primary setup.
+		 * In a "normal" lifecycle, this workqueue is supposed to be
+		 * destroyed without ever doing anything.
+		 * If it turns out to be an issue anyway, we can do per
+		 * resource (replication group) or per device (minor) retry
+		 * workqueues instead.
+		 */
+
+		/* We are not just doing generic_make_request(),
+		 * as we want to keep the start_time information. */
+		inc_ap_bio(mdev);
+		__drbd_make_request(mdev, bio, start_time);
+	}
+}
+
+void drbd_restart_request(struct drbd_request *req)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&retry.lock, flags);
+	list_move_tail(&req->tl_requests, &retry.writes);
+	spin_unlock_irqrestore(&retry.lock, flags);
+
+	/* Drop the extra reference that would otherwise
+	 * have been dropped by complete_master_bio.
+	 * do_retry() needs to grab a new one. */
+	dec_ap_bio(req->w.mdev);
+
+	queue_work(retry.wq, &retry.worker);
+}
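
For reference, a minimal sketch of the deferred-retry pattern the two functions
above implement -- producers move items onto a spinlock-protected list and kick
a workqueue; the worker splices the list onto a private head under the lock and
then processes it without the lock held.  This is not part of the patch; all
names (struct item, pending_*, process_one, queue_item) are hypothetical.

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct item {
	struct list_head list;
};

static LIST_HEAD(pending_list);
static DEFINE_SPINLOCK(pending_lock);

static void process_one(struct item *it)	/* hypothetical consumer */
{
	kfree(it);
}

static void pending_fn(struct work_struct *ws)
{
	LIST_HEAD(todo);
	struct item *it, *tmp;

	/* drain the shared list under the lock ... */
	spin_lock_irq(&pending_lock);
	list_splice_init(&pending_list, &todo);
	spin_unlock_irq(&pending_lock);

	/* ... then work on the private copy; may sleep, no lock held */
	list_for_each_entry_safe(it, tmp, &todo, list)
		process_one(it);
}
static DECLARE_WORK(pending_work, pending_fn);

/* producer side, mirrors drbd_restart_request() */
static void queue_item(struct item *it)
{
	unsigned long flags;

	spin_lock_irqsave(&pending_lock, flags);
	list_add_tail(&it->list, &pending_list);
	spin_unlock_irqrestore(&pending_lock, flags);
	queue_work(system_wq, &pending_work);
}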
+
+
 static void drbd_cleanup(void)
 {
 	unsigned int i;
+	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn, *tmp;
 
 	unregister_reboot_notifier(&drbd_notifier);
 
@@ -3505,19 +2292,31 @@
 	if (drbd_proc)
 		remove_proc_entry("drbd", NULL);
 
-	drbd_nl_cleanup();
+	if (retry.wq)
+		destroy_workqueue(retry.wq);
 
-	if (minor_table) {
-		i = minor_count;
-		while (i--)
-			drbd_delete_device(i);
-		drbd_destroy_mempools();
+	drbd_genl_unregister();
+
+	idr_for_each_entry(&minors, mdev, i) {
+		idr_remove(&minors, mdev_to_minor(mdev));
+		idr_remove(&mdev->tconn->volumes, mdev->vnr);
+		del_gendisk(mdev->vdisk);
+		/* synchronize_rcu(); No other threads running at this point */
+		kref_put(&mdev->kref, &drbd_minor_destroy);
 	}
 
-	kfree(minor_table);
+	/* not _rcu since there is no other updater anymore; genl already unregistered */
+	list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
+		list_del(&tconn->all_tconn); /* not _rcu: no /proc, no other threads */
+		/* synchronize_rcu(); */
+		kref_put(&tconn->kref, &conn_destroy);
+	}
 
+	drbd_destroy_mempools();
 	unregister_blkdev(DRBD_MAJOR, "drbd");
 
+	idr_destroy(&minors);
+
 	printk(KERN_INFO "drbd: module cleanup done.\n");
 }
 
@@ -3542,7 +2341,7 @@
 		goto out;
 	}
 
-	if (test_bit(CALLBACK_PENDING, &mdev->flags)) {
+	if (test_bit(CALLBACK_PENDING, &mdev->tconn->flags)) {
 		r |= (1 << BDI_async_congested);
 		/* Without good local data, we would need to read from remote,
 		 * and that would need the worker thread as well, which is
@@ -3566,7 +2365,7 @@
 			reason = 'b';
 	}
 
-	if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
+	if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) {
 		r |= (1 << BDI_async_congested);
 		reason = reason == 'b' ? 'a' : 'n';
 	}
@@ -3576,20 +2375,243 @@
 	return r;
 }
 
-struct drbd_conf *drbd_new_device(unsigned int minor)
+static void drbd_init_workqueue(struct drbd_work_queue* wq)
+{
+	spin_lock_init(&wq->q_lock);
+	INIT_LIST_HEAD(&wq->q);
+	init_waitqueue_head(&wq->q_wait);
+}
+
+struct drbd_tconn *conn_get_by_name(const char *name)
+{
+	struct drbd_tconn *tconn;
+
+	if (!name || !name[0])
+		return NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
+		if (!strcmp(tconn->name, name)) {
+			kref_get(&tconn->kref);
+			goto found;
+		}
+	}
+	tconn = NULL;
+found:
+	rcu_read_unlock();
+	return tconn;
+}
+
+struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
+				     void *peer_addr, int peer_addr_len)
+{
+	struct drbd_tconn *tconn;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
+		if (tconn->my_addr_len == my_addr_len &&
+		    tconn->peer_addr_len == peer_addr_len &&
+		    !memcmp(&tconn->my_addr, my_addr, my_addr_len) &&
+		    !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) {
+			kref_get(&tconn->kref);
+			goto found;
+		}
+	}
+	tconn = NULL;
+found:
+	rcu_read_unlock();
+	return tconn;
+}
+
+static int drbd_alloc_socket(struct drbd_socket *socket)
+{
+	socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
+	if (!socket->rbuf)
+		return -ENOMEM;
+	socket->sbuf = (void *) __get_free_page(GFP_KERNEL);
+	if (!socket->sbuf)
+		return -ENOMEM;
+	return 0;
+}
+
+static void drbd_free_socket(struct drbd_socket *socket)
+{
+	free_page((unsigned long) socket->sbuf);
+	free_page((unsigned long) socket->rbuf);
+}
+
+void conn_free_crypto(struct drbd_tconn *tconn)
+{
+	drbd_free_sock(tconn);
+
+	crypto_free_hash(tconn->csums_tfm);
+	crypto_free_hash(tconn->verify_tfm);
+	crypto_free_hash(tconn->cram_hmac_tfm);
+	crypto_free_hash(tconn->integrity_tfm);
+	crypto_free_hash(tconn->peer_integrity_tfm);
+	kfree(tconn->int_dig_in);
+	kfree(tconn->int_dig_vv);
+
+	tconn->csums_tfm = NULL;
+	tconn->verify_tfm = NULL;
+	tconn->cram_hmac_tfm = NULL;
+	tconn->integrity_tfm = NULL;
+	tconn->peer_integrity_tfm = NULL;
+	tconn->int_dig_in = NULL;
+	tconn->int_dig_vv = NULL;
+}
+
+int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts)
+{
+	cpumask_var_t new_cpu_mask;
+	int err;
+
+	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
+		/*
+		retcode = ERR_NOMEM;
+		drbd_msg_put_info("unable to allocate cpumask");
+		*/
+
+	/* silently ignore cpu mask on UP kernel */
+	if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
+		/* FIXME: Get rid of constant 32 here */
+		err = bitmap_parse(res_opts->cpu_mask, 32,
+				   cpumask_bits(new_cpu_mask), nr_cpu_ids);
+		if (err) {
+			conn_warn(tconn, "bitmap_parse() failed with %d\n", err);
+			/* retcode = ERR_CPU_MASK_PARSE; */
+			goto fail;
+		}
+	}
+	tconn->res_opts = *res_opts;
+	if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
+		cpumask_copy(tconn->cpu_mask, new_cpu_mask);
+		drbd_calc_cpu_mask(tconn);
+		tconn->receiver.reset_cpu_mask = 1;
+		tconn->asender.reset_cpu_mask = 1;
+		tconn->worker.reset_cpu_mask = 1;
+	}
+	err = 0;
+
+fail:
+	free_cpumask_var(new_cpu_mask);
+	return err;
+}
+
+/* caller must be under genl_lock() */
+struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
+{
+	struct drbd_tconn *tconn;
+
+	tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL);
+	if (!tconn)
+		return NULL;
+
+	tconn->name = kstrdup(name, GFP_KERNEL);
+	if (!tconn->name)
+		goto fail;
+
+	if (drbd_alloc_socket(&tconn->data))
+		goto fail;
+	if (drbd_alloc_socket(&tconn->meta))
+		goto fail;
+
+	if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
+		goto fail;
+
+	if (set_resource_options(tconn, res_opts))
+		goto fail;
+
+	tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
+	if (!tconn->current_epoch)
+		goto fail;
+
+	INIT_LIST_HEAD(&tconn->transfer_log);
+
+	INIT_LIST_HEAD(&tconn->current_epoch->list);
+	tconn->epochs = 1;
+	spin_lock_init(&tconn->epoch_lock);
+	tconn->write_ordering = WO_bdev_flush;
+
+	tconn->send.seen_any_write_yet = false;
+	tconn->send.current_epoch_nr = 0;
+	tconn->send.current_epoch_writes = 0;
+
+	tconn->cstate = C_STANDALONE;
+	mutex_init(&tconn->cstate_mutex);
+	spin_lock_init(&tconn->req_lock);
+	mutex_init(&tconn->conf_update);
+	init_waitqueue_head(&tconn->ping_wait);
+	idr_init(&tconn->volumes);
+
+	drbd_init_workqueue(&tconn->sender_work);
+	mutex_init(&tconn->data.mutex);
+	mutex_init(&tconn->meta.mutex);
+
+	drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver");
+	drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
+	drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
+
+	kref_init(&tconn->kref);
+	list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns);
+
+	return tconn;
+
+fail:
+	kfree(tconn->current_epoch);
+	free_cpumask_var(tconn->cpu_mask);
+	drbd_free_socket(&tconn->meta);
+	drbd_free_socket(&tconn->data);
+	kfree(tconn->name);
+	kfree(tconn);
+
+	return NULL;
+}
+
+void conn_destroy(struct kref *kref)
+{
+	struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref);
+
+	if (atomic_read(&tconn->current_epoch->epoch_size) != 0)
+		conn_err(tconn, "epoch_size:%d\n", atomic_read(&tconn->current_epoch->epoch_size));
+	kfree(tconn->current_epoch);
+
+	idr_destroy(&tconn->volumes);
+
+	free_cpumask_var(tconn->cpu_mask);
+	drbd_free_socket(&tconn->meta);
+	drbd_free_socket(&tconn->data);
+	kfree(tconn->name);
+	kfree(tconn->int_dig_in);
+	kfree(tconn->int_dig_vv);
+	kfree(tconn);
+}
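
The tconn lifetime introduced above follows the usual kref contract:
conn_create() starts with one reference (kref_init) held on behalf of the
global drbd_tconns list, every lookup takes its own with kref_get(), and the
final kref_put() invokes conn_destroy().  A hedged usage sketch ("r0" is just
an example resource name):

	struct drbd_tconn *tconn = conn_get_by_name("r0");	/* +1 ref, or NULL */

	if (tconn) {
		/* tconn cannot be freed while we hold the reference */
		kref_put(&tconn->kref, &conn_destroy);		/* drop it when done */
	}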
+
+enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
 {
 	struct drbd_conf *mdev;
 	struct gendisk *disk;
 	struct request_queue *q;
+	int vnr_got = vnr;
+	int minor_got = minor;
+	enum drbd_ret_code err = ERR_NOMEM;
+
+	mdev = minor_to_mdev(minor);
+	if (mdev)
+		return ERR_MINOR_EXISTS;
 
 	/* GFP_KERNEL, we are outside of all write-out paths */
 	mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
 	if (!mdev)
-		return NULL;
-	if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
-		goto out_no_cpumask;
+		return ERR_NOMEM;
+
+	kref_get(&tconn->kref);
+	mdev->tconn = tconn;
 
 	mdev->minor = minor;
+	mdev->vnr = vnr;
 
 	drbd_init_set_defaults(mdev);
 
@@ -3627,7 +2649,7 @@
 	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	blk_queue_merge_bvec(q, drbd_merge_bvec);
-	q->queue_lock = &mdev->req_lock;
+	q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */
 
 	mdev->md_io_page = alloc_page(GFP_KERNEL);
 	if (!mdev->md_io_page)
@@ -3635,30 +2657,44 @@
 
 	if (drbd_bm_init(mdev))
 		goto out_no_bitmap;
-	/* no need to lock access, we are still initializing this minor device. */
-	if (!tl_init(mdev))
-		goto out_no_tl;
+	mdev->read_requests = RB_ROOT;
+	mdev->write_requests = RB_ROOT;
 
-	mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
-	if (!mdev->app_reads_hash)
-		goto out_no_app_reads;
+	if (!idr_pre_get(&minors, GFP_KERNEL))
+		goto out_no_minor_idr;
+	if (idr_get_new_above(&minors, mdev, minor, &minor_got))
+		goto out_no_minor_idr;
+	if (minor_got != minor) {
+		err = ERR_MINOR_EXISTS;
+		drbd_msg_put_info("requested minor exists already");
+		goto out_idr_remove_minor;
+	}
 
-	mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
-	if (!mdev->current_epoch)
-		goto out_no_epoch;
+	if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
+		goto out_idr_remove_minor;
+	if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
+		goto out_idr_remove_minor;
+	if (vnr_got != vnr) {
+		err = ERR_INVALID_REQUEST;
+		drbd_msg_put_info("requested volume exists already");
+		goto out_idr_remove_vol;
+	}
+	add_disk(disk);
+	kref_init(&mdev->kref); /* one ref for both idrs and the add_disk */
 
-	INIT_LIST_HEAD(&mdev->current_epoch->list);
-	mdev->epochs = 1;
+	/* inherit the connection state */
+	mdev->state.conn = tconn->cstate;
+	if (mdev->state.conn == C_WF_REPORT_PARAMS)
+		drbd_connected(mdev);
 
-	return mdev;
+	return NO_ERROR;
 
-/* out_whatever_else:
-	kfree(mdev->current_epoch); */
-out_no_epoch:
-	kfree(mdev->app_reads_hash);
-out_no_app_reads:
-	tl_cleanup(mdev);
-out_no_tl:
+out_idr_remove_vol:
+	idr_remove(&tconn->volumes, vnr_got);
+out_idr_remove_minor:
+	idr_remove(&minors, minor_got);
+	synchronize_rcu();
+out_no_minor_idr:
 	drbd_bm_cleanup(mdev);
 out_no_bitmap:
 	__free_page(mdev->md_io_page);
@@ -3667,55 +2703,25 @@
 out_no_disk:
 	blk_cleanup_queue(q);
 out_no_q:
-	free_cpumask_var(mdev->cpu_mask);
-out_no_cpumask:
 	kfree(mdev);
-	return NULL;
+	kref_put(&tconn->kref, &conn_destroy);
+	return err;
 }
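
conn_new_minor() above relies on the pre-3.9 two-step idr API, where a caller
cannot request an exact id directly: idr_pre_get() preloads memory, and
idr_get_new_above() inserts at the lowest free id at or above the requested
one.  If the id actually granted differs from the one asked for, the desired
slot was already occupied and the fresh insertion must be undone, as both error
paths above do.  A sketch with hypothetical names (some_idr, insert_at):

static DEFINE_IDR(some_idr);

static int insert_at(void *obj, int want)
{
	int got;

	if (!idr_pre_get(&some_idr, GFP_KERNEL))	/* preallocate idr nodes */
		return -ENOMEM;
	if (idr_get_new_above(&some_idr, obj, want, &got))
		return -ENOMEM;
	if (got != want) {		/* id 'want' was already in use */
		idr_remove(&some_idr, got);	/* undo our insertion elsewhere */
		return -EBUSY;
	}
	return 0;
}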
 
-/* counterpart of drbd_new_device.
- * last part of drbd_delete_device. */
-void drbd_free_mdev(struct drbd_conf *mdev)
-{
-	kfree(mdev->current_epoch);
-	kfree(mdev->app_reads_hash);
-	tl_cleanup(mdev);
-	if (mdev->bitmap) /* should no longer be there. */
-		drbd_bm_cleanup(mdev);
-	__free_page(mdev->md_io_page);
-	put_disk(mdev->vdisk);
-	blk_cleanup_queue(mdev->rq_queue);
-	free_cpumask_var(mdev->cpu_mask);
-	drbd_free_tl_hash(mdev);
-	kfree(mdev);
-}
-
-
 int __init drbd_init(void)
 {
 	int err;
 
-	if (sizeof(struct p_handshake) != 80) {
-		printk(KERN_ERR
-		       "drbd: never change the size or layout "
-		       "of the HandShake packet.\n");
-		return -EINVAL;
-	}
-
 	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
 		printk(KERN_ERR
-			"drbd: invalid minor_count (%d)\n", minor_count);
+		       "drbd: invalid minor_count (%d)\n", minor_count);
 #ifdef MODULE
 		return -EINVAL;
 #else
-		minor_count = 8;
+		minor_count = DRBD_MINOR_COUNT_DEF;
 #endif
 	}
 
-	err = drbd_nl_init();
-	if (err)
-		return err;
-
 	err = register_blkdev(DRBD_MAJOR, "drbd");
 	if (err) {
 		printk(KERN_ERR
@@ -3724,6 +2730,13 @@
 		return err;
 	}
 
+	err = drbd_genl_register();
+	if (err) {
+		printk(KERN_ERR "drbd: unable to register generic netlink family\n");
+		goto fail;
+	}
+
 	register_reboot_notifier(&drbd_notifier);
 
 	/*
@@ -3734,22 +2747,29 @@
 	init_waitqueue_head(&drbd_pp_wait);
 
 	drbd_proc = NULL; /* play safe for drbd_cleanup */
-	minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
-				GFP_KERNEL);
-	if (!minor_table)
-		goto Enomem;
+	idr_init(&minors);
 
 	err = drbd_create_mempools();
 	if (err)
-		goto Enomem;
+		goto fail;
 
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		printk(KERN_ERR "drbd: unable to register proc file\n");
-		goto Enomem;
+		goto fail;
 	}
 
 	rwlock_init(&global_state_lock);
+	INIT_LIST_HEAD(&drbd_tconns);
+
+	retry.wq = create_singlethread_workqueue("drbd-reissue");
+	if (!retry.wq) {
+		printk(KERN_ERR "drbd: unable to create retry workqueue\n");
+		goto fail;
+	}
+	INIT_WORK(&retry.worker, do_retry);
+	spin_lock_init(&retry.lock);
+	INIT_LIST_HEAD(&retry.writes);
 
 	printk(KERN_INFO "drbd: initialized. "
 	       "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
@@ -3757,11 +2777,10 @@
 	printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
 	printk(KERN_INFO "drbd: registered as block device major %d\n",
 		DRBD_MAJOR);
-	printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
 
 	return 0; /* Success! */
 
-Enomem:
+fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
 		/* currently always the case */
@@ -3782,47 +2801,42 @@
 	kfree(ldev);
 }
 
-void drbd_free_sock(struct drbd_conf *mdev)
+void drbd_free_sock(struct drbd_tconn *tconn)
 {
-	if (mdev->data.socket) {
-		mutex_lock(&mdev->data.mutex);
-		kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
-		sock_release(mdev->data.socket);
-		mdev->data.socket = NULL;
-		mutex_unlock(&mdev->data.mutex);
+	if (tconn->data.socket) {
+		mutex_lock(&tconn->data.mutex);
+		kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR);
+		sock_release(tconn->data.socket);
+		tconn->data.socket = NULL;
+		mutex_unlock(&tconn->data.mutex);
 	}
-	if (mdev->meta.socket) {
-		mutex_lock(&mdev->meta.mutex);
-		kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
-		sock_release(mdev->meta.socket);
-		mdev->meta.socket = NULL;
-		mutex_unlock(&mdev->meta.mutex);
+	if (tconn->meta.socket) {
+		mutex_lock(&tconn->meta.mutex);
+		kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR);
+		sock_release(tconn->meta.socket);
+		tconn->meta.socket = NULL;
+		mutex_unlock(&tconn->meta.mutex);
 	}
 }
 
-
-void drbd_free_resources(struct drbd_conf *mdev)
-{
-	crypto_free_hash(mdev->csums_tfm);
-	mdev->csums_tfm = NULL;
-	crypto_free_hash(mdev->verify_tfm);
-	mdev->verify_tfm = NULL;
-	crypto_free_hash(mdev->cram_hmac_tfm);
-	mdev->cram_hmac_tfm = NULL;
-	crypto_free_hash(mdev->integrity_w_tfm);
-	mdev->integrity_w_tfm = NULL;
-	crypto_free_hash(mdev->integrity_r_tfm);
-	mdev->integrity_r_tfm = NULL;
-
-	drbd_free_sock(mdev);
-
-	__no_warn(local,
-		  drbd_free_bc(mdev->ldev);
-		  mdev->ldev = NULL;);
-}
-
 /* meta data management */
 
+void conn_md_sync(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		kref_get(&mdev->kref);
+		rcu_read_unlock();
+		drbd_md_sync(mdev);
+		kref_put(&mdev->kref, &drbd_minor_destroy);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+}
+
 struct meta_data_on_disk {
 	u64 la_size;           /* last agreed size. */
 	u64 uuid[UI_SIZE];   /* UUIDs. */
@@ -3833,7 +2847,7 @@
 	u32 md_size_sect;
 	u32 al_offset;         /* offset to this block */
 	u32 al_nr_extents;     /* important for restoring the AL */
-	      /* `-- act_log->nr_elements <-- sync_conf.al_extents */
+	      /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
 	u32 bm_offset;         /* offset to the bitmap, from here */
 	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
 	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
@@ -3871,7 +2885,7 @@
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
 	buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
-	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
+	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN);
 
 	buffer->md_size_sect  = cpu_to_be32(mdev->ldev->md.md_size_sect);
 	buffer->al_offset     = cpu_to_be32(mdev->ldev->md.al_offset);
@@ -3885,7 +2899,7 @@
 	D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
 	sector = mdev->ldev->md.md_offset;
 
-	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
+	if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
 		/* this was a try anyways ... */
 		dev_err(DEV, "meta data update failed!\n");
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
@@ -3906,11 +2920,12 @@
  * @bdev:	Device from which the meta data should be read in.
  *
  * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
- * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
+ * something goes wrong.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
 	struct meta_data_on_disk *buffer;
+	u32 magic, flags;
 	int i, rv = NO_ERROR;
 
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
@@ -3920,7 +2935,7 @@
 	if (!buffer)
 		goto out;
 
-	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
+	if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
 		/* NOTE: can't do normal error processing here as this is
 		   called BEFORE disk is attached */
 		dev_err(DEV, "Error while reading metadata.\n");
@@ -3928,8 +2943,20 @@
 		goto err;
 	}
 
-	if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
-		dev_err(DEV, "Error while reading metadata, magic not found.\n");
+	magic = be32_to_cpu(buffer->magic);
+	flags = be32_to_cpu(buffer->flags);
+	if (magic == DRBD_MD_MAGIC_84_UNCLEAN ||
+	    (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) {
+		/* btw: that's Activity Log clean, not "all" clean. */
+		dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n");
+		rv = ERR_MD_UNCLEAN;
+		goto err;
+	}
+	if (magic != DRBD_MD_MAGIC_08) {
+		if (magic == DRBD_MD_MAGIC_07)
+			dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
+		else
+			dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
 		rv = ERR_MD_INVALID;
 		goto err;
 	}
@@ -3963,20 +2990,16 @@
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
 	bdev->md.flags = be32_to_cpu(buffer->flags);
-	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
 	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
 
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
 	if (mdev->state.conn < C_CONNECTED) {
 		unsigned int peer;
 		peer = be32_to_cpu(buffer->la_peer_max_bio_size);
 		peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE);
 		mdev->peer_max_bio_size = peer;
 	}
-	spin_unlock_irq(&mdev->req_lock);
-
-	if (mdev->sync_conf.al_extents < 7)
-		mdev->sync_conf.al_extents = 127;
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
  err:
 	drbd_md_put_buffer(mdev);
@@ -4011,7 +3034,7 @@
 }
 #endif
 
-static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
+void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
 {
 	int i;
 
@@ -4019,7 +3042,7 @@
 		mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
 }
 
-void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 {
 	if (idx == UI_CURRENT) {
 		if (mdev->state.role == R_PRIMARY)
@@ -4034,14 +3057,24 @@
 	drbd_md_mark_dirty(mdev);
 }
 
+void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags);
+	__drbd_uuid_set(mdev, idx, val);
+	spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags);
+}
 
 void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags);
 	if (mdev->ldev->md.uuid[idx]) {
 		drbd_uuid_move_history(mdev);
 		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
 	}
-	_drbd_uuid_set(mdev, idx, val);
+	__drbd_uuid_set(mdev, idx, val);
+	spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags);
 }
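
The split above follows the common kernel convention that a double-underscore
function assumes its caller already holds the relevant lock, while thin
wrappers acquire it (here, md.uuid_lock).  The generic shape, with a
hypothetical struct and names:

struct obj {
	spinlock_t lock;
	u64 val;
};

static void __set_val(struct obj *o, u64 v)	/* caller holds o->lock */
{
	o->val = v;
}

static void set_val(struct obj *o, u64 v)	/* locking wrapper */
{
	unsigned long flags;

	spin_lock_irqsave(&o->lock, flags);
	__set_val(o, v);
	spin_unlock_irqrestore(&o->lock, flags);
}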
 
 /**
@@ -4054,15 +3087,20 @@
 void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
 {
 	u64 val;
-	unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+	unsigned long long bm_uuid;
+
+	get_random_bytes(&val, sizeof(u64));
+
+	spin_lock_irq(&mdev->ldev->md.uuid_lock);
+	bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
 
 	if (bm_uuid)
 		dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
 	mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
+	__drbd_uuid_set(mdev, UI_CURRENT, val);
+	spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 
-	get_random_bytes(&val, sizeof(u64));
-	_drbd_uuid_set(mdev, UI_CURRENT, val);
 	drbd_print_uuids(mdev, "new current UUID");
 	/* get it to stable storage _now_ */
 	drbd_md_sync(mdev);
@@ -4070,9 +3108,11 @@
 
 void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
 {
+	unsigned long flags;
 	if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
 		return;
 
+	spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags);
 	if (val == 0) {
 		drbd_uuid_move_history(mdev);
 		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
@@ -4084,6 +3124,8 @@
 
 		mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
 	}
+	spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags);
+
 	drbd_md_mark_dirty(mdev);
 }
 
@@ -4135,9 +3177,10 @@
 	return rv;
 }
 
-static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+static int w_bitmap_io(struct drbd_work *w, int unused)
 {
 	struct bm_io_work *work = container_of(w, struct bm_io_work, w);
+	struct drbd_conf *mdev = w->mdev;
 	int rv = -EIO;
 
 	D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
@@ -4149,8 +3192,7 @@
 		put_ldev(mdev);
 	}
 
-	clear_bit(BITMAP_IO, &mdev->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(BITMAP_IO, &mdev->flags);
 	wake_up(&mdev->misc_wait);
 
 	if (work->done)
@@ -4160,7 +3202,7 @@
 	work->why = NULL;
 	work->flags = 0;
 
-	return 1;
+	return 0;
 }
 
 void drbd_ldev_destroy(struct drbd_conf *mdev)
@@ -4173,29 +3215,51 @@
 		drbd_free_bc(mdev->ldev);
 		mdev->ldev = NULL;);
 
-	if (mdev->md_io_tmpp) {
-		__free_page(mdev->md_io_tmpp);
-		mdev->md_io_tmpp = NULL;
-	}
 	clear_bit(GO_DISKLESS, &mdev->flags);
 }
 
-static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+static int w_go_diskless(struct drbd_work *w, int unused)
 {
+	struct drbd_conf *mdev = w->mdev;
+
 	D_ASSERT(mdev->state.disk == D_FAILED);
 	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
 	 * the protected members anymore, though, so once put_ldev reaches zero
 	 * again, it will be safe to free them. */
+
+	/* Try to write changed bitmap pages, read errors may have just
+	 * set some bits outside the area covered by the activity log.
+	 *
+	 * If we have an IO error during the bitmap writeout,
+	 * we will want a full sync next time, just in case.
+	 * (Do we want a specific meta data flag for this?)
+	 *
+	 * If that does not make it to stable storage either,
+	 * we cannot do anything about that anymore.
+	 *
+	 * We still need to check if both bitmap and ldev are present, we may
+	 * end up here after a failed attach, before ldev was even assigned.
+	 */
+	if (mdev->bitmap && mdev->ldev) {
+		if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+					"detach", BM_LOCKED_MASK)) {
+			if (test_bit(WAS_READ_ERROR, &mdev->flags)) {
+				drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+				drbd_md_sync(mdev);
+			}
+		}
+	}
+
 	drbd_force_state(mdev, NS(disk, D_DISKLESS));
-	return 1;
+	return 0;
 }
 
 void drbd_go_diskless(struct drbd_conf *mdev)
 {
 	D_ASSERT(mdev->state.disk == D_FAILED);
 	if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
-		drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
+		drbd_queue_work(&mdev->tconn->sender_work, &mdev->go_diskless);
 }
 
 /**
@@ -4215,7 +3279,7 @@
 			  void (*done)(struct drbd_conf *, int),
 			  char *why, enum bm_flag flags)
 {
-	D_ASSERT(current == mdev->worker.task);
+	D_ASSERT(current == mdev->tconn->worker.task);
 
 	D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
 	D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
@@ -4229,13 +3293,13 @@
 	mdev->bm_io_work.why = why;
 	mdev->bm_io_work.flags = flags;
 
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
 	set_bit(BITMAP_IO, &mdev->flags);
 	if (atomic_read(&mdev->ap_bio_cnt) == 0) {
 		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
-			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+			drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w);
 	}
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 }
 
 /**
@@ -4252,7 +3316,7 @@
 {
 	int rv;
 
-	D_ASSERT(current != mdev->worker.task);
+	D_ASSERT(current != mdev->tconn->worker.task);
 
 	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
 		drbd_suspend_io(mdev);
@@ -4291,18 +3355,127 @@
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
 
-	drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
+	/* must not double-queue! */
+	if (list_empty(&mdev->md_sync_work.list))
+		drbd_queue_work_front(&mdev->tconn->sender_work, &mdev->md_sync_work);
 }
 
-static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+static int w_md_sync(struct drbd_work *w, int unused)
 {
+	struct drbd_conf *mdev = w->mdev;
+
 	dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
 #ifdef DEBUG
 	dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
 		mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
 #endif
 	drbd_md_sync(mdev);
-	return 1;
+	return 0;
+}
+
+const char *cmdname(enum drbd_packet cmd)
+{
+	/* THINK may need to become several global tables
+	 * when we want to support more than
+	 * one PRO_VERSION */
+	static const char *cmdnames[] = {
+		[P_DATA]	        = "Data",
+		[P_DATA_REPLY]	        = "DataReply",
+		[P_RS_DATA_REPLY]	= "RSDataReply",
+		[P_BARRIER]	        = "Barrier",
+		[P_BITMAP]	        = "ReportBitMap",
+		[P_BECOME_SYNC_TARGET]  = "BecomeSyncTarget",
+		[P_BECOME_SYNC_SOURCE]  = "BecomeSyncSource",
+		[P_UNPLUG_REMOTE]	= "UnplugRemote",
+		[P_DATA_REQUEST]	= "DataRequest",
+		[P_RS_DATA_REQUEST]     = "RSDataRequest",
+		[P_SYNC_PARAM]	        = "SyncParam",
+		[P_SYNC_PARAM89]	= "SyncParam89",
+		[P_PROTOCOL]            = "ReportProtocol",
+		[P_UUIDS]	        = "ReportUUIDs",
+		[P_SIZES]	        = "ReportSizes",
+		[P_STATE]	        = "ReportState",
+		[P_SYNC_UUID]           = "ReportSyncUUID",
+		[P_AUTH_CHALLENGE]      = "AuthChallenge",
+		[P_AUTH_RESPONSE]	= "AuthResponse",
+		[P_PING]		= "Ping",
+		[P_PING_ACK]	        = "PingAck",
+		[P_RECV_ACK]	        = "RecvAck",
+		[P_WRITE_ACK]	        = "WriteAck",
+		[P_RS_WRITE_ACK]	= "RSWriteAck",
+		[P_SUPERSEDED]          = "Superseded",
+		[P_NEG_ACK]	        = "NegAck",
+		[P_NEG_DREPLY]	        = "NegDReply",
+		[P_NEG_RS_DREPLY]	= "NegRSDReply",
+		[P_BARRIER_ACK]	        = "BarrierAck",
+		[P_STATE_CHG_REQ]       = "StateChgRequest",
+		[P_STATE_CHG_REPLY]     = "StateChgReply",
+		[P_OV_REQUEST]          = "OVRequest",
+		[P_OV_REPLY]            = "OVReply",
+		[P_OV_RESULT]           = "OVResult",
+		[P_CSUM_RS_REQUEST]     = "CsumRSRequest",
+		[P_RS_IS_IN_SYNC]	= "CsumRSIsInSync",
+		[P_COMPRESSED_BITMAP]   = "CBitmap",
+		[P_DELAY_PROBE]         = "DelayProbe",
+		[P_OUT_OF_SYNC]		= "OutOfSync",
+		[P_RETRY_WRITE]		= "RetryWrite",
+		[P_RS_CANCEL]		= "RSCancel",
+		[P_CONN_ST_CHG_REQ]	= "conn_st_chg_req",
+		[P_CONN_ST_CHG_REPLY]	= "conn_st_chg_reply",
+		[P_RETRY_WRITE]		= "retry_write",
+		[P_PROTOCOL_UPDATE]	= "protocol_update",
+
+		/* enum drbd_packet, but not commands - obsoleted flags:
+		 *	P_MAY_IGNORE
+		 *	P_MAX_OPT_CMD
+		 */
+	};
+
+	/* too big for the array: 0xfffX */
+	if (cmd == P_INITIAL_META)
+		return "InitialMeta";
+	if (cmd == P_INITIAL_DATA)
+		return "InitialData";
+	if (cmd == P_CONNECTION_FEATURES)
+		return "ConnectionFeatures";
+	if (cmd >= ARRAY_SIZE(cmdnames))
+		return "Unknown";
+	return cmdnames[cmd];
+}
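
Since enum drbd_packet is not fully dense, the designated initializers above
leave any unmentioned cmdnames[] slots as NULL (C zero-fills the rest of the
array), so a stray in-range value would return a NULL pointer.  A slightly more
defensive tail for the lookup -- a sketch, not what the patch does:

	if (cmd >= ARRAY_SIZE(cmdnames) || !cmdnames[cmd])
		return "Unknown";
	return cmdnames[cmd];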
+
+/**
+ * drbd_wait_misc  -  wait for a request to make progress
+ * @mdev:	device associated with the request
+ * @i:		the struct drbd_interval embedded in struct drbd_request or
+ *		struct drbd_peer_request
+ */
+int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i)
+{
+	struct net_conf *nc;
+	DEFINE_WAIT(wait);
+	long timeout;
+
+	rcu_read_lock();
+	nc = rcu_dereference(mdev->tconn->net_conf);
+	if (!nc) {
+		rcu_read_unlock();
+		return -ETIMEDOUT;
+	}
+	timeout = nc->ko_count ? nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT;
+	rcu_read_unlock();
+
+	/* Indicate to wake up mdev->misc_wait on progress.  */
+	i->waiting = true;
+	prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
+	spin_unlock_irq(&mdev->tconn->req_lock);
+	timeout = schedule_timeout(timeout);
+	finish_wait(&mdev->misc_wait, &wait);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	if (!timeout || mdev->state.conn < C_CONNECTED)
+		return -ETIMEDOUT;
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+	return 0;
 }
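
drbd_wait_misc() above open-codes the sleep because req_lock must be dropped
across it: prepare_to_wait() runs before the unlock, so a wake-up arriving in
the window between unlock and schedule_timeout() is not lost.  The bare
pattern, with hypothetical lock and waitqueue names:

static wait_queue_head_t my_wq;		/* hypothetical */
static spinlock_t my_lock;		/* hypothetical, held on entry */

static long wait_for_progress(long timeout)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&my_wq, &wait, TASK_INTERRUPTIBLE); /* register first */
	spin_unlock_irq(&my_lock);	/* only then drop the lock */
	timeout = schedule_timeout(timeout);	/* 0 means timed out */
	finish_wait(&my_wq, &wait);
	spin_lock_irq(&my_lock);
	return timeout;
}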
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index edb490a..2af26fc 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -29,159 +29,317 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/slab.h>
-#include <linux/connector.h>
 #include <linux/blkpg.h>
 #include <linux/cpumask.h>
 #include "drbd_int.h"
 #include "drbd_req.h"
 #include "drbd_wrappers.h"
 #include <asm/unaligned.h>
-#include <linux/drbd_tag_magic.h>
 #include <linux/drbd_limits.h>
-#include <linux/compiler.h>
 #include <linux/kthread.h>
 
-static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
-static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
-static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
+#include <net/genetlink.h>
 
-/* see get_sb_bdev and bd_claim */
+/* .doit */
+// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
+// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
+/* .dumpit */
+int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
+
+#include <linux/drbd_genl_api.h>
+#include "drbd_nla.h"
+#include <linux/genl_magic_func.h>
+
+/* used blkdev_get_by_path, to claim our meta data device(s) */
 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
 
-/* Generate the tag_list to struct functions */
-#define NL_PACKET(name, number, fields) \
-static int name ## _from_tags(struct drbd_conf *mdev, \
-	unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
-static int name ## _from_tags(struct drbd_conf *mdev, \
-	unsigned short *tags, struct name *arg) \
-{ \
-	int tag; \
-	int dlen; \
-	\
-	while ((tag = get_unaligned(tags++)) != TT_END) {	\
-		dlen = get_unaligned(tags++);			\
-		switch (tag_number(tag)) { \
-		fields \
-		default: \
-			if (tag & T_MANDATORY) { \
-				dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \
-				return 0; \
-			} \
-		} \
-		tags = (unsigned short *)((char *)tags + dlen); \
-	} \
-	return 1; \
-}
-#define NL_INTEGER(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
-		arg->member = get_unaligned((int *)(tags));	\
-		break;
-#define NL_INT64(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
-		arg->member = get_unaligned((u64 *)(tags));	\
-		break;
-#define NL_BIT(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
-		arg->member = *(char *)(tags) ? 1 : 0; \
-		break;
-#define NL_STRING(pn, pr, member, len) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
-		if (dlen > len) { \
-			dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \
-				#member, dlen, (unsigned int)len); \
-			return 0; \
-		} \
-		 arg->member ## _len = dlen; \
-		 memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
-		 break;
-#include <linux/drbd_nl.h>
+/* Configuration is strictly serialized, because generic netlink message
+ * processing is strictly serialized by the genl_lock().
+ * Which means we can use one static global drbd_config_context struct.
+ */
+static struct drbd_config_context {
+	/* assigned from drbd_genlmsghdr */
+	unsigned int minor;
+	/* assigned from request attributes, if present */
+	unsigned int volume;
+#define VOLUME_UNSPECIFIED		(-1U)
+	/* pointer into the request skb,
+	 * limited lifetime! */
+	char *resource_name;
+	struct nlattr *my_addr;
+	struct nlattr *peer_addr;
 
-/* Generate the struct to tag_list functions */
-#define NL_PACKET(name, number, fields) \
-static unsigned short* \
-name ## _to_tags(struct drbd_conf *mdev, \
-	struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
-static unsigned short* \
-name ## _to_tags(struct drbd_conf *mdev, \
-	struct name *arg, unsigned short *tags) \
-{ \
-	fields \
-	return tags; \
+	/* reply buffer */
+	struct sk_buff *reply_skb;
+	/* pointer into reply buffer */
+	struct drbd_genlmsghdr *reply_dh;
+	/* resolved from attributes, if possible */
+	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn;
+} adm_ctx;
+
+static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
+{
+	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
+	if (genlmsg_reply(skb, info))
+		printk(KERN_ERR "drbd: error sending genl reply\n");
 }
 
-#define NL_INTEGER(pn, pr, member) \
-	put_unaligned(pn | pr | TT_INTEGER, tags++);	\
-	put_unaligned(sizeof(int), tags++);		\
-	put_unaligned(arg->member, (int *)tags);	\
-	tags = (unsigned short *)((char *)tags+sizeof(int));
-#define NL_INT64(pn, pr, member) \
-	put_unaligned(pn | pr | TT_INT64, tags++);	\
-	put_unaligned(sizeof(u64), tags++);		\
-	put_unaligned(arg->member, (u64 *)tags);	\
-	tags = (unsigned short *)((char *)tags+sizeof(u64));
-#define NL_BIT(pn, pr, member) \
-	put_unaligned(pn | pr | TT_BIT, tags++);	\
-	put_unaligned(sizeof(char), tags++);		\
-	*(char *)tags = arg->member; \
-	tags = (unsigned short *)((char *)tags+sizeof(char));
-#define NL_STRING(pn, pr, member, len) \
-	put_unaligned(pn | pr | TT_STRING, tags++);	\
-	put_unaligned(arg->member ## _len, tags++);	\
-	memcpy(tags, arg->member, arg->member ## _len); \
-	tags = (unsigned short *)((char *)tags + arg->member ## _len);
-#include <linux/drbd_nl.h>
+/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
+ * reason it could fail was no space in skb, and there are 4k available. */
+int drbd_msg_put_info(const char *info)
+{
+	struct sk_buff *skb = adm_ctx.reply_skb;
+	struct nlattr *nla;
+	int err = -EMSGSIZE;
 
-void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
-void drbd_nl_send_reply(struct cn_msg *, int);
+	if (!info || !info[0])
+		return 0;
+
+	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
+	if (!nla)
+		return err;
+
+	err = nla_put_string(skb, T_info_text, info);
+	if (err) {
+		nla_nest_cancel(skb, nla);
+		return err;
+	} else
+		nla_nest_end(skb, nla);
+	return 0;
+}
+
+/* This would be a good candidate for a "pre_doit" hook,
+ * and per-family private info->pointers.
+ * But we need to stay compatible with older kernels.
+ * If it returns successfully, adm_ctx members are valid.
+ */
+#define DRBD_ADM_NEED_MINOR	1
+#define DRBD_ADM_NEED_RESOURCE	2
+#define DRBD_ADM_NEED_CONNECTION 4
+static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
+		unsigned flags)
+{
+	struct drbd_genlmsghdr *d_in = info->userhdr;
+	const u8 cmd = info->genlhdr->cmd;
+	int err;
+
+	memset(&adm_ctx, 0, sizeof(adm_ctx));
+
+	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
+	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
+	       return -EPERM;
+
+	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!adm_ctx.reply_skb) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
+					info, &drbd_genl_family, 0, cmd);
+	/* a put of a few bytes into a fresh skb of >= 4k will always succeed,
+	 * but check anyway */
+	if (!adm_ctx.reply_dh) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	adm_ctx.reply_dh->minor = d_in->minor;
+	adm_ctx.reply_dh->ret_code = NO_ERROR;
+
+	adm_ctx.volume = VOLUME_UNSPECIFIED;
+	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
+		struct nlattr *nla;
+		/* parse and validate only */
+		err = drbd_cfg_context_from_attrs(NULL, info);
+		if (err)
+			goto fail;
+
+		/* It was present, and valid,
+		 * copy it over to the reply skb. */
+		err = nla_put_nohdr(adm_ctx.reply_skb,
+				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
+				info->attrs[DRBD_NLA_CFG_CONTEXT]);
+		if (err)
+			goto fail;
+
+		/* and assign stuff to the global adm_ctx */
+		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
+		if (nla)
+			adm_ctx.volume = nla_get_u32(nla);
+		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
+		if (nla)
+			adm_ctx.resource_name = nla_data(nla);
+		adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
+		adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+		if ((adm_ctx.my_addr &&
+		     nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) ||
+		    (adm_ctx.peer_addr &&
+		     nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) {
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+
+	adm_ctx.minor = d_in->minor;
+	adm_ctx.mdev = minor_to_mdev(d_in->minor);
+	adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name);
+
+	if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
+		drbd_msg_put_info("unknown minor");
+		return ERR_MINOR_INVALID;
+	}
+	if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_RESOURCE)) {
+		drbd_msg_put_info("unknown resource");
+		return ERR_INVALID_REQUEST;
+	}
+
+	if (flags & DRBD_ADM_NEED_CONNECTION) {
+		if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) {
+			drbd_msg_put_info("no resource name expected");
+			return ERR_INVALID_REQUEST;
+		}
+		if (adm_ctx.mdev) {
+			drbd_msg_put_info("no minor number expected");
+			return ERR_INVALID_REQUEST;
+		}
+		if (adm_ctx.my_addr && adm_ctx.peer_addr)
+			adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
+							  nla_len(adm_ctx.my_addr),
+							  nla_data(adm_ctx.peer_addr),
+							  nla_len(adm_ctx.peer_addr));
+		if (!adm_ctx.tconn) {
+			drbd_msg_put_info("unknown connection");
+			return ERR_INVALID_REQUEST;
+		}
+	}
+
+	/* some more paranoia, if the request was over-determined */
+	if (adm_ctx.mdev && adm_ctx.tconn &&
+	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
+		pr_warning("request: minor=%u, resource=%s; but that minor belongs to connection %s\n",
+				adm_ctx.minor, adm_ctx.resource_name,
+				adm_ctx.mdev->tconn->name);
+		drbd_msg_put_info("minor exists in different resource");
+		return ERR_INVALID_REQUEST;
+	}
+	if (adm_ctx.mdev &&
+	    adm_ctx.volume != VOLUME_UNSPECIFIED &&
+	    adm_ctx.volume != adm_ctx.mdev->vnr) {
+		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
+				adm_ctx.minor, adm_ctx.volume,
+				adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
+		drbd_msg_put_info("minor exists as different volume");
+		return ERR_INVALID_REQUEST;
+	}
+
+	return NO_ERROR;
+
+fail:
+	nlmsg_free(adm_ctx.reply_skb);
+	adm_ctx.reply_skb = NULL;
+	return err;
+}
+
+static int drbd_adm_finish(struct genl_info *info, int retcode)
+{
+	if (adm_ctx.tconn) {
+		kref_put(&adm_ctx.tconn->kref, &conn_destroy);
+		adm_ctx.tconn = NULL;
+	}
+
+	if (!adm_ctx.reply_skb)
+		return -ENOMEM;
+
+	adm_ctx.reply_dh->ret_code = retcode;
+	drbd_adm_send_reply(adm_ctx.reply_skb, info);
+	return 0;
+}
+
+static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
+{
+	char *afs;
+
+	/* FIXME: A future version will not allow this case. */
+	if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0)
+		return;
+
+	switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) {
+	case AF_INET6:
+		afs = "ipv6";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
+			 &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr);
+		break;
+	case AF_INET:
+		afs = "ipv4";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+			 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
+		break;
+	default:
+		afs = "ssocks";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+			 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
+	}
+	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
+}
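
drbd_khelper() below seeds envp[] with C99 compound literals: (char[20]) { }
creates an anonymous, zero-initialized array with the lifetime of the enclosing
block, so setup_khelper_env() can snprintf() straight into envp[3] and envp[4]
without separate named buffers.  A minimal illustration (values hypothetical):

	char *envp[] = { "HOME=/", (char[20]) { }, (char[60]) { }, NULL };

	snprintf(envp[1], 20, "DRBD_PEER_AF=%s", "ipv4");	/* fill slot 1 */
	snprintf(envp[2], 60, "DRBD_PEER_ADDRESS=%s", "10.0.0.2");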
 
 int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 {
 	char *envp[] = { "HOME=/",
 			"TERM=linux",
 			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
-			NULL, /* Will be set to address family */
-			NULL, /* Will be set to address */
+			 (char[20]) { }, /* address family */
+			 (char[60]) { }, /* address */
 			NULL };
-
-	char mb[12], af[20], ad[60], *afs;
+	char mb[12];
 	char *argv[] = {usermode_helper, cmd, mb, NULL };
+	struct drbd_tconn *tconn = mdev->tconn;
+	struct sib_info sib;
 	int ret;
 
-	if (current == mdev->worker.task)
-		set_bit(CALLBACK_PENDING, &mdev->flags);
+	if (current == tconn->worker.task)
+		set_bit(CALLBACK_PENDING, &tconn->flags);
 
 	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
-
-	if (get_net_conf(mdev)) {
-		switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
-		case AF_INET6:
-			afs = "ipv6";
-			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
-				 &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
-			break;
-		case AF_INET:
-			afs = "ipv4";
-			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
-				 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
-			break;
-		default:
-			afs = "ssocks";
-			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
-				 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
-		}
-		snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
-		envp[3]=af;
-		envp[4]=ad;
-		put_net_conf(mdev);
-	}
+	setup_khelper_env(tconn, envp);
 
 	/* The helper may take some time.
 	 * write out any unsynced meta data changes now */
 	drbd_md_sync(mdev);
 
 	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
-
-	drbd_bcast_ev_helper(mdev, cmd);
+	sib.sib_reason = SIB_HELPER_PRE;
+	sib.helper_name = cmd;
+	drbd_bcast_event(mdev, &sib);
 	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 	if (ret)
 		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
@@ -191,9 +349,12 @@
 		dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
 				usermode_helper, cmd, mb,
 				(ret >> 8) & 0xff, ret);
+	sib.sib_reason = SIB_HELPER_POST;
+	sib.helper_exit_code = ret;
+	drbd_bcast_event(mdev, &sib);
 
-	if (current == mdev->worker.task)
-		clear_bit(CALLBACK_PENDING, &mdev->flags);
+	if (current == tconn->worker.task)
+		clear_bit(CALLBACK_PENDING, &tconn->flags);
 
 	if (ret < 0) /* Ignore any ERRNOs we got. */
 		ret = 0;
@@ -201,116 +362,163 @@
 	return ret;
 }
 
-enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
+int conn_khelper(struct drbd_tconn *tconn, char *cmd)
 {
+	char *envp[] = { "HOME=/",
+			"TERM=linux",
+			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+			 (char[20]) { }, /* address family */
+			 (char[60]) { }, /* address */
+			NULL };
+	char *argv[] = {usermode_helper, cmd, tconn->name, NULL };
+	int ret;
+
+	setup_khelper_env(tconn, envp);
+	conn_md_sync(tconn);
+
+	conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
+	/* TODO: conn_bcast_event() ?? */
+
+	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+	if (ret)
+		conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
+			  usermode_helper, cmd, tconn->name,
+			  (ret >> 8) & 0xff, ret);
+	else
+		conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
+			  usermode_helper, cmd, tconn->name,
+			  (ret >> 8) & 0xff, ret);
+	/* TODO: conn_bcast_event() ?? */
+
+	if (ret < 0) /* Ignore any ERRNOs we got. */
+		ret = 0;
+
+	return ret;
+}
+
+static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
+{
+	enum drbd_fencing_p fp = FP_NOT_AVAIL;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (get_ldev_if_state(mdev, D_CONSISTENT)) {
+			fp = max_t(enum drbd_fencing_p, fp,
+				   rcu_dereference(mdev->ldev->disk_conf)->fencing);
+			put_ldev(mdev);
+		}
+	}
+	rcu_read_unlock();
+
+	return fp;
+}
+
+bool conn_try_outdate_peer(struct drbd_tconn *tconn)
+{
+	union drbd_state mask = { };
+	union drbd_state val = { };
+	enum drbd_fencing_p fp;
 	char *ex_to_string;
 	int r;
-	enum drbd_disk_state nps;
-	enum drbd_fencing_p fp;
 
-	D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
-
-	if (get_ldev_if_state(mdev, D_CONSISTENT)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	} else {
-		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
-		nps = mdev->state.pdsk;
-		goto out;
+	if (tconn->cstate >= C_WF_REPORT_PARAMS) {
+		conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
+		return false;
 	}
 
-	r = drbd_khelper(mdev, "fence-peer");
+	fp = highest_fencing_policy(tconn);
+	switch (fp) {
+	case FP_NOT_AVAIL:
+		conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
+		goto out;
+	case FP_DONT_CARE:
+		return true;
+	default: ;
+	}
+
+	r = conn_khelper(tconn, "fence-peer");
 
 	switch ((r>>8) & 0xff) {
 	case 3: /* peer is inconsistent */
 		ex_to_string = "peer is inconsistent or worse";
-		nps = D_INCONSISTENT;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_INCONSISTENT;
 		break;
 	case 4: /* peer got outdated, or was already outdated */
 		ex_to_string = "peer was fenced";
-		nps = D_OUTDATED;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_OUTDATED;
 		break;
 	case 5: /* peer was down */
-		if (mdev->state.disk == D_UP_TO_DATE) {
+		if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
 			/* we will(have) create(d) a new UUID anyways... */
 			ex_to_string = "peer is unreachable, assumed to be dead";
-			nps = D_OUTDATED;
+			mask.pdsk = D_MASK;
+			val.pdsk = D_OUTDATED;
 		} else {
 			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
-			nps = mdev->state.pdsk;
 		}
 		break;
 	case 6: /* Peer is primary, voluntarily outdate myself.
 		 * This is useful when an unconnected R_SECONDARY is asked to
 		 * become R_PRIMARY, but finds the other peer being active. */
 		ex_to_string = "peer is active";
-		dev_warn(DEV, "Peer is primary, outdating myself.\n");
-		nps = D_UNKNOWN;
-		_drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
+		conn_warn(tconn, "Peer is primary, outdating myself.\n");
+		mask.disk = D_MASK;
+		val.disk = D_OUTDATED;
 		break;
 	case 7:
 		if (fp != FP_STONITH)
-			dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
+			conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
 		ex_to_string = "peer was stonithed";
-		nps = D_OUTDATED;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_OUTDATED;
 		break;
 	default:
 		/* The script is broken ... */
-		nps = D_UNKNOWN;
-		dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
-		return nps;
+		conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
+		return false; /* Eventually leave IO frozen */
 	}
 
-	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
-			(r>>8) & 0xff, ex_to_string);
+	conn_info(tconn, "fence-peer helper returned %d (%s)\n",
+		  (r>>8) & 0xff, ex_to_string);
 
-out:
-	if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
-		/* The handler was not successful... unfreeze here, the
-		   state engine can not unfreeze... */
-		_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
-	}
+ out:
 
-	return nps;
+	/* Not using
+	   conn_request_state(tconn, mask, val, CS_VERBOSE);
+	   here, because we might have been able to re-establish the connection
+	   in the meantime. */
+	spin_lock_irq(&tconn->req_lock);
+	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
+		_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	spin_unlock_irq(&tconn->req_lock);
+
+	return conn_highest_pdsk(tconn) <= D_OUTDATED;
 }
 
 static int _try_outdate_peer_async(void *data)
 {
-	struct drbd_conf *mdev = (struct drbd_conf *)data;
-	enum drbd_disk_state nps;
-	union drbd_state ns;
+	struct drbd_tconn *tconn = (struct drbd_tconn *)data;
 
-	nps = drbd_try_outdate_peer(mdev);
+	conn_try_outdate_peer(tconn);
 
-	/* Not using
-	   drbd_request_state(mdev, NS(pdsk, nps));
-	   here, because we might were able to re-establish the connection
-	   in the meantime. This can only partially be solved in the state's
-	   engine is_valid_state() and is_valid_state_transition()
-	   functions.
-
-	   nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
-	   pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
-	   therefore we have to have the pre state change check here.
-	*/
-	spin_lock_irq(&mdev->req_lock);
-	ns = mdev->state;
-	if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) {
-		ns.pdsk = nps;
-		_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
-	}
-	spin_unlock_irq(&mdev->req_lock);
-
+	kref_put(&tconn->kref, &conn_destroy);
 	return 0;
 }
 
-void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
+void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
 {
 	struct task_struct *opa;
 
-	opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
-	if (IS_ERR(opa))
-		dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
+	kref_get(&tconn->kref);
+	opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
+	if (IS_ERR(opa)) {
+		conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
+		kref_put(&tconn->kref, &conn_destroy);
+	}
 }
 
 enum drbd_state_rv
@@ -318,15 +526,15 @@
 {
 	const int max_tries = 4;
 	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
+	struct net_conf *nc;
 	int try = 0;
 	int forced = 0;
 	union drbd_state mask, val;
-	enum drbd_disk_state nps;
 
 	if (new_role == R_PRIMARY)
-		request_ping(mdev); /* Detect a dead peer ASAP */
+		request_ping(mdev->tconn); /* Detect a dead peer ASAP */
 
-	mutex_lock(&mdev->state_mutex);
+	mutex_lock(mdev->state_mutex);
 
 	mask.i = 0; mask.role = R_MASK;
 	val.i  = 0; val.role  = new_role;
@@ -354,38 +562,34 @@
 		if (rv == SS_NO_UP_TO_DATE_DISK &&
 		    mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
-			nps = drbd_try_outdate_peer(mdev);
 
-			if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
+			if (conn_try_outdate_peer(mdev->tconn)) {
 				val.disk = D_UP_TO_DATE;
 				mask.disk = D_MASK;
 			}
-
-			val.pdsk = nps;
-			mask.pdsk = D_MASK;
-
 			continue;
 		}
 
 		if (rv == SS_NOTHING_TO_DO)
-			goto fail;
+			goto out;
 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
-			nps = drbd_try_outdate_peer(mdev);
-
-			if (force && nps > D_OUTDATED) {
+			if (!conn_try_outdate_peer(mdev->tconn) && force) {
 				dev_warn(DEV, "Forced into split brain situation!\n");
-				nps = D_OUTDATED;
+				mask.pdsk = D_MASK;
+				val.pdsk  = D_OUTDATED;
+
 			}
-
-			mask.pdsk = D_MASK;
-			val.pdsk  = nps;
-
 			continue;
 		}
 		if (rv == SS_TWO_PRIMARIES) {
 			/* Maybe the peer is detected as dead very soon...
 			   retry at most once more in this case. */
-			schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
+			int timeo;
+			rcu_read_lock();
+			nc = rcu_dereference(mdev->tconn->net_conf);
+			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
+			rcu_read_unlock();
+			schedule_timeout_interruptible(timeo);
 			if (try < max_tries)
 				try = max_tries - 1;
 			continue;
@@ -394,13 +598,13 @@
 			rv = _drbd_request_state(mdev, mask, val,
 						CS_VERBOSE + CS_WAIT_COMPLETE);
 			if (rv < SS_SUCCESS)
-				goto fail;
+				goto out;
 		}
 		break;
 	}
 
 	if (rv < SS_SUCCESS)
-		goto fail;
+		goto out;
 
 	if (forced)
 		dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
@@ -408,6 +612,8 @@
 	/* Wait until nothing is on the fly :) */
 	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
 
+	/* FIXME also wait for all pending P_BARRIER_ACK? */
+
 	if (new_role == R_SECONDARY) {
 		set_disk_ro(mdev->vdisk, true);
 		if (get_ldev(mdev)) {
@@ -415,10 +621,12 @@
 			put_ldev(mdev);
 		}
 	} else {
-		if (get_net_conf(mdev)) {
-			mdev->net_conf->want_lose = 0;
-			put_net_conf(mdev);
-		}
+		mutex_lock(&mdev->tconn->conf_update);
+		nc = mdev->tconn->net_conf;
+		if (nc)
+			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
+		mutex_unlock(&mdev->tconn->conf_update);
+
 		set_disk_ro(mdev->vdisk, false);
 		if (get_ldev(mdev)) {
 			if (((mdev->state.conn < C_CONNECTED ||
@@ -444,67 +652,47 @@
 	drbd_md_sync(mdev);
 
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
- fail:
-	mutex_unlock(&mdev->state_mutex);
+out:
+	mutex_unlock(mdev->state_mutex);
 	return rv;
 }
 
-static struct drbd_conf *ensure_mdev(int minor, int create)
+static const char *from_attrs_err_to_txt(int err)
 {
-	struct drbd_conf *mdev;
-
-	if (minor >= minor_count)
-		return NULL;
-
-	mdev = minor_to_mdev(minor);
-
-	if (!mdev && create) {
-		struct gendisk *disk = NULL;
-		mdev = drbd_new_device(minor);
-
-		spin_lock_irq(&drbd_pp_lock);
-		if (minor_table[minor] == NULL) {
-			minor_table[minor] = mdev;
-			disk = mdev->vdisk;
-			mdev = NULL;
-		} /* else: we lost the race */
-		spin_unlock_irq(&drbd_pp_lock);
-
-		if (disk) /* we won the race above */
-			/* in case we ever add a drbd_delete_device(),
-			 * don't forget the del_gendisk! */
-			add_disk(disk);
-		else /* we lost the race above */
-			drbd_free_mdev(mdev);
-
-		mdev = minor_to_mdev(minor);
-	}
-
-	return mdev;
+	return	err == -ENOMSG ? "required attribute missing" :
+		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
+		err == -EEXIST ? "can not change invariant setting" :
+		"invalid attribute value";
 }
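
from_attrs_err_to_txt() turns the errno values produced by the generated
*_from_attrs() parsers into the user-visible strings reported via
drbd_msg_put_info(). A standalone sketch that reuses the mapping verbatim,
driven by a hypothetical main(), just to show the chained conditional at work:

    #include <errno.h>
    #include <stdio.h>

    static const char *from_attrs_err_to_txt(int err)
    {
        return err == -ENOMSG ? "required attribute missing" :
            err == -EOPNOTSUPP ? "unknown mandatory attribute" :
            err == -EEXIST ? "can not change invariant setting" :
            "invalid attribute value";
    }

    int main(void)
    {
        const int errs[] = { -ENOMSG, -EOPNOTSUPP, -EEXIST, -EINVAL };
        size_t i;

        for (i = 0; i < sizeof(errs) / sizeof(errs[0]); i++)
            printf("%d -> %s\n", errs[i], from_attrs_err_to_txt(errs[i]));
        return 0;
    }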
 
-static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 {
-	struct primary primary_args;
+	struct set_role_parms parms;
+	int err;
+	enum drbd_ret_code retcode;
 
-	memset(&primary_args, 0, sizeof(struct primary));
-	if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	memset(&parms, 0, sizeof(parms));
+	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
+		err = set_role_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
 	}
 
-	reply->ret_code =
-		drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
-
-	return 0;
-}
-
-static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
-{
-	reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
-
+	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
+		retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
+	else
+		retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -514,7 +702,12 @@
 				       struct drbd_backing_dev *bdev)
 {
 	sector_t md_size_sect = 0;
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+
+	switch (meta_dev_idx) {
 	default:
 		/* v07 style fixed size indexed meta data */
 		bdev->md.md_size_sect = MD_RESERVED_SECT;
@@ -533,7 +726,7 @@
 	case DRBD_MD_INDEX_FLEX_INT:
 		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
 		/* al size is still fixed */
-		bdev->md.al_offset = -MD_AL_MAX_SIZE;
+		bdev->md.al_offset = -MD_AL_SECTORS;
 		/* we need (slightly less than) ~ this much bitmap sectors: */
 		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
 		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
@@ -549,6 +742,7 @@
 		bdev->md.bm_offset   = -md_size_sect + MD_AL_OFFSET;
 		break;
 	}
+	rcu_read_unlock();
 }
 
 /* input size is expected to be in KB */
@@ -581,10 +775,16 @@
  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
  *  peer may not initiate a resize.
  */
+/* Note these are not to be confused with
+ * drbd_adm_suspend_io/drbd_adm_resume_io,
+ * which are (sub) state changes triggered by admin (drbdsetup),
+ * and can be long lived.
+ * This changes an mdev->flag, is triggered by drbd internals,
+ * and should be short-lived. */
 void drbd_suspend_io(struct drbd_conf *mdev)
 {
 	set_bit(SUSPEND_IO, &mdev->flags);
-	if (is_susp(mdev->state))
+	if (drbd_suspended(mdev))
 		return;
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
 }
@@ -605,7 +805,7 @@
 enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
-	sector_t la_size;
+	sector_t la_size, u_size;
 	sector_t size;
 	char ppb[10];
 
@@ -633,7 +833,10 @@
 	/* TODO: should only be some assert here, not (re)init... */
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
-	size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);
+	rcu_read_lock();
+	u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+	rcu_read_unlock();
+	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
@@ -696,12 +899,12 @@
 }
 
 sector_t
-drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+		  sector_t u_size, int assume_peer_has_space)
 {
 	sector_t p_size = mdev->p_size;   /* partner's disk size. */
 	sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
 	sector_t m_size; /* my size */
-	sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
 	sector_t size = 0;
 
 	m_size = drbd_get_max_capacity(bdev);
@@ -750,24 +953,21 @@
  * failed, and 0 on success. You should call drbd_md_sync() after you called
  * this function.
  */
-static int drbd_check_al_size(struct drbd_conf *mdev)
+static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
 {
 	struct lru_cache *n, *t;
 	struct lc_element *e;
 	unsigned int in_use;
 	int i;
 
-	ERR_IF(mdev->sync_conf.al_extents < 7)
-		mdev->sync_conf.al_extents = 127;
-
 	if (mdev->act_log &&
-	    mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
+	    mdev->act_log->nr_elements == dc->al_extents)
 		return 0;
 
 	in_use = 0;
 	t = mdev->act_log;
-	n = lc_create("act_log", drbd_al_ext_cache,
-		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
+	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
+		dc->al_extents, sizeof(struct lc_element), 0);
 
 	if (n == NULL) {
 		dev_err(DEV, "Cannot allocate act_log lru!\n");
@@ -808,7 +1008,9 @@
 		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
 
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
-		max_segments = mdev->ldev->dc.max_bio_bvecs;
+		rcu_read_lock();
+		max_segments = rcu_dereference(mdev->ldev->disk_conf)->max_bio_bvecs;
+		rcu_read_unlock();
 		put_ldev(mdev);
 	}
 
@@ -852,12 +1054,14 @@
 	   Because from 8.3.8 onwards the peer can use multiple
 	   BIOs for a single peer_request */
 	if (mdev->state.conn >= C_CONNECTED) {
-		if (mdev->agreed_pro_version < 94) {
-			peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+		if (mdev->tconn->agreed_pro_version < 94)
+			peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
 			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
-		} else if (mdev->agreed_pro_version == 94)
+		else if (mdev->tconn->agreed_pro_version == 94)
 			peer = DRBD_MAX_SIZE_H80_PACKET;
-		else /* drbd 8.3.8 onwards */
+		else if (mdev->tconn->agreed_pro_version < 100)
+			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
+		else
 			peer = DRBD_MAX_BIO_SIZE;
 	}
 
@@ -872,36 +1076,27 @@
 	drbd_setup_queue_param(mdev, new);
 }
 
-/* serialize deconfig (worker exiting, doing cleanup)
- * and reconfig (drbdsetup disk, drbdsetup net)
- *
- * Wait for a potentially exiting worker, then restart it,
- * or start a new one.  Flush any pending work, there may still be an
- * after_state_change queued.
- */
-static void drbd_reconfig_start(struct drbd_conf *mdev)
+/* Starts the worker thread */
+static void conn_reconfig_start(struct drbd_tconn *tconn)
 {
-	wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
-	wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
-	drbd_thread_start(&mdev->worker);
-	drbd_flush_workqueue(mdev);
+	drbd_thread_start(&tconn->worker);
+	conn_flush_workqueue(tconn);
 }
 
-/* if still unconfigured, stops worker again.
- * if configured now, clears CONFIG_PENDING.
- * wakes potential waiters */
-static void drbd_reconfig_done(struct drbd_conf *mdev)
+/* if still unconfigured, stops receiver and worker again. */
+static void conn_reconfig_done(struct drbd_tconn *tconn)
 {
-	spin_lock_irq(&mdev->req_lock);
-	if (mdev->state.disk == D_DISKLESS &&
-	    mdev->state.conn == C_STANDALONE &&
-	    mdev->state.role == R_SECONDARY) {
-		set_bit(DEVICE_DYING, &mdev->flags);
-		drbd_thread_stop_nowait(&mdev->worker);
-	} else
-		clear_bit(CONFIG_PENDING, &mdev->flags);
-	spin_unlock_irq(&mdev->req_lock);
-	wake_up(&mdev->state_wait);
+	bool stop_threads;
+	spin_lock_irq(&tconn->req_lock);
+	stop_threads = conn_all_vols_unconf(tconn) &&
+		tconn->cstate == C_STANDALONE;
+	spin_unlock_irq(&tconn->req_lock);
+	if (stop_threads) {
+		/* asender is implicitly stopped by receiver
+		 * in conn_disconnect() */
+		drbd_thread_stop(&tconn->receiver);
+		drbd_thread_stop(&tconn->worker);
+	}
 }
 
 /* Make sure IO is suspended before calling this function. */
@@ -909,42 +1104,187 @@
 {
 	int s = 0;
 
-	if (lc_try_lock(mdev->act_log)) {
-		drbd_al_shrink(mdev);
-		lc_unlock(mdev->act_log);
-	} else {
+	if (!lc_try_lock(mdev->act_log)) {
 		dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
 		return;
 	}
 
-	spin_lock_irq(&mdev->req_lock);
+	drbd_al_shrink(mdev);
+	spin_lock_irq(&mdev->tconn->req_lock);
 	if (mdev->state.conn < C_CONNECTED)
 		s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
-
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
+	lc_unlock(mdev->act_log);
 
 	if (s)
 		dev_info(DEV, "Suspended AL updates\n");
 }
 
-/* does always return 0;
- * interesting return code is in reply->ret_code */
-static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+
+static bool should_set_defaults(struct genl_info *info)
 {
+	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
+	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
+}
+
+static void enforce_disk_conf_limits(struct disk_conf *dc)
+{
+	if (dc->al_extents < DRBD_AL_EXTENTS_MIN)
+		dc->al_extents = DRBD_AL_EXTENTS_MIN;
+	if (dc->al_extents > DRBD_AL_EXTENTS_MAX)
+		dc->al_extents = DRBD_AL_EXTENTS_MAX;
+
+	if (dc->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
+		dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
+}
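
enforce_disk_conf_limits() clamps out-of-range values silently rather than
rejecting the request, which keeps older tools working when the valid range
changes. The same clamp pattern as a runnable sketch; the MIN/MAX constants
below are illustrative stand-ins, not the real DRBD_AL_EXTENTS_* limits:

    #include <stdio.h>

    #define AL_EXTENTS_MIN  7       /* hypothetical floor */
    #define AL_EXTENTS_MAX  65534   /* hypothetical cap */

    static unsigned int clamp_al_extents(unsigned int v)
    {
        if (v < AL_EXTENTS_MIN)
            return AL_EXTENTS_MIN;
        if (v > AL_EXTENTS_MAX)
            return AL_EXTENTS_MAX;
        return v;
    }

    int main(void)
    {
        printf("%u %u %u\n",
               clamp_al_extents(1),            /* below the floor -> 7 */
               clamp_al_extents(257),          /* in range -> 257 */
               clamp_al_extents(1u << 20));    /* above the cap -> 65534 */
        return 0;
    }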
+
+int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct drbd_conf *mdev;
+	struct disk_conf *new_disk_conf, *old_disk_conf;
+	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
+	int err, fifo_size;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
+
+	/* we also need a disk
+	 * to change the options on */
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
+	if (!new_disk_conf) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	mutex_lock(&mdev->tconn->conf_update);
+	old_disk_conf = mdev->ldev->disk_conf;
+	*new_disk_conf = *old_disk_conf;
+	if (should_set_defaults(info))
+		set_disk_conf_defaults(new_disk_conf);
+
+	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
+	if (err && err != -ENOMSG) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+	}
+
+	if (!expect(new_disk_conf->resync_rate >= 1))
+		new_disk_conf->resync_rate = 1;
+
+	enforce_disk_conf_limits(new_disk_conf);
+
+	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+	if (fifo_size != mdev->rs_plan_s->size) {
+		new_plan = fifo_alloc(fifo_size);
+		if (!new_plan) {
+			dev_err(DEV, "kmalloc of fifo_buffer failed\n");
+			retcode = ERR_NOMEM;
+			goto fail_unlock;
+		}
+	}
+
+	drbd_suspend_io(mdev);
+	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+	drbd_al_shrink(mdev);
+	err = drbd_check_al_size(mdev, new_disk_conf);
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+	drbd_resume_io(mdev);
+
+	if (err) {
+		retcode = ERR_NOMEM;
+		goto fail_unlock;
+	}
+
+	write_lock_irq(&global_state_lock);
+	retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after);
+	if (retcode == NO_ERROR) {
+		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
+		drbd_resync_after_changed(mdev);
+	}
+	write_unlock_irq(&global_state_lock);
+
+	if (retcode != NO_ERROR)
+		goto fail_unlock;
+
+	if (new_plan) {
+		old_plan = mdev->rs_plan_s;
+		rcu_assign_pointer(mdev->rs_plan_s, new_plan);
+	}
+
+	mutex_unlock(&mdev->tconn->conf_update);
+
+	if (new_disk_conf->al_updates)
+		mdev->ldev->md.flags &= ~MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
+	if (new_disk_conf->md_flushes)
+		clear_bit(MD_NO_FUA, &mdev->flags);
+	else
+		set_bit(MD_NO_FUA, &mdev->flags);
+
+	drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
+
+	drbd_md_sync(mdev);
+
+	if (mdev->state.conn >= C_CONNECTED)
+		drbd_send_sync_param(mdev);
+
+	synchronize_rcu();
+	kfree(old_disk_conf);
+	kfree(old_plan);
+	mod_timer(&mdev->request_timer, jiffies + HZ);
+	goto success;
+
+fail_unlock:
+	mutex_unlock(&mdev->tconn->conf_update);
+ fail:
+	kfree(new_disk_conf);
+	kfree(new_plan);
+success:
+	put_ldev(mdev);
+ out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
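
Note the update discipline in drbd_adm_disk_opts(): the live disk_conf is
never modified in place. A copy is made under conf_update, the copy is edited,
the new pointer is published with rcu_assign_pointer(), and the old struct is
freed only after synchronize_rcu() guarantees no reader still holds it. Below
is a minimal userspace analogue of that copy/publish/free sequence, with C11
release/acquire atomics standing in for the RCU primitives and the grace
period elided; this sketches the idiom, not the kernel API:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct conf {
        int al_extents;
    };

    /* stand-in for the RCU-protected mdev->ldev->disk_conf pointer */
    static _Atomic(struct conf *) conf_ptr;

    static int change_al_extents(int new_val)
    {
        struct conf *old_conf, *new_conf;

        new_conf = malloc(sizeof(*new_conf));
        if (!new_conf)
            return -1;

        old_conf = atomic_load_explicit(&conf_ptr, memory_order_acquire);
        *new_conf = *old_conf;          /* copy the old settings ... */
        new_conf->al_extents = new_val; /* ... modify the copy ... */

        /* ... and publish it (rcu_assign_pointer() analogue) */
        atomic_store_explicit(&conf_ptr, new_conf, memory_order_release);

        /* synchronize_rcu() would wait for readers here; elided */
        free(old_conf);
        return 0;
    }

    int main(void)
    {
        struct conf *initial = malloc(sizeof(*initial));

        if (!initial)
            return 1;
        initial->al_extents = 127;
        atomic_store(&conf_ptr, initial);

        change_al_extents(257); /* frees the initial conf internally */
        printf("al_extents = %d\n", atomic_load(&conf_ptr)->al_extents);
        free(atomic_load(&conf_ptr));
        return 0;
    }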
+
+int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
+{
+	struct drbd_conf *mdev;
+	int err;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	sector_t max_possible_sectors;
 	sector_t min_md_device_sectors;
 	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
+	struct disk_conf *new_disk_conf = NULL;
 	struct block_device *bdev;
 	struct lru_cache *resync_lru = NULL;
+	struct fifo_buffer *new_plan = NULL;
 	union drbd_state ns, os;
 	enum drbd_state_rv rv;
-	int cp_discovered = 0;
-	int logical_block_size;
+	struct net_conf *nc;
 
-	drbd_reconfig_start(mdev);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto finish;
+
+	mdev = adm_ctx.mdev;
+	conn_reconfig_start(mdev->tconn);
 
 	/* if you want to reconfigure, please tear down first */
 	if (mdev->state.disk > D_DISKLESS) {
@@ -959,47 +1299,65 @@
 
 	/* make sure there is no leftover from previous force-detach attempts */
 	clear_bit(FORCE_DETACH, &mdev->flags);
+	clear_bit(WAS_IO_ERROR, &mdev->flags);
+	clear_bit(WAS_READ_ERROR, &mdev->flags);
 
 	/* and no leftover from previously aborted resync or verify, either */
 	mdev->rs_total = 0;
 	mdev->rs_failed = 0;
 	atomic_set(&mdev->rs_pending_cnt, 0);
 
-	/* allocation not in the IO path, cqueue thread context */
+	/* allocation not in the IO path, drbdsetup context */
 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
 	if (!nbc) {
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
+	spin_lock_init(&nbc->md.uuid_lock);
 
-	nbc->dc.disk_size     = DRBD_DISK_SIZE_SECT_DEF;
-	nbc->dc.on_io_error   = DRBD_ON_IO_ERROR_DEF;
-	nbc->dc.fencing       = DRBD_FENCING_DEF;
-	nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
+	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
+	if (!new_disk_conf) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+	nbc->disk_conf = new_disk_conf;
 
-	if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) {
+	set_disk_conf_defaults(new_disk_conf);
+	err = disk_conf_from_attrs(new_disk_conf, info);
+	if (err) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+	enforce_disk_conf_limits(new_disk_conf);
+
+	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
+	if (!new_plan) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
 
-	if (get_net_conf(mdev)) {
-		int prot = mdev->net_conf->wire_protocol;
-		put_net_conf(mdev);
-		if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
+	rcu_read_lock();
+	nc = rcu_dereference(mdev->tconn->net_conf);
+	if (nc) {
+		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
+			rcu_read_unlock();
 			retcode = ERR_STONITH_AND_PROT_A;
 			goto fail;
 		}
 	}
+	rcu_read_unlock();
 
-	bdev = blkdev_get_by_path(nbc->dc.backing_dev,
+	bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
 	if (IS_ERR(bdev)) {
-		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
+		dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
 			PTR_ERR(bdev));
 		retcode = ERR_OPEN_DISK;
 		goto fail;
@@ -1014,12 +1372,12 @@
 	 * should check it for you already; but if you don't, or
 	 * someone fooled it, we need to double check here)
 	 */
-	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
+	bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				  (nbc->dc.meta_dev_idx < 0) ?
+				  (new_disk_conf->meta_dev_idx < 0) ?
 				  (void *)mdev : (void *)drbd_m_holder);
 	if (IS_ERR(bdev)) {
-		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
+		dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
 			PTR_ERR(bdev));
 		retcode = ERR_OPEN_MD_DISK;
 		goto fail;
@@ -1027,14 +1385,14 @@
 	nbc->md_bdev = bdev;
 
 	if ((nbc->backing_bdev == nbc->md_bdev) !=
-	    (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
-	     nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
 
 	resync_lru = lc_create("resync", drbd_bm_ext_cache,
-			61, sizeof(struct bm_extent),
+			1, 61, sizeof(struct bm_extent),
 			offsetof(struct bm_extent, lce));
 	if (!resync_lru) {
 		retcode = ERR_NOMEM;
@@ -1044,21 +1402,21 @@
 	/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
 	drbd_md_set_sector_offsets(mdev, nbc);
 
-	if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
+	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
 		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
 			(unsigned long long) drbd_get_max_capacity(nbc),
-			(unsigned long long) nbc->dc.disk_size);
+			(unsigned long long) new_disk_conf->disk_size);
 		retcode = ERR_DISK_TOO_SMALL;
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < 0) {
+	if (new_disk_conf->meta_dev_idx < 0) {
 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
 		/* at least one MB, otherwise it does not make sense */
 		min_md_device_sectors = (2<<10);
 	} else {
 		max_possible_sectors = DRBD_MAX_SECTORS;
-		min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
+		min_md_device_sectors = MD_RESERVED_SECT * (new_disk_conf->meta_dev_idx + 1);
 	}
 
 	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
@@ -1083,14 +1441,20 @@
 		dev_warn(DEV, "==> truncating very big lower level device "
 			"to currently maximum possible %llu sectors <==\n",
 			(unsigned long long) max_possible_sectors);
-		if (nbc->dc.meta_dev_idx >= 0)
+		if (new_disk_conf->meta_dev_idx >= 0)
 			dev_warn(DEV, "==>> using internal or flexible "
 				      "meta data may help <<==\n");
 	}
 
 	drbd_suspend_io(mdev);
 	/* also wait for the last barrier ack. */
-	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
+	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
+	 * We need a way to either ignore barrier acks for barriers sent before a device
+	 * was attached, or a way to wait for all pending barrier acks to come in.
+	 * As barriers are counted per resource,
+	 * we'd need to suspend io on all devices of a resource.
+	 */
+	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev));
 	/* and for any other previously queued work */
 	drbd_flush_workqueue(mdev);
 
@@ -1105,25 +1469,6 @@
 
 	drbd_md_set_sector_offsets(mdev, nbc);
 
-	/* allocate a second IO page if logical_block_size != 512 */
-	logical_block_size = bdev_logical_block_size(nbc->md_bdev);
-	if (logical_block_size == 0)
-		logical_block_size = MD_SECTOR_SIZE;
-
-	if (logical_block_size != MD_SECTOR_SIZE) {
-		if (!mdev->md_io_tmpp) {
-			struct page *page = alloc_page(GFP_NOIO);
-			if (!page)
-				goto force_diskless_dec;
-
-			dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
-			     logical_block_size, MD_SECTOR_SIZE);
-			dev_warn(DEV, "Workaround engaged (has performance impact).\n");
-
-			mdev->md_io_tmpp = page;
-		}
-	}
-
 	if (!mdev->bitmap) {
 		if (drbd_bm_init(mdev)) {
 			retcode = ERR_NOMEM;
@@ -1145,30 +1490,25 @@
 	}
 
 	/* Since we are diskless, fix the activity log first... */
-	if (drbd_check_al_size(mdev)) {
+	if (drbd_check_al_size(mdev, new_disk_conf)) {
 		retcode = ERR_NOMEM;
 		goto force_diskless_dec;
 	}
 
 	/* Prevent shrinking of consistent devices ! */
 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
-	    drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
+	    drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
 		dev_warn(DEV, "refusing to truncate a consistent device\n");
 		retcode = ERR_DISK_TOO_SMALL;
 		goto force_diskless_dec;
 	}
 
-	if (!drbd_al_read_log(mdev, nbc)) {
-		retcode = ERR_IO_MD_DISK;
-		goto force_diskless_dec;
-	}
-
 	/* Reset the "barriers don't work" bits here, then force meta data to
 	 * be written, to ensure we determine if barriers are supported. */
-	if (nbc->dc.no_md_flush)
-		set_bit(MD_NO_FUA, &mdev->flags);
-	else
+	if (new_disk_conf->md_flushes)
 		clear_bit(MD_NO_FUA, &mdev->flags);
+	else
+		set_bit(MD_NO_FUA, &mdev->flags);
 
 	/* Point of no return reached.
 	 * Devices and memory are no longer released by error cleanup below.
@@ -1177,11 +1517,13 @@
 	D_ASSERT(mdev->ldev == NULL);
 	mdev->ldev = nbc;
 	mdev->resync = resync_lru;
+	mdev->rs_plan_s = new_plan;
 	nbc = NULL;
 	resync_lru = NULL;
+	new_disk_conf = NULL;
+	new_plan = NULL;
 
-	mdev->write_ordering = WO_bdev_flush;
-	drbd_bump_write_ordering(mdev, WO_bdev_flush);
+	drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -1189,10 +1531,8 @@
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
-	    !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
+	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod))
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
-		cp_discovered = 1;
-	}
 
 	mdev->send_cnt = 0;
 	mdev->recv_cnt = 0;
@@ -1228,7 +1568,9 @@
 	} else if (dd == grew)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
-	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
+	    (test_bit(CRASHED_PRIMARY, &mdev->flags) &&
+	     drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) {
 		dev_info(DEV, "Assuming that all blocks are out of sync "
 		     "(aka FullSync)\n");
 		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
@@ -1238,16 +1580,7 @@
 		}
 	} else {
 		if (drbd_bitmap_io(mdev, &drbd_bm_read,
-			"read from attaching", BM_LOCKED_MASK) < 0) {
-			retcode = ERR_IO_MD_DISK;
-			goto force_diskless_dec;
-		}
-	}
-
-	if (cp_discovered) {
-		drbd_al_apply_to_bm(mdev);
-		if (drbd_bitmap_io(mdev, &drbd_bm_write,
-			"crashed primary apply AL", BM_LOCKED_MASK)) {
+			"read from attaching", BM_LOCKED_MASK)) {
 			retcode = ERR_IO_MD_DISK;
 			goto force_diskless_dec;
 		}
@@ -1256,9 +1589,9 @@
 	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
 		drbd_suspend_al(mdev); /* IO is still suspended here... */
 
-	spin_lock_irq(&mdev->req_lock);
-	os = mdev->state;
-	ns.i = os.i;
+	spin_lock_irq(&mdev->tconn->req_lock);
+	os = drbd_read_state(mdev);
+	ns = os;
 	/* If MDF_CONSISTENT is not set go into inconsistent state,
 	   otherwise investigate MDF_WasUpToDate...
 	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
@@ -1276,8 +1609,9 @@
 	if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
 		ns.pdsk = D_OUTDATED;
 
-	if ( ns.disk == D_CONSISTENT &&
-	    (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
+	rcu_read_lock();
+	if (ns.disk == D_CONSISTENT &&
+	    (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE))
 		ns.disk = D_UP_TO_DATE;
 
 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
@@ -1285,6 +1619,13 @@
 	   this point, because drbd_request_state() modifies these
 	   flags. */
 
+	if (rcu_dereference(mdev->ldev->disk_conf)->al_updates)
+		mdev->ldev->md.flags &= ~MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
+	rcu_read_unlock();
+
 	/* In case we are C_CONNECTED postpone any decision on the new disk
 	   state after the negotiation phase. */
 	if (mdev->state.conn == C_CONNECTED) {
@@ -1300,12 +1641,13 @@
 	}
 
 	rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
-	ns = mdev->state;
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	if (rv < SS_SUCCESS)
 		goto force_diskless_dec;
 
+	mod_timer(&mdev->request_timer, jiffies + HZ);
+
 	if (mdev->state.role == R_PRIMARY)
 		mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
 	else
@@ -1316,16 +1658,17 @@
 
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	put_ldev(mdev);
-	reply->ret_code = retcode;
-	drbd_reconfig_done(mdev);
+	conn_reconfig_done(mdev->tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 
  force_diskless_dec:
 	put_ldev(mdev);
  force_diskless:
-	drbd_force_state(mdev, NS(disk, D_FAILED));
+	drbd_force_state(mdev, NS(disk, D_DISKLESS));
 	drbd_md_sync(mdev);
  fail:
+	conn_reconfig_done(mdev->tconn);
 	if (nbc) {
 		if (nbc->backing_bdev)
 			blkdev_put(nbc->backing_bdev,
@@ -1335,34 +1678,24 @@
 				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 		kfree(nbc);
 	}
+	kfree(new_disk_conf);
 	lc_destroy(resync_lru);
+	kfree(new_plan);
 
-	reply->ret_code = retcode;
-	drbd_reconfig_done(mdev);
+ finish:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-/* Detaching the disk is a process in multiple stages.  First we need to lock
- * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
- * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
- * internal references as well.
- * Only then we have finally detached. */
-static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			  struct drbd_nl_cfg_reply *reply)
+static int adm_detach(struct drbd_conf *mdev, int force)
 {
-	enum drbd_ret_code retcode;
+	enum drbd_state_rv retcode;
 	int ret;
-	struct detach dt = {};
 
-	if (!detach_from_tags(mdev, nlp->tag_list, &dt)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		goto out;
-	}
-
-	if (dt.detach_force) {
+	if (force) {
 		set_bit(FORCE_DETACH, &mdev->flags);
 		drbd_force_state(mdev, NS(disk, D_FAILED));
-		reply->ret_code = SS_SUCCESS;
+		retcode = SS_SUCCESS;
 		goto out;
 	}
 
@@ -1374,326 +1707,529 @@
 	ret = wait_event_interruptible(mdev->misc_wait,
 			mdev->state.disk != D_FAILED);
 	drbd_resume_io(mdev);
-
 	if ((int)retcode == (int)SS_IS_DISKLESS)
 		retcode = SS_NOTHING_TO_DO;
 	if (ret)
 		retcode = ERR_INTR;
-	reply->ret_code = retcode;
 out:
+	return retcode;
+}
+
+/* Detaching the disk is a process in multiple stages.  First we need to lock
+ * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
+ * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
+ * internal references as well.
+ * Only then we have finally detached. */
+int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct detach_parms parms = { };
+	int err;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
+		err = detach_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+
+	retcode = adm_detach(adm_ctx.mdev, parms.force_detach);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			    struct drbd_nl_cfg_reply *reply)
+static bool conn_resync_running(struct drbd_tconn *tconn)
 {
-	int i, ns;
-	enum drbd_ret_code retcode;
-	struct net_conf *new_conf = NULL;
-	struct crypto_hash *tfm = NULL;
-	struct crypto_hash *integrity_w_tfm = NULL;
-	struct crypto_hash *integrity_r_tfm = NULL;
-	struct hlist_head *new_tl_hash = NULL;
-	struct hlist_head *new_ee_hash = NULL;
-	struct drbd_conf *odev;
+	struct drbd_conf *mdev;
+	bool rv = false;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.conn == C_SYNC_SOURCE ||
+		    mdev->state.conn == C_SYNC_TARGET ||
+		    mdev->state.conn == C_PAUSED_SYNC_S ||
+		    mdev->state.conn == C_PAUSED_SYNC_T) {
+			rv = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return rv;
+}
+
+static bool conn_ov_running(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	bool rv = false;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.conn == C_VERIFY_S ||
+		    mdev->state.conn == C_VERIFY_T) {
+			rv = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return rv;
+}
+
+static enum drbd_ret_code
+_check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct net_conf *new_conf)
+{
+	struct drbd_conf *mdev;
+	int i;
+
+	if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) {
+		if (new_conf->wire_protocol != old_conf->wire_protocol)
+			return ERR_NEED_APV_100;
+
+		if (new_conf->two_primaries != old_conf->two_primaries)
+			return ERR_NEED_APV_100;
+
+		if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg))
+			return ERR_NEED_APV_100;
+	}
+
+	if (!new_conf->two_primaries &&
+	    conn_highest_role(tconn) == R_PRIMARY &&
+	    conn_highest_peer(tconn) == R_PRIMARY)
+		return ERR_NEED_ALLOW_TWO_PRI;
+
+	if (new_conf->two_primaries &&
+	    (new_conf->wire_protocol != DRBD_PROT_C))
+		return ERR_NOT_PROTO_C;
+
+	idr_for_each_entry(&tconn->volumes, mdev, i) {
+		if (get_ldev(mdev)) {
+			enum drbd_fencing_p fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+			put_ldev(mdev);
+			if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
+				return ERR_STONITH_AND_PROT_A;
+		}
+		if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
+			return ERR_DISCARD_IMPOSSIBLE;
+	}
+
+	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A)
+		return ERR_CONG_NOT_PROTO_A;
+
+	return NO_ERROR;
+}
+
+static enum drbd_ret_code
+check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf)
+{
+	enum drbd_ret_code rv;
+	struct drbd_conf *mdev;
+	int i;
+
+	rcu_read_lock();
+	rv = _check_net_options(tconn, rcu_dereference(tconn->net_conf), new_conf);
+	rcu_read_unlock();
+
+	/* tconn->volumes protected by genl_lock() here */
+	idr_for_each_entry(&tconn->volumes, mdev, i) {
+		if (!mdev->bitmap) {
+			if (drbd_bm_init(mdev))
+				return ERR_NOMEM;
+		}
+	}
+
+	return rv;
+}
+
+struct crypto {
+	struct crypto_hash *verify_tfm;
+	struct crypto_hash *csums_tfm;
+	struct crypto_hash *cram_hmac_tfm;
+	struct crypto_hash *integrity_tfm;
+};
+
+static int
+alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
+{
+	if (!tfm_name[0])
+		return NO_ERROR;
+
+	*tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(*tfm)) {
+		*tfm = NULL;
+		return err_alg;
+	}
+
+	return NO_ERROR;
+}
+
+static enum drbd_ret_code
+alloc_crypto(struct crypto *crypto, struct net_conf *new_conf)
+{
 	char hmac_name[CRYPTO_MAX_ALG_NAME];
-	void *int_dig_out = NULL;
-	void *int_dig_in = NULL;
-	void *int_dig_vv = NULL;
-	struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
+	enum drbd_ret_code rv;
 
-	drbd_reconfig_start(mdev);
+	rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg,
+		       ERR_CSUMS_ALG);
+	if (rv != NO_ERROR)
+		return rv;
+	rv = alloc_hash(&crypto->verify_tfm, new_conf->verify_alg,
+		       ERR_VERIFY_ALG);
+	if (rv != NO_ERROR)
+		return rv;
+	rv = alloc_hash(&crypto->integrity_tfm, new_conf->integrity_alg,
+		       ERR_INTEGRITY_ALG);
+	if (rv != NO_ERROR)
+		return rv;
+	if (new_conf->cram_hmac_alg[0] != 0) {
+		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
+			 new_conf->cram_hmac_alg);
 
-	if (mdev->state.conn > C_STANDALONE) {
+		rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
+			       ERR_AUTH_ALG);
+	}
+
+	return rv;
+}
+
+static void free_crypto(struct crypto *crypto)
+{
+	crypto_free_hash(crypto->cram_hmac_tfm);
+	crypto_free_hash(crypto->integrity_tfm);
+	crypto_free_hash(crypto->csums_tfm);
+	crypto_free_hash(crypto->verify_tfm);
+}
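
alloc_crypto() and free_crypto() together implement an all-or-nothing
resource bundle: crypto_free_hash(NULL) is a no-op, so a partially allocated
struct crypto can always be torn down with one unconditional call on the
error path. The same pattern in userspace terms, where free(NULL) plays that
role; struct bundle and its members are hypothetical:

    #include <stdio.h>
    #include <stdlib.h>

    struct bundle {
        char *verify;
        char *csums;
        char *hmac;
    };

    static void free_bundle(struct bundle *b)
    {
        /* free(NULL) is a no-op, like crypto_free_hash(NULL), so a
         * partially filled bundle is freed unconditionally */
        free(b->verify);
        free(b->csums);
        free(b->hmac);
    }

    static int alloc_bundle(struct bundle *b)
    {
        b->verify = malloc(16);
        b->csums = malloc(16);
        b->hmac = NULL;                 /* optional member left unset */
        if (!b->verify || !b->csums)
            return -1;                  /* caller cleans up with free_bundle() */
        return 0;
    }

    int main(void)
    {
        struct bundle b = { 0 };

        if (alloc_bundle(&b) != 0)
            fprintf(stderr, "allocation failed\n");
        free_bundle(&b);
        return 0;
    }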
+
+int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct drbd_tconn *tconn;
+	struct net_conf *old_conf, *new_conf = NULL;
+	int err;
+	int ovr; /* online verify running */
+	int rsr; /* re-sync running */
+	struct crypto crypto = { };
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	tconn = adm_ctx.tconn;
+
+	new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+	if (!new_conf) {
+		retcode = ERR_NOMEM;
+		goto out;
+	}
+
+	conn_reconfig_start(tconn);
+
+	mutex_lock(&tconn->data.mutex);
+	mutex_lock(&tconn->conf_update);
+	old_conf = tconn->net_conf;
+
+	if (!old_conf) {
+		drbd_msg_put_info("net conf missing, try connect");
+		retcode = ERR_INVALID_REQUEST;
+		goto fail;
+	}
+
+	*new_conf = *old_conf;
+	if (should_set_defaults(info))
+		set_net_conf_defaults(new_conf);
+
+	err = net_conf_from_attrs_for_change(new_conf, info);
+	if (err && err != -ENOMSG) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		goto fail;
+	}
+
+	retcode = check_net_options(tconn, new_conf);
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	/* re-sync running */
+	rsr = conn_resync_running(tconn);
+	if (rsr && strcmp(new_conf->csums_alg, old_conf->csums_alg)) {
+		retcode = ERR_CSUMS_RESYNC_RUNNING;
+		goto fail;
+	}
+
+	/* online verify running */
+	ovr = conn_ov_running(tconn);
+	if (ovr && strcmp(new_conf->verify_alg, old_conf->verify_alg)) {
+		retcode = ERR_VERIFY_RUNNING;
+		goto fail;
+	}
+
+	retcode = alloc_crypto(&crypto, new_conf);
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	rcu_assign_pointer(tconn->net_conf, new_conf);
+
+	if (!rsr) {
+		crypto_free_hash(tconn->csums_tfm);
+		tconn->csums_tfm = crypto.csums_tfm;
+		crypto.csums_tfm = NULL;
+	}
+	if (!ovr) {
+		crypto_free_hash(tconn->verify_tfm);
+		tconn->verify_tfm = crypto.verify_tfm;
+		crypto.verify_tfm = NULL;
+	}
+
+	crypto_free_hash(tconn->integrity_tfm);
+	tconn->integrity_tfm = crypto.integrity_tfm;
+	if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100)
+		/* Do this without trying to take tconn->data.mutex again.  */
+		__drbd_send_protocol(tconn, P_PROTOCOL_UPDATE);
+
+	crypto_free_hash(tconn->cram_hmac_tfm);
+	tconn->cram_hmac_tfm = crypto.cram_hmac_tfm;
+
+	mutex_unlock(&tconn->conf_update);
+	mutex_unlock(&tconn->data.mutex);
+	synchronize_rcu();
+	kfree(old_conf);
+
+	if (tconn->cstate >= C_WF_REPORT_PARAMS)
+		drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn)));
+
+	goto done;
+
+ fail:
+	mutex_unlock(&tconn->conf_update);
+	mutex_unlock(&tconn->data.mutex);
+	free_crypto(&crypto);
+	kfree(new_conf);
+ done:
+	conn_reconfig_done(tconn);
+ out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
+{
+	struct drbd_conf *mdev;
+	struct net_conf *old_conf, *new_conf = NULL;
+	struct crypto crypto = { };
+	struct drbd_tconn *tconn;
+	enum drbd_ret_code retcode;
+	int i;
+	int err;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
+		drbd_msg_put_info("connection endpoint(s) missing");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
+	}
+
+	/* No need for _rcu here. All reconfiguration is
+	 * strictly serialized on genl_lock(). We are protected against
+	 * concurrent reconfiguration/addition/deletion */
+	list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
+		if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len &&
+		    !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) {
+			retcode = ERR_LOCAL_ADDR;
+			goto out;
+		}
+
+		if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len &&
+		    !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) {
+			retcode = ERR_PEER_ADDR;
+			goto out;
+		}
+	}
+
+	tconn = adm_ctx.tconn;
+	conn_reconfig_start(tconn);
+
+	if (tconn->cstate > C_STANDALONE) {
 		retcode = ERR_NET_CONFIGURED;
 		goto fail;
 	}
 
-	/* allocation not in the IO path, cqueue thread context */
-	new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+	/* allocation not in the IO path, drbdsetup / netlink process context */
+	new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL);
 	if (!new_conf) {
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
 
-	new_conf->timeout	   = DRBD_TIMEOUT_DEF;
-	new_conf->try_connect_int  = DRBD_CONNECT_INT_DEF;
-	new_conf->ping_int	   = DRBD_PING_INT_DEF;
-	new_conf->max_epoch_size   = DRBD_MAX_EPOCH_SIZE_DEF;
-	new_conf->max_buffers	   = DRBD_MAX_BUFFERS_DEF;
-	new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
-	new_conf->sndbuf_size	   = DRBD_SNDBUF_SIZE_DEF;
-	new_conf->rcvbuf_size	   = DRBD_RCVBUF_SIZE_DEF;
-	new_conf->ko_count	   = DRBD_KO_COUNT_DEF;
-	new_conf->after_sb_0p	   = DRBD_AFTER_SB_0P_DEF;
-	new_conf->after_sb_1p	   = DRBD_AFTER_SB_1P_DEF;
-	new_conf->after_sb_2p	   = DRBD_AFTER_SB_2P_DEF;
-	new_conf->want_lose	   = 0;
-	new_conf->two_primaries    = 0;
-	new_conf->wire_protocol    = DRBD_PROT_C;
-	new_conf->ping_timeo	   = DRBD_PING_TIMEO_DEF;
-	new_conf->rr_conflict	   = DRBD_RR_CONFLICT_DEF;
-	new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
-	new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;
+	set_net_conf_defaults(new_conf);
 
-	if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
+	err = net_conf_from_attrs(new_conf, info);
+	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	if (new_conf->two_primaries
-	    && (new_conf->wire_protocol != DRBD_PROT_C)) {
-		retcode = ERR_NOT_PROTO_C;
+	retcode = check_net_options(tconn, new_conf);
+	if (retcode != NO_ERROR)
 		goto fail;
-	}
 
-	if (get_ldev(mdev)) {
-		enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-		if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
-			retcode = ERR_STONITH_AND_PROT_A;
-			goto fail;
-		}
-	}
-
-	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
-		retcode = ERR_CONG_NOT_PROTO_A;
+	retcode = alloc_crypto(&crypto, new_conf);
+	if (retcode != NO_ERROR)
 		goto fail;
-	}
-
-	if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
-		retcode = ERR_DISCARD;
-		goto fail;
-	}
-
-	retcode = NO_ERROR;
-
-	new_my_addr = (struct sockaddr *)&new_conf->my_addr;
-	new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
-	for (i = 0; i < minor_count; i++) {
-		odev = minor_to_mdev(i);
-		if (!odev || odev == mdev)
-			continue;
-		if (get_net_conf(odev)) {
-			taken_addr = (struct sockaddr *)&odev->net_conf->my_addr;
-			if (new_conf->my_addr_len == odev->net_conf->my_addr_len &&
-			    !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
-				retcode = ERR_LOCAL_ADDR;
-
-			taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr;
-			if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len &&
-			    !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
-				retcode = ERR_PEER_ADDR;
-
-			put_net_conf(odev);
-			if (retcode != NO_ERROR)
-				goto fail;
-		}
-	}
-
-	if (new_conf->cram_hmac_alg[0] != 0) {
-		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
-			new_conf->cram_hmac_alg);
-		tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(tfm)) {
-			tfm = NULL;
-			retcode = ERR_AUTH_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
-			retcode = ERR_AUTH_ALG_ND;
-			goto fail;
-		}
-	}
-
-	if (new_conf->integrity_alg[0]) {
-		integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(integrity_w_tfm)) {
-			integrity_w_tfm = NULL;
-			retcode=ERR_INTEGRITY_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
-			retcode=ERR_INTEGRITY_ALG_ND;
-			goto fail;
-		}
-
-		integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(integrity_r_tfm)) {
-			integrity_r_tfm = NULL;
-			retcode=ERR_INTEGRITY_ALG;
-			goto fail;
-		}
-	}
-
-	ns = new_conf->max_epoch_size/8;
-	if (mdev->tl_hash_s != ns) {
-		new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
-		if (!new_tl_hash) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
-
-	ns = new_conf->max_buffers/8;
-	if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) {
-		new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
-		if (!new_ee_hash) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
 
 	((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
 
-	if (integrity_w_tfm) {
-		i = crypto_hash_digestsize(integrity_w_tfm);
-		int_dig_out = kmalloc(i, GFP_KERNEL);
-		if (!int_dig_out) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-		int_dig_in = kmalloc(i, GFP_KERNEL);
-		if (!int_dig_in) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-		int_dig_vv = kmalloc(i, GFP_KERNEL);
-		if (!int_dig_vv) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
+	conn_flush_workqueue(tconn);
 
-	if (!mdev->bitmap) {
-		if(drbd_bm_init(mdev)) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
-
-	drbd_flush_workqueue(mdev);
-	spin_lock_irq(&mdev->req_lock);
-	if (mdev->net_conf != NULL) {
+	mutex_lock(&tconn->conf_update);
+	old_conf = tconn->net_conf;
+	if (old_conf) {
 		retcode = ERR_NET_CONFIGURED;
-		spin_unlock_irq(&mdev->req_lock);
+		mutex_unlock(&tconn->conf_update);
 		goto fail;
 	}
-	mdev->net_conf = new_conf;
+	rcu_assign_pointer(tconn->net_conf, new_conf);
 
-	mdev->send_cnt = 0;
-	mdev->recv_cnt = 0;
+	conn_free_crypto(tconn);
+	tconn->cram_hmac_tfm = crypto.cram_hmac_tfm;
+	tconn->integrity_tfm = crypto.integrity_tfm;
+	tconn->csums_tfm = crypto.csums_tfm;
+	tconn->verify_tfm = crypto.verify_tfm;
 
-	if (new_tl_hash) {
-		kfree(mdev->tl_hash);
-		mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
-		mdev->tl_hash = new_tl_hash;
+	tconn->my_addr_len = nla_len(adm_ctx.my_addr);
+	memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len);
+	tconn->peer_addr_len = nla_len(adm_ctx.peer_addr);
+	memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len);
+
+	mutex_unlock(&tconn->conf_update);
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, i) {
+		mdev->send_cnt = 0;
+		mdev->recv_cnt = 0;
 	}
+	rcu_read_unlock();
 
-	if (new_ee_hash) {
-		kfree(mdev->ee_hash);
-		mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
-		mdev->ee_hash = new_ee_hash;
-	}
+	retcode = conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
 
-	crypto_free_hash(mdev->cram_hmac_tfm);
-	mdev->cram_hmac_tfm = tfm;
-
-	crypto_free_hash(mdev->integrity_w_tfm);
-	mdev->integrity_w_tfm = integrity_w_tfm;
-
-	crypto_free_hash(mdev->integrity_r_tfm);
-	mdev->integrity_r_tfm = integrity_r_tfm;
-
-	kfree(mdev->int_dig_out);
-	kfree(mdev->int_dig_in);
-	kfree(mdev->int_dig_vv);
-	mdev->int_dig_out=int_dig_out;
-	mdev->int_dig_in=int_dig_in;
-	mdev->int_dig_vv=int_dig_vv;
-	retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
-	spin_unlock_irq(&mdev->req_lock);
-
-	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
-	reply->ret_code = retcode;
-	drbd_reconfig_done(mdev);
+	conn_reconfig_done(tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 
 fail:
-	kfree(int_dig_out);
-	kfree(int_dig_in);
-	kfree(int_dig_vv);
-	crypto_free_hash(tfm);
-	crypto_free_hash(integrity_w_tfm);
-	crypto_free_hash(integrity_r_tfm);
-	kfree(new_tl_hash);
-	kfree(new_ee_hash);
+	free_crypto(&crypto);
 	kfree(new_conf);
 
-	reply->ret_code = retcode;
-	drbd_reconfig_done(mdev);
+	conn_reconfig_done(tconn);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
 {
-	int retcode;
-	struct disconnect dc;
+	enum drbd_state_rv rv;
 
-	memset(&dc, 0, sizeof(struct disconnect));
-	if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
-		retcode = ERR_MANDATORY_TAG;
-		goto fail;
-	}
+	rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING),
+			force ? CS_HARD : 0);
 
-	if (dc.force) {
-		spin_lock_irq(&mdev->req_lock);
-		if (mdev->state.conn >= C_WF_CONNECTION)
-			_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
-		spin_unlock_irq(&mdev->req_lock);
-		goto done;
-	}
-
-	retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
-
-	if (retcode == SS_NOTHING_TO_DO)
-		goto done;
-	else if (retcode == SS_ALREADY_STANDALONE)
-		goto done;
-	else if (retcode == SS_PRIMARY_NOP) {
-		/* Our statche checking code wants to see the peer outdated. */
-		retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
-						      pdsk, D_OUTDATED));
-	} else if (retcode == SS_CW_FAILED_BY_PEER) {
+	switch (rv) {
+	case SS_NOTHING_TO_DO:
+		break;
+	case SS_ALREADY_STANDALONE:
+		return SS_SUCCESS;
+	case SS_PRIMARY_NOP:
+		/* Our state checking code wants to see the peer outdated. */
+		rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
+						pdsk, D_OUTDATED), CS_VERBOSE);
+		break;
+	case SS_CW_FAILED_BY_PEER:
 		/* The peer probably wants to see us outdated. */
-		retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
-							disk, D_OUTDATED),
-					      CS_ORDERED);
-		if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
-			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-			retcode = SS_SUCCESS;
+		rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
+							disk, D_OUTDATED), 0);
+		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
+			rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING),
+					CS_HARD);
+		}
+		break;
+	default:
+		break; /* no special handling necessary */
+	}
+
+	if (rv >= SS_SUCCESS) {
+		enum drbd_state_rv rv2;
+		/* No one else can reconfigure the network while I am here.
+		 * The state handling only uses drbd_thread_stop_nowait(),
+		 * we want to really wait here until the receiver is no more.
+		 */
+		drbd_thread_stop(&adm_ctx.tconn->receiver);
+
+		/* Race breaker.  This additional state change request may be
+		 * necessary, if this was a forced disconnect during a receiver
+		 * restart.  We may have "killed" the receiver thread just
+		 * after drbdd_init() returned.  Typically, we should be
+		 * C_STANDALONE already, now, and this becomes a no-op.
+		 */
+		rv2 = conn_request_state(tconn, NS(conn, C_STANDALONE),
+				CS_VERBOSE | CS_HARD);
+		if (rv2 < SS_SUCCESS)
+			conn_err(tconn,
+				"unexpected rv2=%d in conn_try_disconnect()\n",
+				rv2);
+	}
+	return rv;
+}
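
The switch above encodes an escalation policy: a graceful C_DISCONNECTING
request refused with SS_PRIMARY_NOP or SS_CW_FAILED_BY_PEER is retried with
additional state (marking one side outdated), and only then forced. A toy
model of that retry shape; the enum values and the canned request_state()
behavior below are invented purely for illustration:

    #include <stdio.h>

    enum rv { RV_SUCCESS, RV_PRIMARY_NOP, RV_CW_FAILED_BY_PEER, RV_FAILED };

    /* hypothetical transition: the first graceful attempt is refused,
     * the escalated retry succeeds */
    static enum rv request_state(int escalated)
    {
        return escalated ? RV_SUCCESS : RV_PRIMARY_NOP;
    }

    static enum rv try_disconnect(void)
    {
        enum rv rv = request_state(0);

        switch (rv) {
        case RV_PRIMARY_NOP:
            /* our side wants the peer marked outdated: retry with
             * the extra state bits set */
            rv = request_state(1);
            break;
        case RV_CW_FAILED_BY_PEER:
            /* the peer wants to see us outdated: a different retry */
            rv = request_state(1);
            break;
        default:
            break;
        }
        return rv;
    }

    int main(void)
    {
        printf("disconnect: %s\n",
               try_disconnect() == RV_SUCCESS ? "ok" : "failed");
        return 0;
    }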
+
+int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
+{
+	struct disconnect_parms parms;
+	struct drbd_tconn *tconn;
+	enum drbd_state_rv rv;
+	enum drbd_ret_code retcode;
+	int err;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	tconn = adm_ctx.tconn;
+	memset(&parms, 0, sizeof(parms));
+	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
+		err = disconnect_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto fail;
 		}
 	}
 
-	if (retcode < SS_SUCCESS)
-		goto fail;
-
-	if (wait_event_interruptible(mdev->state_wait,
-				     mdev->state.conn != C_DISCONNECTING)) {
-		/* Do not test for mdev->state.conn == C_STANDALONE, since
-		   someone else might connect us in the mean time! */
-		retcode = ERR_INTR;
-		goto fail;
-	}
-
- done:
-	retcode = NO_ERROR;
+	rv = conn_try_disconnect(tconn, parms.force_disconnect);
+	if (rv < SS_SUCCESS)
+		retcode = rv;  /* FIXME: Type mismatch. */
+	else
+		retcode = NO_ERROR;
  fail:
-	drbd_md_sync(mdev);
-	reply->ret_code = retcode;
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -1705,7 +2241,7 @@
 	if (mdev->state.role != mdev->state.peer)
 		iass = (mdev->state.role == R_PRIMARY);
 	else
-		iass = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+		iass = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
 
 	if (iass)
 		drbd_start_resync(mdev, C_SYNC_SOURCE);
@@ -1713,20 +2249,34 @@
 		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
 }
 
-static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			  struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 {
-	struct resize rs;
-	int retcode = NO_ERROR;
+	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
+	struct resize_parms rs;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	enum dds_flags ddsf;
+	sector_t u_size;
+	int err;
 
-	memset(&rs, 0, sizeof(struct resize));
-	if (!resize_from_tags(mdev, nlp->tag_list, &rs)) {
-		retcode = ERR_MANDATORY_TAG;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
 		goto fail;
+
+	memset(&rs, 0, sizeof(struct resize_parms));
+	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
+		err = resize_parms_from_attrs(&rs, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto fail;
+		}
 	}
 
+	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
 		goto fail;
@@ -1743,15 +2293,36 @@
 		goto fail;
 	}
 
-	if (rs.no_resync && mdev->agreed_pro_version < 93) {
+	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
 		retcode = ERR_NEED_APV_93;
 		goto fail_ldev;
 	}
 
+	rcu_read_lock();
+	u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+	rcu_read_unlock();
+	if (u_size != (sector_t)rs.resize_size) {
+		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
+		if (!new_disk_conf) {
+			retcode = ERR_NOMEM;
+			goto fail_ldev;
+		}
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
-	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
+	if (new_disk_conf) {
+		mutex_lock(&mdev->tconn->conf_update);
+		old_disk_conf = mdev->ldev->disk_conf;
+		*new_disk_conf = *old_disk_conf;
+		new_disk_conf->disk_size = (sector_t)rs.resize_size;
+		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
+		mutex_unlock(&mdev->tconn->conf_update);
+		synchronize_rcu();
+		kfree(old_disk_conf);
+	}
+
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
 	dd = drbd_determine_dev_size(mdev, ddsf);
 	drbd_md_sync(mdev);
@@ -1770,7 +2341,7 @@
 	}
 
  fail:
-	reply->ret_code = retcode;
+	drbd_adm_finish(info, retcode);
 	return 0;
 
  fail_ldev:
@@ -1778,204 +2349,55 @@
 	goto fail;
 }
 
-static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			       struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	enum drbd_ret_code retcode;
+	struct drbd_tconn *tconn;
+	struct res_opts res_opts;
 	int err;
-	int ovr; /* online verify running */
-	int rsr; /* re-sync running */
-	struct crypto_hash *verify_tfm = NULL;
-	struct crypto_hash *csums_tfm = NULL;
-	struct syncer_conf sc;
-	cpumask_var_t new_cpu_mask;
-	int *rs_plan_s = NULL;
-	int fifo_size;
 
-	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
-		retcode = ERR_NOMEM;
-		goto fail;
-	}
-
-	if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
-		memset(&sc, 0, sizeof(struct syncer_conf));
-		sc.rate       = DRBD_RATE_DEF;
-		sc.after      = DRBD_AFTER_DEF;
-		sc.al_extents = DRBD_AL_EXTENTS_DEF;
-		sc.on_no_data  = DRBD_ON_NO_DATA_DEF;
-		sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
-		sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
-		sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
-		sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
-		sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
-	} else
-		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
-
-	if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) {
-		retcode = ERR_MANDATORY_TAG;
-		goto fail;
-	}
-
-	/* re-sync running */
-	rsr = (	mdev->state.conn == C_SYNC_SOURCE ||
-		mdev->state.conn == C_SYNC_TARGET ||
-		mdev->state.conn == C_PAUSED_SYNC_S ||
-		mdev->state.conn == C_PAUSED_SYNC_T );
-
-	if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
-		retcode = ERR_CSUMS_RESYNC_RUNNING;
-		goto fail;
-	}
-
-	if (!rsr && sc.csums_alg[0]) {
-		csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(csums_tfm)) {
-			csums_tfm = NULL;
-			retcode = ERR_CSUMS_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
-			retcode = ERR_CSUMS_ALG_ND;
-			goto fail;
-		}
-	}
-
-	/* online verify running */
-	ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
-
-	if (ovr) {
-		if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
-			retcode = ERR_VERIFY_RUNNING;
-			goto fail;
-		}
-	}
-
-	if (!ovr && sc.verify_alg[0]) {
-		verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(verify_tfm)) {
-			verify_tfm = NULL;
-			retcode = ERR_VERIFY_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
-			retcode = ERR_VERIFY_ALG_ND;
-			goto fail;
-		}
-	}
-
-	/* silently ignore cpu mask on UP kernel */
-	if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
-		err = bitmap_parse(sc.cpu_mask, 32,
-				cpumask_bits(new_cpu_mask), nr_cpu_ids);
-		if (err) {
-			dev_warn(DEV, "bitmap_parse() failed with %d\n", err);
-			retcode = ERR_CPU_MASK_PARSE;
-			goto fail;
-		}
-	}
-
-	ERR_IF (sc.rate < 1) sc.rate = 1;
-	ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
-#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
-	if (sc.al_extents > AL_MAX) {
-		dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
-		sc.al_extents = AL_MAX;
-	}
-#undef AL_MAX
-
-	/* to avoid spurious errors when configuring minors before configuring
-	 * the minors they depend on: if necessary, first create the minor we
-	 * depend on */
-	if (sc.after >= 0)
-		ensure_mdev(sc.after, 1);
-
-	/* most sanity checks done, try to assign the new sync-after
-	 * dependency.  need to hold the global lock in there,
-	 * to avoid a race in the dependency loop check. */
-	retcode = drbd_alter_sa(mdev, sc.after);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	if (!adm_ctx.reply_skb)
+		return retcode;
 	if (retcode != NO_ERROR)
 		goto fail;
+	tconn = adm_ctx.tconn;
 
-	fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
-	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
-		rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
-		if (!rs_plan_s) {
-			dev_err(DEV, "kmalloc of fifo_buffer failed");
+	res_opts = tconn->res_opts;
+	if (should_set_defaults(info))
+		set_res_opts_defaults(&res_opts);
+
+	err = res_opts_from_attrs(&res_opts, info);
+	if (err && err != -ENOMSG) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		goto fail;
+	}
+
+	err = set_resource_options(tconn, &res_opts);
+	if (err) {
+		retcode = ERR_INVALID_REQUEST;
+		if (err == -ENOMEM)
 			retcode = ERR_NOMEM;
-			goto fail;
-		}
 	}
 
-	/* ok, assign the rest of it as well.
-	 * lock against receive_SyncParam() */
-	spin_lock(&mdev->peer_seq_lock);
-	mdev->sync_conf = sc;
-
-	if (!rsr) {
-		crypto_free_hash(mdev->csums_tfm);
-		mdev->csums_tfm = csums_tfm;
-		csums_tfm = NULL;
-	}
-
-	if (!ovr) {
-		crypto_free_hash(mdev->verify_tfm);
-		mdev->verify_tfm = verify_tfm;
-		verify_tfm = NULL;
-	}
-
-	if (fifo_size != mdev->rs_plan_s.size) {
-		kfree(mdev->rs_plan_s.values);
-		mdev->rs_plan_s.values = rs_plan_s;
-		mdev->rs_plan_s.size   = fifo_size;
-		mdev->rs_planed = 0;
-		rs_plan_s = NULL;
-	}
-
-	spin_unlock(&mdev->peer_seq_lock);
-
-	if (get_ldev(mdev)) {
-		wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-		drbd_al_shrink(mdev);
-		err = drbd_check_al_size(mdev);
-		lc_unlock(mdev->act_log);
-		wake_up(&mdev->al_wait);
-
-		put_ldev(mdev);
-		drbd_md_sync(mdev);
-
-		if (err) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
-
-	if (mdev->state.conn >= C_CONNECTED)
-		drbd_send_sync_param(mdev, &sc);
-
-	if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) {
-		cpumask_copy(mdev->cpu_mask, new_cpu_mask);
-		drbd_calc_cpu_mask(mdev);
-		mdev->receiver.reset_cpu_mask = 1;
-		mdev->asender.reset_cpu_mask = 1;
-		mdev->worker.reset_cpu_mask = 1;
-	}
-
-	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 fail:
-	kfree(rs_plan_s);
-	free_cpumask_var(new_cpu_mask);
-	crypto_free_hash(csums_tfm);
-	crypto_free_hash(verify_tfm);
-	reply->ret_code = retcode;
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode;
+	struct drbd_conf *mdev;
+	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -1990,10 +2412,10 @@
 		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
 
 	while (retcode == SS_NEED_CONNECTION) {
-		spin_lock_irq(&mdev->req_lock);
+		spin_lock_irq(&mdev->tconn->req_lock);
 		if (mdev->state.conn < C_CONNECTED)
 			retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
-		spin_unlock_irq(&mdev->req_lock);
+		spin_unlock_irq(&mdev->tconn->req_lock);
 
 		if (retcode != SS_NEED_CONNECTION)
 			break;
@@ -2002,7 +2424,25 @@
 	}
 	drbd_resume_io(mdev);
 
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
+		union drbd_state mask, union drbd_state val)
+{
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	retcode = drbd_request_state(adm_ctx.mdev, mask, val);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -2015,10 +2455,18 @@
 	return rv;
 }
 
-static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				   struct drbd_nl_cfg_reply *reply)
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode;
+	int retcode; /* drbd_ret_code, drbd_state_rv */
+	struct drbd_conf *mdev;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -2028,16 +2476,15 @@
 	drbd_flush_workqueue(mdev);
 
 	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
-
 	if (retcode < SS_SUCCESS) {
 		if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
-			/* The peer will get a resync upon connect anyways. Just make that
-			   into a full resync. */
+			/* The peer will get a resync upon connect anyways.
+			 * Just make that into a full resync. */
 			retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
 			if (retcode >= SS_SUCCESS) {
 				if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
-					"set_n_write from invalidate_peer",
-					BM_LOCKED_SET_ALLOWED))
+						   "set_n_write from invalidate_peer",
+						   BM_LOCKED_SET_ALLOWED))
 					retcode = ERR_IO_MD_DISK;
 			}
 		} else
@@ -2045,30 +2492,41 @@
 	}
 	drbd_resume_io(mdev);
 
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	enum drbd_ret_code retcode;
 
-	if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
 		retcode = ERR_PAUSE_IS_SET;
-
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			       struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
-	union drbd_state s;
+	union drbd_dev_state s;
+	enum drbd_ret_code retcode;
 
-	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
-		s = mdev->state;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
+		s = adm_ctx.mdev->state;
 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
 			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
 				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
@@ -2077,172 +2535,482 @@
 		}
 	}
 
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
-
-	return 0;
+	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
 }
 
-static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_conf *mdev;
+	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 	if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
 		drbd_uuid_new_current(mdev);
 		clear_bit(NEW_CUR_UUID, &mdev->flags);
 	}
 	drbd_suspend_io(mdev);
-	reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
-	if (reply->ret_code == SS_SUCCESS) {
+	retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
+	if (retcode == SS_SUCCESS) {
 		if (mdev->state.conn < C_CONNECTED)
-			tl_clear(mdev);
+			tl_clear(mdev->tconn);
 		if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
-			tl_restart(mdev, fail_frozen_disk_io);
+			tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO);
 	}
 	drbd_resume_io(mdev);
 
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
+	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
+}
+
+int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr)
+{
+	struct nlattr *nla;
+	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
+	if (!nla)
+		goto nla_put_failure;
+	if (vnr != VOLUME_UNSPECIFIED &&
+	    nla_put_u32(skb, T_ctx_volume, vnr))
+		goto nla_put_failure;
+	if (nla_put_string(skb, T_ctx_resource_name, tconn->name))
+		goto nla_put_failure;
+	if (tconn->my_addr_len &&
+	    nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr))
+		goto nla_put_failure;
+	if (tconn->peer_addr_len &&
+	    nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr))
+		goto nla_put_failure;
+	nla_nest_end(skb, nla);
 	return 0;
+
+nla_put_failure:
+	if (nla)
+		nla_nest_cancel(skb, nla);
+	return -EMSGSIZE;
 }
 
-static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
+		const struct sib_info *sib)
 {
-	unsigned short *tl;
+	struct state_info *si = NULL; /* for sizeof(si->member); */
+	struct net_conf *nc;
+	struct nlattr *nla;
+	int got_ldev;
+	int err = 0;
+	int exclude_sensitive;
 
-	tl = reply->tag_list;
+	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
+	 * to.  So we better exclude_sensitive information.
+	 *
+	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
+	 * in the context of the requesting user process. Exclude sensitive
+	 * information, unless current has superuser.
+	 *
+	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
+	 * relies on the current implementation of netlink_dump(), which
+	 * executes the dump callback successively from netlink_recvmsg(),
+	 * always in the context of the receiving process */
+	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
 
-	if (get_ldev(mdev)) {
-		tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl);
-		put_ldev(mdev);
-	}
+	got_ldev = get_ldev(mdev);
 
-	if (get_net_conf(mdev)) {
-		tl = net_conf_to_tags(mdev, mdev->net_conf, tl);
-		put_net_conf(mdev);
-	}
-	tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);
+	/* We still need to add connection name and volume number information.
+	 * Minor number is in drbd_genlmsghdr. */
+	if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr))
+		goto nla_put_failure;
 
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+	if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
+		goto nla_put_failure;
 
-	return (int)((char *)tl - (char *)reply->tag_list);
-}
+	rcu_read_lock();
+	if (got_ldev)
+		if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
+			goto nla_put_failure;
 
-static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
-{
-	unsigned short *tl = reply->tag_list;
-	union drbd_state s = mdev->state;
-	unsigned long rs_left;
-	unsigned int res;
+	nc = rcu_dereference(mdev->tconn->net_conf);
+	if (nc)
+		err = net_conf_to_skb(skb, nc, exclude_sensitive);
+	rcu_read_unlock();
+	if (err)
+		goto nla_put_failure;
 
-	tl = get_state_to_tags(mdev, (struct get_state *)&s, tl);
+	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
+	if (!nla)
+		goto nla_put_failure;
+	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
+	    nla_put_u32(skb, T_current_state, mdev->state.i) ||
+	    nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
+	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) ||
+	    nla_put_u64(skb, T_send_cnt, mdev->send_cnt) ||
+	    nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) ||
+	    nla_put_u64(skb, T_read_cnt, mdev->read_cnt) ||
+	    nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) ||
+	    nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) ||
+	    nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) ||
+	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) ||
+	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) ||
+	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt)))
+		goto nla_put_failure;
 
-	/* no local ref, no bitmap, no syncer progress. */
-	if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
-		if (get_ldev(mdev)) {
-			drbd_get_syncer_progress(mdev, &rs_left, &res);
-			tl = tl_add_int(tl, T_sync_progress, &res);
-			put_ldev(mdev);
+	if (got_ldev) {
+		int err;
+
+		spin_lock_irq(&mdev->ldev->md.uuid_lock);
+		err = nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
+		spin_unlock_irq(&mdev->ldev->md.uuid_lock);
+
+		if (err)
+			goto nla_put_failure;
+
+		if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) ||
+		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) ||
+		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev)))
+			goto nla_put_failure;
+		if (C_SYNC_SOURCE <= mdev->state.conn &&
+		    C_PAUSED_SYNC_T >= mdev->state.conn) {
+			if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) ||
+			    nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed))
+				goto nla_put_failure;
 		}
 	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
 
-	return (int)((char *)tl - (char *)reply->tag_list);
-}
+	if (sib) {
+		switch (sib->sib_reason) {
+		case SIB_SYNC_PROGRESS:
+		case SIB_GET_STATUS_REPLY:
+			break;
+		case SIB_STATE_CHANGE:
+			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
+			    nla_put_u32(skb, T_new_state, sib->ns.i))
+				goto nla_put_failure;
+			break;
+		case SIB_HELPER_POST:
+			if (nla_put_u32(skb, T_helper_exit_code,
+					sib->helper_exit_code))
+				goto nla_put_failure;
+			/* fall through */
+		case SIB_HELPER_PRE:
+			if (nla_put_string(skb, T_helper, sib->helper_name))
+				goto nla_put_failure;
+			break;
+		}
+	}
+	nla_nest_end(skb, nla);
 
-static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
-{
-	unsigned short *tl;
-
-	tl = reply->tag_list;
-
-	if (get_ldev(mdev)) {
-		tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
-		tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
+	if (0)
+nla_put_failure:
+		err = -EMSGSIZE;
+	if (got_ldev)
 		put_ldev(mdev);
-	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	return (int)((char *)tl - (char *)reply->tag_list);
+	return err;
 }
 
-/**
- * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
- * @mdev:	DRBD device.
- * @nlp:	Netlink/connector packet from drbdsetup
- * @reply:	Reply packet for drbdsetup
+int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	int err;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL);
+	if (err) {
+		nlmsg_free(adm_ctx.reply_skb);
+		return err;
+	}
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct drbd_conf *mdev;
+	struct drbd_genlmsghdr *dh;
+	struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0];
+	struct drbd_tconn *tconn = NULL;
+	struct drbd_tconn *tmp;
+	unsigned volume = cb->args[1];
+
+	/* Open-coded, deferred iteration:
+	 * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
+	 *	idr_for_each_entry(&tconn->volumes, mdev, i) {
+	 *	  ...
+	 *	}
+	 * }
+	 * where tconn is cb->args[0];
+	 * and i is cb->args[1];
+	 *
+	 * cb->args[2] indicates if we shall loop over all resources,
+	 * or just dump all volumes of a single resource.
+	 *
+	 * This may miss entries inserted after this dump started,
+	 * or entries deleted before they are reached.
+	 *
+	 * We need to make sure the mdev won't disappear while
+	 * we are looking at it, and revalidate our iterators
+	 * on each iteration.
+	 */
+
+	/* synchronize with conn_create()/conn_destroy() */
+	rcu_read_lock();
+	/* revalidate iterator position */
+	list_for_each_entry_rcu(tmp, &drbd_tconns, all_tconn) {
+		if (pos == NULL) {
+			/* first iteration */
+			pos = tmp;
+			tconn = pos;
+			break;
+		}
+		if (tmp == pos) {
+			tconn = pos;
+			break;
+		}
+	}
+	if (tconn) {
+next_tconn:
+		mdev = idr_get_next(&tconn->volumes, &volume);
+		if (!mdev) {
+			/* No more volumes to dump on this tconn.
+			 * Advance tconn iterator. */
+			pos = list_entry_rcu(tconn->all_tconn.next,
+					     struct drbd_tconn, all_tconn);
+			/* Did we dump any volume on this tconn yet? */
+			if (volume != 0) {
+				/* If we reached the end of the list,
+				 * or only a single resource dump was requested,
+				 * we are done. */
+				if (&pos->all_tconn == &drbd_tconns || cb->args[2])
+					goto out;
+				volume = 0;
+				tconn = pos;
+				goto next_tconn;
+			}
+		}
+
+		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+				cb->nlh->nlmsg_seq, &drbd_genl_family,
+				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
+		if (!dh)
+			goto out;
+
+		if (!mdev) {
+			/* This is a tconn without a single volume.
+			 * Surprisingly enough, it may have a network
+			 * configuration. */
+			struct net_conf *nc;
+			dh->minor = -1U;
+			dh->ret_code = NO_ERROR;
+			if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED))
+				goto cancel;
+			nc = rcu_dereference(tconn->net_conf);
+			if (nc && net_conf_to_skb(skb, nc, 1) != 0)
+				goto cancel;
+			goto done;
+		}
+
+		D_ASSERT(mdev->vnr == volume);
+		D_ASSERT(mdev->tconn == tconn);
+
+		dh->minor = mdev_to_minor(mdev);
+		dh->ret_code = NO_ERROR;
+
+		if (nla_put_status_info(skb, mdev, NULL)) {
+cancel:
+			genlmsg_cancel(skb, dh);
+			goto out;
+		}
+done:
+		genlmsg_end(skb, dh);
+	}
+
+out:
+	rcu_read_unlock();
+	/* where to start the next iteration */
+	cb->args[0] = (long)pos;
+	cb->args[1] = (pos == tconn) ? volume + 1 : 0;
+
+	/* Finding no more tconns/volumes/minors results in an empty skb,
+	 * which will terminate the dump. */
+	return skb->len;
+}
+
+/*
+ * Request status of all resources, or of all volumes within a single resource.
+ *
+ * This is a dump, as the answer may not fit in a single reply skb otherwise;
+ * that means we cannot use the family->attrbuf or other such members, because
+ * a dump is NOT protected by the genl_lock().  During the dump, we only have
+ * access to the incoming skb, and need to open-code "parsing" of the nlattr
+ * payload.
+ *
+ * Once things are setup properly, we call into get_one_status().
  */
-static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	unsigned short *tl;
-	char rv;
+	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
+	struct nlattr *nla;
+	const char *resource_name;
+	struct drbd_tconn *tconn;
+	int maxtype;
 
-	tl = reply->tag_list;
+	/* Is this a followup call? */
+	if (cb->args[0]) {
+		/* ... of a single resource dump,
+		 * and the resource iterator has been advanced already? */
+		if (cb->args[2] && cb->args[2] != cb->args[0])
+			return 0; /* DONE. */
+		goto dump;
+	}
 
-	rv = mdev->state.pdsk == D_OUTDATED        ? UT_PEER_OUTDATED :
-	  test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;
+	/* First call (from netlink_dump_start).  We need to figure out
+	 * which resource(s) the user wants us to dump. */
+	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
+			nlmsg_attrlen(cb->nlh, hdrlen),
+			DRBD_NLA_CFG_CONTEXT);
 
-	tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+	/* No explicit context given.  Dump all. */
+	if (!nla)
+		goto dump;
+	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
+	if (IS_ERR(nla))
+		return PTR_ERR(nla);
+	/* context given, but no name present? */
+	if (!nla)
+		return -EINVAL;
+	resource_name = nla_data(nla);
+	tconn = conn_get_by_name(resource_name);
 
-	return (int)((char *)tl - (char *)reply->tag_list);
+	if (!tconn)
+		return -ENODEV;
+
+	kref_put(&tconn->kref, &conn_destroy); /* get_one_status() (re)validates tconn by itself */
+
+	/* prime iterators, and set "filter" mode mark:
+	 * only dump this tconn. */
+	cb->args[0] = (long)tconn;
+	/* cb->args[1] = 0; passed in this way. */
+	cb->args[2] = (long)tconn;
+
+dump:
+	return get_one_status(skb, cb);
 }
 
-static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
 {
-	/* default to resume from last known position, if possible */
-	struct start_ov args =
-		{ .start_sector = mdev->ov_start_sector };
+	enum drbd_ret_code retcode;
+	struct timeout_parms tp;
+	int err;
 
-	if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	tp.timeout_type =
+		adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
+		test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED :
+		UT_DEFAULT;
+
+	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
+	if (err) {
+		nlmsg_free(adm_ctx.reply_skb);
+		return err;
 	}
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
+{
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
+	struct start_ov_parms parms;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
+
+	/* resume from last known position, if possible */
+	parms.ov_start_sector = mdev->ov_start_sector;
+	parms.ov_stop_sector = ULLONG_MAX;
+	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
+		int err = start_ov_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+	/* w_make_ov_request expects position to be aligned */
+	mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
+	mdev->ov_stop_sector = parms.ov_stop_sector;
 
 	/* If there is still bitmap IO pending, e.g. previous resync or verify
 	 * just being finished, wait for it before requesting a new resync. */
 	drbd_suspend_io(mdev);
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
-
-	/* w_make_ov_request expects position to be aligned */
-	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
-	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+	retcode = drbd_request_state(mdev, NS(conn, C_VERIFY_S));
 	drbd_resume_io(mdev);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
 
-static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	int skip_initial_sync = 0;
 	int err;
+	struct new_c_uuid_parms args;
 
-	struct new_c_uuid args;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out_nolock;
 
-	memset(&args, 0, sizeof(struct new_c_uuid));
-	if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
+	mdev = adm_ctx.mdev;
+	memset(&args, 0, sizeof(args));
+	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
+		err = new_c_uuid_parms_from_attrs(&args, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out_nolock;
+		}
 	}
 
-	mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */
+	mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */
 
 	if (!get_ldev(mdev)) {
 		retcode = ERR_NO_DISK;
@@ -2250,7 +3018,7 @@
 	}
 
 	/* this is "skip initial sync", assume to be clean */
-	if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
+	if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 &&
 	    mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
 		dev_info(DEV, "Preparing to skip initial sync\n");
 		skip_initial_sync = 1;
@@ -2273,10 +3041,10 @@
 			drbd_send_uuids_skip_initial_sync(mdev);
 			_drbd_uuid_set(mdev, UI_BITMAP, 0);
 			drbd_print_uuids(mdev, "cleared bitmap UUID");
-			spin_lock_irq(&mdev->req_lock);
+			spin_lock_irq(&mdev->tconn->req_lock);
 			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
 					CS_VERBOSE, NULL);
-			spin_unlock_irq(&mdev->req_lock);
+			spin_unlock_irq(&mdev->tconn->req_lock);
 		}
 	}
 
@@ -2284,416 +3052,284 @@
 out_dec:
 	put_ldev(mdev);
 out:
-	mutex_unlock(&mdev->state_mutex);
-
-	reply->ret_code = retcode;
+	mutex_unlock(mdev->state_mutex);
+out_nolock:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-struct cn_handler_struct {
-	int (*function)(struct drbd_conf *,
-			 struct drbd_nl_cfg_req *,
-			 struct drbd_nl_cfg_reply *);
-	int reply_body_size;
-};
-
-static struct cn_handler_struct cnd_table[] = {
-	[ P_primary ]		= { &drbd_nl_primary,		0 },
-	[ P_secondary ]		= { &drbd_nl_secondary,		0 },
-	[ P_disk_conf ]		= { &drbd_nl_disk_conf,		0 },
-	[ P_detach ]		= { &drbd_nl_detach,		0 },
-	[ P_net_conf ]		= { &drbd_nl_net_conf,		0 },
-	[ P_disconnect ]	= { &drbd_nl_disconnect,	0 },
-	[ P_resize ]		= { &drbd_nl_resize,		0 },
-	[ P_syncer_conf ]	= { &drbd_nl_syncer_conf,	0 },
-	[ P_invalidate ]	= { &drbd_nl_invalidate,	0 },
-	[ P_invalidate_peer ]	= { &drbd_nl_invalidate_peer,	0 },
-	[ P_pause_sync ]	= { &drbd_nl_pause_sync,	0 },
-	[ P_resume_sync ]	= { &drbd_nl_resume_sync,	0 },
-	[ P_suspend_io ]	= { &drbd_nl_suspend_io,	0 },
-	[ P_resume_io ]		= { &drbd_nl_resume_io,		0 },
-	[ P_outdate ]		= { &drbd_nl_outdate,		0 },
-	[ P_get_config ]	= { &drbd_nl_get_config,
-				    sizeof(struct syncer_conf_tag_len_struct) +
-				    sizeof(struct disk_conf_tag_len_struct) +
-				    sizeof(struct net_conf_tag_len_struct) },
-	[ P_get_state ]		= { &drbd_nl_get_state,
-				    sizeof(struct get_state_tag_len_struct) +
-				    sizeof(struct sync_progress_tag_len_struct)	},
-	[ P_get_uuids ]		= { &drbd_nl_get_uuids,
-				    sizeof(struct get_uuids_tag_len_struct) },
-	[ P_get_timeout_flag ]	= { &drbd_nl_get_timeout_flag,
-				    sizeof(struct get_timeout_flag_tag_len_struct)},
-	[ P_start_ov ]		= { &drbd_nl_start_ov,		0 },
-	[ P_new_c_uuid ]	= { &drbd_nl_new_c_uuid,	0 },
-};
-
-static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
+static enum drbd_ret_code
+drbd_check_resource_name(const char *name)
 {
-	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
-	struct cn_handler_struct *cm;
-	struct cn_msg *cn_reply;
-	struct drbd_nl_cfg_reply *reply;
-	struct drbd_conf *mdev;
-	int retcode, rr;
-	int reply_size = sizeof(struct cn_msg)
-		+ sizeof(struct drbd_nl_cfg_reply)
-		+ sizeof(short int);
+	if (!name || !name[0]) {
+		drbd_msg_put_info("resource name missing");
+		return ERR_MANDATORY_TAG;
+	}
+	/* if we want to use these in sysfs/configfs/debugfs some day,
+	 * we must not allow slashes */
+	if (strchr(name, '/')) {
+		drbd_msg_put_info("invalid resource name");
+		return ERR_INVALID_REQUEST;
+	}
+	return NO_ERROR;
+}
 
-	if (!try_module_get(THIS_MODULE)) {
-		printk(KERN_ERR "drbd: try_module_get() failed!\n");
-		return;
+int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct res_opts res_opts;
+	int err;
+
+	retcode = drbd_adm_prepare(skb, info, 0);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	set_res_opts_defaults(&res_opts);
+	err = res_opts_from_attrs(&res_opts, info);
+	if (err && err != -ENOMSG) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		goto out;
 	}
 
-	if (!capable(CAP_SYS_ADMIN)) {
-		retcode = ERR_PERM;
-		goto fail;
+	retcode = drbd_check_resource_name(adm_ctx.resource_name);
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (adm_ctx.tconn) {
+		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
+			retcode = ERR_INVALID_REQUEST;
+			drbd_msg_put_info("resource exists");
+		}
+		/* else: still NO_ERROR */
+		goto out;
 	}
 
-	mdev = ensure_mdev(nlp->drbd_minor,
-			(nlp->flags & DRBD_NL_CREATE_DEVICE));
-	if (!mdev) {
-		retcode = ERR_MINOR_INVALID;
-		goto fail;
-	}
-
-	if (nlp->packet_type >= P_nl_after_last_packet ||
-	    nlp->packet_type == P_return_code_only) {
-		retcode = ERR_PACKET_NR;
-		goto fail;
-	}
-
-	cm = cnd_table + nlp->packet_type;
-
-	/* This may happen if packet number is 0: */
-	if (cm->function == NULL) {
-		retcode = ERR_PACKET_NR;
-		goto fail;
-	}
-
-	reply_size += cm->reply_body_size;
-
-	/* allocation not in the IO path, cqueue thread context */
-	cn_reply = kzalloc(reply_size, GFP_KERNEL);
-	if (!cn_reply) {
+	if (!conn_create(adm_ctx.resource_name, &res_opts))
 		retcode = ERR_NOMEM;
-		goto fail;
-	}
-	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
-
-	reply->packet_type =
-		cm->reply_body_size ? nlp->packet_type : P_return_code_only;
-	reply->minor = nlp->drbd_minor;
-	reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
-	/* reply->tag_list; might be modified by cm->function. */
-
-	rr = cm->function(mdev, nlp, reply);
-
-	cn_reply->id = req->id;
-	cn_reply->seq = req->seq;
-	cn_reply->ack = req->ack  + 1;
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
-	cn_reply->flags = 0;
-
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
-	if (rr && rr != -ESRCH)
-		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
-
-	kfree(cn_reply);
-	module_put(THIS_MODULE);
-	return;
- fail:
-	drbd_nl_send_reply(req, retcode);
-	module_put(THIS_MODULE);
-}
-
-static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
-
-static unsigned short *
-__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
-	unsigned short len, int nul_terminated)
-{
-	unsigned short l = tag_descriptions[tag_number(tag)].max_len;
-	len = (len < l) ? len :  l;
-	put_unaligned(tag, tl++);
-	put_unaligned(len, tl++);
-	memcpy(tl, data, len);
-	tl = (unsigned short*)((char*)tl + len);
-	if (nul_terminated)
-		*((char*)tl - 1) = 0;
-	return tl;
-}
-
-static unsigned short *
-tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
-{
-	return __tl_add_blob(tl, tag, data, len, 0);
-}
-
-static unsigned short *
-tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
-{
-	return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
-}
-
-static unsigned short *
-tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
-{
-	put_unaligned(tag, tl++);
-	switch(tag_type(tag)) {
-	case TT_INTEGER:
-		put_unaligned(sizeof(int), tl++);
-		put_unaligned(*(int *)val, (int *)tl);
-		tl = (unsigned short*)((char*)tl+sizeof(int));
-		break;
-	case TT_INT64:
-		put_unaligned(sizeof(u64), tl++);
-		put_unaligned(*(u64 *)val, (u64 *)tl);
-		tl = (unsigned short*)((char*)tl+sizeof(u64));
-		break;
-	default:
-		/* someone did something stupid. */
-		;
-	}
-	return tl;
-}
-
-void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
-{
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct get_state_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-
-	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
-
-	tl = get_state_to_tags(mdev, (struct get_state *)&state, tl);
-
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_get_state;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-}
-
-void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
-{
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct call_helper_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-
-	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
-
-	tl = tl_add_str(tl, T_helper, helper_name);
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_call_helper;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-}
-
-void drbd_bcast_ee(struct drbd_conf *mdev,
-		const char *reason, const int dgs,
-		const char* seen_hash, const char* calc_hash,
-		const struct drbd_epoch_entry* e)
-{
-	struct cn_msg *cn_reply;
-	struct drbd_nl_cfg_reply *reply;
-	unsigned short *tl;
-	struct page *page;
-	unsigned len;
-
-	if (!e)
-		return;
-	if (!reason || !reason[0])
-		return;
-
-	/* apparently we have to memcpy twice, first to prepare the data for the
-	 * struct cn_msg, then within cn_netlink_send from the cn_msg to the
-	 * netlink skb. */
-	/* receiver thread context, which is not in the writeout path (of this node),
-	 * but may be in the writeout path of the _other_ node.
-	 * GFP_NOIO to avoid potential "distributed deadlock". */
-	cn_reply = kzalloc(
-		sizeof(struct cn_msg)+
-		sizeof(struct drbd_nl_cfg_reply)+
-		sizeof(struct dump_ee_tag_len_struct)+
-		sizeof(short int),
-		GFP_NOIO);
-
-	if (!cn_reply) {
-		dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
-				(unsigned long long)e->sector, e->size);
-		return;
-	}
-
-	reply = (struct drbd_nl_cfg_reply*)cn_reply->data;
-	tl = reply->tag_list;
-
-	tl = tl_add_str(tl, T_dump_ee_reason, reason);
-	tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
-	tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
-	tl = tl_add_int(tl, T_ee_sector, &e->sector);
-	tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
-
-	/* dump the first 32k */
-	len = min_t(unsigned, e->size, 32 << 10);
-	put_unaligned(T_ee_data, tl++);
-	put_unaligned(len, tl++);
-
-	page = e->pages;
-	page_chain_for_each(page) {
-		void *d = kmap_atomic(page);
-		unsigned l = min_t(unsigned, len, PAGE_SIZE);
-		memcpy(tl, d, l);
-		kunmap_atomic(d);
-		tl = (unsigned short*)((char*)tl + l);
-		len -= l;
-		if (len == 0)
-			break;
-	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_add_return(1,&drbd_nl_seq);
-	cn_reply->ack = 0; // not used here.
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char*)tl - (char*)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_dump_ee;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-	kfree(cn_reply);
-}
-
-void drbd_bcast_sync_progress(struct drbd_conf *mdev)
-{
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct sync_progress_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-	unsigned long rs_left;
-	unsigned int res;
-
-	/* no local ref, no bitmap, no syncer progress, no broadcast. */
-	if (!get_ldev(mdev))
-		return;
-	drbd_get_syncer_progress(mdev, &rs_left, &res);
-	put_ldev(mdev);
-
-	tl = tl_add_int(tl, T_sync_progress, &res);
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_sync_progress;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-}
-
-int __init drbd_nl_init(void)
-{
-	static struct cb_id cn_id_drbd;
-	int err, try=10;
-
-	cn_id_drbd.val = CN_VAL_DRBD;
-	do {
-		cn_id_drbd.idx = cn_idx;
-		err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
-		if (!err)
-			break;
-		cn_idx = (cn_idx + CN_IDX_STEP);
-	} while (try--);
-
-	if (err) {
-		printk(KERN_ERR "drbd: cn_drbd failed to register\n");
-		return err;
-	}
-
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-void drbd_nl_cleanup(void)
+int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	static struct cb_id cn_id_drbd;
+	struct drbd_genlmsghdr *dh = info->userhdr;
+	enum drbd_ret_code retcode;
 
-	cn_id_drbd.idx = cn_idx;
-	cn_id_drbd.val = CN_VAL_DRBD;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	cn_del_callback(&cn_id_drbd);
+	if (dh->minor > MINORMASK) {
+		drbd_msg_put_info("requested minor out of range");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
+	}
+	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
+		drbd_msg_put_info("requested volume id out of range");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
+	}
+
+	/* drbd_adm_prepare made sure already
+	 * that mdev->tconn and mdev->vnr match the request. */
+	if (adm_ctx.mdev) {
+		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
+			retcode = ERR_MINOR_EXISTS;
+		/* else: still NO_ERROR */
+		goto out;
+	}
+
+	retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
+static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
 {
-	char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	int rr;
-
-	memset(buffer, 0, sizeof(buffer));
-	cn_reply->id = req->id;
-
-	cn_reply->seq = req->seq;
-	cn_reply->ack = req->ack  + 1;
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_return_code_only;
-	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
-	reply->ret_code = ret_code;
-
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-	if (rr && rr != -ESRCH)
-		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+	if (mdev->state.disk == D_DISKLESS &&
+	    /* no need to be mdev->state.conn == C_STANDALONE &&
+	     * we may want to delete a minor from a live replication group.
+	     */
+	    mdev->state.role == R_SECONDARY) {
+		_drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS),
+				    CS_VERBOSE + CS_WAIT_COMPLETE);
+		idr_remove(&mdev->tconn->volumes, mdev->vnr);
+		idr_remove(&minors, mdev_to_minor(mdev));
+		del_gendisk(mdev->vdisk);
+		synchronize_rcu();
+		kref_put(&mdev->kref, &drbd_minor_destroy);
+		return NO_ERROR;
+	} else
+		return ERR_MINOR_CONFIGURED;
 }
 
+int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	retcode = adm_delete_minor(adm_ctx.mdev);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
+{
+	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
+	struct drbd_conf *mdev;
+	unsigned i;
+
+	retcode = drbd_adm_prepare(skb, info, 0);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (!adm_ctx.tconn) {
+		retcode = ERR_RES_NOT_KNOWN;
+		goto out;
+	}
+
+	/* demote */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		retcode = drbd_set_role(mdev, R_SECONDARY, 0);
+		if (retcode < SS_SUCCESS) {
+			drbd_msg_put_info("failed to demote");
+			goto out;
+		}
+	}
+
+	retcode = conn_try_disconnect(adm_ctx.tconn, 0);
+	if (retcode < SS_SUCCESS) {
+		drbd_msg_put_info("failed to disconnect");
+		goto out;
+	}
+
+	/* detach */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		retcode = adm_detach(mdev, 0);
+		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
+			drbd_msg_put_info("failed to detach");
+			goto out;
+		}
+	}
+
+	/* If we reach this, all volumes (of this tconn) are Secondary,
+	 * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
+	 * actually stopped; state handling only does drbd_thread_stop_nowait(). */
+	drbd_thread_stop(&adm_ctx.tconn->worker);
+
+	/* Now, nothing can fail anymore */
+
+	/* delete volumes */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		retcode = adm_delete_minor(mdev);
+		if (retcode != NO_ERROR) {
+			/* "can not happen" */
+			drbd_msg_put_info("failed to delete volume");
+			goto out;
+		}
+	}
+
+	/* delete connection */
+	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
+		list_del_rcu(&adm_ctx.tconn->all_tconn);
+		synchronize_rcu();
+		kref_put(&adm_ctx.tconn->kref, &conn_destroy);
+
+		retcode = NO_ERROR;
+	} else {
+		/* "can not happen" */
+		retcode = ERR_RES_IN_USE;
+		drbd_msg_put_info("failed to delete connection");
+	}
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
+		list_del_rcu(&adm_ctx.tconn->all_tconn);
+		synchronize_rcu();
+		kref_put(&adm_ctx.tconn->kref, &conn_destroy);
+
+		retcode = NO_ERROR;
+	} else {
+		retcode = ERR_RES_IN_USE;
+	}
+
+	if (retcode == NO_ERROR)
+		drbd_thread_stop(&adm_ctx.tconn->worker);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
+{
+	static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
+	struct sk_buff *msg;
+	struct drbd_genlmsghdr *d_out;
+	unsigned seq;
+	int err = -ENOMEM;
+
+	if (sib->sib_reason == SIB_SYNC_PROGRESS) {
+		if (time_after(jiffies, mdev->rs_last_bcast + HZ))
+			mdev->rs_last_bcast = jiffies;
+		else
+			return;
+	}
+
+	seq = atomic_inc_return(&drbd_genl_seq);
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+	if (!msg)
+		goto failed;
+
+	err = -EMSGSIZE;
+	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
+	if (!d_out) /* cannot happen, but anyways. */
+		goto nla_put_failure;
+	d_out->minor = mdev_to_minor(mdev);
+	d_out->ret_code = NO_ERROR;
+
+	if (nla_put_status_info(msg, mdev, sib))
+		goto nla_put_failure;
+	genlmsg_end(msg, d_out);
+	err = drbd_genl_multicast_events(msg, 0);
+	/* msg has been consumed or freed in netlink_broadcast() */
+	if (err && err != -ESRCH)
+		goto failed;
+
+	return;
+
+nla_put_failure:
+	nlmsg_free(msg);
+failed:
+	dev_err(DEV, "Error %d while broadcasting event. "
+			"Event seq:%u sib_reason:%u\n",
+			err, seq, sib->sib_reason);
+}
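
Aside: every drbd_adm_* handler added above follows the same prepare/finish
skeleton around the global adm_ctx.  A minimal sketch of that shape follows;
the handler name and the "act on adm_ctx" step are placeholders, not code
from this patch:

	/* Sketch only: common shape of the new genetlink admin handlers.
	 * drbd_adm_prepare() resolves the minor/resource context into
	 * adm_ctx; drbd_adm_finish() sends the reply and cleans up. */
	int drbd_adm_example(struct sk_buff *skb, struct genl_info *info)
	{
		enum drbd_ret_code retcode;

		retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
		if (!adm_ctx.reply_skb)		/* no reply skb could be set up */
			return retcode;
		if (retcode != NO_ERROR)	/* context lookup failed */
			goto out;

		/* ... act on adm_ctx.mdev or adm_ctx.tconn here ... */
	out:
		drbd_adm_finish(info, retcode);
		return 0;
	}
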
diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c
new file mode 100644
index 0000000..fa672b6d
--- /dev/null
+++ b/drivers/block/drbd/drbd_nla.c
@@ -0,0 +1,55 @@
+#include "drbd_wrappers.h"
+#include <linux/kernel.h>
+#include <net/netlink.h>
+#include <linux/drbd_genl_api.h>
+#include "drbd_nla.h"
+
+static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
+{
+	struct nlattr *head = nla_data(nla);
+	int len = nla_len(nla);
+	int rem;
+
+	/*
+	 * validate_nla (called from nla_parse_nested) ignores attributes
+	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
+	 * To have it also validate attributes that carry the
+	 * DRBD_GENLA_F_MANDATORY flag, check and remove that flag before
+	 * calling nla_parse_nested.
+	 */
+
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
+			if (nla_type(nla) > maxtype)
+				return -EOPNOTSUPP;
+		}
+	}
+	return 0;
+}
+
+int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+			  const struct nla_policy *policy)
+{
+	int err;
+
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (!err)
+		err = nla_parse_nested(tb, maxtype, nla, policy);
+
+	return err;
+}
+
+struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
+{
+	int err;
+	/*
+	 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
+	 * we don't know about that attribute, reject all the nested
+	 * attributes.
+	 */
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (err)
+		return ERR_PTR(err);
+	return nla_find_nested(nla, attrtype);
+}
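
Aside: a hypothetical caller of drbd_nla_parse_nested(), sketched under the
assumption that it parses the DRBD_NLA_CFG_CONTEXT attribute used earlier in
this patch.  drbd_cfg_context_nl_policy, T_ctx_resource_name and __nla_type()
are referenced elsewhere in the patch; the wrapper function itself is
illustrative only:

	/* Hypothetical usage sketch: parse a nested cfg-context attribute.
	 * drbd_nla_parse_nested() strips DRBD_GENLA_F_MANDATORY first, so
	 * nla_parse_nested() can validate those attributes like any other;
	 * an unknown mandatory attribute yields -EOPNOTSUPP instead. */
	static int example_parse_cfg_context(struct nlattr *ctx)
	{
		struct nlattr *tb[ARRAY_SIZE(drbd_cfg_context_nl_policy)];
		int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
		int err;

		err = drbd_nla_parse_nested(tb, maxtype, ctx,
					    drbd_cfg_context_nl_policy);
		if (err)
			return err;
		if (tb[__nla_type(T_ctx_resource_name)])
			; /* resource name was supplied */
		return 0;
	}
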
diff --git a/drivers/block/drbd/drbd_nla.h b/drivers/block/drbd/drbd_nla.h
new file mode 100644
index 0000000..679c2d5
--- /dev/null
+++ b/drivers/block/drbd/drbd_nla.h
@@ -0,0 +1,8 @@
+#ifndef __DRBD_NLA_H
+#define __DRBD_NLA_H
+
+extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+				 const struct nla_policy *policy);
+extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
+
+#endif  /* __DRBD_NLA_H */
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 5496104..56672a6 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -167,18 +167,24 @@
 		 * we convert to sectors in the display below. */
 		unsigned long bm_bits = drbd_bm_bits(mdev);
 		unsigned long bit_pos;
+		unsigned long long stop_sector = 0;
 		if (mdev->state.conn == C_VERIFY_S ||
-		    mdev->state.conn == C_VERIFY_T)
+		    mdev->state.conn == C_VERIFY_T) {
 			bit_pos = bm_bits - mdev->ov_left;
-		else
+			if (verify_can_do_stop_sector(mdev))
+				stop_sector = mdev->ov_stop_sector;
+		} else
 			bit_pos = mdev->bm_resync_fo;
 		/* Total sectors may be slightly off for oddly
 		 * sized devices. So what. */
 		seq_printf(seq,
-			"\t%3d%% sector pos: %llu/%llu\n",
+			"\t%3d%% sector pos: %llu/%llu",
 			(int)(bit_pos / (bm_bits/100+1)),
 			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
 			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
+		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
+			seq_printf(seq, " stop sector: %llu", stop_sector);
+		seq_printf(seq, "\n");
 	}
 }
 
@@ -194,9 +200,11 @@
 
 static int drbd_seq_show(struct seq_file *seq, void *v)
 {
-	int i, hole = 0;
+	int i, prev_i = -1;
 	const char *sn;
 	struct drbd_conf *mdev;
+	struct net_conf *nc;
+	char wp;
 
 	static char write_ordering_chars[] = {
 		[WO_none] = 'n',
@@ -227,16 +235,11 @@
 	 oos .. known out-of-sync kB
 	*/
 
-	for (i = 0; i < minor_count; i++) {
-		mdev = minor_to_mdev(i);
-		if (!mdev) {
-			hole = 1;
-			continue;
-		}
-		if (hole) {
-			hole = 0;
+	rcu_read_lock();
+	idr_for_each_entry(&minors, mdev, i) {
+		if (prev_i != i - 1)
 			seq_printf(seq, "\n");
-		}
+		prev_i = i;
 
 		sn = drbd_conn_str(mdev->state.conn);
 
@@ -248,6 +251,8 @@
 			/* reset mdev->congestion_reason */
 			bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
 
+			nc = rcu_dereference(mdev->tconn->net_conf);
+			wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
 			seq_printf(seq,
 			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
@@ -257,9 +262,8 @@
 			   drbd_role_str(mdev->state.peer),
 			   drbd_disk_str(mdev->state.disk),
 			   drbd_disk_str(mdev->state.pdsk),
-			   (mdev->net_conf == NULL ? ' ' :
-			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
-			   is_susp(mdev->state) ? 's' : 'r',
+			   wp,
+			   drbd_suspended(mdev) ? 's' : 'r',
 			   mdev->state.aftr_isp ? 'a' : '-',
 			   mdev->state.peer_isp ? 'p' : '-',
 			   mdev->state.user_isp ? 'u' : '-',
@@ -276,8 +280,8 @@
 			   atomic_read(&mdev->rs_pending_cnt),
 			   atomic_read(&mdev->unacked_cnt),
 			   atomic_read(&mdev->ap_bio_cnt),
-			   mdev->epochs,
-			   write_ordering_chars[mdev->write_ordering]
+			   mdev->tconn->epochs,
+			   write_ordering_chars[mdev->tconn->write_ordering]
 			);
 			seq_printf(seq, " oos:%llu\n",
 				   Bit2KB((unsigned long long)
@@ -302,6 +306,7 @@
 			}
 		}
 	}
+	rcu_read_unlock();
 
 	return 0;
 }
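
Aside: the drbd_proc.c hunks above replace the probe loop over all possible
minor numbers with an RCU-protected walk of the minors idr.  The pattern in
isolation, as a sketch (loop body elided, not code from this patch):

	/* Visit only configured minors, under rcu_read_lock(), instead of
	 * calling minor_to_mdev() for every possible minor number. */
	struct drbd_conf *mdev;
	int i;

	rcu_read_lock();
	idr_for_each_entry(&minors, mdev, i) {
		/* ... emit one status block for this mdev ... */
	}
	rcu_read_unlock();
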
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c74ca2d..a9eccfc 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -48,17 +48,25 @@
 
 #include "drbd_vli.h"
 
+struct packet_info {
+	enum drbd_packet cmd;
+	unsigned int size;
+	unsigned int vnr;
+	void *data;
+};
+
 enum finish_epoch {
 	FE_STILL_LIVE,
 	FE_DESTROYED,
 	FE_RECYCLED,
 };
 
-static int drbd_do_handshake(struct drbd_conf *mdev);
-static int drbd_do_auth(struct drbd_conf *mdev);
+static int drbd_do_features(struct drbd_tconn *tconn);
+static int drbd_do_auth(struct drbd_tconn *tconn);
+static int drbd_disconnected(struct drbd_conf *mdev);
 
-static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
-static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
+static int e_end_block(struct drbd_work *, int);
 
 
 #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
@@ -142,11 +150,12 @@
 	*head = chain_first;
 }
 
-static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
+static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
+				       unsigned int number)
 {
 	struct page *page = NULL;
 	struct page *tmp = NULL;
-	int i = 0;
+	unsigned int i = 0;
 
 	/* Yes, testing drbd_pp_vacant outside the lock is racy.
 	 * So what. It saves a spin_lock. */
@@ -175,7 +184,7 @@
 		return page;
 
 	/* Not enough pages immediately available this time.
-	 * No need to jump around here, drbd_pp_alloc will retry this
+	 * No need to jump around here, drbd_alloc_pages will retry this
 	 * function "soon". */
 	if (page) {
 		tmp = page_chain_tail(page, NULL);
@@ -187,9 +196,10 @@
 	return NULL;
 }
 
-static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
+static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
+					   struct list_head *to_be_freed)
 {
-	struct drbd_epoch_entry *e;
+	struct drbd_peer_request *peer_req;
 	struct list_head *le, *tle;
 
 	/* The EEs are always appended to the end of the list. Since
@@ -198,8 +208,8 @@
 	   stop to examine the list... */
 
 	list_for_each_safe(le, tle, &mdev->net_ee) {
-		e = list_entry(le, struct drbd_epoch_entry, w.list);
-		if (drbd_ee_has_active_page(e))
+		peer_req = list_entry(le, struct drbd_peer_request, w.list);
+		if (drbd_peer_req_has_active_page(peer_req))
 			break;
 		list_move(le, to_be_freed);
 	}
@@ -208,18 +218,18 @@
 static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
 {
 	LIST_HEAD(reclaimed);
-	struct drbd_epoch_entry *e, *t;
+	struct drbd_peer_request *peer_req, *t;
 
-	spin_lock_irq(&mdev->req_lock);
-	reclaim_net_ee(mdev, &reclaimed);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	list_for_each_entry_safe(e, t, &reclaimed, w.list)
-		drbd_free_net_ee(mdev, e);
+	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
+		drbd_free_net_peer_req(mdev, peer_req);
 }
 
 /**
- * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
+ * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
  * @mdev:	DRBD device.
  * @number:	number of pages requested
  * @retry:	whether to retry, if not enough pages are available right now
@@ -230,23 +240,31 @@
  *
  * Returns a page chain linked via page->private.
  */
-static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
+struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
+			      bool retry)
 {
 	struct page *page = NULL;
+	struct net_conf *nc;
 	DEFINE_WAIT(wait);
+	int mxb;
 
 	/* Yes, we may run up to @number over max_buffers. If we
 	 * follow it strictly, the admin will get it wrong anyways. */
-	if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers)
-		page = drbd_pp_first_pages_or_try_alloc(mdev, number);
+	rcu_read_lock();
+	nc = rcu_dereference(mdev->tconn->net_conf);
+	mxb = nc ? nc->max_buffers : 1000000;
+	rcu_read_unlock();
+
+	if (atomic_read(&mdev->pp_in_use) < mxb)
+		page = __drbd_alloc_pages(mdev, number);
 
 	while (page == NULL) {
 		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
 
 		drbd_kick_lo_and_reclaim_net(mdev);
 
-		if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) {
-			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
+		if (atomic_read(&mdev->pp_in_use) < mxb) {
+			page = __drbd_alloc_pages(mdev, number);
 			if (page)
 				break;
 		}
@@ -255,7 +273,7 @@
 			break;
 
 		if (signal_pending(current)) {
-			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
+			dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
 			break;
 		}
 
@@ -268,11 +286,11 @@
 	return page;
 }
 
-/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
- * Is also used from inside an other spin_lock_irq(&mdev->req_lock);
+/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
+ * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
  * Either links the page chain back to the global pool,
  * or returns all pages to the system. */
-static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
+static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
 {
 	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
 	int i;
@@ -280,7 +298,7 @@
 	if (page == NULL)
 		return;
 
-	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
+	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
 		i = page_chain_free(page);
 	else {
 		struct page *tmp;
@@ -302,127 +320,130 @@
  _drbd_wait_ee_list_empty()
 
 You must not have the req_lock:
- drbd_free_ee()
- drbd_alloc_ee()
- drbd_init_ee()
- drbd_release_ee()
+ drbd_free_peer_req()
+ drbd_alloc_peer_req()
+ drbd_free_peer_reqs()
  drbd_ee_fix_bhs()
- drbd_process_done_ee()
+ drbd_finish_peer_reqs()
  drbd_clear_done_ee()
  drbd_wait_ee_list_empty()
 */
 
-struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
-				     u64 id,
-				     sector_t sector,
-				     unsigned int data_size,
-				     gfp_t gfp_mask) __must_hold(local)
+struct drbd_peer_request *
+drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
+		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
 {
-	struct drbd_epoch_entry *e;
+	struct drbd_peer_request *peer_req;
 	struct page *page = NULL;
 	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
 
 	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
 		return NULL;
 
-	e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
-	if (!e) {
+	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
+	if (!peer_req) {
 		if (!(gfp_mask & __GFP_NOWARN))
-			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
+			dev_err(DEV, "%s: allocation failed\n", __func__);
 		return NULL;
 	}
 
 	if (data_size) {
-		page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
+		page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
 		if (!page)
 			goto fail;
 	}
 
-	INIT_HLIST_NODE(&e->collision);
-	e->epoch = NULL;
-	e->mdev = mdev;
-	e->pages = page;
-	atomic_set(&e->pending_bios, 0);
-	e->size = data_size;
-	e->flags = 0;
-	e->sector = sector;
-	e->block_id = id;
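+	/* Peer requests are now tracked in the per-device interval tree
+	 * rather than a collision hash; start out unhooked. */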
+	drbd_clear_interval(&peer_req->i);
+	peer_req->i.size = data_size;
+	peer_req->i.sector = sector;
+	peer_req->i.local = false;
+	peer_req->i.waiting = false;
 
-	return e;
+	peer_req->epoch = NULL;
+	peer_req->w.mdev = mdev;
+	peer_req->pages = page;
+	atomic_set(&peer_req->pending_bios, 0);
+	peer_req->flags = 0;
+	/*
+	 * The block_id is opaque to the receiver.  It is not endianness
+	 * converted, and sent back to the sender unchanged.
+	 */
+	peer_req->block_id = id;
+
+	return peer_req;
 
  fail:
-	mempool_free(e, drbd_ee_mempool);
+	mempool_free(peer_req, drbd_ee_mempool);
 	return NULL;
 }
 
-void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net)
+void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
+		       int is_net)
 {
-	if (e->flags & EE_HAS_DIGEST)
-		kfree(e->digest);
-	drbd_pp_free(mdev, e->pages, is_net);
-	D_ASSERT(atomic_read(&e->pending_bios) == 0);
-	D_ASSERT(hlist_unhashed(&e->collision));
-	mempool_free(e, drbd_ee_mempool);
+	if (peer_req->flags & EE_HAS_DIGEST)
+		kfree(peer_req->digest);
+	drbd_free_pages(mdev, peer_req->pages, is_net);
+	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
+	D_ASSERT(drbd_interval_empty(&peer_req->i));
+	mempool_free(peer_req, drbd_ee_mempool);
 }
 
-int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
+int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
 {
 	LIST_HEAD(work_list);
-	struct drbd_epoch_entry *e, *t;
+	struct drbd_peer_request *peer_req, *t;
 	int count = 0;
 	int is_net = list == &mdev->net_ee;
 
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
 	list_splice_init(list, &work_list);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	list_for_each_entry_safe(e, t, &work_list, w.list) {
-		drbd_free_some_ee(mdev, e, is_net);
+	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
+		__drbd_free_peer_req(mdev, peer_req, is_net);
 		count++;
 	}
 	return count;
 }
 
-
 /*
- * This function is called from _asender only_
- * but see also comments in _req_mod(,barrier_acked)
- * and receive_Barrier.
- *
- * Move entries from net_ee to done_ee, if ready.
- * Grab done_ee, call all callbacks, free the entries.
- * The callbacks typically send out ACKs.
+ * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
  */
-static int drbd_process_done_ee(struct drbd_conf *mdev)
+static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
 {
 	LIST_HEAD(work_list);
 	LIST_HEAD(reclaimed);
-	struct drbd_epoch_entry *e, *t;
-	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
+	struct drbd_peer_request *peer_req, *t;
+	int err = 0;
 
-	spin_lock_irq(&mdev->req_lock);
-	reclaim_net_ee(mdev, &reclaimed);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
 	list_splice_init(&mdev->done_ee, &work_list);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	list_for_each_entry_safe(e, t, &reclaimed, w.list)
-		drbd_free_net_ee(mdev, e);
+	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
+		drbd_free_net_peer_req(mdev, peer_req);
 
 	/* possible callbacks here:
-	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
+	 * e_end_block, e_end_resync_block, and e_send_superseded.
 	 * all ignore the last argument.
 	 */
-	list_for_each_entry_safe(e, t, &work_list, w.list) {
+	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
+		int err2;
+
 		/* list_del not necessary, next/prev members not touched */
-		ok = e->w.cb(mdev, &e->w, !ok) && ok;
-		drbd_free_ee(mdev, e);
+		err2 = peer_req->w.cb(&peer_req->w, !!err);
+		if (!err)
+			err = err2;
+		drbd_free_peer_req(mdev, peer_req);
 	}
 	wake_up(&mdev->ee_wait);
 
-	return ok;
+	return err;
 }
 
-void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+				     struct list_head *head)
 {
 	DEFINE_WAIT(wait);
 
@@ -430,55 +451,22 @@
 	 * and calling prepare_to_wait in the fast path */
 	while (!list_empty(head)) {
 		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock_irq(&mdev->req_lock);
+		spin_unlock_irq(&mdev->tconn->req_lock);
 		io_schedule();
 		finish_wait(&mdev->ee_wait, &wait);
-		spin_lock_irq(&mdev->req_lock);
+		spin_lock_irq(&mdev->tconn->req_lock);
 	}
 }
 
-void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+				    struct list_head *head)
 {
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
 	_drbd_wait_ee_list_empty(mdev, head);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 }
 
-/* see also kernel_accept; which is only present since 2.6.18.
- * also we want to log which part of it failed, exactly */
-static int drbd_accept(struct drbd_conf *mdev, const char **what,
-		struct socket *sock, struct socket **newsock)
-{
-	struct sock *sk = sock->sk;
-	int err = 0;
-
-	*what = "listen";
-	err = sock->ops->listen(sock, 5);
-	if (err < 0)
-		goto out;
-
-	*what = "sock_create_lite";
-	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
-			       newsock);
-	if (err < 0)
-		goto out;
-
-	*what = "accept";
-	err = sock->ops->accept(sock, *newsock, 0);
-	if (err < 0) {
-		sock_release(*newsock);
-		*newsock = NULL;
-		goto out;
-	}
-	(*newsock)->ops  = sock->ops;
-	__module_get((*newsock)->ops->owner);
-
-out:
-	return err;
-}
-
-static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock,
-		    void *buf, size_t size, int flags)
+static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
 {
 	mm_segment_t oldfs;
 	struct kvec iov = {
@@ -500,59 +488,62 @@
 	return rv;
 }
 
-static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
+static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
 {
-	mm_segment_t oldfs;
-	struct kvec iov = {
-		.iov_base = buf,
-		.iov_len = size,
-	};
-	struct msghdr msg = {
-		.msg_iovlen = 1,
-		.msg_iov = (struct iovec *)&iov,
-		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
-	};
 	int rv;
 
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
+	rv = drbd_recv_short(tconn->data.socket, buf, size, 0);
 
-	for (;;) {
-		rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags);
-		if (rv == size)
-			break;
+	if (rv < 0) {
+		if (rv == -ECONNRESET)
+			conn_info(tconn, "sock was reset by peer\n");
+		else if (rv != -ERESTARTSYS)
+			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
+	} else if (rv == 0) {
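+		/* Zero means the peer closed the socket.  If we sent the
+		 * disconnect ourselves, give the state machine up to
+		 * ping_timeo to leave C_WF_REPORT_PARAMS before complaining. */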
+		if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
+			long t;
+			rcu_read_lock();
+			t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
+			rcu_read_unlock();
 
-		/* Note:
-		 * ECONNRESET	other side closed the connection
-		 * ERESTARTSYS	(on  sock) we got a signal
-		 */
+			t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t);
 
-		if (rv < 0) {
-			if (rv == -ECONNRESET)
-				dev_info(DEV, "sock was reset by peer\n");
-			else if (rv != -ERESTARTSYS)
-				dev_err(DEV, "sock_recvmsg returned %d\n", rv);
-			break;
-		} else if (rv == 0) {
-			dev_info(DEV, "sock was shut down by peer\n");
-			break;
-		} else	{
-			/* signal came in, or peer/link went down,
-			 * after we read a partial message
-			 */
-			/* D_ASSERT(signal_pending(current)); */
-			break;
+			if (t)
+				goto out;
 		}
-	};
-
-	set_fs(oldfs);
+		conn_info(tconn, "sock was shut down by peer\n");
+	}
 
 	if (rv != size)
-		drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
 
+out:
 	return rv;
 }
 
+static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
+{
+	int err;
+
+	err = drbd_recv(tconn, buf, size);
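+	/* Fold a short read into -EIO: callers get 0 on success or a
+	 * negative error code, never a byte count. */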
+	if (err != size) {
+		if (err >= 0)
+			err = -EIO;
+	} else
+		err = 0;
+	return err;
+}
+
+static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
+{
+	int err;
+
+	err = drbd_recv_all(tconn, buf, size);
+	if (err && !signal_pending(current))
+		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
+	return err;
+}
+
 /* quoting tcp(7):
  *   On individual connections, the socket buffer size must be set prior to the
  *   listen(2) or connect(2) calls in order to have it take effect.
@@ -572,29 +563,50 @@
 	}
 }
 
-static struct socket *drbd_try_connect(struct drbd_conf *mdev)
+static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 {
 	const char *what;
 	struct socket *sock;
 	struct sockaddr_in6 src_in6;
-	int err;
+	struct sockaddr_in6 peer_in6;
+	struct net_conf *nc;
+	int err, peer_addr_len, my_addr_len;
+	int sndbuf_size, rcvbuf_size, connect_int;
 	int disconnect_on_error = 1;
 
-	if (!get_net_conf(mdev))
+	rcu_read_lock();
+	nc = rcu_dereference(tconn->net_conf);
+	if (!nc) {
+		rcu_read_unlock();
 		return NULL;
+	}
+	sndbuf_size = nc->sndbuf_size;
+	rcvbuf_size = nc->rcvbuf_size;
+	connect_int = nc->connect_int;
+	rcu_read_unlock();
+
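+	/* Take stack copies of the configured addresses so the bind and
+	 * connect calls below work on a consistent snapshot. */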
+	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
+	memcpy(&src_in6, &tconn->my_addr, my_addr_len);
+
+	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
+		src_in6.sin6_port = 0;
+	else
+		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
+
+	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(peer_in6));
+	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);
 
 	what = "sock_create_kern";
-	err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
-		SOCK_STREAM, IPPROTO_TCP, &sock);
+	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
+			       SOCK_STREAM, IPPROTO_TCP, &sock);
 	if (err < 0) {
 		sock = NULL;
 		goto out;
 	}
 
 	sock->sk->sk_rcvtimeo =
-	sock->sk->sk_sndtimeo =  mdev->net_conf->try_connect_int*HZ;
-	drbd_setbufsize(sock, mdev->net_conf->sndbuf_size,
-			mdev->net_conf->rcvbuf_size);
+	sock->sk->sk_sndtimeo = connect_int * HZ;
+	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
 
        /* explicitly bind to the configured IP as source IP
 	*  for the outgoing connections.
@@ -603,17 +615,8 @@
 	* Make sure to use 0 as port number, so linux selects
 	*  a free one dynamically.
 	*/
-	memcpy(&src_in6, mdev->net_conf->my_addr,
-	       min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6)));
-	if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6)
-		src_in6.sin6_port = 0;
-	else
-		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
-
 	what = "bind before connect";
-	err = sock->ops->bind(sock,
-			      (struct sockaddr *) &src_in6,
-			      mdev->net_conf->my_addr_len);
+	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
 	if (err < 0)
 		goto out;
 
@@ -621,9 +624,7 @@
 	 * stay C_WF_CONNECTION, don't go Disconnecting! */
 	disconnect_on_error = 0;
 	what = "connect";
-	err = sock->ops->connect(sock,
-				 (struct sockaddr *)mdev->net_conf->peer_addr,
-				 mdev->net_conf->peer_addr_len, 0);
+	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
 
 out:
 	if (err < 0) {
@@ -641,91 +642,174 @@
 			disconnect_on_error = 0;
 			break;
 		default:
-			dev_err(DEV, "%s failed, err = %d\n", what, err);
+			conn_err(tconn, "%s failed, err = %d\n", what, err);
 		}
 		if (disconnect_on_error)
-			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 	}
-	put_net_conf(mdev);
+
 	return sock;
 }
 
-static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
+struct accept_wait_data {
+	struct drbd_tconn *tconn;
+	struct socket *s_listen;
+	struct completion door_bell;
+	void (*original_sk_state_change)(struct sock *sk);
+};
+
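+/* sk_state_change callback installed on the listen socket: signal the
+   waiter in drbd_wait_for_connect() once a connection is established,
+   then chain to the original callback. */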
+static void drbd_incoming_connection(struct sock *sk)
 {
-	int timeo, err;
-	struct socket *s_estab = NULL, *s_listen;
+	struct accept_wait_data *ad = sk->sk_user_data;
+	void (*state_change)(struct sock *sk);
+
+	state_change = ad->original_sk_state_change;
+	if (sk->sk_state == TCP_ESTABLISHED)
+		complete(&ad->door_bell);
+	state_change(sk);
+}
+
+static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
+{
+	int err, sndbuf_size, rcvbuf_size, my_addr_len;
+	struct sockaddr_in6 my_addr;
+	struct socket *s_listen;
+	struct net_conf *nc;
 	const char *what;
 
-	if (!get_net_conf(mdev))
-		return NULL;
+	rcu_read_lock();
+	nc = rcu_dereference(tconn->net_conf);
+	if (!nc) {
+		rcu_read_unlock();
+		return -EIO;
+	}
+	sndbuf_size = nc->sndbuf_size;
+	rcvbuf_size = nc->rcvbuf_size;
+	rcu_read_unlock();
+
+	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
+	memcpy(&my_addr, &tconn->my_addr, my_addr_len);
 
 	what = "sock_create_kern";
-	err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
-		SOCK_STREAM, IPPROTO_TCP, &s_listen);
+	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
+			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
 	if (err) {
 		s_listen = NULL;
 		goto out;
 	}
 
-	timeo = mdev->net_conf->try_connect_int * HZ;
-	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
-
-	s_listen->sk->sk_reuse    = SK_CAN_REUSE; /* SO_REUSEADDR */
-	s_listen->sk->sk_rcvtimeo = timeo;
-	s_listen->sk->sk_sndtimeo = timeo;
-	drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
-			mdev->net_conf->rcvbuf_size);
+	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
+	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
 
 	what = "bind before listen";
-	err = s_listen->ops->bind(s_listen,
-			      (struct sockaddr *) mdev->net_conf->my_addr,
-			      mdev->net_conf->my_addr_len);
+	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
 	if (err < 0)
 		goto out;
 
-	err = drbd_accept(mdev, &what, s_listen, &s_estab);
+	ad->s_listen = s_listen;
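+	/* Hook our state-change callback under sk_callback_lock, keeping
+	 * the original so unregister_state_change() can restore it. */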
+	write_lock_bh(&s_listen->sk->sk_callback_lock);
+	ad->original_sk_state_change = s_listen->sk->sk_state_change;
+	s_listen->sk->sk_state_change = drbd_incoming_connection;
+	s_listen->sk->sk_user_data = ad;
+	write_unlock_bh(&s_listen->sk->sk_callback_lock);
 
+	what = "listen";
+	err = s_listen->ops->listen(s_listen, 5);
+	if (err < 0)
+		goto out;
+
+	return 0;
 out:
 	if (s_listen)
 		sock_release(s_listen);
 	if (err < 0) {
 		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
-			dev_err(DEV, "%s failed, err = %d\n", what, err);
-			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			conn_err(tconn, "%s failed, err = %d\n", what, err);
+			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 		}
 	}
-	put_net_conf(mdev);
+
+	return -EIO;
+}
+
+static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
+{
+	write_lock_bh(&sk->sk_callback_lock);
+	sk->sk_state_change = ad->original_sk_state_change;
+	sk->sk_user_data = NULL;
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
+{
+	int timeo, connect_int, err = 0;
+	struct socket *s_estab = NULL;
+	struct net_conf *nc;
+
+	rcu_read_lock();
+	nc = rcu_dereference(tconn->net_conf);
+	if (!nc) {
+		rcu_read_unlock();
+		return NULL;
+	}
+	connect_int = nc->connect_int;
+	rcu_read_unlock();
+
+	timeo = connect_int * HZ;
+	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
+
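+	/* Wait (interruptibly, with jittered timeout) until the listen
+	 * socket's state-change hook rings the door bell, then pick the
+	 * connection up with kernel_accept(). */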
+	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
+	if (err <= 0)
+		return NULL;
+
+	err = kernel_accept(ad->s_listen, &s_estab, 0);
+	if (err < 0) {
+		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
+			conn_err(tconn, "accept failed, err = %d\n", err);
+			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+		}
+	}
+
+	if (s_estab)
+		unregister_state_change(s_estab->sk, ad);
 
 	return s_estab;
 }
 
-static int drbd_send_fp(struct drbd_conf *mdev,
-	struct socket *sock, enum drbd_packets cmd)
-{
-	struct p_header80 *h = &mdev->data.sbuf.header.h80;
+static int decode_header(struct drbd_tconn *, void *, struct packet_info *);
 
-	return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0);
+static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
+			     enum drbd_packet cmd)
+{
+	if (!conn_prepare_command(tconn, sock))
+		return -EIO;
+	return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
 }
 
-static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock)
+static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
 {
-	struct p_header80 *h = &mdev->data.rbuf.header.h80;
-	int rr;
+	unsigned int header_size = drbd_header_size(tconn);
+	struct packet_info pi;
+	int err;
 
-	rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0);
-
-	if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC)
-		return be16_to_cpu(h->command);
-
-	return 0xffff;
+	err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
+	if (err != header_size) {
+		if (err >= 0)
+			err = -EIO;
+		return err;
+	}
+	err = decode_header(tconn, tconn->data.rbuf, &pi);
+	if (err)
+		return err;
+	return pi.cmd;
 }
 
 /**
  * drbd_socket_okay() - Free the socket if its connection is not okay
- * @mdev:	DRBD device.
  * @sock:	pointer to the pointer to the socket.
  */
-static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
+static int drbd_socket_okay(struct socket **sock)
 {
 	int rr;
 	char tb[4];
@@ -733,7 +817,7 @@
 	if (!*sock)
 		return false;
 
-	rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
+	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
 
 	if (rr > 0 || rr == -EAGAIN) {
 		return true;
@@ -743,6 +827,31 @@
 		return false;
 	}
 }
+/* Called when a connection is established, or when a new minor gets
+   created within an existing connection */
+int drbd_connected(struct drbd_conf *mdev)
+{
+	int err;
+
+	atomic_set(&mdev->packet_seq, 0);
+	mdev->peer_seq = 0;
+
+	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
+		&mdev->tconn->cstate_mutex :
+		&mdev->own_state_mutex;
+
+	err = drbd_send_sync_param(mdev);
+	if (!err)
+		err = drbd_send_sizes(mdev, 0, 0);
+	if (!err)
+		err = drbd_send_uuids(mdev);
+	if (!err)
+		err = drbd_send_current_state(mdev);
+	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
+	clear_bit(RESIZE_PENDING, &mdev->flags);
+	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
+	return err;
+}
 
 /*
  * return values:
@@ -752,232 +861,315 @@
  *     no point in trying again, please go standalone.
  *  -2 We do not have a network config...
  */
-static int drbd_connect(struct drbd_conf *mdev)
+static int conn_connect(struct drbd_tconn *tconn)
 {
-	struct socket *s, *sock, *msock;
-	int try, h, ok;
+	struct drbd_socket sock, msock;
+	struct drbd_conf *mdev;
+	struct net_conf *nc;
+	int vnr, timeout, h, ok;
+	bool discard_my_data;
 	enum drbd_state_rv rv;
+	struct accept_wait_data ad = {
+		.tconn = tconn,
+		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
+	};
 
-	D_ASSERT(!mdev->data.socket);
-
-	if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
+	clear_bit(DISCONNECT_SENT, &tconn->flags);
+	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
 		return -2;
 
-	clear_bit(DISCARD_CONCURRENT, &mdev->flags);
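+	/* Two channels per connection: "sock" carries the data stream,
+	 * "msock" the meta-data and ack traffic. */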
+	mutex_init(&sock.mutex);
+	sock.sbuf = tconn->data.sbuf;
+	sock.rbuf = tconn->data.rbuf;
+	sock.socket = NULL;
+	mutex_init(&msock.mutex);
+	msock.sbuf = tconn->meta.sbuf;
+	msock.rbuf = tconn->meta.rbuf;
+	msock.socket = NULL;
 
-	sock  = NULL;
-	msock = NULL;
+	/* Assume that the peer only understands protocol 80 until we know better.  */
+	tconn->agreed_pro_version = 80;
+
+	if (prepare_listen_socket(tconn, &ad))
+		return 0;
 
 	do {
-		for (try = 0;;) {
-			/* 3 tries, this should take less than a second! */
-			s = drbd_try_connect(mdev);
-			if (s || ++try >= 3)
-				break;
-			/* give the other side time to call bind() & listen() */
-			schedule_timeout_interruptible(HZ / 10);
-		}
+		struct socket *s;
 
+		s = drbd_try_connect(tconn);
 		if (s) {
-			if (!sock) {
-				drbd_send_fp(mdev, s, P_HAND_SHAKE_S);
-				sock = s;
-				s = NULL;
-			} else if (!msock) {
-				drbd_send_fp(mdev, s, P_HAND_SHAKE_M);
-				msock = s;
-				s = NULL;
+			if (!sock.socket) {
+				sock.socket = s;
+				send_first_packet(tconn, &sock, P_INITIAL_DATA);
+			} else if (!msock.socket) {
+				clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
+				msock.socket = s;
+				send_first_packet(tconn, &msock, P_INITIAL_META);
 			} else {
-				dev_err(DEV, "Logic error in drbd_connect()\n");
+				conn_err(tconn, "Logic error in conn_connect()\n");
 				goto out_release_sockets;
 			}
 		}
 
-		if (sock && msock) {
-			schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10);
-			ok = drbd_socket_okay(mdev, &sock);
-			ok = drbd_socket_okay(mdev, &msock) && ok;
+		if (sock.socket && msock.socket) {
+			rcu_read_lock();
+			nc = rcu_dereference(tconn->net_conf);
+			timeout = nc->ping_timeo * HZ / 10;
+			rcu_read_unlock();
+			schedule_timeout_interruptible(timeout);
+			ok = drbd_socket_okay(&sock.socket);
+			ok = drbd_socket_okay(&msock.socket) && ok;
 			if (ok)
 				break;
 		}
 
 retry:
-		s = drbd_wait_for_connect(mdev);
+		s = drbd_wait_for_connect(tconn, &ad);
 		if (s) {
-			try = drbd_recv_fp(mdev, s);
-			drbd_socket_okay(mdev, &sock);
-			drbd_socket_okay(mdev, &msock);
-			switch (try) {
-			case P_HAND_SHAKE_S:
-				if (sock) {
-					dev_warn(DEV, "initial packet S crossed\n");
-					sock_release(sock);
+			int fp = receive_first_packet(tconn, s);
+			drbd_socket_okay(&sock.socket);
+			drbd_socket_okay(&msock.socket);
+			switch (fp) {
+			case P_INITIAL_DATA:
+				if (sock.socket) {
+					conn_warn(tconn, "initial packet S crossed\n");
+					sock_release(sock.socket);
+					sock.socket = s;
+					goto randomize;
 				}
-				sock = s;
+				sock.socket = s;
 				break;
-			case P_HAND_SHAKE_M:
-				if (msock) {
-					dev_warn(DEV, "initial packet M crossed\n");
-					sock_release(msock);
+			case P_INITIAL_META:
+				set_bit(RESOLVE_CONFLICTS, &tconn->flags);
+				if (msock.socket) {
+					conn_warn(tconn, "initial packet M crossed\n");
+					sock_release(msock.socket);
+					msock.socket = s;
+					goto randomize;
 				}
-				msock = s;
-				set_bit(DISCARD_CONCURRENT, &mdev->flags);
+				msock.socket = s;
 				break;
 			default:
-				dev_warn(DEV, "Error receiving initial packet\n");
+				conn_warn(tconn, "Error receiving initial packet\n");
 				sock_release(s);
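+				/* Crossed initial packets or garbage: let each
+				 * side retry with probability 1/2 to break
+				 * the tie. */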
+randomize:
 				if (random32() & 1)
 					goto retry;
 			}
 		}
 
-		if (mdev->state.conn <= C_DISCONNECTING)
+		if (tconn->cstate <= C_DISCONNECTING)
 			goto out_release_sockets;
 		if (signal_pending(current)) {
 			flush_signals(current);
 			smp_rmb();
-			if (get_t_state(&mdev->receiver) == Exiting)
+			if (get_t_state(&tconn->receiver) == EXITING)
 				goto out_release_sockets;
 		}
 
-		if (sock && msock) {
-			ok = drbd_socket_okay(mdev, &sock);
-			ok = drbd_socket_okay(mdev, &msock) && ok;
-			if (ok)
-				break;
-		}
-	} while (1);
+		ok = drbd_socket_okay(&sock.socket);
+		ok = drbd_socket_okay(&msock.socket) && ok;
+	} while (!ok);
 
-	msock->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
-	sock->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
+	if (ad.s_listen)
+		sock_release(ad.s_listen);
 
-	sock->sk->sk_allocation = GFP_NOIO;
-	msock->sk->sk_allocation = GFP_NOIO;
+	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
+	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
 
-	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
-	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
+	sock.socket->sk->sk_allocation = GFP_NOIO;
+	msock.socket->sk->sk_allocation = GFP_NOIO;
+
+	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
+	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
 
 	/* NOT YET ...
-	 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
-	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
-	 * first set it to the P_HAND_SHAKE timeout,
+	 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
+	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+	 * first set it to the P_CONNECTION_FEATURES timeout,
 	 * which we set to 4x the configured ping_timeout. */
-	sock->sk->sk_sndtimeo =
-	sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10;
+	rcu_read_lock();
+	nc = rcu_dereference(tconn->net_conf);
 
-	msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
-	msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+	sock.socket->sk->sk_sndtimeo =
+	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
+
+	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
+	timeout = nc->timeout * HZ / 10;
+	discard_my_data = nc->discard_my_data;
+	rcu_read_unlock();
+
+	msock.socket->sk->sk_sndtimeo = timeout;
 
 	/* we don't want delays.
 	 * we use TCP_CORK where appropriate, though */
-	drbd_tcp_nodelay(sock);
-	drbd_tcp_nodelay(msock);
+	drbd_tcp_nodelay(sock.socket);
+	drbd_tcp_nodelay(msock.socket);
 
-	mdev->data.socket = sock;
-	mdev->meta.socket = msock;
-	mdev->last_received = jiffies;
+	tconn->data.socket = sock.socket;
+	tconn->meta.socket = msock.socket;
+	tconn->last_received = jiffies;
 
-	D_ASSERT(mdev->asender.task == NULL);
-
-	h = drbd_do_handshake(mdev);
+	h = drbd_do_features(tconn);
 	if (h <= 0)
 		return h;
 
-	if (mdev->cram_hmac_tfm) {
+	if (tconn->cram_hmac_tfm) {
 		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
-		switch (drbd_do_auth(mdev)) {
+		switch (drbd_do_auth(tconn)) {
 		case -1:
-			dev_err(DEV, "Authentication of peer failed\n");
+			conn_err(tconn, "Authentication of peer failed\n");
 			return -1;
 		case 0:
-			dev_err(DEV, "Authentication of peer failed, trying again.\n");
+			conn_err(tconn, "Authentication of peer failed, trying again.\n");
 			return 0;
 		}
 	}
 
-	sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
-	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+	tconn->data.socket->sk->sk_sndtimeo = timeout;
+	tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
 
-	atomic_set(&mdev->packet_seq, 0);
-	mdev->peer_seq = 0;
-
-	if (drbd_send_protocol(mdev) == -1)
+	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
 		return -1;
-	set_bit(STATE_SENT, &mdev->flags);
-	drbd_send_sync_param(mdev, &mdev->sync_conf);
-	drbd_send_sizes(mdev, 0, 0);
-	drbd_send_uuids(mdev);
-	drbd_send_current_state(mdev);
-	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
-	clear_bit(RESIZE_PENDING, &mdev->flags);
 
-	spin_lock_irq(&mdev->req_lock);
-	rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL);
-	if (mdev->state.conn != C_WF_REPORT_PARAMS)
-		clear_bit(STATE_SENT, &mdev->flags);
-	spin_unlock_irq(&mdev->req_lock);
+	set_bit(STATE_SENT, &tconn->flags);
 
-	if (rv < SS_SUCCESS)
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		kref_get(&mdev->kref);
+		/* Prevent a race between resync-handshake and
+		 * being promoted to Primary.
+		 *
+		 * Grab and release the state mutex, so we know that any current
+		 * drbd_set_role() is finished, and any incoming drbd_set_role
+		 * will see the STATE_SENT flag, and wait for it to be cleared.
+		 */
+		mutex_lock(mdev->state_mutex);
+		mutex_unlock(mdev->state_mutex);
+
+		rcu_read_unlock();
+
+		if (discard_my_data)
+			set_bit(DISCARD_MY_DATA, &mdev->flags);
+		else
+			clear_bit(DISCARD_MY_DATA, &mdev->flags);
+
+		drbd_connected(mdev);
+		kref_put(&mdev->kref, &drbd_minor_destroy);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+
+	rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
+	if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) {
+		clear_bit(STATE_SENT, &tconn->flags);
 		return 0;
+	}
 
-	drbd_thread_start(&mdev->asender);
-	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
+	drbd_thread_start(&tconn->asender);
 
-	return 1;
+	mutex_lock(&tconn->conf_update);
+	/* The discard_my_data flag is a single-shot modifier to the next
+	 * connection attempt, the handshake of which is now well underway.
+	 * No need for rcu style copying of the whole struct
+	 * just to clear a single value. */
+	tconn->net_conf->discard_my_data = 0;
+	mutex_unlock(&tconn->conf_update);
+
+	return h;
 
 out_release_sockets:
-	if (sock)
-		sock_release(sock);
-	if (msock)
-		sock_release(msock);
+	if (ad.s_listen)
+		sock_release(ad.s_listen);
+	if (sock.socket)
+		sock_release(sock.socket);
+	if (msock.socket)
+		sock_release(msock.socket);
 	return -1;
 }
 
-static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size)
+static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
 {
-	union p_header *h = &mdev->data.rbuf.header;
-	int r;
+	unsigned int header_size = drbd_header_size(tconn);
 
-	r = drbd_recv(mdev, h, sizeof(*h));
-	if (unlikely(r != sizeof(*h))) {
-		if (!signal_pending(current))
-			dev_warn(DEV, "short read expecting header on sock: r=%d\n", r);
-		return false;
-	}
-
-	if (likely(h->h80.magic == BE_DRBD_MAGIC)) {
-		*cmd = be16_to_cpu(h->h80.command);
-		*packet_size = be16_to_cpu(h->h80.length);
-	} else if (h->h95.magic == BE_DRBD_MAGIC_BIG) {
-		*cmd = be16_to_cpu(h->h95.command);
-		*packet_size = be32_to_cpu(h->h95.length);
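+	/* Three wire formats: h100 (32-bit magic plus volume number), h95
+	 * (16-bit magic, 32-bit length) and the original h80 (32-bit magic,
+	 * 16-bit length).  header_size follows the agreed protocol version,
+	 * so at most one branch can match. */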
+	if (header_size == sizeof(struct p_header100) &&
+	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
+		struct p_header100 *h = header;
+		if (h->pad != 0) {
+			conn_err(tconn, "Header padding is not zero\n");
+			return -EINVAL;
+		}
+		pi->vnr = be16_to_cpu(h->volume);
+		pi->cmd = be16_to_cpu(h->command);
+		pi->size = be32_to_cpu(h->length);
+	} else if (header_size == sizeof(struct p_header95) &&
+		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
+		struct p_header95 *h = header;
+		pi->cmd = be16_to_cpu(h->command);
+		pi->size = be32_to_cpu(h->length);
+		pi->vnr = 0;
+	} else if (header_size == sizeof(struct p_header80) &&
+		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
+		struct p_header80 *h = header;
+		pi->cmd = be16_to_cpu(h->command);
+		pi->size = be16_to_cpu(h->length);
+		pi->vnr = 0;
 	} else {
-		dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
-		    be32_to_cpu(h->h80.magic),
-		    be16_to_cpu(h->h80.command),
-		    be16_to_cpu(h->h80.length));
-		return false;
+		conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
+			 be32_to_cpu(*(__be32 *)header),
+			 tconn->agreed_pro_version);
+		return -EINVAL;
 	}
-	mdev->last_received = jiffies;
-
-	return true;
+	pi->data = header + header_size;
+	return 0;
 }
 
-static void drbd_flush(struct drbd_conf *mdev)
+static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
+{
+	void *buffer = tconn->data.rbuf;
+	int err;
+
+	err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
+	if (err)
+		return err;
+
+	err = decode_header(tconn, buffer, pi);
+	tconn->last_received = jiffies;
+
+	return err;
+}
+
+static void drbd_flush(struct drbd_tconn *tconn)
 {
 	int rv;
+	struct drbd_conf *mdev;
+	int vnr;
 
-	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
-		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
-					NULL);
-		if (rv) {
-			dev_info(DEV, "local disk flush failed with status %d\n", rv);
-			/* would rather check on EOPNOTSUPP, but that is not reliable.
-			 * don't try again for ANY return value != 0
-			 * if (rv == -EOPNOTSUPP) */
-			drbd_bump_write_ordering(mdev, WO_drain_io);
+	if (tconn->write_ordering >= WO_bdev_flush) {
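+		/* Flush every attached volume.  Hold a kref and drop the RCU
+		 * read lock around blkdev_issue_flush(), which may sleep;
+		 * give up after the first failure. */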
+		rcu_read_lock();
+		idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+			if (!get_ldev(mdev))
+				continue;
+			kref_get(&mdev->kref);
+			rcu_read_unlock();
+
+			rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
+					GFP_NOIO, NULL);
+			if (rv) {
+				dev_info(DEV, "local disk flush failed with status %d\n", rv);
+				/* would rather check on EOPNOTSUPP, but that is not reliable.
+				 * don't try again for ANY return value != 0
+				 * if (rv == -EOPNOTSUPP) */
+				drbd_bump_write_ordering(tconn, WO_drain_io);
+			}
+			put_ldev(mdev);
+			kref_put(&mdev->kref, &drbd_minor_destroy);
+
+			rcu_read_lock();
+			if (rv)
+				break;
 		}
-		put_ldev(mdev);
+		rcu_read_unlock();
 	}
 }
 
@@ -987,7 +1179,7 @@
  * @epoch:	Epoch object.
  * @ev:		Epoch event.
  */
-static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
 					       struct drbd_epoch *epoch,
 					       enum epoch_event ev)
 {
@@ -995,7 +1187,7 @@
 	struct drbd_epoch *next_epoch;
 	enum finish_epoch rv = FE_STILL_LIVE;
 
-	spin_lock(&mdev->epoch_lock);
+	spin_lock(&tconn->epoch_lock);
 	do {
 		next_epoch = NULL;
 
@@ -1017,18 +1209,22 @@
 		    atomic_read(&epoch->active) == 0 &&
 		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
 			if (!(ev & EV_CLEANUP)) {
-				spin_unlock(&mdev->epoch_lock);
-				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
-				spin_lock(&mdev->epoch_lock);
+				spin_unlock(&tconn->epoch_lock);
+				drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
+				spin_lock(&tconn->epoch_lock);
 			}
+#if 0
+			/* FIXME: dec unacked on connection, once we have
+			 * something to count pending connection packets in. */
 			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
-				dec_unacked(mdev);
+				dec_unacked(epoch->tconn);
+#endif
 
-			if (mdev->current_epoch != epoch) {
+			if (tconn->current_epoch != epoch) {
 				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
 				list_del(&epoch->list);
 				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
-				mdev->epochs--;
+				tconn->epochs--;
 				kfree(epoch);
 
 				if (rv == FE_STILL_LIVE)
@@ -1039,7 +1235,6 @@
 				/* atomic_set(&epoch->active, 0); is already zero */
 				if (rv == FE_STILL_LIVE)
 					rv = FE_RECYCLED;
-				wake_up(&mdev->ee_wait);
 			}
 		}
 
@@ -1049,40 +1244,52 @@
 		epoch = next_epoch;
 	} while (1);
 
-	spin_unlock(&mdev->epoch_lock);
+	spin_unlock(&tconn->epoch_lock);
 
 	return rv;
 }
 
 /**
  * drbd_bump_write_ordering() - Fall back to an other write ordering method
- * @mdev:	DRBD device.
+ * @tconn:	DRBD connection.
  * @wo:		Write ordering method to try.
  */
-void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
+void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
 {
+	struct disk_conf *dc;
+	struct drbd_conf *mdev;
 	enum write_ordering_e pwo;
+	int vnr;
 	static char *write_ordering_str[] = {
 		[WO_none] = "none",
 		[WO_drain_io] = "drain",
 		[WO_bdev_flush] = "flush",
 	};
 
-	pwo = mdev->write_ordering;
+	pwo = tconn->write_ordering;
 	wo = min(pwo, wo);
-	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
-		wo = WO_drain_io;
-	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
-		wo = WO_none;
-	mdev->write_ordering = wo;
-	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
-		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (!get_ldev_if_state(mdev, D_ATTACHING))
+			continue;
+		dc = rcu_dereference(mdev->ldev->disk_conf);
+
+		if (wo == WO_bdev_flush && !dc->disk_flushes)
+			wo = WO_drain_io;
+		if (wo == WO_drain_io && !dc->disk_drain)
+			wo = WO_none;
+		put_ldev(mdev);
+	}
+	rcu_read_unlock();
+	tconn->write_ordering = wo;
+	if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
+		conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
 }
 
 /**
- * drbd_submit_ee()
+ * drbd_submit_peer_request()
  * @mdev:	DRBD device.
- * @e:		epoch entry
+ * @peer_req:	peer request
  * @rw:		flag field, see bio->bi_rw
  *
  * May spread the pages to multiple bios,
@@ -1096,14 +1303,15 @@
  *  on certain Xen deployments.
  */
 /* TODO allocate from our own bio_set. */
-int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
-		const unsigned rw, const int fault_type)
+int drbd_submit_peer_request(struct drbd_conf *mdev,
+			     struct drbd_peer_request *peer_req,
+			     const unsigned rw, const int fault_type)
 {
 	struct bio *bios = NULL;
 	struct bio *bio;
-	struct page *page = e->pages;
-	sector_t sector = e->sector;
-	unsigned ds = e->size;
+	struct page *page = peer_req->pages;
+	sector_t sector = peer_req->i.sector;
+	unsigned ds = peer_req->i.size;
 	unsigned n_bios = 0;
 	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
 	int err = -ENOMEM;
@@ -1122,12 +1330,12 @@
 		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
 		goto fail;
 	}
-	/* > e->sector, unless this is the first bio */
+	/* > peer_req->i.sector, unless this is the first bio */
 	bio->bi_sector = sector;
 	bio->bi_bdev = mdev->ldev->backing_bdev;
 	bio->bi_rw = rw;
-	bio->bi_private = e;
-	bio->bi_end_io = drbd_endio_sec;
+	bio->bi_private = peer_req;
+	bio->bi_end_io = drbd_peer_request_endio;
 
 	bio->bi_next = bios;
 	bios = bio;
@@ -1156,7 +1364,7 @@
 	D_ASSERT(page == NULL);
 	D_ASSERT(ds == 0);
 
-	atomic_set(&e->pending_bios, n_bios);
+	atomic_set(&peer_req->pending_bios, n_bios);
 	do {
 		bio = bios;
 		bios = bios->bi_next;
@@ -1175,26 +1383,57 @@
 	return err;
 }
 
-static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
+					     struct drbd_peer_request *peer_req)
+{
+	struct drbd_interval *i = &peer_req->i;
+
+	drbd_remove_interval(&mdev->write_requests, i);
+	drbd_clear_interval(i);
+
+	/* Wake up any processes waiting for this peer request to complete.  */
+	if (i->waiting)
+		wake_up(&mdev->misc_wait);
+}
+
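+/* Wait until no volume of this connection has submitted-but-uncompleted
+   peer write requests left on its active_ee list. */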
+void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		kref_get(&mdev->kref);
+		rcu_read_unlock();
+		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+		kref_put(&mdev->kref, &drbd_minor_destroy);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+}
+
+static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	int rv;
-	struct p_barrier *p = &mdev->data.rbuf.barrier;
+	struct p_barrier *p = pi->data;
 	struct drbd_epoch *epoch;
 
-	inc_unacked(mdev);
-
-	mdev->current_epoch->barrier_nr = p->barrier;
-	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
+	/* FIXME these are unacked on connection,
+	 * not a specific (peer)device.
+	 */
+	tconn->current_epoch->barrier_nr = p->barrier;
+	tconn->current_epoch->tconn = tconn;
+	rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
 
 	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
 	 * the activity log, which means it would not be resynced in case the
 	 * R_PRIMARY crashes now.
 	 * Therefore we must send the barrier_ack after the barrier request was
 	 * completed. */
-	switch (mdev->write_ordering) {
+	switch (tconn->write_ordering) {
 	case WO_none:
 		if (rv == FE_RECYCLED)
-			return true;
+			return 0;
 
 		/* receiver context, in the writeout path of the other node.
 		 * avoid potential distributed deadlock */
@@ -1202,81 +1441,75 @@
 		if (epoch)
 			break;
 		else
-			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
+			conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
 			/* Fall through */
 
 	case WO_bdev_flush:
 	case WO_drain_io:
-		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
-		drbd_flush(mdev);
+		conn_wait_active_ee_empty(tconn);
+		drbd_flush(tconn);
 
-		if (atomic_read(&mdev->current_epoch->epoch_size)) {
+		if (atomic_read(&tconn->current_epoch->epoch_size)) {
 			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
 			if (epoch)
 				break;
 		}
 
-		epoch = mdev->current_epoch;
-		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
-
-		D_ASSERT(atomic_read(&epoch->active) == 0);
-		D_ASSERT(epoch->flags == 0);
-
-		return true;
+		return 0;
 	default:
-		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
-		return false;
+		conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
+		return -EIO;
 	}
 
 	epoch->flags = 0;
 	atomic_set(&epoch->epoch_size, 0);
 	atomic_set(&epoch->active, 0);
 
-	spin_lock(&mdev->epoch_lock);
-	if (atomic_read(&mdev->current_epoch->epoch_size)) {
-		list_add(&epoch->list, &mdev->current_epoch->list);
-		mdev->current_epoch = epoch;
-		mdev->epochs++;
+	spin_lock(&tconn->epoch_lock);
+	if (atomic_read(&tconn->current_epoch->epoch_size)) {
+		list_add(&epoch->list, &tconn->current_epoch->list);
+		tconn->current_epoch = epoch;
+		tconn->epochs++;
 	} else {
 		/* The current_epoch got recycled while we allocated this one... */
 		kfree(epoch);
 	}
-	spin_unlock(&mdev->epoch_lock);
+	spin_unlock(&tconn->epoch_lock);
 
-	return true;
+	return 0;
 }
 
 /* used from receive_RSDataReply (recv_resync_read)
  * and from receive_Data */
-static struct drbd_epoch_entry *
-read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local)
+static struct drbd_peer_request *
+read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
+	      int data_size) __must_hold(local)
 {
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
-	struct drbd_epoch_entry *e;
+	struct drbd_peer_request *peer_req;
 	struct page *page;
-	int dgs, ds, rr;
-	void *dig_in = mdev->int_dig_in;
-	void *dig_vv = mdev->int_dig_vv;
+	int dgs, ds, err;
+	void *dig_in = mdev->tconn->int_dig_in;
+	void *dig_vv = mdev->tconn->int_dig_vv;
 	unsigned long *data;
 
-	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
-		crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
-
-	if (dgs) {
-		rr = drbd_recv(mdev, dig_in, dgs);
-		if (rr != dgs) {
-			if (!signal_pending(current))
-				dev_warn(DEV,
-					"short read receiving data digest: read %d expected %d\n",
-					rr, dgs);
+	dgs = 0;
+	if (mdev->tconn->peer_integrity_tfm) {
+		dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
+		/*
+		 * FIXME: Receive the incoming digest into the receive buffer
+		 *	  here, together with its struct p_data?
+		 */
+		err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
+		if (err)
 			return NULL;
-		}
+		data_size -= dgs;
 	}
 
-	data_size -= dgs;
-
-	ERR_IF(data_size &  0x1ff) return NULL;
-	ERR_IF(data_size >  DRBD_MAX_BIO_SIZE) return NULL;
+	if (!expect(IS_ALIGNED(data_size, 512)))
+		return NULL;
+	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
+		return NULL;
 
 	/* even though we trust our peer,
 	 * we sometimes have to double check. */
@@ -1291,47 +1524,42 @@
 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
 	 * which in turn might block on the other node at this very place.  */
-	e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
-	if (!e)
+	peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
+	if (!peer_req)
 		return NULL;
 
 	if (!data_size)
-		return e;
+		return peer_req;
 
 	ds = data_size;
-	page = e->pages;
+	page = peer_req->pages;
 	page_chain_for_each(page) {
 		unsigned len = min_t(int, ds, PAGE_SIZE);
 		data = kmap(page);
-		rr = drbd_recv(mdev, data, len);
+		err = drbd_recv_all_warn(mdev->tconn, data, len);
 		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
 			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
 			data[0] = data[0] ^ (unsigned long)-1;
 		}
 		kunmap(page);
-		if (rr != len) {
-			drbd_free_ee(mdev, e);
-			if (!signal_pending(current))
-				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
-				rr, len);
+		if (err) {
+			drbd_free_peer_req(mdev, peer_req);
 			return NULL;
 		}
-		ds -= rr;
+		ds -= len;
 	}
 
 	if (dgs) {
-		drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv);
+		drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
 		if (memcmp(dig_in, dig_vv, dgs)) {
 			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
 				(unsigned long long)sector, data_size);
-			drbd_bcast_ee(mdev, "digest failed",
-					dgs, dig_in, dig_vv, e);
-			drbd_free_ee(mdev, e);
+			drbd_free_peer_req(mdev, peer_req);
 			return NULL;
 		}
 	}
 	mdev->recv_cnt += data_size>>9;
-	return e;
+	return peer_req;
 }
 
 /* drbd_drain_block() just takes a data block
@@ -1340,30 +1568,26 @@
 static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
 {
 	struct page *page;
-	int rr, rv = 1;
+	int err = 0;
 	void *data;
 
 	if (!data_size)
-		return true;
+		return 0;
 
-	page = drbd_pp_alloc(mdev, 1, 1);
+	page = drbd_alloc_pages(mdev, 1, 1);
 
 	data = kmap(page);
 	while (data_size) {
-		rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
-		if (rr != min_t(int, data_size, PAGE_SIZE)) {
-			rv = 0;
-			if (!signal_pending(current))
-				dev_warn(DEV,
-					"short read receiving data: read %d expected %d\n",
-					rr, min_t(int, data_size, PAGE_SIZE));
+		unsigned int len = min_t(int, data_size, PAGE_SIZE);
+
+		err = drbd_recv_all_warn(mdev->tconn, data, len);
+		if (err)
 			break;
-		}
-		data_size -= rr;
+		data_size -= len;
 	}
 	kunmap(page);
-	drbd_pp_free(mdev, page, 0);
-	return rv;
+	drbd_free_pages(mdev, page, 0);
+	return err;
 }
 
 static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
@@ -1371,26 +1595,19 @@
 {
 	struct bio_vec *bvec;
 	struct bio *bio;
-	int dgs, rr, i, expect;
-	void *dig_in = mdev->int_dig_in;
-	void *dig_vv = mdev->int_dig_vv;
+	int dgs, err, i, expect;
+	void *dig_in = mdev->tconn->int_dig_in;
+	void *dig_vv = mdev->tconn->int_dig_vv;
 
-	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
-		crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
-
-	if (dgs) {
-		rr = drbd_recv(mdev, dig_in, dgs);
-		if (rr != dgs) {
-			if (!signal_pending(current))
-				dev_warn(DEV,
-					"short read receiving data reply digest: read %d expected %d\n",
-					rr, dgs);
-			return 0;
-		}
+	dgs = 0;
+	if (mdev->tconn->peer_integrity_tfm) {
+		dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
+		err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
+		if (err)
+			return err;
+		data_size -= dgs;
 	}
 
-	data_size -= dgs;
-
 	/* optimistically update recv_cnt.  if receiving fails below,
 	 * we disconnect anyways, and counters will be reset. */
 	mdev->recv_cnt += data_size>>9;
@@ -1399,63 +1616,61 @@
 	D_ASSERT(sector == bio->bi_sector);
 
 	bio_for_each_segment(bvec, bio, i) {
+		void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
 		expect = min_t(int, data_size, bvec->bv_len);
-		rr = drbd_recv(mdev,
-			     kmap(bvec->bv_page)+bvec->bv_offset,
-			     expect);
+		err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
 		kunmap(bvec->bv_page);
-		if (rr != expect) {
-			if (!signal_pending(current))
-				dev_warn(DEV, "short read receiving data reply: "
-					"read %d expected %d\n",
-					rr, expect);
-			return 0;
-		}
-		data_size -= rr;
+		if (err)
+			return err;
+		data_size -= expect;
 	}
 
 	if (dgs) {
-		drbd_csum_bio(mdev, mdev->integrity_r_tfm, bio, dig_vv);
+		drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
 		if (memcmp(dig_in, dig_vv, dgs)) {
 			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
-			return 0;
+			return -EINVAL;
 		}
 	}
 
 	D_ASSERT(data_size == 0);
-	return 1;
+	return 0;
 }
 
-/* e_end_resync_block() is called via
- * drbd_process_done_ee() by asender only */
-static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+/*
+ * e_end_resync_block() is called in asender context via
+ * drbd_finish_peer_reqs().
+ */
+static int e_end_resync_block(struct drbd_work *w, int unused)
 {
-	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
-	sector_t sector = e->sector;
-	int ok;
+	struct drbd_peer_request *peer_req =
+		container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
+	sector_t sector = peer_req->i.sector;
+	int err;
 
-	D_ASSERT(hlist_unhashed(&e->collision));
+	D_ASSERT(drbd_interval_empty(&peer_req->i));
 
-	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
-		drbd_set_in_sync(mdev, sector, e->size);
-		ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e);
+	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
+		drbd_set_in_sync(mdev, sector, peer_req->i.size);
+		err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
 	} else {
 		/* Record failure to sync */
-		drbd_rs_failed_io(mdev, sector, e->size);
+		drbd_rs_failed_io(mdev, sector, peer_req->i.size);
 
-		ok  = drbd_send_ack(mdev, P_NEG_ACK, e);
+		err  = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
 	}
 	dec_unacked(mdev);
 
-	return ok;
+	return err;
 }
 
 static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
 {
-	struct drbd_epoch_entry *e;
+	struct drbd_peer_request *peer_req;
 
-	e = read_in_block(mdev, ID_SYNCER, sector, data_size);
-	if (!e)
+	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
+	if (!peer_req)
 		goto fail;
 
 	dec_rs_pending(mdev);
@@ -1464,64 +1679,88 @@
 	/* corresponding dec_unacked() in e_end_resync_block()
 	 * respective _drbd_clear_done_ee */
 
-	e->w.cb = e_end_resync_block;
+	peer_req->w.cb = e_end_resync_block;
 
-	spin_lock_irq(&mdev->req_lock);
-	list_add(&e->w.list, &mdev->sync_ee);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_add(&peer_req->w.list, &mdev->sync_ee);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
-	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
-		return true;
+	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
+		return 0;
 
 	/* don't care for the reason here */
 	dev_err(DEV, "submit failed, triggering re-connect\n");
-	spin_lock_irq(&mdev->req_lock);
-	list_del(&e->w.list);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_del(&peer_req->w.list);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	drbd_free_ee(mdev, e);
+	drbd_free_peer_req(mdev, peer_req);
 fail:
 	put_ldev(mdev);
-	return false;
+	return -EIO;
 }
 
-static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static struct drbd_request *
+find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
+	     sector_t sector, bool missing_ok, const char *func)
 {
 	struct drbd_request *req;
+
+	/* Request object according to our peer */
+	req = (struct drbd_request *)(unsigned long)id;
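+	/* Validate the pointer against our interval tree before trusting a
+	 * value that came off the wire. */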
+	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
+		return req;
+	if (!missing_ok) {
+		dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
+			(unsigned long)id, (unsigned long long)sector);
+	}
+	return NULL;
+}
+
+static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
+{
+	struct drbd_conf *mdev;
+	struct drbd_request *req;
 	sector_t sector;
-	int ok;
-	struct p_data *p = &mdev->data.rbuf.data;
+	int err;
+	struct p_data *p = pi->data;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	sector = be64_to_cpu(p->sector);
 
-	spin_lock_irq(&mdev->req_lock);
-	req = _ar_id_to_req(mdev, p->block_id, sector);
-	spin_unlock_irq(&mdev->req_lock);
-	if (unlikely(!req)) {
-		dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n");
-		return false;
-	}
+	spin_lock_irq(&mdev->tconn->req_lock);
+	req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
+	spin_unlock_irq(&mdev->tconn->req_lock);
+	if (unlikely(!req))
+		return -EIO;
 
 	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
 	 * special casing it there for the various failure cases.
 	 * still no race with drbd_fail_pending_reads */
-	ok = recv_dless_read(mdev, req, sector, data_size);
-
-	if (ok)
-		req_mod(req, data_received);
+	err = recv_dless_read(mdev, req, sector, pi->size);
+	if (!err)
+		req_mod(req, DATA_RECEIVED);
 	/* else: nothing. handled from drbd_disconnect...
 	 * I don't think we may complete this just yet
 	 * in case we are "on-disconnect: freeze" */
 
-	return ok;
+	return err;
 }
 
-static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
 {
+	struct drbd_conf *mdev;
 	sector_t sector;
-	int ok;
-	struct p_data *p = &mdev->data.rbuf.data;
+	int err;
+	struct p_data *p = pi->data;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	sector = be64_to_cpu(p->sector);
 	D_ASSERT(p->block_id == ID_SYNCER);
@@ -1529,42 +1768,63 @@
 	if (get_ldev(mdev)) {
 		/* data is submitted to disk within recv_resync_read.
 		 * corresponding put_ldev done below on error,
-		 * or in drbd_endio_write_sec. */
-		ok = recv_resync_read(mdev, sector, data_size);
+		 * or in drbd_peer_request_endio. */
+		err = recv_resync_read(mdev, sector, pi->size);
 	} else {
 		if (__ratelimit(&drbd_ratelimit_state))
 			dev_err(DEV, "Can not write resync data to local disk.\n");
 
-		ok = drbd_drain_block(mdev, data_size);
+		err = drbd_drain_block(mdev, pi->size);
 
-		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
+		drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
 	}
 
-	atomic_add(data_size >> 9, &mdev->rs_sect_in);
+	atomic_add(pi->size >> 9, &mdev->rs_sect_in);
 
-	return ok;
+	return err;
 }
 
-/* e_end_block() is called via drbd_process_done_ee().
- * this means this function only runs in the asender thread
- */
-static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
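+/* Requeue local writes that were postponed because they conflicted with
+   a peer write which has now completed. */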
+static void restart_conflicting_writes(struct drbd_conf *mdev,
+				       sector_t sector, int size)
 {
-	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
-	sector_t sector = e->sector;
-	int ok = 1, pcmd;
+	struct drbd_interval *i;
+	struct drbd_request *req;
 
-	if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
-		if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
+		if (!i->local)
+			continue;
+		req = container_of(i, struct drbd_request, i);
+		if (req->rq_state & RQ_LOCAL_PENDING ||
+		    !(req->rq_state & RQ_POSTPONED))
+			continue;
+		/* as it is RQ_POSTPONED, this will cause it to
+		 * be queued on the retry workqueue. */
+		__req_mod(req, CONFLICT_RESOLVED, NULL);
+	}
+}
+
+/*
+ * e_end_block() is called in asender context via drbd_finish_peer_reqs().
+ */
+static int e_end_block(struct drbd_work *w, int cancel)
+{
+	struct drbd_peer_request *peer_req =
+		container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
+	sector_t sector = peer_req->i.sector;
+	int err = 0, pcmd;
+
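+	/* EE_SEND_WRITE_ACK replaces the old wire_protocol == DRBD_PROT_C
+	 * test: whether an explicit write ack is due is now decided per
+	 * request. */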
+	if (peer_req->flags & EE_SEND_WRITE_ACK) {
+		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
 			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
 				mdev->state.conn <= C_PAUSED_SYNC_T &&
-				e->flags & EE_MAY_SET_IN_SYNC) ?
+				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
 				P_RS_WRITE_ACK : P_WRITE_ACK;
-			ok &= drbd_send_ack(mdev, pcmd, e);
+			err = drbd_send_ack(mdev, pcmd, peer_req);
 			if (pcmd == P_RS_WRITE_ACK)
-				drbd_set_in_sync(mdev, sector, e->size);
+				drbd_set_in_sync(mdev, sector, peer_req->i.size);
 		} else {
-			ok  = drbd_send_ack(mdev, P_NEG_ACK, e);
+			err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
 			/* we expect it to be marked out of sync anyways...
 			 * maybe assert this?  */
 		}
@@ -1572,52 +1832,115 @@
 	}
 	/* we delete from the conflict detection hash _after_ we sent out the
 	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
-	if (mdev->net_conf->two_primaries) {
-		spin_lock_irq(&mdev->req_lock);
-		D_ASSERT(!hlist_unhashed(&e->collision));
-		hlist_del_init(&e->collision);
-		spin_unlock_irq(&mdev->req_lock);
-	} else {
-		D_ASSERT(hlist_unhashed(&e->collision));
-	}
+	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
+		spin_lock_irq(&mdev->tconn->req_lock);
+		D_ASSERT(!drbd_interval_empty(&peer_req->i));
+		drbd_remove_epoch_entry_interval(mdev, peer_req);
+		if (peer_req->flags & EE_RESTART_REQUESTS)
+			restart_conflicting_writes(mdev, sector, peer_req->i.size);
+		spin_unlock_irq(&mdev->tconn->req_lock);
+	} else
+		D_ASSERT(drbd_interval_empty(&peer_req->i));
 
-	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+	drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
 
-	return ok;
+	return err;
 }
 
-static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
 {
-	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
-	int ok = 1;
+	struct drbd_conf *mdev = w->mdev;
+	struct drbd_peer_request *peer_req =
+		container_of(w, struct drbd_peer_request, w);
+	int err;
 
-	D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
-	ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
-
-	spin_lock_irq(&mdev->req_lock);
-	D_ASSERT(!hlist_unhashed(&e->collision));
-	hlist_del_init(&e->collision);
-	spin_unlock_irq(&mdev->req_lock);
-
+	err = drbd_send_ack(mdev, ack, peer_req);
 	dec_unacked(mdev);
 
-	return ok;
+	return err;
 }
 
-static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e)
+static int e_send_superseded(struct drbd_work *w, int unused)
 {
+	return e_send_ack(w, P_SUPERSEDED);
+}
 
-	struct drbd_epoch_entry *rs_e;
+static int e_send_retry_write(struct drbd_work *w, int unused)
+{
+	struct drbd_tconn *tconn = w->mdev->tconn;
+
+	return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
+			     P_RETRY_WRITE : P_SUPERSEDED);
+}
+
+static bool seq_greater(u32 a, u32 b)
+{
+	/*
+	 * We assume 32-bit wrap-around here.
+	 * For 24-bit wrap-around, we would have to shift:
+	 *  a <<= 8; b <<= 8;
+	 */
+	return (s32)a - (s32)b > 0;
+}
+
+static u32 seq_max(u32 a, u32 b)
+{
+	return seq_greater(a, b) ? a : b;
+}
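
The signed-difference trick above is what makes sequence numbers compare
correctly across the 32-bit wrap. A standalone sketch (userspace C; the helper
name and test values are illustrative, written as one unsigned subtraction so
the overflow stays well-defined outside the kernel):

    #include <assert.h>
    #include <stdint.h>

    /* same ordering as seq_greater() above */
    static int seq_greater_sketch(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) > 0;
    }

    int main(void)
    {
        assert(seq_greater_sketch(1, 0xffffffffu));  /* 1 follows a wrap */
        assert(!seq_greater_sketch(0xffffffffu, 1));
        assert(!seq_greater_sketch(42, 42));         /* equal is not greater */
        return 0;
    }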
+
+static bool need_peer_seq(struct drbd_conf *mdev)
+{
+	struct drbd_tconn *tconn = mdev->tconn;
+	int tp;
+
+	/*
+	 * We only need to keep track of the last packet_seq number of our peer
+	 * if we are in dual-primary mode and we have the resolve-conflicts flag set; see
+	 * handle_write_conflicts().
+	 */
+
+	rcu_read_lock();
+	tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
+	rcu_read_unlock();
+
+	return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
+}
+
+static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
+{
+	unsigned int newest_peer_seq;
+
+	if (need_peer_seq(mdev)) {
+		spin_lock(&mdev->peer_seq_lock);
+		newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
+		mdev->peer_seq = newest_peer_seq;
+		spin_unlock(&mdev->peer_seq_lock);
+		/* wake up only if we actually changed mdev->peer_seq */
+		if (peer_seq == newest_peer_seq)
+			wake_up(&mdev->seq_wait);
+	}
+}
+
+static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
+{
+	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
+}
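
overlaps() takes starting sectors plus lengths in bytes; the >>9 converts
bytes to 512-byte sectors, and two ranges that merely touch do not count as
overlapping. A minimal sketch of the boundary case (values hypothetical):

    #include <assert.h>

    typedef unsigned long long sector_t;

    static int overlaps(sector_t s1, int l1, sector_t s2, int l2)
    {
        return !((s1 + (l1 >> 9) <= s2) || (s1 >= s2 + (l2 >> 9)));
    }

    int main(void)
    {
        /* 4096 bytes at sector 0 cover sectors [0, 8) */
        assert(!overlaps(0, 4096, 8, 512)); /* adjacent only */
        assert(overlaps(0, 4096, 7, 512));  /* sector 7 is shared */
        return 0;
    }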
+
+/* maybe change sync_ee into interval trees as well? */
+static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
+{
+	struct drbd_peer_request *rs_req;
 	bool rv = 0;
 
-	spin_lock_irq(&mdev->req_lock);
-	list_for_each_entry(rs_e, &mdev->sync_ee, w.list) {
-		if (overlaps(data_e->sector, data_e->size, rs_e->sector, rs_e->size)) {
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
+		if (overlaps(peer_req->i.sector, peer_req->i.size,
+			     rs_req->i.sector, rs_req->i.size)) {
 			rv = 1;
 			break;
 		}
 	}
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	return rv;
 }
@@ -1643,35 +1966,41 @@
  *
  * returns 0 if we may process the packet,
  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
-static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
+static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
 {
 	DEFINE_WAIT(wait);
-	unsigned int p_seq;
 	long timeout;
-	int ret = 0;
+	int ret;
+
+	if (!need_peer_seq(mdev))
+		return 0;
+
 	spin_lock(&mdev->peer_seq_lock);
 	for (;;) {
-		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
-		if (seq_le(packet_seq, mdev->peer_seq+1))
+		if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
+			mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
+			ret = 0;
 			break;
+		}
 		if (signal_pending(current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
-		p_seq = mdev->peer_seq;
+		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
 		spin_unlock(&mdev->peer_seq_lock);
-		timeout = schedule_timeout(30*HZ);
+		rcu_read_lock();
+		timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
+		rcu_read_unlock();
+		timeout = schedule_timeout(timeout);
 		spin_lock(&mdev->peer_seq_lock);
-		if (timeout == 0 && p_seq == mdev->peer_seq) {
+		if (!timeout) {
 			ret = -ETIMEDOUT;
-			dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
+			dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
 			break;
 		}
 	}
-	finish_wait(&mdev->seq_wait, &wait);
-	if (mdev->peer_seq+1 == packet_seq)
-		mdev->peer_seq++;
 	spin_unlock(&mdev->peer_seq_lock);
+	finish_wait(&mdev->seq_wait, &wait);
 	return ret;
 }
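
The loop condition above reads more easily when unfolded: seq_greater(peer_seq - 1,
mdev->peer_seq) holds exactly when peer_seq is more than one ahead of what we have
seen, so the receiver proceeds once peer_seq <= mdev->peer_seq + 1, i.e. the packet
is the next expected one or an older one. A sketch of that predicate in isolation
(function name and values hypothetical):

    #include <assert.h>
    #include <stdint.h>

    static int may_process(uint32_t peer_seq, uint32_t local_seq)
    {
        /* proceed iff peer_seq <= local_seq + 1, modulo 2^32 */
        return !((int32_t)((peer_seq - 1) - local_seq) > 0);
    }

    int main(void)
    {
        assert(may_process(5, 4));  /* the next expected packet */
        assert(may_process(3, 4));  /* an old packet, already covered */
        assert(!may_process(6, 4)); /* an ack is still missing: wait */
        return 0;
    }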
 
@@ -1686,233 +2015,277 @@
 		(dpf & DP_DISCARD ? REQ_DISCARD : 0);
 }
 
-/* mirrored write */
-static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
+				    unsigned int size)
 {
+	struct drbd_interval *i;
+
+    repeat:
+	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
+		struct drbd_request *req;
+		struct bio_and_error m;
+
+		if (!i->local)
+			continue;
+		req = container_of(i, struct drbd_request, i);
+		if (!(req->rq_state & RQ_POSTPONED))
+			continue;
+		req->rq_state &= ~RQ_POSTPONED;
+		__req_mod(req, NEG_ACKED, &m);
+		spin_unlock_irq(&mdev->tconn->req_lock);
+		if (m.bio)
+			complete_master_bio(mdev, &m);
+		spin_lock_irq(&mdev->tconn->req_lock);
+		goto repeat;
+	}
+}
+
+static int handle_write_conflicts(struct drbd_conf *mdev,
+				  struct drbd_peer_request *peer_req)
+{
+	struct drbd_tconn *tconn = mdev->tconn;
+	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
+	sector_t sector = peer_req->i.sector;
+	const unsigned int size = peer_req->i.size;
+	struct drbd_interval *i;
+	bool equal;
+	int err;
+
+	/*
+	 * Inserting the peer request into the write_requests tree will prevent
+	 * new conflicting local requests from being added.
+	 */
+	drbd_insert_interval(&mdev->write_requests, &peer_req->i);
+
+    repeat:
+	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
+		if (i == &peer_req->i)
+			continue;
+
+		if (!i->local) {
+			/*
+			 * Our peer has sent a conflicting remote request; this
+			 * should not happen in a two-node setup.  Wait for the
+			 * earlier peer request to complete.
+			 */
+			err = drbd_wait_misc(mdev, i);
+			if (err)
+				goto out;
+			goto repeat;
+		}
+
+		equal = i->sector == sector && i->size == size;
+		if (resolve_conflicts) {
+			/*
+			 * If the peer request is fully contained within the
+			 * overlapping request, it can be considered overwritten
+			 * and thus superseded; otherwise, it will be retried
+			 * once all overlapping requests have completed.
+			 */
+			bool superseded = i->sector <= sector && i->sector +
+				       (i->size >> 9) >= sector + (size >> 9);
+
+			if (!equal)
+				dev_alert(DEV, "Concurrent writes detected: "
+					       "local=%llus +%u, remote=%llus +%u, "
+					       "assuming %s came first\n",
+					  (unsigned long long)i->sector, i->size,
+					  (unsigned long long)sector, size,
+					  superseded ? "local" : "remote");
+
+			inc_unacked(mdev);
+			peer_req->w.cb = superseded ? e_send_superseded :
+						   e_send_retry_write;
+			list_add_tail(&peer_req->w.list, &mdev->done_ee);
+			wake_asender(mdev->tconn);
+
+			err = -ENOENT;
+			goto out;
+		} else {
+			struct drbd_request *req =
+				container_of(i, struct drbd_request, i);
+
+			if (!equal)
+				dev_alert(DEV, "Concurrent writes detected: "
+					       "local=%llus +%u, remote=%llus +%u\n",
+					  (unsigned long long)i->sector, i->size,
+					  (unsigned long long)sector, size);
+
+			if (req->rq_state & RQ_LOCAL_PENDING ||
+			    !(req->rq_state & RQ_POSTPONED)) {
+				/*
+				 * Wait for the node with the discard flag to
+				 * decide if this request has been superseded
+				 * or needs to be retried.
+				 * Requests that have been superseded will
+				 * disappear from the write_requests tree.
+				 *
+				 * In addition, wait for the conflicting
+				 * request to finish locally before submitting
+				 * the conflicting peer request.
+				 */
+				err = drbd_wait_misc(mdev, &req->i);
+				if (err) {
+					_conn_request_state(mdev->tconn,
+							    NS(conn, C_TIMEOUT),
+							    CS_HARD);
+					fail_postponed_requests(mdev, sector, size);
+					goto out;
+				}
+				goto repeat;
+			}
+			/*
+			 * Remember to restart the conflicting requests after
+			 * the new peer request has completed.
+			 */
+			peer_req->flags |= EE_RESTART_REQUESTS;
+		}
+	}
+	err = 0;
+
+    out:
+	if (err)
+		drbd_remove_epoch_entry_interval(mdev, peer_req);
+	return err;
+}
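
The "superseded" test above is a pure containment check on sector ranges: the
peer write is dropped only if an overlapping local write fully covers it. An
isolated sketch, using the same 512-byte sector convention (values hypothetical):

    #include <assert.h>

    typedef unsigned long long sector_t;

    static int is_superseded(sector_t i_sector, unsigned int i_size,
                             sector_t sector, unsigned int size)
    {
        return i_sector <= sector &&
               i_sector + (i_size >> 9) >= sector + (size >> 9);
    }

    int main(void)
    {
        assert(is_superseded(0, 8192, 8, 4096));  /* [0,16) covers [8,16) */
        assert(!is_superseded(0, 4096, 0, 8192)); /* [0,8) cannot cover [0,16) */
        return 0;
    }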
+
+/* mirrored write */
+static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
+{
+	struct drbd_conf *mdev;
 	sector_t sector;
-	struct drbd_epoch_entry *e;
-	struct p_data *p = &mdev->data.rbuf.data;
+	struct drbd_peer_request *peer_req;
+	struct p_data *p = pi->data;
+	u32 peer_seq = be32_to_cpu(p->seq_num);
 	int rw = WRITE;
 	u32 dp_flags;
+	int err, tp;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	if (!get_ldev(mdev)) {
-		spin_lock(&mdev->peer_seq_lock);
-		if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
-			mdev->peer_seq++;
-		spin_unlock(&mdev->peer_seq_lock);
+		int err2;
 
-		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
-		atomic_inc(&mdev->current_epoch->epoch_size);
-		return drbd_drain_block(mdev, data_size);
+		err = wait_for_and_update_peer_seq(mdev, peer_seq);
+		drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
+		atomic_inc(&tconn->current_epoch->epoch_size);
+		err2 = drbd_drain_block(mdev, pi->size);
+		if (!err)
+			err = err2;
+		return err;
 	}
 
-	/* get_ldev(mdev) successful.
-	 * Corresponding put_ldev done either below (on various errors),
-	 * or in drbd_endio_write_sec, if we successfully submit the data at
-	 * the end of this function. */
+	/*
+	 * Corresponding put_ldev done either below (on various errors), or in
+	 * drbd_peer_request_endio, if we successfully submit the data at the
+	 * end of this function.
+	 */
 
 	sector = be64_to_cpu(p->sector);
-	e = read_in_block(mdev, p->block_id, sector, data_size);
-	if (!e) {
+	peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
+	if (!peer_req) {
 		put_ldev(mdev);
-		return false;
+		return -EIO;
 	}
 
-	e->w.cb = e_end_block;
+	peer_req->w.cb = e_end_block;
 
 	dp_flags = be32_to_cpu(p->dp_flags);
 	rw |= wire_flags_to_bio(mdev, dp_flags);
-	if (e->pages == NULL) {
-		D_ASSERT(e->size == 0);
+	if (peer_req->pages == NULL) {
+		D_ASSERT(peer_req->i.size == 0);
 		D_ASSERT(dp_flags & DP_FLUSH);
 	}
 
 	if (dp_flags & DP_MAY_SET_IN_SYNC)
-		e->flags |= EE_MAY_SET_IN_SYNC;
+		peer_req->flags |= EE_MAY_SET_IN_SYNC;
 
-	spin_lock(&mdev->epoch_lock);
-	e->epoch = mdev->current_epoch;
-	atomic_inc(&e->epoch->epoch_size);
-	atomic_inc(&e->epoch->active);
-	spin_unlock(&mdev->epoch_lock);
+	spin_lock(&tconn->epoch_lock);
+	peer_req->epoch = tconn->current_epoch;
+	atomic_inc(&peer_req->epoch->epoch_size);
+	atomic_inc(&peer_req->epoch->active);
+	spin_unlock(&tconn->epoch_lock);
 
-	/* I'm the receiver, I do hold a net_cnt reference. */
-	if (!mdev->net_conf->two_primaries) {
-		spin_lock_irq(&mdev->req_lock);
-	} else {
-		/* don't get the req_lock yet,
-		 * we may sleep in drbd_wait_peer_seq */
-		const int size = e->size;
-		const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags);
-		DEFINE_WAIT(wait);
-		struct drbd_request *i;
-		struct hlist_node *n;
-		struct hlist_head *slot;
-		int first;
-
-		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
-		BUG_ON(mdev->ee_hash == NULL);
-		BUG_ON(mdev->tl_hash == NULL);
-
-		/* conflict detection and handling:
-		 * 1. wait on the sequence number,
-		 *    in case this data packet overtook ACK packets.
-		 * 2. check our hash tables for conflicting requests.
-		 *    we only need to walk the tl_hash, since an ee can not
-		 *    have a conflict with an other ee: on the submitting
-		 *    node, the corresponding req had already been conflicting,
-		 *    and a conflicting req is never sent.
-		 *
-		 * Note: for two_primaries, we are protocol C,
-		 * so there cannot be any request that is DONE
-		 * but still on the transfer log.
-		 *
-		 * unconditionally add to the ee_hash.
-		 *
-		 * if no conflicting request is found:
-		 *    submit.
-		 *
-		 * if any conflicting request is found
-		 * that has not yet been acked,
-		 * AND I have the "discard concurrent writes" flag:
-		 *	 queue (via done_ee) the P_DISCARD_ACK; OUT.
-		 *
-		 * if any conflicting request is found:
-		 *	 block the receiver, waiting on misc_wait
-		 *	 until no more conflicting requests are there,
-		 *	 or we get interrupted (disconnect).
-		 *
-		 *	 we do not just write after local io completion of those
-		 *	 requests, but only after req is done completely, i.e.
-		 *	 we wait for the P_DISCARD_ACK to arrive!
-		 *
-		 *	 then proceed normally, i.e. submit.
-		 */
-		if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
+	rcu_read_lock();
+	tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
+	rcu_read_unlock();
+	if (tp) {
+		peer_req->flags |= EE_IN_INTERVAL_TREE;
+		err = wait_for_and_update_peer_seq(mdev, peer_seq);
+		if (err)
 			goto out_interrupted;
-
-		spin_lock_irq(&mdev->req_lock);
-
-		hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
-
-#define OVERLAPS overlaps(i->sector, i->size, sector, size)
-		slot = tl_hash_slot(mdev, sector);
-		first = 1;
-		for (;;) {
-			int have_unacked = 0;
-			int have_conflict = 0;
-			prepare_to_wait(&mdev->misc_wait, &wait,
-				TASK_INTERRUPTIBLE);
-			hlist_for_each_entry(i, n, slot, collision) {
-				if (OVERLAPS) {
-					/* only ALERT on first iteration,
-					 * we may be woken up early... */
-					if (first)
-						dev_alert(DEV, "%s[%u] Concurrent local write detected!"
-						      "	new: %llus +%u; pending: %llus +%u\n",
-						      current->comm, current->pid,
-						      (unsigned long long)sector, size,
-						      (unsigned long long)i->sector, i->size);
-					if (i->rq_state & RQ_NET_PENDING)
-						++have_unacked;
-					++have_conflict;
-				}
-			}
-#undef OVERLAPS
-			if (!have_conflict)
-				break;
-
-			/* Discard Ack only for the _first_ iteration */
-			if (first && discard && have_unacked) {
-				dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
-				     (unsigned long long)sector);
-				inc_unacked(mdev);
-				e->w.cb = e_send_discard_ack;
-				list_add_tail(&e->w.list, &mdev->done_ee);
-
-				spin_unlock_irq(&mdev->req_lock);
-
-				/* we could probably send that P_DISCARD_ACK ourselves,
-				 * but I don't like the receiver using the msock */
-
+		spin_lock_irq(&mdev->tconn->req_lock);
+		err = handle_write_conflicts(mdev, peer_req);
+		if (err) {
+			spin_unlock_irq(&mdev->tconn->req_lock);
+			if (err == -ENOENT) {
 				put_ldev(mdev);
-				wake_asender(mdev);
-				finish_wait(&mdev->misc_wait, &wait);
-				return true;
+				return 0;
 			}
-
-			if (signal_pending(current)) {
-				hlist_del_init(&e->collision);
-
-				spin_unlock_irq(&mdev->req_lock);
-
-				finish_wait(&mdev->misc_wait, &wait);
-				goto out_interrupted;
-			}
-
-			spin_unlock_irq(&mdev->req_lock);
-			if (first) {
-				first = 0;
-				dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
-				     "sec=%llus\n", (unsigned long long)sector);
-			} else if (discard) {
-				/* we had none on the first iteration.
-				 * there must be none now. */
-				D_ASSERT(have_unacked == 0);
-			}
-			schedule();
-			spin_lock_irq(&mdev->req_lock);
+			goto out_interrupted;
 		}
-		finish_wait(&mdev->misc_wait, &wait);
-	}
-
-	list_add(&e->w.list, &mdev->active_ee);
-	spin_unlock_irq(&mdev->req_lock);
+	} else
+		spin_lock_irq(&mdev->tconn->req_lock);
+	list_add(&peer_req->w.list, &mdev->active_ee);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	if (mdev->state.conn == C_SYNC_TARGET)
-		wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e));
+		wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));
 
-	switch (mdev->net_conf->wire_protocol) {
-	case DRBD_PROT_C:
+	if (mdev->tconn->agreed_pro_version < 100) {
+		rcu_read_lock();
+		switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
+		case DRBD_PROT_C:
+			dp_flags |= DP_SEND_WRITE_ACK;
+			break;
+		case DRBD_PROT_B:
+			dp_flags |= DP_SEND_RECEIVE_ACK;
+			break;
+		}
+		rcu_read_unlock();
+	}
+
+	if (dp_flags & DP_SEND_WRITE_ACK) {
+		peer_req->flags |= EE_SEND_WRITE_ACK;
 		inc_unacked(mdev);
 		/* corresponding dec_unacked() in e_end_block()
 		 * respective _drbd_clear_done_ee */
-		break;
-	case DRBD_PROT_B:
+	}
+
+	if (dp_flags & DP_SEND_RECEIVE_ACK) {
 		/* I really don't like it that the receiver thread
 		 * sends on the msock, but anyways */
-		drbd_send_ack(mdev, P_RECV_ACK, e);
-		break;
-	case DRBD_PROT_A:
-		/* nothing to do */
-		break;
+		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
 	}
 
 	if (mdev->state.pdsk < D_INCONSISTENT) {
 		/* In case we have the only disk of the cluster, */
-		drbd_set_out_of_sync(mdev, e->sector, e->size);
-		e->flags |= EE_CALL_AL_COMPLETE_IO;
-		e->flags &= ~EE_MAY_SET_IN_SYNC;
-		drbd_al_begin_io(mdev, e->sector);
+		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
+		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
+		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
+		drbd_al_begin_io(mdev, &peer_req->i);
 	}
 
-	if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
-		return true;
+	err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
+	if (!err)
+		return 0;
 
 	/* don't care for the reason here */
 	dev_err(DEV, "submit failed, triggering re-connect\n");
-	spin_lock_irq(&mdev->req_lock);
-	list_del(&e->w.list);
-	hlist_del_init(&e->collision);
-	spin_unlock_irq(&mdev->req_lock);
-	if (e->flags & EE_CALL_AL_COMPLETE_IO)
-		drbd_al_complete_io(mdev, e->sector);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_del(&peer_req->w.list);
+	drbd_remove_epoch_entry_interval(mdev, peer_req);
+	spin_unlock_irq(&mdev->tconn->req_lock);
+	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
+		drbd_al_complete_io(mdev, &peer_req->i);
 
 out_interrupted:
-	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP);
+	drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
 	put_ldev(mdev);
-	drbd_free_ee(mdev, e);
-	return false;
+	drbd_free_peer_req(mdev, peer_req);
+	return err;
 }
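
For peers older than protocol 100, receive_Data() folds the wire protocol into
the two DP_SEND_* bits, so the ack decision no longer switches on the protocol
itself. A compact sketch of that mapping (enum values and bit layout are
illustrative, local to the example):

    #include <assert.h>

    enum { PROT_A = 1, PROT_B, PROT_C };      /* values illustrative */
    #define SEND_RECEIVE_ACK (1u << 0)        /* bit layout illustrative */
    #define SEND_WRITE_ACK   (1u << 1)

    static unsigned int ack_flags_for(int wire_protocol)
    {
        switch (wire_protocol) {
        case PROT_C: return SEND_WRITE_ACK;   /* ack after stable write */
        case PROT_B: return SEND_RECEIVE_ACK; /* ack on receipt only */
        default:     return 0;                /* protocol A: no ack */
        }
    }

    int main(void)
    {
        assert(ack_flags_for(PROT_C) == SEND_WRITE_ACK);
        assert(ack_flags_for(PROT_A) == 0);
        return 0;
    }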
 
 /* We may throttle resync, if the lower device seems to be busy,
@@ -1933,9 +2306,14 @@
 	struct lc_element *tmp;
 	int curr_events;
 	int throttle = 0;
+	unsigned int c_min_rate;
+
+	rcu_read_lock();
+	c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
+	rcu_read_unlock();
 
 	/* feature disabled? */
-	if (mdev->sync_conf.c_min_rate == 0)
+	if (c_min_rate == 0)
 		return 0;
 
 	spin_lock_irq(&mdev->al_lock);
@@ -1975,40 +2353,46 @@
 		db = mdev->rs_mark_left[i] - rs_left;
 		dbdt = Bit2KB(db/dt);
 
-		if (dbdt > mdev->sync_conf.c_min_rate)
+		if (dbdt > c_min_rate)
 			throttle = 1;
 	}
 	return throttle;
 }
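
The throttle test compares the achieved resync rate against the configured
c_min_rate in KB/s: db is bitmap bits cleared over dt seconds, and Bit2KB()
scales bits to kilobytes (each bitmap bit covers a 4 KiB block, so presumably
a shift by two). A hedged sketch of the arithmetic only:

    #include <assert.h>

    #define BIT2KB(bits) ((bits) << 2) /* 1 bitmap bit == 4 KiB; assumption */

    static int should_throttle(unsigned long db, unsigned long dt,
                               unsigned int c_min_rate)
    {
        unsigned long dbdt = BIT2KB(db / dt); /* KB per second */

        return c_min_rate && dbdt > c_min_rate;
    }

    int main(void)
    {
        /* 2500 bits over 10 s -> 1000 KB/s, above a 250 KB/s floor */
        assert(should_throttle(2500, 10, 250));
        assert(!should_throttle(2500, 10, 0)); /* feature disabled */
        return 0;
    }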
 
 
-static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size)
+static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
 {
+	struct drbd_conf *mdev;
 	sector_t sector;
-	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
-	struct drbd_epoch_entry *e;
+	sector_t capacity;
+	struct drbd_peer_request *peer_req;
 	struct digest_info *di = NULL;
 	int size, verb;
 	unsigned int fault_type;
-	struct p_block_req *p =	&mdev->data.rbuf.block_req;
+	struct p_block_req *p =	pi->data;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
+	capacity = drbd_get_capacity(mdev->this_bdev);
 
 	sector = be64_to_cpu(p->sector);
 	size   = be32_to_cpu(p->blksize);
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
 				(unsigned long long)sector, size);
-		return false;
+		return -EINVAL;
 	}
 	if (sector + (size>>9) > capacity) {
 		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
 				(unsigned long long)sector, size);
-		return false;
+		return -EINVAL;
 	}
 
 	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
 		verb = 1;
-		switch (cmd) {
+		switch (pi->cmd) {
 		case P_DATA_REQUEST:
 			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
 			break;
@@ -2023,35 +2407,34 @@
 			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
 			break;
 		default:
-			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
-				cmdname(cmd));
+			BUG();
 		}
 		if (verb && __ratelimit(&drbd_ratelimit_state))
 			dev_err(DEV, "Can not satisfy peer's read request, "
 			    "no local data.\n");
 
 		/* drain possibly payload */
-		return drbd_drain_block(mdev, digest_size);
+		return drbd_drain_block(mdev, pi->size);
 	}
 
 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
 	 * which in turn might block on the other node at this very place.  */
-	e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
-	if (!e) {
+	peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
+	if (!peer_req) {
 		put_ldev(mdev);
-		return false;
+		return -ENOMEM;
 	}
 
-	switch (cmd) {
+	switch (pi->cmd) {
 	case P_DATA_REQUEST:
-		e->w.cb = w_e_end_data_req;
+		peer_req->w.cb = w_e_end_data_req;
 		fault_type = DRBD_FAULT_DT_RD;
 		/* application IO, don't drbd_rs_begin_io */
 		goto submit;
 
 	case P_RS_DATA_REQUEST:
-		e->w.cb = w_e_end_rsdata_req;
+		peer_req->w.cb = w_e_end_rsdata_req;
 		fault_type = DRBD_FAULT_RS_RD;
 		/* used in the sector offset progress display */
 		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
@@ -2060,28 +2443,28 @@
 	case P_OV_REPLY:
 	case P_CSUM_RS_REQUEST:
 		fault_type = DRBD_FAULT_RS_RD;
-		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
+		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
 		if (!di)
 			goto out_free_e;
 
-		di->digest_size = digest_size;
+		di->digest_size = pi->size;
 		di->digest = (((char *)di)+sizeof(struct digest_info));
 
-		e->digest = di;
-		e->flags |= EE_HAS_DIGEST;
+		peer_req->digest = di;
+		peer_req->flags |= EE_HAS_DIGEST;
 
-		if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
+		if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
 			goto out_free_e;
 
-		if (cmd == P_CSUM_RS_REQUEST) {
-			D_ASSERT(mdev->agreed_pro_version >= 89);
-			e->w.cb = w_e_end_csum_rs_req;
+		if (pi->cmd == P_CSUM_RS_REQUEST) {
+			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
+			peer_req->w.cb = w_e_end_csum_rs_req;
 			/* used in the sector offset progress display */
 			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
-		} else if (cmd == P_OV_REPLY) {
+		} else if (pi->cmd == P_OV_REPLY) {
 			/* track progress, we may need to throttle */
 			atomic_add(size >> 9, &mdev->rs_sect_in);
-			e->w.cb = w_e_end_ov_reply;
+			peer_req->w.cb = w_e_end_ov_reply;
 			dec_rs_pending(mdev);
 			/* drbd_rs_begin_io done when we sent this request,
 			 * but accounting still needs to be done. */
@@ -2091,7 +2474,7 @@
 
 	case P_OV_REQUEST:
 		if (mdev->ov_start_sector == ~(sector_t)0 &&
-		    mdev->agreed_pro_version >= 90) {
+		    mdev->tconn->agreed_pro_version >= 90) {
 			unsigned long now = jiffies;
 			int i;
 			mdev->ov_start_sector = sector;
@@ -2105,15 +2488,12 @@
 			dev_info(DEV, "Online Verify start sector: %llu\n",
 					(unsigned long long)sector);
 		}
-		e->w.cb = w_e_end_ov_req;
+		peer_req->w.cb = w_e_end_ov_req;
 		fault_type = DRBD_FAULT_RS_RD;
 		break;
 
 	default:
-		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
-		    cmdname(cmd));
-		fault_type = DRBD_FAULT_MAX;
-		goto out_free_e;
+		BUG();
 	}
 
 	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
@@ -2148,30 +2528,31 @@
 
 submit:
 	inc_unacked(mdev);
-	spin_lock_irq(&mdev->req_lock);
-	list_add_tail(&e->w.list, &mdev->read_ee);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_add_tail(&peer_req->w.list, &mdev->read_ee);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
-		return true;
+	if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
+		return 0;
 
 	/* don't care for the reason here */
 	dev_err(DEV, "submit failed, triggering re-connect\n");
-	spin_lock_irq(&mdev->req_lock);
-	list_del(&e->w.list);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_del(&peer_req->w.list);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
 
 out_free_e:
 	put_ldev(mdev);
-	drbd_free_ee(mdev, e);
-	return false;
+	drbd_free_peer_req(mdev, peer_req);
+	return -EIO;
 }
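
The digest_info allocation above uses the common trailing-buffer idiom: one
kmalloc covers the struct plus its payload, and the payload pointer is set
just past the struct. A userspace sketch of the same layout (struct name and
digest size illustrative):

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>

    struct digest_info_sketch {
        int digest_size;
        char *digest;
    };

    int main(void)
    {
        int digest_size = 20; /* e.g. a SHA-1-sized digest */
        struct digest_info_sketch *di = malloc(sizeof(*di) + digest_size);

        if (!di)
            return 1;
        di->digest_size = digest_size;
        di->digest = (char *)di + sizeof(*di); /* payload follows the struct */
        memset(di->digest, 0, digest_size);
        assert(di->digest == (char *)(di + 1));
        free(di);
        return 0;
    }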
 
 static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
 {
 	int self, peer, rv = -100;
 	unsigned long ch_self, ch_peer;
+	enum drbd_after_sb_p after_sb_0p;
 
 	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
 	peer = mdev->p_uuid[UI_BITMAP] & 1;
@@ -2179,10 +2560,14 @@
 	ch_peer = mdev->p_uuid[UI_SIZE];
 	ch_self = mdev->comm_bm_set;
 
-	switch (mdev->net_conf->after_sb_0p) {
+	rcu_read_lock();
+	after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
+	rcu_read_unlock();
+	switch (after_sb_0p) {
 	case ASB_CONSENSUS:
 	case ASB_DISCARD_SECONDARY:
 	case ASB_CALL_HELPER:
+	case ASB_VIOLENTLY:
 		dev_err(DEV, "Configuration error.\n");
 		break;
 	case ASB_DISCONNECT:
@@ -2211,14 +2596,14 @@
 		     "Using discard-least-changes instead\n");
 	case ASB_DISCARD_ZERO_CHG:
 		if (ch_peer == 0 && ch_self == 0) {
-			rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+			rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
 				? -1 : 1;
 			break;
 		} else {
 			if (ch_peer == 0) { rv =  1; break; }
 			if (ch_self == 0) { rv = -1; break; }
 		}
-		if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
+		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
 			break;
 	case ASB_DISCARD_LEAST_CHG:
 		if	(ch_self < ch_peer)
@@ -2227,7 +2612,7 @@
 			rv =  1;
 		else /* ( ch_self == ch_peer ) */
 		     /* Well, then use something else. */
-			rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+			rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
 				? -1 : 1;
 		break;
 	case ASB_DISCARD_LOCAL:
@@ -2243,13 +2628,18 @@
 static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
 {
 	int hg, rv = -100;
+	enum drbd_after_sb_p after_sb_1p;
 
-	switch (mdev->net_conf->after_sb_1p) {
+	rcu_read_lock();
+	after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
+	rcu_read_unlock();
+	switch (after_sb_1p) {
 	case ASB_DISCARD_YOUNGER_PRI:
 	case ASB_DISCARD_OLDER_PRI:
 	case ASB_DISCARD_LEAST_CHG:
 	case ASB_DISCARD_LOCAL:
 	case ASB_DISCARD_REMOTE:
+	case ASB_DISCARD_ZERO_CHG:
 		dev_err(DEV, "Configuration error.\n");
 		break;
 	case ASB_DISCONNECT:
@@ -2292,8 +2682,12 @@
 static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
 {
 	int hg, rv = -100;
+	enum drbd_after_sb_p after_sb_2p;
 
-	switch (mdev->net_conf->after_sb_2p) {
+	rcu_read_lock();
+	after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
+	rcu_read_unlock();
+	switch (after_sb_2p) {
 	case ASB_DISCARD_YOUNGER_PRI:
 	case ASB_DISCARD_OLDER_PRI:
 	case ASB_DISCARD_LEAST_CHG:
@@ -2301,6 +2695,7 @@
 	case ASB_DISCARD_REMOTE:
 	case ASB_CONSENSUS:
 	case ASB_DISCARD_SECONDARY:
+	case ASB_DISCARD_ZERO_CHG:
 		dev_err(DEV, "Configuration error.\n");
 		break;
 	case ASB_VIOLENTLY:
@@ -2386,13 +2781,15 @@
 
 		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
 
-			if (mdev->agreed_pro_version < 91)
+			if (mdev->tconn->agreed_pro_version < 91)
 				return -1091;
 
 			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
 			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
 				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
-				drbd_uuid_set_bm(mdev, 0UL);
+				drbd_uuid_move_history(mdev);
+				mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
+				mdev->ldev->md.uuid[UI_BITMAP] = 0;
 
 				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
 					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
@@ -2407,7 +2804,7 @@
 
 		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
 
-			if (mdev->agreed_pro_version < 91)
+			if (mdev->tconn->agreed_pro_version < 91)
 				return -1091;
 
 			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
@@ -2440,7 +2837,7 @@
 		case 1: /*  self_pri && !peer_pri */ return 1;
 		case 2: /* !self_pri &&  peer_pri */ return -1;
 		case 3: /*  self_pri &&  peer_pri */
-			dc = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+			dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
 			return dc ? -1 : 1;
 		}
 	}
@@ -2453,14 +2850,14 @@
 	*rule_nr = 51;
 	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
 	if (self == peer) {
-		if (mdev->agreed_pro_version < 96 ?
+		if (mdev->tconn->agreed_pro_version < 96 ?
 		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
 		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
 		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
 			/* The last P_SYNC_UUID did not get through.  Undo the UUID
 			   modifications the peer made when it last started a resync
 			   as sync source. */
 
-			if (mdev->agreed_pro_version < 91)
+			if (mdev->tconn->agreed_pro_version < 91)
 				return -1091;
 
 			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
@@ -2490,18 +2887,18 @@
 	*rule_nr = 71;
 	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
 	if (self == peer) {
-		if (mdev->agreed_pro_version < 96 ?
+		if (mdev->tconn->agreed_pro_version < 96 ?
 		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
 		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
 		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
 			/* The last P_SYNC_UUID did not get through.  Undo the UUID
 			   modifications we made when we last started a resync as
 			   sync source. */
 
-			if (mdev->agreed_pro_version < 91)
+			if (mdev->tconn->agreed_pro_version < 91)
 				return -1091;
 
-			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
-			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
+			__drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
+			__drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
 
 			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
 			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
@@ -2545,20 +2942,24 @@
 static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
 					   enum drbd_disk_state peer_disk) __must_hold(local)
 {
-	int hg, rule_nr;
 	enum drbd_conns rv = C_MASK;
 	enum drbd_disk_state mydisk;
+	struct net_conf *nc;
+	int hg, rule_nr, rr_conflict, tentative;
 
 	mydisk = mdev->state.disk;
 	if (mydisk == D_NEGOTIATING)
 		mydisk = mdev->new_state_tmp.disk;
 
 	dev_info(DEV, "drbd_sync_handshake:\n");
+
+	spin_lock_irq(&mdev->ldev->md.uuid_lock);
 	drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
 	drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
 		       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
 
 	hg = drbd_uuid_compare(mdev, &rule_nr);
+	spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 
 	dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
 
@@ -2584,7 +2985,10 @@
 	if (abs(hg) == 100)
 		drbd_khelper(mdev, "initial-split-brain");
 
-	if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) {
+	rcu_read_lock();
+	nc = rcu_dereference(mdev->tconn->net_conf);
+
+	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
 		int pcount = (mdev->state.role == R_PRIMARY)
 			   + (peer_role == R_PRIMARY);
 		int forced = (hg == -100);
@@ -2613,9 +3017,9 @@
 	}
 
 	if (hg == -100) {
-		if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
+		if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
 			hg = -1;
-		if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
+		if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
 			hg = 1;
 
 		if (abs(hg) < 100)
@@ -2623,6 +3027,9 @@
 			     "Sync from %s node\n",
 			     (hg < 0) ? "peer" : "this");
 	}
+	rr_conflict = nc->rr_conflict;
+	tentative = nc->tentative;
+	rcu_read_unlock();
 
 	if (hg == -100) {
 		/* FIXME this log message is not correct if we end up here
@@ -2641,7 +3048,7 @@
 
 	if (hg < 0 && /* by intention we do not use mydisk here. */
 	    mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
-		switch (mdev->net_conf->rr_conflict) {
+		switch (rr_conflict) {
 		case ASB_CALL_HELPER:
 			drbd_khelper(mdev, "pri-lost");
 			/* fall through */
@@ -2654,7 +3061,7 @@
 		}
 	}
 
-	if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
+	if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
 		if (hg == 0)
 			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
 		else
@@ -2686,33 +3093,29 @@
 	return rv;
 }
 
-/* returns 1 if invalid */
-static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
+static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
 {
 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
-	if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
-	    (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
-		return 0;
+	if (peer == ASB_DISCARD_REMOTE)
+		return ASB_DISCARD_LOCAL;
 
 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
-	if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
-	    self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
-		return 1;
+	if (peer == ASB_DISCARD_LOCAL)
+		return ASB_DISCARD_REMOTE;
 
 	/* everything else is valid if they are equal on both sides. */
-	if (peer == self)
-		return 0;
-
-	/* everything es is invalid. */
-	return 1;
+	return peer;
 }
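
With convert_after_sb(), checking compatibility of the after-split-brain
policies reduces to mapping the peer's setting into our point of view and
comparing for equality, which is exactly how receive_protocol() below uses it.
A sketch over a reduced enum (subset of the real values):

    #include <assert.h>

    enum asb { ASB_DISCONNECT, ASB_DISCARD_LOCAL, ASB_DISCARD_REMOTE };

    static enum asb convert_after_sb(enum asb peer)
    {
        if (peer == ASB_DISCARD_REMOTE)
            return ASB_DISCARD_LOCAL;
        if (peer == ASB_DISCARD_LOCAL)
            return ASB_DISCARD_REMOTE;
        return peer;
    }

    int main(void)
    {
        /* the peer discarding "remote" data means discarding ours, which
         * from our side is discard-local: the mirrored pair is compatible */
        assert(convert_after_sb(ASB_DISCARD_REMOTE) == ASB_DISCARD_LOCAL);
        /* symmetric settings must simply match */
        assert(convert_after_sb(ASB_DISCONNECT) == ASB_DISCONNECT);
        return 0;
    }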
 
-static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_protocol *p = &mdev->data.rbuf.protocol;
-	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
-	int p_want_lose, p_two_primaries, cf;
-	char p_integrity_alg[SHARED_SECRET_MAX] = "";
+	struct p_protocol *p = pi->data;
+	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
+	int p_proto, p_discard_my_data, p_two_primaries, cf;
+	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
+	char integrity_alg[SHARED_SECRET_MAX] = "";
+	struct crypto_hash *peer_integrity_tfm = NULL;
+	void *int_dig_in = NULL, *int_dig_vv = NULL;
 
 	p_proto		= be32_to_cpu(p->protocol);
 	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
@@ -2720,63 +3123,138 @@
 	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
 	p_two_primaries = be32_to_cpu(p->two_primaries);
 	cf		= be32_to_cpu(p->conn_flags);
-	p_want_lose = cf & CF_WANT_LOSE;
+	p_discard_my_data = cf & CF_DISCARD_MY_DATA;
 
-	clear_bit(CONN_DRY_RUN, &mdev->flags);
+	if (tconn->agreed_pro_version >= 87) {
+		int err;
 
-	if (cf & CF_DRY_RUN)
-		set_bit(CONN_DRY_RUN, &mdev->flags);
-
-	if (p_proto != mdev->net_conf->wire_protocol) {
-		dev_err(DEV, "incompatible communication protocols\n");
-		goto disconnect;
+		if (pi->size > sizeof(integrity_alg))
+			return -EIO;
+		err = drbd_recv_all(tconn, integrity_alg, pi->size);
+		if (err)
+			return err;
+		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
 	}
 
-	if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) {
-		dev_err(DEV, "incompatible after-sb-0pri settings\n");
-		goto disconnect;
+	if (pi->cmd != P_PROTOCOL_UPDATE) {
+		clear_bit(CONN_DRY_RUN, &tconn->flags);
+
+		if (cf & CF_DRY_RUN)
+			set_bit(CONN_DRY_RUN, &tconn->flags);
+
+		rcu_read_lock();
+		nc = rcu_dereference(tconn->net_conf);
+
+		if (p_proto != nc->wire_protocol) {
+			conn_err(tconn, "incompatible %s settings\n", "protocol");
+			goto disconnect_rcu_unlock;
+		}
+
+		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
+			conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
+			goto disconnect_rcu_unlock;
+		}
+
+		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
+			conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
+			goto disconnect_rcu_unlock;
+		}
+
+		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
+			conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
+			goto disconnect_rcu_unlock;
+		}
+
+		if (p_discard_my_data && nc->discard_my_data) {
+			conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
+			goto disconnect_rcu_unlock;
+		}
+
+		if (p_two_primaries != nc->two_primaries) {
+			conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
+			goto disconnect_rcu_unlock;
+		}
+
+		if (strcmp(integrity_alg, nc->integrity_alg)) {
+			conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
+			goto disconnect_rcu_unlock;
+		}
+
+		rcu_read_unlock();
 	}
 
-	if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) {
-		dev_err(DEV, "incompatible after-sb-1pri settings\n");
-		goto disconnect;
-	}
+	if (integrity_alg[0]) {
+		int hash_size;
 
-	if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) {
-		dev_err(DEV, "incompatible after-sb-2pri settings\n");
-		goto disconnect;
-	}
+		/*
+		 * We can only change the peer data integrity algorithm
+		 * here.  Changing our own data integrity algorithm
+		 * requires that we send a P_PROTOCOL_UPDATE packet at
+		 * the same time; otherwise, the peer has no way to
+		 * tell between which packets the algorithm should
+		 * change.
+		 */
 
-	if (p_want_lose && mdev->net_conf->want_lose) {
-		dev_err(DEV, "both sides have the 'want_lose' flag set\n");
-		goto disconnect;
-	}
-
-	if (p_two_primaries != mdev->net_conf->two_primaries) {
-		dev_err(DEV, "incompatible setting of the two-primaries options\n");
-		goto disconnect;
-	}
-
-	if (mdev->agreed_pro_version >= 87) {
-		unsigned char *my_alg = mdev->net_conf->integrity_alg;
-
-		if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
-			return false;
-
-		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
-		if (strcmp(p_integrity_alg, my_alg)) {
-			dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
+		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
+		if (!peer_integrity_tfm) {
+			conn_err(tconn, "peer data-integrity-alg %s not supported\n",
+				 integrity_alg);
 			goto disconnect;
 		}
-		dev_info(DEV, "data-integrity-alg: %s\n",
-		     my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
+
+		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
+		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
+		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
+		if (!(int_dig_in && int_dig_vv)) {
+			conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
+			goto disconnect;
+		}
 	}
 
-	return true;
+	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
+	if (!new_net_conf) {
+		conn_err(tconn, "Allocation of new net_conf failed\n");
+		goto disconnect;
+	}
 
+	mutex_lock(&tconn->data.mutex);
+	mutex_lock(&tconn->conf_update);
+	old_net_conf = tconn->net_conf;
+	*new_net_conf = *old_net_conf;
+
+	new_net_conf->wire_protocol = p_proto;
+	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
+	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
+	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
+	new_net_conf->two_primaries = p_two_primaries;
+
+	rcu_assign_pointer(tconn->net_conf, new_net_conf);
+	mutex_unlock(&tconn->conf_update);
+	mutex_unlock(&tconn->data.mutex);
+
+	crypto_free_hash(tconn->peer_integrity_tfm);
+	kfree(tconn->int_dig_in);
+	kfree(tconn->int_dig_vv);
+	tconn->peer_integrity_tfm = peer_integrity_tfm;
+	tconn->int_dig_in = int_dig_in;
+	tconn->int_dig_vv = int_dig_vv;
+
+	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
+		conn_info(tconn, "peer data-integrity-alg: %s\n",
+			  integrity_alg[0] ? integrity_alg : "(none)");
+
+	synchronize_rcu();
+	kfree(old_net_conf);
+	return 0;
+
+disconnect_rcu_unlock:
+	rcu_read_unlock();
 disconnect:
-	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-	return false;
+	crypto_free_hash(peer_integrity_tfm);
+	kfree(int_dig_in);
+	kfree(int_dig_vv);
+	conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+	return -EIO;
 }
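
receive_protocol() now follows the usual RCU copy-update-publish shape:
allocate a new net_conf, copy the old one, modify it, publish it with
rcu_assign_pointer(), and only kfree() the old copy after synchronize_rcu().
A loose userspace analogue, with a C11 release store standing in for
rcu_assign_pointer() (no grace period is modelled; illustration only):

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct net_conf_sketch { int wire_protocol; int two_primaries; };

    static _Atomic(struct net_conf_sketch *) conf;

    int main(void)
    {
        struct net_conf_sketch *old_conf = calloc(1, sizeof(*old_conf));
        struct net_conf_sketch *new_conf = malloc(sizeof(*new_conf));

        if (!old_conf || !new_conf)
            return 1;
        atomic_store(&conf, old_conf);

        /* copy, update, publish: readers never see a half-written struct */
        *new_conf = *old_conf;
        new_conf->two_primaries = 1;
        atomic_store_explicit(&conf, new_conf, memory_order_release);

        /* in the kernel, synchronize_rcu() would run before kfree(old) */
        free(old_conf);
        assert(atomic_load(&conf)->two_primaries == 1);
        free(new_conf);
        return 0;
    }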
 
 /* helper function
@@ -2798,24 +3276,64 @@
 			alg, name, PTR_ERR(tfm));
 		return tfm;
 	}
-	if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
-		crypto_free_hash(tfm);
-		dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
-		return ERR_PTR(-EINVAL);
-	}
 	return tfm;
 }
 
-static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size)
+static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	int ok = true;
-	struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95;
+	void *buffer = tconn->data.rbuf;
+	int size = pi->size;
+
+	while (size) {
+		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
+		s = drbd_recv(tconn, buffer, s);
+		if (s <= 0) {
+			if (s < 0)
+				return s;
+			break;
+		}
+		size -= s;
+	}
+	if (size)
+		return -EIO;
+	return 0;
+}
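
ignore_remaining_packet() drains an unwanted payload in DRBD_SOCKET_BUFFER_SIZE
chunks until the advertised size is consumed. The same shape over a plain file
descriptor (buffer size and function name illustrative):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* returns 0 once 'size' bytes are drained, -1 on error or short stream */
    static int drain_fd(int fd, int size)
    {
        char buffer[4096]; /* stand-in for DRBD_SOCKET_BUFFER_SIZE */

        while (size) {
            int s = size < (int)sizeof(buffer) ? size : (int)sizeof(buffer);
            ssize_t r = read(fd, buffer, s);

            if (r <= 0)
                return -1;
            size -= r;
        }
        return 0;
    }

    int main(void)
    {
        int fd = open("/dev/zero", O_RDONLY);

        if (fd < 0)
            return 1;
        printf("drained: %d\n", drain_fd(fd, 10000)); /* prints 0 */
        close(fd);
        return 0;
    }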
+
+/*
+ * config_unknown_volume  -  device configuration command for unknown volume
+ *
+ * When a device is added to an existing connection, the node on which the
+ * device is added first will send configuration commands to its peer but the
+ * peer will not know about the device yet.  It will warn and ignore these
+ * commands.  Once the device is added on the second node, the second node will
+ * send the same device configuration commands, but in the other direction.
+ *
+ * (We can also end up here if drbd is misconfigured.)
+ */
+static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
+{
+	conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
+		  cmdname(pi->cmd), pi->vnr);
+	return ignore_remaining_packet(tconn, pi);
+}
+
+static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
+{
+	struct drbd_conf *mdev;
+	struct p_rs_param_95 *p;
 	unsigned int header_size, data_size, exp_max_sz;
 	struct crypto_hash *verify_tfm = NULL;
 	struct crypto_hash *csums_tfm = NULL;
-	const int apv = mdev->agreed_pro_version;
-	int *rs_plan_s = NULL;
+	struct net_conf *old_net_conf, *new_net_conf = NULL;
+	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
+	const int apv = tconn->agreed_pro_version;
+	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
 	int fifo_size = 0;
+	int err;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return config_unknown_volume(tconn, pi);
 
 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
 		    : apv == 88 ? sizeof(struct p_rs_param)
@@ -2823,32 +3341,49 @@
 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
 
-	if (packet_size > exp_max_sz) {
+	if (pi->size > exp_max_sz) {
 		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
-		    packet_size, exp_max_sz);
-		return false;
+		    pi->size, exp_max_sz);
+		return -EIO;
 	}
 
 	if (apv <= 88) {
-		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80);
-		data_size   = packet_size  - header_size;
+		header_size = sizeof(struct p_rs_param);
+		data_size = pi->size - header_size;
 	} else if (apv <= 94) {
-		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80);
-		data_size   = packet_size  - header_size;
+		header_size = sizeof(struct p_rs_param_89);
+		data_size = pi->size - header_size;
 		D_ASSERT(data_size == 0);
 	} else {
-		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80);
-		data_size   = packet_size  - header_size;
+		header_size = sizeof(struct p_rs_param_95);
+		data_size = pi->size - header_size;
 		D_ASSERT(data_size == 0);
 	}
 
 	/* initialize verify_alg and csums_alg */
+	p = pi->data;
 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
-	if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
-		return false;
+	err = drbd_recv_all(mdev->tconn, p, header_size);
+	if (err)
+		return err;
 
-	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
+	mutex_lock(&mdev->tconn->conf_update);
+	old_net_conf = mdev->tconn->net_conf;
+	if (get_ldev(mdev)) {
+		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
+		if (!new_disk_conf) {
+			put_ldev(mdev);
+			mutex_unlock(&mdev->tconn->conf_update);
+			dev_err(DEV, "Allocation of new disk_conf failed\n");
+			return -ENOMEM;
+		}
+
+		old_disk_conf = mdev->ldev->disk_conf;
+		*new_disk_conf = *old_disk_conf;
+
+		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
+	}
 
 	if (apv >= 88) {
 		if (apv == 88) {
@@ -2856,12 +3391,13 @@
 				dev_err(DEV, "verify-alg of wrong size, "
 					"peer wants %u, accepting only up to %u byte\n",
 					data_size, SHARED_SECRET_MAX);
-				return false;
+				err = -EIO;
+				goto reconnect;
 			}
 
-			if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
-				return false;
-
+			err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
+			if (err)
+				goto reconnect;
 			/* we expect NUL terminated string */
 			/* but just in case someone tries to be evil */
 			D_ASSERT(p->verify_alg[data_size-1] == 0);
@@ -2876,10 +3412,10 @@
 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
 		}
 
-		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
+		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
-				    mdev->sync_conf.verify_alg, p->verify_alg);
+				    old_net_conf->verify_alg, p->verify_alg);
 				goto disconnect;
 			}
 			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -2890,10 +3426,10 @@
 			}
 		}
 
-		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
+		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
-				    mdev->sync_conf.csums_alg, p->csums_alg);
+				    old_net_conf->csums_alg, p->csums_alg);
 				goto disconnect;
 			}
 			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -2904,57 +3440,91 @@
 			}
 		}
 
-		if (apv > 94) {
-			mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
-			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
-			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
-			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
-			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
+		if (apv > 94 && new_disk_conf) {
+			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
+			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
+			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
+			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
 
-			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
-			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
-				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
-				if (!rs_plan_s) {
+			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+			if (fifo_size != mdev->rs_plan_s->size) {
+				new_plan = fifo_alloc(fifo_size);
+				if (!new_plan) {
 					dev_err(DEV, "kmalloc of fifo_buffer failed");
+					put_ldev(mdev);
 					goto disconnect;
 				}
 			}
 		}
 
-		spin_lock(&mdev->peer_seq_lock);
-		/* lock against drbd_nl_syncer_conf() */
-		if (verify_tfm) {
-			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
-			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
-			crypto_free_hash(mdev->verify_tfm);
-			mdev->verify_tfm = verify_tfm;
-			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
+		if (verify_tfm || csums_tfm) {
+			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+			if (!new_net_conf) {
+				dev_err(DEV, "Allocation of new net_conf failed\n");
+				goto disconnect;
+			}
+
+			*new_net_conf = *old_net_conf;
+
+			if (verify_tfm) {
+				strcpy(new_net_conf->verify_alg, p->verify_alg);
+				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
+				crypto_free_hash(mdev->tconn->verify_tfm);
+				mdev->tconn->verify_tfm = verify_tfm;
+				dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
+			}
+			if (csums_tfm) {
+				strcpy(new_net_conf->csums_alg, p->csums_alg);
+				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
+				crypto_free_hash(mdev->tconn->csums_tfm);
+				mdev->tconn->csums_tfm = csums_tfm;
+				dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
+			}
+			rcu_assign_pointer(tconn->net_conf, new_net_conf);
 		}
-		if (csums_tfm) {
-			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
-			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
-			crypto_free_hash(mdev->csums_tfm);
-			mdev->csums_tfm = csums_tfm;
-			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
-		}
-		if (fifo_size != mdev->rs_plan_s.size) {
-			kfree(mdev->rs_plan_s.values);
-			mdev->rs_plan_s.values = rs_plan_s;
-			mdev->rs_plan_s.size   = fifo_size;
-			mdev->rs_planed = 0;
-		}
-		spin_unlock(&mdev->peer_seq_lock);
 	}
 
-	return ok;
+	if (new_disk_conf) {
+		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
+		put_ldev(mdev);
+	}
+
+	if (new_plan) {
+		old_plan = mdev->rs_plan_s;
+		rcu_assign_pointer(mdev->rs_plan_s, new_plan);
+	}
+
+	mutex_unlock(&mdev->tconn->conf_update);
+	synchronize_rcu();
+	if (new_net_conf)
+		kfree(old_net_conf);
+	kfree(old_disk_conf);
+	kfree(old_plan);
+
+	return 0;
+
+reconnect:
+	if (new_disk_conf) {
+		put_ldev(mdev);
+		kfree(new_disk_conf);
+	}
+	mutex_unlock(&mdev->tconn->conf_update);
+	return -EIO;
+
 disconnect:
+	kfree(new_plan);
+	if (new_disk_conf) {
+		put_ldev(mdev);
+		kfree(new_disk_conf);
+	}
+	mutex_unlock(&mdev->tconn->conf_update);
 	/* just for completeness: actually not needed,
 	 * as this is not reached if csums_tfm was ok. */
 	crypto_free_hash(csums_tfm);
 	/* but free the verify_tfm again, if csums_tfm did not work out */
 	crypto_free_hash(verify_tfm);
-	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-	return false;
+	conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+	return -EIO;
 }
 
 /* warn if the arguments differ by more than 12.5% */
@@ -2970,59 +3540,77 @@
 		     (unsigned long long)a, (unsigned long long)b);
 }
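
Since 12.5% is one eighth, the "differ considerably" check presumably reduces
to a right shift by three; the helper's body is not shown here, so this is a
hedged reconstruction of the arithmetic only:

    #include <assert.h>

    typedef unsigned long long sector_t;

    /* differ by more than 12.5% == difference exceeds 1/8 of either value */
    static int differ_considerably(sector_t a, sector_t b)
    {
        sector_t d = a > b ? a - b : b - a;

        return d > (a >> 3) || d > (b >> 3);
    }

    int main(void)
    {
        assert(!differ_considerably(1000, 1100)); /* 10%: fine */
        assert(differ_considerably(1000, 1200));  /* 20%: warn */
        return 0;
    }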
 
-static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_sizes *p = &mdev->data.rbuf.sizes;
+	struct drbd_conf *mdev;
+	struct p_sizes *p = pi->data;
 	enum determine_dev_size dd = unchanged;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
 
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return config_unknown_volume(tconn, pi);
+
 	p_size = be64_to_cpu(p->d_size);
 	p_usize = be64_to_cpu(p->u_size);
 
-	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
-		dev_err(DEV, "some backing storage is needed\n");
-		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-		return false;
-	}
-
 	/* just store the peer's disk size for now.
 	 * we still need to figure out whether we accept that. */
 	mdev->p_size = p_size;
 
 	if (get_ldev(mdev)) {
+		rcu_read_lock();
+		my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+		rcu_read_unlock();
+
 		warn_if_differ_considerably(mdev, "lower level device sizes",
 			   p_size, drbd_get_max_capacity(mdev->ldev));
 		warn_if_differ_considerably(mdev, "user requested size",
-					    p_usize, mdev->ldev->dc.disk_size);
+					    p_usize, my_usize);
 
 		/* if this is the first connect, or an otherwise expected
 		 * param exchange, choose the minimum */
 		if (mdev->state.conn == C_WF_REPORT_PARAMS)
-			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
-					     p_usize);
-
-		my_usize = mdev->ldev->dc.disk_size;
-
-		if (mdev->ldev->dc.disk_size != p_usize) {
-			mdev->ldev->dc.disk_size = p_usize;
-			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
-			     (unsigned long)mdev->ldev->dc.disk_size);
-		}
+			p_usize = min_not_zero(my_usize, p_usize);
 
 		/* Never shrink a device with usable data during connect.
 		   But allow online shrinking if we are connected. */
-		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
-		   drbd_get_capacity(mdev->this_bdev) &&
-		   mdev->state.disk >= D_OUTDATED &&
-		   mdev->state.conn < C_CONNECTED) {
+		if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
+		    drbd_get_capacity(mdev->this_bdev) &&
+		    mdev->state.disk >= D_OUTDATED &&
+		    mdev->state.conn < C_CONNECTED) {
 			dev_err(DEV, "The peer's disk size is too small!\n");
-			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-			mdev->ldev->dc.disk_size = my_usize;
+			conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 			put_ldev(mdev);
-			return false;
+			return -EIO;
 		}
+
+		if (my_usize != p_usize) {
+			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
+
+			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
+			if (!new_disk_conf) {
+				dev_err(DEV, "Allocation of new disk_conf failed\n");
+				put_ldev(mdev);
+				return -ENOMEM;
+			}
+
+			mutex_lock(&mdev->tconn->conf_update);
+			old_disk_conf = mdev->ldev->disk_conf;
+			*new_disk_conf = *old_disk_conf;
+			new_disk_conf->disk_size = p_usize;
+
+			rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
+			mutex_unlock(&mdev->tconn->conf_update);
+			synchronize_rcu();
+			kfree(old_disk_conf);
+
+			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
+				 (unsigned long)my_usize);
+		}
+
 		put_ldev(mdev);
 	}
 
@@ -3031,7 +3619,7 @@
 		dd = drbd_determine_dev_size(mdev, ddsf);
 		put_ldev(mdev);
 		if (dd == dev_size_error)
-			return false;
+			return -EIO;
 		drbd_md_sync(mdev);
 	} else {
 		/* I am diskless, need to accept the peer's size. */
@@ -3070,16 +3658,25 @@
 		}
 	}
 
-	return true;
+	return 0;
 }
 
-static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_uuids *p = &mdev->data.rbuf.uuids;
+	struct drbd_conf *mdev;
+	struct p_uuids *p = pi->data;
 	u64 *p_uuid;
 	int i, updated_uuids = 0;
 
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return config_unknown_volume(tconn, pi);
+
 	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
+	if (!p_uuid) {
+		dev_err(DEV, "kmalloc of p_uuid failed\n");
+		return -ENOMEM;
+	}
 
 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
@@ -3093,14 +3690,14 @@
 	    (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
 		dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
 		    (unsigned long long)mdev->ed_uuid);
-		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-		return false;
+		conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+		return -EIO;
 	}
 
 	if (get_ldev(mdev)) {
 		int skip_initial_sync =
 			mdev->state.conn == C_CONNECTED &&
-			mdev->agreed_pro_version >= 90 &&
+			mdev->tconn->agreed_pro_version >= 90 &&
 			mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
 			(p_uuid[UI_FLAGS] & 8);
 		if (skip_initial_sync) {
@@ -3127,14 +3724,15 @@
 	   ongoing cluster wide state change is finished. That is important if
 	   we are primary and are detaching from our disk. We need to see the
 	   new disk state... */
-	wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+	mutex_lock(mdev->state_mutex);
+	mutex_unlock(mdev->state_mutex);
 	if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
 		updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
 
 	if (updated_uuids)
 		drbd_print_uuids(mdev, "receiver updated UUIDs to");
 
-	return true;
+	return 0;
 }
 
 /**
@@ -3146,6 +3744,7 @@
 	union drbd_state ms;
 
 	static enum drbd_conns c_tab[] = {
+		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
 		[C_CONNECTED] = C_CONNECTED,
 
 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
@@ -3167,40 +3766,74 @@
 	return ms;
 }
 
-static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_req_state *p = &mdev->data.rbuf.req_state;
+	struct drbd_conf *mdev;
+	struct p_req_state *p = pi->data;
 	union drbd_state mask, val;
 	enum drbd_state_rv rv;
 
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
+
 	mask.i = be32_to_cpu(p->mask);
 	val.i = be32_to_cpu(p->val);
 
-	if (test_bit(DISCARD_CONCURRENT, &mdev->flags) &&
-	    test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
+	if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
+	    mutex_is_locked(mdev->state_mutex)) {
 		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
-		return true;
+		return 0;
 	}
 
 	mask = convert_state(mask);
 	val = convert_state(val);
 
 	rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
-
 	drbd_send_sr_reply(mdev, rv);
+
 	drbd_md_sync(mdev);
 
-	return true;
+	return 0;
 }
 
-static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_state *p = &mdev->data.rbuf.state;
+	struct p_req_state *p = pi->data;
+	union drbd_state mask, val;
+	enum drbd_state_rv rv;
+
+	mask.i = be32_to_cpu(p->mask);
+	val.i = be32_to_cpu(p->val);
+
+	if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
+	    mutex_is_locked(&tconn->cstate_mutex)) {
+		conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
+		return 0;
+	}
+
+	mask = convert_state(mask);
+	val = convert_state(val);
+
+	rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
+	conn_send_sr_reply(tconn, rv);
+
+	return 0;
+}
+
+static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
+{
+	struct drbd_conf *mdev;
+	struct p_state *p = pi->data;
 	union drbd_state os, ns, peer_state;
 	enum drbd_disk_state real_peer_disk;
 	enum chg_state_flags cs_flags;
 	int rv;
 
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return config_unknown_volume(tconn, pi);
+
 	peer_state.i = be32_to_cpu(p->state);
 
 	real_peer_disk = peer_state.disk;
@@ -3209,16 +3842,16 @@
 		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
 	}
 
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
  retry:
-	os = ns = mdev->state;
-	spin_unlock_irq(&mdev->req_lock);
+	os = ns = drbd_read_state(mdev);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	/* If some other part of the code (asender thread, timeout)
 	 * already decided to close the connection again,
 	 * we must not "re-establish" it here. */
 	if (os.conn <= C_TEAR_DOWN)
-		return false;
+		return -ECONNRESET;
 
 	/* If this is the "end of sync" confirmation, usually the peer disk
 	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
@@ -3246,10 +3879,18 @@
 			 peer_state.conn == C_CONNECTED) {
 			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
 				drbd_resync_finished(mdev);
-			return true;
+			return 0;
 		}
 	}
 
+	/* explicit verify finished notification, stop sector reached. */
+	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
+	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
+		ov_out_of_sync_print(mdev);
+		drbd_resync_finished(mdev);
+		return 0;
+	}
+
 	/* peer says his disk is inconsistent, while we think it is uptodate,
 	 * and this happens while the peer still thinks we have a sync going on,
 	 * but we think we are already done with the sync.
@@ -3298,17 +3939,17 @@
 				peer_state.disk = D_DISKLESS;
 				real_peer_disk = D_DISKLESS;
 			} else {
-				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
-					return false;
+				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
+					return -EIO;
 				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
-				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-				return false;
+				conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+				return -EIO;
 			}
 		}
 	}
 
-	spin_lock_irq(&mdev->req_lock);
-	if (mdev->state.i != os.i)
+	spin_lock_irq(&mdev->tconn->req_lock);
+	if (os.i != drbd_read_state(mdev).i)
 		goto retry;
 	clear_bit(CONSIDER_RESYNC, &mdev->flags);
 	ns.peer = peer_state.role;
@@ -3317,25 +3958,25 @@
 	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
 		ns.disk = mdev->new_state_tmp.disk;
 	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
-	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
+	if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
 	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
-		/* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
+		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
 		   for temporary network outages! */
-		spin_unlock_irq(&mdev->req_lock);
+		spin_unlock_irq(&mdev->tconn->req_lock);
 		dev_err(DEV, "Aborting Connect, cannot thaw IO with a peer that is only Consistent\n");
-		tl_clear(mdev);
+		tl_clear(mdev->tconn);
 		drbd_uuid_new_current(mdev);
 		clear_bit(NEW_CUR_UUID, &mdev->flags);
-		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
-		return false;
+		conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
+		return -EIO;
 	}
 	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
-	ns = mdev->state;
-	spin_unlock_irq(&mdev->req_lock);
+	ns = drbd_read_state(mdev);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	if (rv < SS_SUCCESS) {
-		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-		return false;
+		conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+		return -EIO;
 	}
 
 	if (os.conn > C_WF_REPORT_PARAMS) {
@@ -3349,16 +3990,21 @@
 		}
 	}
 
-	mdev->net_conf->want_lose = 0;
+	clear_bit(DISCARD_MY_DATA, &mdev->flags);
 
 	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
 
-	return true;
+	return 0;
 }
 
-static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid;
+	struct drbd_conf *mdev;
+	struct p_rs_uuid *p = pi->data;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	wait_event(mdev->misc_wait,
 		   mdev->state.conn == C_WF_SYNC_UUID ||
@@ -3381,7 +4027,7 @@
 	} else
 		dev_err(DEV, "Ignoring SyncUUID packet!\n");
 
-	return true;
+	return 0;
 }
 
 /**
@@ -3391,27 +4037,27 @@
  * code upon failure.
  */
 static int
-receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
-		     unsigned long *buffer, struct bm_xfer_ctx *c)
+receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
+		     unsigned long *p, struct bm_xfer_ctx *c)
 {
-	unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
-	unsigned want = num_words * sizeof(long);
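+	/* Per-packet payload is the socket buffer size minus the wire header. */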
+	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
+				 drbd_header_size(mdev->tconn);
+	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
+				       c->bm_words - c->word_offset);
+	unsigned int want = num_words * sizeof(*p);
 	int err;
 
-	if (want != data_size) {
-		dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
+	if (want != size) {
+		dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
 		return -EIO;
 	}
 	if (want == 0)
 		return 0;
-	err = drbd_recv(mdev, buffer, want);
-	if (err != want) {
-		if (err >= 0)
-			err = -EIO;
+	err = drbd_recv_all(mdev->tconn, p, want);
+	if (err)
 		return err;
-	}
 
-	drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
+	drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
 
 	c->word_offset += num_words;
 	c->bit_offset = c->word_offset * BITS_PER_LONG;
@@ -3421,6 +4067,21 @@
 	return 1;
 }
 
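+/* The compressed bitmap's 'encoding' byte packs three fields:
+ * bits 0-3: bitmap code, bits 4-6: pad bit count, bit 7: start toggle. */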
+static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
+{
+	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
+}
+
+static int dcbp_get_start(struct p_compressed_bm *p)
+{
+	return (p->encoding & 0x80) != 0;
+}
+
+static int dcbp_get_pad_bits(struct p_compressed_bm *p)
+{
+	return (p->encoding >> 4) & 0x7;
+}
+
 /**
  * recv_bm_rle_bits
  *
@@ -3430,7 +4091,8 @@
 static int
 recv_bm_rle_bits(struct drbd_conf *mdev,
 		struct p_compressed_bm *p,
-		struct bm_xfer_ctx *c)
+		struct bm_xfer_ctx *c,
+		unsigned int len)
 {
 	struct bitstream bs;
 	u64 look_ahead;
@@ -3438,12 +4100,11 @@
 	u64 tmp;
 	unsigned long s = c->bit_offset;
 	unsigned long e;
-	int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head));
-	int toggle = DCBP_get_start(p);
+	int toggle = dcbp_get_start(p);
 	int have;
 	int bits;
 
-	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
+	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
 
 	bits = bitstream_get_bits(&bs, &look_ahead, 64);
 	if (bits < 0)
@@ -3495,17 +4156,18 @@
 static int
 decode_bitmap_c(struct drbd_conf *mdev,
 		struct p_compressed_bm *p,
-		struct bm_xfer_ctx *c)
+		struct bm_xfer_ctx *c,
+		unsigned int len)
 {
-	if (DCBP_get_code(p) == RLE_VLI_Bits)
-		return recv_bm_rle_bits(mdev, p, c);
+	if (dcbp_get_code(p) == RLE_VLI_Bits)
+		return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
 
 	/* other variants had been implemented for evaluation,
 	 * but have been dropped as this one turned out to be "best"
 	 * during all our tests. */
 
 	dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
-	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+	conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
 	return -EIO;
 }
 
@@ -3513,11 +4175,13 @@
 		const char *direction, struct bm_xfer_ctx *c)
 {
 	/* what would it take to transfer it "plaintext" */
-	unsigned plain = sizeof(struct p_header80) *
-		((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
-		+ c->bm_words * sizeof(long);
-	unsigned total = c->bytes[0] + c->bytes[1];
-	unsigned r;
+	unsigned int header_size = drbd_header_size(mdev->tconn);
+	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
+	unsigned int plain =
+		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
+		c->bm_words * sizeof(unsigned long);
+	unsigned int total = c->bytes[0] + c->bytes[1];
+	unsigned int r;
 
 	/* total cannot be zero, but just in case: */
 	if (total == 0)
@@ -3551,67 +4215,63 @@
    in order to be agnostic to the 32 vs 64 bits issue.
 
-   returns 0 on failure, 1 if we successfully received it. */
+   returns a negative error code on failure, 0 when successfully done. */
-static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
 {
+	struct drbd_conf *mdev;
 	struct bm_xfer_ctx c;
-	void *buffer;
 	int err;
-	int ok = false;
-	struct p_header80 *h = &mdev->data.rbuf.header.h80;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
 	/* you are supposed to send additional out-of-sync information
 	 * if you actually set bits during this phase */
 
-	/* maybe we should use some per thread scratch page,
-	 * and allocate that during initial device creation? */
-	buffer	 = (unsigned long *) __get_free_page(GFP_NOIO);
-	if (!buffer) {
-		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
-		goto out;
-	}
-
 	c = (struct bm_xfer_ctx) {
 		.bm_bits = drbd_bm_bits(mdev),
 		.bm_words = drbd_bm_words(mdev),
 	};
 
 	for(;;) {
-		if (cmd == P_BITMAP) {
-			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
-		} else if (cmd == P_COMPRESSED_BITMAP) {
+		if (pi->cmd == P_BITMAP)
+			err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
+		else if (pi->cmd == P_COMPRESSED_BITMAP) {
 			/* MAYBE: sanity check that we speak proto >= 90,
 			 * and the feature is enabled! */
-			struct p_compressed_bm *p;
+			struct p_compressed_bm *p = pi->data;
 
-			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
+			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
 				dev_err(DEV, "ReportCBitmap packet too large\n");
+				err = -EIO;
 				goto out;
 			}
-			/* use the page buff */
-			p = buffer;
-			memcpy(p, h, sizeof(*h));
-			if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
-				goto out;
-			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
-				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
+			if (pi->size <= sizeof(*p)) {
+				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
+				err = -EIO;
 				goto out;
 			}
-			err = decode_bitmap_c(mdev, p, &c);
+			err = drbd_recv_all(mdev->tconn, p, pi->size);
+			if (err)
+				goto out;
+			err = decode_bitmap_c(mdev, p, &c, pi->size);
 		} else {
-			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
+			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
+			err = -EIO;
 			goto out;
 		}
 
-		c.packets[cmd == P_BITMAP]++;
-		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size;
+		c.packets[pi->cmd == P_BITMAP]++;
+		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
 
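+		/* err > 0: chunk consumed, more packets expected;
+		 * err == 0: bitmap complete; err < 0: receive or decode error. */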
 		if (err <= 0) {
 			if (err < 0)
 				goto out;
 			break;
 		}
-		if (!drbd_recv_header(mdev, &cmd, &data_size))
+		err = drbd_recv_header(mdev->tconn, pi);
+		if (err)
 			goto out;
 	}
 
@@ -3620,8 +4280,8 @@
 	if (mdev->state.conn == C_WF_BITMAP_T) {
 		enum drbd_state_rv rv;
 
-		ok = !drbd_send_bitmap(mdev);
-		if (!ok)
+		err = drbd_send_bitmap(mdev);
+		if (err)
 			goto out;
 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
 		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
@@ -3632,47 +4292,40 @@
 		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
 		    drbd_conn_str(mdev->state.conn));
 	}
+	err = 0;
 
-	ok = true;
  out:
 	drbd_bm_unlock(mdev);
-	if (ok && mdev->state.conn == C_WF_BITMAP_S)
+	if (!err && mdev->state.conn == C_WF_BITMAP_S)
 		drbd_start_resync(mdev, C_SYNC_SOURCE);
-	free_page((unsigned long) buffer);
-	return ok;
+	return err;
 }
 
-static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	/* TODO zero copy sink :) */
-	static char sink[128];
-	int size, want, r;
+	conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
+		 pi->cmd, pi->size);
 
-	dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
-		 cmd, data_size);
-
-	size = data_size;
-	while (size > 0) {
-		want = min_t(int, size, sizeof(sink));
-		r = drbd_recv(mdev, sink, want);
-		ERR_IF(r <= 0) break;
-		size -= r;
-	}
-	return size == 0;
+	return ignore_remaining_packet(tconn, pi);
 }
 
-static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	/* Make sure we've acked all the TCP data associated
 	 * with the data requests being unplugged */
-	drbd_tcp_quickack(mdev->data.socket);
+	drbd_tcp_quickack(tconn->data.socket);
 
-	return true;
+	return 0;
 }
 
-static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_block_desc *p = &mdev->data.rbuf.block_desc;
+	struct drbd_conf *mdev;
+	struct p_block_desc *p = pi->data;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	switch (mdev->state.conn) {
 	case C_WF_SYNC_UUID:
@@ -3686,15 +4339,13 @@
 
 	drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
 
-	return true;
+	return 0;
 }
 
-typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
-
 struct data_cmd {
 	int expect_payload;
 	size_t pkt_size;
-	drbd_cmd_handler_f function;
+	int (*fn)(struct drbd_tconn *, struct packet_info *);
 };
 
 static struct data_cmd drbd_cmd_handler[] = {
@@ -3702,13 +4353,13 @@
 	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
 	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
 	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
-	[P_BITMAP]	    = { 1, sizeof(struct p_header80), receive_bitmap } ,
-	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } ,
-	[P_UNPLUG_REMOTE]   = { 0, sizeof(struct p_header80), receive_UnplugRemote },
+	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
+	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
+	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
 	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
 	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
-	[P_SYNC_PARAM]	    = { 1, sizeof(struct p_header80), receive_SyncParam },
-	[P_SYNC_PARAM89]    = { 1, sizeof(struct p_header80), receive_SyncParam },
+	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
+	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
 	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
 	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
 	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
@@ -3720,124 +4371,75 @@
 	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
 	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
 	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
-	/* anything missing from this table is in
-	 * the asender_tbl, see get_asender_cmd */
-	[P_MAX_CMD]	    = { 0, 0, NULL },
+	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
+	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
 };
 
-/* All handler functions that expect a sub-header get that sub-heder in
-   mdev->data.rbuf.header.head.payload.
-
-   Usually in mdev->data.rbuf.header.head the callback can find the usual
-   p_header, but they may not rely on that. Since there is also p_header95 !
- */
-
-static void drbdd(struct drbd_conf *mdev)
+static void drbdd(struct drbd_tconn *tconn)
 {
-	union p_header *header = &mdev->data.rbuf.header;
-	unsigned int packet_size;
-	enum drbd_packets cmd;
+	struct packet_info pi;
 	size_t shs; /* sub header size */
-	int rv;
+	int err;
 
-	while (get_t_state(&mdev->receiver) == Running) {
-		drbd_thread_current_set_cpu(mdev);
-		if (!drbd_recv_header(mdev, &cmd, &packet_size))
+	while (get_t_state(&tconn->receiver) == RUNNING) {
+		struct data_cmd *cmd;
+
+		drbd_thread_current_set_cpu(&tconn->receiver);
+		if (drbd_recv_header(tconn, &pi))
 			goto err_out;
 
-		if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) {
-			dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size);
+		cmd = &drbd_cmd_handler[pi.cmd];
+		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
+			conn_err(tconn, "Unexpected data packet %s (0x%04x)",
+				 cmdname(pi.cmd), pi.cmd);
 			goto err_out;
 		}
 
-		shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
-		if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
-			dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
+		shs = cmd->pkt_size;
+		if (pi.size > shs && !cmd->expect_payload) {
+			conn_err(tconn, "No payload expected %s l:%d\n",
+				 cmdname(pi.cmd), pi.size);
 			goto err_out;
 		}
 
 		if (shs) {
-			rv = drbd_recv(mdev, &header->h80.payload, shs);
-			if (unlikely(rv != shs)) {
-				if (!signal_pending(current))
-					dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
+			err = drbd_recv_all_warn(tconn, pi.data, shs);
+			if (err)
 				goto err_out;
-			}
+			pi.size -= shs;
 		}
 
-		rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
-
-		if (unlikely(!rv)) {
-			dev_err(DEV, "error receiving %s, l: %d!\n",
-			    cmdname(cmd), packet_size);
+		err = cmd->fn(tconn, &pi);
+		if (err) {
+			conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
+				 cmdname(pi.cmd), err, pi.size);
 			goto err_out;
 		}
 	}
+	return;
 
-	if (0) {
-	err_out:
-		drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
-	}
-	/* If we leave here, we probably want to update at least the
-	 * "Connected" indicator on stable storage. Do so explicitly here. */
-	drbd_md_sync(mdev);
+    err_out:
+	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
 }
 
-void drbd_flush_workqueue(struct drbd_conf *mdev)
+void conn_flush_workqueue(struct drbd_tconn *tconn)
 {
 	struct drbd_wq_barrier barr;
 
 	barr.w.cb = w_prev_work_done;
+	barr.w.tconn = tconn;
 	init_completion(&barr.done);
-	drbd_queue_work(&mdev->data.work, &barr.w);
+	drbd_queue_work(&tconn->sender_work, &barr.w);
 	wait_for_completion(&barr.done);
 }
 
-void drbd_free_tl_hash(struct drbd_conf *mdev)
+static void conn_disconnect(struct drbd_tconn *tconn)
 {
-	struct hlist_head *h;
+	struct drbd_conf *mdev;
+	enum drbd_conns oc;
+	int vnr;
 
-	spin_lock_irq(&mdev->req_lock);
-
-	if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) {
-		spin_unlock_irq(&mdev->req_lock);
-		return;
-	}
-	/* paranoia code */
-	for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
-		if (h->first)
-			dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
-				(int)(h - mdev->ee_hash), h->first);
-	kfree(mdev->ee_hash);
-	mdev->ee_hash = NULL;
-	mdev->ee_hash_s = 0;
-
-	/* We may not have had the chance to wait for all locally pending
-	 * application requests. The hlist_add_fake() prevents access after
-	 * free on master bio completion. */
-	for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) {
-		struct drbd_request *req;
-		struct hlist_node *pos, *n;
-		hlist_for_each_entry_safe(req, pos, n, h, collision) {
-			hlist_del_init(&req->collision);
-			hlist_add_fake(&req->collision);
-		}
-	}
-
-	kfree(mdev->tl_hash);
-	mdev->tl_hash = NULL;
-	mdev->tl_hash_s = 0;
-	spin_unlock_irq(&mdev->req_lock);
-}
-
-static void drbd_disconnect(struct drbd_conf *mdev)
-{
-	enum drbd_fencing_p fp;
-	union drbd_state os, ns;
-	int rv = SS_UNKNOWN_ERROR;
-	unsigned int i;
-
-	if (mdev->state.conn == C_STANDALONE)
+	if (tconn->cstate == C_STANDALONE)
 		return;
 
 	/* We are about to start the cleanup after connection loss.
@@ -3845,18 +4447,54 @@
 	 * Usually we should be in some network failure state already,
 	 * but just in case we are not, we fix it up here.
 	 */
-	drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+	conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
 
 	/* asender does not clean up anything. it must not interfere, either */
-	drbd_thread_stop(&mdev->asender);
-	drbd_free_sock(mdev);
+	drbd_thread_stop(&tconn->asender);
+	drbd_free_sock(tconn);
+
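+	/* Hold a kref on each volume so it cannot vanish while we drop the
+	 * RCU read lock around the potentially sleeping drbd_disconnected(). */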
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		kref_get(&mdev->kref);
+		rcu_read_unlock();
+		drbd_disconnected(mdev);
+		kref_put(&mdev->kref, &drbd_minor_destroy);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+
+	if (!list_empty(&tconn->current_epoch->list))
+		conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
+	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
+	atomic_set(&tconn->current_epoch->epoch_size, 0);
+	tconn->send.seen_any_write_yet = false;
+
+	conn_info(tconn, "Connection closed\n");
+
+	if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
+		conn_try_outdate_peer_async(tconn);
+
+	spin_lock_irq(&tconn->req_lock);
+	oc = tconn->cstate;
+	if (oc >= C_UNCONNECTED)
+		_conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
+
+	spin_unlock_irq(&tconn->req_lock);
+
+	if (oc == C_DISCONNECTING)
+		conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
+}
+
+static int drbd_disconnected(struct drbd_conf *mdev)
+{
+	unsigned int i;
 
 	/* wait for current activity to cease. */
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
 	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
 	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
 	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	/* We do not have data structures that would allow us to
 	 * get the rs_pending_cnt down to 0 again.
@@ -3874,7 +4512,6 @@
 	atomic_set(&mdev->rs_pending_cnt, 0);
 	wake_up(&mdev->misc_wait);
 
-	/* make sure syncer is stopped and w_resume_next_sg queued */
 	del_timer_sync(&mdev->resync_timer);
 	resync_timer_fn((unsigned long)mdev);
 
@@ -3883,50 +4520,25 @@
 	 * to be "canceled" */
 	drbd_flush_workqueue(mdev);
 
-	/* This also does reclaim_net_ee().  If we do this too early, we might
-	 * miss some resync ee and pages.*/
-	drbd_process_done_ee(mdev);
+	drbd_finish_peer_reqs(mdev);
+
+	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
+	   might have queued new work. The flush before drbd_finish_peer_reqs() is
+	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
+	drbd_flush_workqueue(mdev);
+
+	/* need to do it again, drbd_finish_peer_reqs() may have populated it
+	 * again via drbd_try_clear_on_disk_bm(). */
+	drbd_rs_cancel_all(mdev);
 
 	kfree(mdev->p_uuid);
 	mdev->p_uuid = NULL;
 
-	if (!is_susp(mdev->state))
-		tl_clear(mdev);
-
-	dev_info(DEV, "Connection closed\n");
+	if (!drbd_suspended(mdev))
+		tl_clear(mdev->tconn);
 
 	drbd_md_sync(mdev);
 
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
-		drbd_try_outdate_peer_async(mdev);
-
-	spin_lock_irq(&mdev->req_lock);
-	os = mdev->state;
-	if (os.conn >= C_UNCONNECTED) {
-		/* Do not restart in case we are C_DISCONNECTING */
-		ns = os;
-		ns.conn = C_UNCONNECTED;
-		rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
-	}
-	spin_unlock_irq(&mdev->req_lock);
-
-	if (os.conn == C_DISCONNECTING) {
-		wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
-
-		crypto_free_hash(mdev->cram_hmac_tfm);
-		mdev->cram_hmac_tfm = NULL;
-
-		kfree(mdev->net_conf);
-		mdev->net_conf = NULL;
-		drbd_request_state(mdev, NS(conn, C_STANDALONE));
-	}
-
 	/* serialize with bitmap writeout triggered by the state change,
 	 * if any. */
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
@@ -3938,7 +4550,7 @@
 	 * Actually we don't care for exactly when the network stack does its
 	 * put_page(), but release our reference on these pages right here.
 	 */
-	i = drbd_release_ee(mdev, &mdev->net_ee);
+	i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
 	if (i)
 		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
 	i = atomic_read(&mdev->pp_in_use_by_net);
@@ -3953,9 +4565,7 @@
 	D_ASSERT(list_empty(&mdev->sync_ee));
 	D_ASSERT(list_empty(&mdev->done_ee));
 
-	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
-	atomic_set(&mdev->current_epoch->epoch_size, 0);
-	D_ASSERT(list_empty(&mdev->current_epoch->list));
+	return 0;
 }
 
 /*
@@ -3967,29 +4577,19 @@
  *
  * for now, they are expected to be zero, but ignored.
  */
-static int drbd_send_handshake(struct drbd_conf *mdev)
+static int drbd_send_features(struct drbd_tconn *tconn)
 {
-	/* ASSERT current == mdev->receiver ... */
-	struct p_handshake *p = &mdev->data.sbuf.handshake;
-	int ok;
+	struct drbd_socket *sock;
+	struct p_connection_features *p;
 
-	if (mutex_lock_interruptible(&mdev->data.mutex)) {
-		dev_err(DEV, "interrupted during initial handshake\n");
-		return 0; /* interrupted. not ok. */
-	}
-
-	if (mdev->data.socket == NULL) {
-		mutex_unlock(&mdev->data.mutex);
-		return 0;
-	}
-
+	sock = &tconn->data;
+	p = conn_prepare_command(tconn, sock);
+	if (!p)
+		return -EIO;
 	memset(p, 0, sizeof(*p));
 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
-	ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE,
-			     (struct p_header80 *)p, sizeof(*p), 0 );
-	mutex_unlock(&mdev->data.mutex);
-	return ok;
+	return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
 }
 
 /*
@@ -3999,42 +4599,38 @@
  *  -1 peer talks different language,
  *     no point in trying again, please go standalone.
  */
-static int drbd_do_handshake(struct drbd_conf *mdev)
+static int drbd_do_features(struct drbd_tconn *tconn)
 {
-	/* ASSERT current == mdev->receiver ... */
-	struct p_handshake *p = &mdev->data.rbuf.handshake;
-	const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
-	unsigned int length;
-	enum drbd_packets cmd;
-	int rv;
+	/* ASSERT current == tconn->receiver ... */
+	struct p_connection_features *p;
+	const int expect = sizeof(struct p_connection_features);
+	struct packet_info pi;
+	int err;
 
-	rv = drbd_send_handshake(mdev);
-	if (!rv)
+	err = drbd_send_features(tconn);
+	if (err)
 		return 0;
 
-	rv = drbd_recv_header(mdev, &cmd, &length);
-	if (!rv)
+	err = drbd_recv_header(tconn, &pi);
+	if (err)
 		return 0;
 
-	if (cmd != P_HAND_SHAKE) {
-		dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
-		     cmdname(cmd), cmd);
+	if (pi.cmd != P_CONNECTION_FEATURES) {
+		conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
+			 cmdname(pi.cmd), pi.cmd);
 		return -1;
 	}
 
-	if (length != expect) {
-		dev_err(DEV, "expected HandShake length: %u, received: %u\n",
-		     expect, length);
+	if (pi.size != expect) {
+		conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
+			 expect, pi.size);
 		return -1;
 	}
 
-	rv = drbd_recv(mdev, &p->head.payload, expect);
-
-	if (rv != expect) {
-		if (!signal_pending(current))
-			dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
+	p = pi.data;
+	err = drbd_recv_all_warn(tconn, p, expect);
+	if (err)
 		return 0;
-	}
 
 	p->protocol_min = be32_to_cpu(p->protocol_min);
 	p->protocol_max = be32_to_cpu(p->protocol_max);
@@ -4045,15 +4641,15 @@
 	    PRO_VERSION_MIN > p->protocol_max)
 		goto incompat;
 
-	mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
 
-	dev_info(DEV, "Handshake successful: "
-	     "Agreed network protocol version %d\n", mdev->agreed_pro_version);
+	conn_info(tconn, "Handshake successful: "
+	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);
 
 	return 1;
 
  incompat:
-	dev_err(DEV, "incompatible DRBD dialects: "
+	conn_err(tconn, "incompatible DRBD dialects: "
 	    "I support %d-%d, peer supports %d-%d\n",
 	    PRO_VERSION_MIN, PRO_VERSION_MAX,
 	    p->protocol_min, p->protocol_max);
@@ -4061,7 +4657,7 @@
 }
 
 #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
-static int drbd_do_auth(struct drbd_conf *mdev)
+static int drbd_do_auth(struct drbd_tconn *tconn)
 {
-	dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
-	dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
+	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
@@ -4076,121 +4672,139 @@
 	-1 - auth failed, don't try again.
 */
 
-static int drbd_do_auth(struct drbd_conf *mdev)
+static int drbd_do_auth(struct drbd_tconn *tconn)
 {
+	struct drbd_socket *sock;
 	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
 	struct scatterlist sg;
 	char *response = NULL;
 	char *right_response = NULL;
 	char *peers_ch = NULL;
-	unsigned int key_len = strlen(mdev->net_conf->shared_secret);
+	unsigned int key_len;
+	char secret[SHARED_SECRET_MAX]; /* 64 byte */
 	unsigned int resp_size;
 	struct hash_desc desc;
-	enum drbd_packets cmd;
-	unsigned int length;
-	int rv;
+	struct packet_info pi;
+	struct net_conf *nc;
+	int err, rv;
 
-	desc.tfm = mdev->cram_hmac_tfm;
+	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */
+
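+	/* Snapshot the shared secret under RCU; the net_conf object may be
+	 * replaced concurrently, so work on a private copy. */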
+	rcu_read_lock();
+	nc = rcu_dereference(tconn->net_conf);
+	key_len = strlen(nc->shared_secret);
+	memcpy(secret, nc->shared_secret, key_len);
+	rcu_read_unlock();
+
+	desc.tfm = tconn->cram_hmac_tfm;
 	desc.flags = 0;
 
-	rv = crypto_hash_setkey(mdev->cram_hmac_tfm,
-				(u8 *)mdev->net_conf->shared_secret, key_len);
+	rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
 	if (rv) {
-		dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv);
+		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
 		rv = -1;
 		goto fail;
 	}
 
 	get_random_bytes(my_challenge, CHALLENGE_LEN);
 
-	rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
+	sock = &tconn->data;
+	if (!conn_prepare_command(tconn, sock)) {
+		rv = 0;
+		goto fail;
+	}
+	rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
+				my_challenge, CHALLENGE_LEN);
 	if (!rv)
 		goto fail;
 
-	rv = drbd_recv_header(mdev, &cmd, &length);
-	if (!rv)
-		goto fail;
-
-	if (cmd != P_AUTH_CHALLENGE) {
-		dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
-		    cmdname(cmd), cmd);
+	err = drbd_recv_header(tconn, &pi);
+	if (err) {
 		rv = 0;
 		goto fail;
 	}
 
-	if (length > CHALLENGE_LEN * 2) {
-		dev_err(DEV, "expected AuthChallenge payload too big.\n");
+	if (pi.cmd != P_AUTH_CHALLENGE) {
+		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
+			 cmdname(pi.cmd), pi.cmd);
+		rv = 0;
+		goto fail;
+	}
+
+	if (pi.size > CHALLENGE_LEN * 2) {
+		conn_err(tconn, "expected AuthChallenge payload too big.\n");
 		rv = -1;
 		goto fail;
 	}
 
-	peers_ch = kmalloc(length, GFP_NOIO);
+	peers_ch = kmalloc(pi.size, GFP_NOIO);
 	if (peers_ch == NULL) {
-		dev_err(DEV, "kmalloc of peers_ch failed\n");
+		conn_err(tconn, "kmalloc of peers_ch failed\n");
 		rv = -1;
 		goto fail;
 	}
 
-	rv = drbd_recv(mdev, peers_ch, length);
-
-	if (rv != length) {
-		if (!signal_pending(current))
-			dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
+	err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
+	if (err) {
 		rv = 0;
 		goto fail;
 	}
 
-	resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
+	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
 	response = kmalloc(resp_size, GFP_NOIO);
 	if (response == NULL) {
-		dev_err(DEV, "kmalloc of response failed\n");
+		conn_err(tconn, "kmalloc of response failed\n");
 		rv = -1;
 		goto fail;
 	}
 
 	sg_init_table(&sg, 1);
-	sg_set_buf(&sg, peers_ch, length);
+	sg_set_buf(&sg, peers_ch, pi.size);
 
 	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
 	if (rv) {
-		dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
 		rv = -1;
 		goto fail;
 	}
 
-	rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size);
+	if (!conn_prepare_command(tconn, sock)) {
+		rv = 0;
+		goto fail;
+	}
+	rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
+				response, resp_size);
 	if (!rv)
 		goto fail;
 
-	rv = drbd_recv_header(mdev, &cmd, &length);
-	if (!rv)
-		goto fail;
-
-	if (cmd != P_AUTH_RESPONSE) {
-		dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
-			cmdname(cmd), cmd);
+	err = drbd_recv_header(tconn, &pi);
+	if (err) {
 		rv = 0;
 		goto fail;
 	}
 
-	if (length != resp_size) {
-		dev_err(DEV, "expected AuthResponse payload of wrong size\n");
+	if (pi.cmd != P_AUTH_RESPONSE) {
+		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
+			 cmdname(pi.cmd), pi.cmd);
 		rv = 0;
 		goto fail;
 	}
 
-	rv = drbd_recv(mdev, response , resp_size);
+	if (pi.size != resp_size) {
+		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
+		rv = 0;
+		goto fail;
+	}
 
-	if (rv != resp_size) {
-		if (!signal_pending(current))
-			dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
+	err = drbd_recv_all_warn(tconn, response, resp_size);
+	if (err) {
 		rv = 0;
 		goto fail;
 	}
 
 	right_response = kmalloc(resp_size, GFP_NOIO);
 	if (right_response == NULL) {
-		dev_err(DEV, "kmalloc of right_response failed\n");
+		conn_err(tconn, "kmalloc of right_response failed\n");
 		rv = -1;
 		goto fail;
 	}
@@ -4199,7 +4813,7 @@
 
 	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
 	if (rv) {
-		dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
 		rv = -1;
 		goto fail;
 	}
@@ -4207,8 +4821,8 @@
 	rv = !memcmp(response, right_response, resp_size);
 
 	if (rv)
-		dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n",
-		     resp_size, mdev->net_conf->cram_hmac_alg);
+		conn_info(tconn, "Peer authenticated using %d bytes of HMAC\n",
+			  resp_size);
 	else
 		rv = -1;
 
@@ -4223,82 +4837,106 @@
 
 int drbdd_init(struct drbd_thread *thi)
 {
-	struct drbd_conf *mdev = thi->mdev;
-	unsigned int minor = mdev_to_minor(mdev);
+	struct drbd_tconn *tconn = thi->tconn;
 	int h;
 
-	sprintf(current->comm, "drbd%d_receiver", minor);
-
-	dev_info(DEV, "receiver (re)started\n");
+	conn_info(tconn, "receiver (re)started\n");
 
 	do {
-		h = drbd_connect(mdev);
+		h = conn_connect(tconn);
 		if (h == 0) {
-			drbd_disconnect(mdev);
+			conn_disconnect(tconn);
 			schedule_timeout_interruptible(HZ);
 		}
 		if (h == -1) {
-			dev_warn(DEV, "Discarding network configuration.\n");
-			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			conn_warn(tconn, "Discarding network configuration.\n");
+			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 		}
 	} while (h == 0);
 
-	if (h > 0) {
-		if (get_net_conf(mdev)) {
-			drbdd(mdev);
-			put_net_conf(mdev);
-		}
-	}
+	if (h > 0)
+		drbdd(tconn);
 
-	drbd_disconnect(mdev);
+	conn_disconnect(tconn);
 
-	dev_info(DEV, "receiver terminated\n");
+	conn_info(tconn, "receiver terminated\n");
 	return 0;
 }
 
 /* ********* acknowledge sender ******** */
 
-static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_req_state_reply *p = (struct p_req_state_reply *)h;
-
+	struct p_req_state_reply *p = pi->data;
 	int retcode = be32_to_cpu(p->retcode);
 
 	if (retcode >= SS_SUCCESS) {
+		set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
+	} else {
+		set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
+		conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
+			 drbd_set_st_err_str(retcode), retcode);
+	}
+	wake_up(&tconn->ping_wait);
+
+	return 0;
+}
+
+static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
+{
+	struct drbd_conf *mdev;
+	struct p_req_state_reply *p = pi->data;
+	int retcode = be32_to_cpu(p->retcode);
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
+
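+	/* While a connection-wide state change request is pending, a peer on a
+	 * pre-100 protocol (see the D_ASSERT below) answers with this plain
+	 * state reply; forward it to the connection-wide handler. */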
+	if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
+		D_ASSERT(tconn->agreed_pro_version < 100);
+		return got_conn_RqSReply(tconn, pi);
+	}
+
+	if (retcode >= SS_SUCCESS) {
 		set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
 	} else {
 		set_bit(CL_ST_CHG_FAIL, &mdev->flags);
 		dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
-		    drbd_set_st_err_str(retcode), retcode);
+			drbd_set_st_err_str(retcode), retcode);
 	}
 	wake_up(&mdev->state_wait);
 
-	return true;
+	return 0;
 }
 
-static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	return drbd_send_ping_ack(mdev);
+	return drbd_send_ping_ack(tconn);
 
 }
 
-static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	/* restore idle timeout */
-	mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
-	if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
-		wake_up(&mdev->misc_wait);
+	tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
+	if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
+		wake_up(&tconn->ping_wait);
 
-	return true;
+	return 0;
 }
 
-static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_block_ack *p = (struct p_block_ack *)h;
+	struct drbd_conf *mdev;
+	struct p_block_ack *p = pi->data;
 	sector_t sector = be64_to_cpu(p->sector);
 	int blksize = be32_to_cpu(p->blksize);
 
-	D_ASSERT(mdev->agreed_pro_version >= 89);
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
+
+	D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
 
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 
@@ -4312,162 +4950,139 @@
 	dec_rs_pending(mdev);
 	atomic_add(blksize >> 9, &mdev->rs_sect_in);
 
-	return true;
+	return 0;
 }
 
-/* when we receive the ACK for a write request,
- * verify that we actually know about it */
-static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
-	u64 id, sector_t sector)
-{
-	struct hlist_head *slot = tl_hash_slot(mdev, sector);
-	struct hlist_node *n;
-	struct drbd_request *req;
-
-	hlist_for_each_entry(req, n, slot, collision) {
-		if ((unsigned long)req == (unsigned long)id) {
-			if (req->sector != sector) {
-				dev_err(DEV, "_ack_id_to_req: found req %p but it has "
-				    "wrong sector (%llus versus %llus)\n", req,
-				    (unsigned long long)req->sector,
-				    (unsigned long long)sector);
-				break;
-			}
-			return req;
-		}
-	}
-	return NULL;
-}
-
-typedef struct drbd_request *(req_validator_fn)
-	(struct drbd_conf *mdev, u64 id, sector_t sector);
-
-static int validate_req_change_req_state(struct drbd_conf *mdev,
-	u64 id, sector_t sector, req_validator_fn validator,
-	const char *func, enum drbd_req_event what)
+static int
+validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
+			      struct rb_root *root, const char *func,
+			      enum drbd_req_event what, bool missing_ok)
 {
 	struct drbd_request *req;
 	struct bio_and_error m;
 
-	spin_lock_irq(&mdev->req_lock);
-	req = validator(mdev, id, sector);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	req = find_request(mdev, root, id, sector, missing_ok, func);
 	if (unlikely(!req)) {
-		spin_unlock_irq(&mdev->req_lock);
-
-		dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func,
-			(void *)(unsigned long)id, (unsigned long long)sector);
-		return false;
+		spin_unlock_irq(&mdev->tconn->req_lock);
+		return -EIO;
 	}
 	__req_mod(req, what, &m);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
-	return true;
+	return 0;
 }
 
-static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_block_ack *p = (struct p_block_ack *)h;
+	struct drbd_conf *mdev;
+	struct p_block_ack *p = pi->data;
 	sector_t sector = be64_to_cpu(p->sector);
 	int blksize = be32_to_cpu(p->blksize);
 	enum drbd_req_event what;
 
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
+
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 
-	if (is_syncer_block_id(p->block_id)) {
+	if (p->block_id == ID_SYNCER) {
 		drbd_set_in_sync(mdev, sector, blksize);
 		dec_rs_pending(mdev);
-		return true;
+		return 0;
 	}
-	switch (be16_to_cpu(h->command)) {
+	switch (pi->cmd) {
 	case P_RS_WRITE_ACK:
-		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
-		what = write_acked_by_peer_and_sis;
+		what = WRITE_ACKED_BY_PEER_AND_SIS;
 		break;
 	case P_WRITE_ACK:
-		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
-		what = write_acked_by_peer;
+		what = WRITE_ACKED_BY_PEER;
 		break;
 	case P_RECV_ACK:
-		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B);
-		what = recv_acked_by_peer;
+		what = RECV_ACKED_BY_PEER;
 		break;
-	case P_DISCARD_ACK:
-		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
-		what = conflict_discarded_by_peer;
+	case P_SUPERSEDED:
+		what = CONFLICT_RESOLVED;
+		break;
+	case P_RETRY_WRITE:
+		what = POSTPONE_WRITE;
 		break;
 	default:
-		D_ASSERT(0);
-		return false;
+		BUG();
 	}
 
 	return validate_req_change_req_state(mdev, p->block_id, sector,
-		_ack_id_to_req, __func__ , what);
+					     &mdev->write_requests, __func__,
+					     what, false);
 }
 
-static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_block_ack *p = (struct p_block_ack *)h;
+	struct drbd_conf *mdev;
+	struct p_block_ack *p = pi->data;
 	sector_t sector = be64_to_cpu(p->sector);
 	int size = be32_to_cpu(p->blksize);
-	struct drbd_request *req;
-	struct bio_and_error m;
+	int err;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 
-	if (is_syncer_block_id(p->block_id)) {
+	if (p->block_id == ID_SYNCER) {
 		dec_rs_pending(mdev);
 		drbd_rs_failed_io(mdev, sector, size);
-		return true;
+		return 0;
 	}
 
-	spin_lock_irq(&mdev->req_lock);
-	req = _ack_id_to_req(mdev, p->block_id, sector);
-	if (!req) {
-		spin_unlock_irq(&mdev->req_lock);
-		if (mdev->net_conf->wire_protocol == DRBD_PROT_A ||
-		    mdev->net_conf->wire_protocol == DRBD_PROT_B) {
-			/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
-			   The master bio might already be completed, therefore the
-			   request is no longer in the collision hash.
-			   => Do not try to validate block_id as request. */
-			/* In Protocol B we might already have got a P_RECV_ACK
-			   but then get a P_NEG_ACK after wards. */
-			drbd_set_out_of_sync(mdev, sector, size);
-			return true;
-		} else {
-			dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__,
-				(void *)(unsigned long)p->block_id, (unsigned long long)sector);
-			return false;
-		}
+	err = validate_req_change_req_state(mdev, p->block_id, sector,
+					    &mdev->write_requests, __func__,
+					    NEG_ACKED, true);
+	if (err) {
+		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
+		   The master bio might already be completed, therefore the
+		   request is no longer in the write_requests tree. */
+		/* In Protocol B we might already have got a P_RECV_ACK
+		   but then get a P_NEG_ACK afterwards. */
+		drbd_set_out_of_sync(mdev, sector, size);
 	}
-	__req_mod(req, neg_acked, &m);
-	spin_unlock_irq(&mdev->req_lock);
-
-	if (m.bio)
-		complete_master_bio(mdev, &m);
-	return true;
+	return 0;
 }
 
-static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_block_ack *p = (struct p_block_ack *)h;
+	struct drbd_conf *mdev;
+	struct p_block_ack *p = pi->data;
 	sector_t sector = be64_to_cpu(p->sector);
 
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
+
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
-	dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
+
+	dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
 
 	return validate_req_change_req_state(mdev, p->block_id, sector,
-		_ar_id_to_req, __func__ , neg_acked);
+					     &mdev->read_requests, __func__,
+					     NEG_ACKED, false);
 }
 
-static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
 {
+	struct drbd_conf *mdev;
 	sector_t sector;
 	int size;
-	struct p_block_ack *p = (struct p_block_ack *)h;
+	struct p_block_ack *p = pi->data;
+
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
 
 	sector = be64_to_cpu(p->sector);
 	size = be32_to_cpu(p->blksize);
@@ -4478,57 +5093,66 @@
 
 	if (get_ldev_if_state(mdev, D_FAILED)) {
 		drbd_rs_complete_io(mdev, sector);
-		switch (be16_to_cpu(h->command)) {
+		switch (pi->cmd) {
 		case P_NEG_RS_DREPLY:
 			drbd_rs_failed_io(mdev, sector, size);
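+			/* fall through */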
 		case P_RS_CANCEL:
 			break;
 		default:
-			D_ASSERT(0);
-			put_ldev(mdev);
-			return false;
+			BUG();
 		}
 		put_ldev(mdev);
 	}
 
-	return true;
+	return 0;
 }
 
-static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_barrier_ack *p = (struct p_barrier_ack *)h;
+	struct p_barrier_ack *p = pi->data;
+	struct drbd_conf *mdev;
+	int vnr;
 
-	tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
+	tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
 
-	if (mdev->state.conn == C_AHEAD &&
-	    atomic_read(&mdev->ap_in_flight) == 0 &&
-	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
-		mdev->start_resync_timer.expires = jiffies + HZ;
-		add_timer(&mdev->start_resync_timer);
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.conn == C_AHEAD &&
+		    atomic_read(&mdev->ap_in_flight) == 0 &&
+		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
+			mdev->start_resync_timer.expires = jiffies + HZ;
+			add_timer(&mdev->start_resync_timer);
+		}
 	}
+	rcu_read_unlock();
 
-	return true;
+	return 0;
 }
 
-static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	struct p_block_ack *p = (struct p_block_ack *)h;
+	struct drbd_conf *mdev;
+	struct p_block_ack *p = pi->data;
 	struct drbd_work *w;
 	sector_t sector;
 	int size;
 
+	mdev = vnr_to_mdev(tconn, pi->vnr);
+	if (!mdev)
+		return -EIO;
+
 	sector = be64_to_cpu(p->sector);
 	size = be32_to_cpu(p->blksize);
 
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 
 	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
-		drbd_ov_oos_found(mdev, sector, size);
+		drbd_ov_out_of_sync_found(mdev, sector, size);
 	else
-		ov_oos_print(mdev);
+		ov_out_of_sync_print(mdev);
 
 	if (!get_ldev(mdev))
-		return true;
+		return 0;
 
 	drbd_rs_complete_io(mdev, sector);
 	dec_rs_pending(mdev);
@@ -4543,114 +5167,137 @@
 		w = kmalloc(sizeof(*w), GFP_NOIO);
 		if (w) {
 			w->cb = w_ov_finished;
-			drbd_queue_work_front(&mdev->data.work, w);
+			w->mdev = mdev;
+			drbd_queue_work(&mdev->tconn->sender_work, w);
 		} else {
 			dev_err(DEV, "kmalloc(w) failed.");
-			ov_oos_print(mdev);
+			ov_out_of_sync_print(mdev);
 			drbd_resync_finished(mdev);
 		}
 	}
 	put_ldev(mdev);
-	return true;
+	return 0;
 }
 
-static int got_skip(struct drbd_conf *mdev, struct p_header80 *h)
+static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
 {
-	return true;
+	return 0;
+}
+
+static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr, not_empty = 0;
+
+	do {
+		clear_bit(SIGNAL_ASENDER, &tconn->flags);
+		flush_signals(current);
+
+		rcu_read_lock();
+		idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+			kref_get(&mdev->kref);
+			rcu_read_unlock();
+			if (drbd_finish_peer_reqs(mdev)) {
+				kref_put(&mdev->kref, &drbd_minor_destroy);
+				return 1;
+			}
+			kref_put(&mdev->kref, &drbd_minor_destroy);
+			rcu_read_lock();
+		}
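+		/* Re-enable signaling before the final emptiness check, to
+		 * avoid racing with newly queued ACKs. */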
+		set_bit(SIGNAL_ASENDER, &tconn->flags);
+
+		spin_lock_irq(&tconn->req_lock);
+		idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+			not_empty = !list_empty(&mdev->done_ee);
+			if (not_empty)
+				break;
+		}
+		spin_unlock_irq(&tconn->req_lock);
+		rcu_read_unlock();
+	} while (not_empty);
+
+	return 0;
 }
 
 struct asender_cmd {
 	size_t pkt_size;
-	int (*process)(struct drbd_conf *mdev, struct p_header80 *h);
+	int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
 };
 
-static struct asender_cmd *get_asender_cmd(int cmd)
-{
-	static struct asender_cmd asender_tbl[] = {
-		/* anything missing from this table is in
-		 * the drbd_cmd_handler (drbd_default_handler) table,
-		 * see the beginning of drbdd() */
-	[P_PING]	    = { sizeof(struct p_header80), got_Ping },
-	[P_PING_ACK]	    = { sizeof(struct p_header80), got_PingAck },
+static struct asender_cmd asender_tbl[] = {
+	[P_PING]	    = { 0, got_Ping },
+	[P_PING_ACK]	    = { 0, got_PingAck },
 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
-	[P_DISCARD_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
+	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
-	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply},
+	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
-	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply},
-	[P_MAX_CMD]	    = { 0, NULL },
-	};
-	if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
-		return NULL;
-	return &asender_tbl[cmd];
-}
+	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
+	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
+	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
+};
 
 int drbd_asender(struct drbd_thread *thi)
 {
-	struct drbd_conf *mdev = thi->mdev;
-	struct p_header80 *h = &mdev->meta.rbuf.header.h80;
+	struct drbd_tconn *tconn = thi->tconn;
 	struct asender_cmd *cmd = NULL;
-
-	int rv, len;
-	void *buf    = h;
+	struct packet_info pi;
+	int rv;
+	void *buf    = tconn->meta.rbuf;
 	int received = 0;
-	int expect   = sizeof(struct p_header80);
-	int empty;
-	int ping_timeout_active = 0;
-
-	sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
+	unsigned int header_size = drbd_header_size(tconn);
+	int expect   = header_size;
+	bool ping_timeout_active = false;
+	struct net_conf *nc;
+	int ping_timeo, tcp_cork, ping_int;
 
 	current->policy = SCHED_RR;  /* Make this a realtime task! */
 	current->rt_priority = 2;    /* more important than all other tasks */
 
-	while (get_t_state(thi) == Running) {
-		drbd_thread_current_set_cpu(mdev);
-		if (test_and_clear_bit(SEND_PING, &mdev->flags)) {
-			ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
-			mdev->meta.socket->sk->sk_rcvtimeo =
-				mdev->net_conf->ping_timeo*HZ/10;
-			ping_timeout_active = 1;
+	while (get_t_state(thi) == RUNNING) {
+		drbd_thread_current_set_cpu(thi);
+
+		rcu_read_lock();
+		nc = rcu_dereference(tconn->net_conf);
+		ping_timeo = nc->ping_timeo;
+		tcp_cork = nc->tcp_cork;
+		ping_int = nc->ping_int;
+		rcu_read_unlock();
+
+		if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
+			if (drbd_send_ping(tconn)) {
+				conn_err(tconn, "drbd_send_ping has failed\n");
+				goto reconnect;
+			}
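+			/* ping_timeo is configured in units of 0.1 seconds. */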
+			tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
+			ping_timeout_active = true;
 		}
 
-		/* conditionally cork;
-		 * it may hurt latency if we cork without much to send */
-		if (!mdev->net_conf->no_cork &&
-			3 < atomic_read(&mdev->unacked_cnt))
-			drbd_tcp_cork(mdev->meta.socket);
-		while (1) {
-			clear_bit(SIGNAL_ASENDER, &mdev->flags);
-			flush_signals(current);
-			if (!drbd_process_done_ee(mdev))
-				goto reconnect;
-			/* to avoid race with newly queued ACKs */
-			set_bit(SIGNAL_ASENDER, &mdev->flags);
-			spin_lock_irq(&mdev->req_lock);
-			empty = list_empty(&mdev->done_ee);
-			spin_unlock_irq(&mdev->req_lock);
-			/* new ack may have been queued right here,
-			 * but then there is also a signal pending,
-			 * and we start over... */
-			if (empty)
-				break;
+		/* TODO: conditionally cork; it may hurt latency if we cork without
+		   much to send */
+		if (tcp_cork)
+			drbd_tcp_cork(tconn->meta.socket);
+		if (tconn_finish_peer_reqs(tconn)) {
+			conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
+			goto reconnect;
 		}
 		/* but unconditionally uncork unless disabled */
-		if (!mdev->net_conf->no_cork)
-			drbd_tcp_uncork(mdev->meta.socket);
+		if (tcp_cork)
+			drbd_tcp_uncork(tconn->meta.socket);
 
 		/* short circuit, recv_msg would return EINTR anyways. */
 		if (signal_pending(current))
 			continue;
 
-		rv = drbd_recv_short(mdev, mdev->meta.socket,
-				     buf, expect-received, 0);
-		clear_bit(SIGNAL_ASENDER, &mdev->flags);
+		rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
+		clear_bit(SIGNAL_ASENDER, &tconn->flags);
 
 		flush_signals(current);
 
@@ -4668,80 +5315,91 @@
 			received += rv;
 			buf	 += rv;
 		} else if (rv == 0) {
-			dev_err(DEV, "meta connection shut down by peer.\n");
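+			/* If we initiated the disconnect (DISCONNECT_SENT),
+			 * the peer closing the meta socket is expected:
+			 * wait up to a ping timeout for our own state to
+			 * drop below C_WF_REPORT_PARAMS, then exit cleanly. */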
+			if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
+				long t;
+				rcu_read_lock();
+				t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
+				rcu_read_unlock();
+
+				t = wait_event_timeout(tconn->ping_wait,
+						       tconn->cstate < C_WF_REPORT_PARAMS,
+						       t);
+				if (t)
+					break;
+			}
+			conn_err(tconn, "meta connection shut down by peer.\n");
 			goto reconnect;
 		} else if (rv == -EAGAIN) {
 			/* If the data socket received something meanwhile,
 			 * that is good enough: peer is still alive. */
-			if (time_after(mdev->last_received,
-				jiffies - mdev->meta.socket->sk->sk_rcvtimeo))
+			if (time_after(tconn->last_received,
+				jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
 				continue;
 			if (ping_timeout_active) {
-				dev_err(DEV, "PingAck did not arrive in time.\n");
+				conn_err(tconn, "PingAck did not arrive in time.\n");
 				goto reconnect;
 			}
-			set_bit(SEND_PING, &mdev->flags);
+			set_bit(SEND_PING, &tconn->flags);
 			continue;
 		} else if (rv == -EINTR) {
 			continue;
 		} else {
-			dev_err(DEV, "sock_recvmsg returned %d\n", rv);
+			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
 			goto reconnect;
 		}
 
 		if (received == expect && cmd == NULL) {
-			if (unlikely(h->magic != BE_DRBD_MAGIC)) {
-				dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n",
-				    be32_to_cpu(h->magic),
-				    be16_to_cpu(h->command),
-				    be16_to_cpu(h->length));
+			if (decode_header(tconn, tconn->meta.rbuf, &pi))
 				goto reconnect;
-			}
-			cmd = get_asender_cmd(be16_to_cpu(h->command));
-			len = be16_to_cpu(h->length);
-			if (unlikely(cmd == NULL)) {
-				dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n",
-				    be32_to_cpu(h->magic),
-				    be16_to_cpu(h->command),
-				    be16_to_cpu(h->length));
+			cmd = &asender_tbl[pi.cmd];
+			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
+				conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
+					 cmdname(pi.cmd), pi.cmd);
 				goto disconnect;
 			}
-			expect = cmd->pkt_size;
-			ERR_IF(len != expect-sizeof(struct p_header80))
+			expect = header_size + cmd->pkt_size;
+			if (pi.size != expect - header_size) {
+				conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
+					pi.cmd, pi.size);
 				goto reconnect;
+			}
 		}
 		if (received == expect) {
-			mdev->last_received = jiffies;
-			D_ASSERT(cmd != NULL);
-			if (!cmd->process(mdev, h))
+			bool err;
+
+			err = cmd->fn(tconn, &pi);
+			if (err) {
+				conn_err(tconn, "%pf failed\n", cmd->fn);
 				goto reconnect;
+			}
 
-			/* the idle_timeout (ping-int)
-			 * has been restored in got_PingAck() */
-			if (cmd == get_asender_cmd(P_PING_ACK))
-				ping_timeout_active = 0;
+			tconn->last_received = jiffies;
 
-			buf	 = h;
+			if (cmd == &asender_tbl[P_PING_ACK]) {
+				/* restore idle timeout */
+				tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
+				ping_timeout_active = false;
+			}
+
+			buf	 = tconn->meta.rbuf;
 			received = 0;
-			expect	 = sizeof(struct p_header80);
+			expect	 = header_size;
 			cmd	 = NULL;
 		}
 	}
 
 	if (0) {
 reconnect:
-		drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
-		drbd_md_sync(mdev);
+		conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
+		conn_md_sync(tconn);
 	}
 	if (0) {
 disconnect:
-		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-		drbd_md_sync(mdev);
+		conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 	}
-	clear_bit(SIGNAL_ASENDER, &mdev->flags);
+	clear_bit(SIGNAL_ASENDER, &tconn->flags);
 
-	D_ASSERT(mdev->state.conn < C_CONNECTED);
-	dev_info(DEV, "asender terminated\n");
+	conn_info(tconn, "asender terminated\n");
 
 	return 0;
 }
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 01b2ac6..f58a4a4 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -31,6 +31,8 @@
 #include "drbd_req.h"
 
 
+static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size);
+
 /* Update disk stats at start of I/O request */
 static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
 {
@@ -40,6 +42,8 @@
 	part_round_stats(cpu, &mdev->vdisk->part0);
 	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
 	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
+	(void) cpu; /* The macro invocations above want the cpu argument; this
+		       silences the compiler warning about cpu being assigned
+		       but never used. */
 	part_inc_in_flight(&mdev->vdisk->part0, rw);
 	part_stat_unlock();
 }
@@ -57,9 +61,51 @@
 	part_stat_unlock();
 }
 
-static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
+static struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
+					       struct bio *bio_src)
 {
-	const unsigned long s = req->rq_state;
+	struct drbd_request *req;
+
+	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
+	if (!req)
+		return NULL;
+
+	drbd_req_make_private_bio(req, bio_src);
+	req->rq_state    = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
+	req->w.mdev      = mdev;
+	req->master_bio  = bio_src;
+	req->epoch       = 0;
+
+	drbd_clear_interval(&req->i);
+	req->i.sector     = bio_src->bi_sector;
+	req->i.size      = bio_src->bi_size;
+	req->i.local = true;
+	req->i.waiting = false;
+
+	INIT_LIST_HEAD(&req->tl_requests);
+	INIT_LIST_HEAD(&req->w.list);
+
+	/* one reference to be put by __drbd_make_request */
+	atomic_set(&req->completion_ref, 1);
+	/* one kref as long as completion_ref > 0 */
+	kref_init(&req->kref);
+	return req;
+}
+
+void drbd_req_destroy(struct kref *kref)
+{
+	struct drbd_request *req = container_of(kref, struct drbd_request, kref);
+	struct drbd_conf *mdev = req->w.mdev;
+	const unsigned s = req->rq_state;
+
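+	/* Sanity check: by the time we destroy the request, the master bio
+	 * must be gone (unless postponed), all completion references dropped,
+	 * and no local or network activity may still be pending. */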
+	if ((req->master_bio && !(s & RQ_POSTPONED)) ||
+		atomic_read(&req->completion_ref) ||
+		(s & RQ_LOCAL_PENDING) ||
+		((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) {
+		dev_err(DEV, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n",
+				s, atomic_read(&req->completion_ref));
+		return;
+	}
 
 	/* remove it from the transfer log.
 	 * well, only if it had been there in the first
@@ -67,24 +113,33 @@
 	 * and never sent), it should still be "empty" as
 	 * initialized in drbd_req_new(), so we can list_del() it
 	 * here unconditionally */
-	list_del(&req->tl_requests);
+	list_del_init(&req->tl_requests);
 
 	/* if it was a write, we may have to set the corresponding
 	 * bit(s) out-of-sync first. If it had a local part, we need to
 	 * release the reference to the activity log. */
-	if (rw == WRITE) {
+	if (s & RQ_WRITE) {
 		/* Set out-of-sync unless both OK flags are set
 		 * (local only or remote failed).
 		 * Other places where we set out-of-sync:
 		 * READ with local io-error */
-		if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
-			drbd_set_out_of_sync(mdev, req->sector, req->size);
 
-		if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
-			drbd_set_in_sync(mdev, req->sector, req->size);
+		/* There is a special case:
+		 * we may notice late that IO was suspended,
+		 * and postpone, or schedule for retry, a write,
+		 * before it even was submitted or sent.
+		 * In that case we do not want to touch the bitmap at all.
+		 */
+		if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) {
+			if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
+				drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);
+
+			if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
+				drbd_set_in_sync(mdev, req->i.sector, req->i.size);
+		}
 
 		/* one might be tempted to move the drbd_al_complete_io
-		 * to the local io completion callback drbd_endio_pri.
+		 * to the local io completion callback drbd_request_endio.
 		 * but, if this was a mirror write, we may only
 		 * drbd_al_complete_io after this is RQ_NET_DONE,
 		 * otherwise the extent could be dropped from the al
@@ -93,109 +148,35 @@
 		 * but after the extent has been dropped from the al,
 		 * we would forget to resync the corresponding extent.
 		 */
-		if (s & RQ_LOCAL_MASK) {
+		if (s & RQ_IN_ACT_LOG) {
 			if (get_ldev_if_state(mdev, D_FAILED)) {
-				if (s & RQ_IN_ACT_LOG)
-					drbd_al_complete_io(mdev, req->sector);
+				drbd_al_complete_io(mdev, &req->i);
 				put_ldev(mdev);
 			} else if (__ratelimit(&drbd_ratelimit_state)) {
-				dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
-				     "but my Disk seems to have failed :(\n",
-				     (unsigned long long) req->sector);
+				dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), "
+					 "but my Disk seems to have failed :(\n",
+					 (unsigned long long) req->i.sector, req->i.size);
 			}
 		}
 	}
 
-	drbd_req_free(req);
+	mempool_free(req, drbd_request_mempool);
 }
 
-static void queue_barrier(struct drbd_conf *mdev)
-{
-	struct drbd_tl_epoch *b;
+static void wake_all_senders(struct drbd_tconn *tconn)
+{
+	wake_up(&tconn->sender_work.q_wait);
+}
 
-	/* We are within the req_lock. Once we queued the barrier for sending,
-	 * we set the CREATE_BARRIER bit. It is cleared as soon as a new
-	 * barrier/epoch object is added. This is the only place this bit is
-	 * set. It indicates that the barrier for this epoch is already queued,
-	 * and no new epoch has been created yet. */
-	if (test_bit(CREATE_BARRIER, &mdev->flags))
+/* must hold resource->req_lock */
+static void start_new_tl_epoch(struct drbd_tconn *tconn)
+{
+	/* no point closing an epoch if it is empty anyway */
+	if (tconn->current_tle_writes == 0)
 		return;
 
-	b = mdev->newest_tle;
-	b->w.cb = w_send_barrier;
-	/* inc_ap_pending done here, so we won't
-	 * get imbalanced on connection loss.
-	 * dec_ap_pending will be done in got_BarrierAck
-	 * or (on connection loss) in tl_clear.  */
-	inc_ap_pending(mdev);
-	drbd_queue_work(&mdev->data.work, &b->w);
-	set_bit(CREATE_BARRIER, &mdev->flags);
-}
-
-static void _about_to_complete_local_write(struct drbd_conf *mdev,
-	struct drbd_request *req)
-{
-	const unsigned long s = req->rq_state;
-	struct drbd_request *i;
-	struct drbd_epoch_entry *e;
-	struct hlist_node *n;
-	struct hlist_head *slot;
-
-	/* Before we can signal completion to the upper layers,
-	 * we may need to close the current epoch.
-	 * We can skip this, if this request has not even been sent, because we
-	 * did not have a fully established connection yet/anymore, during
-	 * bitmap exchange, or while we are C_AHEAD due to congestion policy.
-	 */
-	if (mdev->state.conn >= C_CONNECTED &&
-	    (s & RQ_NET_SENT) != 0 &&
-	    req->epoch == mdev->newest_tle->br_number)
-		queue_barrier(mdev);
-
-	/* we need to do the conflict detection stuff,
-	 * if we have the ee_hash (two_primaries) and
-	 * this has been on the network */
-	if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) {
-		const sector_t sector = req->sector;
-		const int size = req->size;
-
-		/* ASSERT:
-		 * there must be no conflicting requests, since
-		 * they must have been failed on the spot */
-#define OVERLAPS overlaps(sector, size, i->sector, i->size)
-		slot = tl_hash_slot(mdev, sector);
-		hlist_for_each_entry(i, n, slot, collision) {
-			if (OVERLAPS) {
-				dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
-				      "other: %p %llus +%u\n",
-				      req, (unsigned long long)sector, size,
-				      i, (unsigned long long)i->sector, i->size);
-			}
-		}
-
-		/* maybe "wake" those conflicting epoch entries
-		 * that wait for this request to finish.
-		 *
-		 * currently, there can be only _one_ such ee
-		 * (well, or some more, which would be pending
-		 * P_DISCARD_ACK not yet sent by the asender...),
-		 * since we block the receiver thread upon the
-		 * first conflict detection, which will wait on
-		 * misc_wait.  maybe we want to assert that?
-		 *
-		 * anyways, if we found one,
-		 * we just have to do a wake_up.  */
-#undef OVERLAPS
-#define OVERLAPS overlaps(sector, size, e->sector, e->size)
-		slot = ee_hash_slot(mdev, req->sector);
-		hlist_for_each_entry(e, n, slot, collision) {
-			if (OVERLAPS) {
-				wake_up(&mdev->misc_wait);
-				break;
-			}
-		}
-	}
-#undef OVERLAPS
+	tconn->current_tle_writes = 0;
+	atomic_inc(&tconn->current_tle_nr);
+	wake_all_senders(tconn);
 }
 
 void complete_master_bio(struct drbd_conf *mdev,
@@ -205,17 +186,33 @@
 	dec_ap_bio(mdev);
 }
 
+
+static void drbd_remove_request_interval(struct rb_root *root,
+					 struct drbd_request *req)
+{
+	struct drbd_conf *mdev = req->w.mdev;
+	struct drbd_interval *i = &req->i;
+
+	drbd_remove_interval(root, i);
+
+	/* Wake up any processes waiting for this request to complete.  */
+	if (i->waiting)
+		wake_up(&mdev->misc_wait);
+}
+
 /* Helper for __req_mod().
  * Set m->bio to the master bio, if it is fit to be completed,
  * or leave it alone (it is initialized to NULL in __req_mod),
  * if it has already been completed, or cannot be completed yet.
  * If m->bio is set, the error status to be returned is placed in m->error.
  */
-void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
+static
+void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 {
-	const unsigned long s = req->rq_state;
-	struct drbd_conf *mdev = req->mdev;
-	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
+	const unsigned s = req->rq_state;
+	struct drbd_conf *mdev = req->w.mdev;
+	int rw;
+	int error, ok;
 
 	/* we must not complete the master bio, while it is
 	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
@@ -226,167 +223,222 @@
 	 *	the receiver,
 	 *	the bio_endio completion callbacks.
 	 */
-	if (s & RQ_NET_QUEUED)
+	if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) ||
+	    (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) ||
+	    (s & RQ_COMPLETION_SUSP)) {
+		dev_err(DEV, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
 		return;
-	if (s & RQ_NET_PENDING)
+	}
+
+	if (!req->master_bio) {
+		dev_err(DEV, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
 		return;
-	if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
-		return;
+	}
 
-	if (req->master_bio) {
-		/* this is data_received (remote read)
-		 * or protocol C P_WRITE_ACK
-		 * or protocol B P_RECV_ACK
-		 * or protocol A "handed_over_to_network" (SendAck)
-		 * or canceled or failed,
-		 * or killed from the transfer log due to connection loss.
-		 */
+	rw = bio_rw(req->master_bio);
 
-		/*
-		 * figure out whether to report success or failure.
-		 *
-		 * report success when at least one of the operations succeeded.
-		 * or, to put the other way,
-		 * only report failure, when both operations failed.
-		 *
-		 * what to do about the failures is handled elsewhere.
-		 * what we need to do here is just: complete the master_bio.
-		 *
-		 * local completion error, if any, has been stored as ERR_PTR
-		 * in private_bio within drbd_endio_pri.
-		 */
-		int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
-		int error = PTR_ERR(req->private_bio);
+	/*
+	 * figure out whether to report success or failure.
+	 *
+	 * report success when at least one of the operations succeeded.
+	 * or, to put the other way,
+	 * only report failure, when both operations failed.
+	 *
+	 * what to do about the failures is handled elsewhere.
+	 * what we need to do here is just: complete the master_bio.
+	 *
+	 * local completion error, if any, has been stored as ERR_PTR
+	 * in private_bio within drbd_request_endio.
+	 */
+	ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
+	error = PTR_ERR(req->private_bio);
 
-		/* remove the request from the conflict detection
-		 * respective block_id verification hash */
-		if (!hlist_unhashed(&req->collision))
-			hlist_del(&req->collision);
-		else
-			D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
+	/* remove the request from the conflict detection
+	 * respective block_id verification hash */
+	if (!drbd_interval_empty(&req->i)) {
+		struct rb_root *root;
 
-		/* for writes we need to do some extra housekeeping */
 		if (rw == WRITE)
-			_about_to_complete_local_write(mdev, req);
+			root = &mdev->write_requests;
+		else
+			root = &mdev->read_requests;
+		drbd_remove_request_interval(root, req);
+	} else if (!(s & RQ_POSTPONED))
+		D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
 
-		/* Update disk stats */
-		_drbd_end_io_acct(mdev, req);
+	/* Before we can signal completion to the upper layers,
+	 * we may need to close the current transfer log epoch.
+	 * We are within the request lock, so we can simply compare
+	 * the request epoch number with the current transfer log
+	 * epoch number.  If they match, increase the current_tle_nr,
+	 * and reset the transfer log epoch write_cnt.
+	 */
+	if (rw == WRITE &&
+	    req->epoch == atomic_read(&mdev->tconn->current_tle_nr))
+		start_new_tl_epoch(mdev->tconn);
 
+	/* Update disk stats */
+	_drbd_end_io_acct(mdev, req);
+
+	/* If READ failed,
+	 * have it be pushed back to the retry work queue,
+	 * so it will re-enter __drbd_make_request(),
+	 * and be re-assigned to a suitable local or remote path,
+	 * or failed if we do not have access to good data anymore.
+	 *
+	 * Unless it was failed early by __drbd_make_request(),
+	 * because no path was available, in which case
+	 * it was not even added to the transfer_log.
+	 *
+	 * READA may fail, and will not be retried.
+	 *
+	 * WRITE should have used all available paths already.
+	 */
+	if (!ok && rw == READ && !list_empty(&req->tl_requests))
+		req->rq_state |= RQ_POSTPONED;
+
+	if (!(req->rq_state & RQ_POSTPONED)) {
 		m->error = ok ? 0 : (error ?: -EIO);
 		m->bio = req->master_bio;
 		req->master_bio = NULL;
 	}
-
-	if (s & RQ_LOCAL_PENDING)
-		return;
-
-	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
-		/* this is disconnected (local only) operation,
-		 * or protocol C P_WRITE_ACK,
-		 * or protocol A or B P_BARRIER_ACK,
-		 * or killed from the transfer log due to connection loss. */
-		_req_is_done(mdev, req, rw);
-	}
-	/* else: network part and not DONE yet. that is
-	 * protocol A or B, barrier ack still pending... */
 }
 
-static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
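+/* Drop @put completion references.  Returns 1 if this completed the
+ * request and the caller should drop the corresponding kref; returns 0
+ * if references remain or the request was queued for retry instead. */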
+static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
 {
-	struct drbd_conf *mdev = req->mdev;
+	struct drbd_conf *mdev = req->w.mdev;
+	D_ASSERT(m || (req->rq_state & RQ_POSTPONED));
 
-	if (!is_susp(mdev->state))
-		_req_may_be_done(req, m);
-}
-
-/*
- * checks whether there was an overlapping request
- * or ee already registered.
- *
- * if so, return 1, in which case this request is completed on the spot,
- * without ever being submitted or send.
- *
- * return 0 if it is ok to submit this request.
- *
- * NOTE:
- * paranoia: assume something above us is broken, and issues different write
- * requests for the same block simultaneously...
- *
- * To ensure these won't be reordered differently on both nodes, resulting in
- * diverging data sets, we discard the later one(s). Not that this is supposed
- * to happen, but this is the rationale why we also have to check for
- * conflicting requests with local origin, and why we have to do so regardless
- * of whether we allowed multiple primaries.
- *
- * BTW, in case we only have one primary, the ee_hash is empty anyways, and the
- * second hlist_for_each_entry becomes a noop. This is even simpler than to
- * grab a reference on the net_conf, and check for the two_primaries flag...
- */
-static int _req_conflicts(struct drbd_request *req)
-{
-	struct drbd_conf *mdev = req->mdev;
-	const sector_t sector = req->sector;
-	const int size = req->size;
-	struct drbd_request *i;
-	struct drbd_epoch_entry *e;
-	struct hlist_node *n;
-	struct hlist_head *slot;
-
-	D_ASSERT(hlist_unhashed(&req->collision));
-
-	if (!get_net_conf(mdev))
+	if (!atomic_sub_and_test(put, &req->completion_ref))
 		return 0;
 
-	/* BUG_ON */
-	ERR_IF (mdev->tl_hash_s == 0)
-		goto out_no_conflict;
-	BUG_ON(mdev->tl_hash == NULL);
+	drbd_req_complete(req, m);
 
-#define OVERLAPS overlaps(i->sector, i->size, sector, size)
-	slot = tl_hash_slot(mdev, sector);
-	hlist_for_each_entry(i, n, slot, collision) {
-		if (OVERLAPS) {
-			dev_alert(DEV, "%s[%u] Concurrent local write detected! "
-			      "[DISCARD L] new: %llus +%u; "
-			      "pending: %llus +%u\n",
-			      current->comm, current->pid,
-			      (unsigned long long)sector, size,
-			      (unsigned long long)i->sector, i->size);
-			goto out_conflict;
-		}
+	if (req->rq_state & RQ_POSTPONED) {
+		/* don't destroy the req object just yet,
+		 * but queue it for retry */
+		drbd_restart_request(req);
+		return 0;
 	}
 
-	if (mdev->ee_hash_s) {
-		/* now, check for overlapping requests with remote origin */
-		BUG_ON(mdev->ee_hash == NULL);
-#undef OVERLAPS
-#define OVERLAPS overlaps(e->sector, e->size, sector, size)
-		slot = ee_hash_slot(mdev, sector);
-		hlist_for_each_entry(e, n, slot, collision) {
-			if (OVERLAPS) {
-				dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
-				      " [DISCARD L] new: %llus +%u; "
-				      "pending: %llus +%u\n",
-				      current->comm, current->pid,
-				      (unsigned long long)sector, size,
-				      (unsigned long long)e->sector, e->size);
-				goto out_conflict;
-			}
-		}
-	}
-#undef OVERLAPS
-
-out_no_conflict:
-	/* this is like it should be, and what we expected.
-	 * our users do behave after all... */
-	put_net_conf(mdev);
-	return 0;
-
-out_conflict:
-	put_net_conf(mdev);
 	return 1;
 }
 
+/* I'd like this to be the only place that manipulates
+ * req->completion_ref and req->kref. */
+static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
+		int clear, int set)
+{
+	struct drbd_conf *mdev = req->w.mdev;
+	unsigned s = req->rq_state;
+	int c_put = 0;
+	int k_put = 0;
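+	/* c_put counts completion references to drop, k_put counts kref
+	 * references to drop; both are collected first and applied below. */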
+
+	if (drbd_suspended(mdev) && !((s | clear) & RQ_COMPLETION_SUSP))
+		set |= RQ_COMPLETION_SUSP;
+
+	/* apply */
+
+	req->rq_state &= ~clear;
+	req->rq_state |= set;
+
+	/* no change? */
+	if (req->rq_state == s)
+		return;
+
+	/* intent: get references */
+
+	if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
+		atomic_inc(&req->completion_ref);
+
+	if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) {
+		inc_ap_pending(mdev);
+		atomic_inc(&req->completion_ref);
+	}
+
+	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED))
+		atomic_inc(&req->completion_ref);
+
+	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
+		kref_get(&req->kref); /* wait for the DONE */
+
+	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT))
+		atomic_add(req->i.size >> 9, &mdev->ap_in_flight);
+
+	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
+		atomic_inc(&req->completion_ref);
+
+	/* progress: put references */
+
+	if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP))
+		++c_put;
+
+	if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
+		D_ASSERT(req->rq_state & RQ_LOCAL_PENDING);
+		/* local completion may still come in later,
+		 * we need to keep the req object around. */
+		kref_get(&req->kref);
+		++c_put;
+	}
+
+	if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
+		if (req->rq_state & RQ_LOCAL_ABORTED)
+			++k_put;
+		else
+			++c_put;
+	}
+
+	if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
+		dec_ap_pending(mdev);
+		++c_put;
+	}
+
+	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED))
+		++c_put;
+
+	if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
+		if (req->rq_state & RQ_NET_SENT)
+			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
+		++k_put;
+	}
+
+	/* potentially complete and destroy */
+
+	if (k_put || c_put) {
+		/* Completion does its own kref_put.  If we are going to
+		 * kref_sub below, we need req to still be around then. */
+		int at_least = k_put + !!c_put;
+		int refcount = atomic_read(&req->kref.refcount);
+		if (refcount < at_least)
+			dev_err(DEV,
+				"mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n",
+				s, req->rq_state, refcount, at_least);
+	}
+
+	/* If we made progress, retry conflicting peer requests, if any. */
+	if (req->i.waiting)
+		wake_up(&mdev->misc_wait);
+
+	if (c_put)
+		k_put += drbd_req_put_completion_ref(req, m, c_put);
+	if (k_put)
+		kref_sub(&req->kref, k_put, drbd_req_destroy);
+}
+
+static void drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	char b[BDEVNAME_SIZE];
+
+	if (!__ratelimit(&drbd_ratelimit_state))
+		return;
+
+	dev_warn(DEV, "local %s IO error sector %llu+%u on %s\n",
+			(req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
+			(unsigned long long)req->i.sector,
+			req->i.size >> 9,
+			bdevname(mdev->ldev->backing_bdev, b));
+}
+
 /* obviously this could be coded as many single functions
  * instead of one huge switch,
  * or by putting the code directly in the respective locations
@@ -402,9 +454,12 @@
 int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		struct bio_and_error *m)
 {
-	struct drbd_conf *mdev = req->mdev;
-	int rv = 0;
-	m->bio = NULL;
+	struct drbd_conf *mdev = req->w.mdev;
+	struct net_conf *nc;
+	int p, rv = 0;
+
+	if (m)
+		m->bio = NULL;
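+	/* m may be NULL here; in that case the request must end up
+	 * RQ_POSTPONED (see the assert in drbd_req_put_completion_ref). */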
 
 	switch (what) {
 	default:
@@ -413,116 +468,91 @@
 
 	/* does not happen...
 	 * initialization done in drbd_req_new
-	case created:
+	case CREATED:
 		break;
 		*/
 
-	case to_be_send: /* via network */
-		/* reached via drbd_make_request_common
+	case TO_BE_SENT: /* via network */
+		/* reached via __drbd_make_request
 		 * and from w_read_retry_remote */
 		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
-		req->rq_state |= RQ_NET_PENDING;
-		inc_ap_pending(mdev);
+		rcu_read_lock();
+		nc = rcu_dereference(mdev->tconn->net_conf);
+		p = nc->wire_protocol;
+		rcu_read_unlock();
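+		/* remember which kind of ack the wire protocol will give us:
+		 * protocol C expects a write ack, protocol B a receive ack,
+		 * protocol A neither */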
+		req->rq_state |=
+			p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
+			p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
+		mod_rq_state(req, m, 0, RQ_NET_PENDING);
 		break;
 
-	case to_be_submitted: /* locally */
-		/* reached via drbd_make_request_common */
+	case TO_BE_SUBMITTED: /* locally */
+		/* reached via __drbd_make_request */
 		D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK));
-		req->rq_state |= RQ_LOCAL_PENDING;
+		mod_rq_state(req, m, 0, RQ_LOCAL_PENDING);
 		break;
 
-	case completed_ok:
+	case COMPLETED_OK:
 		if (req->rq_state & RQ_WRITE)
-			mdev->writ_cnt += req->size>>9;
+			mdev->writ_cnt += req->i.size >> 9;
 		else
-			mdev->read_cnt += req->size>>9;
+			mdev->read_cnt += req->i.size >> 9;
 
-		req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
-		req->rq_state &= ~RQ_LOCAL_PENDING;
-
-		_req_may_be_done_not_susp(req, m);
+		mod_rq_state(req, m, RQ_LOCAL_PENDING,
+				RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
 		break;
 
-	case abort_disk_io:
-		req->rq_state |= RQ_LOCAL_ABORTED;
-		if (req->rq_state & RQ_WRITE)
-			_req_may_be_done_not_susp(req, m);
-		else
-			goto goto_queue_for_net_read;
+	case ABORT_DISK_IO:
+		mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
 		break;
 
-	case write_completed_with_error:
-		req->rq_state |= RQ_LOCAL_COMPLETED;
-		req->rq_state &= ~RQ_LOCAL_PENDING;
-
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
-		_req_may_be_done_not_susp(req, m);
+	case WRITE_COMPLETED_WITH_ERROR:
+		drbd_report_io_error(mdev, req);
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
+		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
 		break;
 
-	case read_ahead_completed_with_error:
-		/* it is legal to fail READA */
-		req->rq_state |= RQ_LOCAL_COMPLETED;
-		req->rq_state &= ~RQ_LOCAL_PENDING;
-		_req_may_be_done_not_susp(req, m);
+	case READ_COMPLETED_WITH_ERROR:
+		drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);
+		drbd_report_io_error(mdev, req);
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
+		/* fall through. */
+	case READ_AHEAD_COMPLETED_WITH_ERROR:
+		/* it is legal to fail READA, no __drbd_chk_io_error in that case. */
+		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
 		break;
 
-	case read_completed_with_error:
-		drbd_set_out_of_sync(mdev, req->sector, req->size);
-
-		req->rq_state |= RQ_LOCAL_COMPLETED;
-		req->rq_state &= ~RQ_LOCAL_PENDING;
-
-		if (req->rq_state & RQ_LOCAL_ABORTED) {
-			_req_may_be_done(req, m);
-			break;
-		}
-
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
-
-	goto_queue_for_net_read:
-
-		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
-
-		/* no point in retrying if there is no good remote data,
-		 * or we have no connection. */
-		if (mdev->state.pdsk != D_UP_TO_DATE) {
-			_req_may_be_done_not_susp(req, m);
-			break;
-		}
-
-		/* _req_mod(req,to_be_send); oops, recursion... */
-		req->rq_state |= RQ_NET_PENDING;
-		inc_ap_pending(mdev);
-		/* fall through: _req_mod(req,queue_for_net_read); */
-
-	case queue_for_net_read:
+	case QUEUE_FOR_NET_READ:
 		/* READ or READA, and
 		 * no local disk,
 		 * or target area marked as invalid,
 		 * or just got an io-error. */
-		/* from drbd_make_request_common
+		/* from __drbd_make_request
 		 * or from bio_endio during read io-error recovery */
 
-		/* so we can verify the handle in the answer packet
-		 * corresponding hlist_del is in _req_may_be_done() */
-		hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector));
+		/* So we can verify the handle in the answer packet.
+		 * Corresponding drbd_remove_request_interval is in
+		 * drbd_req_complete() */
+		D_ASSERT(drbd_interval_empty(&req->i));
+		drbd_insert_interval(&mdev->read_requests, &req->i);
 
 		set_bit(UNPLUG_REMOTE, &mdev->flags);
 
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
-		req->rq_state |= RQ_NET_QUEUED;
-		req->w.cb = (req->rq_state & RQ_LOCAL_MASK)
-			? w_read_retry_remote
-			: w_send_read_req;
-		drbd_queue_work(&mdev->data.work, &req->w);
+		D_ASSERT((req->rq_state & RQ_LOCAL_MASK) == 0);
+		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
+		req->w.cb = w_send_read_req;
+		drbd_queue_work(&mdev->tconn->sender_work, &req->w);
 		break;
 
-	case queue_for_net_write:
+	case QUEUE_FOR_NET_WRITE:
 		/* assert something? */
-		/* from drbd_make_request_common only */
+		/* from __drbd_make_request only */
 
-		hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector));
-		/* corresponding hlist_del is in _req_may_be_done() */
+		/* Corresponding drbd_remove_request_interval is in
+		 * drbd_req_complete() */
+		D_ASSERT(drbd_interval_empty(&req->i));
+		drbd_insert_interval(&mdev->write_requests, &req->i);
 
 		/* NOTE
 		 * In case the req ended up on the transfer log before being
@@ -533,7 +563,7 @@
 		 *
 		 * _req_add_to_epoch(req); this has to be after the
 		 * _maybe_start_new_epoch(req); which happened in
-		 * drbd_make_request_common, because we now may set the bit
+		 * __drbd_make_request, because we now may set the bit
 		 * again ourselves to close the current epoch.
 		 *
 		 * Add req to the (now) current epoch (barrier). */
@@ -543,202 +573,187 @@
 		 * hurting performance. */
 		set_bit(UNPLUG_REMOTE, &mdev->flags);
 
-		/* see drbd_make_request_common,
-		 * just after it grabs the req_lock */
-		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
-
-		req->epoch = mdev->newest_tle->br_number;
-
-		/* increment size of current epoch */
-		mdev->newest_tle->n_writes++;
-
 		/* queue work item to send data */
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
-		req->rq_state |= RQ_NET_QUEUED;
+		mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
 		req->w.cb =  w_send_dblock;
-		drbd_queue_work(&mdev->data.work, &req->w);
+		drbd_queue_work(&mdev->tconn->sender_work, &req->w);
 
 		/* close the epoch, in case it outgrew the limit */
-		if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size)
-			queue_barrier(mdev);
+		rcu_read_lock();
+		nc = rcu_dereference(mdev->tconn->net_conf);
+		p = nc->max_epoch_size;
+		rcu_read_unlock();
+		if (mdev->tconn->current_tle_writes >= p)
+			start_new_tl_epoch(mdev->tconn);
 
 		break;
 
-	case queue_for_send_oos:
-		req->rq_state |= RQ_NET_QUEUED;
-		req->w.cb =  w_send_oos;
-		drbd_queue_work(&mdev->data.work, &req->w);
+	case QUEUE_FOR_SEND_OOS:
+		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
+		req->w.cb =  w_send_out_of_sync;
+		drbd_queue_work(&mdev->tconn->sender_work, &req->w);
 		break;
 
-	case read_retry_remote_canceled:
-	case send_canceled:
-	case send_failed:
+	case READ_RETRY_REMOTE_CANCELED:
+	case SEND_CANCELED:
+	case SEND_FAILED:
 		/* real cleanup will be done from tl_clear.  just update flags
 		 * so it is no longer marked as on the worker queue */
-		req->rq_state &= ~RQ_NET_QUEUED;
-		/* if we did it right, tl_clear should be scheduled only after
-		 * this, so this should not be necessary! */
-		_req_may_be_done_not_susp(req, m);
+		mod_rq_state(req, m, RQ_NET_QUEUED, 0);
 		break;
 
-	case handed_over_to_network:
+	case HANDED_OVER_TO_NETWORK:
 		/* assert something? */
-		if (bio_data_dir(req->master_bio) == WRITE)
-			atomic_add(req->size>>9, &mdev->ap_in_flight);
-
 		if (bio_data_dir(req->master_bio) == WRITE &&
-		    mdev->net_conf->wire_protocol == DRBD_PROT_A) {
+		    !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) {
 			/* this is what is dangerous about protocol A:
 			 * pretend it was successfully written on the peer. */
-			if (req->rq_state & RQ_NET_PENDING) {
-				dec_ap_pending(mdev);
-				req->rq_state &= ~RQ_NET_PENDING;
-				req->rq_state |= RQ_NET_OK;
-			} /* else: neg-ack was faster... */
+			if (req->rq_state & RQ_NET_PENDING)
+				mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
+			/* else: neg-ack was faster... */
 			/* it is still not yet RQ_NET_DONE until the
 			 * corresponding epoch barrier got acked as well,
 			 * so we know what to dirty on connection loss */
 		}
-		req->rq_state &= ~RQ_NET_QUEUED;
-		req->rq_state |= RQ_NET_SENT;
-		_req_may_be_done_not_susp(req, m);
+		mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
 		break;
 
-	case oos_handed_to_network:
+	case OOS_HANDED_TO_NETWORK:
 		/* Was not set PENDING, no longer QUEUED, so is now DONE
 		 * as far as this connection is concerned. */
-		req->rq_state &= ~RQ_NET_QUEUED;
-		req->rq_state |= RQ_NET_DONE;
-		_req_may_be_done_not_susp(req, m);
+		mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE);
 		break;
 
-	case connection_lost_while_pending:
+	case CONNECTION_LOST_WHILE_PENDING:
 		/* transfer log cleanup after connection loss */
-		/* assert something? */
-		if (req->rq_state & RQ_NET_PENDING)
-			dec_ap_pending(mdev);
-		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
-		req->rq_state |= RQ_NET_DONE;
-		if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
-			atomic_sub(req->size>>9, &mdev->ap_in_flight);
-
-		/* if it is still queued, we may not complete it here.
-		 * it will be canceled soon. */
-		if (!(req->rq_state & RQ_NET_QUEUED))
-			_req_may_be_done(req, m); /* Allowed while state.susp */
+		mod_rq_state(req, m,
+				RQ_NET_OK|RQ_NET_PENDING|RQ_COMPLETION_SUSP,
+				RQ_NET_DONE);
 		break;
 
-	case conflict_discarded_by_peer:
-		/* for discarded conflicting writes of multiple primaries,
+	case CONFLICT_RESOLVED:
+		/* for superseded conflicting writes of multiple primaries,
 		 * there is no need to keep anything in the tl, potential
-		 * node crashes are covered by the activity log. */
-		if (what == conflict_discarded_by_peer)
-			dev_alert(DEV, "Got DiscardAck packet %llus +%u!"
-			      " DRBD is not a random data generator!\n",
-			      (unsigned long long)req->sector, req->size);
-		req->rq_state |= RQ_NET_DONE;
-		/* fall through */
-	case write_acked_by_peer_and_sis:
-	case write_acked_by_peer:
-		if (what == write_acked_by_peer_and_sis)
-			req->rq_state |= RQ_NET_SIS;
+		 * node crashes are covered by the activity log.
+		 *
+		 * If this request had been marked as RQ_POSTPONED before,
+		 * it will actually not be completed, but "restarted",
+		 * resubmitted from the retry worker context. */
+		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
+		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK);
+		break;
+
+	case WRITE_ACKED_BY_PEER_AND_SIS:
+		req->rq_state |= RQ_NET_SIS;
+	case WRITE_ACKED_BY_PEER:
+		D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
 		/* protocol C; successfully written on peer.
 		 * Nothing more to do here.
 		 * We want to keep the tl in place for all protocols, to cater
 		 * for volatile write-back caches on lower level devices. */
 
-	case recv_acked_by_peer:
+		goto ack_common;
+	case RECV_ACKED_BY_PEER:
+		D_ASSERT(req->rq_state & RQ_EXP_RECEIVE_ACK);
 		/* protocol B; pretends to be successfully written on peer.
-		 * see also notes above in handed_over_to_network about
+		 * see also notes above in HANDED_OVER_TO_NETWORK about
 		 * protocol != C */
-		req->rq_state |= RQ_NET_OK;
+	ack_common:
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
-		dec_ap_pending(mdev);
-		atomic_sub(req->size>>9, &mdev->ap_in_flight);
-		req->rq_state &= ~RQ_NET_PENDING;
-		_req_may_be_done_not_susp(req, m);
+		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
 		break;
 
-	case neg_acked:
-		/* assert something? */
-		if (req->rq_state & RQ_NET_PENDING) {
-			dec_ap_pending(mdev);
-			atomic_sub(req->size>>9, &mdev->ap_in_flight);
-		}
-		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
-
-		req->rq_state |= RQ_NET_DONE;
-		_req_may_be_done_not_susp(req, m);
-		/* else: done by handed_over_to_network */
+	case POSTPONE_WRITE:
+		D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
+		/* If this node has already detected the write conflict, the
+		 * worker will be waiting on misc_wait.  Wake it up once this
+		 * request has completed locally.
+		 */
+		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		req->rq_state |= RQ_POSTPONED;
+		if (req->i.waiting)
+			wake_up(&mdev->misc_wait);
+		/* Do not clear RQ_NET_PENDING. This request will make further
+		 * progress via restart_conflicting_writes() or
+		 * fail_postponed_requests(). Hopefully. */
 		break;
 
-	case fail_frozen_disk_io:
+	case NEG_ACKED:
+		mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, 0);
+		break;
+
+	case FAIL_FROZEN_DISK_IO:
+		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
+			break;
+		mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
+		break;
+
+	case RESTART_FROZEN_DISK_IO:
 		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
 			break;
 
-		_req_may_be_done(req, m); /* Allowed while state.susp */
-		break;
-
-	case restart_frozen_disk_io:
-		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
-			break;
-
-		req->rq_state &= ~RQ_LOCAL_COMPLETED;
+		mod_rq_state(req, m,
+				RQ_COMPLETION_SUSP|RQ_LOCAL_COMPLETED,
+				RQ_LOCAL_PENDING);
 
 		rv = MR_READ;
 		if (bio_data_dir(req->master_bio) == WRITE)
 			rv = MR_WRITE;
 
-		get_ldev(mdev);
+		get_ldev(mdev); /* always succeeds in this call path */
 		req->w.cb = w_restart_disk_io;
-		drbd_queue_work(&mdev->data.work, &req->w);
+		drbd_queue_work(&mdev->tconn->sender_work, &req->w);
 		break;
 
-	case resend:
+	case RESEND:
 		/* Simply complete (local only) READs. */
 		if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
-			_req_may_be_done(req, m);
+			mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
 			break;
 		}
 
 		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
-		   before the connection loss (B&C only); only P_BARRIER_ACK was missing.
-		   Trowing them out of the TL here by pretending we got a BARRIER_ACK
-		   We ensure that the peer was not rebooted */
+		   before the connection loss (B&C only); only P_BARRIER_ACK
+		   (or the local completion?) was missing when we suspended.
+		   Throwing them out of the TL here by pretending we got a BARRIER_ACK.
+		   During connection handshake, we ensure that the peer was not rebooted. */
 		if (!(req->rq_state & RQ_NET_OK)) {
+			/* FIXME could this possibly be a req->w.cb == w_send_out_of_sync?
+			 * in that case we must not set RQ_NET_PENDING. */
+
+			mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
 			if (req->w.cb) {
-				drbd_queue_work(&mdev->data.work, &req->w);
+				drbd_queue_work(&mdev->tconn->sender_work, &req->w);
 				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
-			}
+			} /* else: FIXME can this happen? */
 			break;
 		}
-		/* else, fall through to barrier_acked */
+		/* else, fall through to BARRIER_ACKED */
 
-	case barrier_acked:
+	case BARRIER_ACKED:
+		/* barrier ack for READ requests does not make sense */
 		if (!(req->rq_state & RQ_WRITE))
 			break;
 
 		if (req->rq_state & RQ_NET_PENDING) {
-			/* barrier came in before all requests have been acked.
+			/* barrier came in before all requests were acked.
 			 * this is bad, because if the connection is lost now,
 			 * we won't be able to clean them up... */
-			dev_err(DEV, "FIXME (barrier_acked but pending)\n");
-			list_move(&req->tl_requests, &mdev->out_of_sequence_requests);
+			dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n");
 		}
-		if ((req->rq_state & RQ_NET_MASK) != 0) {
-			req->rq_state |= RQ_NET_DONE;
-			if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
-				atomic_sub(req->size>>9, &mdev->ap_in_flight);
-		}
-		_req_may_be_done(req, m); /* Allowed while state.susp */
+		/* Allowed to complete requests, even while suspended.
+		 * As this is called for all requests within a matching epoch,
+		 * we need to filter, and only set RQ_NET_DONE for those that
+		 * have actually been on the wire. */
+		mod_rq_state(req, m, RQ_COMPLETION_SUSP,
+				(req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0);
 		break;
 
-	case data_received:
+	case DATA_RECEIVED:
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
-		dec_ap_pending(mdev);
-		req->rq_state &= ~RQ_NET_PENDING;
-		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
-		_req_may_be_done_not_susp(req, m);
+		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
 		break;
 	};
 
@@ -752,75 +767,265 @@
  *   since size may be bigger than BM_BLOCK_SIZE,
  *   we may need to check several bits.
  */
-static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
+static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
 {
 	unsigned long sbnr, ebnr;
 	sector_t esector, nr_sectors;
 
 	if (mdev->state.disk == D_UP_TO_DATE)
-		return 1;
-	if (mdev->state.disk >= D_OUTDATED)
-		return 0;
-	if (mdev->state.disk <  D_INCONSISTENT)
-		return 0;
-	/* state.disk == D_INCONSISTENT   We will have a look at the BitMap */
-	nr_sectors = drbd_get_capacity(mdev->this_bdev);
+		return true;
+	if (mdev->state.disk != D_INCONSISTENT)
+		return false;
 	esector = sector + (size >> 9) - 1;
-
+	nr_sectors = drbd_get_capacity(mdev->this_bdev);
 	D_ASSERT(sector  < nr_sectors);
 	D_ASSERT(esector < nr_sectors);
 
 	sbnr = BM_SECT_TO_BIT(sector);
 	ebnr = BM_SECT_TO_BIT(esector);
 
-	return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
+	return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
 }
 
+static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector,
+		enum drbd_read_balancing rbm)
+{
+	struct backing_dev_info *bdi;
+	int stripe_shift;
+
+	switch (rbm) {
+	case RB_CONGESTED_REMOTE:
+		bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
+		return bdi_read_congested(bdi);
+	case RB_LEAST_PENDING:
+		return atomic_read(&mdev->local_cnt) >
+			atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt);
+	case RB_32K_STRIPING:  /* stripe_shift = 15 */
+	case RB_64K_STRIPING:
+	case RB_128K_STRIPING:
+	case RB_256K_STRIPING:
+	case RB_512K_STRIPING:
+	case RB_1M_STRIPING:   /* stripe_shift = 20 */
+		stripe_shift = (rbm - RB_32K_STRIPING + 15);
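+		/* alternate between local and remote read per
+		 * 2^stripe_shift byte stripe of the device */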
+		return (sector >> (stripe_shift - 9)) & 1;
+	case RB_ROUND_ROBIN:
+		return test_and_change_bit(READ_BALANCE_RR, &mdev->flags);
+	case RB_PREFER_REMOTE:
+		return true;
+	case RB_PREFER_LOCAL:
+	default:
+		return false;
+	}
+}
+
+/*
+ * complete_conflicting_writes  -  wait for any conflicting write requests
+ *
+ * The write_requests tree contains all active write requests which we
+ * currently know about.  Wait for any requests to complete which conflict with
+ * the new one.
+ *
+ * Only way out: remove the conflicting intervals from the tree.
+ */
+static void complete_conflicting_writes(struct drbd_request *req)
+{
+	DEFINE_WAIT(wait);
+	struct drbd_conf *mdev = req->w.mdev;
+	struct drbd_interval *i;
+	sector_t sector = req->i.sector;
+	int size = req->i.size;
+
+	i = drbd_find_overlap(&mdev->write_requests, sector, size);
+	if (!i)
+		return;
+
+	for (;;) {
+		prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
+		i = drbd_find_overlap(&mdev->write_requests, sector, size);
+		if (!i)
+			break;
+		/* Indicate to wake up device->misc_wait on progress.  */
+		i->waiting = true;
+		spin_unlock_irq(&mdev->tconn->req_lock);
+		schedule();
+		spin_lock_irq(&mdev->tconn->req_lock);
+	}
+	finish_wait(&mdev->misc_wait, &wait);
+}
+
+/* called within req_lock and rcu_read_lock() */
 static void maybe_pull_ahead(struct drbd_conf *mdev)
 {
-	int congested = 0;
+	struct drbd_tconn *tconn = mdev->tconn;
+	struct net_conf *nc;
+	bool congested = false;
+	enum drbd_on_congestion on_congestion;
+
+	nc = rcu_dereference(tconn->net_conf);
+	on_congestion = nc ? nc->on_congestion : OC_BLOCK;
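+	/* if net_conf is already gone, fall back to OC_BLOCK: no pull-ahead */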
+	if (on_congestion == OC_BLOCK ||
+	    tconn->agreed_pro_version < 96)
+		return;
 
 	/* If I don't even have good local storage, we can not reasonably try
 	 * to pull ahead of the peer. We also need the local reference to make
 	 * sure mdev->act_log is there.
-	 * Note: caller has to make sure that net_conf is there.
 	 */
 	if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
 		return;
 
-	if (mdev->net_conf->cong_fill &&
-	    atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
+	if (nc->cong_fill &&
+	    atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) {
 		dev_info(DEV, "Congestion-fill threshold reached\n");
-		congested = 1;
+		congested = true;
 	}
 
-	if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
+	if (mdev->act_log->used >= nc->cong_extents) {
 		dev_info(DEV, "Congestion-extents threshold reached\n");
-		congested = 1;
+		congested = true;
 	}
 
 	if (congested) {
-		queue_barrier(mdev); /* last barrier, after mirrored writes */
+		/* start a new epoch for non-mirrored writes */
+		start_new_tl_epoch(mdev->tconn);
 
-		if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
+		if (on_congestion == OC_PULL_AHEAD)
 			_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
-		else  /*mdev->net_conf->on_congestion == OC_DISCONNECT */
+		else  /*nc->on_congestion == OC_DISCONNECT */
 			_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
 	}
 	put_ldev(mdev);
 }
 
-static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+/* If this returns false, and req->private_bio is still set,
+ * this should be submitted locally.
+ *
+ * If it returns false, but req->private_bio is not set,
+ * we do not have access to good data :(
+ *
+ * Otherwise, this destroys req->private_bio, if any,
+ * and returns true.
+ */
+static bool do_remote_read(struct drbd_request *req)
+{
+	struct drbd_conf *mdev = req->w.mdev;
+	enum drbd_read_balancing rbm;
+
+	if (req->private_bio) {
+		if (!drbd_may_do_local_read(mdev,
+					req->i.sector, req->i.size)) {
+			bio_put(req->private_bio);
+			req->private_bio = NULL;
+			put_ldev(mdev);
+		}
+	}
+
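+	/* without an up-to-date peer disk, a remote read cannot succeed */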
+	if (mdev->state.pdsk != D_UP_TO_DATE)
+		return false;
+
+	if (req->private_bio == NULL)
+		return true;
+
+	/* TODO: improve read balancing decisions, take into account drbd
+	 * protocol, pending requests etc. */
+
+	rcu_read_lock();
+	rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing;
+	rcu_read_unlock();
+
+	if (rbm == RB_PREFER_LOCAL && req->private_bio)
+		return false; /* submit locally */
+
+	if (remote_due_to_read_balancing(mdev, req->i.sector, rbm)) {
+		if (req->private_bio) {
+			bio_put(req->private_bio);
+			req->private_bio = NULL;
+			put_ldev(mdev);
+		}
+		return true;
+	}
+
+	return false;
+}
+
+/* returns number of connections (== 1, for drbd 8.4)
+ * expected to actually write this data,
+ * which does NOT include those that we are L_AHEAD for. */
+static int drbd_process_write_request(struct drbd_request *req)
+{
+	struct drbd_conf *mdev = req->w.mdev;
+	int remote, send_oos;
+
+	rcu_read_lock();
+	remote = drbd_should_do_remote(mdev->state);
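+	/* maybe_pull_ahead() may switch the connection to C_AHEAD or start
+	 * disconnecting, so re-evaluate whether to replicate this write */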
+	if (remote) {
+		maybe_pull_ahead(mdev);
+		remote = drbd_should_do_remote(mdev->state);
+	}
+	send_oos = drbd_should_send_out_of_sync(mdev->state);
+	rcu_read_unlock();
+
+	/* Need to replicate writes.  Unless it is an empty flush,
+	 * which is better mapped to a DRBD P_BARRIER packet,
+	 * also for drbd wire protocol compatibility reasons.
+	 * If this was a flush, just start a new epoch.
+	 * Unless the current epoch was empty anyways, or we are not currently
+	 * replicating, in which case there is no point. */
+	if (unlikely(req->i.size == 0)) {
+		/* The only size==0 bios we expect are empty flushes. */
+		D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH);
+		if (remote)
+			start_new_tl_epoch(mdev->tconn);
+		return 0;
+	}
+
+	if (!remote && !send_oos)
+		return 0;
+
+	D_ASSERT(!(remote && send_oos));
+
+	if (remote) {
+		_req_mod(req, TO_BE_SENT);
+		_req_mod(req, QUEUE_FOR_NET_WRITE);
+	} else if (drbd_set_out_of_sync(mdev, req->i.sector, req->i.size))
+		_req_mod(req, QUEUE_FOR_SEND_OOS);
+
+	return remote;
+}
+
+static void
+drbd_submit_req_private_bio(struct drbd_request *req)
+{
+	struct drbd_conf *mdev = req->w.mdev;
+	struct bio *bio = req->private_bio;
+	const int rw = bio_rw(bio);
+
+	bio->bi_bdev = mdev->ldev->backing_bdev;
+
+	/* State may have changed since we grabbed our reference on the
+	 * ->ldev member. Double check, and short-circuit to endio.
+	 * In case the last activity log transaction failed to get on
+	 * stable storage, and this is a WRITE, we may not even submit
+	 * this bio. */
+	if (get_ldev(mdev)) {
+		if (drbd_insert_fault(mdev,
+				      rw == WRITE ? DRBD_FAULT_DT_WR
+				    : rw == READ  ? DRBD_FAULT_DT_RD
+				    :               DRBD_FAULT_DT_RA))
+			bio_endio(bio, -EIO);
+		else
+			generic_make_request(bio);
+		put_ldev(mdev);
+	} else
+		bio_endio(bio, -EIO);
+}
+
+void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
 {
 	const int rw = bio_rw(bio);
-	const int size = bio->bi_size;
-	const sector_t sector = bio->bi_sector;
-	struct drbd_tl_epoch *b = NULL;
+	struct bio_and_error m = { NULL, };
 	struct drbd_request *req;
-	int local, remote, send_oos = 0;
-	int err = -EIO;
-	int ret = 0;
-	union drbd_state s;
+	bool no_remote = false;
 
 	/* allocate outside of all locks; */
 	req = drbd_req_new(mdev, bio);
@@ -830,55 +1035,14 @@
 		 * if user cannot handle io errors, that's not our business. */
 		dev_err(DEV, "could not kmalloc() req\n");
 		bio_endio(bio, -ENOMEM);
-		return 0;
+		return;
 	}
 	req->start_time = start_time;
 
-	local = get_ldev(mdev);
-	if (!local) {
-		bio_put(req->private_bio); /* or we get a bio leak */
+	if (!get_ldev(mdev)) {
+		bio_put(req->private_bio);
 		req->private_bio = NULL;
 	}
-	if (rw == WRITE) {
-		/* Need to replicate writes.  Unless it is an empty flush,
-		 * which is better mapped to a DRBD P_BARRIER packet,
-		 * also for drbd wire protocol compatibility reasons. */
-		if (unlikely(size == 0)) {
-			/* The only size==0 bios we expect are empty flushes. */
-			D_ASSERT(bio->bi_rw & REQ_FLUSH);
-			remote = 0;
-		} else
-			remote = 1;
-	} else {
-		/* READ || READA */
-		if (local) {
-			if (!drbd_may_do_local_read(mdev, sector, size)) {
-				/* we could kick the syncer to
-				 * sync this extent asap, wait for
-				 * it, then continue locally.
-				 * Or just issue the request remotely.
-				 */
-				local = 0;
-				bio_put(req->private_bio);
-				req->private_bio = NULL;
-				put_ldev(mdev);
-			}
-		}
-		remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
-	}
-
-	/* If we have a disk, but a READA request is mapped to remote,
-	 * we are R_PRIMARY, D_INCONSISTENT, SyncTarget.
-	 * Just fail that READA request right here.
-	 *
-	 * THINK: maybe fail all READA when not local?
-	 *        or make this configurable...
-	 *        if network is slow, READA won't do any good.
-	 */
-	if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) {
-		err = -EWOULDBLOCK;
-		goto fail_and_free_req;
-	}
 
 	/* For WRITES going to the local disk, grab a reference on the target
 	 * extent.  This waits for any resync activity in the corresponding
@@ -887,348 +1051,131 @@
 	 * of transactional on-disk meta data updates.
 	 * Empty flushes don't need to go into the activity log, they can only
 	 * flush data for pending writes which are already in there. */
-	if (rw == WRITE && local && size
+	if (rw == WRITE && req->private_bio && req->i.size
 	&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
 		req->rq_state |= RQ_IN_ACT_LOG;
-		drbd_al_begin_io(mdev, sector);
+		drbd_al_begin_io(mdev, &req->i);
 	}
 
-	s = mdev->state;
-	remote = remote && drbd_should_do_remote(s);
-	send_oos = rw == WRITE && drbd_should_send_oos(s);
-	D_ASSERT(!(remote && send_oos));
-
-	if (!(local || remote) && !is_susp(mdev->state)) {
-		if (__ratelimit(&drbd_ratelimit_state))
-			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
-		goto fail_free_complete;
+	spin_lock_irq(&mdev->tconn->req_lock);
+	if (rw == WRITE) {
+		/* This may temporarily give up the req_lock,
+		 * but will re-acquire it before it returns here.
+		 * Needs to be before the check on drbd_suspended() */
+		complete_conflicting_writes(req);
 	}
 
-	/* For WRITE request, we have to make sure that we have an
-	 * unused_spare_tle, in case we need to start a new epoch.
-	 * I try to be smart and avoid to pre-allocate always "just in case",
-	 * but there is a race between testing the bit and pointer outside the
-	 * spinlock, and grabbing the spinlock.
-	 * if we lost that race, we retry.  */
-	if (rw == WRITE && (remote || send_oos) &&
-	    mdev->unused_spare_tle == NULL &&
-	    test_bit(CREATE_BARRIER, &mdev->flags)) {
-allocate_barrier:
-		b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO);
-		if (!b) {
-			dev_err(DEV, "Failed to alloc barrier.\n");
-			err = -ENOMEM;
-			goto fail_free_complete;
+	/* no more giving up req_lock from now on! */
+
+	if (drbd_suspended(mdev)) {
+		/* push back and retry: */
+		req->rq_state |= RQ_POSTPONED;
+		if (req->private_bio) {
+			bio_put(req->private_bio);
+			req->private_bio = NULL;
+			put_ldev(mdev);
 		}
+		goto out;
 	}
 
-	/* GOOD, everything prepared, grab the spin_lock */
-	spin_lock_irq(&mdev->req_lock);
-
-	if (is_susp(mdev->state)) {
-		/* If we got suspended, use the retry mechanism of
-		   drbd_make_request() to restart processing of this
-		   bio. In the next call to drbd_make_request
-		   we sleep in inc_ap_bio() */
-		ret = 1;
-		spin_unlock_irq(&mdev->req_lock);
-		goto fail_free_complete;
-	}
-
-	if (remote || send_oos) {
-		remote = drbd_should_do_remote(mdev->state);
-		send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
-		D_ASSERT(!(remote && send_oos));
-
-		if (!(remote || send_oos))
-			dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
-		if (!(local || remote)) {
-			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
-			spin_unlock_irq(&mdev->req_lock);
-			goto fail_free_complete;
-		}
-	}
-
-	if (b && mdev->unused_spare_tle == NULL) {
-		mdev->unused_spare_tle = b;
-		b = NULL;
-	}
-	if (rw == WRITE && (remote || send_oos) &&
-	    mdev->unused_spare_tle == NULL &&
-	    test_bit(CREATE_BARRIER, &mdev->flags)) {
-		/* someone closed the current epoch
-		 * while we were grabbing the spinlock */
-		spin_unlock_irq(&mdev->req_lock);
-		goto allocate_barrier;
-	}
-
-
 	/* Update disk stats */
 	_drbd_start_io_acct(mdev, req, bio);
 
-	/* _maybe_start_new_epoch(mdev);
-	 * If we need to generate a write barrier packet, we have to add the
-	 * new epoch (barrier) object, and queue the barrier packet for sending,
-	 * and queue the req's data after it _within the same lock_, otherwise
-	 * we have race conditions were the reorder domains could be mixed up.
-	 *
-	 * Even read requests may start a new epoch and queue the corresponding
-	 * barrier packet.  To get the write ordering right, we only have to
-	 * make sure that, if this is a write request and it triggered a
-	 * barrier packet, this request is queued within the same spinlock. */
-	if ((remote || send_oos) && mdev->unused_spare_tle &&
-	    test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
-		_tl_add_barrier(mdev, mdev->unused_spare_tle);
-		mdev->unused_spare_tle = NULL;
-	} else {
-		D_ASSERT(!(remote && rw == WRITE &&
-			   test_bit(CREATE_BARRIER, &mdev->flags)));
+	/* We fail READ/READA early, if we can not serve it.
+	 * We must do this before req is registered on any lists.
+	 * Otherwise, drbd_req_complete() will queue failed READ for retry. */
+	if (rw != WRITE) {
+		if (!do_remote_read(req) && !req->private_bio)
+			goto nodata;
 	}
 
-	/* NOTE
-	 * Actually, 'local' may be wrong here already, since we may have failed
-	 * to write to the meta data, and may become wrong anytime because of
-	 * local io-error for some other request, which would lead to us
-	 * "detaching" the local disk.
-	 *
-	 * 'remote' may become wrong any time because the network could fail.
-	 *
-	 * This is a harmless race condition, though, since it is handled
-	 * correctly at the appropriate places; so it just defers the failure
-	 * of the respective operation.
-	 */
-
-	/* mark them early for readability.
-	 * this just sets some state flags. */
-	if (remote)
-		_req_mod(req, to_be_send);
-	if (local)
-		_req_mod(req, to_be_submitted);
-
-	/* check this request on the collision detection hash tables.
-	 * if we have a conflict, just complete it here.
-	 * THINK do we want to check reads, too? (I don't think so...) */
-	if (rw == WRITE && _req_conflicts(req))
-		goto fail_conflicting;
+	/* which transfer log epoch does this belong to? */
+	req->epoch = atomic_read(&mdev->tconn->current_tle_nr);
 
 	/* no point in adding empty flushes to the transfer log,
 	 * they are mapped to drbd barriers already. */
-	if (likely(size!=0))
-		list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
+	if (likely(req->i.size != 0)) {
+		if (rw == WRITE)
+			mdev->tconn->current_tle_writes++;
 
-	/* NOTE remote first: to get the concurrent write detection right,
-	 * we must register the request before start of local IO.  */
-	if (remote) {
-		/* either WRITE and C_CONNECTED,
-		 * or READ, and no local disk,
-		 * or READ, but not in sync.
-		 */
-		_req_mod(req, (rw == WRITE)
-				? queue_for_net_write
-				: queue_for_net_read);
+		list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log);
 	}
-	if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
-		_req_mod(req, queue_for_send_oos);
 
-	if (remote &&
-	    mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96)
-		maybe_pull_ahead(mdev);
-
-	/* If this was a flush, queue a drbd barrier/start a new epoch.
-	 * Unless the current epoch was empty anyways, or we are not currently
-	 * replicating, in which case there is no point. */
-	if (unlikely(bio->bi_rw & REQ_FLUSH)
-		&& mdev->newest_tle->n_writes
-		&& drbd_should_do_remote(mdev->state))
-		queue_barrier(mdev);
-
-	spin_unlock_irq(&mdev->req_lock);
-	kfree(b); /* if someone else has beaten us to it... */
-
-	if (local) {
-		req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
-
-		/* State may have changed since we grabbed our reference on the
-		 * mdev->ldev member. Double check, and short-circuit to endio.
-		 * In case the last activity log transaction failed to get on
-		 * stable storage, and this is a WRITE, we may not even submit
-		 * this bio. */
-		if (get_ldev(mdev)) {
-			if (drbd_insert_fault(mdev,   rw == WRITE ? DRBD_FAULT_DT_WR
-						    : rw == READ  ? DRBD_FAULT_DT_RD
-						    :               DRBD_FAULT_DT_RA))
-				bio_endio(req->private_bio, -EIO);
-			else
-				generic_make_request(req->private_bio);
-			put_ldev(mdev);
+	if (rw == WRITE) {
+		if (!drbd_process_write_request(req))
+			no_remote = true;
+	} else {
+		/* We either have a private_bio, or we can read from remote.
+		 * Otherwise we would have taken the goto nodata above. */
+		if (req->private_bio == NULL) {
+			_req_mod(req, TO_BE_SENT);
+			_req_mod(req, QUEUE_FOR_NET_READ);
 		} else
-			bio_endio(req->private_bio, -EIO);
+			no_remote = true;
 	}
 
-	return 0;
-
-fail_conflicting:
-	/* this is a conflicting request.
-	 * even though it may have been only _partially_
-	 * overlapping with one of the currently pending requests,
-	 * without even submitting or sending it, we will
-	 * pretend that it was successfully served right now.
-	 */
-	_drbd_end_io_acct(mdev, req);
-	spin_unlock_irq(&mdev->req_lock);
-	if (remote)
-		dec_ap_pending(mdev);
-	/* THINK: do we want to fail it (-EIO), or pretend success?
-	 * this pretends success. */
-	err = 0;
-
-fail_free_complete:
-	if (req->rq_state & RQ_IN_ACT_LOG)
-		drbd_al_complete_io(mdev, sector);
-fail_and_free_req:
-	if (local) {
-		bio_put(req->private_bio);
-		req->private_bio = NULL;
-		put_ldev(mdev);
-	}
-	if (!ret)
-		bio_endio(bio, err);
-
-	drbd_req_free(req);
-	dec_ap_bio(mdev);
-	kfree(b);
-
-	return ret;
-}
-
-/* helper function for drbd_make_request
- * if we can determine just by the mdev (state) that this request will fail,
- * return 1
- * otherwise return 0
- */
-static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
-{
-	if (mdev->state.role != R_PRIMARY &&
-		(!allow_oos || is_write)) {
-		if (__ratelimit(&drbd_ratelimit_state)) {
-			dev_err(DEV, "Process %s[%u] tried to %s; "
-			    "since we are not in Primary state, "
-			    "we cannot allow this\n",
-			    current->comm, current->pid,
-			    is_write ? "WRITE" : "READ");
-		}
-		return 1;
+	if (req->private_bio) {
+		/* needs to be marked within the same spinlock */
+		_req_mod(req, TO_BE_SUBMITTED);
+		/* but we need to give up the spinlock to submit */
+		spin_unlock_irq(&mdev->tconn->req_lock);
+		drbd_submit_req_private_bio(req);
+		spin_lock_irq(&mdev->tconn->req_lock);
+	} else if (no_remote) {
+nodata:
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
+					(unsigned long long)req->i.sector, req->i.size >> 9);
+		/* A write may have been queued for send_oos, however.
+		 * So we cannot simply free it; we must go through drbd_req_put_completion_ref() */
 	}
 
-	return 0;
+out:
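+	/* drop one completion reference; if it was the last one, also drop
+	 * a kref, which may destroy the request */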
+	if (drbd_req_put_completion_ref(req, &m, 1))
+		kref_put(&req->kref, drbd_req_destroy);
+	spin_unlock_irq(&mdev->tconn->req_lock);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
+	return;
 }
 
 void drbd_make_request(struct request_queue *q, struct bio *bio)
 {
-	unsigned int s_enr, e_enr;
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
 	unsigned long start_time;
 
-	if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
-		bio_endio(bio, -EPERM);
-		return;
-	}
-
 	start_time = jiffies;
 
 	/*
 	 * what we "blindly" assume:
 	 */
-	D_ASSERT((bio->bi_size & 0x1ff) == 0);
+	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
 
-	/* to make some things easier, force alignment of requests within the
-	 * granularity of our hash tables */
-	s_enr = bio->bi_sector >> HT_SHIFT;
-	e_enr = bio->bi_size ? (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT : s_enr;
-
-	if (likely(s_enr == e_enr)) {
-		do {
-			inc_ap_bio(mdev, 1);
-		} while (drbd_make_request_common(mdev, bio, start_time));
-		return;
-	}
-
-	/* can this bio be split generically?
-	 * Maybe add our own split-arbitrary-bios function. */
-	if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
-		/* rather error out here than BUG in bio_split */
-		dev_err(DEV, "bio would need to, but cannot, be split: "
-		    "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
-		    bio->bi_vcnt, bio->bi_idx, bio->bi_size,
-		    (unsigned long long)bio->bi_sector);
-		bio_endio(bio, -EINVAL);
-	} else {
-		/* This bio crosses some boundary, so we have to split it. */
-		struct bio_pair *bp;
-		/* works for the "do not cross hash slot boundaries" case
-		 * e.g. sector 262269, size 4096
-		 * s_enr = 262269 >> 6 = 4097
-		 * e_enr = (262269+8-1) >> 6 = 4098
-		 * HT_SHIFT = 6
-		 * sps = 64, mask = 63
-		 * first_sectors = 64 - (262269 & 63) = 3
-		 */
-		const sector_t sect = bio->bi_sector;
-		const int sps = 1 << HT_SHIFT; /* sectors per slot */
-		const int mask = sps - 1;
-		const sector_t first_sectors = sps - (sect & mask);
-		bp = bio_split(bio, first_sectors);
-
-		/* we need to get a "reference count" (ap_bio_cnt)
-		 * to avoid races with the disconnect/reconnect/suspend code.
-		 * In case we need to split the bio here, we need to get three references
-		 * atomically, otherwise we might deadlock when trying to submit the
-		 * second one! */
-		inc_ap_bio(mdev, 3);
-
-		D_ASSERT(e_enr == s_enr + 1);
-
-		while (drbd_make_request_common(mdev, &bp->bio1, start_time))
-			inc_ap_bio(mdev, 1);
-
-		while (drbd_make_request_common(mdev, &bp->bio2, start_time))
-			inc_ap_bio(mdev, 1);
-
-		dec_ap_bio(mdev);
-
-		bio_pair_release(bp);
-	}
+	inc_ap_bio(mdev);
+	__drbd_make_request(mdev, bio, start_time);
 }
 
-/* This is called by bio_add_page().  With this function we reduce
- * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
- * units (was AL_EXTENTs).
+/* This is called by bio_add_page().
  *
- * we do the calculation within the lower 32bit of the byte offsets,
- * since we don't care for actual offset, but only check whether it
- * would cross "activity log extent" boundaries.
+ * q->max_hw_sectors and other global limits are already enforced there.
+ *
+ * We need to call down to our lower level device,
+ * in case it has special restrictions.
+ *
+ * We also may need to enforce configured max-bio-bvecs limits.
  *
  * As long as the BIO is empty we have to allow at least one bvec,
- * regardless of size and offset.  so the resulting bio may still
- * cross extent boundaries.  those are dealt with (bio_split) in
- * drbd_make_request.
+ * regardless of size and offset, so no need to ask lower levels.
  */
 int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
-	unsigned int bio_offset =
-		(unsigned int)bvm->bi_sector << 9; /* 32 bit */
 	unsigned int bio_size = bvm->bi_size;
-	int limit, backing_limit;
+	int limit = DRBD_MAX_BIO_SIZE;
+	int backing_limit;
 
-	limit = DRBD_MAX_BIO_SIZE
-	      - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
-	if (limit < 0)
-		limit = 0;
-	if (bio_size == 0) {
-		if (limit <= bvec->bv_len)
-			limit = bvec->bv_len;
-	} else if (limit && get_ldev(mdev)) {
+	if (bio_size && get_ldev(mdev)) {
 		struct request_queue * const b =
 			mdev->ldev->backing_bdev->bd_disk->queue;
 		if (b->merge_bvec_fn) {
@@ -1240,24 +1187,38 @@
 	return limit;
 }
 
+struct drbd_request *find_oldest_request(struct drbd_tconn *tconn)
+{
+	/* Walk the transfer log,
+	 * and find the oldest not yet completed request */
+	struct drbd_request *r;
+	list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
+		if (atomic_read(&r->completion_ref))
+			return r;
+	}
+	return NULL;
+}
+
 void request_timer_fn(unsigned long data)
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
+	struct drbd_tconn *tconn = mdev->tconn;
 	struct drbd_request *req; /* oldest request */
-	struct list_head *le;
+	struct net_conf *nc;
 	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
 	unsigned long now;
 
-	if (get_net_conf(mdev)) {
-		if (mdev->state.conn >= C_WF_REPORT_PARAMS)
-			ent = mdev->net_conf->timeout*HZ/10
-				* mdev->net_conf->ko_count;
-		put_net_conf(mdev);
-	}
+	rcu_read_lock();
+	nc = rcu_dereference(tconn->net_conf);
+	if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS)
+		ent = nc->timeout * HZ/10 * nc->ko_count;
+
 	if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
-		dt = mdev->ldev->dc.disk_timeout * HZ / 10;
+		dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
 		put_ldev(mdev);
 	}
+	rcu_read_unlock();
+
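+	/* the effective timeout is the shorter of the two; zero means unconfigured */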
 	et = min_not_zero(dt, ent);
 
 	if (!et)
@@ -1265,17 +1226,14 @@
 
 	now = jiffies;
 
-	spin_lock_irq(&mdev->req_lock);
-	le = &mdev->oldest_tle->requests;
-	if (list_empty(le)) {
-		spin_unlock_irq(&mdev->req_lock);
+	spin_lock_irq(&tconn->req_lock);
+	req = find_oldest_request(tconn);
+	if (!req) {
+		spin_unlock_irq(&tconn->req_lock);
 		mod_timer(&mdev->request_timer, now + et);
 		return;
 	}
 
-	le = le->prev;
-	req = list_entry(le, struct drbd_request, tl_requests);
-
 	/* The request is considered timed out, if
 	 * - we have some effective timeout from the configuration,
 	 *   with above state restrictions applied,
@@ -1294,17 +1252,17 @@
 	 */
 	if (ent && req->rq_state & RQ_NET_PENDING &&
 		 time_after(now, req->start_time + ent) &&
-		!time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) {
+		!time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) {
 		dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
 		_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
 	}
-	if (dt && req->rq_state & RQ_LOCAL_PENDING &&
+	if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev &&
 		 time_after(now, req->start_time + dt) &&
 		!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
 		dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
 		__drbd_chk_io_error(mdev, DRBD_FORCE_DETACH);
 	}
 	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&tconn->req_lock);
 	mod_timer(&mdev->request_timer, nt);
 }
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 3d21119..016de6b 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -77,40 +77,41 @@
  */
 
 enum drbd_req_event {
-	created,
-	to_be_send,
-	to_be_submitted,
+	CREATED,
+	TO_BE_SENT,
+	TO_BE_SUBMITTED,
 
 	/* XXX yes, now I am inconsistent...
 	 * these are not "events" but "actions"
 	 * oh, well... */
-	queue_for_net_write,
-	queue_for_net_read,
-	queue_for_send_oos,
+	QUEUE_FOR_NET_WRITE,
+	QUEUE_FOR_NET_READ,
+	QUEUE_FOR_SEND_OOS,
 
-	send_canceled,
-	send_failed,
-	handed_over_to_network,
-	oos_handed_to_network,
-	connection_lost_while_pending,
-	read_retry_remote_canceled,
-	recv_acked_by_peer,
-	write_acked_by_peer,
-	write_acked_by_peer_and_sis, /* and set_in_sync */
-	conflict_discarded_by_peer,
-	neg_acked,
-	barrier_acked, /* in protocol A and B */
-	data_received, /* (remote read) */
+	SEND_CANCELED,
+	SEND_FAILED,
+	HANDED_OVER_TO_NETWORK,
+	OOS_HANDED_TO_NETWORK,
+	CONNECTION_LOST_WHILE_PENDING,
+	READ_RETRY_REMOTE_CANCELED,
+	RECV_ACKED_BY_PEER,
+	WRITE_ACKED_BY_PEER,
+	WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
+	CONFLICT_RESOLVED,
+	POSTPONE_WRITE,
+	NEG_ACKED,
+	BARRIER_ACKED, /* in protocol A and B */
+	DATA_RECEIVED, /* (remote read) */
 
-	read_completed_with_error,
-	read_ahead_completed_with_error,
-	write_completed_with_error,
-	abort_disk_io,
-	completed_ok,
-	resend,
-	fail_frozen_disk_io,
-	restart_frozen_disk_io,
-	nothing, /* for tracing only */
+	READ_COMPLETED_WITH_ERROR,
+	READ_AHEAD_COMPLETED_WITH_ERROR,
+	WRITE_COMPLETED_WITH_ERROR,
+	ABORT_DISK_IO,
+	COMPLETED_OK,
+	RESEND,
+	FAIL_FROZEN_DISK_IO,
+	RESTART_FROZEN_DISK_IO,
+	NOTHING,
 };
 
 /* encoding of request states for now.  we don't actually need that many bits.
@@ -142,8 +143,8 @@
 	 *        recv_ack (B) or implicit "ack" (A),
 	 *        still waiting for the barrier ack.
 	 *        master_bio may already be completed and invalidated.
-	 * 11100: write_acked (C),
-	 *        data_received (for remote read, any protocol)
+	 * 11100: write acked (C),
+	 *        data received (for remote read, any protocol)
 	 *        or finally the barrier ack has arrived (B,A)...
 	 *        request can be freed
 	 * 01100: neg-acked (write, protocol C)
@@ -198,6 +199,22 @@
 
 	/* Should call drbd_al_complete_io() for this request... */
 	__RQ_IN_ACT_LOG,
+
+	/* The peer has sent a retry ACK */
+	__RQ_POSTPONED,
+
+	/* would have been completed,
+	 * but was not, because of drbd_suspended() */
+	__RQ_COMPLETION_SUSP,
+
+	/* We expect a receive ACK (wire proto B) */
+	__RQ_EXP_RECEIVE_ACK,
+
+	/* We expect a write ACK (wire proto C) */
+	__RQ_EXP_WRITE_ACK,
+
+	/* waiting for a barrier ack, did an extra kref_get */
+	__RQ_EXP_BARR_ACK,
 };
 
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
@@ -219,56 +236,16 @@
 
 #define RQ_WRITE           (1UL << __RQ_WRITE)
 #define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
+#define RQ_POSTPONED	   (1UL << __RQ_POSTPONED)
+#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
+#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
+#define RQ_EXP_WRITE_ACK   (1UL << __RQ_EXP_WRITE_ACK)
+#define RQ_EXP_BARR_ACK    (1UL << __RQ_EXP_BARR_ACK)
 
 /* For waking up the frozen transfer log mod_req() has to return if the request
    should be counted in the epoch object*/
-#define MR_WRITE_SHIFT 0
-#define MR_WRITE       (1 << MR_WRITE_SHIFT)
-#define MR_READ_SHIFT  1
-#define MR_READ        (1 << MR_READ_SHIFT)
-
-/* epoch entries */
-static inline
-struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
-{
-	BUG_ON(mdev->ee_hash_s == 0);
-	return mdev->ee_hash +
-		((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
-}
-
-/* transfer log (drbd_request objects) */
-static inline
-struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
-{
-	BUG_ON(mdev->tl_hash_s == 0);
-	return mdev->tl_hash +
-		((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
-}
-
-/* application reads (drbd_request objects) */
-static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
-{
-	return mdev->app_reads_hash
-		+ ((unsigned int)(sector) % APP_R_HSIZE);
-}
-
-/* when we receive the answer for a read request,
- * verify that we actually know about it */
-static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
-	u64 id, sector_t sector)
-{
-	struct hlist_head *slot = ar_hash_slot(mdev, sector);
-	struct hlist_node *n;
-	struct drbd_request *req;
-
-	hlist_for_each_entry(req, n, slot, collision) {
-		if ((unsigned long)req == (unsigned long)id) {
-			D_ASSERT(req->sector == sector);
-			return req;
-		}
-	}
-	return NULL;
-}
+#define MR_WRITE       1
+#define MR_READ        2
 
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
@@ -278,41 +255,10 @@
 	req->private_bio = bio;
 
 	bio->bi_private  = req;
-	bio->bi_end_io   = drbd_endio_pri;
+	bio->bi_end_io   = drbd_request_endio;
 	bio->bi_next     = NULL;
 }
 
-static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
-	struct bio *bio_src)
-{
-	struct drbd_request *req =
-		mempool_alloc(drbd_request_mempool, GFP_NOIO);
-	if (likely(req)) {
-		drbd_req_make_private_bio(req, bio_src);
-
-		req->rq_state    = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
-		req->mdev        = mdev;
-		req->master_bio  = bio_src;
-		req->epoch       = 0;
-		req->sector      = bio_src->bi_sector;
-		req->size        = bio_src->bi_size;
-		INIT_HLIST_NODE(&req->collision);
-		INIT_LIST_HEAD(&req->tl_requests);
-		INIT_LIST_HEAD(&req->w.list);
-	}
-	return req;
-}
-
-static inline void drbd_req_free(struct drbd_request *req)
-{
-	mempool_free(req, drbd_request_mempool);
-}
-
-static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
-{
-	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
-}
-
 /* Short lived temporary struct on the stack.
  * We could squirrel the error to be returned into
  * bio->bi_size, or similar. But that would be too ugly. */
@@ -321,6 +267,7 @@
 	int error;
 };
 
+extern void drbd_req_destroy(struct kref *kref);
 extern void _req_may_be_done(struct drbd_request *req,
 		struct bio_and_error *m);
 extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
@@ -328,13 +275,17 @@
 extern void complete_master_bio(struct drbd_conf *mdev,
 		struct bio_and_error *m);
 extern void request_timer_fn(unsigned long data);
-extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
+extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
+extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
+
+/* this is in drbd_main.c */
+extern void drbd_restart_request(struct drbd_request *req);
 
 /* use this if you don't want to deal with calling complete_master_bio()
  * outside the spinlock, e.g. when walking some list on cleanup. */
 static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
 {
-	struct drbd_conf *mdev = req->mdev;
+	struct drbd_conf *mdev = req->w.mdev;
 	struct bio_and_error m;
 	int rv;
 
@@ -354,13 +305,13 @@
 		enum drbd_req_event what)
 {
 	unsigned long flags;
-	struct drbd_conf *mdev = req->mdev;
+	struct drbd_conf *mdev = req->w.mdev;
 	struct bio_and_error m;
 	int rv;
 
-	spin_lock_irqsave(&mdev->req_lock, flags);
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
 	rv = __req_mod(req, what, &m);
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
@@ -368,7 +319,7 @@
 	return rv;
 }
 
-static inline bool drbd_should_do_remote(union drbd_state s)
+static inline bool drbd_should_do_remote(union drbd_dev_state s)
 {
 	return s.pdsk == D_UP_TO_DATE ||
 		(s.pdsk >= D_INCONSISTENT &&
@@ -378,7 +329,7 @@
 	   That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
 	   states. */
 }
-static inline bool drbd_should_send_oos(union drbd_state s)
+static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
 {
 	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
 	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
new file mode 100644
index 0000000..53bf618
--- /dev/null
+++ b/drivers/block/drbd/drbd_state.c
@@ -0,0 +1,1856 @@
+/*
+   drbd_state.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
+   from Logicworks, Inc. for making SDP replication support possible.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/drbd_limits.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+/* in drbd_main.c */
+extern void tl_abort_disk_io(struct drbd_conf *mdev);
+
+struct after_state_chg_work {
+	struct drbd_work w;
+	union drbd_state os;
+	union drbd_state ns;
+	enum chg_state_flags flags;
+	struct completion *done;
+};
+
+enum sanitize_state_warnings {
+	NO_WARNING,
+	ABORTED_ONLINE_VERIFY,
+	ABORTED_RESYNC,
+	CONNECTION_LOST_NEGOTIATING,
+	IMPLICITLY_UPGRADED_DISK,
+	IMPLICITLY_UPGRADED_PDSK,
+};
+
+static int w_after_state_ch(struct drbd_work *w, int unused);
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+			   union drbd_state ns, enum chg_state_flags flags);
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_tconn *);
+static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
+				       enum sanitize_state_warnings *warn);
+
+static inline bool is_susp(union drbd_state s)
+{
+	return s.susp || s.susp_nod || s.susp_fen;
+}
+
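+/* true iff every volume of this connection is unconfigured, i.e.
+ * diskless, standalone and secondary */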
+bool conn_all_vols_unconf(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	bool rv = true;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.disk != D_DISKLESS ||
+		    mdev->state.conn != C_STANDALONE ||
+		    mdev->state.role != R_SECONDARY) {
+			rv = false;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return rv;
+}
+
+/* Unfortunately the states were not correctly ordered when
+   they were defined, therefore we cannot use max_t() here. */
+static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_PRIMARY || role2 == R_PRIMARY)
+		return R_PRIMARY;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_UNKNOWN;
+}
+static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
+		return R_UNKNOWN;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_PRIMARY;
+}
+
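+/* The conn_highest_*() / conn_lowest_*() helpers below aggregate one
+ * field of the per-volume state over all volumes of a connection. */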
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn)
+{
+	enum drbd_role role = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		role = max_role(role, mdev->state.role);
+	rcu_read_unlock();
+
+	return role;
+}
+
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn)
+{
+	enum drbd_role peer = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		peer = max_role(peer, mdev->state.peer);
+	rcu_read_unlock();
+
+	return peer;
+}
+
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.disk);
+	rcu_read_unlock();
+
+	return ds;
+}
+
+enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_MASK;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = min_t(enum drbd_disk_state, ds, mdev->state.disk);
+	rcu_read_unlock();
+
+	return ds;
+}
+
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk);
+	rcu_read_unlock();
+
+	return ds;
+}
+
+enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn)
+{
+	enum drbd_conns conn = C_MASK;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		conn = min_t(enum drbd_conns, conn, mdev->state.conn);
+	rcu_read_unlock();
+
+	return conn;
+}
+
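+/* true if no volume of this connection is currently in C_WF_REPORT_PARAMS */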
+static bool no_peer_wf_report_params(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+	bool rv = true;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+			rv = false;
+			break;
+		}
+	rcu_read_unlock();
+
+	return rv;
+}
+
+
+/**
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
+ * @mdev:	DRBD device.
+ * @os:		old (current) state.
+ * @ns:		new (wanted) state.
+ */
+static int cl_wide_st_chg(struct drbd_conf *mdev,
+			  union drbd_state os, union drbd_state ns)
+{
+	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
+		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
+		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
+		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
+		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
+		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) ||
+		(os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS);
+}
+
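+/* Overlay a requested change onto the current state: bits selected by
+ * @mask are taken from @val, all other bits are kept from @os. */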
+static union drbd_state
+apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
+{
+	union drbd_state ns;
+	ns.i = (os.i & ~mask.i) | val.i;
+	return ns;
+}
+
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+		  union drbd_state mask, union drbd_state val)
+{
+	unsigned long flags;
+	union drbd_state ns;
+	enum drbd_state_rv rv;
+
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	ns = apply_mask_val(drbd_read_state(mdev), mask, val);
+	rv = _drbd_set_state(mdev, ns, f, NULL);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+	return rv;
+}
+
+/**
+ * drbd_force_state() - Impose a change which happens outside our control on our state
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ */
+void drbd_force_state(struct drbd_conf *mdev,
+	union drbd_state mask, union drbd_state val)
+{
+	drbd_change_state(mdev, CS_HARD, mask, val);
+}
+
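+/* Condition for the wait_event() in drbd_req_state(): returns
+ * SS_UNKNOWN_ERROR (0) as long as we should keep waiting for the peer's
+ * answer, and a final status code once the cluster-wide change has
+ * succeeded, failed, or turned out not to need the peer at all. */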
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+	     union drbd_state val)
+{
+	union drbd_state os, ns;
+	unsigned long flags;
+	enum drbd_state_rv rv;
+
+	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
+		return SS_CW_SUCCESS;
+
+	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
+		return SS_CW_FAILED_BY_PEER;
+
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	os = drbd_read_state(mdev);
+	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
+	rv = is_valid_transition(os, ns);
+	if (rv >= SS_SUCCESS)
+		rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */
+
+	if (!cl_wide_st_chg(mdev, os, ns))
+		rv = SS_CW_NO_NEED;
+	if (rv == SS_UNKNOWN_ERROR) {
+		rv = is_valid_state(mdev, ns);
+		if (rv >= SS_SUCCESS) {
+			rv = is_valid_soft_transition(os, ns, mdev->tconn);
+			if (rv >= SS_SUCCESS)
+				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
+		}
+	}
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+	return rv;
+}
+
+/**
+ * drbd_req_state() - Perform a possibly cluster-wide state change
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ * @f:		flags
+ *
+ * Should not be called directly, use drbd_request_state() or
+ * _drbd_request_state().
+ */
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+	       union drbd_state val, enum chg_state_flags f)
+{
+	struct completion done;
+	unsigned long flags;
+	union drbd_state os, ns;
+	enum drbd_state_rv rv;
+
+	init_completion(&done);
+
+	if (f & CS_SERIALIZE)
+		mutex_lock(mdev->state_mutex);
+
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	os = drbd_read_state(mdev);
+	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
+	rv = is_valid_transition(os, ns);
+	if (rv < SS_SUCCESS) {
+		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+		goto abort;
+	}
+
+	if (cl_wide_st_chg(mdev, os, ns)) {
+		rv = is_valid_state(mdev, ns);
+		if (rv == SS_SUCCESS)
+			rv = is_valid_soft_transition(os, ns, mdev->tconn);
+		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+		if (rv < SS_SUCCESS) {
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+
+		if (drbd_send_state_req(mdev, mask, val)) {
+			rv = SS_CW_FAILED_BY_PEER;
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+
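+		/* sleep until _req_st_cond() delivers a final verdict on the
+		 * cluster-wide request */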
+		wait_event(mdev->state_wait,
+			(rv = _req_st_cond(mdev, mask, val)));
+
+		if (rv < SS_SUCCESS) {
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+		spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+		ns = apply_mask_val(drbd_read_state(mdev), mask, val);
+		rv = _drbd_set_state(mdev, ns, f, &done);
+	} else {
+		rv = _drbd_set_state(mdev, ns, f, &done);
+	}
+
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
+		D_ASSERT(current != mdev->tconn->worker.task);
+		wait_for_completion(&done);
+	}
+
+abort:
+	if (f & CS_SERIALIZE)
+		mutex_unlock(mdev->state_mutex);
+
+	return rv;
+}
+
+/**
+ * _drbd_request_state() - Request a state change (with flags)
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ * @f:		flags
+ *
+ * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
+ * flag, or when logging of failed state change requests is not desired.
+ */
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+		    union drbd_state val, enum chg_state_flags f)
+{
+	enum drbd_state_rv rv;
+
+	wait_event(mdev->state_wait,
+		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
+
+	return rv;
+}
+
+static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
+{
+	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
+	    name,
+	    drbd_conn_str(ns.conn),
+	    drbd_role_str(ns.role),
+	    drbd_role_str(ns.peer),
+	    drbd_disk_str(ns.disk),
+	    drbd_disk_str(ns.pdsk),
+	    is_susp(ns) ? 's' : 'r',
+	    ns.aftr_isp ? 'a' : '-',
+	    ns.peer_isp ? 'p' : '-',
+	    ns.user_isp ? 'u' : '-',
+	    ns.susp_fen ? 'F' : '-',
+	    ns.susp_nod ? 'N' : '-'
+	    );
+}
+
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+	          union drbd_state ns, enum drbd_state_rv err)
+{
+	if (err == SS_IN_TRANSIENT_STATE)
+		return;
+	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
+	print_st(mdev, " state", os);
+	print_st(mdev, "wanted", ns);
+}
+
+static long print_state_change(char *pb, union drbd_state os, union drbd_state ns,
+			       enum chg_state_flags flags)
+{
+	char *pbp;
+	pbp = pb;
+	*pbp = 0;
+
+	if (ns.role != os.role && flags & CS_DC_ROLE)
+		pbp += sprintf(pbp, "role( %s -> %s ) ",
+			       drbd_role_str(os.role),
+			       drbd_role_str(ns.role));
+	if (ns.peer != os.peer && flags & CS_DC_PEER)
+		pbp += sprintf(pbp, "peer( %s -> %s ) ",
+			       drbd_role_str(os.peer),
+			       drbd_role_str(ns.peer));
+	if (ns.conn != os.conn && flags & CS_DC_CONN)
+		pbp += sprintf(pbp, "conn( %s -> %s ) ",
+			       drbd_conn_str(os.conn),
+			       drbd_conn_str(ns.conn));
+	if (ns.disk != os.disk && flags & CS_DC_DISK)
+		pbp += sprintf(pbp, "disk( %s -> %s ) ",
+			       drbd_disk_str(os.disk),
+			       drbd_disk_str(ns.disk));
+	if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
+		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
+			       drbd_disk_str(os.pdsk),
+			       drbd_disk_str(ns.pdsk));
+
+	return pbp - pb;
+}
+
+static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns,
+				 enum chg_state_flags flags)
+{
+	char pb[300];
+	char *pbp = pb;
+
+	pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK);
+
+	if (ns.aftr_isp != os.aftr_isp)
+		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
+			       os.aftr_isp,
+			       ns.aftr_isp);
+	if (ns.peer_isp != os.peer_isp)
+		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
+			       os.peer_isp,
+			       ns.peer_isp);
+	if (ns.user_isp != os.user_isp)
+		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
+			       os.user_isp,
+			       ns.user_isp);
+
+	if (pbp != pb)
+		dev_info(DEV, "%s\n", pb);
+}
+
+static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns,
+				 enum chg_state_flags flags)
+{
+	char pb[300];
+	char *pbp = pb;
+
+	pbp += print_state_change(pbp, os, ns, flags);
+
+	if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP)
+		pbp += sprintf(pbp, "susp( %d -> %d ) ",
+			       is_susp(os),
+			       is_susp(ns));
+
+	if (pbp != pb)
+		conn_info(tconn, "%s\n", pb);
+}
+
+
+/**
+ * is_valid_state() - Returns an SS_ error code if ns is not valid
+ * @mdev:	DRBD device.
+ * @ns:		State to consider.
+ */
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+{
+	/* See drbd_state_sw_errors in drbd_strings.c */
+
+	enum drbd_fencing_p fp;
+	enum drbd_state_rv rv = SS_SUCCESS;
+	struct net_conf *nc;
+
+	rcu_read_lock();
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+		put_ldev(mdev);
+	}
+
+	nc = rcu_dereference(mdev->tconn->net_conf);
+	if (nc) {
+		if (!nc->two_primaries && ns.role == R_PRIMARY) {
+			if (ns.peer == R_PRIMARY)
+				rv = SS_TWO_PRIMARIES;
+			else if (conn_highest_peer(mdev->tconn) == R_PRIMARY)
+				rv = SS_O_VOL_PEER_PRI;
+		}
+	}
+
+	if (rv <= 0)
+		/* already found a reason to abort */;
+	else if (ns.role == R_SECONDARY && mdev->open_cnt)
+		rv = SS_DEVICE_IN_USE;
+
+	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if (fp >= FP_RESOURCE &&
+		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
+		rv = SS_PRIMARY_NOP;
+
+	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
+		rv = SS_NO_LOCAL_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
+		rv = SS_NO_REMOTE_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if ((ns.conn == C_CONNECTED ||
+		  ns.conn == C_WF_BITMAP_S ||
+		  ns.conn == C_SYNC_SOURCE ||
+		  ns.conn == C_PAUSED_SYNC_S) &&
+		  ns.disk == D_OUTDATED)
+		rv = SS_CONNECTED_OUTDATES;
+
+	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+		 (nc->verify_alg[0] == 0))
+		rv = SS_NO_VERIFY_ALG;
+
+	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+		  mdev->tconn->agreed_pro_version < 88)
+		rv = SS_NOT_SUPPORTED;
+
+	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
+		rv = SS_CONNECTED_OUTDATES;
+
+	rcu_read_unlock();
+
+	return rv;
+}
+
+/**
+ * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
+ * This function limits state transitions that may be declined by DRBD,
+ * i.e. user requests (aka soft transitions).
+ * @os:		old state.
+ * @ns:		new state.
+ * @tconn:	DRBD connection.
+ */
+static enum drbd_state_rv
+is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_tconn *tconn)
+{
+	enum drbd_state_rv rv = SS_SUCCESS;
+
+	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
+	    os.conn > C_CONNECTED)
+		rv = SS_RESYNC_RUNNING;
+
+	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
+		rv = SS_ALREADY_STANDALONE;
+
+	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
+		rv = SS_IS_DISKLESS;
+
+	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
+		rv = SS_NO_NET_CONFIG;
+
+	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
+		rv = SS_LOWER_THAN_OUTDATED;
+
+	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
+		rv = SS_IN_TRANSIENT_STATE;
+
+	/* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
+	   rv = SS_IN_TRANSIENT_STATE; */
+
+	/* While establishing a connection, only allow cstate to change.
+	   Delay/refuse role changes, detach/attach, etc. */
+	if (test_bit(STATE_SENT, &tconn->flags) &&
+	    !(os.conn == C_WF_REPORT_PARAMS ||
+	      (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
+		rv = SS_IN_TRANSIENT_STATE;
+
+	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
+		rv = SS_NEED_CONNECTION;
+
+	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+	    ns.conn != os.conn && os.conn > C_CONNECTED)
+		rv = SS_RESYNC_RUNNING;
+
+	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
+	    os.conn < C_CONNECTED)
+		rv = SS_NEED_CONNECTION;
+
+	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+	    && os.conn < C_WF_REPORT_PARAMS)
+		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
+	return rv;
+}
+
+static enum drbd_state_rv
+is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
+{
+	/* no change -> nothing to do, at least for the connection part */
+	if (oc == nc)
+		return SS_NOTHING_TO_DO;
+
+	/* disconnect of an unconfigured connection does not make sense */
+	if (oc == C_STANDALONE && nc == C_DISCONNECTING)
+		return SS_ALREADY_STANDALONE;
+
+	/* from C_STANDALONE, we start with C_UNCONNECTED */
+	if (oc == C_STANDALONE && nc != C_UNCONNECTED)
+		return SS_NEED_CONNECTION;
+
+	/* When establishing a connection we need to go through WF_REPORT_PARAMS!
+	   Necessary to do the right thing upon invalidate-remote on a disconnected resource */
+	if (oc < C_WF_REPORT_PARAMS && nc >= C_CONNECTED)
+		return SS_NEED_CONNECTION;
+
+	/* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
+	if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
+		return SS_IN_TRANSIENT_STATE;
+
+	/* After C_DISCONNECTING only C_STANDALONE may follow */
+	if (oc == C_DISCONNECTING && nc != C_STANDALONE)
+		return SS_IN_TRANSIENT_STATE;
+
+	return SS_SUCCESS;
+}
+
+
+/**
+ * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
+ * This limits hard state transitions. Hard state transitions are facts that are
+ * imposed on DRBD by the environment, e.g. the disk broke or the network broke down.
+ * But even those hard state transitions are still not allowed to do everything.
+ * @ns:		new state.
+ * @os:		old state.
+ */
+static enum drbd_state_rv
+is_valid_transition(union drbd_state os, union drbd_state ns)
+{
+	enum drbd_state_rv rv;
+
+	rv = is_valid_conn_transition(os.conn, ns.conn);
+
+	/* we cannot fail (again) if we already detached */
+	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
+		rv = SS_IS_DISKLESS;
+
+	return rv;
+}
+
+static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
+{
+	static const char *msg_table[] = {
+		[NO_WARNING] = "",
+		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
+		[ABORTED_RESYNC] = "Resync aborted.",
+		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
+		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
+		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
+	};
+
+	if (warn != NO_WARNING)
+		dev_warn(DEV, "%s\n", msg_table[warn]);
+}
+
+/**
+ * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
+ * @mdev:	DRBD device.
+ * @ns:		new state.
+ * @warn:	where a warning to be printed by the caller is stored; may be NULL.
+ *
+ * When we lose the connection, we have to set the state of the peer's disk (pdsk)
+ * to D_UNKNOWN. This rule and many more along those lines are in this function.
+ */
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
+				       enum sanitize_state_warnings *warn)
+{
+	enum drbd_fencing_p fp;
+	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
+
+	if (warn)
+		*warn = NO_WARNING;
+
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		rcu_read_lock();
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+		rcu_read_unlock();
+		put_ldev(mdev);
+	}
+
+	/* Implications from connection to peer and peer_isp */
+	if (ns.conn < C_CONNECTED) {
+		ns.peer_isp = 0;
+		ns.peer = R_UNKNOWN;
+		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
+			ns.pdsk = D_UNKNOWN;
+	}
+
+	/* Clear the aftr_isp when becoming unconfigured */
+	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
+		ns.aftr_isp = 0;
+
+	/* An implication of the disk states onto the connection state */
+	/* Abort resync if a disk fails/detaches */
+	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
+		if (warn)
+			*warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
+				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
+		ns.conn = C_CONNECTED;
+	}
+
+	/* Connection breaks down before we finished "Negotiating" */
+	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
+	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
+			ns.disk = mdev->new_state_tmp.disk;
+			ns.pdsk = mdev->new_state_tmp.pdsk;
+		} else {
+			if (warn)
+				*warn = CONNECTION_LOST_NEGOTIATING;
+			ns.disk = D_DISKLESS;
+			ns.pdsk = D_UNKNOWN;
+		}
+		put_ldev(mdev);
+	}
+
+	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+			ns.disk = D_UP_TO_DATE;
+		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+			ns.pdsk = D_UP_TO_DATE;
+	}
+
+	/* Implications of the connection state on the disk states:
+	 * clamp disk and pdsk into the range allowed by ns.conn */
+	disk_min = D_DISKLESS;
+	disk_max = D_UP_TO_DATE;
+	pdsk_min = D_INCONSISTENT;
+	pdsk_max = D_UNKNOWN;
+	switch ((enum drbd_conns)ns.conn) {
+	case C_WF_BITMAP_T:
+	case C_PAUSED_SYNC_T:
+	case C_STARTING_SYNC_T:
+	case C_WF_SYNC_UUID:
+	case C_BEHIND:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_OUTDATED;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_VERIFY_S:
+	case C_VERIFY_T:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_CONNECTED:
+		disk_min = D_DISKLESS;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_DISKLESS;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_WF_BITMAP_S:
+	case C_PAUSED_SYNC_S:
+	case C_STARTING_SYNC_S:
+	case C_AHEAD:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary */
+		break;
+	case C_SYNC_TARGET:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_INCONSISTENT;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_SYNC_SOURCE:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_INCONSISTENT;
+		break;
+	case C_STANDALONE:
+	case C_DISCONNECTING:
+	case C_UNCONNECTED:
+	case C_TIMEOUT:
+	case C_BROKEN_PIPE:
+	case C_NETWORK_FAILURE:
+	case C_PROTOCOL_ERROR:
+	case C_TEAR_DOWN:
+	case C_WF_CONNECTION:
+	case C_WF_REPORT_PARAMS:
+	case C_MASK:
+		break;
+	}
+	if (ns.disk > disk_max)
+		ns.disk = disk_max;
+
+	if (ns.disk < disk_min) {
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_DISK;
+		ns.disk = disk_min;
+	}
+	if (ns.pdsk > pdsk_max)
+		ns.pdsk = pdsk_max;
+
+	if (ns.pdsk < pdsk_min) {
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_PDSK;
+		ns.pdsk = pdsk_min;
+	}
+
+	if (fp == FP_STONITH &&
+	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
+		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
+
+	if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO &&
+	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+		ns.susp_nod = 1; /* Suspend IO while no up-to-date data is accessible, locally or remotely */
+
+	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
+		if (ns.conn == C_SYNC_SOURCE)
+			ns.conn = C_PAUSED_SYNC_S;
+		if (ns.conn == C_SYNC_TARGET)
+			ns.conn = C_PAUSED_SYNC_T;
+	} else {
+		if (ns.conn == C_PAUSED_SYNC_S)
+			ns.conn = C_SYNC_SOURCE;
+		if (ns.conn == C_PAUSED_SYNC_T)
+			ns.conn = C_SYNC_TARGET;
+	}
+
+	return ns;
+}
+
+void drbd_resume_al(struct drbd_conf *mdev)
+{
+	if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
+		dev_info(DEV, "Resumed AL updates\n");
+}
+
+/* helper for __drbd_set_state */
+static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
+{
+	if (mdev->tconn->agreed_pro_version < 90)
+		mdev->ov_start_sector = 0;
+	mdev->rs_total = drbd_bm_bits(mdev);
+	mdev->ov_position = 0;
+	if (cs == C_VERIFY_T) {
+		/* starting online verify from an arbitrary position
+		 * does not fit well into the existing protocol.
+		 * on C_VERIFY_T, we initialize ov_left and friends
+		 * implicitly in receive_DataRequest once the
+		 * first P_OV_REQUEST is received */
+		mdev->ov_start_sector = ~(sector_t)0;
+	} else {
+		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
+		if (bit >= mdev->rs_total) {
+			mdev->ov_start_sector =
+				BM_BIT_TO_SECT(mdev->rs_total - 1);
+			mdev->rs_total = 1;
+		} else
+			mdev->rs_total -= bit;
+		mdev->ov_position = mdev->ov_start_sector;
+	}
+	mdev->ov_left = mdev->rs_total;
+}
+
+/**
+ * __drbd_set_state() - Set a new DRBD state
+ * @mdev:	DRBD device.
+ * @ns:		new state.
+ * @flags:	Flags
+ * @done:	Optional completion that will be completed once after_state_ch() has finished
+ *
+ * Caller needs to hold req_lock and global_state_lock. Do not call directly.
+ */
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+	         enum chg_state_flags flags, struct completion *done)
+{
+	union drbd_state os;
+	enum drbd_state_rv rv = SS_SUCCESS;
+	enum sanitize_state_warnings ssw;
+	struct after_state_chg_work *ascw;
+
+	os = drbd_read_state(mdev);
+
+	ns = sanitize_state(mdev, ns, &ssw);
+	if (ns.i == os.i)
+		return SS_NOTHING_TO_DO;
+
+	rv = is_valid_transition(os, ns);
+	if (rv < SS_SUCCESS)
+		return rv;
+
+	if (!(flags & CS_HARD)) {
+		/*  pre-state-change checks ; only look at ns  */
+		/* See drbd_state_sw_errors in drbd_strings.c */
+
+		rv = is_valid_state(mdev, ns);
+		if (rv < SS_SUCCESS) {
+			/* If the old state was illegal as well, then let
+			   this happen...*/
+
+			if (is_valid_state(mdev, os) == rv)
+				rv = is_valid_soft_transition(os, ns, mdev->tconn);
+		} else
+			rv = is_valid_soft_transition(os, ns, mdev->tconn);
+	}
+
+	if (rv < SS_SUCCESS) {
+		if (flags & CS_VERBOSE)
+			print_st_err(mdev, os, ns, rv);
+		return rv;
+	}
+
+	print_sanitize_warnings(mdev, ssw);
+
+	drbd_pr_state_change(mdev, os, ns, flags);
+
+	/* Display changes to the susp* flags that were caused by the call to
+	   sanitize_state(). Only display it here if we were not called from
+	   _conn_request_state() */
+	if (!(flags & CS_DC_SUSP))
+		conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP);
+
+	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
+	 * on the ldev here, to be sure the transition to D_DISKLESS, and with
+	 * it drbd_ldev_destroy(), won't happen before our corresponding
+	 * after_state_ch work has run, where we put_ldev again. */
+	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
+	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
+		atomic_inc(&mdev->local_cnt);
+
+	mdev->state.i = ns.i;
+	mdev->tconn->susp = ns.susp;
+	mdev->tconn->susp_nod = ns.susp_nod;
+	mdev->tconn->susp_fen = ns.susp_fen;
+
+	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+		drbd_print_uuids(mdev, "attached to UUIDs");
+
+	/* Wake up role changes that were delayed while the connection was being established */
+	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
+	    no_peer_wf_report_params(mdev->tconn))
+		clear_bit(STATE_SENT, &mdev->tconn->flags);
+
+	wake_up(&mdev->misc_wait);
+	wake_up(&mdev->state_wait);
+	wake_up(&mdev->tconn->ping_wait);
+
+	/* Aborted verify run, or we reached the stop sector.
+	 * Log the last position, unless end-of-device. */
+	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
+	    ns.conn <= C_CONNECTED) {
+		mdev->ov_start_sector =
+			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
+		if (mdev->ov_left)
+			dev_info(DEV, "Online Verify reached sector %llu\n",
+				(unsigned long long)mdev->ov_start_sector);
+	}
+
+	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
+	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
+		dev_info(DEV, "Syncer continues.\n");
+		mdev->rs_paused += (long)jiffies
+				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
+		if (ns.conn == C_SYNC_TARGET)
+			mod_timer(&mdev->resync_timer, jiffies);
+	}
+
+	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
+	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
+		dev_info(DEV, "Resync suspended\n");
+		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
+	}
+
+	if (os.conn == C_CONNECTED &&
+	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
+		unsigned long now = jiffies;
+		int i;
+
+		set_ov_position(mdev, ns.conn);
+		mdev->rs_start = now;
+		mdev->rs_last_events = 0;
+		mdev->rs_last_sect_ev = 0;
+		mdev->ov_last_oos_size = 0;
+		mdev->ov_last_oos_start = 0;
+
+		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+			mdev->rs_mark_left[i] = mdev->ov_left;
+			mdev->rs_mark_time[i] = now;
+		}
+
+		drbd_rs_controller_reset(mdev);
+
+		if (ns.conn == C_VERIFY_S) {
+			dev_info(DEV, "Starting Online Verify from sector %llu\n",
+					(unsigned long long)mdev->ov_position);
+			mod_timer(&mdev->resync_timer, jiffies);
+		}
+	}
+
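+	/* re-derive the persistent meta data flags from the new state,
+	 * and mark the meta data dirty if they changed */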
+	if (get_ldev(mdev)) {
+		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
+						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
+						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
+
+		mdf &= ~MDF_AL_CLEAN;
+		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
+			mdf |= MDF_CRASHED_PRIMARY;
+		if (mdev->state.role == R_PRIMARY ||
+		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
+			mdf |= MDF_PRIMARY_IND;
+		if (mdev->state.conn > C_WF_REPORT_PARAMS)
+			mdf |= MDF_CONNECTED_IND;
+		if (mdev->state.disk > D_INCONSISTENT)
+			mdf |= MDF_CONSISTENT;
+		if (mdev->state.disk > D_OUTDATED)
+			mdf |= MDF_WAS_UP_TO_DATE;
+		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
+			mdf |= MDF_PEER_OUT_DATED;
+		if (mdf != mdev->ldev->md.flags) {
+			mdev->ldev->md.flags = mdf;
+			drbd_md_mark_dirty(mdev);
+		}
+		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
+			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
+		put_ldev(mdev);
+	}
+
+	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
+	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
+	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
+		set_bit(CONSIDER_RESYNC, &mdev->flags);
+
+	/* Receiver should clean itself up */
+	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
+		drbd_thread_stop_nowait(&mdev->tconn->receiver);
+
+	/* Now that the receiver has finished cleaning up, it should die */
+	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
+		drbd_thread_stop_nowait(&mdev->tconn->receiver);
+
+	/* Upon network failure, we need to restart the receiver. */
+	if (os.conn > C_WF_CONNECTION &&
+	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
+		drbd_thread_restart_nowait(&mdev->tconn->receiver);
+
+	/* Resume AL writing if we get a connection */
+	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+		drbd_resume_al(mdev);
+
+	/* remember last attach time so request_timer_fn() won't
+	 * kill newly established sessions while we are still trying to thaw
+	 * previously frozen IO */
+	if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+	    ns.disk > D_NEGOTIATING)
+		mdev->last_reattach_jif = jiffies;
+
+	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
+	if (ascw) {
+		ascw->os = os;
+		ascw->ns = ns;
+		ascw->flags = flags;
+		ascw->w.cb = w_after_state_ch;
+		ascw->w.mdev = mdev;
+		ascw->done = done;
+		drbd_queue_work(&mdev->tconn->sender_work, &ascw->w);
+	} else {
+		dev_err(DEV, "Could not kmalloc an ascw\n");
+	}
+
+	return rv;
+}
+
+static int w_after_state_ch(struct drbd_work *w, int unused)
+{
+	struct after_state_chg_work *ascw =
+		container_of(w, struct after_state_chg_work, w);
+	struct drbd_conf *mdev = w->mdev;
+
+	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
+	if (ascw->flags & CS_WAIT_COMPLETE) {
+		D_ASSERT(ascw->done != NULL);
+		complete(ascw->done);
+	}
+	kfree(ascw);
+
+	return 0;
+}
+
+static void abw_start_sync(struct drbd_conf *mdev, int rv)
+{
+	if (rv) {
+		dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
+		_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
+		return;
+	}
+
+	switch (mdev->state.conn) {
+	case C_STARTING_SYNC_T:
+		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+		break;
+	case C_STARTING_SYNC_S:
+		drbd_start_resync(mdev, C_SYNC_SOURCE);
+		break;
+	}
+}
+
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags)
+{
+	int rv;
+
+	D_ASSERT(current == mdev->tconn->worker.task);
+
+	/* open coded non-blocking drbd_suspend_io(mdev); */
+	set_bit(SUSPEND_IO, &mdev->flags);
+
+	drbd_bm_lock(mdev, why, flags);
+	rv = io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	drbd_resume_io(mdev);
+
+	return rv;
+}
+
+/**
+ * after_state_ch() - Perform after state change actions that may sleep
+ * @mdev:	DRBD device.
+ * @os:		old state.
+ * @ns:		new state.
+ * @flags:	Flags
+ */
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+			   union drbd_state ns, enum chg_state_flags flags)
+{
+	struct sib_info sib;
+
+	sib.sib_reason = SIB_STATE_CHANGE;
+	sib.os = os;
+	sib.ns = ns;
+
+	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+		clear_bit(CRASHED_PRIMARY, &mdev->flags);
+		if (mdev->p_uuid)
+			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
+	}
+
+	/* Inform userspace about the change... */
+	drbd_bcast_event(mdev, &sib);
+
+	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
+	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+		drbd_khelper(mdev, "pri-on-incon-degr");
+
+	/* Here we have the actions that are performed after a
+	   state change. This function might sleep */
+
+	if (ns.susp_nod) {
+		struct drbd_tconn *tconn = mdev->tconn;
+		enum drbd_req_event what = NOTHING;
+
+		spin_lock_irq(&tconn->req_lock);
+		if (os.conn < C_CONNECTED && conn_lowest_conn(tconn) >= C_CONNECTED)
+			what = RESEND;
+
+		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+		    conn_lowest_disk(tconn) > D_NEGOTIATING)
+			what = RESTART_FROZEN_DISK_IO;
+
+		if (tconn->susp_nod && what != NOTHING) {
+			_tl_restart(tconn, what);
+			_conn_request_state(tconn,
+					    (union drbd_state) { { .susp_nod = 1 } },
+					    (union drbd_state) { { .susp_nod = 0 } },
+					    CS_VERBOSE);
+		}
+		spin_unlock_irq(&tconn->req_lock);
+	}
+
+	if (ns.susp_fen) {
+		struct drbd_tconn *tconn = mdev->tconn;
+
+		spin_lock_irq(&tconn->req_lock);
+		if (tconn->susp_fen && conn_lowest_conn(tconn) >= C_CONNECTED) {
+			/* case2: The connection was established again: */
+			struct drbd_conf *odev;
+			int vnr;
+
+			rcu_read_lock();
+			idr_for_each_entry(&tconn->volumes, odev, vnr)
+				clear_bit(NEW_CUR_UUID, &odev->flags);
+			rcu_read_unlock();
+			_tl_restart(tconn, RESEND);
+			_conn_request_state(tconn,
+					    (union drbd_state) { { .susp_fen = 1 } },
+					    (union drbd_state) { { .susp_fen = 0 } },
+					    CS_VERBOSE);
+		}
+		spin_unlock_irq(&tconn->req_lock);
+	}
+
+	/* Became sync source.  With protocol >= 96, we still need to send out
+	 * the sync uuid now. Need to do that before any drbd_send_state, or
+	 * the other side may go "paused sync" before receiving the sync uuids,
+	 * which is unexpected. */
+	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
+		drbd_gen_and_send_sync_uuid(mdev);
+		put_ldev(mdev);
+	}
+
+	/* Do not change the order of the if above and the two below... */
+	if (os.pdsk == D_DISKLESS &&
+	    ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) {      /* attach on the peer */
+		/* we probably will start a resync soon.
+		 * make sure those things are properly reset. */
+		mdev->rs_total = 0;
+		mdev->rs_failed = 0;
+		atomic_set(&mdev->rs_pending_cnt, 0);
+		drbd_rs_cancel_all(mdev);
+
+		drbd_send_uuids(mdev);
+		drbd_send_state(mdev, ns);
+	}
+	/* No point in queuing send_bitmap if we don't have a connection
+	 * anymore, so check also the _current_ state, not only the new state
+	 * at the time this work was queued. */
+	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+	    mdev->state.conn == C_WF_BITMAP_S)
+		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+				"send_bitmap (WFBitMapS)",
+				BM_LOCKED_TEST_ALLOWED);
+
+	/* Lost contact to peer's copy of the data */
+	if ((os.pdsk >= D_INCONSISTENT &&
+	     os.pdsk != D_UNKNOWN &&
+	     os.pdsk != D_OUTDATED)
+	&&  (ns.pdsk < D_INCONSISTENT ||
+	     ns.pdsk == D_UNKNOWN ||
+	     ns.pdsk == D_OUTDATED)) {
+		if (get_ldev(mdev)) {
+			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
+			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+				if (drbd_suspended(mdev)) {
+					set_bit(NEW_CUR_UUID, &mdev->flags);
+				} else {
+					drbd_uuid_new_current(mdev);
+					drbd_send_uuids(mdev);
+				}
+			}
+			put_ldev(mdev);
+		}
+	}
+
+	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
+		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
+		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+			drbd_uuid_new_current(mdev);
+			drbd_send_uuids(mdev);
+		}
+		/* D_DISKLESS Peer becomes secondary */
+		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
+			/* We may still be Primary ourselves.
+			 * No harm done if the bitmap still changes,
+			 * redirtied pages will follow later. */
+			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	/* Write out all changed bits on demote.
+	 * Though, no need to do that just yet
+	 * if there is a resync going on still */
+	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+		/* No changes to the bitmap expected this time, so assert that,
+		 * even though no harm was done if it did change. */
+		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote", BM_LOCKED_TEST_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	/* Last part of the attaching process ... */
+	if (ns.conn >= C_CONNECTED &&
+	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
+		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
+		drbd_send_uuids(mdev);
+		drbd_send_state(mdev, ns);
+	}
+
+	/* We want to pause/continue resync, tell peer. */
+	if (ns.conn >= C_CONNECTED &&
+	     ((os.aftr_isp != ns.aftr_isp) ||
+	      (os.user_isp != ns.user_isp)))
+		drbd_send_state(mdev, ns);
+
+	/* In case one of the isp bits got set, suspend other devices. */
+	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
+	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
+		suspend_other_sg(mdev);
+
+	/* Make sure the peer gets informed about possible state
+	   changes (ISP bits) while we were in WFReportParams. */
+	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
+		drbd_send_state(mdev, ns);
+
+	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
+		drbd_send_state(mdev, ns);
+
+	/* We are in the process of starting a full sync... */
+	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
+		/* no other bitmap changes expected during this phase */
+		drbd_queue_bitmap_io(mdev,
+			&drbd_bmio_set_n_write, &abw_start_sync,
+			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
+
+	/* We are invalidating ourselves... */
+	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
+	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
+		/* other bitmap operation expected during this phase */
+		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+			"set_n_write from invalidate", BM_LOCKED_MASK);
+
+	/* first half of local IO error, failure to attach,
+	 * or administrative detach */
+	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
+		enum drbd_io_error_p eh = EP_PASS_ON;
+		int was_io_error = 0;
+		/* corresponding get_ldev was in __drbd_set_state, to serialize
+		 * our cleanup here with the transition to D_DISKLESS.
+		 * But it is still not safe to dereference ldev here, since
+		 * we might come from a failed Attach before ldev was set. */
+		if (mdev->ldev) {
+			rcu_read_lock();
+			eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
+			rcu_read_unlock();
+
+			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
+
+			if (was_io_error && eh == EP_CALL_HELPER)
+				drbd_khelper(mdev, "local-io-error");
+
+			/* Immediately allow completion of all application IO,
+			 * that waits for completion from the local disk,
+			 * if this was a force-detach due to disk_timeout
+			 * or administrator request (drbdsetup detach --force).
+			 * Do NOT abort otherwise.
+			 * Aborting local requests may cause serious problems,
+			 * if requests are completed to upper layers already,
+			 * and then later the already submitted local bio completes.
+			 * This can cause DMA into former bio pages that meanwhile
+			 * have been re-used for other things.
+			 * So aborting local requests may cause crashes,
+			 * or even worse, silent data corruption.
+			 */
+			if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
+				tl_abort_disk_io(mdev);
+
+			/* current state still has to be D_FAILED,
+			 * there is only one way out: to D_DISKLESS,
+			 * and that may only happen after our put_ldev below. */
+			if (mdev->state.disk != D_FAILED)
+				dev_err(DEV,
+					"ASSERT FAILED: disk is %s during detach\n",
+					drbd_disk_str(mdev->state.disk));
+
+			if (ns.conn >= C_CONNECTED)
+				drbd_send_state(mdev, ns);
+
+			drbd_rs_cancel_all(mdev);
+
+			/* In case we want to get something to stable storage still,
+			 * this may be the last chance.
+			 * Following put_ldev may transition to D_DISKLESS. */
+			drbd_md_sync(mdev);
+		}
+		put_ldev(mdev);
+	}
+
+	/* second half of local IO error, failure to attach,
+	 * or administrative detach,
+	 * after local_cnt references have reached zero again */
+	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
+		/* We must still be diskless,
+		 * re-attach has to be serialized with this! */
+		if (mdev->state.disk != D_DISKLESS)
+			dev_err(DEV,
+				"ASSERT FAILED: disk is %s while going diskless\n",
+				drbd_disk_str(mdev->state.disk));
+
+		if (ns.conn >= C_CONNECTED)
+			drbd_send_state(mdev, ns);
+		/* corresponding get_ldev in __drbd_set_state
+		 * this may finally trigger drbd_ldev_destroy. */
+		put_ldev(mdev);
+	}
+
+	/* Notify the peer that I had a local IO error and did not detach. */
+	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
+		drbd_send_state(mdev, ns);
+
+	/* Disks got bigger while they were detached */
+	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
+	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
+		if (ns.conn == C_CONNECTED)
+			resync_after_online_grow(mdev);
+	}
+
+	/* A resync finished or aborted, wake paused devices... */
+	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
+	    (os.peer_isp && !ns.peer_isp) ||
+	    (os.user_isp && !ns.user_isp))
+		resume_next_sg(mdev);
+
+	/* sync target done with resync.  Explicitly notify peer, even though
+	 * it should (at least for non-empty resyncs) already know itself. */
+	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
+		drbd_send_state(mdev, ns);
+
+	/* Verify finished, or reached stop sector.  Peer did not know about
+	 * the stop sector, and we may even have changed the stop sector during
+	 * verify to interrupt/stop early.  Send the new state. */
+	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
+	&& verify_can_do_stop_sector(mdev))
+		drbd_send_state(mdev, ns);
+
+	/* This triggers bitmap writeout of potentially still unwritten pages
+	 * if the resync finished cleanly, or aborted because of peer disk
+	 * failure, or because of connection loss.
+	 * For resync aborted because of local disk failure, we cannot do
+	 * any bitmap writeout anymore.
+	 * No harm done if some bits change during this phase.
+	 */
+	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+		drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
+			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	if (ns.disk == D_DISKLESS &&
+	    ns.conn == C_STANDALONE &&
+	    ns.role == R_SECONDARY) {
+		if (os.aftr_isp != ns.aftr_isp)
+			resume_next_sg(mdev);
+	}
+
+	drbd_md_sync(mdev);
+}
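
Note how nearly every branch in after_state_ch() tests an edge, `os.field != X && ns.field == X`, rather than the level `ns.field == X`: the side effect fires exactly once, on the transition into the state, and re-entering the function with an unchanged field stays silent. A toy illustration (hypothetical states, not DRBD's):

#include <stdio.h>

enum conn { DISCONNECTED, CONNECTED };

static void on_change(enum conn os, enum conn ns)
{
	/* edge-triggered: fires only on the transition into CONNECTED */
	if (os != CONNECTED && ns == CONNECTED)
		printf("connection established: send uuids once\n");
}

int main(void)
{
	on_change(DISCONNECTED, CONNECTED);   /* fires */
	on_change(CONNECTED, CONNECTED);      /* no edge, stays silent */
	return 0;
}
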
+
+struct after_conn_state_chg_work {
+	struct drbd_work w;
+	enum drbd_conns oc;
+	union drbd_state ns_min;
+	union drbd_state ns_max; /* new, max state, over all mdevs */
+	enum chg_state_flags flags;
+};
+
+static int w_after_conn_state_ch(struct drbd_work *w, int unused)
+{
+	struct after_conn_state_chg_work *acscw =
+		container_of(w, struct after_conn_state_chg_work, w);
+	struct drbd_tconn *tconn = w->tconn;
+	enum drbd_conns oc = acscw->oc;
+	union drbd_state ns_max = acscw->ns_max;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	kfree(acscw);
+
+	/* Upon network configuration, we need to start the receiver */
+	if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
+		drbd_thread_start(&tconn->receiver);
+
+	if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
+		struct net_conf *old_conf;
+
+		mutex_lock(&tconn->conf_update);
+		old_conf = tconn->net_conf;
+		tconn->my_addr_len = 0;
+		tconn->peer_addr_len = 0;
+		rcu_assign_pointer(tconn->net_conf, NULL);
+		conn_free_crypto(tconn);
+		mutex_unlock(&tconn->conf_update);
+
+		synchronize_rcu();
+		kfree(old_conf);
+	}
+
+	if (ns_max.susp_fen) {
+		/* case1: The outdate peer handler is successful: */
+		if (ns_max.pdsk <= D_OUTDATED) {
+			rcu_read_lock();
+			idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+				if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
+					drbd_uuid_new_current(mdev);
+					clear_bit(NEW_CUR_UUID, &mdev->flags);
+				}
+			}
+			rcu_read_unlock();
+			spin_lock_irq(&tconn->req_lock);
+			_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
+			_conn_request_state(tconn,
+					    (union drbd_state) { { .susp_fen = 1 } },
+					    (union drbd_state) { { .susp_fen = 0 } },
+					    CS_VERBOSE);
+			spin_unlock_irq(&tconn->req_lock);
+		}
+	}
+	kref_put(&tconn->kref, &conn_destroy);
+
+	conn_md_sync(tconn);
+
+	return 0;
+}
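
The kref_get() taken when this work item is queued (see _conn_request_state() below) and the kref_put() here bracket the lifetime of the pending work: the connection object cannot be destroyed while the item is still waiting to run. A sketch of that convention, with a plain non-atomic counter standing in for the kernel's kref:

#include <stdio.h>

struct conn {
	int refs;   /* the kernel uses an atomic kref instead */
};

static void conn_get(struct conn *c) { c->refs++; }

static void conn_put(struct conn *c)
{
	if (--c->refs == 0)
		printf("last reference dropped: destroy connection\n");
}

static void queued_work(struct conn *c)
{
	/* safe to use c here: the queue-time reference pins it */
	conn_put(c);            /* matches the get taken at queue time */
}

int main(void)
{
	struct conn c = { .refs = 1 };

	conn_get(&c);           /* for the pending work item */
	queued_work(&c);        /* in the kernel this runs later */
	conn_put(&c);           /* owner drops its own reference */
	return 0;
}
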
+
+void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf)
+{
+	enum chg_state_flags flags = ~0;
+	struct drbd_conf *mdev;
+	int vnr, first_vol = 1;
+	union drbd_dev_state os, cs = {
+		{ .role = R_SECONDARY,
+		  .peer = R_UNKNOWN,
+		  .conn = tconn->cstate,
+		  .disk = D_DISKLESS,
+		  .pdsk = D_UNKNOWN,
+		} };
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		os = mdev->state;
+
+		if (first_vol) {
+			cs = os;
+			first_vol = 0;
+			continue;
+		}
+
+		if (cs.role != os.role)
+			flags &= ~CS_DC_ROLE;
+
+		if (cs.peer != os.peer)
+			flags &= ~CS_DC_PEER;
+
+		if (cs.conn != os.conn)
+			flags &= ~CS_DC_CONN;
+
+		if (cs.disk != os.disk)
+			flags &= ~CS_DC_DISK;
+
+		if (cs.pdsk != os.pdsk)
+			flags &= ~CS_DC_PDSK;
+	}
+	rcu_read_unlock();
+
+	*pf |= CS_DC_MASK;
+	*pf &= flags;
+	(*pcs).i = cs.i;
+}
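
conn_old_common_state() reduces the per-volume states to one answer per field: start with every "this field is common across volumes" flag set, then clear a flag as soon as any volume disagrees with the first one seen. The same reduction in miniature (made-up flags and fields):

#include <stdio.h>

enum { DC_ROLE = 1 << 0, DC_DISK = 1 << 1 };

struct vol { int role, disk; };

int main(void)
{
	struct vol v[] = { { 1, 8 }, { 1, 4 }, { 1, 8 } };
	unsigned int flags = DC_ROLE | DC_DISK;  /* assume all common */
	int i;

	for (i = 1; i < 3; i++) {
		if (v[i].role != v[0].role)
			flags &= ~DC_ROLE;
		if (v[i].disk != v[0].disk)
			flags &= ~DC_DISK;
	}
	printf("role common: %s, disk common: %s\n",
	       flags & DC_ROLE ? "yes" : "no",
	       flags & DC_DISK ? "yes" : "no");
	return 0;
}
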
+
+static enum drbd_state_rv
+conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+			 enum chg_state_flags flags)
+{
+	enum drbd_state_rv rv = SS_SUCCESS;
+	union drbd_state ns, os;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		os = drbd_read_state(mdev);
+		ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
+
+		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
+			ns.disk = os.disk;
+
+		if (ns.i == os.i)
+			continue;
+
+		rv = is_valid_transition(os, ns);
+		if (rv < SS_SUCCESS)
+			break;
+
+		if (!(flags & CS_HARD)) {
+			rv = is_valid_state(mdev, ns);
+			if (rv < SS_SUCCESS) {
+				if (is_valid_state(mdev, os) == rv)
+					rv = is_valid_soft_transition(os, ns, tconn);
+			} else
+				rv = is_valid_soft_transition(os, ns, tconn);
+		}
+		if (rv < SS_SUCCESS)
+			break;
+	}
+	rcu_read_unlock();
+
+	if (rv < SS_SUCCESS && flags & CS_VERBOSE)
+		print_st_err(mdev, os, ns, rv);
+
+	return rv;
+}
+
+void
+conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+	       union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
+{
+	union drbd_state ns, os, ns_max = { };
+	union drbd_state ns_min = {
+		{ .role = R_MASK,
+		  .peer = R_MASK,
+		  .conn = val.conn,
+		  .disk = D_MASK,
+		  .pdsk = D_MASK
+		} };
+	struct drbd_conf *mdev;
+	enum drbd_state_rv rv;
+	int vnr, number_of_volumes = 0;
+
+	if (mask.conn == C_MASK) {
+		/* remember last connect time so request_timer_fn() won't
+		 * kill newly established sessions while we are still trying to thaw
+		 * previously frozen IO */
+		if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
+			tconn->last_reconnect_jif = jiffies;
+
+		tconn->cstate = val.conn;
+	}
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		number_of_volumes++;
+		os = drbd_read_state(mdev);
+		ns = apply_mask_val(os, mask, val);
+		ns = sanitize_state(mdev, ns, NULL);
+
+		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
+			ns.disk = os.disk;
+
+		rv = __drbd_set_state(mdev, ns, flags, NULL);
+		if (rv < SS_SUCCESS)
+			BUG();
+
+		ns.i = mdev->state.i;
+		ns_max.role = max_role(ns.role, ns_max.role);
+		ns_max.peer = max_role(ns.peer, ns_max.peer);
+		ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn);
+		ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk);
+		ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk);
+
+		ns_min.role = min_role(ns.role, ns_min.role);
+		ns_min.peer = min_role(ns.peer, ns_min.peer);
+		ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn);
+		ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk);
+		ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk);
+	}
+	rcu_read_unlock();
+
+	if (number_of_volumes == 0) {
+		ns_min = ns_max = (union drbd_state) { {
+				.role = R_SECONDARY,
+				.peer = R_UNKNOWN,
+				.conn = val.conn,
+				.disk = D_DISKLESS,
+				.pdsk = D_UNKNOWN
+			} };
+	}
+
+	ns_min.susp = ns_max.susp = tconn->susp;
+	ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod;
+	ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen;
+
+	*pns_min = ns_min;
+	*pns_max = ns_max;
+}
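
Because one connection may carry several volumes, conn_set_state() reports the outcome as an (ns_min, ns_max) pair: for each state field, the smallest and largest value any volume ended up with, so callers can ask questions like "did every volume reach at least C_CONNECTED?". The reduction, stripped of kernel types:

#include <stdio.h>

static int imin(int a, int b) { return a < b ? a : b; }
static int imax(int a, int b) { return a > b ? a : b; }

int main(void)
{
	int disk[] = { 4, 8, 2 };            /* per-volume disk states */
	int lo = disk[0], hi = disk[0];
	int i;

	for (i = 1; i < 3; i++) {
		lo = imin(lo, disk[i]);
		hi = imax(hi, disk[i]);
	}
	printf("disk state across volumes: min=%d max=%d\n", lo, hi);
	return 0;
}
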
+
+static enum drbd_state_rv
+_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
+{
+	enum drbd_state_rv rv;
+
+	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags))
+		return SS_CW_SUCCESS;
+
+	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
+		return SS_CW_FAILED_BY_PEER;
+
+	rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;
+
+	if (rv == SS_UNKNOWN_ERROR)
+		rv = conn_is_valid_transition(tconn, mask, val, 0);
+
+	if (rv == SS_SUCCESS)
+		rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
+
+	return rv;
+}
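
_conn_rq_cond() is written to serve as a wait_event() condition: the wait loops while the expression evaluates to zero, and SS_UNKNOWN_ERROR is, as the "cont waiting" comment above implies, the enum's zero value, so returning it keeps the caller asleep; any non-zero verdict, success or failure, wakes it. A toy version of such a tri-state condition:

#include <stdio.h>

static int tries;

static int cond(void)
{
	if (++tries < 3)
		return 0;       /* undecided yet: keep waiting */
	return 1;               /* decided: stop waiting */
}

int main(void)
{
	int rv;

	while (!(rv = cond()))
		;               /* the kernel sleeps here instead of spinning */
	printf("done after %d polls, rv=%d\n", tries, rv);
	return 0;
}
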
+
+enum drbd_state_rv
+_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		    enum chg_state_flags flags)
+{
+	enum drbd_state_rv rv = SS_SUCCESS;
+	struct after_conn_state_chg_work *acscw;
+	enum drbd_conns oc = tconn->cstate;
+	union drbd_state ns_max, ns_min, os;
+	bool have_mutex = false;
+
+	if (mask.conn) {
+		rv = is_valid_conn_transition(oc, val.conn);
+		if (rv < SS_SUCCESS)
+			goto abort;
+	}
+
+	rv = conn_is_valid_transition(tconn, mask, val, flags);
+	if (rv < SS_SUCCESS)
+		goto abort;
+
+	if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
+	    !(flags & (CS_LOCAL_ONLY | CS_HARD))) {
+
+		/* This will be a cluster-wide state change.
+		 * Need to give up the spinlock, grab the mutex,
+		 * then send the state change request, ... */
+		spin_unlock_irq(&tconn->req_lock);
+		mutex_lock(&tconn->cstate_mutex);
+		have_mutex = true;
+
+		set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
+		if (conn_send_state_req(tconn, mask, val)) {
+			/* sending failed. */
+			clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
+			rv = SS_CW_FAILED_BY_PEER;
+			/* need to re-acquire the spin lock, though */
+			goto abort_unlocked;
+		}
+
+		if (val.conn == C_DISCONNECTING)
+			set_bit(DISCONNECT_SENT, &tconn->flags);
+
+		/* ... and re-acquire the spinlock.
+		 * If _conn_rq_cond() returned >= SS_SUCCESS, we must call
+		 * conn_set_state() within the same spinlock. */
+		spin_lock_irq(&tconn->req_lock);
+		wait_event_lock_irq(tconn->ping_wait,
+				(rv = _conn_rq_cond(tconn, mask, val)),
+				tconn->req_lock);
+		clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
+		if (rv < SS_SUCCESS)
+			goto abort;
+	}
+
+	conn_old_common_state(tconn, &os, &flags);
+	flags |= CS_DC_SUSP;
+	conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags);
+	conn_pr_state_change(tconn, os, ns_max, flags);
+
+	acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
+	if (acscw) {
+		acscw->oc = os.conn;
+		acscw->ns_min = ns_min;
+		acscw->ns_max = ns_max;
+		acscw->flags = flags;
+		acscw->w.cb = w_after_conn_state_ch;
+		kref_get(&tconn->kref);
+		acscw->w.tconn = tconn;
+		drbd_queue_work(&tconn->sender_work, &acscw->w);
+	} else {
+		conn_err(tconn, "Could not kmalloc an acscw\n");
+	}
+
+ abort:
+	if (have_mutex) {
+		/* mutex_unlock() "... must not be used in interrupt context.",
+		 * so give up the spinlock, then re-acquire it */
+		spin_unlock_irq(&tconn->req_lock);
+ abort_unlocked:
+		mutex_unlock(&tconn->cstate_mutex);
+		spin_lock_irq(&tconn->req_lock);
+	}
+	if (rv < SS_SUCCESS && flags & CS_VERBOSE) {
+		conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv));
+		conn_err(tconn, " mask = 0x%x val = 0x%x\n", mask.i, val.i);
+		conn_err(tconn, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn));
+	}
+	return rv;
+}
+
+enum drbd_state_rv
+conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		   enum chg_state_flags flags)
+{
+	enum drbd_state_rv rv;
+
+	spin_lock_irq(&tconn->req_lock);
+	rv = _conn_request_state(tconn, mask, val, flags);
+	spin_unlock_irq(&tconn->req_lock);
+
+	return rv;
+}
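
conn_request_state() and _conn_request_state() follow the kernel's locking naming convention: the plain-named function takes the lock itself, while the underscore-prefixed variant assumes the caller already holds it (and, as above, may temporarily drop and re-take it internally). A userspace analogue of the convention with a pthread mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int state;

static int _request_state(int val)      /* caller must hold 'lock' */
{
	state = val;
	return 0;
}

static int request_state(int val)       /* locking wrapper */
{
	int rv;

	pthread_mutex_lock(&lock);
	rv = _request_state(val);
	pthread_mutex_unlock(&lock);
	return rv;
}

int main(void)
{
	request_state(7);
	printf("state=%d\n", state);
	return 0;
}
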
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
new file mode 100644
index 0000000..a3c361b
--- /dev/null
+++ b/drivers/block/drbd/drbd_state.h
@@ -0,0 +1,161 @@
+#ifndef DRBD_STATE_H
+#define DRBD_STATE_H
+
+struct drbd_conf;
+struct drbd_tconn;
+
+/**
+ * DOC: DRBD State macros
+ *
+ * These macros are used to express state changes in easily readable form.
+ *
+ * The NS macros expand to a mask and a value that can be bit-or'ed onto the
+ * current state once the spinlock (req_lock) has been taken.
+ *
+ * The _NS macros are used for state functions that get called with the
+ * spinlock held. These macros expand directly to the new state value.
+ *
+ * Besides the basic forms NS() and _NS(), additional _?NS[23] variants are
+ * defined to express state changes that affect more than one aspect of the
+ * state.
+ *
+ * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
+ * means that the network connection was established and that the peer
+ * is in the secondary role.
+ */
+#define role_MASK R_MASK
+#define peer_MASK R_MASK
+#define disk_MASK D_MASK
+#define pdsk_MASK D_MASK
+#define conn_MASK C_MASK
+#define susp_MASK 1
+#define user_isp_MASK 1
+#define aftr_isp_MASK 1
+#define susp_nod_MASK 1
+#define susp_fen_MASK 1
+
+#define NS(T, S) \
+	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
+	({ union drbd_state val; val.i = 0; val.T = (S); val; })
+#define NS2(T1, S1, T2, S2) \
+	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+	  mask.T2 = T2##_MASK; mask; }), \
+	({ union drbd_state val; val.i = 0; val.T1 = (S1); \
+	  val.T2 = (S2); val; })
+#define NS3(T1, S1, T2, S2, T3, S3) \
+	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+	  mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
+	({ union drbd_state val;  val.i = 0; val.T1 = (S1); \
+	  val.T2 = (S2); val.T3 = (S3); val; })
+
+#define _NS(D, T, S) \
+	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; })
+#define _NS2(D, T1, S1, T2, S2) \
+	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
+	__ns.T2 = (S2); __ns; })
+#define _NS3(D, T1, S1, T2, S2, T3, S3) \
+	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
+	__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
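+
Each NS() use expands, via GCC statement expressions just like the macros above, into *two* arguments: a mask union with only the named field's bits set, and a value union carrying the new contents of that field. A compilable toy with a two-field state shows the mechanics (field widths here are made up):

#include <stdio.h>

union state {
	struct {
		unsigned role:2;
		unsigned conn:5;
	};
	unsigned int i;
};

#define role_MASK 0x3
#define conn_MASK 0x1f

#define NS(T, S) \
	({ union state mask = { .i = 0 }; mask.T = T##_MASK; mask; }), \
	({ union state val  = { .i = 0 }; val.T  = (S);      val;  })

static void request_state(union state mask, union state val)
{
	printf("mask=0x%x val=0x%x\n", mask.i, val.i);
}

int main(void)
{
	request_state(NS(conn, 10));    /* change only .conn, to 10 */
	return 0;
}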
+
+enum chg_state_flags {
+	CS_HARD	         = 1 << 0,
+	CS_VERBOSE       = 1 << 1,
+	CS_WAIT_COMPLETE = 1 << 2,
+	CS_SERIALIZE     = 1 << 3,
+	CS_ORDERED       = CS_WAIT_COMPLETE + CS_SERIALIZE,
+	CS_LOCAL_ONLY    = 1 << 4, /* Do not consider a device pair wide state change */
+	CS_DC_ROLE       = 1 << 5, /* DC = display as connection state change */
+	CS_DC_PEER       = 1 << 6,
+	CS_DC_CONN       = 1 << 7,
+	CS_DC_DISK       = 1 << 8,
+	CS_DC_PDSK       = 1 << 9,
+	CS_DC_SUSP       = 1 << 10,
+	CS_DC_MASK       = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
+	CS_IGN_OUTD_FAIL = 1 << 11,
+};
+
+/* drbd_dev_state and drbd_state are different types. This is to stress the
+   small difference: there is no suspended flag (.susp), and no "suspended
+   while fence handler runs" flag (susp_fen). */
+union drbd_dev_state {
+	struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned conn:5 ;   /* 17/32	 cstates */
+		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned _unused:1 ;
+		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
+		unsigned peer_isp:1 ;
+		unsigned user_isp:1 ;
+		unsigned _pad:11;   /* 0	 unused */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+		unsigned _pad:11;
+		unsigned user_isp:1 ;
+		unsigned peer_isp:1 ;
+		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
+		unsigned _unused:1 ;
+		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned conn:5 ;   /* 17/32	 cstates */
+		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
+#else
+# error "this endianness is not supported"
+#endif
+	};
+	unsigned int i;
+};
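
Packing the whole device state into named bitfields overlaid on a single 32-bit word is what makes idioms like `ns.i == os.i` (snapshot comparison in one load) work throughout this series. A self-contained check of the idea, with a simplified layout:

#include <stdio.h>

union toy_state {
	struct {
		unsigned role:2;
		unsigned peer:2;
		unsigned conn:5;
		unsigned disk:4;
		unsigned pdsk:4;
		unsigned _pad:15;
	};
	unsigned int i;
};

int main(void)
{
	union toy_state os = { .i = 0 }, ns;

	os.conn = 10;           /* say, "connected" */
	ns = os;                /* snapshot by value */
	ns.disk = 8;            /* say, "up to date" */
	printf("changed: %s (os.i=0x%x ns.i=0x%x)\n",
	       ns.i != os.i ? "yes" : "no", os.i, ns.i);
	return 0;
}
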
+
+extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
+					    enum chg_state_flags f,
+					    union drbd_state mask,
+					    union drbd_state val);
+extern void drbd_force_state(struct drbd_conf *, union drbd_state,
+			union drbd_state);
+extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
+					      union drbd_state,
+					      union drbd_state,
+					      enum chg_state_flags);
+extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
+					   enum chg_state_flags,
+					   struct completion *done);
+extern void print_st_err(struct drbd_conf *, union drbd_state,
+			union drbd_state, int);
+
+enum drbd_state_rv
+_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		    enum chg_state_flags flags);
+
+enum drbd_state_rv
+conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		   enum chg_state_flags flags);
+
+extern void drbd_resume_al(struct drbd_conf *mdev);
+extern bool conn_all_vols_unconf(struct drbd_tconn *tconn);
+
+/**
+ * drbd_request_state() - Request a state change
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ *
+ * This is the most graceful way of requesting a state change. It is
+ * quite verbose in case the state change is not possible, and all such
+ * state changes are globally serialized.
+ */
+static inline int drbd_request_state(struct drbd_conf *mdev,
+				     union drbd_state mask,
+				     union drbd_state val)
+{
+	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
+}
+
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
+enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn);
+
+#endif
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
index c44a2a6..9a664bd 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c
@@ -89,6 +89,7 @@
 	[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
 	[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
 	[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+	[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
 };
 
 const char *drbd_conn_str(enum drbd_conns s)
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 6bce2cc..424dc7b 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -38,16 +38,13 @@
 #include "drbd_int.h"
 #include "drbd_req.h"
 
-static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
-static int w_make_resync_request(struct drbd_conf *mdev,
-				 struct drbd_work *w, int cancel);
-
+static int w_make_ov_request(struct drbd_work *w, int cancel);
 
 
 /* endio handlers:
  *   drbd_md_io_complete (defined here)
- *   drbd_endio_pri (defined here)
- *   drbd_endio_sec (defined here)
+ *   drbd_request_endio (defined here)
+ *   drbd_peer_request_endio (defined here)
  *   bm_async_io_complete (defined in drbd_bitmap.c)
  *
  * For all these callbacks, note the following:
@@ -60,7 +57,7 @@
 
 /* About the global_state_lock
    Each state transition on an device holds a read lock. In case we have
-   to evaluate the sync after dependencies, we grab a write lock, because
+   to evaluate the resync after dependencies, we grab a write lock, because
    we need stable states on all devices for that.  */
 rwlock_t global_state_lock;
 
@@ -98,97 +95,93 @@
 /* reads on behalf of the partner,
  * "submitted" by the receiver
  */
-void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
+void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
 {
 	unsigned long flags = 0;
-	struct drbd_conf *mdev = e->mdev;
+	struct drbd_conf *mdev = peer_req->w.mdev;
 
-	D_ASSERT(e->block_id != ID_VACANT);
-
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	mdev->read_cnt += e->size >> 9;
-	list_del(&e->w.list);
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	mdev->read_cnt += peer_req->i.size >> 9;
+	list_del(&peer_req->w.list);
 	if (list_empty(&mdev->read_ee))
 		wake_up(&mdev->ee_wait);
-	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
+	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 
-	drbd_queue_work(&mdev->data.work, &e->w);
+	drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w);
 	put_ldev(mdev);
 }
 
 /* writes on behalf of the partner, or resync writes,
  * "submitted" by the receiver, final stage.  */
-static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local)
+static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
 {
 	unsigned long flags = 0;
-	struct drbd_conf *mdev = e->mdev;
-	sector_t e_sector;
+	struct drbd_conf *mdev = peer_req->w.mdev;
+	struct drbd_interval i;
 	int do_wake;
-	int is_syncer_req;
+	u64 block_id;
 	int do_al_complete_io;
 
-	D_ASSERT(e->block_id != ID_VACANT);
-
-	/* after we moved e to done_ee,
+	/* after we moved peer_req to done_ee,
 	 * we may no longer access it,
 	 * it may be freed/reused already!
 	 * (as soon as we release the req_lock) */
-	e_sector = e->sector;
-	do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO;
-	is_syncer_req = is_syncer_block_id(e->block_id);
+	i = peer_req->i;
+	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
+	block_id = peer_req->block_id;
 
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	mdev->writ_cnt += e->size >> 9;
-	list_del(&e->w.list); /* has been on active_ee or sync_ee */
-	list_add_tail(&e->w.list, &mdev->done_ee);
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	mdev->writ_cnt += peer_req->i.size >> 9;
+	list_move_tail(&peer_req->w.list, &mdev->done_ee);
 
-	/* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
-	 * neither did we wake possibly waiting conflicting requests.
-	 * done from "drbd_process_done_ee" within the appropriate w.cb
-	 * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
+	/*
+	 * Do not remove from the write_requests tree here: we did not send the
+	 * Ack yet and did not wake possibly waiting conflicting requests.
+	 * Removed from the tree from "drbd_process_done_ee" within the
+	 * appropriate w.cb (e_end_block/e_end_resync_block) or from
+	 * _drbd_clear_done_ee.
+	 */
 
-	do_wake = is_syncer_req
-		? list_empty(&mdev->sync_ee)
-		: list_empty(&mdev->active_ee);
+	do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
 
-	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
+	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 
-	if (is_syncer_req)
-		drbd_rs_complete_io(mdev, e_sector);
+	if (block_id == ID_SYNCER)
+		drbd_rs_complete_io(mdev, i.sector);
 
 	if (do_wake)
 		wake_up(&mdev->ee_wait);
 
 	if (do_al_complete_io)
-		drbd_al_complete_io(mdev, e_sector);
+		drbd_al_complete_io(mdev, &i);
 
-	wake_asender(mdev);
+	wake_asender(mdev->tconn);
 	put_ldev(mdev);
 }
 
 /* writes on behalf of the partner, or resync writes,
  * "submitted" by the receiver.
  */
-void drbd_endio_sec(struct bio *bio, int error)
+void drbd_peer_request_endio(struct bio *bio, int error)
 {
-	struct drbd_epoch_entry *e = bio->bi_private;
-	struct drbd_conf *mdev = e->mdev;
+	struct drbd_peer_request *peer_req = bio->bi_private;
+	struct drbd_conf *mdev = peer_req->w.mdev;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 	int is_write = bio_data_dir(bio) == WRITE;
 
 	if (error && __ratelimit(&drbd_ratelimit_state))
 		dev_warn(DEV, "%s: error=%d s=%llus\n",
 				is_write ? "write" : "read", error,
-				(unsigned long long)e->sector);
+				(unsigned long long)peer_req->i.sector);
 	if (!error && !uptodate) {
 		if (__ratelimit(&drbd_ratelimit_state))
 			dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
 					is_write ? "write" : "read",
-					(unsigned long long)e->sector);
+					(unsigned long long)peer_req->i.sector);
 		/* strange behavior of some lower level drivers...
 		 * fail the request by clearing the uptodate flag,
 		 * but do not return any error?! */
@@ -196,24 +189,24 @@
 	}
 
 	if (error)
-		set_bit(__EE_WAS_ERROR, &e->flags);
+		set_bit(__EE_WAS_ERROR, &peer_req->flags);
 
 	bio_put(bio); /* no need for the bio anymore */
-	if (atomic_dec_and_test(&e->pending_bios)) {
+	if (atomic_dec_and_test(&peer_req->pending_bios)) {
 		if (is_write)
-			drbd_endio_write_sec_final(e);
+			drbd_endio_write_sec_final(peer_req);
 		else
-			drbd_endio_read_sec_final(e);
+			drbd_endio_read_sec_final(peer_req);
 	}
 }
 
 /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
  */
-void drbd_endio_pri(struct bio *bio, int error)
+void drbd_request_endio(struct bio *bio, int error)
 {
 	unsigned long flags;
 	struct drbd_request *req = bio->bi_private;
-	struct drbd_conf *mdev = req->mdev;
+	struct drbd_conf *mdev = req->w.mdev;
 	struct bio_and_error m;
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
@@ -227,53 +220,72 @@
 		error = -EIO;
 	}
 
+
+	/* If this request was aborted locally before,
+	 * but now was completed "successfully",
+	 * chances are that this caused arbitrary data corruption.
+	 *
+	 * "aborting" requests, or force-detaching the disk, is intended for
+	 * completely blocked/hung local backing devices which do no longer
+	 * complete requests at all, not even do error completions.  In this
+	 * situation, usually a hard-reset and failover is the only way out.
+	 *
+	 * By "aborting", basically faking a local error-completion,
+	 * we allow for a more graceful switchover by cleanly migrating services.
+	 * Still the affected node has to be rebooted "soon".
+	 *
+	 * By completing these requests, we allow the upper layers to re-use
+	 * the associated data pages.
+	 *
+	 * If later the local backing device "recovers", and now DMAs some data
+	 * from disk into the original request pages, in the best case it will
+	 * just put random data into unused pages; but typically it will corrupt
+	 * meanwhile completely unrelated data, causing all sorts of damage.
+	 *
+	 * Which means delayed successful completion,
+	 * especially for READ requests,
+	 * is a reason to panic().
+	 *
+	 * We assume that a delayed *error* completion is OK,
+	 * though we still will complain noisily about it.
+	 */
+	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
+
+		if (!error)
+			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
+	}
+
 	/* to avoid recursion in __req_mod */
 	if (unlikely(error)) {
 		what = (bio_data_dir(bio) == WRITE)
-			? write_completed_with_error
+			? WRITE_COMPLETED_WITH_ERROR
 			: (bio_rw(bio) == READ)
-			  ? read_completed_with_error
-			  : read_ahead_completed_with_error;
+			  ? READ_COMPLETED_WITH_ERROR
+			  : READ_AHEAD_COMPLETED_WITH_ERROR;
 	} else
-		what = completed_ok;
+		what = COMPLETED_OK;
 
 	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(error);
 
 	/* not req_mod(), we need irqsave here! */
-	spin_lock_irqsave(&mdev->req_lock, flags);
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
 	__req_mod(req, what, &m);
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 	put_ldev(mdev);
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
 }
 
-int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	struct drbd_request *req = container_of(w, struct drbd_request, w);
-
-	/* We should not detach for read io-error,
-	 * but try to WRITE the P_DATA_REPLY to the failed location,
-	 * to give the disk the chance to relocate that block */
-
-	spin_lock_irq(&mdev->req_lock);
-	if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
-		_req_mod(req, read_retry_remote_canceled);
-		spin_unlock_irq(&mdev->req_lock);
-		return 1;
-	}
-	spin_unlock_irq(&mdev->req_lock);
-
-	return w_send_read_req(mdev, w, 0);
-}
-
-void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
+void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
+		  struct drbd_peer_request *peer_req, void *digest)
 {
 	struct hash_desc desc;
 	struct scatterlist sg;
-	struct page *page = e->pages;
+	struct page *page = peer_req->pages;
 	struct page *tmp;
 	unsigned len;
 
@@ -290,7 +302,7 @@
 		page = tmp;
 	}
 	/* and now the last, possibly only partially used page */
-	len = e->size & (PAGE_SIZE - 1);
+	len = peer_req->i.size & (PAGE_SIZE - 1);
 	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
 	crypto_hash_update(&desc, &sg, sg.length);
 	crypto_hash_final(&desc, digest);
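
The tail-page handling above leans on a small modulo trick: `size & (PAGE_SIZE - 1)` is the length of the last, possibly partial page, except that an exact multiple of PAGE_SIZE yields 0, in which case the last page is in fact full (hence the GNU `len ?: PAGE_SIZE`). Spelled out:

#include <stdio.h>

#define PAGE_SIZE 4096u

static unsigned int last_chunk(unsigned int size)
{
	unsigned int len = size & (PAGE_SIZE - 1);

	return len ? len : PAGE_SIZE;   /* the kernel writes len ?: PAGE_SIZE */
}

int main(void)
{
	printf("size %u -> last chunk %u\n", 4096u, last_chunk(4096u)); /* 4096 */
	printf("size %u -> last chunk %u\n", 6144u, last_chunk(6144u)); /* 2048 */
	return 0;
}
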
@@ -316,59 +328,58 @@
 	crypto_hash_final(&desc, digest);
 }
 
-/* TODO merge common code with w_e_end_ov_req */
-int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+/* MAYBE merge common code with w_e_end_ov_req */
+static int w_e_send_csum(struct drbd_work *w, int cancel)
 {
-	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
 	int digest_size;
 	void *digest;
-	int ok = 1;
-
-	D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);
+	int err = 0;
 
 	if (unlikely(cancel))
 		goto out;
 
-	if (likely((e->flags & EE_WAS_ERROR) != 0))
+	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
 		goto out;
 
-	digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+	digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
 	digest = kmalloc(digest_size, GFP_NOIO);
 	if (digest) {
-		sector_t sector = e->sector;
-		unsigned int size = e->size;
-		drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
-		/* Free e and pages before send.
+		sector_t sector = peer_req->i.sector;
+		unsigned int size = peer_req->i.size;
+		drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
+		/* Free peer_req and pages before send.
 		 * In case we block on congestion, we could otherwise run into
 		 * some distributed deadlock, if the other side blocks on
 		 * congestion as well, because our receiver blocks in
-		 * drbd_pp_alloc due to pp_in_use > max_buffers. */
-		drbd_free_ee(mdev, e);
-		e = NULL;
+		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
+		drbd_free_peer_req(mdev, peer_req);
+		peer_req = NULL;
 		inc_rs_pending(mdev);
-		ok = drbd_send_drequest_csum(mdev, sector, size,
-					     digest, digest_size,
-					     P_CSUM_RS_REQUEST);
+		err = drbd_send_drequest_csum(mdev, sector, size,
+					      digest, digest_size,
+					      P_CSUM_RS_REQUEST);
 		kfree(digest);
 	} else {
 		dev_err(DEV, "kmalloc() of digest failed.\n");
-		ok = 0;
+		err = -ENOMEM;
 	}
 
 out:
-	if (e)
-		drbd_free_ee(mdev, e);
+	if (peer_req)
+		drbd_free_peer_req(mdev, peer_req);
 
-	if (unlikely(!ok))
+	if (unlikely(err))
 		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
-	return ok;
+	return err;
 }
 
 #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
 
 static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
 {
-	struct drbd_epoch_entry *e;
+	struct drbd_peer_request *peer_req;
 
 	if (!get_ldev(mdev))
 		return -EIO;
@@ -378,45 +389,47 @@
 
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
-	e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
-	if (!e)
+	peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
+				       size, GFP_TRY);
+	if (!peer_req)
 		goto defer;
 
-	e->w.cb = w_e_send_csum;
-	spin_lock_irq(&mdev->req_lock);
-	list_add(&e->w.list, &mdev->read_ee);
-	spin_unlock_irq(&mdev->req_lock);
+	peer_req->w.cb = w_e_send_csum;
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_add(&peer_req->w.list, &mdev->read_ee);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	atomic_add(size >> 9, &mdev->rs_sect_ev);
-	if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
+	if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
 		return 0;
 
 	/* If it failed because of ENOMEM, retry should help.  If it failed
 	 * because bio_add_page failed (probably broken lower level driver),
 	 * retry may or may not help.
 	 * If it does not, you may need to force disconnect. */
-	spin_lock_irq(&mdev->req_lock);
-	list_del(&e->w.list);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_lock_irq(&mdev->tconn->req_lock);
+	list_del(&peer_req->w.list);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	drbd_free_ee(mdev, e);
+	drbd_free_peer_req(mdev, peer_req);
 defer:
 	put_ldev(mdev);
 	return -EAGAIN;
 }
 
-int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_resync_timer(struct drbd_work *w, int cancel)
 {
+	struct drbd_conf *mdev = w->mdev;
 	switch (mdev->state.conn) {
 	case C_VERIFY_S:
-		w_make_ov_request(mdev, w, cancel);
+		w_make_ov_request(w, cancel);
 		break;
 	case C_SYNC_TARGET:
-		w_make_resync_request(mdev, w, cancel);
+		w_make_resync_request(w, cancel);
 		break;
 	}
 
-	return 1;
+	return 0;
 }
 
 void resync_timer_fn(unsigned long data)
@@ -424,7 +437,7 @@
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
 
 	if (list_empty(&mdev->resync_work.list))
-		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
+		drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work);
 }
 
 static void fifo_set(struct fifo_buffer *fb, int value)
@@ -456,8 +469,24 @@
 		fb->values[i] += value;
 }
 
+struct fifo_buffer *fifo_alloc(int fifo_size)
+{
+	struct fifo_buffer *fb;
+
+	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
+	if (!fb)
+		return NULL;
+
+	fb->head_index = 0;
+	fb->size = fifo_size;
+	fb->total = 0;
+
+	return fb;
+}
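
fifo_alloc() sizes its allocation as header plus `fifo_size` ints, i.e. the classic C flexible-array-member idiom: a single kzalloc() covers both the bookkeeping fields and the variably sized values[] array, which also lets a later pointer swap replace the whole plan in one step. A userspace rendition (struct layout assumed from the allocation size above):

#include <stdio.h>
#include <stdlib.h>

struct fifo {
	unsigned int head_index;
	unsigned int size;
	int total;
	int values[];           /* flexible array member, sized at alloc time */
};

static struct fifo *fifo_alloc(unsigned int n)
{
	struct fifo *fb = calloc(1, sizeof(*fb) + n * sizeof(int));

	if (fb)
		fb->size = n;
	return fb;
}

int main(void)
{
	struct fifo *fb = fifo_alloc(8);

	if (!fb)
		return 1;
	fb->values[7] = 42;
	printf("size=%u last=%d\n", fb->size, fb->values[7]);
	free(fb);
	return 0;
}
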
+
 static int drbd_rs_controller(struct drbd_conf *mdev)
 {
+	struct disk_conf *dc;
 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
 	unsigned int want;     /* The number of sectors we want in the proxy */
 	int req_sect; /* Number of sectors to request in this turn */
@@ -466,38 +495,39 @@
 	int steps; /* Number of time steps to plan ahead */
 	int curr_corr;
 	int max_sect;
+	struct fifo_buffer *plan;
 
 	sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
 	mdev->rs_in_flight -= sect_in;
 
-	spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
+	dc = rcu_dereference(mdev->ldev->disk_conf);
+	plan = rcu_dereference(mdev->rs_plan_s);
 
-	steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
+	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
 
 	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
-		want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
+		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
 	} else { /* normal path */
-		want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
-			sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
+		want = dc->c_fill_target ? dc->c_fill_target :
+			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
 	}
 
-	correction = want - mdev->rs_in_flight - mdev->rs_planed;
+	correction = want - mdev->rs_in_flight - plan->total;
 
 	/* Plan ahead */
 	cps = correction / steps;
-	fifo_add_val(&mdev->rs_plan_s, cps);
-	mdev->rs_planed += cps * steps;
+	fifo_add_val(plan, cps);
+	plan->total += cps * steps;
 
 	/* What we do in this step */
-	curr_corr = fifo_push(&mdev->rs_plan_s, 0);
-	spin_unlock(&mdev->peer_seq_lock);
-	mdev->rs_planed -= curr_corr;
+	curr_corr = fifo_push(plan, 0);
+	plan->total -= curr_corr;
 
 	req_sect = sect_in + curr_corr;
 	if (req_sect < 0)
 		req_sect = 0;
 
-	max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
+	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
 	if (req_sect > max_sect)
 		req_sect = max_sect;
 
@@ -513,22 +543,25 @@
 static int drbd_rs_number_requests(struct drbd_conf *mdev)
 {
 	int number;
-	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+
+	rcu_read_lock();
+	if (rcu_dereference(mdev->rs_plan_s)->size) {
 		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
 		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
 	} else {
-		mdev->c_sync_rate = mdev->sync_conf.rate;
+		mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
 		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
 	}
+	rcu_read_unlock();
 
 	/* ignore the amount of pending requests, the resync controller should
 	 * throttle down to incoming reply rate soon enough anyways. */
 	return number;
 }
 
-static int w_make_resync_request(struct drbd_conf *mdev,
-				 struct drbd_work *w, int cancel)
+int w_make_resync_request(struct drbd_work *w, int cancel)
 {
+	struct drbd_conf *mdev = w->mdev;
 	unsigned long bit;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
@@ -538,12 +571,12 @@
 	int i = 0;
 
 	if (unlikely(cancel))
-		return 1;
+		return 0;
 
 	if (mdev->rs_total == 0) {
 		/* empty resync? */
 		drbd_resync_finished(mdev);
-		return 1;
+		return 0;
 	}
 
 	if (!get_ldev(mdev)) {
@@ -552,7 +585,7 @@
 		   to continue resync with a broken disk makes no sense at
 		   all */
 		dev_err(DEV, "Disk broke down during resync!\n");
-		return 1;
+		return 0;
 	}
 
 	max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
@@ -562,15 +595,15 @@
 
 	for (i = 0; i < number; i++) {
 		/* Stop generating RS requests, when half of the send buffer is filled */
-		mutex_lock(&mdev->data.mutex);
-		if (mdev->data.socket) {
-			queued = mdev->data.socket->sk->sk_wmem_queued;
-			sndbuf = mdev->data.socket->sk->sk_sndbuf;
+		mutex_lock(&mdev->tconn->data.mutex);
+		if (mdev->tconn->data.socket) {
+			queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
+			sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
 		} else {
 			queued = 1;
 			sndbuf = 0;
 		}
-		mutex_unlock(&mdev->data.mutex);
+		mutex_unlock(&mdev->tconn->data.mutex);
 		if (queued > sndbuf / 2)
 			goto requeue;
 
@@ -581,7 +614,7 @@
 		if (bit == DRBD_END_OF_BITMAP) {
 			mdev->bm_resync_fo = drbd_bm_bits(mdev);
 			put_ldev(mdev);
-			return 1;
+			return 0;
 		}
 
 		sector = BM_BIT_TO_SECT(bit);
@@ -640,11 +673,11 @@
 		/* adjust very last sectors, in case we are oddly sized */
 		if (sector + (size>>9) > capacity)
 			size = (capacity-sector)<<9;
-		if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
+		if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
 			switch (read_for_csum(mdev, sector, size)) {
 			case -EIO: /* Disk failure */
 				put_ldev(mdev);
-				return 0;
+				return -EIO;
 			case -EAGAIN: /* allocation failed, or ldev busy */
 				drbd_rs_complete_io(mdev, sector);
 				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
@@ -657,13 +690,16 @@
 				BUG();
 			}
 		} else {
+			int err;
+
 			inc_rs_pending(mdev);
-			if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
-					       sector, size, ID_SYNCER)) {
+			err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
+						 sector, size, ID_SYNCER);
+			if (err) {
 				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
 				dec_rs_pending(mdev);
 				put_ldev(mdev);
-				return 0;
+				return err;
 			}
 		}
 	}
@@ -676,21 +712,23 @@
 		 * until then resync "work" is "inactive" ...
 		 */
 		put_ldev(mdev);
-		return 1;
+		return 0;
 	}
 
  requeue:
 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
 	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	put_ldev(mdev);
-	return 1;
+	return 0;
 }
 
-static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+static int w_make_ov_request(struct drbd_work *w, int cancel)
 {
+	struct drbd_conf *mdev = w->mdev;
 	int number, i, size;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel))
 		return 1;
@@ -699,9 +737,17 @@
 
 	sector = mdev->ov_position;
 	for (i = 0; i < number; i++) {
-		if (sector >= capacity) {
+		if (sector >= capacity)
 			return 1;
-		}
+
+		/* We check for "finished" only in the reply path:
+		 * w_e_end_ov_reply().
+		 * We need to send at least one request out. */
+		stop_sector_reached = i > 0
+			&& verify_can_do_stop_sector(mdev)
+			&& sector >= mdev->ov_stop_sector;
+		if (stop_sector_reached)
+			break;
 
 		size = BM_BLOCK_SIZE;
 
@@ -715,7 +761,7 @@
 			size = (capacity-sector)<<9;
 
 		inc_rs_pending(mdev);
-		if (!drbd_send_ov_request(mdev, sector, size)) {
+		if (drbd_send_ov_request(mdev, sector, size)) {
 			dec_rs_pending(mdev);
 			return 0;
 		}
@@ -725,56 +771,39 @@
 
  requeue:
 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
-	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+	if (i == 0 || !stop_sector_reached)
+		mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	return 1;
 }
 
-
-void start_resync_timer_fn(unsigned long data)
+int w_ov_finished(struct drbd_work *w, int cancel)
 {
-	struct drbd_conf *mdev = (struct drbd_conf *) data;
-
-	drbd_queue_work(&mdev->data.work, &mdev->start_resync_work);
-}
-
-int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
-		dev_warn(DEV, "w_start_resync later...\n");
-		mdev->start_resync_timer.expires = jiffies + HZ/10;
-		add_timer(&mdev->start_resync_timer);
-		return 1;
-	}
-
-	drbd_start_resync(mdev, C_SYNC_SOURCE);
-	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
-	return 1;
-}
-
-int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
+	struct drbd_conf *mdev = w->mdev;
 	kfree(w);
-	ov_oos_print(mdev);
+	ov_out_of_sync_print(mdev);
 	drbd_resync_finished(mdev);
 
-	return 1;
+	return 0;
 }
 
-static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+static int w_resync_finished(struct drbd_work *w, int cancel)
 {
+	struct drbd_conf *mdev = w->mdev;
 	kfree(w);
 
 	drbd_resync_finished(mdev);
 
-	return 1;
+	return 0;
 }
 
 static void ping_peer(struct drbd_conf *mdev)
 {
-	clear_bit(GOT_PING_ACK, &mdev->flags);
-	request_ping(mdev);
-	wait_event(mdev->misc_wait,
-		   test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
+	struct drbd_tconn *tconn = mdev->tconn;
+
+	clear_bit(GOT_PING_ACK, &tconn->flags);
+	request_ping(tconn);
+	wait_event(tconn->ping_wait,
+		   test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
 }
 
 int drbd_resync_finished(struct drbd_conf *mdev)
@@ -799,7 +828,8 @@
 		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
 		if (w) {
 			w->cb = w_resync_finished;
-			drbd_queue_work(&mdev->data.work, w);
+			w->mdev = mdev;
+			drbd_queue_work(&mdev->tconn->sender_work, w);
 			return 1;
 		}
 		dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
@@ -808,7 +838,12 @@
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
 	if (dt <= 0)
 		dt = 1;
+
 	db = mdev->rs_total;
+	/* adjust for verify start and stop sectors, respectively the reached position */
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		db -= mdev->ov_left;
+
 	dbdt = Bit2KB(db/dt);
 	mdev->rs_paused /= HZ;
 
@@ -817,8 +852,8 @@
 
 	ping_peer(mdev);
 
-	spin_lock_irq(&mdev->req_lock);
-	os = mdev->state;
+	spin_lock_irq(&mdev->tconn->req_lock);
+	os = drbd_read_state(mdev);
 
 	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
 
@@ -831,7 +866,7 @@
 	ns.conn = C_CONNECTED;
 
 	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
-	     verify_done ? "Online verify " : "Resync",
+	     verify_done ? "Online verify" : "Resync",
 	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
 
 	n_oos = drbd_bm_total_weight(mdev);
@@ -848,7 +883,7 @@
 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
 			khelper_cmd = "after-resync-target";
 
-		if (mdev->csums_tfm && mdev->rs_total) {
+		if (mdev->tconn->csums_tfm && mdev->rs_total) {
 			const unsigned long s = mdev->rs_same_csum;
 			const unsigned long t = mdev->rs_total;
 			const int ratio =
@@ -906,13 +941,15 @@
 
 	_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
 out_unlock:
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irq(&mdev->tconn->req_lock);
 	put_ldev(mdev);
 out:
 	mdev->rs_total  = 0;
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
-	if (verify_done)
+
+	/* reset start sector, if we reached end of device */
+	if (verify_done && mdev->ov_left == 0)
 		mdev->ov_start_sector = 0;
 
 	drbd_md_sync(mdev);
@@ -924,19 +961,19 @@
 }
 
 /* helper */
-static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
 {
-	if (drbd_ee_has_active_page(e)) {
+	if (drbd_peer_req_has_active_page(peer_req)) {
 		/* This might happen if sendpage() has not finished */
-		int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT;
+		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
 		atomic_add(i, &mdev->pp_in_use_by_net);
 		atomic_sub(i, &mdev->pp_in_use);
-		spin_lock_irq(&mdev->req_lock);
-		list_add_tail(&e->w.list, &mdev->net_ee);
-		spin_unlock_irq(&mdev->req_lock);
+		spin_lock_irq(&mdev->tconn->req_lock);
+		list_add_tail(&peer_req->w.list, &mdev->net_ee);
+		spin_unlock_irq(&mdev->tconn->req_lock);
 		wake_up(&drbd_pp_wait);
 	} else
-		drbd_free_ee(mdev, e);
+		drbd_free_peer_req(mdev, peer_req);
 }
 
 /**
@@ -945,174 +982,177 @@
  * @w:		work object.
  * @cancel:	The connection will be closed anyways
  */
-int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_e_end_data_req(struct drbd_work *w, int cancel)
 {
-	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
-	int ok;
+	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
+	int err;
 
 	if (unlikely(cancel)) {
-		drbd_free_ee(mdev, e);
+		drbd_free_peer_req(mdev, peer_req);
 		dec_unacked(mdev);
-		return 1;
+		return 0;
 	}
 
-	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
-		ok = drbd_send_block(mdev, P_DATA_REPLY, e);
+	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
+		err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
 	} else {
 		if (__ratelimit(&drbd_ratelimit_state))
 			dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
-			    (unsigned long long)e->sector);
+			    (unsigned long long)peer_req->i.sector);
 
-		ok = drbd_send_ack(mdev, P_NEG_DREPLY, e);
+		err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
 	}
 
 	dec_unacked(mdev);
 
-	move_to_net_ee_or_free(mdev, e);
+	move_to_net_ee_or_free(mdev, peer_req);
 
-	if (unlikely(!ok))
+	if (unlikely(err))
 		dev_err(DEV, "drbd_send_block() failed\n");
-	return ok;
+	return err;
 }
 
 /**
- * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS
+ * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
  * @mdev:	DRBD device.
  * @w:		work object.
  * @cancel:	The connection will be closed anyways
  */
-int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
 {
-	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
-	int ok;
+	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
+	int err;
 
 	if (unlikely(cancel)) {
-		drbd_free_ee(mdev, e);
+		drbd_free_peer_req(mdev, peer_req);
 		dec_unacked(mdev);
-		return 1;
+		return 0;
 	}
 
 	if (get_ldev_if_state(mdev, D_FAILED)) {
-		drbd_rs_complete_io(mdev, e->sector);
+		drbd_rs_complete_io(mdev, peer_req->i.sector);
 		put_ldev(mdev);
 	}
 
 	if (mdev->state.conn == C_AHEAD) {
-		ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
-	} else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+		err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
+	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
 		if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
 			inc_rs_pending(mdev);
-			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
 		} else {
 			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Not sending RSDataReply, "
 				    "partner DISKLESS!\n");
-			ok = 1;
+			err = 0;
 		}
 	} else {
 		if (__ratelimit(&drbd_ratelimit_state))
 			dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
-			    (unsigned long long)e->sector);
+			    (unsigned long long)peer_req->i.sector);
 
-		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
 
 		/* update resync data with failure */
-		drbd_rs_failed_io(mdev, e->sector, e->size);
+		drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
 	}
 
 	dec_unacked(mdev);
 
-	move_to_net_ee_or_free(mdev, e);
+	move_to_net_ee_or_free(mdev, peer_req);
 
-	if (unlikely(!ok))
+	if (unlikely(err))
 		dev_err(DEV, "drbd_send_block() failed\n");
-	return ok;
+	return err;
 }
 
-int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
 {
-	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
 	struct digest_info *di;
 	int digest_size;
 	void *digest = NULL;
-	int ok, eq = 0;
+	int err, eq = 0;
 
 	if (unlikely(cancel)) {
-		drbd_free_ee(mdev, e);
+		drbd_free_peer_req(mdev, peer_req);
 		dec_unacked(mdev);
-		return 1;
+		return 0;
 	}
 
 	if (get_ldev(mdev)) {
-		drbd_rs_complete_io(mdev, e->sector);
+		drbd_rs_complete_io(mdev, peer_req->i.sector);
 		put_ldev(mdev);
 	}
 
-	di = e->digest;
+	di = peer_req->digest;
 
-	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
 		/* quick hack to try to avoid a race against reconfiguration.
 		 * a real fix would be much more involved,
 		 * introducing more locking mechanisms */
-		if (mdev->csums_tfm) {
-			digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+		if (mdev->tconn->csums_tfm) {
+			digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
 			D_ASSERT(digest_size == di->digest_size);
 			digest = kmalloc(digest_size, GFP_NOIO);
 		}
 		if (digest) {
-			drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+			drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
 			eq = !memcmp(digest, di->digest, digest_size);
 			kfree(digest);
 		}
 
 		if (eq) {
-			drbd_set_in_sync(mdev, e->sector, e->size);
+			drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
 			/* rs_same_csums unit is BM_BLOCK_SIZE */
-			mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT;
-			ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
+			mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
+			err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
 		} else {
 			inc_rs_pending(mdev);
-			e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
-			e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */
+			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
+			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
 			kfree(di);
-			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
 		}
 	} else {
-		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
 		if (__ratelimit(&drbd_ratelimit_state))
 			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
 	}
 
 	dec_unacked(mdev);
-	move_to_net_ee_or_free(mdev, e);
+	move_to_net_ee_or_free(mdev, peer_req);
 
-	if (unlikely(!ok))
+	if (unlikely(err))
 		dev_err(DEV, "drbd_send_block/ack() failed\n");
-	return ok;
+	return err;
 }
 
-/* TODO merge common code with w_e_send_csum */
-int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_e_end_ov_req(struct drbd_work *w, int cancel)
 {
-	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
-	sector_t sector = e->sector;
-	unsigned int size = e->size;
+	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
+	sector_t sector = peer_req->i.sector;
+	unsigned int size = peer_req->i.size;
 	int digest_size;
 	void *digest;
-	int ok = 1;
+	int err = 0;
 
 	if (unlikely(cancel))
 		goto out;
 
-	digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+	digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
 	digest = kmalloc(digest_size, GFP_NOIO);
 	if (!digest) {
-		ok = 0;	/* terminate the connection in case the allocation failed */
+		err = 1;	/* terminate the connection in case the allocation failed */
 		goto out;
 	}
 
-	if (likely(!(e->flags & EE_WAS_ERROR)))
-		drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
+		drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
 	else
 		memset(digest, 0, digest_size);
 
@@ -1120,25 +1160,23 @@
 	 * In case we block on congestion, we could otherwise run into
 	 * some distributed deadlock, if the other side blocks on
 	 * congestion as well, because our receiver blocks in
-	 * drbd_pp_alloc due to pp_in_use > max_buffers. */
-	drbd_free_ee(mdev, e);
-	e = NULL;
+	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
+	drbd_free_peer_req(mdev, peer_req);
+	peer_req = NULL;
 	inc_rs_pending(mdev);
-	ok = drbd_send_drequest_csum(mdev, sector, size,
-				     digest, digest_size,
-				     P_OV_REPLY);
-	if (!ok)
+	err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
+	if (err)
 		dec_rs_pending(mdev);
 	kfree(digest);
 
 out:
-	if (e)
-		drbd_free_ee(mdev, e);
+	if (peer_req)
+		drbd_free_peer_req(mdev, peer_req);
 	dec_unacked(mdev);
-	return ok;
+	return err;
 }
 
-void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
+void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
 {
 	if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
 		mdev->ov_last_oos_size += size>>9;
@@ -1149,36 +1187,38 @@
 	drbd_set_out_of_sync(mdev, sector, size);
 }
 
-int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_e_end_ov_reply(struct drbd_work *w, int cancel)
 {
-	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
+	struct drbd_conf *mdev = w->mdev;
 	struct digest_info *di;
 	void *digest;
-	sector_t sector = e->sector;
-	unsigned int size = e->size;
+	sector_t sector = peer_req->i.sector;
+	unsigned int size = peer_req->i.size;
 	int digest_size;
-	int ok, eq = 0;
+	int err, eq = 0;
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel)) {
-		drbd_free_ee(mdev, e);
+		drbd_free_peer_req(mdev, peer_req);
 		dec_unacked(mdev);
-		return 1;
+		return 0;
 	}
 
 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
 	 * the resync lru has been cleaned up already */
 	if (get_ldev(mdev)) {
-		drbd_rs_complete_io(mdev, e->sector);
+		drbd_rs_complete_io(mdev, peer_req->i.sector);
 		put_ldev(mdev);
 	}
 
-	di = e->digest;
+	di = peer_req->digest;
 
-	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
-		digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
+		digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
 		digest = kmalloc(digest_size, GFP_NOIO);
 		if (digest) {
-			drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+			drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
 
 			D_ASSERT(digest_size == di->digest_size);
 			eq = !memcmp(digest, di->digest, digest_size);
@@ -1186,19 +1226,19 @@
 		}
 	}
 
-		/* Free e and pages before send.
-		 * In case we block on congestion, we could otherwise run into
-		 * some distributed deadlock, if the other side blocks on
-		 * congestion as well, because our receiver blocks in
-		 * drbd_pp_alloc due to pp_in_use > max_buffers. */
-	drbd_free_ee(mdev, e);
+	/* Free peer_req and pages before send.
+	 * In case we block on congestion, we could otherwise run into
+	 * some distributed deadlock, if the other side blocks on
+	 * congestion as well, because our receiver blocks in
+	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
+	drbd_free_peer_req(mdev, peer_req);
 	if (!eq)
-		drbd_ov_oos_found(mdev, sector, size);
+		drbd_ov_out_of_sync_found(mdev, sector, size);
 	else
-		ov_oos_print(mdev);
+		ov_out_of_sync_print(mdev);
 
-	ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
-			      eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
+	err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
+			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
 
 	dec_unacked(mdev);
 
@@ -1208,73 +1248,102 @@
 	if ((mdev->ov_left & 0x200) == 0x200)
 		drbd_advance_rs_marks(mdev, mdev->ov_left);
 
-	if (mdev->ov_left == 0) {
-		ov_oos_print(mdev);
+	stop_sector_reached = verify_can_do_stop_sector(mdev) &&
+		(sector + (size>>9)) >= mdev->ov_stop_sector;
+
+	if (mdev->ov_left == 0 || stop_sector_reached) {
+		ov_out_of_sync_print(mdev);
 		drbd_resync_finished(mdev);
 	}
 
-	return ok;
+	return err;
 }
 
-int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_prev_work_done(struct drbd_work *w, int cancel)
 {
 	struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
+
 	complete(&b->done);
-	return 1;
+	return 0;
 }
 
-int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+/* FIXME
+ * We need to track the number of pending barrier acks,
+ * and to be able to wait for them.
+ * See also comment in drbd_adm_attach before drbd_suspend_io.
+ */
+int drbd_send_barrier(struct drbd_tconn *tconn)
 {
-	struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
-	struct p_barrier *p = &mdev->data.sbuf.barrier;
-	int ok = 1;
+	struct p_barrier *p;
+	struct drbd_socket *sock;
 
-	/* really avoid racing with tl_clear.  w.cb may have been referenced
-	 * just before it was reassigned and re-queued, so double check that.
-	 * actually, this race was harmless, since we only try to send the
-	 * barrier packet here, and otherwise do nothing with the object.
-	 * but compare with the head of w_clear_epoch */
-	spin_lock_irq(&mdev->req_lock);
-	if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
-		cancel = 1;
-	spin_unlock_irq(&mdev->req_lock);
+	sock = &tconn->data;
+	p = conn_prepare_command(tconn, sock);
+	if (!p)
+		return -EIO;
+	p->barrier = tconn->send.current_epoch_nr;
+	p->pad = 0;
+	tconn->send.current_epoch_writes = 0;
+
+	return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0);
+}
+
+int w_send_write_hint(struct drbd_work *w, int cancel)
+{
+	struct drbd_conf *mdev = w->mdev;
+	struct drbd_socket *sock;
+
 	if (cancel)
-		return 1;
-
-	if (!drbd_get_data_sock(mdev))
 		return 0;
-	p->barrier = b->br_number;
-	/* inc_ap_pending was done where this was queued.
-	 * dec_ap_pending will be done in got_BarrierAck
-	 * or (on connection loss) in w_clear_epoch.  */
-	ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
-				(struct p_header80 *)p, sizeof(*p), 0);
-	drbd_put_data_sock(mdev);
-
-	return ok;
+	sock = &mdev->tconn->data;
+	if (!drbd_prepare_command(mdev, sock))
+		return -EIO;
+	return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
 }
 
-int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
 {
-	if (cancel)
-		return 1;
-	return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
+	if (!tconn->send.seen_any_write_yet) {
+		tconn->send.seen_any_write_yet = true;
+		tconn->send.current_epoch_nr = epoch;
+		tconn->send.current_epoch_writes = 0;
+	}
 }
 
-int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
+{
+	/* no epoch to close before the first write on this connection */
+	if (!tconn->send.seen_any_write_yet)
+		return;
+	if (tconn->send.current_epoch_nr != epoch) {
+		if (tconn->send.current_epoch_writes)
+			drbd_send_barrier(tconn);
+		tconn->send.current_epoch_nr = epoch;
+	}
+}
+
+int w_send_out_of_sync(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
-	int ok;
+	struct drbd_conf *mdev = w->mdev;
+	struct drbd_tconn *tconn = mdev->tconn;
+	int err;
 
 	if (unlikely(cancel)) {
-		req_mod(req, send_canceled);
-		return 1;
+		req_mod(req, SEND_CANCELED);
+		return 0;
 	}
 
-	ok = drbd_send_oos(mdev, req);
-	req_mod(req, oos_handed_to_network);
+	/* this time, no tconn->send.current_epoch_writes++;
+	 * If it was sent, it was the closing barrier for the last
+	 * replicated epoch, before we went into AHEAD mode.
+	 * No more barriers will be sent, until we leave AHEAD mode again. */
+	maybe_send_barrier(tconn, req->epoch);
 
-	return ok;
+	err = drbd_send_out_of_sync(mdev, req);
+	req_mod(req, OOS_HANDED_TO_NETWORK);
+
+	return err;
 }
 
 /**
@@ -1283,20 +1352,26 @@
  * @w:		work object.
  * @cancel:	The connection will be closed anyways
  */
-int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_send_dblock(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
-	int ok;
+	struct drbd_conf *mdev = w->mdev;
+	struct drbd_tconn *tconn = mdev->tconn;
+	int err;
 
 	if (unlikely(cancel)) {
-		req_mod(req, send_canceled);
-		return 1;
+		req_mod(req, SEND_CANCELED);
+		return 0;
 	}
 
-	ok = drbd_send_dblock(mdev, req);
-	req_mod(req, ok ? handed_over_to_network : send_failed);
+	re_init_if_first_write(tconn, req->epoch);
+	maybe_send_barrier(tconn, req->epoch);
+	tconn->send.current_epoch_writes++;
 
-	return ok;
+	err = drbd_send_dblock(mdev, req);
+	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+
+	return err;
 }
 
 /**
@@ -1305,57 +1380,61 @@
  * @w:		work object.
  * @cancel:	The connection will be closed anyways
  */
-int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_send_read_req(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
-	int ok;
+	struct drbd_conf *mdev = w->mdev;
+	struct drbd_tconn *tconn = mdev->tconn;
+	int err;
 
 	if (unlikely(cancel)) {
-		req_mod(req, send_canceled);
-		return 1;
+		req_mod(req, SEND_CANCELED);
+		return 0;
 	}
 
-	ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size,
-				(unsigned long)req);
+	/* Even read requests may close a write epoch,
+	 * if one was open. */
+	maybe_send_barrier(tconn, req->epoch);
 
-	if (!ok) {
-		/* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send();
-		 * so this is probably redundant */
-		if (mdev->state.conn >= C_CONNECTED)
-			drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
-	}
-	req_mod(req, ok ? handed_over_to_network : send_failed);
+	err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
+				 (unsigned long)req);
 
-	return ok;
+	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+
+	return err;
 }
 
-int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+int w_restart_disk_io(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
+	struct drbd_conf *mdev = w->mdev;
 
 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
-		drbd_al_begin_io(mdev, req->sector);
-	/* Calling drbd_al_begin_io() out of the worker might deadlocks
-	   theoretically. Practically it can not deadlock, since this is
-	   only used when unfreezing IOs. All the extents of the requests
-	   that made it into the TL are already active */
+		drbd_al_begin_io(mdev, &req->i);
 
 	drbd_req_make_private_bio(req, req->master_bio);
 	req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
 	generic_make_request(req->private_bio);
 
-	return 1;
+	return 0;
 }
 
 static int _drbd_may_sync_now(struct drbd_conf *mdev)
 {
 	struct drbd_conf *odev = mdev;
+	int resync_after;
 
 	while (1) {
-		if (odev->sync_conf.after == -1)
+		if (!odev->ldev)
 			return 1;
-		odev = minor_to_mdev(odev->sync_conf.after);
-		ERR_IF(!odev) return 1;
+		rcu_read_lock();
+		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		rcu_read_unlock();
+		if (resync_after == -1)
+			return 1;
+		odev = minor_to_mdev(resync_after);
+		if (!expect(odev))
+			return 1;
 		if ((odev->state.conn >= C_SYNC_SOURCE &&
 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
 		    odev->state.aftr_isp || odev->state.peer_isp ||
@@ -1375,16 +1454,15 @@
 	struct drbd_conf *odev;
 	int i, rv = 0;
 
-	for (i = 0; i < minor_count; i++) {
-		odev = minor_to_mdev(i);
-		if (!odev)
-			continue;
+	rcu_read_lock();
+	idr_for_each_entry(&minors, odev, i) {
 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
 			continue;
 		if (!_drbd_may_sync_now(odev))
 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
 			       != SS_NOTHING_TO_DO);
 	}
+	rcu_read_unlock();
 
 	return rv;
 }
@@ -1400,10 +1478,8 @@
 	struct drbd_conf *odev;
 	int i, rv = 0;
 
-	for (i = 0; i < minor_count; i++) {
-		odev = minor_to_mdev(i);
-		if (!odev)
-			continue;
+	rcu_read_lock();
+	idr_for_each_entry(&minors, odev, i) {
 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
 			continue;
 		if (odev->state.aftr_isp) {
@@ -1413,6 +1489,7 @@
 				       != SS_NOTHING_TO_DO) ;
 		}
 	}
+	rcu_read_unlock();
 	return rv;
 }
 
@@ -1430,57 +1507,86 @@
 	write_unlock_irq(&global_state_lock);
 }
 
-static int sync_after_error(struct drbd_conf *mdev, int o_minor)
+/* caller must hold global_state_lock */
+enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
 {
 	struct drbd_conf *odev;
+	int resync_after;
 
 	if (o_minor == -1)
 		return NO_ERROR;
 	if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
-		return ERR_SYNC_AFTER;
+		return ERR_RESYNC_AFTER;
 
 	/* check for loops */
 	odev = minor_to_mdev(o_minor);
 	while (1) {
 		if (odev == mdev)
-			return ERR_SYNC_AFTER_CYCLE;
+			return ERR_RESYNC_AFTER_CYCLE;
 
+		rcu_read_lock();
+		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		rcu_read_unlock();
 		/* dependency chain ends here, no cycles. */
-		if (odev->sync_conf.after == -1)
+		if (resync_after == -1)
 			return NO_ERROR;
 
 		/* follow the dependency chain */
-		odev = minor_to_mdev(odev->sync_conf.after);
+		odev = minor_to_mdev(resync_after);
 	}
 }
 
-int drbd_alter_sa(struct drbd_conf *mdev, int na)
+/* caller must hold global_state_lock */
+void drbd_resync_after_changed(struct drbd_conf *mdev)
 {
 	int changes;
-	int retcode;
 
-	write_lock_irq(&global_state_lock);
-	retcode = sync_after_error(mdev, na);
-	if (retcode == NO_ERROR) {
-		mdev->sync_conf.after = na;
-		do {
-			changes  = _drbd_pause_after(mdev);
-			changes |= _drbd_resume_next(mdev);
-		} while (changes);
-	}
-	write_unlock_irq(&global_state_lock);
-	return retcode;
+	do {
+		changes  = _drbd_pause_after(mdev);
+		changes |= _drbd_resume_next(mdev);
+	} while (changes);
 }
 
 void drbd_rs_controller_reset(struct drbd_conf *mdev)
 {
+	struct fifo_buffer *plan;
+
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
 	mdev->rs_in_flight = 0;
-	mdev->rs_planed = 0;
-	spin_lock(&mdev->peer_seq_lock);
-	fifo_set(&mdev->rs_plan_s, 0);
-	spin_unlock(&mdev->peer_seq_lock);
+
+	/* Updating the RCU protected object in place is necessary since
+	   this function gets called from atomic context.
+	   It is valid since all other updates also lead to a completely
+	   empty fifo */
+	rcu_read_lock();
+	plan = rcu_dereference(mdev->rs_plan_s);
+	plan->total = 0;
+	fifo_set(plan, 0);
+	rcu_read_unlock();
+}
+
+void start_resync_timer_fn(unsigned long data)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+	drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work);
+}
+
+int w_start_resync(struct drbd_work *w, int cancel)
+{
+	struct drbd_conf *mdev = w->mdev;
+
+	if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
+		dev_warn(DEV, "w_start_resync later...\n");
+		mdev->start_resync_timer.expires = jiffies + HZ/10;
+		add_timer(&mdev->start_resync_timer);
+		return 0;
+	}
+
+	drbd_start_resync(mdev, C_SYNC_SOURCE);
+	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
+	return 0;
 }
 
 /**
@@ -1501,43 +1607,58 @@
 		return;
 	}
 
-	if (side == C_SYNC_TARGET) {
-		/* Since application IO was locked out during C_WF_BITMAP_T and
-		   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
-		   we check that we might make the data inconsistent. */
-		r = drbd_khelper(mdev, "before-resync-target");
-		r = (r >> 8) & 0xff;
-		if (r > 0) {
-			dev_info(DEV, "before-resync-target handler returned %d, "
-			     "dropping connection.\n", r);
-			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-			return;
-		}
-	} else /* C_SYNC_SOURCE */ {
-		r = drbd_khelper(mdev, "before-resync-source");
-		r = (r >> 8) & 0xff;
-		if (r > 0) {
-			if (r == 3) {
-				dev_info(DEV, "before-resync-source handler returned %d, "
-					 "ignoring. Old userland tools?", r);
-			} else {
-				dev_info(DEV, "before-resync-source handler returned %d, "
+	if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
+		if (side == C_SYNC_TARGET) {
+			/* Since application IO was locked out during C_WF_BITMAP_T and
+			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
+			   we check that we might make the data inconsistent. */
+			r = drbd_khelper(mdev, "before-resync-target");
+			r = (r >> 8) & 0xff;
+			if (r > 0) {
+				dev_info(DEV, "before-resync-target handler returned %d, "
 					 "dropping connection.\n", r);
-				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+				conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 				return;
 			}
+		} else /* C_SYNC_SOURCE */ {
+			r = drbd_khelper(mdev, "before-resync-source");
+			r = (r >> 8) & 0xff;
+			if (r > 0) {
+				if (r == 3) {
+					dev_info(DEV, "before-resync-source handler returned %d, "
+						 "ignoring. Old userland tools?", r);
+				} else {
+					dev_info(DEV, "before-resync-source handler returned %d, "
+						 "dropping connection.\n", r);
+					conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+					return;
+				}
+			}
 		}
 	}
 
-	drbd_state_lock(mdev);
+	if (current == mdev->tconn->worker.task) {
+		/* The worker should not sleep waiting for state_mutex,
+		   as that can take long */
+		if (!mutex_trylock(mdev->state_mutex)) {
+			set_bit(B_RS_H_DONE, &mdev->flags);
+			mdev->start_resync_timer.expires = jiffies + HZ/5;
+			add_timer(&mdev->start_resync_timer);
+			return;
+		}
+	} else {
+		mutex_lock(mdev->state_mutex);
+	}
+	clear_bit(B_RS_H_DONE, &mdev->flags);
+
 	write_lock_irq(&global_state_lock);
 	if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
 		write_unlock_irq(&global_state_lock);
-		drbd_state_unlock(mdev);
+		mutex_unlock(mdev->state_mutex);
 		return;
 	}
 
-	ns.i = mdev->state.i;
+	ns = drbd_read_state(mdev);
 
 	ns.aftr_isp = !_drbd_may_sync_now(mdev);
 
@@ -1549,7 +1670,7 @@
 		ns.pdsk = D_INCONSISTENT;
 
 	r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
-	ns = mdev->state;
+	ns = drbd_read_state(mdev);
 
 	if (ns.conn < C_CONNECTED)
 		r = SS_UNKNOWN_ERROR;
@@ -1575,6 +1696,10 @@
 	write_unlock_irq(&global_state_lock);
 
 	if (r == SS_SUCCESS) {
+		/* reset rs_last_bcast when a resync or verify is started,
+		 * to deal with potential jiffies wrap. */
+		mdev->rs_last_bcast = jiffies - HZ;
+
 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
 		     drbd_conn_str(ns.conn),
 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
@@ -1589,10 +1714,10 @@
 		 * drbd_resync_finished from here in that case.
 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
 		 * and from after_state_ch otherwise. */
-		if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96)
+		if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
 			drbd_gen_and_send_sync_uuid(mdev);
 
-		if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
+		if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
 			/* This still has a race (about when exactly the peers
 			 * detect connection loss) that can lead to a full sync
 			 * on next handshake. In 8.3.9 we fixed this with explicit
@@ -1603,10 +1728,16 @@
 			 * detect connection loss, then waiting for a ping
 			 * response (implicit in drbd_resync_finished) reduces
 			 * the race considerably, but does not solve it. */
-			if (side == C_SYNC_SOURCE)
-				schedule_timeout_interruptible(
-					mdev->net_conf->ping_int * HZ +
-					mdev->net_conf->ping_timeo*HZ/9);
+			if (side == C_SYNC_SOURCE) {
+				struct net_conf *nc;
+				int timeo;
+
+				rcu_read_lock();
+				nc = rcu_dereference(mdev->tconn->net_conf);
+				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
+				rcu_read_unlock();
+				schedule_timeout_interruptible(timeo);
+			}
 			drbd_resync_finished(mdev);
 		}
 
@@ -1621,114 +1752,180 @@
 		drbd_md_sync(mdev);
 	}
 	put_ldev(mdev);
-	drbd_state_unlock(mdev);
+	mutex_unlock(mdev->state_mutex);
+}
+
+/* If the resource already closed the current epoch, but we did not
+ * (because we have not yet seen new requests), we should send the
+ * corresponding barrier now.  Must be checked within the same spinlock
+ * that is used to check for new requests. */
+bool need_to_send_barrier(struct drbd_tconn *connection)
+{
+	if (!connection->send.seen_any_write_yet)
+		return false;
+
+	/* Skip barriers that do not contain any writes.
+	 * This may happen during AHEAD mode. */
+	if (!connection->send.current_epoch_writes)
+		return false;
+
+	/* ->req_lock is held when requests are queued on
+	 * connection->sender_work, and put into ->transfer_log.
+	 * It is also held when ->current_tle_nr is increased.
+	 * So either there are already new requests queued,
+	 * and corresponding barriers will be sent there.
+	 * Or nothing new is queued yet, so the difference will be 1.
+	 */
+	if (atomic_read(&connection->current_tle_nr) !=
+	    connection->send.current_epoch_nr + 1)
+		return false;
+
+	return true;
+}
+
+bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
+{
+	spin_lock_irq(&queue->q_lock);
+	list_splice_init(&queue->q, work_list);
+	spin_unlock_irq(&queue->q_lock);
+	return !list_empty(work_list);
+}
+
+bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
+{
+	spin_lock_irq(&queue->q_lock);
+	if (!list_empty(&queue->q))
+		list_move(queue->q.next, work_list);
+	spin_unlock_irq(&queue->q_lock);
+	return !list_empty(work_list);
+}
+
+void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list)
+{
+	DEFINE_WAIT(wait);
+	struct net_conf *nc;
+	int uncork, cork;
+
+	dequeue_work_item(&connection->sender_work, work_list);
+	if (!list_empty(work_list))
+		return;
+
+	/* Still nothing to do?
+	 * Maybe we still need to close the current epoch,
+	 * even if no new requests are queued yet.
+	 *
+	 * Also, poke TCP, just in case.
+	 * Then wait for new work (or signal). */
+	rcu_read_lock();
+	nc = rcu_dereference(connection->net_conf);
+	uncork = nc ? nc->tcp_cork : 0;
+	rcu_read_unlock();
+	if (uncork) {
+		mutex_lock(&connection->data.mutex);
+		if (connection->data.socket)
+			drbd_tcp_uncork(connection->data.socket);
+		mutex_unlock(&connection->data.mutex);
+	}
+
+	for (;;) {
+		int send_barrier;
+		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
+		spin_lock_irq(&connection->req_lock);
+		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
+		/* dequeue single item only,
+		 * we still use drbd_queue_work_front() in some places */
+		if (!list_empty(&connection->sender_work.q))
+			list_move(connection->sender_work.q.next, work_list);
+		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
+		if (!list_empty(work_list) || signal_pending(current)) {
+			spin_unlock_irq(&connection->req_lock);
+			break;
+		}
+		send_barrier = need_to_send_barrier(connection);
+		spin_unlock_irq(&connection->req_lock);
+		if (send_barrier) {
+			drbd_send_barrier(connection);
+			connection->send.current_epoch_nr++;
+		}
+		schedule();
+		/* may be woken up for things other than new work, too,
+		 * e.g. if the current epoch got closed.
+		 * In that case we send the barrier above. */
+	}
+	finish_wait(&connection->sender_work.q_wait, &wait);
+
+	/* someone may have changed the config while we have been waiting above. */
+	rcu_read_lock();
+	nc = rcu_dereference(connection->net_conf);
+	cork = nc ? nc->tcp_cork : 0;
+	rcu_read_unlock();
+	mutex_lock(&connection->data.mutex);
+	if (connection->data.socket) {
+		if (cork)
+			drbd_tcp_cork(connection->data.socket);
+		else if (!uncork)
+			drbd_tcp_uncork(connection->data.socket);
+	}
+	mutex_unlock(&connection->data.mutex);
 }
 
 int drbd_worker(struct drbd_thread *thi)
 {
-	struct drbd_conf *mdev = thi->mdev;
+	struct drbd_tconn *tconn = thi->tconn;
 	struct drbd_work *w = NULL;
+	struct drbd_conf *mdev;
 	LIST_HEAD(work_list);
-	int intr = 0, i;
+	int vnr;
 
-	sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev));
+	while (get_t_state(thi) == RUNNING) {
+		drbd_thread_current_set_cpu(thi);
 
-	while (get_t_state(thi) == Running) {
-		drbd_thread_current_set_cpu(mdev);
+		/* as long as we use drbd_queue_work_front(),
+		 * we may only dequeue single work items here, not batches. */
+		if (list_empty(&work_list))
+			wait_for_work(tconn, &work_list);
 
-		if (down_trylock(&mdev->data.work.s)) {
-			mutex_lock(&mdev->data.mutex);
-			if (mdev->data.socket && !mdev->net_conf->no_cork)
-				drbd_tcp_uncork(mdev->data.socket);
-			mutex_unlock(&mdev->data.mutex);
-
-			intr = down_interruptible(&mdev->data.work.s);
-
-			mutex_lock(&mdev->data.mutex);
-			if (mdev->data.socket  && !mdev->net_conf->no_cork)
-				drbd_tcp_cork(mdev->data.socket);
-			mutex_unlock(&mdev->data.mutex);
-		}
-
-		if (intr) {
-			D_ASSERT(intr == -EINTR);
+		if (signal_pending(current)) {
 			flush_signals(current);
-			ERR_IF (get_t_state(thi) == Running)
+			if (get_t_state(thi) == RUNNING) {
+				conn_warn(tconn, "Worker got an unexpected signal\n");
 				continue;
+			}
 			break;
 		}
 
-		if (get_t_state(thi) != Running)
+		if (get_t_state(thi) != RUNNING)
 			break;
-		/* With this break, we have done a down() but not consumed
-		   the entry from the list. The cleanup code takes care of
-		   this...   */
-
-		w = NULL;
-		spin_lock_irq(&mdev->data.work.q_lock);
-		ERR_IF(list_empty(&mdev->data.work.q)) {
-			/* something terribly wrong in our logic.
-			 * we were able to down() the semaphore,
-			 * but the list is empty... doh.
-			 *
-			 * what is the best thing to do now?
-			 * try again from scratch, restarting the receiver,
-			 * asender, whatnot? could break even more ugly,
-			 * e.g. when we are primary, but no good local data.
-			 *
-			 * I'll try to get away just starting over this loop.
-			 */
-			spin_unlock_irq(&mdev->data.work.q_lock);
-			continue;
-		}
-		w = list_entry(mdev->data.work.q.next, struct drbd_work, list);
-		list_del_init(&w->list);
-		spin_unlock_irq(&mdev->data.work.q_lock);
-
-		if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) {
-			/* dev_warn(DEV, "worker: a callback failed! \n"); */
-			if (mdev->state.conn >= C_CONNECTED)
-				drbd_force_state(mdev,
-						NS(conn, C_NETWORK_FAILURE));
-		}
-	}
-	D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags));
-	D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags));
-
-	spin_lock_irq(&mdev->data.work.q_lock);
-	i = 0;
-	while (!list_empty(&mdev->data.work.q)) {
-		list_splice_init(&mdev->data.work.q, &work_list);
-		spin_unlock_irq(&mdev->data.work.q_lock);
 
 		while (!list_empty(&work_list)) {
-			w = list_entry(work_list.next, struct drbd_work, list);
+			w = list_first_entry(&work_list, struct drbd_work, list);
 			list_del_init(&w->list);
-			w->cb(mdev, w, 1);
-			i++; /* dead debugging code */
+			if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0)
+				continue;
+			if (tconn->cstate >= C_WF_REPORT_PARAMS)
+				conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
 		}
-
-		spin_lock_irq(&mdev->data.work.q_lock);
 	}
-	sema_init(&mdev->data.work.s, 0);
-	/* DANGEROUS race: if someone did queue his work within the spinlock,
-	 * but up() ed outside the spinlock, we could get an up() on the
-	 * semaphore without corresponding list entry.
-	 * So don't do that.
-	 */
-	spin_unlock_irq(&mdev->data.work.q_lock);
 
-	D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
-	/* _drbd_set_state only uses stop_nowait.
-	 * wait here for the Exiting receiver. */
-	drbd_thread_stop(&mdev->receiver);
-	drbd_mdev_cleanup(mdev);
+	do {
+		while (!list_empty(&work_list)) {
+			w = list_first_entry(&work_list, struct drbd_work, list);
+			list_del_init(&w->list);
+			w->cb(w, 1);
+		}
+		dequeue_work_batch(&tconn->sender_work, &work_list);
+	} while (!list_empty(&work_list));
 
-	dev_info(DEV, "worker terminated\n");
-
-	clear_bit(DEVICE_DYING, &mdev->flags);
-	clear_bit(CONFIG_PENDING, &mdev->flags);
-	wake_up(&mdev->state_wait);
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
+		kref_get(&mdev->kref);
+		rcu_read_unlock();
+		drbd_mdev_cleanup(mdev);
+		kref_put(&mdev->kref, &drbd_minor_destroy);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
 
 	return 0;
 }
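
The barrier bookkeeping above is spread across several hunks; condensed into one
place, the send path now behaves roughly like the following sketch (simplified
types, no locking, the actual P_BARRIER send elided -- not the driver's literal
code):

#include <stdbool.h>

/* Sketch only: mirrors re_init_if_first_write() + maybe_send_barrier()
 * + the current_epoch_writes++ done in w_send_dblock(). */
struct send_state {
	bool seen_any_write_yet;
	unsigned int current_epoch_nr;
	unsigned int current_epoch_writes;
};

static void sketch_send_write(struct send_state *s, unsigned int req_epoch)
{
	if (!s->seen_any_write_yet) {		/* re_init_if_first_write() */
		s->seen_any_write_yet = true;
		s->current_epoch_nr = req_epoch;
		s->current_epoch_writes = 0;
	}
	if (s->current_epoch_nr != req_epoch) {	/* maybe_send_barrier() */
		if (s->current_epoch_writes) {
			/* send P_BARRIER for the old epoch here;
			 * drbd_send_barrier() also resets the write count */
			s->current_epoch_writes = 0;
		}
		s->current_epoch_nr = req_epoch;
	}
	s->current_epoch_writes++;		/* this write extends the new epoch */
}

Read requests and out-of-sync notifications only call the maybe_send_barrier()
step, which is why w_send_read_req() and w_send_out_of_sync() above skip the
re-init and the write count increment.
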
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
index 151f1a3..328f18e 100644
--- a/drivers/block/drbd/drbd_wrappers.h
+++ b/drivers/block/drbd/drbd_wrappers.h
@@ -3,6 +3,7 @@
 
 #include <linux/ctype.h>
 #include <linux/mm.h>
+#include "drbd_int.h"
 
 /* see get_sb_bdev and bd_claim */
 extern char *drbd_sec_holder;
@@ -20,8 +21,8 @@
 
 /* bi_end_io handlers */
 extern void drbd_md_io_complete(struct bio *bio, int error);
-extern void drbd_endio_sec(struct bio *bio, int error);
-extern void drbd_endio_pri(struct bio *bio, int error);
+extern void drbd_peer_request_endio(struct bio *bio, int error);
+extern void drbd_request_endio(struct bio *bio, int error);
 
 /*
  * used to submit our private bio
@@ -45,12 +46,6 @@
 		generic_make_request(bio);
 }
 
-static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
-{
-        return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
-                == CRYPTO_ALG_TYPE_HASH;
-}
-
 #ifndef __CHECKER__
 # undef __cond_lock
 # define __cond_lock(x,c) (c)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 54046e5..ae12512 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -463,6 +463,7 @@
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
+	lo->lo_bio_count++;
 	bio_list_add(&lo->lo_bio_list, bio);
 }
 
@@ -471,6 +472,7 @@
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
+	lo->lo_bio_count--;
 	return bio_list_pop(&lo->lo_bio_list);
 }
 
@@ -489,6 +491,10 @@
 		goto out;
 	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 		goto out;
+	if (lo->lo_bio_count >= q->nr_congestion_on)
+		wait_event_lock_irq(lo->lo_req_wait,
+				    lo->lo_bio_count < q->nr_congestion_off,
+				    lo->lo_lock);
 	loop_add_bio(lo, old_bio);
 	wake_up(&lo->lo_event);
 	spin_unlock_irq(&lo->lo_lock);
@@ -546,6 +552,8 @@
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
+		if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
+			wake_up(&lo->lo_req_wait);
 		spin_unlock_irq(&lo->lo_lock);
 
 		BUG_ON(!bio);
@@ -873,6 +881,7 @@
 	lo->transfer = transfer_none;
 	lo->ioctl = NULL;
 	lo->lo_sizelimit = 0;
+	lo->lo_bio_count = 0;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -1673,6 +1682,7 @@
 	lo->lo_number		= i;
 	lo->lo_thread		= NULL;
 	init_waitqueue_head(&lo->lo_event);
+	init_waitqueue_head(&lo->lo_req_wait);
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
 	disk->first_minor	= i << part_shift;
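
The loop.c hunks above add back-pressure with two watermarks: bios are counted
in, producers block once the count reaches nr_congestion_on, and the worker
wakes them only after draining below nr_congestion_off. A minimal user-space
sketch of the same hysteresis pattern (illustrative names, not the driver's
code):

#include <pthread.h>

struct throttle {
	pthread_mutex_t lock;
	pthread_cond_t  wait;
	unsigned int    count;	/* like lo_bio_count */
	unsigned int    on;	/* like q->nr_congestion_on */
	unsigned int    off;	/* like q->nr_congestion_off */
};

static void produce(struct throttle *t)		/* like loop_make_request() */
{
	pthread_mutex_lock(&t->lock);
	if (t->count >= t->on)			/* congested: block ... */
		while (t->count >= t->off)	/* ... until below the low mark */
			pthread_cond_wait(&t->wait, &t->lock);
	t->count++;				/* like loop_add_bio() */
	pthread_mutex_unlock(&t->lock);
}

static void consume(struct throttle *t)		/* like loop_thread() */
{
	pthread_mutex_lock(&t->lock);
	t->count--;				/* like loop_get_bio() */
	if (t->count < t->off)
		pthread_cond_broadcast(&t->wait);
	pthread_mutex_unlock(&t->lock);
}

The gap between the two watermarks provides the hysteresis that keeps producers
from being woken only to block again immediately.
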
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bb3d9be..89576a0 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -61,15 +61,29 @@
 
 #define RBD_MINORS_PER_MAJOR	256		/* max minors per blkdev */
 
-#define RBD_MAX_SNAP_NAME_LEN	32
+#define RBD_SNAP_DEV_NAME_PREFIX	"snap_"
+#define RBD_MAX_SNAP_NAME_LEN	\
+			(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
+
 #define RBD_MAX_SNAP_COUNT	510	/* allows max snapc to fit in 4KB */
 #define RBD_MAX_OPT_LEN		1024
 
 #define RBD_SNAP_HEAD_NAME	"-"
 
+/* This allows a single page to hold an image name sent by the OSD */
+#define RBD_IMAGE_NAME_LEN_MAX	(PAGE_SIZE - sizeof (__le32) - 1)
 #define RBD_IMAGE_ID_LEN_MAX	64
+
 #define RBD_OBJ_PREFIX_LEN_MAX	64
 
+/* Feature bits */
+
+#define RBD_FEATURE_LAYERING      1
+
+/* Features supported by this (client software) implementation. */
+
+#define RBD_FEATURES_ALL          (0)
+
 /*
  * An RBD device name will be "rbd#", where the "rbd" comes from
  * RBD_DRV_NAME above, and # is a unique integer identifier.
@@ -101,6 +115,27 @@
 	u64 obj_version;
 };
 
+/*
+ * An rbd image specification.
+ *
+ * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
+ * identify an image.
+ */
+struct rbd_spec {
+	u64		pool_id;
+	char		*pool_name;
+
+	char		*image_id;
+	size_t		image_id_len;
+	char		*image_name;
+	size_t		image_name_len;
+
+	u64		snap_id;
+	char		*snap_name;
+
+	struct kref	kref;
+};
+
 struct rbd_options {
 	bool	read_only;
 };
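
struct rbd_spec embeds a struct kref, so its lifetime follows the usual kref
pattern. A hypothetical sketch of the get/put/release helpers such a structure
implies (the names rbd_spec_get/rbd_spec_put and the exact release order are
illustrative, not necessarily this patch's code):

/* Hypothetical helpers showing the kref lifecycle of struct rbd_spec. */
static void rbd_spec_free(struct kref *kref)
{
	struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);

	kfree(spec->pool_name);
	kfree(spec->image_id);
	kfree(spec->image_name);
	kfree(spec->snap_name);
	kfree(spec);
}

static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
{
	kref_get(&spec->kref);
	return spec;
}

static void rbd_spec_put(struct rbd_spec *spec)
{
	if (spec)
		kref_put(&spec->kref, rbd_spec_free);
}
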
@@ -155,11 +190,8 @@
 };
 
 struct rbd_mapping {
-	char                    *snap_name;
-	u64                     snap_id;
 	u64                     size;
 	u64                     features;
-	bool                    snap_exists;
 	bool			read_only;
 };
 
@@ -173,7 +205,6 @@
 	struct gendisk		*disk;		/* blkdev's gendisk and rq */
 
 	u32			image_format;	/* Either 1 or 2 */
-	struct rbd_options	rbd_opts;
 	struct rbd_client	*rbd_client;
 
 	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
@@ -181,17 +212,17 @@
 	spinlock_t		lock;		/* queue lock */
 
 	struct rbd_image_header	header;
-	char			*image_id;
-	size_t			image_id_len;
-	char			*image_name;
-	size_t			image_name_len;
+	bool                    exists;
+	struct rbd_spec		*spec;
+
 	char			*header_name;
-	char			*pool_name;
-	int			pool_id;
 
 	struct ceph_osd_event   *watch_event;
 	struct ceph_osd_request *watch_request;
 
+	struct rbd_spec		*parent_spec;
+	u64			parent_overlap;
+
 	/* protects updating the header */
 	struct rw_semaphore     header_rwsem;
 
@@ -204,6 +235,7 @@
 
 	/* sysfs related */
 	struct device		dev;
+	unsigned long		open_count;
 };
 
 static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */
@@ -218,7 +250,7 @@
 static int rbd_dev_snaps_register(struct rbd_device *rbd_dev);
 
 static void rbd_dev_release(struct device *dev);
-static void __rbd_remove_snap_dev(struct rbd_snap *snap);
+static void rbd_remove_snap_dev(struct rbd_snap *snap);
 
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,
 		       size_t count);
@@ -258,17 +290,8 @@
 #  define rbd_assert(expr)	((void) 0)
 #endif /* !RBD_DEBUG */
 
-static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
-{
-	return get_device(&rbd_dev->dev);
-}
-
-static void rbd_put_dev(struct rbd_device *rbd_dev)
-{
-	put_device(&rbd_dev->dev);
-}
-
-static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver);
+static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver);
+static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
 
 static int rbd_open(struct block_device *bdev, fmode_t mode)
 {
@@ -277,8 +300,11 @@
 	if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
 		return -EROFS;
 
-	rbd_get_dev(rbd_dev);
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	(void) get_device(&rbd_dev->dev);
 	set_device_ro(bdev, rbd_dev->mapping.read_only);
+	rbd_dev->open_count++;
+	mutex_unlock(&ctl_mutex);
 
 	return 0;
 }
@@ -287,7 +313,11 @@
 {
 	struct rbd_device *rbd_dev = disk->private_data;
 
-	rbd_put_dev(rbd_dev);
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	rbd_assert(rbd_dev->open_count > 0);
+	rbd_dev->open_count--;
+	put_device(&rbd_dev->dev);
+	mutex_unlock(&ctl_mutex);
 
 	return 0;
 }
@@ -388,7 +418,7 @@
 static match_table_t rbd_opts_tokens = {
 	/* int args above */
 	/* string args above */
-	{Opt_read_only, "mapping.read_only"},
+	{Opt_read_only, "read_only"},
 	{Opt_read_only, "ro"},		/* Alternate spelling */
 	{Opt_read_write, "read_write"},
 	{Opt_read_write, "rw"},		/* Alternate spelling */
@@ -441,33 +471,17 @@
  * Get a ceph client with specific addr and configuration, if one does
  * not exist create it.
  */
-static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
-				size_t mon_addr_len, char *options)
+static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
 {
-	struct rbd_options *rbd_opts = &rbd_dev->rbd_opts;
-	struct ceph_options *ceph_opts;
 	struct rbd_client *rbdc;
 
-	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
-
-	ceph_opts = ceph_parse_options(options, mon_addr,
-					mon_addr + mon_addr_len,
-					parse_rbd_opts_token, rbd_opts);
-	if (IS_ERR(ceph_opts))
-		return PTR_ERR(ceph_opts);
-
 	rbdc = rbd_client_find(ceph_opts);
-	if (rbdc) {
-		/* using an existing client */
+	if (rbdc)	/* using an existing client */
 		ceph_destroy_options(ceph_opts);
-	} else {
+	else
 		rbdc = rbd_client_create(ceph_opts);
-		if (IS_ERR(rbdc))
-			return PTR_ERR(rbdc);
-	}
-	rbd_dev->rbd_client = rbdc;
 
-	return 0;
+	return rbdc;
 }
 
 /*
@@ -492,10 +506,10 @@
  * Drop reference to ceph client node. If it's not referenced anymore, release
  * it.
  */
-static void rbd_put_client(struct rbd_device *rbd_dev)
+static void rbd_put_client(struct rbd_client *rbdc)
 {
-	kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
-	rbd_dev->rbd_client = NULL;
+	if (rbdc)
+		kref_put(&rbdc->kref, rbd_client_release);
 }
 
 /*
@@ -524,6 +538,16 @@
 	if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
 		return false;
 
+	/* The bio layer requires at least sector-sized I/O */
+
+	if (ondisk->options.order < SECTOR_SHIFT)
+		return false;
+
+	/* If we use u64 in a few spots we may be able to loosen this */
+
+	if (ondisk->options.order > 8 * sizeof (int) - 1)
+		return false;
+
 	/*
 	 * The size of a snapshot header has to fit in a size_t, and
 	 * that limits the number of snapshots.
@@ -635,6 +659,20 @@
 	return -ENOMEM;
 }
 
+static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
+{
+	struct rbd_snap *snap;
+
+	if (snap_id == CEPH_NOSNAP)
+		return RBD_SNAP_HEAD_NAME;
+
+	list_for_each_entry(snap, &rbd_dev->snaps, node)
+		if (snap_id == snap->id)
+			return snap->name;
+
+	return NULL;
+}
+
 static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name)
 {
 
@@ -642,7 +680,7 @@
 
 	list_for_each_entry(snap, &rbd_dev->snaps, node) {
 		if (!strcmp(snap_name, snap->name)) {
-			rbd_dev->mapping.snap_id = snap->id;
+			rbd_dev->spec->snap_id = snap->id;
 			rbd_dev->mapping.size = snap->size;
 			rbd_dev->mapping.features = snap->features;
 
@@ -653,26 +691,23 @@
 	return -ENOENT;
 }
 
-static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name)
+static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
 {
 	int ret;
 
-	if (!memcmp(snap_name, RBD_SNAP_HEAD_NAME,
+	if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME,
 		    sizeof (RBD_SNAP_HEAD_NAME))) {
-		rbd_dev->mapping.snap_id = CEPH_NOSNAP;
+		rbd_dev->spec->snap_id = CEPH_NOSNAP;
 		rbd_dev->mapping.size = rbd_dev->header.image_size;
 		rbd_dev->mapping.features = rbd_dev->header.features;
-		rbd_dev->mapping.snap_exists = false;
-		rbd_dev->mapping.read_only = rbd_dev->rbd_opts.read_only;
 		ret = 0;
 	} else {
-		ret = snap_by_name(rbd_dev, snap_name);
+		ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name);
 		if (ret < 0)
 			goto done;
-		rbd_dev->mapping.snap_exists = true;
 		rbd_dev->mapping.read_only = true;
 	}
-	rbd_dev->mapping.snap_name = snap_name;
+	rbd_dev->exists = true;
 done:
 	return ret;
 }
@@ -695,13 +730,13 @@
 	u64 segment;
 	int ret;
 
-	name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
+	name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO);
 	if (!name)
 		return NULL;
 	segment = offset >> rbd_dev->header.obj_order;
-	ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx",
+	ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx",
 			rbd_dev->header.object_prefix, segment);
-	if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) {
+	if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) {
 		pr_err("error formatting segment name for #%llu (%d)\n",
 			segment, ret);
 		kfree(name);
@@ -800,77 +835,144 @@
 }
 
 /*
- * bio_chain_clone - clone a chain of bios up to a certain length.
- * might return a bio_pair that will need to be released.
+ * Clone a portion of a bio, starting at the given byte offset
+ * and continuing for the number of bytes indicated.
  */
-static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
-				   struct bio_pair **bp,
-				   int len, gfp_t gfpmask)
+static struct bio *bio_clone_range(struct bio *bio_src,
+					unsigned int offset,
+					unsigned int len,
+					gfp_t gfpmask)
 {
-	struct bio *old_chain = *old;
-	struct bio *new_chain = NULL;
-	struct bio *tail;
-	int total = 0;
+	struct bio_vec *bv;
+	unsigned int resid;
+	unsigned short idx;
+	unsigned int voff;
+	unsigned short end_idx;
+	unsigned short vcnt;
+	struct bio *bio;
 
-	if (*bp) {
-		bio_pair_release(*bp);
-		*bp = NULL;
+	/* Handle the easy case for the caller */
+
+	if (!offset && len == bio_src->bi_size)
+		return bio_clone(bio_src, gfpmask);
+
+	if (WARN_ON_ONCE(!len))
+		return NULL;
+	if (WARN_ON_ONCE(len > bio_src->bi_size))
+		return NULL;
+	if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
+		return NULL;
+
+	/* Find first affected segment... */
+
+	resid = offset;
+	__bio_for_each_segment(bv, bio_src, idx, 0) {
+		if (resid < bv->bv_len)
+			break;
+		resid -= bv->bv_len;
+	}
+	voff = resid;
+
+	/* ...and the last affected segment */
+
+	resid += len;
+	__bio_for_each_segment(bv, bio_src, end_idx, idx) {
+		if (resid <= bv->bv_len)
+			break;
+		resid -= bv->bv_len;
+	}
+	vcnt = end_idx - idx + 1;
+
+	/* Build the clone */
+
+	bio = bio_alloc(gfpmask, (unsigned int) vcnt);
+	if (!bio)
+		return NULL;	/* ENOMEM */
+
+	bio->bi_bdev = bio_src->bi_bdev;
+	bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT);
+	bio->bi_rw = bio_src->bi_rw;
+	bio->bi_flags |= 1 << BIO_CLONED;
+
+	/*
+	 * Copy over our part of the bio_vec, then update the first
+	 * and last (or only) entries.
+	 */
+	memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
+			vcnt * sizeof (struct bio_vec));
+	bio->bi_io_vec[0].bv_offset += voff;
+	if (vcnt > 1) {
+		bio->bi_io_vec[0].bv_len -= voff;
+		bio->bi_io_vec[vcnt - 1].bv_len = resid;
+	} else {
+		bio->bi_io_vec[0].bv_len = len;
 	}
 
-	while (old_chain && (total < len)) {
-		struct bio *tmp;
+	bio->bi_vcnt = vcnt;
+	bio->bi_size = len;
+	bio->bi_idx = 0;
 
-		tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
-		if (!tmp)
-			goto err_out;
-		gfpmask &= ~__GFP_WAIT;	/* can't wait after the first */
+	return bio;
+}
 
-		if (total + old_chain->bi_size > len) {
-			struct bio_pair *bp;
+/*
+ * Clone a portion of a bio chain, starting at the given byte offset
+ * into the first bio in the source chain and continuing for the
+ * number of bytes indicated.  The result is another bio chain of
+ * exactly the given length, or a null pointer on error.
+ *
+ * The bio_src and offset parameters are both in-out.  On entry they
+ * refer to the first source bio and the offset into that bio where
+ * the start of data to be cloned is located.
+ *
+ * On return, bio_src is updated to refer to the bio in the source
+ * chain that contains the first un-cloned byte, and *offset will
+ * contain the offset of that byte within that bio.
+ */
+static struct bio *bio_chain_clone_range(struct bio **bio_src,
+					unsigned int *offset,
+					unsigned int len,
+					gfp_t gfpmask)
+{
+	struct bio *bi = *bio_src;
+	unsigned int off = *offset;
+	struct bio *chain = NULL;
+	struct bio **end;
 
-			/*
-			 * this split can only happen with a single paged bio,
-			 * split_bio will BUG_ON if this is not the case
-			 */
-			dout("bio_chain_clone split! total=%d remaining=%d"
-			     "bi_size=%u\n",
-			     total, len - total, old_chain->bi_size);
+	/* Build up a chain of clone bios up to the limit */
 
-			/* split the bio. We'll release it either in the next
-			   call, or it will have to be released outside */
-			bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
-			if (!bp)
-				goto err_out;
+	if (!bi || off >= bi->bi_size || !len)
+		return NULL;		/* Nothing to clone */
 
-			__bio_clone(tmp, &bp->bio1);
+	end = &chain;
+	while (len) {
+		unsigned int bi_size;
+		struct bio *bio;
 
-			*next = &bp->bio2;
-		} else {
-			__bio_clone(tmp, old_chain);
-			*next = old_chain->bi_next;
+		if (!bi)
+			goto out_err;	/* EINVAL; ran out of bio's */
+		bi_size = min_t(unsigned int, bi->bi_size - off, len);
+		bio = bio_clone_range(bi, off, bi_size, gfpmask);
+		if (!bio)
+			goto out_err;	/* ENOMEM */
+
+		*end = bio;
+		end = &bio->bi_next;
+
+		off += bi_size;
+		if (off == bi->bi_size) {
+			bi = bi->bi_next;
+			off = 0;
 		}
-
-		tmp->bi_bdev = NULL;
-		tmp->bi_next = NULL;
-		if (new_chain)
-			tail->bi_next = tmp;
-		else
-			new_chain = tmp;
-		tail = tmp;
-		old_chain = old_chain->bi_next;
-
-		total += tmp->bi_size;
+		len -= bi_size;
 	}
+	*bio_src = bi;
+	*offset = off;
 
-	rbd_assert(total == len);
+	return chain;
+out_err:
+	bio_chain_put(chain);
 
-	*old = old_chain;
-
-	return new_chain;
-
-err_out:
-	dout("bio_chain_clone with err\n");
-	bio_chain_put(new_chain);
 	return NULL;
 }
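
The in/out contract described in the comment above is easiest to see from a
caller; a minimal sketch of the per-segment split loop (segment_length() and
submit_chain() are stand-ins, error handling elided):

/* Sketch: carve a request's bio chain into per-object chains.
 * bio/bio_offset advance past each cloned range automatically. */
static void sketch_split(struct bio *bio, u64 ofs, u64 size)
{
	unsigned int bio_offset = 0;

	while (size > 0) {
		unsigned int len = (unsigned int) segment_length(ofs, size);
		struct bio *chain;

		chain = bio_chain_clone_range(&bio, &bio_offset, len,
					      GFP_ATOMIC);
		if (!chain)
			break;			/* ENOMEM, or ran out of bios */
		submit_chain(chain);		/* stand-in for the osd request */
		ofs += len;
		size -= len;
	}
}
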
 
@@ -988,8 +1090,9 @@
 		req_data->coll_index = coll_index;
 	}
 
-	dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name,
-		(unsigned long long) ofs, (unsigned long long) len);
+	dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n",
+		object_name, (unsigned long long) ofs,
+		(unsigned long long) len, coll, coll_index);
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
 	req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
@@ -1019,7 +1122,7 @@
 	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
 	layout->fl_stripe_count = cpu_to_le32(1);
 	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-	layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id);
+	layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id);
 	ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
 				   req, ops);
 	rbd_assert(ret == 0);
@@ -1154,8 +1257,6 @@
 static int rbd_do_op(struct request *rq,
 		     struct rbd_device *rbd_dev,
 		     struct ceph_snap_context *snapc,
-		     u64 snapid,
-		     int opcode, int flags,
 		     u64 ofs, u64 len,
 		     struct bio *bio,
 		     struct rbd_req_coll *coll,
@@ -1167,6 +1268,9 @@
 	int ret;
 	struct ceph_osd_req_op *ops;
 	u32 payload_len;
+	int opcode;
+	int flags;
+	u64 snapid;
 
 	seg_name = rbd_segment_name(rbd_dev, ofs);
 	if (!seg_name)
@@ -1174,7 +1278,18 @@
 	seg_len = rbd_segment_length(rbd_dev, ofs, len);
 	seg_ofs = rbd_segment_offset(rbd_dev, ofs);
 
-	payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
+	if (rq_data_dir(rq) == WRITE) {
+		opcode = CEPH_OSD_OP_WRITE;
+		flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK;
+		snapid = CEPH_NOSNAP;
+		payload_len = seg_len;
+	} else {
+		opcode = CEPH_OSD_OP_READ;
+		flags = CEPH_OSD_FLAG_READ;
+		snapc = NULL;
+		snapid = rbd_dev->spec->snap_id;
+		payload_len = 0;
+	}
 
 	ret = -ENOMEM;
 	ops = rbd_create_rw_ops(1, opcode, payload_len);
@@ -1202,41 +1317,6 @@
 }
 
 /*
- * Request async osd write
- */
-static int rbd_req_write(struct request *rq,
-			 struct rbd_device *rbd_dev,
-			 struct ceph_snap_context *snapc,
-			 u64 ofs, u64 len,
-			 struct bio *bio,
-			 struct rbd_req_coll *coll,
-			 int coll_index)
-{
-	return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
-			 CEPH_OSD_OP_WRITE,
-			 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-			 ofs, len, bio, coll, coll_index);
-}
-
-/*
- * Request async osd read
- */
-static int rbd_req_read(struct request *rq,
-			 struct rbd_device *rbd_dev,
-			 u64 snapid,
-			 u64 ofs, u64 len,
-			 struct bio *bio,
-			 struct rbd_req_coll *coll,
-			 int coll_index)
-{
-	return rbd_do_op(rq, rbd_dev, NULL,
-			 snapid,
-			 CEPH_OSD_OP_READ,
-			 CEPH_OSD_FLAG_READ,
-			 ofs, len, bio, coll, coll_index);
-}
-
-/*
  * Request sync osd read
  */
 static int rbd_req_sync_read(struct rbd_device *rbd_dev,
@@ -1304,7 +1384,7 @@
 	dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n",
 		rbd_dev->header_name, (unsigned long long) notify_id,
 		(unsigned int) opcode);
-	rc = rbd_refresh_header(rbd_dev, &hver);
+	rc = rbd_dev_refresh(rbd_dev, &hver);
 	if (rc)
 		pr_warning(RBD_DRV_NAME "%d got notification but failed to "
 			   " update snaps: %d\n", rbd_dev->major, rc);
@@ -1460,18 +1540,16 @@
 {
 	struct rbd_device *rbd_dev = q->queuedata;
 	struct request *rq;
-	struct bio_pair *bp = NULL;
 
 	while ((rq = blk_fetch_request(q))) {
 		struct bio *bio;
-		struct bio *rq_bio, *next_bio = NULL;
 		bool do_write;
 		unsigned int size;
-		u64 op_size = 0;
 		u64 ofs;
 		int num_segs, cur_seg = 0;
 		struct rbd_req_coll *coll;
 		struct ceph_snap_context *snapc;
+		unsigned int bio_offset;
 
 		dout("fetched request\n");
 
@@ -1483,10 +1561,6 @@
 
 		/* deduce our operation (read, write) */
 		do_write = (rq_data_dir(rq) == WRITE);
-
-		size = blk_rq_bytes(rq);
-		ofs = blk_rq_pos(rq) * SECTOR_SIZE;
-		rq_bio = rq->bio;
 		if (do_write && rbd_dev->mapping.read_only) {
 			__blk_end_request_all(rq, -EROFS);
 			continue;
@@ -1496,8 +1570,8 @@
 
 		down_read(&rbd_dev->header_rwsem);
 
-		if (rbd_dev->mapping.snap_id != CEPH_NOSNAP &&
-				!rbd_dev->mapping.snap_exists) {
+		if (!rbd_dev->exists) {
+			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
 			up_read(&rbd_dev->header_rwsem);
 			dout("request for non-existent snapshot");
 			spin_lock_irq(q->queue_lock);
@@ -1509,6 +1583,10 @@
 
 		up_read(&rbd_dev->header_rwsem);
 
+		size = blk_rq_bytes(rq);
+		ofs = blk_rq_pos(rq) * SECTOR_SIZE;
+		bio = rq->bio;
+
 		dout("%s 0x%x bytes at 0x%llx\n",
 		     do_write ? "write" : "read",
 		     size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
@@ -1528,45 +1606,37 @@
 			continue;
 		}
 
+		bio_offset = 0;
 		do {
-			/* a bio clone to be passed down to OSD req */
+			u64 limit = rbd_segment_length(rbd_dev, ofs, size);
+			unsigned int chain_size;
+			struct bio *bio_chain;
+
+			BUG_ON(limit > (u64) UINT_MAX);
+			chain_size = (unsigned int) limit;
 			dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
-			op_size = rbd_segment_length(rbd_dev, ofs, size);
+
 			kref_get(&coll->kref);
-			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
-					      op_size, GFP_ATOMIC);
-			if (!bio) {
-				rbd_coll_end_req_index(rq, coll, cur_seg,
-						       -ENOMEM, op_size);
-				goto next_seg;
-			}
 
+			/* Pass a cloned bio chain via an osd request */
 
-			/* init OSD command: write or read */
-			if (do_write)
-				rbd_req_write(rq, rbd_dev,
-					      snapc,
-					      ofs,
-					      op_size, bio,
-					      coll, cur_seg);
+			bio_chain = bio_chain_clone_range(&bio,
+						&bio_offset, chain_size,
+						GFP_ATOMIC);
+			if (bio_chain)
+				(void) rbd_do_op(rq, rbd_dev, snapc,
+						ofs, chain_size,
+						bio_chain, coll, cur_seg);
 			else
-				rbd_req_read(rq, rbd_dev,
-					     rbd_dev->mapping.snap_id,
-					     ofs,
-					     op_size, bio,
-					     coll, cur_seg);
-
-next_seg:
-			size -= op_size;
-			ofs += op_size;
+				rbd_coll_end_req_index(rq, coll, cur_seg,
+						       -ENOMEM, chain_size);
+			size -= chain_size;
+			ofs += chain_size;
 
 			cur_seg++;
-			rq_bio = next_bio;
 		} while (size > 0);
 		kref_put(&coll->kref, rbd_coll_release);
 
-		if (bp)
-			bio_pair_release(bp);
 		spin_lock_irq(q->queue_lock);
 
 		ceph_put_snap_context(snapc);
@@ -1576,28 +1646,47 @@
 /*
  * a queue callback. Makes sure that we don't create a bio that spans across
  * multiple osd objects. One exception would be single-page bios,
- * which we handle later at bio_chain_clone
+ * which we handle later at bio_chain_clone_range()
  */
 static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
 			  struct bio_vec *bvec)
 {
 	struct rbd_device *rbd_dev = q->queuedata;
-	unsigned int chunk_sectors;
-	sector_t sector;
-	unsigned int bio_sectors;
-	int max;
+	sector_t sector_offset;
+	sector_t sectors_per_obj;
+	sector_t obj_sector_offset;
+	int ret;
 
-	chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
-	sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
-	bio_sectors = bmd->bi_size >> SECTOR_SHIFT;
+	/*
+	 * Offset the partition-relative bio start sector so it is
+	 * relative to the enclosing device, then find how far into
+	 * its rbd object that sector falls.
+	 */
+	sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
+	sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
+	obj_sector_offset = sector_offset & (sectors_per_obj - 1);
 
-	max =  (chunk_sectors - ((sector & (chunk_sectors - 1))
-				 + bio_sectors)) << SECTOR_SHIFT;
-	if (max < 0)
-		max = 0; /* bio_add cannot handle a negative return */
-	if (max <= bvec->bv_len && bio_sectors == 0)
-		return bvec->bv_len;
-	return max;
+	/*
+	 * Compute the number of bytes from that offset to the end
+	 * of the object.  Account for what's already used by the bio.
+	 */
+	ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
+	if (ret > bmd->bi_size)
+		ret -= bmd->bi_size;
+	else
+		ret = 0;
+
+	/*
+	 * Don't send back more than was asked for.  And if the bio
+	 * was empty, let the whole thing through because:  "Note
+	 * that a block device *must* allow a single page to be
+	 * added to an empty bio."
+	 */
+	rbd_assert(bvec->bv_len <= PAGE_SIZE);
+	if (ret > (int) bvec->bv_len || !bmd->bi_size)
+		ret = (int) bvec->bv_len;
+
+	return ret;
 }
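
The object-boundary arithmetic above is easier to check with concrete
numbers.  A minimal standalone sketch of the same calculation (the
helper name is made up; it assumes the default 4 MiB objects, i.e. an
obj_order of 22, and ignores the bytes already used by the bio):

	#include <stdio.h>

	#define SECTOR_SHIFT	9

	/* Bytes left in the enclosing object for an absolute sector */
	static unsigned long bytes_to_obj_end(unsigned long sector, int obj_order)
	{
		unsigned long sectors_per_obj = 1UL << (obj_order - SECTOR_SHIFT);
		unsigned long obj_sector_offset = sector & (sectors_per_obj - 1);

		return (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
	}

	int main(void)
	{
		/* 4 MiB objects; a bio starting 1 MiB (2048 sectors) in */
		printf("%lu\n", bytes_to_obj_end(2048UL, 22));	/* 3145728 */
		return 0;
	}

A bio starting 1 MiB into a 4 MiB object may therefore grow by at most
3 MiB before rbd_merge_bvec() starts rejecting pages.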
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
@@ -1663,13 +1752,13 @@
 			ret = -ENXIO;
 			pr_warning("short header read for image %s"
 					" (want %zd got %d)\n",
-				rbd_dev->image_name, size, ret);
+				rbd_dev->spec->image_name, size, ret);
 			goto out_err;
 		}
 		if (!rbd_dev_ondisk_valid(ondisk)) {
 			ret = -ENXIO;
 			pr_warning("invalid header for image %s\n",
-				rbd_dev->image_name);
+				rbd_dev->spec->image_name);
 			goto out_err;
 		}
 
@@ -1707,19 +1796,32 @@
 	return ret;
 }
 
-static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
+static void rbd_remove_all_snaps(struct rbd_device *rbd_dev)
 {
 	struct rbd_snap *snap;
 	struct rbd_snap *next;
 
 	list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node)
-		__rbd_remove_snap_dev(snap);
+		rbd_remove_snap_dev(snap);
+}
+
+static void rbd_update_mapping_size(struct rbd_device *rbd_dev)
+{
+	sector_t size;
+
+	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+		return;
+
+	size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE;
+	dout("setting size to %llu sectors", (unsigned long long) size);
+	rbd_dev->mapping.size = (u64) size;
+	set_capacity(rbd_dev->disk, size);
 }
 
 /*
  * only read the first part of the ondisk header, without the snaps info
  */
-static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
+static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver)
 {
 	int ret;
 	struct rbd_image_header h;
@@ -1730,17 +1832,9 @@
 
 	down_write(&rbd_dev->header_rwsem);
 
-	/* resized? */
-	if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) {
-		sector_t size = (sector_t) h.image_size / SECTOR_SIZE;
-
-		if (size != (sector_t) rbd_dev->mapping.size) {
-			dout("setting size to %llu sectors",
-				(unsigned long long) size);
-			rbd_dev->mapping.size = (u64) size;
-			set_capacity(rbd_dev->disk, size);
-		}
-	}
+	/* Update image size, and check for resize of mapped image */
+	rbd_dev->header.image_size = h.image_size;
+	rbd_update_mapping_size(rbd_dev);
 
 	/* rbd_dev->header.object_prefix shouldn't change */
 	kfree(rbd_dev->header.snap_sizes);
@@ -1768,12 +1862,16 @@
 	return ret;
 }
 
-static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
+static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver)
 {
 	int ret;
 
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-	ret = __rbd_refresh_header(rbd_dev, hver);
+	if (rbd_dev->image_format == 1)
+		ret = rbd_dev_v1_refresh(rbd_dev, hver);
+	else
+		ret = rbd_dev_v2_refresh(rbd_dev, hver);
 	mutex_unlock(&ctl_mutex);
 
 	return ret;
@@ -1885,7 +1983,7 @@
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	return sprintf(buf, "%s\n", rbd_dev->pool_name);
+	return sprintf(buf, "%s\n", rbd_dev->spec->pool_name);
 }
 
 static ssize_t rbd_pool_id_show(struct device *dev,
@@ -1893,7 +1991,8 @@
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	return sprintf(buf, "%d\n", rbd_dev->pool_id);
+	return sprintf(buf, "%llu\n",
+		(unsigned long long) rbd_dev->spec->pool_id);
 }
 
 static ssize_t rbd_name_show(struct device *dev,
@@ -1901,7 +2000,10 @@
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	return sprintf(buf, "%s\n", rbd_dev->image_name);
+	if (rbd_dev->spec->image_name)
+		return sprintf(buf, "%s\n", rbd_dev->spec->image_name);
+
+	return sprintf(buf, "(unknown)\n");
 }
 
 static ssize_t rbd_image_id_show(struct device *dev,
@@ -1909,7 +2011,7 @@
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	return sprintf(buf, "%s\n", rbd_dev->image_id);
+	return sprintf(buf, "%s\n", rbd_dev->spec->image_id);
 }
 
 /*
@@ -1922,7 +2024,50 @@
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	return sprintf(buf, "%s\n", rbd_dev->mapping.snap_name);
+	return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
+}
+
+/*
+ * For an rbd v2 image, shows the pool, image, and snapshot ids and
+ * names for the parent image, plus the overlap.  If there is no
+ * parent, simply shows
+ * "(no parent image)".
+ */
+static ssize_t rbd_parent_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+	struct rbd_spec *spec = rbd_dev->parent_spec;
+	int count;
+	char *bufp = buf;
+
+	if (!spec)
+		return sprintf(buf, "(no parent image)\n");
+
+	count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
+			(unsigned long long) spec->pool_id, spec->pool_name);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
+			spec->image_name ? spec->image_name : "(unknown)");
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
+			(unsigned long long) spec->snap_id, spec->snap_name);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	return (ssize_t) (bufp - buf);
 }
 
 static ssize_t rbd_image_refresh(struct device *dev,
@@ -1933,7 +2078,7 @@
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 	int ret;
 
-	ret = rbd_refresh_header(rbd_dev, NULL);
+	ret = rbd_dev_refresh(rbd_dev, NULL);
 
 	return ret < 0 ? ret : size;
 }
@@ -1948,6 +2093,7 @@
 static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
 static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
 static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
+static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
 
 static struct attribute *rbd_attrs[] = {
 	&dev_attr_size.attr,
@@ -1959,6 +2105,7 @@
 	&dev_attr_name.attr,
 	&dev_attr_image_id.attr,
 	&dev_attr_current_snap.attr,
+	&dev_attr_parent.attr,
 	&dev_attr_refresh.attr,
 	NULL
 };
@@ -2047,6 +2194,74 @@
 	.release	= rbd_snap_dev_release,
 };
 
+static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
+{
+	kref_get(&spec->kref);
+
+	return spec;
+}
+
+static void rbd_spec_free(struct kref *kref);
+static void rbd_spec_put(struct rbd_spec *spec)
+{
+	if (spec)
+		kref_put(&spec->kref, rbd_spec_free);
+}
+
+static struct rbd_spec *rbd_spec_alloc(void)
+{
+	struct rbd_spec *spec;
+
+	spec = kzalloc(sizeof (*spec), GFP_KERNEL);
+	if (!spec)
+		return NULL;
+	kref_init(&spec->kref);
+
+	rbd_spec_put(rbd_spec_get(spec));	/* TEMPORARY */
+
+	return spec;
+}
+
+static void rbd_spec_free(struct kref *kref)
+{
+	struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
+
+	kfree(spec->pool_name);
+	kfree(spec->image_id);
+	kfree(spec->image_name);
+	kfree(spec->snap_name);
+	kfree(spec);
+}
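
The helpers above give rbd_spec a simple kref-based lifetime.  A
hypothetical caller, sketching the intended get/put pattern
(illustration only, not part of the driver):

	static int rbd_spec_example(void)
	{
		struct rbd_spec *spec;

		spec = rbd_spec_alloc();	/* refcount starts at 1 */
		if (!spec)
			return -ENOMEM;

		rbd_spec_get(spec);		/* a second user takes a ref */
		rbd_spec_put(spec);		/* ...and drops it when done */

		rbd_spec_put(spec);		/* final put calls rbd_spec_free() */
		return 0;
	}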
+
+struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
+				struct rbd_spec *spec)
+{
+	struct rbd_device *rbd_dev;
+
+	rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL);
+	if (!rbd_dev)
+		return NULL;
+
+	spin_lock_init(&rbd_dev->lock);
+	INIT_LIST_HEAD(&rbd_dev->node);
+	INIT_LIST_HEAD(&rbd_dev->snaps);
+	init_rwsem(&rbd_dev->header_rwsem);
+
+	rbd_dev->spec = spec;
+	rbd_dev->rbd_client = rbdc;
+
+	return rbd_dev;
+}
+
+static void rbd_dev_destroy(struct rbd_device *rbd_dev)
+{
+	rbd_spec_put(rbd_dev->parent_spec);
+	kfree(rbd_dev->header_name);
+	rbd_put_client(rbd_dev->rbd_client);
+	rbd_spec_put(rbd_dev->spec);
+	kfree(rbd_dev);
+}
+
 static bool rbd_snap_registered(struct rbd_snap *snap)
 {
 	bool ret = snap->dev.type == &rbd_snap_device_type;
@@ -2057,7 +2272,7 @@
 	return ret;
 }
 
-static void __rbd_remove_snap_dev(struct rbd_snap *snap)
+static void rbd_remove_snap_dev(struct rbd_snap *snap)
 {
 	list_del(&snap->node);
 	if (device_is_registered(&snap->dev))
@@ -2073,7 +2288,7 @@
 	dev->type = &rbd_snap_device_type;
 	dev->parent = parent;
 	dev->release = rbd_snap_dev_release;
-	dev_set_name(dev, "snap_%s", snap->name);
+	dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name);
 	dout("%s: registering device for snapshot %s\n", __func__, snap->name);
 
 	ret = device_register(dev);
@@ -2189,6 +2404,7 @@
 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
+	ret = 0;    /* rbd_req_sync_exec() can return positive */
 
 	p = reply_buf;
 	rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
@@ -2216,6 +2432,7 @@
 		__le64 features;
 		__le64 incompat;
 	} features_buf = { 0 };
+	u64 incompat;
 	int ret;
 
 	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
@@ -2226,6 +2443,11 @@
 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
 	if (ret < 0)
 		return ret;
+
+	incompat = le64_to_cpu(features_buf.incompat);
+	if (incompat & ~RBD_FEATURES_ALL)
+		return -ENXIO;
+
 	*snap_features = le64_to_cpu(features_buf.features);
 
 	dout("  snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
@@ -2242,6 +2464,183 @@
 						&rbd_dev->header.features);
 }
 
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+{
+	struct rbd_spec *parent_spec;
+	size_t size;
+	void *reply_buf = NULL;
+	__le64 snapid;
+	void *p;
+	void *end;
+	char *image_id;
+	u64 overlap;
+	size_t len = 0;
+	int ret;
+
+	parent_spec = rbd_spec_alloc();
+	if (!parent_spec)
+		return -ENOMEM;
+
+	size = sizeof (__le64) +				/* pool_id */
+		sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX +	/* image_id */
+		sizeof (__le64) +				/* snap_id */
+		sizeof (__le64);				/* overlap */
+	reply_buf = kmalloc(size, GFP_KERNEL);
+	if (!reply_buf) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	snapid = cpu_to_le64(CEPH_NOSNAP);
+	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+				"rbd", "get_parent",
+				(char *) &snapid, sizeof (snapid),
+				(char *) reply_buf, size,
+				CEPH_OSD_FLAG_READ, NULL);
+	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+	if (ret < 0)
+		goto out_err;
+
+	ret = -ERANGE;
+	p = reply_buf;
+	end = (char *) reply_buf + size;
+	ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err);
+	if (parent_spec->pool_id == CEPH_NOPOOL)
+		goto out;	/* No parent?  No problem. */
+
+	image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+	if (IS_ERR(image_id)) {
+		ret = PTR_ERR(image_id);
+		goto out_err;
+	}
+	parent_spec->image_id = image_id;
+	parent_spec->image_id_len = len;
+	ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
+	ceph_decode_64_safe(&p, end, overlap, out_err);
+
+	rbd_dev->parent_overlap = overlap;
+	rbd_dev->parent_spec = parent_spec;
+	parent_spec = NULL;	/* rbd_dev now owns this */
+out:
+	ret = 0;
+out_err:
+	kfree(reply_buf);
+	rbd_spec_put(parent_spec);
+
+	return ret;
+}
+
+static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
+{
+	size_t image_id_size;
+	char *image_id;
+	void *p;
+	void *end;
+	size_t size;
+	void *reply_buf = NULL;
+	size_t len = 0;
+	char *image_name = NULL;
+	int ret;
+
+	rbd_assert(!rbd_dev->spec->image_name);
+
+	image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len;
+	image_id = kmalloc(image_id_size, GFP_KERNEL);
+	if (!image_id)
+		return NULL;
+
+	p = image_id;
+	end = (char *) image_id + image_id_size;
+	ceph_encode_string(&p, end, rbd_dev->spec->image_id,
+				(u32) rbd_dev->spec->image_id_len);
+
+	size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
+	reply_buf = kmalloc(size, GFP_KERNEL);
+	if (!reply_buf)
+		goto out;
+
+	ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY,
+				"rbd", "dir_get_name",
+				image_id, image_id_size,
+				(char *) reply_buf, size,
+				CEPH_OSD_FLAG_READ, NULL);
+	if (ret < 0)
+		goto out;
+	p = reply_buf;
+	end = (char *) reply_buf + size;
+	image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+	if (IS_ERR(image_name))
+		image_name = NULL;
+	else
+		dout("%s: name is %s len is %zd\n", __func__, image_name, len);
+out:
+	kfree(reply_buf);
+	kfree(image_id);
+
+	return image_name;
+}
+
+/*
+ * When a parent image gets probed, we only have the pool, image,
+ * and snapshot ids but not the names of any of them.  This call
+ * is made later to fill in those names.  It has to be done after
+ * rbd_dev_snaps_update() has completed because some of the
+ * information (in particular, snapshot name) is not available
+ * until then.
+ */
+static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
+{
+	struct ceph_osd_client *osdc;
+	const char *name;
+	void *reply_buf = NULL;
+	int ret;
+
+	if (rbd_dev->spec->pool_name)
+		return 0;	/* Already have the names */
+
+	/* Look up the pool name */
+
+	osdc = &rbd_dev->rbd_client->client->osdc;
+	name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id);
+	if (!name)
+		return -EIO;	/* pool id too large (>= 2^31) */
+
+	rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL);
+	if (!rbd_dev->spec->pool_name)
+		return -ENOMEM;
+
+	/* Fetch the image name; tolerate failure here */
+
+	name = rbd_dev_image_name(rbd_dev);
+	if (name) {
+		rbd_dev->spec->image_name_len = strlen(name);
+		rbd_dev->spec->image_name = (char *) name;
+	} else {
+		pr_warning(RBD_DRV_NAME "%d "
+			"unable to get image name for image id %s\n",
+			rbd_dev->major, rbd_dev->spec->image_id);
+	}
+
+	/* Look up the snapshot name. */
+
+	name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id);
+	if (!name) {
+		ret = -EIO;
+		goto out_err;
+	}
+	rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL);
+	if (!rbd_dev->spec->snap_name) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	return 0;
+out_err:
+	kfree(reply_buf);
+	kfree(rbd_dev->spec->pool_name);
+	rbd_dev->spec->pool_name = NULL;
+
+	return ret;
+}
+
 static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
 {
 	size_t size;
@@ -2328,7 +2727,6 @@
 	int ret;
 	void *p;
 	void *end;
-	size_t snap_name_len;
 	char *snap_name;
 
 	size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
@@ -2348,9 +2746,7 @@
 
 	p = reply_buf;
 	end = (char *) reply_buf + size;
-	snap_name_len = 0;
-	snap_name = ceph_extract_encoded_string(&p, end, &snap_name_len,
-				GFP_KERNEL);
+	snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
 	if (IS_ERR(snap_name)) {
 		ret = PTR_ERR(snap_name);
 		goto out;
@@ -2397,6 +2793,41 @@
 	return ERR_PTR(-EINVAL);
 }
 
+static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver)
+{
+	int ret;
+	__u8 obj_order;
+
+	down_write(&rbd_dev->header_rwsem);
+
+	/* Grab old order first, to see if it changes */
+
+	obj_order = rbd_dev->header.obj_order;
+	ret = rbd_dev_v2_image_size(rbd_dev);
+	if (ret)
+		goto out;
+	if (rbd_dev->header.obj_order != obj_order) {
+		ret = -EIO;
+		goto out;
+	}
+	rbd_update_mapping_size(rbd_dev);
+
+	ret = rbd_dev_v2_snap_context(rbd_dev, hver);
+	dout("rbd_dev_v2_snap_context returned %d\n", ret);
+	if (ret)
+		goto out;
+	ret = rbd_dev_snaps_update(rbd_dev);
+	dout("rbd_dev_snaps_update returned %d\n", ret);
+	if (ret)
+		goto out;
+	ret = rbd_dev_snaps_register(rbd_dev);
+	dout("rbd_dev_snaps_register returned %d\n", ret);
+out:
+	up_write(&rbd_dev->header_rwsem);
+
+	return ret;
+}
+
 /*
  * Scan the rbd device's current snapshot list and compare it to the
  * newly-received snapshot context.  Remove any existing snapshots
@@ -2436,12 +2867,12 @@
 
 			/* Existing snapshot not in the new snap context */
 
-			if (rbd_dev->mapping.snap_id == snap->id)
-				rbd_dev->mapping.snap_exists = false;
-			__rbd_remove_snap_dev(snap);
+			if (rbd_dev->spec->snap_id == snap->id)
+				rbd_dev->exists = false;
+			rbd_remove_snap_dev(snap);
 			dout("%ssnap id %llu has been removed\n",
-				rbd_dev->mapping.snap_id == snap->id ?
-								"mapped " : "",
+				rbd_dev->spec->snap_id == snap->id ?
+							"mapped " : "",
 				(unsigned long long) snap->id);
 
 			/* Done with this list entry; advance */
@@ -2559,7 +2990,7 @@
 	do {
 		ret = rbd_req_sync_watch(rbd_dev);
 		if (ret == -ERANGE) {
-			rc = rbd_refresh_header(rbd_dev, NULL);
+			rc = rbd_dev_refresh(rbd_dev, NULL);
 			if (rc < 0)
 				return rc;
 		}
@@ -2621,8 +3052,8 @@
 		struct rbd_device *rbd_dev;
 
 		rbd_dev = list_entry(tmp, struct rbd_device, node);
-		if (rbd_id > max_id)
-			max_id = rbd_id;
+		if (rbd_dev->dev_id > max_id)
+			max_id = rbd_dev->dev_id;
 	}
 	spin_unlock(&rbd_dev_list_lock);
 
@@ -2722,73 +3153,140 @@
 }
 
 /*
- * This fills in the pool_name, image_name, image_name_len, rbd_dev,
- * rbd_md_name, and name fields of the given rbd_dev, based on the
- * list of monitor addresses and other options provided via
- * /sys/bus/rbd/add.  Returns a pointer to a dynamically-allocated
- * copy of the snapshot name to map if successful, or a
- * pointer-coded error otherwise.
+ * Parse the options provided for an "rbd add" (i.e., rbd image
+ * mapping) request.  These arrive via a write to /sys/bus/rbd/add,
+ * and the data written is passed here via a NUL-terminated buffer.
+ * Returns 0 if successful or an error code otherwise.
  *
- * Note: rbd_dev is assumed to have been initially zero-filled.
+ * The information extracted from these options is recorded in
+ * the other parameters which return dynamically-allocated
+ * structures:
+ *  ceph_opts
+ *      The address of a pointer that will refer to a ceph options
+ *      structure.  Caller must release the returned pointer using
+ *      ceph_destroy_options() when it is no longer needed.
+ *  rbd_opts
+ *	Address of an rbd options pointer.  Fully initialized by
+ *	this function; caller must release with kfree().
+ *  spec
+ *	Address of an rbd image specification pointer.  Fully
+ *	initialized by this function based on parsed options.
+ *	Caller must release with rbd_spec_put().
+ *
+ * The options passed take this form:
+ *  <mon_addrs> <options> <pool_name> <image_name> [<snap_name>]
+ * where:
+ *  <mon_addrs>
+ *      A comma-separated list of one or more monitor addresses.
+ *      A monitor address is an ip address, optionally followed
+ *      by a port number (separated by a colon).
+ *        I.e.:  ip1[:port1][,ip2[:port2]...]
+ *  <options>
+ *      A comma-separated list of ceph and/or rbd options.
+ *  <pool_name>
+ *      The name of the rados pool containing the rbd image.
+ *  <image_name>
+ *      The name of the image in that pool to map.
+ *  <snap_name>
+ *      An optional snapshot name.  If provided, the mapping will
+ *      present data from the image at the time that snapshot was
+ *      created.  The image head is used if no snapshot name is
+ *      provided.  Snapshot mappings are always read-only.
  */
-static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
-				const char *buf,
-				const char **mon_addrs,
-				size_t *mon_addrs_size,
-				char *options,
-				size_t options_size)
+static int rbd_add_parse_args(const char *buf,
+				struct ceph_options **ceph_opts,
+				struct rbd_options **opts,
+				struct rbd_spec **rbd_spec)
 {
 	size_t len;
-	char *err_ptr = ERR_PTR(-EINVAL);
-	char *snap_name;
+	char *options;
+	const char *mon_addrs;
+	size_t mon_addrs_size;
+	struct rbd_spec *spec = NULL;
+	struct rbd_options *rbd_opts = NULL;
+	struct ceph_options *copts;
+	int ret;
 
 	/* The first four tokens are required */
 
 	len = next_token(&buf);
 	if (!len)
-		return err_ptr;
-	*mon_addrs_size = len + 1;
-	*mon_addrs = buf;
-
+		return -EINVAL;	/* Missing monitor address(es) */
+	mon_addrs = buf;
+	mon_addrs_size = len + 1;
 	buf += len;
 
-	len = copy_token(&buf, options, options_size);
-	if (!len || len >= options_size)
-		return err_ptr;
+	ret = -EINVAL;
+	options = dup_token(&buf, NULL);
+	if (!options)
+		return -ENOMEM;
+	if (!*options)
+		goto out_err;	/* Missing options */
 
-	err_ptr = ERR_PTR(-ENOMEM);
-	rbd_dev->pool_name = dup_token(&buf, NULL);
-	if (!rbd_dev->pool_name)
-		goto out_err;
+	spec = rbd_spec_alloc();
+	if (!spec)
+		goto out_mem;
 
-	rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len);
-	if (!rbd_dev->image_name)
-		goto out_err;
+	spec->pool_name = dup_token(&buf, NULL);
+	if (!spec->pool_name)
+		goto out_mem;
+	if (!*spec->pool_name)
+		goto out_err;	/* Missing pool name */
 
-	/* Snapshot name is optional */
+	spec->image_name = dup_token(&buf, &spec->image_name_len);
+	if (!spec->image_name)
+		goto out_mem;
+	if (!*spec->image_name)
+		goto out_err;	/* Missing image name */
+
+	/*
+	 * Snapshot name is optional; default is to use "-"
+	 * (indicating the head/no snapshot).
+	 */
 	len = next_token(&buf);
 	if (!len) {
 		buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
 		len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
-	}
-	snap_name = kmalloc(len + 1, GFP_KERNEL);
-	if (!snap_name)
+	} else if (len > RBD_MAX_SNAP_NAME_LEN) {
+		ret = -ENAMETOOLONG;
 		goto out_err;
-	memcpy(snap_name, buf, len);
-	*(snap_name + len) = '\0';
+	}
+	spec->snap_name = kmalloc(len + 1, GFP_KERNEL);
+	if (!spec->snap_name)
+		goto out_mem;
+	memcpy(spec->snap_name, buf, len);
+	*(spec->snap_name + len) = '\0';
 
-dout("    SNAP_NAME is <%s>, len is %zd\n", snap_name, len);
+	/* Initialize all rbd options to the defaults */
 
-	return snap_name;
+	rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
+	if (!rbd_opts)
+		goto out_mem;
 
+	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
+
+	copts = ceph_parse_options(options, mon_addrs,
+					mon_addrs + mon_addrs_size - 1,
+					parse_rbd_opts_token, rbd_opts);
+	if (IS_ERR(copts)) {
+		ret = PTR_ERR(copts);
+		goto out_err;
+	}
+	kfree(options);
+
+	*ceph_opts = copts;
+	*opts = rbd_opts;
+	*rbd_spec = spec;
+
+	return 0;
+out_mem:
+	ret = -ENOMEM;
 out_err:
-	kfree(rbd_dev->image_name);
-	rbd_dev->image_name = NULL;
-	rbd_dev->image_name_len = 0;
-	kfree(rbd_dev->pool_name);
-	rbd_dev->pool_name = NULL;
+	kfree(rbd_opts);
+	rbd_spec_put(spec);
+	kfree(options);
 
-	return err_ptr;
+	return ret;
 }
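
For illustration, a mapping in the format documented above can be
driven from userspace like this (a hedged sketch; the monitor address,
pool, and image names are made up):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* <mon_addrs> <options> <pool_name> <image_name> [<snap_name>] */
		const char *spec = "1.2.3.4:6789 name=admin rbd myimage -";
		int fd = open("/sys/bus/rbd/add", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, spec, strlen(spec)) < 0)
			perror("write");
		close(fd);
		return 0;
	}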
 
 /*
@@ -2814,14 +3312,22 @@
 	void *p;
 
 	/*
+	 * When probing a parent image, the image id is already
+	 * known (and the image name likely is not).  There's no
+	 * need to fetch the image id again in this case.
+	 */
+	if (rbd_dev->spec->image_id)
+		return 0;
+
+	/*
 	 * First, see if the format 2 image id file exists, and if
 	 * so, get the image's persistent id from it.
 	 */
-	size = sizeof (RBD_ID_PREFIX) + rbd_dev->image_name_len;
+	size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len;
 	object_name = kmalloc(size, GFP_NOIO);
 	if (!object_name)
 		return -ENOMEM;
-	sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->image_name);
+	sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name);
 	dout("rbd id object name is %s\n", object_name);
 
 	/* Response will be an encoded string, which includes a length */
@@ -2841,17 +3347,18 @@
 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
+	ret = 0;    /* rbd_req_sync_exec() can return positive */
 
 	p = response;
-	rbd_dev->image_id = ceph_extract_encoded_string(&p,
+	rbd_dev->spec->image_id = ceph_extract_encoded_string(&p,
 						p + RBD_IMAGE_ID_LEN_MAX,
-						&rbd_dev->image_id_len,
+						&rbd_dev->spec->image_id_len,
 						GFP_NOIO);
-	if (IS_ERR(rbd_dev->image_id)) {
-		ret = PTR_ERR(rbd_dev->image_id);
-		rbd_dev->image_id = NULL;
+	if (IS_ERR(rbd_dev->spec->image_id)) {
+		ret = PTR_ERR(rbd_dev->spec->image_id);
+		rbd_dev->spec->image_id = NULL;
 	} else {
-		dout("image_id is %s\n", rbd_dev->image_id);
+		dout("image_id is %s\n", rbd_dev->spec->image_id);
 	}
 out:
 	kfree(response);
@@ -2867,26 +3374,33 @@
 
 	/* Version 1 images have no id; empty string is used */
 
-	rbd_dev->image_id = kstrdup("", GFP_KERNEL);
-	if (!rbd_dev->image_id)
+	rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL);
+	if (!rbd_dev->spec->image_id)
 		return -ENOMEM;
-	rbd_dev->image_id_len = 0;
+	rbd_dev->spec->image_id_len = 0;
 
 	/* Record the header object name for this rbd image. */
 
-	size = rbd_dev->image_name_len + sizeof (RBD_SUFFIX);
+	size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX);
 	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
 	if (!rbd_dev->header_name) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
-	sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX);
+	sprintf(rbd_dev->header_name, "%s%s",
+		rbd_dev->spec->image_name, RBD_SUFFIX);
 
 	/* Populate rbd image metadata */
 
 	ret = rbd_read_header(rbd_dev, &rbd_dev->header);
 	if (ret < 0)
 		goto out_err;
+
+	/* Version 1 images have no parent (no layering) */
+
+	rbd_dev->parent_spec = NULL;
+	rbd_dev->parent_overlap = 0;
+
 	rbd_dev->image_format = 1;
 
 	dout("discovered version 1 image, header name is %s\n",
@@ -2897,8 +3411,8 @@
 out_err:
 	kfree(rbd_dev->header_name);
 	rbd_dev->header_name = NULL;
-	kfree(rbd_dev->image_id);
-	rbd_dev->image_id = NULL;
+	kfree(rbd_dev->spec->image_id);
+	rbd_dev->spec->image_id = NULL;
 
 	return ret;
 }
@@ -2913,12 +3427,12 @@
 	 * Image id was filled in by the caller.  Record the header
 	 * object name for this rbd image.
 	 */
-	size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->image_id_len;
+	size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len;
 	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
 	if (!rbd_dev->header_name)
 		return -ENOMEM;
 	sprintf(rbd_dev->header_name, "%s%s",
-			RBD_HEADER_PREFIX, rbd_dev->image_id);
+			RBD_HEADER_PREFIX, rbd_dev->spec->image_id);
 
 	/* Get the size and object order for the image */
 
@@ -2932,12 +3446,20 @@
 	if (ret < 0)
 		goto out_err;
 
-	/* Get the features for the image */
+	/* Get and check the features for the image */
 
 	ret = rbd_dev_v2_features(rbd_dev);
 	if (ret < 0)
 		goto out_err;
 
+	/* If the image supports layering, get the parent info */
+
+	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
+		ret = rbd_dev_v2_parent_info(rbd_dev);
+		if (ret < 0)
+			goto out_err;
+	}
+
 	/* crypto and compression type aren't (yet) supported for v2 images */
 
 	rbd_dev->header.crypt_type = 0;
@@ -2955,8 +3477,11 @@
 	dout("discovered version 2 image, header name is %s\n",
 		rbd_dev->header_name);
 
-	return -ENOTSUPP;
+	return 0;
 out_err:
+	rbd_dev->parent_overlap = 0;
+	rbd_spec_put(rbd_dev->parent_spec);
+	rbd_dev->parent_spec = NULL;
 	kfree(rbd_dev->header_name);
 	rbd_dev->header_name = NULL;
 	kfree(rbd_dev->header.object_prefix);
@@ -2965,6 +3490,88 @@
 	return ret;
 }
 
+static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
+{
+	int ret;
+
+	/* no need to lock here, as rbd_dev is not registered yet */
+	ret = rbd_dev_snaps_update(rbd_dev);
+	if (ret)
+		return ret;
+
+	ret = rbd_dev_probe_update_spec(rbd_dev);
+	if (ret)
+		goto err_out_snaps;
+
+	ret = rbd_dev_set_mapping(rbd_dev);
+	if (ret)
+		goto err_out_snaps;
+
+	/* generate unique id: find highest unique id, add one */
+	rbd_dev_id_get(rbd_dev);
+
+	/* Fill in the device name, now that we have its id. */
+	BUILD_BUG_ON(DEV_NAME_LEN
+			< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
+	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
+
+	/* Get our block major device number. */
+
+	ret = register_blkdev(0, rbd_dev->name);
+	if (ret < 0)
+		goto err_out_id;
+	rbd_dev->major = ret;
+
+	/* Set up the blkdev mapping. */
+
+	ret = rbd_init_disk(rbd_dev);
+	if (ret)
+		goto err_out_blkdev;
+
+	ret = rbd_bus_add_dev(rbd_dev);
+	if (ret)
+		goto err_out_disk;
+
+	/*
+	 * At this point cleanup in the event of an error is the job
+	 * of the sysfs code (initiated by rbd_bus_del_dev()).
+	 */
+	down_write(&rbd_dev->header_rwsem);
+	ret = rbd_dev_snaps_register(rbd_dev);
+	up_write(&rbd_dev->header_rwsem);
+	if (ret)
+		goto err_out_bus;
+
+	ret = rbd_init_watch_dev(rbd_dev);
+	if (ret)
+		goto err_out_bus;
+
+	/* Everything's ready.  Announce the disk to the world. */
+
+	add_disk(rbd_dev->disk);
+
+	pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
+		(unsigned long long) rbd_dev->mapping.size);
+
+	return ret;
+err_out_bus:
+	/* this will also clean up rest of rbd_dev stuff */
+
+	rbd_bus_del_dev(rbd_dev);
+
+	return ret;
+err_out_disk:
+	rbd_free_disk(rbd_dev);
+err_out_blkdev:
+	unregister_blkdev(rbd_dev->major, rbd_dev->name);
+err_out_id:
+	rbd_dev_id_put(rbd_dev);
+err_out_snaps:
+	rbd_remove_all_snaps(rbd_dev);
+
+	return ret;
+}
+
 /*
  * Probe for the existence of the header object for the given rbd
  * device.  For format 2 images this includes determining the image
@@ -2984,9 +3591,16 @@
 		ret = rbd_dev_v1_probe(rbd_dev);
 	else
 		ret = rbd_dev_v2_probe(rbd_dev);
-	if (ret)
+	if (ret) {
 		dout("probe failed, returning %d\n", ret);
 
+		return ret;
+	}
+
+	ret = rbd_dev_probe_finish(rbd_dev);
+	if (ret)
+		rbd_header_free(&rbd_dev->header);
+
 	return ret;
 }
 
@@ -2994,141 +3608,64 @@
 		       const char *buf,
 		       size_t count)
 {
-	char *options;
 	struct rbd_device *rbd_dev = NULL;
-	const char *mon_addrs = NULL;
-	size_t mon_addrs_size = 0;
+	struct ceph_options *ceph_opts = NULL;
+	struct rbd_options *rbd_opts = NULL;
+	struct rbd_spec *spec = NULL;
+	struct rbd_client *rbdc;
 	struct ceph_osd_client *osdc;
 	int rc = -ENOMEM;
-	char *snap_name;
 
 	if (!try_module_get(THIS_MODULE))
 		return -ENODEV;
 
-	options = kmalloc(count, GFP_KERNEL);
-	if (!options)
-		goto err_out_mem;
-	rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
-	if (!rbd_dev)
-		goto err_out_mem;
-
-	/* static rbd_device initialization */
-	spin_lock_init(&rbd_dev->lock);
-	INIT_LIST_HEAD(&rbd_dev->node);
-	INIT_LIST_HEAD(&rbd_dev->snaps);
-	init_rwsem(&rbd_dev->header_rwsem);
-
 	/* parse add command */
-	snap_name = rbd_add_parse_args(rbd_dev, buf,
-				&mon_addrs, &mon_addrs_size, options, count);
-	if (IS_ERR(snap_name)) {
-		rc = PTR_ERR(snap_name);
-		goto err_out_mem;
-	}
-
-	rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options);
+	rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
 	if (rc < 0)
+		goto err_out_module;
+
+	rbdc = rbd_get_client(ceph_opts);
+	if (IS_ERR(rbdc)) {
+		rc = PTR_ERR(rbdc);
 		goto err_out_args;
+	}
+	ceph_opts = NULL;	/* rbd_dev client now owns this */
 
 	/* pick the pool */
-	osdc = &rbd_dev->rbd_client->client->osdc;
-	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
+	osdc = &rbdc->client->osdc;
+	rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
 	if (rc < 0)
 		goto err_out_client;
-	rbd_dev->pool_id = rc;
+	spec->pool_id = (u64) rc;
+
+	rbd_dev = rbd_dev_create(rbdc, spec);
+	if (!rbd_dev)
+		goto err_out_client;
+	rbdc = NULL;		/* rbd_dev now owns this */
+	spec = NULL;		/* rbd_dev now owns this */
+
+	rbd_dev->mapping.read_only = rbd_opts->read_only;
+	kfree(rbd_opts);
+	rbd_opts = NULL;	/* done with this */
 
 	rc = rbd_dev_probe(rbd_dev);
 	if (rc < 0)
-		goto err_out_client;
-	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
-
-	/* no need to lock here, as rbd_dev is not registered yet */
-	rc = rbd_dev_snaps_update(rbd_dev);
-	if (rc)
-		goto err_out_header;
-
-	rc = rbd_dev_set_mapping(rbd_dev, snap_name);
-	if (rc)
-		goto err_out_header;
-
-	/* generate unique id: find highest unique id, add one */
-	rbd_dev_id_get(rbd_dev);
-
-	/* Fill in the device name, now that we have its id. */
-	BUILD_BUG_ON(DEV_NAME_LEN
-			< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
-	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
-
-	/* Get our block major device number. */
-
-	rc = register_blkdev(0, rbd_dev->name);
-	if (rc < 0)
-		goto err_out_id;
-	rbd_dev->major = rc;
-
-	/* Set up the blkdev mapping. */
-
-	rc = rbd_init_disk(rbd_dev);
-	if (rc)
-		goto err_out_blkdev;
-
-	rc = rbd_bus_add_dev(rbd_dev);
-	if (rc)
-		goto err_out_disk;
-
-	/*
-	 * At this point cleanup in the event of an error is the job
-	 * of the sysfs code (initiated by rbd_bus_del_dev()).
-	 */
-
-	down_write(&rbd_dev->header_rwsem);
-	rc = rbd_dev_snaps_register(rbd_dev);
-	up_write(&rbd_dev->header_rwsem);
-	if (rc)
-		goto err_out_bus;
-
-	rc = rbd_init_watch_dev(rbd_dev);
-	if (rc)
-		goto err_out_bus;
-
-	/* Everything's ready.  Announce the disk to the world. */
-
-	add_disk(rbd_dev->disk);
-
-	pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
-		(unsigned long long) rbd_dev->mapping.size);
+		goto err_out_rbd_dev;
 
 	return count;
-
-err_out_bus:
-	/* this will also clean up rest of rbd_dev stuff */
-
-	rbd_bus_del_dev(rbd_dev);
-	kfree(options);
-	return rc;
-
-err_out_disk:
-	rbd_free_disk(rbd_dev);
-err_out_blkdev:
-	unregister_blkdev(rbd_dev->major, rbd_dev->name);
-err_out_id:
-	rbd_dev_id_put(rbd_dev);
-err_out_header:
-	rbd_header_free(&rbd_dev->header);
+err_out_rbd_dev:
+	rbd_dev_destroy(rbd_dev);
 err_out_client:
-	kfree(rbd_dev->header_name);
-	rbd_put_client(rbd_dev);
-	kfree(rbd_dev->image_id);
+	rbd_put_client(rbdc);
 err_out_args:
-	kfree(rbd_dev->mapping.snap_name);
-	kfree(rbd_dev->image_name);
-	kfree(rbd_dev->pool_name);
-err_out_mem:
-	kfree(rbd_dev);
-	kfree(options);
+	if (ceph_opts)
+		ceph_destroy_options(ceph_opts);
+	kfree(rbd_opts);
+	rbd_spec_put(spec);
+err_out_module:
+	module_put(THIS_MODULE);
 
 	dout("Error adding device %s\n", buf);
-	module_put(THIS_MODULE);
 
 	return (ssize_t) rc;
 }
@@ -3163,7 +3700,6 @@
 	if (rbd_dev->watch_event)
 		rbd_req_sync_unwatch(rbd_dev);
 
-	rbd_put_client(rbd_dev);
 
 	/* clean up and free blkdev */
 	rbd_free_disk(rbd_dev);
@@ -3173,13 +3709,9 @@
 	rbd_header_free(&rbd_dev->header);
 
 	/* done with the id, and with the rbd_dev */
-	kfree(rbd_dev->mapping.snap_name);
-	kfree(rbd_dev->image_id);
-	kfree(rbd_dev->header_name);
-	kfree(rbd_dev->pool_name);
-	kfree(rbd_dev->image_name);
 	rbd_dev_id_put(rbd_dev);
-	kfree(rbd_dev);
+	rbd_assert(rbd_dev->rbd_client != NULL);
+	rbd_dev_destroy(rbd_dev);
 
 	/* release module ref */
 	module_put(THIS_MODULE);
@@ -3211,7 +3743,12 @@
 		goto done;
 	}
 
-	__rbd_remove_all_snaps(rbd_dev);
+	if (rbd_dev->open_count) {
+		ret = -EBUSY;
+		goto done;
+	}
+
+	rbd_remove_all_snaps(rbd_dev);
 	rbd_bus_del_dev(rbd_dev);
 
 done:
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
index cbe77fa..49d77cb 100644
--- a/drivers/block/rbd_types.h
+++ b/drivers/block/rbd_types.h
@@ -46,8 +46,6 @@
 #define RBD_MIN_OBJ_ORDER       16
 #define RBD_MAX_OBJ_ORDER       30
 
-#define RBD_MAX_SEG_NAME_LEN	128
-
 #define RBD_COMP_NONE		0
 #define RBD_CRYPT_NONE		0
 
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 280a138..74374fb 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/bitmap.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -79,6 +80,7 @@
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
+	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -99,6 +101,36 @@
 static struct xen_blkbk *blkbk;
 
 /*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+	switch (protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_32:
+		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_64:
+		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	default:
+		BUG();
+	}
+	return 0;
+}
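
A quick standalone check of the arithmetic quoted in the comment above
(the constants are the figures from that comment, not derived here):

	#include <stdio.h>

	int main(void)
	{
		unsigned int ring_size = 32;	/* RING_SIZE */
		unsigned int segs = 11;		/* BLKIF_MAX_SEGMENTS_PER_REQUEST */
		unsigned int gnt_size = 48;	/* sizeof(struct persistent_gnt) */

		printf("max grants: %u\n", ring_size * segs);		/* 352 */
		printf("max bytes: %u\n", ring_size * segs * gnt_size);	/* 16896 */
		return 0;
	}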
+
+/*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have up to
  * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
@@ -129,6 +161,90 @@
 static void make_response(struct xen_blkif *blkif, u64 id,
 			  unsigned short op, int st);
 
+#define foreach_grant(pos, rbtree, node) \
+	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+	     &(pos)->node != NULL; \
+	     (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+			       struct persistent_gnt *persistent_gnt)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct persistent_gnt *this;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		this = container_of(*new, struct persistent_gnt, node);
+
+		parent = *new;
+		if (persistent_gnt->gnt < this->gnt)
+			new = &((*new)->rb_left);
+		else if (persistent_gnt->gnt > this->gnt)
+			new = &((*new)->rb_right);
+		else {
+			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+			BUG();
+		}
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&(persistent_gnt->node), parent, new);
+	rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+						 grant_ref_t gref)
+{
+	struct persistent_gnt *data;
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		data = container_of(node, struct persistent_gnt, node);
+
+		if (gref < data->gnt)
+			node = node->rb_left;
+		else if (gref > data->gnt)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret = 0;
+	int segs_to_unmap = 0;
+
+	foreach_grant(persistent_gnt, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+		gnttab_set_unmap_op(&unmap[segs_to_unmap],
+			(unsigned long) pfn_to_kaddr(page_to_pfn(
+				persistent_gnt->page)),
+			GNTMAP_host_map,
+			persistent_gnt->handle);
+
+		pages[segs_to_unmap] = persistent_gnt->page;
+		rb_erase(&persistent_gnt->node, root);
+		kfree(persistent_gnt);
+		num--;
+
+		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+			!rb_next(&persistent_gnt->node)) {
+			ret = gnttab_unmap_refs(unmap, NULL, pages,
+				segs_to_unmap);
+			BUG_ON(ret);
+			segs_to_unmap = 0;
+		}
+	}
+	BUG_ON(num != 0);
+}
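
free_persistent_gnts() flushes unmap operations in batches of
BLKIF_MAX_SEGMENTS_PER_REQUEST.  A standalone sketch of that
accumulate-and-flush pattern (unmap_batch() is a made-up stand-in for
gnttab_unmap_refs()):

	#include <stdio.h>

	#define BATCH_MAX	11	/* BLKIF_MAX_SEGMENTS_PER_REQUEST */

	static void unmap_batch(const int *refs, int count)
	{
		printf("unmapping %d grants starting at ref %d\n",
		       count, refs[0]);
	}

	int main(void)
	{
		int refs[BATCH_MAX];
		int pending = 0;
		int total = 25;		/* grants left to free, say */
		int i;

		for (i = 0; i < total; i++) {
			refs[pending++] = i;
			/* flush when full, or when nothing follows */
			if (pending == BATCH_MAX || i == total - 1) {
				unmap_batch(refs, pending);
				pending = 0;
			}
		}
		return 0;	/* batches of 11, 11 and 3 */
	}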
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -302,6 +418,14 @@
 			print_stats(blkif);
 	}
 
+	/* Free all persistent grant pages */
+	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
+		free_persistent_gnts(&blkif->persistent_gnts,
+			blkif->persistent_gnt_c);
+
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+	blkif->persistent_gnt_c = 0;
+
 	if (log_stats)
 		print_stats(blkif);
 
@@ -328,6 +452,8 @@
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
+		if (!test_bit(i, req->unmap_seg))
+			continue;
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
@@ -344,12 +470,26 @@
 
 static int xen_blkbk_map(struct blkif_request *req,
 			 struct pending_req *pending_req,
-			 struct seg_buf seg[])
+			 struct seg_buf seg[],
+			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int i;
+	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt = NULL;
+	struct xen_blkif *blkif = pending_req->blkif;
+	phys_addr_t addr = 0;
+	int i, j;
+	bool new_map;
 	int nseg = req->u.rw.nr_segments;
+	int segs_to_map = 0;
 	int ret = 0;
+	int use_persistent_gnts;
+
+	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+	BUG_ON(blkif->persistent_gnt_c >
+		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));
 
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +499,146 @@
 	for (i = 0; i < nseg; i++) {
 		uint32_t flags;
 
-		flags = GNTMAP_host_map;
-		if (pending_req->operation != BLKIF_OP_READ)
-			flags |= GNTMAP_readonly;
-		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-				  req->u.rw.seg[i].gref,
-				  pending_req->blkif->domid);
+		if (use_persistent_gnts)
+			persistent_gnt = get_persistent_gnt(
+				&blkif->persistent_gnts,
+				req->u.rw.seg[i].gref);
+
+		if (persistent_gnt) {
+			/*
+			 * We are using persistent grants and
+			 * the grant is already mapped
+			 */
+			new_map = false;
+		} else if (use_persistent_gnts &&
+			   blkif->persistent_gnt_c <
+			   max_mapped_grant_pages(blkif->blk_protocol)) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we have room for it
+			 */
+			new_map = true;
+			persistent_gnt = kmalloc(
+				sizeof(struct persistent_gnt),
+				GFP_KERNEL);
+			if (!persistent_gnt)
+				return -ENOMEM;
+			persistent_gnt->page = alloc_page(GFP_KERNEL);
+			if (!persistent_gnt->page) {
+				kfree(persistent_gnt);
+				return -ENOMEM;
+			}
+			persistent_gnt->gnt = req->u.rw.seg[i].gref;
+			persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
+
+			pages_to_gnt[segs_to_map] =
+				persistent_gnt->page;
+			addr = (unsigned long) pfn_to_kaddr(
+				page_to_pfn(persistent_gnt->page));
+
+			add_persistent_gnt(&blkif->persistent_gnts,
+				persistent_gnt);
+			blkif->persistent_gnt_c++;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 max_mapped_grant_pages(blkif->blk_protocol));
+		} else {
+			/*
+			 * We are either using persistent grants and
+			 * hit the maximum limit of grants mapped,
+			 * or we are not using persistent grants.
+			 */
+			if (use_persistent_gnts &&
+				!blkif->vbd.overflow_max_grants) {
+				blkif->vbd.overflow_max_grants = 1;
+				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+					 blkif->domid, blkif->vbd.handle);
+			}
+			new_map = true;
+			pages[i] = blkbk->pending_page(pending_req, i);
+			addr = vaddr(pending_req, i);
+			pages_to_gnt[segs_to_map] =
+				blkbk->pending_page(pending_req, i);
+		}
+
+		if (persistent_gnt) {
+			pages[i] = persistent_gnt->page;
+			persistent_gnts[i] = persistent_gnt;
+		} else {
+			persistent_gnts[i] = NULL;
+		}
+
+		if (new_map) {
+			flags = GNTMAP_host_map;
+			if (!persistent_gnt &&
+			    (pending_req->operation != BLKIF_OP_READ))
+				flags |= GNTMAP_readonly;
+			gnttab_set_map_op(&map[segs_to_map++], addr,
+					  flags, req->u.rw.seg[i].gref,
+					  blkif->domid);
+		}
 	}
 
-	ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
-	BUG_ON(ret);
+	if (segs_to_map) {
+		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+		BUG_ON(ret);
+	}
 
 	/*
 	 * Now swizzle the MFN in our domain with the MFN from the other domain
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	for (i = 0; i < nseg; i++) {
-		if (unlikely(map[i].status != 0)) {
-			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-			map[i].handle = BLKBACK_INVALID_HANDLE;
-			ret |= 1;
+	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	for (i = 0, j = 0; i < nseg; i++) {
+		if (!persistent_gnts[i] ||
+		    persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+			/* This is a newly mapped grant */
+			BUG_ON(j >= segs_to_map);
+			if (unlikely(map[j].status != 0)) {
+				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+				map[j].handle = BLKBACK_INVALID_HANDLE;
+				ret |= 1;
+				if (persistent_gnts[i]) {
+					rb_erase(&persistent_gnts[i]->node,
+						 &blkif->persistent_gnts);
+					blkif->persistent_gnt_c--;
+					kfree(persistent_gnts[i]);
+					persistent_gnts[i] = NULL;
+				}
+			}
 		}
+		if (persistent_gnts[i]) {
+			if (persistent_gnts[i]->handle ==
+			    BLKBACK_INVALID_HANDLE) {
+				/*
+				 * If this is a new persistent grant,
+				 * save the handle
+				 */
+				persistent_gnts[i]->handle = map[j].handle;
+				persistent_gnts[i]->dev_bus_addr =
+					map[j++].dev_bus_addr;
+			}
+			pending_handle(pending_req, i) =
+				persistent_gnts[i]->handle;
 
-		pending_handle(pending_req, i) = map[i].handle;
+			if (ret)
+				continue;
 
-		if (ret)
-			continue;
+			seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		} else {
+			pending_handle(pending_req, i) = map[j].handle;
+			bitmap_set(pending_req->unmap_seg, i, 1);
 
-		seg[i].buf  = map[i].dev_bus_addr |
-			(req->u.rw.seg[i].first_sect << 9);
+			if (ret) {
+				j++;
+				continue;
+			}
+
+			seg[i].buf = map[j++].dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		}
 	}
 	return ret;
 }
@@ -591,6 +841,7 @@
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -677,7 +928,7 @@
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (xen_blkbk_map(req, pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -689,7 +940,7 @@
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     blkbk->pending_page(pending_req, i),
+				     pages[i],
 				     seg[i].nsec << 9,
 				     seg[i].buf & ~PAGE_MASK) == 0)) {
 
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9a54623..6072390 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/rbtree.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
 #include <asm/hypervisor.h>
@@ -160,10 +161,21 @@
 	sector_t		size;
 	unsigned int		flush_support:1;
 	unsigned int		discard_secure:1;
+	unsigned int		feature_gnt_persistent:1;
+	unsigned int		overflow_max_grants:1;
 };
 
 struct backend_info;
 
+
+struct persistent_gnt {
+	struct page *page;
+	grant_ref_t gnt;
+	grant_handle_t handle;
+	uint64_t dev_bus_addr;
+	struct rb_node node;
+};
+
 struct xen_blkif {
 	/* Unique identifier for this interface. */
 	domid_t			domid;
@@ -190,6 +202,10 @@
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
 
+	/* tree to store persistent grants */
+	struct rb_root		persistent_gnts;
+	unsigned int		persistent_gnt_c;
+
 	/* statistics */
 	unsigned long		st_print;
 	int			st_rd_req;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index f58434c..6398072 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -117,6 +117,7 @@
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
+	blkif->persistent_gnts.rb_node = NULL;
 
 	return blkif;
 }
@@ -672,6 +673,13 @@
 
 	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+				 dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 	if (err) {
@@ -720,6 +728,7 @@
 	struct xenbus_device *dev = be->dev;
 	unsigned long ring_ref;
 	unsigned int evtchn;
+	unsigned int pers_grants;
 	char protocol[64] = "";
 	int err;
 
@@ -749,8 +758,18 @@
 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
 		return -1;
 	}
-	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-		ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "feature-persistent", "%u",
+			    &pers_grants, NULL);
+	if (err)
+		pers_grants = 0;
+
+	be->blkif->vbd.feature_gnt_persistent = pers_grants;
+	be->blkif->vbd.overflow_max_grants = 0;
+
+	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+		pers_grants ? "persistent grants" : "");
 
 	/* Map the shared frame, irq etc. */
 	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 007db89..96e9b00 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,6 +44,7 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
+#include <linux/llist.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -64,10 +65,17 @@
 	BLKIF_STATE_SUSPENDED,
 };
 
+struct grant {
+	grant_ref_t gref;
+	unsigned long pfn;
+	struct llist_node node;
+};
+
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@
 	struct work_struct work;
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_RING_SIZE];
+	struct llist_head persistent_gnts;
+	unsigned int persistent_gnts_c;
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
@@ -104,6 +114,7 @@
 	unsigned int feature_secdiscard:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
+	unsigned int feature_persistent:1;
 	int is_ready;
 };
 
@@ -287,21 +298,36 @@
 	unsigned long id;
 	unsigned int fsect, lsect;
 	int i, ref;
+
+	/*
+	 * Records whether we can queue the request using only existing
+	 * persistent grants, or whether we must allocate new grants
+	 * because not enough of them are free.
+	 */
+	bool new_persistent_gnts;
 	grant_ref_t gref_head;
+	struct page *granted_page;
+	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	if (gnttab_alloc_grant_references(
-		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
-		gnttab_request_free_callback(
-			&info->callback,
-			blkif_restart_queue_callback,
-			info,
-			BLKIF_MAX_SEGMENTS_PER_REQUEST);
-		return 1;
-	}
+	/* Check if we have enough grants to allocate a request */
+	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+		new_persistent_gnts = 1;
+		if (gnttab_alloc_grant_references(
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    &gref_head) < 0) {
+			gnttab_request_free_callback(
+				&info->callback,
+				blkif_restart_queue_callback,
+				info,
+				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+			return 1;
+		}
+	} else
+		new_persistent_gnts = 0;
 
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@
 		       BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
 		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
-			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
-			/* install a grant reference. */
-			ref = gnttab_claim_grant_reference(&gref_head);
-			BUG_ON(ref == -ENOSPC);
 
-			gnttab_grant_foreign_access_ref(
-					ref,
+			if (info->persistent_gnts_c) {
+				BUG_ON(llist_empty(&info->persistent_gnts));
+				gnt_list_entry = llist_entry(
+					llist_del_first(&info->persistent_gnts),
+					struct grant, node);
+
+				ref = gnt_list_entry->gref;
+				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+				info->persistent_gnts_c--;
+			} else {
+				ref = gnttab_claim_grant_reference(&gref_head);
+				BUG_ON(ref == -ENOSPC);
+
+				gnt_list_entry =
+					kmalloc(sizeof(struct grant),
+							 GFP_ATOMIC);
+				if (!gnt_list_entry)
+					return -ENOMEM;
+
+				granted_page = alloc_page(GFP_ATOMIC);
+				if (!granted_page) {
+					kfree(gnt_list_entry);
+					return -ENOMEM;
+				}
+
+				gnt_list_entry->pfn =
+					page_to_pfn(granted_page);
+				gnt_list_entry->gref = ref;
+
+				buffer_mfn = pfn_to_mfn(page_to_pfn(
+								granted_page));
+				gnttab_grant_foreign_access_ref(ref,
 					info->xbdev->otherend_id,
-					buffer_mfn,
-					rq_data_dir(req));
+					buffer_mfn, 0);
+			}
+
+			info->shadow[id].grants_used[i] = gnt_list_entry;
+
+			if (rq_data_dir(req)) {
+				char *bvec_data;
+				void *shared_data;
+
+				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+				shared_data = kmap_atomic(
+					pfn_to_page(gnt_list_entry->pfn));
+				bvec_data = kmap_atomic(sg_page(sg));
+
+				/*
+				 * This does not wipe data stored outside the
+				 * range sg->offset..sg->offset+sg->length.
+				 * Therefore, blkback *could* see data from
+				 * previous requests. This is OK as long as
+				 * persistent grants are shared with just one
+				 * domain. It may need refactoring if this
+				 * changes.
+				 */
+				memcpy(shared_data + sg->offset,
+				       bvec_data   + sg->offset,
+				       sg->length);
+
+				kunmap_atomic(bvec_data);
+				kunmap_atomic(shared_data);
+			}
 
 			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
 			ring_req->u.rw.seg[i] =
@@ -368,7 +449,8 @@
 	/* Keep a private copy so we can reissue requests when recovering. */
 	info->shadow[id].req = *ring_req;
 
-	gnttab_free_grant_references(gref_head);
+	if (new_persistent_gnts)
+		gnttab_free_grant_references(gref_head);
 
 	return 0;
 }
@@ -480,12 +562,13 @@
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled");
+	       info->feature_flush ? "enabled" : "disabled",
+	       info->feature_persistent ? "using persistent grants" : "");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@
 
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+	struct llist_node *all_gnts;
+	struct grant *persistent_gnt;
+
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
 	info->connected = suspend ?
@@ -714,6 +800,18 @@
 	/* No more blkif_request(). */
 	if (info->rq)
 		blk_stop_queue(info->rq);
+
+	/* Remove all persistent grants */
+	if (info->persistent_gnts_c) {
+		all_gnts = llist_del_all(&info->persistent_gnts);
+		llist_for_each_entry(persistent_gnt, all_gnts, node) {
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+		info->persistent_gnts_c = 0;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -734,13 +832,43 @@
 
 }
 
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+			     struct blkif_response *bret)
 {
 	int i;
-	/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
-	 * flag. */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++)
-		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+	struct bio_vec *bvec;
+	struct req_iterator iter;
+	unsigned long flags;
+	char *bvec_data;
+	void *shared_data;
+	unsigned int offset = 0;
+
+	if (bret->operation == BLKIF_OP_READ) {
+		/*
+		 * Copy the data received from the backend into the bvec.
+		 * Since bv_offset can be different from 0, and bv_len different
+		 * from PAGE_SIZE, we have to keep track of the current offset,
+		 * to be sure we are copying the data from the right shared page.
+		 */
+		rq_for_each_segment(bvec, s->request, iter) {
+			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
+			i = offset >> PAGE_SHIFT;
+			BUG_ON(i >= s->req.u.rw.nr_segments);
+			shared_data = kmap_atomic(
+				pfn_to_page(s->grants_used[i]->pfn));
+			bvec_data = bvec_kmap_irq(bvec, &flags);
+			memcpy(bvec_data, shared_data + bvec->bv_offset,
+				bvec->bv_len);
+			bvec_kunmap_irq(bvec_data, &flags);
+			kunmap_atomic(shared_data);
+			offset += bvec->bv_len;
+		}
+	}
+	/* Add the persistent grant into the list of free grants */
+	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+		llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+		info->persistent_gnts_c++;
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +911,7 @@
 		req  = info->shadow[id].request;
 
 		if (bret->operation != BLKIF_OP_DISCARD)
-			blkif_completion(&info->shadow[id]);
+			blkif_completion(&info->shadow[id], info, bret);
 
 		if (add_id_to_freelist(info, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1070,11 @@
 		message = "writing protocol";
 		goto abort_transaction;
 	}
+	err = xenbus_printf(xbt, dev->nodename,
+			    "feature-persistent", "%u", 1);
+	if (err)
+		dev_warn(&dev->dev,
+			 "writing persistent grants feature to xenbus");
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
@@ -1029,6 +1162,8 @@
 	spin_lock_init(&info->io_lock);
 	info->xbdev = dev;
 	info->vdevice = vdevice;
+	init_llist_head(&info->persistent_gnts);
+	info->persistent_gnts_c = 0;
 	info->connected = BLKIF_STATE_DISCONNECTED;
 	INIT_WORK(&info->work, blkif_restart_queue);
 
@@ -1093,7 +1228,7 @@
 					req->u.rw.seg[j].gref,
 					info->xbdev->otherend_id,
 					pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
-					rq_data_dir(info->shadow[req->u.rw.id].request));
+					0);
 		}
 		info->shadow[req->u.rw.id].req = *req;
 
@@ -1225,7 +1360,7 @@
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush, discard;
+	int barrier, flush, discard, persistent;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1438,14 @@
 	if (!err && discard)
 		blkfront_setup_discard(info);
 
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-persistent", "%u", &persistent,
+			    NULL);
+	if (err)
+		info->feature_persistent = 0;
+	else
+		info->feature_persistent = persistent;
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
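
The blkfront hunks above implement the guest side of persistent grants:
instead of claiming and ending a grant reference for every segment of every
request, granted pages are parked on a lock-free llist and recycled across
requests. A minimal sketch of that recycling pattern follows; the
stripped-down struct grant mirrors the patch, but get_grant()/put_grant()
are illustrative helpers, not the driver's own code.

#include <linux/llist.h>
#include <linux/slab.h>
#include <xen/grant_table.h>

struct grant {
	grant_ref_t gref;
	unsigned long pfn;
	struct llist_node node;
};

/* Reuse a cached granted page if one is available, else set up a new one. */
static struct grant *get_grant(struct llist_head *pool, unsigned int *count)
{
	struct grant *gnt;

	if (*count) {
		gnt = llist_entry(llist_del_first(pool), struct grant, node);
		(*count)--;
		return gnt;	/* gref and pfn are still valid */
	}

	gnt = kmalloc(sizeof(*gnt), GFP_ATOMIC);
	if (gnt) {
		/* ... claim a grant reference, back it with a fresh page,
		 * and grant the backend access with read-only = 0 ... */
	}
	return gnt;
}

/* On completion the grant goes back on the pool instead of being ended. */
static void put_grant(struct llist_head *pool, unsigned int *count,
		      struct grant *gnt)
{
	llist_add(&gnt->node, pool);
	(*count)++;
}
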
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index bbec35d..0f51ed6 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -6,6 +6,7 @@
 
 config OMAP_OCP2SCP
 	tristate "OMAP OCP2SCP DRIVER"
+	depends on ARCH_OMAP2PLUS
 	help
 	  Driver to enable ocp2scp module which transforms ocp interface
 	  protocol to scp protocol. In OMAP4, USB PHY is connected via
diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h
index 6ec0fff..1042c1b 100644
--- a/drivers/char/agp/intel-agp.h
+++ b/drivers/char/agp/intel-agp.h
@@ -62,12 +62,6 @@
 #define I810_PTE_LOCAL		0x00000002
 #define I810_PTE_VALID		0x00000001
 #define I830_PTE_SYSTEM_CACHED  0x00000006
-/* GT PTE cache control fields */
-#define GEN6_PTE_UNCACHED	0x00000002
-#define HSW_PTE_UNCACHED	0x00000000
-#define GEN6_PTE_LLC		0x00000004
-#define GEN6_PTE_LLC_MLC	0x00000006
-#define GEN6_PTE_GFDT		0x00000008
 
 #define I810_SMRAM_MISCC	0x70
 #define I810_GFX_MEM_WIN_SIZE	0x00010000
@@ -97,7 +91,6 @@
 #define G4x_GMCH_SIZE_VT_2M	(G4x_GMCH_SIZE_2M | G4x_GMCH_SIZE_VT_EN)
 
 #define GFX_FLSH_CNTL		0x2170 /* 915+ */
-#define GFX_FLSH_CNTL_VLV	0x101008
 
 #define I810_DRAM_CTL		0x3000
 #define I810_DRAM_ROW_0		0x00000001
@@ -148,29 +141,6 @@
 #define INTEL_I7505_AGPCTRL	0x70
 #define INTEL_I7505_MCHCFG	0x50
 
-#define SNB_GMCH_CTRL	0x50
-#define SNB_GMCH_GMS_STOLEN_MASK	0xF8
-#define SNB_GMCH_GMS_STOLEN_32M		(1 << 3)
-#define SNB_GMCH_GMS_STOLEN_64M		(2 << 3)
-#define SNB_GMCH_GMS_STOLEN_96M		(3 << 3)
-#define SNB_GMCH_GMS_STOLEN_128M	(4 << 3)
-#define SNB_GMCH_GMS_STOLEN_160M	(5 << 3)
-#define SNB_GMCH_GMS_STOLEN_192M	(6 << 3)
-#define SNB_GMCH_GMS_STOLEN_224M	(7 << 3)
-#define SNB_GMCH_GMS_STOLEN_256M	(8 << 3)
-#define SNB_GMCH_GMS_STOLEN_288M	(9 << 3)
-#define SNB_GMCH_GMS_STOLEN_320M	(0xa << 3)
-#define SNB_GMCH_GMS_STOLEN_352M	(0xb << 3)
-#define SNB_GMCH_GMS_STOLEN_384M	(0xc << 3)
-#define SNB_GMCH_GMS_STOLEN_416M	(0xd << 3)
-#define SNB_GMCH_GMS_STOLEN_448M	(0xe << 3)
-#define SNB_GMCH_GMS_STOLEN_480M	(0xf << 3)
-#define SNB_GMCH_GMS_STOLEN_512M	(0x10 << 3)
-#define SNB_GTT_SIZE_0M			(0 << 8)
-#define SNB_GTT_SIZE_1M			(1 << 8)
-#define SNB_GTT_SIZE_2M			(2 << 8)
-#define SNB_GTT_SIZE_MASK		(3 << 8)
-
 /* pci devices ids */
 #define PCI_DEVICE_ID_INTEL_E7221_HB	0x2588
 #define PCI_DEVICE_ID_INTEL_E7221_IG	0x258a
@@ -219,66 +189,5 @@
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB	    0x0062
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB    0x006a
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG	    0x0046
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB		0x0100  /* Desktop */
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT1_IG		0x0102
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_IG		0x0112
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_PLUS_IG	0x0122
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB		0x0104  /* Mobile */
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT1_IG	0x0106
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_IG	0x0116
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_PLUS_IG	0x0126
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_HB		0x0108  /* Server */
-#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_IG		0x010A
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_HB		0x0150  /* Desktop */
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_GT1_IG		0x0152
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_GT2_IG		0x0162
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_M_HB		0x0154  /* Mobile */
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_M_GT1_IG		0x0156
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_M_GT2_IG		0x0166
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_S_HB		0x0158  /* Server */
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_S_GT1_IG		0x015A
-#define PCI_DEVICE_ID_INTEL_IVYBRIDGE_S_GT2_IG		0x016A
-#define PCI_DEVICE_ID_INTEL_VALLEYVIEW_HB		0x0F00 /* VLV1 */
-#define PCI_DEVICE_ID_INTEL_VALLEYVIEW_IG		0x0F30
-#define PCI_DEVICE_ID_INTEL_HASWELL_HB			0x0400 /* Desktop */
-#define PCI_DEVICE_ID_INTEL_HASWELL_D_GT1_IG		0x0402
-#define PCI_DEVICE_ID_INTEL_HASWELL_D_GT2_IG		0x0412
-#define PCI_DEVICE_ID_INTEL_HASWELL_D_GT2_PLUS_IG	0x0422
-#define PCI_DEVICE_ID_INTEL_HASWELL_M_HB		0x0404 /* Mobile */
-#define PCI_DEVICE_ID_INTEL_HASWELL_M_GT1_IG		0x0406
-#define PCI_DEVICE_ID_INTEL_HASWELL_M_GT2_IG		0x0416
-#define PCI_DEVICE_ID_INTEL_HASWELL_M_GT2_PLUS_IG	0x0426
-#define PCI_DEVICE_ID_INTEL_HASWELL_S_HB		0x0408 /* Server */
-#define PCI_DEVICE_ID_INTEL_HASWELL_S_GT1_IG		0x040a
-#define PCI_DEVICE_ID_INTEL_HASWELL_S_GT2_IG		0x041a
-#define PCI_DEVICE_ID_INTEL_HASWELL_S_GT2_PLUS_IG	0x042a
-#define PCI_DEVICE_ID_INTEL_HASWELL_E_HB		0x0c04
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_D_GT1_IG	0x0C02
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_D_GT2_IG	0x0C12
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_D_GT2_PLUS_IG	0x0C22
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_M_GT1_IG	0x0C06
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_M_GT2_IG	0x0C16
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_M_GT2_PLUS_IG	0x0C26
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_S_GT1_IG	0x0C0A
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_S_GT2_IG	0x0C1A
-#define PCI_DEVICE_ID_INTEL_HASWELL_SDV_S_GT2_PLUS_IG	0x0C2A
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_D_GT1_IG	0x0A02
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_D_GT2_IG	0x0A12
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_D_GT2_PLUS_IG	0x0A22
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_M_GT1_IG	0x0A06
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_M_GT2_IG	0x0A16
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_M_GT2_PLUS_IG	0x0A26
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_S_GT1_IG	0x0A0A
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_S_GT2_IG	0x0A1A
-#define PCI_DEVICE_ID_INTEL_HASWELL_ULT_S_GT2_PLUS_IG	0x0A2A
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_D_GT1_IG	0x0D12
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_D_GT2_IG	0x0D22
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_D_GT2_PLUS_IG	0x0D32
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_M_GT1_IG	0x0D16
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_M_GT2_IG	0x0D26
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_M_GT2_PLUS_IG	0x0D36
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_S_GT1_IG	0x0D1A
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_S_GT2_IG	0x0D2A
-#define PCI_DEVICE_ID_INTEL_HASWELL_CRW_S_GT2_PLUS_IG	0x0D3A
 
 #endif
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 38390f7..dbd901e 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -367,62 +367,6 @@
 			stolen_size = 0;
 			break;
 		}
-	} else if (INTEL_GTT_GEN == 6) {
-		/*
-		 * SandyBridge has new memory control reg at 0x50.w
-		 */
-		u16 snb_gmch_ctl;
-		pci_read_config_word(intel_private.pcidev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-		switch (snb_gmch_ctl & SNB_GMCH_GMS_STOLEN_MASK) {
-		case SNB_GMCH_GMS_STOLEN_32M:
-			stolen_size = MB(32);
-			break;
-		case SNB_GMCH_GMS_STOLEN_64M:
-			stolen_size = MB(64);
-			break;
-		case SNB_GMCH_GMS_STOLEN_96M:
-			stolen_size = MB(96);
-			break;
-		case SNB_GMCH_GMS_STOLEN_128M:
-			stolen_size = MB(128);
-			break;
-		case SNB_GMCH_GMS_STOLEN_160M:
-			stolen_size = MB(160);
-			break;
-		case SNB_GMCH_GMS_STOLEN_192M:
-			stolen_size = MB(192);
-			break;
-		case SNB_GMCH_GMS_STOLEN_224M:
-			stolen_size = MB(224);
-			break;
-		case SNB_GMCH_GMS_STOLEN_256M:
-			stolen_size = MB(256);
-			break;
-		case SNB_GMCH_GMS_STOLEN_288M:
-			stolen_size = MB(288);
-			break;
-		case SNB_GMCH_GMS_STOLEN_320M:
-			stolen_size = MB(320);
-			break;
-		case SNB_GMCH_GMS_STOLEN_352M:
-			stolen_size = MB(352);
-			break;
-		case SNB_GMCH_GMS_STOLEN_384M:
-			stolen_size = MB(384);
-			break;
-		case SNB_GMCH_GMS_STOLEN_416M:
-			stolen_size = MB(416);
-			break;
-		case SNB_GMCH_GMS_STOLEN_448M:
-			stolen_size = MB(448);
-			break;
-		case SNB_GMCH_GMS_STOLEN_480M:
-			stolen_size = MB(480);
-			break;
-		case SNB_GMCH_GMS_STOLEN_512M:
-			stolen_size = MB(512);
-			break;
-		}
 	} else {
 		switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
 		case I855_GMCH_GMS_STOLEN_1M:
@@ -556,29 +500,9 @@
 
 static unsigned int intel_gtt_total_entries(void)
 {
-	int size;
-
 	if (IS_G33 || INTEL_GTT_GEN == 4 || INTEL_GTT_GEN == 5)
 		return i965_gtt_total_entries();
-	else if (INTEL_GTT_GEN == 6) {
-		u16 snb_gmch_ctl;
-
-		pci_read_config_word(intel_private.pcidev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-		switch (snb_gmch_ctl & SNB_GTT_SIZE_MASK) {
-		default:
-		case SNB_GTT_SIZE_0M:
-			printk(KERN_ERR "Bad GTT size mask: 0x%04x.\n", snb_gmch_ctl);
-			size = MB(0);
-			break;
-		case SNB_GTT_SIZE_1M:
-			size = MB(1);
-			break;
-		case SNB_GTT_SIZE_2M:
-			size = MB(2);
-			break;
-		}
-		return size/4;
-	} else {
+	else {
 		/* On previous hardware, the GTT size was just what was
 		 * required to map the aperture.
 		 */
@@ -778,9 +702,6 @@
 {
 	u8 __iomem *reg;
 
-	if (INTEL_GTT_GEN >= 6)
-	    return true;
-
 	if (INTEL_GTT_GEN == 2) {
 		u16 gmch_ctrl;
 
@@ -1149,85 +1070,6 @@
 	writel(addr | pte_flags, intel_private.gtt + entry);
 }
 
-static bool gen6_check_flags(unsigned int flags)
-{
-	return true;
-}
-
-static void haswell_write_entry(dma_addr_t addr, unsigned int entry,
-				unsigned int flags)
-{
-	unsigned int type_mask = flags & ~AGP_USER_CACHED_MEMORY_GFDT;
-	unsigned int gfdt = flags & AGP_USER_CACHED_MEMORY_GFDT;
-	u32 pte_flags;
-
-	if (type_mask == AGP_USER_MEMORY)
-		pte_flags = HSW_PTE_UNCACHED | I810_PTE_VALID;
-	else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC) {
-		pte_flags = GEN6_PTE_LLC_MLC | I810_PTE_VALID;
-		if (gfdt)
-			pte_flags |= GEN6_PTE_GFDT;
-	} else { /* set 'normal'/'cached' to LLC by default */
-		pte_flags = GEN6_PTE_LLC | I810_PTE_VALID;
-		if (gfdt)
-			pte_flags |= GEN6_PTE_GFDT;
-	}
-
-	/* gen6 has bit11-4 for physical addr bit39-32 */
-	addr |= (addr >> 28) & 0xff0;
-	writel(addr | pte_flags, intel_private.gtt + entry);
-}
-
-static void gen6_write_entry(dma_addr_t addr, unsigned int entry,
-			     unsigned int flags)
-{
-	unsigned int type_mask = flags & ~AGP_USER_CACHED_MEMORY_GFDT;
-	unsigned int gfdt = flags & AGP_USER_CACHED_MEMORY_GFDT;
-	u32 pte_flags;
-
-	if (type_mask == AGP_USER_MEMORY)
-		pte_flags = GEN6_PTE_UNCACHED | I810_PTE_VALID;
-	else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC) {
-		pte_flags = GEN6_PTE_LLC_MLC | I810_PTE_VALID;
-		if (gfdt)
-			pte_flags |= GEN6_PTE_GFDT;
-	} else { /* set 'normal'/'cached' to LLC by default */
-		pte_flags = GEN6_PTE_LLC | I810_PTE_VALID;
-		if (gfdt)
-			pte_flags |= GEN6_PTE_GFDT;
-	}
-
-	/* gen6 has bit11-4 for physical addr bit39-32 */
-	addr |= (addr >> 28) & 0xff0;
-	writel(addr | pte_flags, intel_private.gtt + entry);
-}
-
-static void valleyview_write_entry(dma_addr_t addr, unsigned int entry,
-				   unsigned int flags)
-{
-	unsigned int type_mask = flags & ~AGP_USER_CACHED_MEMORY_GFDT;
-	unsigned int gfdt = flags & AGP_USER_CACHED_MEMORY_GFDT;
-	u32 pte_flags;
-
-	if (type_mask == AGP_USER_MEMORY)
-		pte_flags = GEN6_PTE_UNCACHED | I810_PTE_VALID;
-	else {
-		pte_flags = GEN6_PTE_LLC | I810_PTE_VALID;
-		if (gfdt)
-			pte_flags |= GEN6_PTE_GFDT;
-	}
-
-	/* gen6 has bit11-4 for physical addr bit39-32 */
-	addr |= (addr >> 28) & 0xff0;
-	writel(addr | pte_flags, intel_private.gtt + entry);
-
-	writel(1, intel_private.registers + GFX_FLSH_CNTL_VLV);
-}
-
-static void gen6_cleanup(void)
-{
-}
-
 /* Certain Gen5 chipsets require idling the GPU before
  * unmapping anything from the GTT when VT-d is enabled.
  */
@@ -1249,41 +1091,29 @@
 
 static int i9xx_setup(void)
 {
-	u32 reg_addr;
+	u32 reg_addr, gtt_addr;
 	int size = KB(512);
 
 	pci_read_config_dword(intel_private.pcidev, I915_MMADDR, &reg_addr);
 
 	reg_addr &= 0xfff80000;
 
-	if (INTEL_GTT_GEN >= 7)
-		size = MB(2);
-
 	intel_private.registers = ioremap(reg_addr, size);
 	if (!intel_private.registers)
 		return -ENOMEM;
 
-	if (INTEL_GTT_GEN == 3) {
-		u32 gtt_addr;
-
+	switch (INTEL_GTT_GEN) {
+	case 3:
 		pci_read_config_dword(intel_private.pcidev,
 				      I915_PTEADDR, &gtt_addr);
 		intel_private.gtt_bus_addr = gtt_addr;
-	} else {
-		u32 gtt_offset;
-
-		switch (INTEL_GTT_GEN) {
-		case 5:
-		case 6:
-		case 7:
-			gtt_offset = MB(2);
-			break;
-		case 4:
-		default:
-			gtt_offset =  KB(512);
-			break;
-		}
-		intel_private.gtt_bus_addr = reg_addr + gtt_offset;
+		break;
+	case 5:
+		intel_private.gtt_bus_addr = reg_addr + MB(2);
+		break;
+	default:
+		intel_private.gtt_bus_addr = reg_addr + KB(512);
+		break;
 	}
 
 	if (needs_idle_maps())
@@ -1395,32 +1225,6 @@
 	.check_flags = i830_check_flags,
 	.chipset_flush = i9xx_chipset_flush,
 };
-static const struct intel_gtt_driver sandybridge_gtt_driver = {
-	.gen = 6,
-	.setup = i9xx_setup,
-	.cleanup = gen6_cleanup,
-	.write_entry = gen6_write_entry,
-	.dma_mask_size = 40,
-	.check_flags = gen6_check_flags,
-	.chipset_flush = i9xx_chipset_flush,
-};
-static const struct intel_gtt_driver haswell_gtt_driver = {
-	.gen = 6,
-	.setup = i9xx_setup,
-	.cleanup = gen6_cleanup,
-	.write_entry = haswell_write_entry,
-	.dma_mask_size = 40,
-	.check_flags = gen6_check_flags,
-	.chipset_flush = i9xx_chipset_flush,
-};
-static const struct intel_gtt_driver valleyview_gtt_driver = {
-	.gen = 7,
-	.setup = i9xx_setup,
-	.cleanup = gen6_cleanup,
-	.write_entry = valleyview_write_entry,
-	.dma_mask_size = 40,
-	.check_flags = gen6_check_flags,
-};
 
 /* Table to describe Intel GMCH and AGP/PCIE GART drivers.  At least one of
  * driver and gmch_driver must be non-null, and find_gmch will determine
@@ -1501,106 +1305,6 @@
 	    "HD Graphics", &ironlake_gtt_driver },
 	{ PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG,
 	    "HD Graphics", &ironlake_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT1_IG,
-	    "Sandybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_IG,
-	    "Sandybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_PLUS_IG,
-	    "Sandybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT1_IG,
-	    "Sandybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_IG,
-	    "Sandybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_PLUS_IG,
-	    "Sandybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_IG,
-	    "Sandybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_IVYBRIDGE_GT1_IG,
-	    "Ivybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_IVYBRIDGE_GT2_IG,
-	    "Ivybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_IVYBRIDGE_M_GT1_IG,
-	    "Ivybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_IVYBRIDGE_M_GT2_IG,
-	    "Ivybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_IVYBRIDGE_S_GT1_IG,
-	    "Ivybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_IVYBRIDGE_S_GT2_IG,
-	    "Ivybridge", &sandybridge_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_VALLEYVIEW_IG,
-	    "ValleyView", &valleyview_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_D_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_D_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_D_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_M_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_M_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_M_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_S_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_S_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_S_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_D_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_D_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_D_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_M_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_M_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_M_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_S_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_S_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_SDV_S_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_D_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_D_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_D_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_M_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_M_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_M_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_S_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_S_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_ULT_S_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_D_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_D_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_D_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_M_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_M_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_M_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_S_GT1_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_S_GT2_IG,
-	    "Haswell", &haswell_gtt_driver },
-	{ PCI_DEVICE_ID_INTEL_HASWELL_CRW_S_GT2_PLUS_IG,
-	    "Haswell", &haswell_gtt_driver },
 	{ 0, NULL, NULL }
 };
 
@@ -1686,7 +1390,7 @@
 }
 EXPORT_SYMBOL(intel_gmch_probe);
 
-const struct intel_gtt *intel_gtt_get(void)
+struct intel_gtt *intel_gtt_get(void)
 {
 	return &intel_private.base;
 }
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b86eae9..85e81ec 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -399,7 +399,6 @@
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 static struct fasync_struct *fasync;
 
-#if 0
 static bool debug;
 module_param(debug, bool, 0644);
 #define DEBUG_ENT(fmt, arg...) do { \
@@ -410,9 +409,6 @@
 		blocking_pool.entropy_count,\
 		nonblocking_pool.entropy_count,\
 		## arg); } while (0)
-#else
-#define DEBUG_ENT(fmt, arg...) do {} while (0)
-#endif
 
 /**********************************************************************
  *
@@ -437,6 +433,7 @@
 	int entropy_count;
 	int entropy_total;
 	unsigned int initialized:1;
+	bool last_data_init;
 	__u8 last_data[EXTRACT_SIZE];
 };
 
@@ -829,7 +826,7 @@
 		bytes = min_t(int, bytes, sizeof(tmp));
 
 		DEBUG_ENT("going to reseed %s with %d bits "
-			  "(%d of %d requested)\n",
+			  "(%zu of %d requested)\n",
 			  r->name, bytes * 8, nbytes * 8, r->entropy_count);
 
 		bytes = extract_entropy(r->pull, tmp, bytes,
@@ -860,7 +857,7 @@
 	spin_lock_irqsave(&r->lock, flags);
 
 	BUG_ON(r->entropy_count > r->poolinfo->POOLBITS);
-	DEBUG_ENT("trying to extract %d bits from %s\n",
+	DEBUG_ENT("trying to extract %zu bits from %s\n",
 		  nbytes * 8, r->name);
 
 	/* Can we pull enough? */
@@ -882,7 +879,7 @@
 		}
 	}
 
-	DEBUG_ENT("debiting %d entropy credits from %s%s\n",
+	DEBUG_ENT("debiting %zu entropy credits from %s%s\n",
 		  nbytes * 8, r->name, r->limit ? "" : " (unlimited)");
 
 	spin_unlock_irqrestore(&r->lock, flags);
@@ -957,6 +954,10 @@
 	ssize_t ret = 0, i;
 	__u8 tmp[EXTRACT_SIZE];
 
+	/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
+	if (fips_enabled && !r->last_data_init)
+		nbytes += EXTRACT_SIZE;
+
 	trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_);
 	xfer_secondary_pool(r, nbytes);
 	nbytes = account(r, nbytes, min, reserved);
@@ -967,6 +968,17 @@
 		if (fips_enabled) {
 			unsigned long flags;
 
+
+			/* prime last_data value if need be, per fips 140-2 */
+			if (!r->last_data_init) {
+				spin_lock_irqsave(&r->lock, flags);
+				memcpy(r->last_data, tmp, EXTRACT_SIZE);
+				r->last_data_init = true;
+				nbytes -= EXTRACT_SIZE;
+				spin_unlock_irqrestore(&r->lock, flags);
+				extract_buf(r, tmp);
+			}
+
 			spin_lock_irqsave(&r->lock, flags);
 			if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
 				panic("Hardware RNG duplicated output!\n");
@@ -1086,6 +1098,7 @@
 
 	r->entropy_count = 0;
 	r->entropy_total = 0;
+	r->last_data_init = false;
 	mix_pool_bytes(r, &now, sizeof(now), NULL);
 	for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) {
 		if (!arch_get_random_long(&rv))
@@ -1142,11 +1155,16 @@
 		if (n > SEC_XFER_SIZE)
 			n = SEC_XFER_SIZE;
 
-		DEBUG_ENT("reading %d bits\n", n*8);
+		DEBUG_ENT("reading %zu bits\n", n*8);
 
 		n = extract_entropy_user(&blocking_pool, buf, n);
 
-		DEBUG_ENT("read got %d bits (%d still needed)\n",
+		if (n < 0) {
+			retval = n;
+			break;
+		}
+
+		DEBUG_ENT("read got %zd bits (%zd still needed)\n",
 			  n*8, (nbytes-n)*8);
 
 		if (n == 0) {
@@ -1171,10 +1189,6 @@
 			continue;
 		}
 
-		if (n < 0) {
-			retval = n;
-			break;
-		}
 		count += n;
 		buf += n;
 		nbytes -= n;
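
The random.c hunks prime last_data for the FIPS 140-2 continuous test: each
extracted block is compared against the previous one, so the very first
block must be consumed just to seed the comparison before any bytes are
handed out. A sketch of that check under simplified assumptions (one static
state instead of the driver's per-pool fields):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>

#define BLOCK 10			/* EXTRACT_SIZE in the driver */

static u8 last_data[BLOCK];
static bool last_data_init;

/* Returns -EAGAIN when the block was consumed to prime the state. */
static int fips_check_block(const u8 *block)
{
	if (!last_data_init) {
		memcpy(last_data, block, BLOCK);
		last_data_init = true;
		return -EAGAIN;		/* caller must extract a fresh block */
	}
	if (!memcmp(block, last_data, BLOCK))
		panic("Hardware RNG duplicated output!\n");
	memcpy(last_data, block, BLOCK);
	return 0;
}
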
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 7da840d..9978609 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -38,8 +38,6 @@
 };
 MODULE_DEVICE_TABLE(vio, tpm_ibmvtpm_device_table);
 
-DECLARE_WAIT_QUEUE_HEAD(wq);
-
 /**
  * ibmvtpm_send_crq - Send a CRQ request
  * @vdev:	vio device struct
@@ -83,6 +81,7 @@
 {
 	struct ibmvtpm_dev *ibmvtpm;
 	u16 len;
+	int sig;
 
 	ibmvtpm = (struct ibmvtpm_dev *)chip->vendor.data;
 
@@ -91,22 +90,23 @@
 		return 0;
 	}
 
-	wait_event_interruptible(wq, ibmvtpm->crq_res.len != 0);
+	sig = wait_event_interruptible(ibmvtpm->wq, ibmvtpm->res_len != 0);
+	if (sig)
+		return -EINTR;
 
-	if (count < ibmvtpm->crq_res.len) {
+	len = ibmvtpm->res_len;
+
+	if (count < len) {
 		dev_err(ibmvtpm->dev,
 			"Invalid size in recv: count=%ld, crq_size=%d\n",
-			count, ibmvtpm->crq_res.len);
+			count, len);
 		return -EIO;
 	}
 
 	spin_lock(&ibmvtpm->rtce_lock);
-	memcpy((void *)buf, (void *)ibmvtpm->rtce_buf, ibmvtpm->crq_res.len);
-	memset(ibmvtpm->rtce_buf, 0, ibmvtpm->crq_res.len);
-	ibmvtpm->crq_res.valid = 0;
-	ibmvtpm->crq_res.msg = 0;
-	len = ibmvtpm->crq_res.len;
-	ibmvtpm->crq_res.len = 0;
+	memcpy((void *)buf, (void *)ibmvtpm->rtce_buf, len);
+	memset(ibmvtpm->rtce_buf, 0, len);
+	ibmvtpm->res_len = 0;
 	spin_unlock(&ibmvtpm->rtce_lock);
 	return len;
 }
@@ -273,7 +273,6 @@
 	int rc = 0;
 
 	free_irq(vdev->irq, ibmvtpm);
-	tasklet_kill(&ibmvtpm->tasklet);
 
 	do {
 		if (rc)
@@ -372,7 +371,6 @@
 static int tpm_ibmvtpm_resume(struct device *dev)
 {
 	struct ibmvtpm_dev *ibmvtpm = ibmvtpm_get_data(dev);
-	unsigned long flags;
 	int rc = 0;
 
 	do {
@@ -387,10 +385,11 @@
 		return rc;
 	}
 
-	spin_lock_irqsave(&ibmvtpm->lock, flags);
-	vio_disable_interrupts(ibmvtpm->vdev);
-	tasklet_schedule(&ibmvtpm->tasklet);
-	spin_unlock_irqrestore(&ibmvtpm->lock, flags);
+	rc = vio_enable_interrupts(ibmvtpm->vdev);
+	if (rc) {
+		dev_err(dev, "Error vio_enable_interrupts rc=%d\n", rc);
+		return rc;
+	}
 
 	rc = ibmvtpm_crq_send_init(ibmvtpm);
 	if (rc)
@@ -467,7 +466,7 @@
 	if (crq->valid & VTPM_MSG_RES) {
 		if (++crq_q->index == crq_q->num_entry)
 			crq_q->index = 0;
-		rmb();
+		smp_rmb();
 	} else
 		crq = NULL;
 	return crq;
@@ -535,11 +534,9 @@
 			ibmvtpm->vtpm_version = crq->data;
 			return;
 		case VTPM_TPM_COMMAND_RES:
-			ibmvtpm->crq_res.valid = crq->valid;
-			ibmvtpm->crq_res.msg = crq->msg;
-			ibmvtpm->crq_res.len = crq->len;
-			ibmvtpm->crq_res.data = crq->data;
-			wake_up_interruptible(&wq);
+			/* length of the data in the rtce buffer */
+			ibmvtpm->res_len = crq->len;
+			wake_up_interruptible(&ibmvtpm->wq);
 			return;
 		default:
 			return;
@@ -559,38 +556,19 @@
 static irqreturn_t ibmvtpm_interrupt(int irq, void *vtpm_instance)
 {
 	struct ibmvtpm_dev *ibmvtpm = (struct ibmvtpm_dev *) vtpm_instance;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ibmvtpm->lock, flags);
-	vio_disable_interrupts(ibmvtpm->vdev);
-	tasklet_schedule(&ibmvtpm->tasklet);
-	spin_unlock_irqrestore(&ibmvtpm->lock, flags);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * ibmvtpm_tasklet - Interrupt handler tasklet
- * @data:	ibm vtpm device struct
- *
- * Returns:
- *	Nothing
- **/
-static void ibmvtpm_tasklet(void *data)
-{
-	struct ibmvtpm_dev *ibmvtpm = data;
 	struct ibmvtpm_crq *crq;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ibmvtpm->lock, flags);
+	/* The while loop is needed for initial setup (get version and
+	 * get rtce_size). There should be only one tpm request at any
+	 * given time.
+	 */
 	while ((crq = ibmvtpm_crq_get_next(ibmvtpm)) != NULL) {
 		ibmvtpm_crq_process(crq, ibmvtpm);
 		crq->valid = 0;
-		wmb();
+		smp_wmb();
 	}
 
-	vio_enable_interrupts(ibmvtpm->vdev);
-	spin_unlock_irqrestore(&ibmvtpm->lock, flags);
+	return IRQ_HANDLED;
 }
 
 /**
@@ -650,9 +628,6 @@
 		goto reg_crq_cleanup;
 	}
 
-	tasklet_init(&ibmvtpm->tasklet, (void *)ibmvtpm_tasklet,
-		     (unsigned long)ibmvtpm);
-
 	rc = request_irq(vio_dev->irq, ibmvtpm_interrupt, 0,
 			 tpm_ibmvtpm_driver_name, ibmvtpm);
 	if (rc) {
@@ -666,13 +641,14 @@
 		goto init_irq_cleanup;
 	}
 
+	init_waitqueue_head(&ibmvtpm->wq);
+
 	crq_q->index = 0;
 
 	ibmvtpm->dev = dev;
 	ibmvtpm->vdev = vio_dev;
 	chip->vendor.data = (void *)ibmvtpm;
 
-	spin_lock_init(&ibmvtpm->lock);
 	spin_lock_init(&ibmvtpm->rtce_lock);
 
 	rc = ibmvtpm_crq_send_init(ibmvtpm);
@@ -689,7 +665,6 @@
 
 	return rc;
 init_irq_cleanup:
-	tasklet_kill(&ibmvtpm->tasklet);
 	do {
 		rc1 = plpar_hcall_norets(H_FREE_CRQ, vio_dev->unit_address);
 	} while (rc1 == H_BUSY || H_IS_LONG_BUSY(rc1));
diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h
index 4296eb4..bd82a79 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.h
+++ b/drivers/char/tpm/tpm_ibmvtpm.h
@@ -38,13 +38,12 @@
 	struct vio_dev *vdev;
 	struct ibmvtpm_crq_queue crq_queue;
 	dma_addr_t crq_dma_handle;
-	spinlock_t lock;
-	struct tasklet_struct tasklet;
 	u32 rtce_size;
 	void __iomem *rtce_buf;
 	dma_addr_t rtce_dma_handle;
 	spinlock_t rtce_lock;
-	struct ibmvtpm_crq crq_res;
+	wait_queue_head_t wq;
+	u16 res_len;
 	u32 vtpm_version;
 };
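
The ibmvtpm conversion replaces the tasklet and global wait queue with a
per-device wait queue: the IRQ handler now processes CRQs directly, records
the response length, and wakes the reader, which in turn propagates signals
as -EINTR instead of looping. The handshake, reduced to a sketch with an
assumed dev_state type standing in for struct ibmvtpm_dev:

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/wait.h>

struct dev_state {
	wait_queue_head_t wq;	/* init_waitqueue_head() at probe time */
	u16 res_len;		/* 0 means "no response yet" */
};

static int wait_for_response(struct dev_state *st)
{
	if (wait_event_interruptible(st->wq, st->res_len != 0))
		return -EINTR;
	return st->res_len;
}

/* Called from the interrupt path once a response CRQ has arrived. */
static void response_arrived(struct dev_state *st, u16 len)
{
	st->res_len = len;
	wake_up_interruptible(&st->wq);
}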
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 90493d4..c594cb1 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -37,8 +37,12 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/kconfig.h>
 #include "../tty/hvc/hvc_console.h"
 
+#define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC)
+
 /*
  * This is a global struct for storing common data for all the devices
  * this driver handles.
@@ -111,6 +115,21 @@
 	size_t len;
 	/* offset in the buf from which to consume data */
 	size_t offset;
+
+	/* DMA address of buffer */
+	dma_addr_t dma;
+
+	/* Device we got DMA memory from */
+	struct device *dev;
+
+	/* List of pending dma buffers to free */
+	struct list_head list;
+
+	/* If sgpages == 0 then buf is used */
+	unsigned int sgpages;
+
+	/* sg is used if sgpages > 0. sg must be the last field in this struct */
+	struct scatterlist sg[0];
 };
 
 /*
@@ -325,6 +344,11 @@
 	return false;
 }
 
+static bool is_rproc_serial(const struct virtio_device *vdev)
+{
+	return is_rproc_enabled && vdev->id.device == VIRTIO_ID_RPROC_SERIAL;
+}
+
 static inline bool use_multiport(struct ports_device *portdev)
 {
 	/*
@@ -336,20 +360,110 @@
 	return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
 }
 
-static void free_buf(struct port_buffer *buf)
+static DEFINE_SPINLOCK(dma_bufs_lock);
+static LIST_HEAD(pending_free_dma_bufs);
+
+static void free_buf(struct port_buffer *buf, bool can_sleep)
 {
-	kfree(buf->buf);
+	unsigned int i;
+
+	for (i = 0; i < buf->sgpages; i++) {
+		struct page *page = sg_page(&buf->sg[i]);
+		if (!page)
+			break;
+		put_page(page);
+	}
+
+	if (!buf->dev) {
+		kfree(buf->buf);
+	} else if (is_rproc_enabled) {
+		unsigned long flags;
+
+		/* dma_free_coherent requires interrupts to be enabled. */
+		if (!can_sleep) {
+			/* queue up dma-buffers to be freed later */
+			spin_lock_irqsave(&dma_bufs_lock, flags);
+			list_add_tail(&buf->list, &pending_free_dma_bufs);
+			spin_unlock_irqrestore(&dma_bufs_lock, flags);
+			return;
+		}
+		dma_free_coherent(buf->dev, buf->size, buf->buf, buf->dma);
+
+		/* Release device refcnt and allow it to be freed */
+		put_device(buf->dev);
+	}
+
 	kfree(buf);
 }
 
-static struct port_buffer *alloc_buf(size_t buf_size)
+static void reclaim_dma_bufs(void)
+{
+	unsigned long flags;
+	struct port_buffer *buf, *tmp;
+	LIST_HEAD(tmp_list);
+
+	if (list_empty(&pending_free_dma_bufs))
+		return;
+
+	/* Create a copy of the pending_free_dma_bufs while holding the lock */
+	spin_lock_irqsave(&dma_bufs_lock, flags);
+	list_cut_position(&tmp_list, &pending_free_dma_bufs,
+			  pending_free_dma_bufs.prev);
+	spin_unlock_irqrestore(&dma_bufs_lock, flags);
+
+	/* Release the dma buffers, without irqs enabled */
+	list_for_each_entry_safe(buf, tmp, &tmp_list, list) {
+		list_del(&buf->list);
+		free_buf(buf, true);
+	}
+}
+
+static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size,
+				     int pages)
 {
 	struct port_buffer *buf;
 
-	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	reclaim_dma_bufs();
+
+	/*
+	 * Allocate buffer and the sg list. The sg list array is allocated
+	 * directly after the port_buffer struct.
+	 */
+	buf = kmalloc(sizeof(*buf) + sizeof(struct scatterlist) * pages,
+		      GFP_KERNEL);
 	if (!buf)
 		goto fail;
-	buf->buf = kzalloc(buf_size, GFP_KERNEL);
+
+	buf->sgpages = pages;
+	if (pages > 0) {
+		buf->dev = NULL;
+		buf->buf = NULL;
+		return buf;
+	}
+
+	if (is_rproc_serial(vq->vdev)) {
+		/*
+		 * Allocate DMA memory from ancestor. When a virtio
+		 * device is created by remoteproc, the DMA memory is
+		 * associated with the grandparent device:
+		 * vdev => rproc => platform-dev.
+		 * The code here would have been less quirky if
+		 * DMA_MEMORY_INCLUDES_CHILDREN had been supported
+		 * in dma-coherent.c
+		 */
+		if (!vq->vdev->dev.parent || !vq->vdev->dev.parent->parent)
+			goto free_buf;
+		buf->dev = vq->vdev->dev.parent->parent;
+
+		/* Increase device refcnt to avoid freeing it */
+		get_device(buf->dev);
+		buf->buf = dma_alloc_coherent(buf->dev, buf_size, &buf->dma,
+					      GFP_KERNEL);
+	} else {
+		buf->dev = NULL;
+		buf->buf = kmalloc(buf_size, GFP_KERNEL);
+	}
+
 	if (!buf->buf)
 		goto free_buf;
 	buf->len = 0;
@@ -396,6 +510,8 @@
 
 	ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC);
 	virtqueue_kick(vq);
+	if (!ret)
+		ret = vq->num_free;
 	return ret;
 }
 
@@ -416,7 +532,7 @@
 		port->stats.bytes_discarded += buf->len - buf->offset;
 		if (add_inbuf(port->in_vq, buf) < 0) {
 			err++;
-			free_buf(buf);
+			free_buf(buf, false);
 		}
 		port->inbuf = NULL;
 		buf = get_inbuf(port);
@@ -459,7 +575,7 @@
 	vq = portdev->c_ovq;
 
 	sg_init_one(sg, &cpkt, sizeof(cpkt));
-	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) {
+	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) == 0) {
 		virtqueue_kick(vq);
 		while (!virtqueue_get_buf(vq, &len))
 			cpu_relax();
@@ -476,55 +592,29 @@
 	return 0;
 }
 
-struct buffer_token {
-	union {
-		void *buf;
-		struct scatterlist *sg;
-	} u;
-	/* If sgpages == 0 then buf is used, else sg is used */
-	unsigned int sgpages;
-};
-
-static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages)
-{
-	int i;
-	struct page *page;
-
-	for (i = 0; i < nrpages; i++) {
-		page = sg_page(&sg[i]);
-		if (!page)
-			break;
-		put_page(page);
-	}
-	kfree(sg);
-}
 
 /* Callers must take the port->outvq_lock */
 static void reclaim_consumed_buffers(struct port *port)
 {
-	struct buffer_token *tok;
+	struct port_buffer *buf;
 	unsigned int len;
 
 	if (!port->portdev) {
 		/* Device has been unplugged.  vqs are already gone. */
 		return;
 	}
-	while ((tok = virtqueue_get_buf(port->out_vq, &len))) {
-		if (tok->sgpages)
-			reclaim_sg_pages(tok->u.sg, tok->sgpages);
-		else
-			kfree(tok->u.buf);
-		kfree(tok);
+	while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
+		free_buf(buf, false);
 		port->outvq_full = false;
 	}
 }
 
 static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
 			      int nents, size_t in_count,
-			      struct buffer_token *tok, bool nonblock)
+			      void *data, bool nonblock)
 {
 	struct virtqueue *out_vq;
-	ssize_t ret;
+	int err;
 	unsigned long flags;
 	unsigned int len;
 
@@ -534,17 +624,17 @@
 
 	reclaim_consumed_buffers(port);
 
-	ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC);
+	err = virtqueue_add_buf(out_vq, sg, nents, 0, data, GFP_ATOMIC);
 
 	/* Tell Host to go! */
 	virtqueue_kick(out_vq);
 
-	if (ret < 0) {
+	if (err) {
 		in_count = 0;
 		goto done;
 	}
 
-	if (ret == 0)
+	if (out_vq->num_free == 0)
 		port->outvq_full = true;
 
 	if (nonblock)
@@ -572,37 +662,6 @@
 	return in_count;
 }
 
-static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
-			bool nonblock)
-{
-	struct scatterlist sg[1];
-	struct buffer_token *tok;
-
-	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
-	if (!tok)
-		return -ENOMEM;
-	tok->sgpages = 0;
-	tok->u.buf = in_buf;
-
-	sg_init_one(sg, in_buf, in_count);
-
-	return __send_to_port(port, sg, 1, in_count, tok, nonblock);
-}
-
-static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents,
-			  size_t in_count, bool nonblock)
-{
-	struct buffer_token *tok;
-
-	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
-	if (!tok)
-		return -ENOMEM;
-	tok->sgpages = nents;
-	tok->u.sg = sg;
-
-	return __send_to_port(port, sg, nents, in_count, tok, nonblock);
-}
-
 /*
  * Give out the data that's requested from the buffer that we have
  * queued up.
@@ -748,9 +807,10 @@
 			       size_t count, loff_t *offp)
 {
 	struct port *port;
-	char *buf;
+	struct port_buffer *buf;
 	ssize_t ret;
 	bool nonblock;
+	struct scatterlist sg[1];
 
 	/* Userspace could be out to fool us */
 	if (!count)
@@ -766,11 +826,11 @@
 
 	count = min((size_t)(32 * 1024), count);
 
-	buf = kmalloc(count, GFP_KERNEL);
+	buf = alloc_buf(port->out_vq, count, 0);
 	if (!buf)
 		return -ENOMEM;
 
-	ret = copy_from_user(buf, ubuf, count);
+	ret = copy_from_user(buf->buf, ubuf, count);
 	if (ret) {
 		ret = -EFAULT;
 		goto free_buf;
@@ -784,13 +844,14 @@
 	 * through to the host.
 	 */
 	nonblock = true;
-	ret = send_buf(port, buf, count, nonblock);
+	sg_init_one(sg, buf->buf, count);
+	ret = __send_to_port(port, sg, 1, count, buf, nonblock);
 
 	if (nonblock && ret > 0)
 		goto out;
 
 free_buf:
-	kfree(buf);
+	free_buf(buf, true);
 out:
 	return ret;
 }
@@ -856,6 +917,7 @@
 	struct port *port = filp->private_data;
 	struct sg_list sgl;
 	ssize_t ret;
+	struct port_buffer *buf;
 	struct splice_desc sd = {
 		.total_len = len,
 		.flags = flags,
@@ -863,22 +925,34 @@
 		.u.data = &sgl,
 	};
 
+	/*
+	 * Rproc_serial does not yet support splice. To support splice,
+	 * pipe_to_sg() must allocate dma-buffers and copy content from
+	 * regular pages to dma pages, and alloc_buf() and free_buf() must
+	 * support allocating and freeing such a list of dma-buffers.
+	 */
+	if (is_rproc_serial(port->out_vq->vdev))
+		return -EINVAL;
+
 	ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
 	if (ret < 0)
 		return ret;
 
+	buf = alloc_buf(port->out_vq, 0, pipe->nrbufs);
+	if (!buf)
+		return -ENOMEM;
+
 	sgl.n = 0;
 	sgl.len = 0;
 	sgl.size = pipe->nrbufs;
-	sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL);
-	if (unlikely(!sgl.sg))
-		return -ENOMEM;
-
+	sgl.sg = buf->sg;
 	sg_init_table(sgl.sg, sgl.size);
 	ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
 	if (likely(ret > 0))
-		ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true);
+		ret = __send_to_port(port, buf->sg, sgl.n, sgl.len, buf, true);
 
+	if (unlikely(ret <= 0))
+		free_buf(buf, true);
 	return ret;
 }
 
@@ -927,6 +1001,7 @@
 	reclaim_consumed_buffers(port);
 	spin_unlock_irq(&port->outvq_lock);
 
+	reclaim_dma_bufs();
 	/*
 	 * Locks aren't necessary here as a port can't be opened after
 	 * unplug, and if a port isn't unplugged, a kref would already
@@ -1031,6 +1106,7 @@
 static int put_chars(u32 vtermno, const char *buf, int count)
 {
 	struct port *port;
+	struct scatterlist sg[1];
 
 	if (unlikely(early_put_chars))
 		return early_put_chars(vtermno, buf, count);
@@ -1039,7 +1115,8 @@
 	if (!port)
 		return -EPIPE;
 
-	return send_buf(port, (void *)buf, count, false);
+	sg_init_one(sg, buf, count);
+	return __send_to_port(port, sg, 1, count, (void *)buf, false);
 }
 
 /*
@@ -1076,7 +1153,10 @@
 		return;
 
 	vdev = port->portdev->vdev;
-	if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE))
+
+	/* Don't test F_SIZE at all if we're rproc: not a valid feature! */
+	if (!is_rproc_serial(vdev) &&
+	    virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE))
 		hvc_resize(port->cons.hvc, port->cons.ws);
 }
 
@@ -1260,7 +1340,7 @@
 
 	nr_added_bufs = 0;
 	do {
-		buf = alloc_buf(PAGE_SIZE);
+		buf = alloc_buf(vq, PAGE_SIZE, 0);
 		if (!buf)
 			break;
 
@@ -1268,7 +1348,7 @@
 		ret = add_inbuf(vq, buf);
 		if (ret < 0) {
 			spin_unlock_irq(lock);
-			free_buf(buf);
+			free_buf(buf, true);
 			break;
 		}
 		nr_added_bufs++;
@@ -1356,10 +1436,18 @@
 		goto free_device;
 	}
 
-	/*
-	 * If we're not using multiport support, this has to be a console port
-	 */
-	if (!use_multiport(port->portdev)) {
+	if (is_rproc_serial(port->portdev->vdev))
+		/*
+		 * For rproc_serial, assume the remote processor is connected.
+		 * rproc_serial does not want the console port, only
+		 * the generic port implementation.
+		 */
+		port->host_connected = true;
+	else if (!use_multiport(port->portdev)) {
+		/*
+		 * If we're not using multiport support,
+		 * this has to be a console port.
+		 */
 		err = init_port_console(port);
 		if (err)
 			goto free_inbufs;
@@ -1392,7 +1480,7 @@
 
 free_inbufs:
 	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-		free_buf(buf);
+		free_buf(buf, true);
 free_device:
 	device_destroy(pdrvdata.class, port->dev->devt);
 free_cdev:
@@ -1434,7 +1522,11 @@
 
 	/* Remove buffers we queued up for the Host to send us data in. */
 	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-		free_buf(buf);
+		free_buf(buf, true);
+
+	/* Free pending buffers from the out-queue. */
+	while ((buf = virtqueue_detach_unused_buf(port->out_vq)))
+		free_buf(buf, true);
 }
 
 /*
@@ -1636,7 +1728,7 @@
 		if (add_inbuf(portdev->c_ivq, buf) < 0) {
 			dev_warn(&portdev->vdev->dev,
 				 "Error adding buffer to queue\n");
-			free_buf(buf);
+			free_buf(buf, false);
 		}
 	}
 	spin_unlock(&portdev->cvq_lock);
@@ -1832,10 +1924,10 @@
 		return;
 
 	while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
-		free_buf(buf);
+		free_buf(buf, true);
 
 	while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
-		free_buf(buf);
+		free_buf(buf, true);
 }
 
 /*
@@ -1882,11 +1974,15 @@
 
 	multiport = false;
 	portdev->config.max_nr_ports = 1;
-	if (virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT,
-			      offsetof(struct virtio_console_config,
-				       max_nr_ports),
-			      &portdev->config.max_nr_ports) == 0)
+
+	/* Don't test MULTIPORT at all if we're rproc: not a valid feature! */
+	if (!is_rproc_serial(vdev) &&
+	    virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT,
+				  offsetof(struct virtio_console_config,
+					   max_nr_ports),
+				  &portdev->config.max_nr_ports) == 0) {
 		multiport = true;
+	}
 
 	err = init_vqs(portdev);
 	if (err < 0) {
@@ -1996,6 +2092,16 @@
 	VIRTIO_CONSOLE_F_MULTIPORT,
 };
 
+static struct virtio_device_id rproc_serial_id_table[] = {
+#if IS_ENABLED(CONFIG_REMOTEPROC)
+	{ VIRTIO_ID_RPROC_SERIAL, VIRTIO_DEV_ANY_ID },
+#endif
+	{ 0 },
+};
+
+static unsigned int rproc_serial_features[] = {
+};
+
 #ifdef CONFIG_PM
 static int virtcons_freeze(struct virtio_device *vdev)
 {
@@ -2080,6 +2186,20 @@
 #endif
 };
 
+/*
+ * virtio_rproc_serial refers to a __devinit function, which causes
+ * section mismatch warnings, so use __refdata to silence them.
+ */
+static struct virtio_driver __refdata virtio_rproc_serial = {
+	.feature_table = rproc_serial_features,
+	.feature_table_size = ARRAY_SIZE(rproc_serial_features),
+	.driver.name =	"virtio_rproc_serial",
+	.driver.owner =	THIS_MODULE,
+	.id_table =	rproc_serial_id_table,
+	.probe =	virtcons_probe,
+	.remove =	virtcons_remove,
+};
+
 static int __init init(void)
 {
 	int err;
@@ -2104,7 +2224,15 @@
 		pr_err("Error %d registering virtio driver\n", err);
 		goto free;
 	}
+	err = register_virtio_driver(&virtio_rproc_serial);
+	if (err < 0) {
+		pr_err("Error %d registering virtio rproc serial driver\n",
+		       err);
+		goto unregister;
+	}
 	return 0;
+unregister:
+	unregister_virtio_driver(&virtio_console);
 free:
 	if (pdrvdata.debugfs_dir)
 		debugfs_remove_recursive(pdrvdata.debugfs_dir);
@@ -2114,7 +2242,10 @@
 
 static void __exit fini(void)
 {
+	reclaim_dma_bufs();
+
 	unregister_virtio_driver(&virtio_console);
+	unregister_virtio_driver(&virtio_rproc_serial);
 
 	class_destroy(pdrvdata.class);
 	if (pdrvdata.debugfs_dir)
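
A central piece of the rproc_serial support above is the deferred-free
scheme: dma_free_coherent() cannot be called from atomic context, so
free_buf() parks such buffers on a locked list and reclaim_dma_bufs() frees
them later from process context. The scheme in isolation, with port_buffer
trimmed to the fields the sketch needs (the real driver additionally drops
a device reference per buffer):

#include <linux/dma-mapping.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct port_buffer {
	struct device *dev;
	void *buf;
	size_t size;
	dma_addr_t dma;
	struct list_head list;
};

static DEFINE_SPINLOCK(dma_bufs_lock);
static LIST_HEAD(pending_free_dma_bufs);

static void free_dma_buf(struct port_buffer *buf, bool can_sleep)
{
	unsigned long flags;

	if (!can_sleep) {
		/* Atomic context: queue the buffer for a later free. */
		spin_lock_irqsave(&dma_bufs_lock, flags);
		list_add_tail(&buf->list, &pending_free_dma_bufs);
		spin_unlock_irqrestore(&dma_bufs_lock, flags);
		return;
	}

	dma_free_coherent(buf->dev, buf->size, buf->buf, buf->dma);
	kfree(buf);
}

/* Run from process context (open/close paths, module exit). */
static void reclaim_dma_bufs(void)
{
	struct port_buffer *buf, *tmp;
	unsigned long flags;
	LIST_HEAD(tmp_list);

	spin_lock_irqsave(&dma_bufs_lock, flags);
	list_splice_init(&pending_free_dma_bufs, &tmp_list);
	spin_unlock_irqrestore(&dma_bufs_lock, flags);

	list_for_each_entry_safe(buf, tmp, &tmp_list, list) {
		list_del(&buf->list);
		free_dma_buf(buf, true);
	}
}
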
diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c
index 517a8ff..6b4c70f 100644
--- a/drivers/clk/clk-nomadik.c
+++ b/drivers/clk/clk-nomadik.c
@@ -20,6 +20,7 @@
 	clk_register_clkdev(clk, NULL, "gpio.2");
 	clk_register_clkdev(clk, NULL, "gpio.3");
 	clk_register_clkdev(clk, NULL, "rng");
+	clk_register_clkdev(clk, NULL, "fsmc-nand");
 
 	/*
 	 * The 2.4 MHz TIMCLK reference clock is active at boot time, this is
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 0ce6257..6c4c000 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 
 #include <asm/page.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/vio.h>
 
 #include "nx_csbcpb.h" /* struct nx_csbcpb */
@@ -1014,26 +1013,23 @@
  *	NOTIFY_BAD encoded with error number on failure, use
  *		notifier_to_errno() to decode this value
  */
-static int nx842_OF_notifier(struct notifier_block *np,
-					unsigned long action,
-					void *update)
+static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
+			     void *update)
 {
-	struct pSeries_reconfig_prop_update *upd;
+	struct of_prop_reconfig *upd = update;
 	struct nx842_devdata *local_devdata;
 	struct device_node *node = NULL;
 
-	upd = (struct pSeries_reconfig_prop_update *)update;
-
 	rcu_read_lock();
 	local_devdata = rcu_dereference(devdata);
 	if (local_devdata)
 		node = local_devdata->dev->of_node;
 
 	if (local_devdata &&
-			action == PSERIES_UPDATE_PROPERTY &&
-			!strcmp(upd->node->name, node->name)) {
+			action == OF_RECONFIG_UPDATE_PROPERTY &&
+			!strcmp(upd->dn->name, node->name)) {
 		rcu_read_unlock();
-		nx842_OF_upd(upd->property);
+		nx842_OF_upd(upd->prop);
 	} else
 		rcu_read_unlock();
 
@@ -1182,7 +1178,7 @@
 	synchronize_rcu();
 	kfree(old_devdata);
 
-	pSeries_reconfig_notifier_register(&nx842_of_nb);
+	of_reconfig_notifier_register(&nx842_of_nb);
 
 	ret = nx842_OF_upd(NULL);
 	if (ret && ret != -ENODEV) {
@@ -1228,7 +1224,7 @@
 	spin_lock_irqsave(&devdata_mutex, flags);
 	old_devdata = rcu_dereference_check(devdata,
 			lockdep_is_held(&devdata_mutex));
-	pSeries_reconfig_notifier_unregister(&nx842_of_nb);
+	of_reconfig_notifier_unregister(&nx842_of_nb);
 	rcu_assign_pointer(devdata, NULL);
 	spin_unlock_irqrestore(&devdata_mutex, flags);
 	synchronize_rcu();
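
The nx-842 hunks migrate from the pSeries-specific reconfig notifier to the
generic OF reconfig API, which hands the callback a struct of_prop_reconfig
(device node in ->dn, updated property in ->prop). The registration pattern
as a sketch; the callback body is an illustrative stub, not the driver's:

#include <linux/notifier.h>
#include <linux/of.h>

static int my_of_notifier(struct notifier_block *nb, unsigned long action,
			  void *data)
{
	struct of_prop_reconfig *upd = data;

	if (action == OF_RECONFIG_UPDATE_PROPERTY)
		pr_info("property %s of node %s updated\n",
			upd->prop->name, upd->dn->name);

	return NOTIFY_OK;
}

static struct notifier_block my_of_nb = {
	.notifier_call = my_of_notifier,
};

/* of_reconfig_notifier_register(&my_of_nb) at init,
 * of_reconfig_notifier_unregister(&my_of_nb) at exit. */
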
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 638110e..f7a8a16 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -33,7 +33,6 @@
 #include <linux/scatterlist.h>
 #include <linux/device.h>
 #include <linux/of.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/hvcall.h>
 #include <asm/vio.h>
 
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 24225f0..64b048d 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -228,6 +228,20 @@
 	wake_up_all(done->wait);
 }
 
+static inline void unmap_src(struct device *dev, dma_addr_t *addr, size_t len,
+			     unsigned int count)
+{
+	while (count--)
+		dma_unmap_single(dev, addr[count], len, DMA_TO_DEVICE);
+}
+
+static inline void unmap_dst(struct device *dev, dma_addr_t *addr, size_t len,
+			     unsigned int count)
+{
+	while (count--)
+		dma_unmap_single(dev, addr[count], len, DMA_BIDIRECTIONAL);
+}
+
 /*
  * This function repeatedly tests DMA transfers of various lengths and
  * offsets for a given operation type until it is told to exit by
@@ -353,15 +367,35 @@
 
 			dma_srcs[i] = dma_map_single(dev->dev, buf, len,
 						     DMA_TO_DEVICE);
+			ret = dma_mapping_error(dev->dev, dma_srcs[i]);
+			if (ret) {
+				unmap_src(dev->dev, dma_srcs, len, i);
+				pr_warn("%s: #%u: mapping error %d with "
+					"src_off=0x%x len=0x%x\n",
+					thread_name, total_tests - 1, ret,
+					src_off, len);
+				failed_tests++;
+				continue;
+			}
 		}
 		/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
 		for (i = 0; i < dst_cnt; i++) {
 			dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
 						     test_buf_size,
 						     DMA_BIDIRECTIONAL);
+			ret = dma_mapping_error(dev->dev, dma_dsts[i]);
+			if (ret) {
+				unmap_src(dev->dev, dma_srcs, len, src_cnt);
+				unmap_dst(dev->dev, dma_dsts, test_buf_size, i);
+				pr_warn("%s: #%u: mapping error %d with "
+					"dst_off=0x%x len=0x%x\n",
+					thread_name, total_tests - 1, ret,
+					dst_off, test_buf_size);
+				failed_tests++;
+				continue;
+			}
 		}
 
-
 		if (thread->type == DMA_MEMCPY)
 			tx = dev->device_prep_dma_memcpy(chan,
 							 dma_dsts[0] + dst_off,
@@ -383,13 +417,8 @@
 		}
 
 		if (!tx) {
-			for (i = 0; i < src_cnt; i++)
-				dma_unmap_single(dev->dev, dma_srcs[i], len,
-						 DMA_TO_DEVICE);
-			for (i = 0; i < dst_cnt; i++)
-				dma_unmap_single(dev->dev, dma_dsts[i],
-						 test_buf_size,
-						 DMA_BIDIRECTIONAL);
+			unmap_src(dev->dev, dma_srcs, len, src_cnt);
+			unmap_dst(dev->dev, dma_dsts, test_buf_size, dst_cnt);
 			pr_warning("%s: #%u: prep error with src_off=0x%x "
 					"dst_off=0x%x len=0x%x\n",
 					thread_name, total_tests - 1,
@@ -443,9 +472,7 @@
 		}
 
 		/* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
-		for (i = 0; i < dst_cnt; i++)
-			dma_unmap_single(dev->dev, dma_dsts[i], test_buf_size,
-					 DMA_BIDIRECTIONAL);
+		unmap_dst(dev->dev, dma_dsts, test_buf_size, dst_cnt);
 
 		error_count = 0;
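
The dmatest hunks add the check that every dma_map_single() call needs: the
returned handle must be vetted with dma_mapping_error() before it is used,
and on failure the already-mapped part of the array must be unwound. The
idiom as a self-contained sketch:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int map_srcs(struct device *dev, void **bufs, dma_addr_t *addrs,
		    size_t len, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		addrs[i] = dma_map_single(dev, bufs[i], len, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, addrs[i])) {
			/* Unwind the mappings that did succeed. */
			while (i--)
				dma_unmap_single(dev, addrs[i], len,
						 DMA_TO_DEVICE);
			return -ENOMEM;
		}
	}
	return 0;
}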
 
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index b298158..fd3ae62 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -16,6 +16,7 @@
  */
 static char dmi_empty_string[] = "        ";
 
+static u16 __initdata dmi_ver;
 /*
  * Catch too early calls to dmi_check_system():
  */
@@ -118,12 +119,12 @@
 	return 0;
 }
 
-static int __init dmi_checksum(const u8 *buf)
+static int __init dmi_checksum(const u8 *buf, u8 len)
 {
 	u8 sum = 0;
 	int a;
 
-	for (a = 0; a < 15; a++)
+	for (a = 0; a < len; a++)
 		sum += buf[a];
 
 	return sum == 0;
@@ -161,8 +162,10 @@
 		return;
 
 	for (i = 0; i < 16 && (is_ff || is_00); i++) {
-		if(d[i] != 0x00) is_ff = 0;
-		if(d[i] != 0xFF) is_00 = 0;
+		if (d[i] != 0x00)
+			is_00 = 0;
+		if (d[i] != 0xFF)
+			is_ff = 0;
 	}
 
 	if (is_ff || is_00)
@@ -172,7 +175,15 @@
 	if (!s)
 		return;
 
-	sprintf(s, "%pUB", d);
+	/*
+	 * As of version 2.6 of the SMBIOS specification, the first 3 fields of
+	 * the UUID are supposed to be little-endian encoded.  The specification
+	 * says that this is the de facto standard.
+	 */
+	if (dmi_ver >= 0x0206)
+		sprintf(s, "%pUL", d);
+	else
+		sprintf(s, "%pUB", d);
 
         dmi_ident[slot] = s;
 }
@@ -404,29 +415,57 @@
 	u8 buf[15];
 
 	memcpy_fromio(buf, p, 15);
-	if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+	if (dmi_checksum(buf, 15)) {
 		dmi_num = (buf[13] << 8) | buf[12];
 		dmi_len = (buf[7] << 8) | buf[6];
 		dmi_base = (buf[11] << 24) | (buf[10] << 16) |
 			(buf[9] << 8) | buf[8];
 
-		/*
-		 * DMI version 0.0 means that the real version is taken from
-		 * the SMBIOS version, which we don't know at this point.
-		 */
-		if (buf[14] != 0)
-			printk(KERN_INFO "DMI %d.%d present.\n",
-			       buf[14] >> 4, buf[14] & 0xF);
-		else
-			printk(KERN_INFO "DMI present.\n");
 		if (dmi_walk_early(dmi_decode) == 0) {
+			if (dmi_ver)
+				pr_info("SMBIOS %d.%d present.\n",
+				       dmi_ver >> 8, dmi_ver & 0xFF);
+			else {
+				dmi_ver = (buf[14] & 0xF0) << 4 |
+					   (buf[14] & 0x0F);
+				pr_info("Legacy DMI %d.%d present.\n",
+				       dmi_ver >> 8, dmi_ver & 0xFF);
+			}
 			dmi_dump_ids();
 			return 0;
 		}
 	}
+	dmi_ver = 0;
 	return 1;
 }
 
+static int __init smbios_present(const char __iomem *p)
+{
+	u8 buf[32];
+	int offset = 0;
+
+	memcpy_fromio(buf, p, 32);
+	if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) {
+		dmi_ver = (buf[6] << 8) + buf[7];
+
+		/* Some BIOSes report a weird SMBIOS version; fix that up */
+		switch (dmi_ver) {
+		case 0x021F:
+		case 0x0221:
+			pr_debug("SMBIOS version fixup(2.%d->2.%d)\n",
+			       dmi_ver & 0xFF, 3);
+			dmi_ver = 0x0203;
+			break;
+		case 0x0233:
+			pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6);
+			dmi_ver = 0x0206;
+			break;
+		}
+		offset = 16;
+	}
+	return dmi_present(buf + offset);
+}
+
 void __init dmi_scan_machine(void)
 {
 	char __iomem *p, *q;
@@ -444,7 +483,7 @@
 		if (p == NULL)
 			goto error;
 
-		rc = dmi_present(p + 0x10); /* offset of _DMI_ string */
+		rc = smbios_present(p);
 		dmi_iounmap(p, 32);
 		if (!rc) {
 			dmi_available = 1;
@@ -462,7 +501,12 @@
 			goto error;
 
 		for (q = p; q < p + 0x10000; q += 16) {
-			rc = dmi_present(q);
+			if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0)
+				rc = smbios_present(q);
+			else if (memcmp(q, "_DMI_", 5) == 0)
+				rc = dmi_present(q);
+			else
+				continue;
 			if (!rc) {
 				dmi_available = 1;
 				dmi_iounmap(p, 0x10000);
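
The dmi_scan rework walks the 0xF0000-0xFFFFF BIOS range on 16-byte
boundaries and now distinguishes a full SMBIOS entry point ("_SM_", which
embeds the legacy table at offset 16) from a bare legacy "_DMI_" one,
checksumming over the advertised entry length rather than a fixed 15 bytes.
The anchor classification, distilled into a sketch:

#include <linux/string.h>
#include <linux/types.h>

static bool checksum_ok(const u8 *buf, u8 len)
{
	u8 sum = 0;

	while (len--)
		sum += *buf++;
	return sum == 0;
}

/* Classify a 16-byte-aligned candidate; 'q' points at copied-out bytes. */
static int classify_anchor(const u8 *q)
{
	if (!memcmp(q, "_SM_", 4) && q[5] < 32 && checksum_ok(q, q[5]))
		return 2;	/* SMBIOS 2.x entry point, length in q[5] */
	if (!memcmp(q, "_DMI_", 5) && checksum_ok(q, 15))
		return 1;	/* legacy DMI entry point */
	return 0;
}
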
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 52c5d89..7b1c374 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -883,7 +883,6 @@
 
 	if (inode) {
 		inode->i_ino = get_next_ino();
-		inode->i_uid = inode->i_gid = 0;
 		inode->i_mode = mode;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index bf892bd..682de75 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -172,6 +172,7 @@
 config GPIO_MVEBU
 	def_bool y
 	depends on PLAT_ORION
+	depends on OF
 	select GPIO_GENERIC
 	select GENERIC_IRQ_CHIP
 
@@ -683,4 +684,17 @@
 	  Enable support for GPIO on intel MSIC controllers found in
 	  intel MID devices
 
+comment "USB GPIO expanders:"
+
+config GPIO_VIPERBOARD
+	tristate "Viperboard GPIO a & b support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the GPIO signals of Nano River
+	  Technologies Viperboard. There are two GPIO chips on the
+	  board: gpioa and gpiob.
+	  See the viperboard API specification and Nano River Tech's
+	  viperboard.h for the detailed meaning of the module parameters.
+
 endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 76b3446..c5aebd0 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -76,6 +76,7 @@
 obj-$(CONFIG_GPIO_TWL4030)	+= gpio-twl4030.o
 obj-$(CONFIG_GPIO_TWL6040)	+= gpio-twl6040.o
 obj-$(CONFIG_GPIO_UCB1400)	+= gpio-ucb1400.o
+obj-$(CONFIG_GPIO_VIPERBOARD)	+= gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)	+= gpio-vr41xx.o
 obj-$(CONFIG_GPIO_VT8500)	+= gpio-vt8500.o
 obj-$(CONFIG_GPIO_VX855)	+= gpio-vx855.o
diff --git a/drivers/gpio/gpio-da9052.c b/drivers/gpio/gpio-da9052.c
index a05aacd..29b11e9 100644
--- a/drivers/gpio/gpio-da9052.c
+++ b/drivers/gpio/gpio-da9052.c
@@ -185,7 +185,11 @@
 	struct da9052_gpio *gpio = to_da9052_gpio(gc);
 	struct da9052 *da9052 = gpio->da9052;
 
-	return da9052->irq_base + DA9052_IRQ_GPI0 + offset;
+	int irq;
+
+	irq = regmap_irq_get_virq(da9052->irq_data, DA9052_IRQ_GPI0 + offset);
+
+	return irq;
 }
 
 static struct gpio_chip reference_gp = {
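
Both PMIC GPIO fixes in this series (da9052 here, tps6586x below) replace
an assumed-linear irq_base with a proper IRQ-domain lookup in .to_irq. For
a regmap-irq based PMIC the shape is as follows, with my_pmic and
MY_FIRST_GPIO_IRQ as hypothetical stand-ins for the chip's own types:

#include <linux/gpio.h>
#include <linux/regmap.h>

struct my_pmic {
	struct gpio_chip gpio_chip;
	struct regmap_irq_chip_data *irq_data;
};

#define MY_FIRST_GPIO_IRQ	0	/* first GPI interrupt of the chip */

static int my_pmic_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
{
	struct my_pmic *pmic = container_of(gc, struct my_pmic, gpio_chip);

	/* Map the local interrupt number to a Linux virq via the domain. */
	return regmap_irq_get_virq(pmic->irq_data,
				   MY_FIRST_GPIO_IRQ + offset);
}
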
diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c
index 6cc87ac..6f2306d 100644
--- a/drivers/gpio/gpio-ich.c
+++ b/drivers/gpio/gpio-ich.c
@@ -390,6 +390,7 @@
 		return -ENODEV;
 	}
 
+	spin_lock_init(&ichx_priv.lock);
 	res_base = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_GPIO);
 	ichx_priv.use_gpio = ich_info->use_gpio;
 	err = ichx_gpio_request_regions(res_base, pdev->name,
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index d767b53..7d9bd94 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -41,7 +41,6 @@
 #include <linux/io.h>
 #include <linux/of_irq.h>
 #include <linux/of_device.h>
-#include <linux/platform_device.h>
 #include <linux/pinctrl/consumer.h>
 
 /*
@@ -469,19 +468,6 @@
 	}
 }
 
-static struct platform_device_id mvebu_gpio_ids[] = {
-	{
-		.name = "orion-gpio",
-	}, {
-		.name = "mv78200-gpio",
-	}, {
-		.name = "armadaxp-gpio",
-	}, {
-		/* sentinel */
-	},
-};
-MODULE_DEVICE_TABLE(platform, mvebu_gpio_ids);
-
 static struct of_device_id mvebu_gpio_of_match[] = {
 	{
 		.compatible = "marvell,orion-gpio",
@@ -555,9 +541,7 @@
 	mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK;
 	mvchip->chip.ngpio = ngpios;
 	mvchip->chip.can_sleep = 0;
-#ifdef CONFIG_OF
 	mvchip->chip.of_node = np;
-#endif
 
 	spin_lock_init(&mvchip->lock);
 	mvchip->membase = devm_request_and_ioremap(&pdev->dev, res);
@@ -698,7 +682,6 @@
 		.of_match_table = mvebu_gpio_of_match,
 	},
 	.probe		= mvebu_gpio_probe,
-	.id_table	= mvebu_gpio_ids,
 };
 
 static int __init mvebu_gpio_init(void)
diff --git a/drivers/gpio/gpio-tps6586x.c b/drivers/gpio/gpio-tps6586x.c
index c1b82da..29e8e75 100644
--- a/drivers/gpio/gpio-tps6586x.c
+++ b/drivers/gpio/gpio-tps6586x.c
@@ -80,6 +80,14 @@
 				val, mask);
 }
 
+static int tps6586x_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
+{
+	struct tps6586x_gpio *tps6586x_gpio = to_tps6586x_gpio(gc);
+
+	return tps6586x_irq_get_virq(tps6586x_gpio->parent,
+				TPS6586X_INT_PLDO_0 + offset);
+}
+
 static int tps6586x_gpio_probe(struct platform_device *pdev)
 {
 	struct tps6586x_platform_data *pdata;
@@ -106,6 +114,7 @@
 	tps6586x_gpio->gpio_chip.direction_output = tps6586x_gpio_output;
 	tps6586x_gpio->gpio_chip.set	= tps6586x_gpio_set;
 	tps6586x_gpio->gpio_chip.get	= tps6586x_gpio_get;
+	tps6586x_gpio->gpio_chip.to_irq	= tps6586x_gpio_to_irq;
 
 #ifdef CONFIG_OF_GPIO
 	tps6586x_gpio->gpio_chip.of_node = pdev->dev.parent->of_node;
diff --git a/drivers/gpio/gpio-twl4030.c b/drivers/gpio/gpio-twl4030.c
index 00329f2..9572aa1 100644
--- a/drivers/gpio/gpio-twl4030.c
+++ b/drivers/gpio/gpio-twl4030.c
@@ -355,13 +355,13 @@
 
 static int gpio_twl4030_pulls(u32 ups, u32 downs)
 {
-	u8		message[6];
+	u8		message[5];
 	unsigned	i, gpio_bit;
 
 	/* For most pins, a pulldown was enabled by default.
 	 * We should have data that's specific to this board.
 	 */
-	for (gpio_bit = 1, i = 1; i < 6; i++) {
+	for (gpio_bit = 1, i = 0; i < 5; i++) {
 		u8		bit_mask;
 		unsigned	j;
 
@@ -380,16 +380,16 @@
 
 static int gpio_twl4030_debounce(u32 debounce, u8 mmc_cd)
 {
-	u8		message[4];
+	u8		message[3];
 
 	/* 30 msec of debouncing is always used for MMC card detect,
 	 * and is optional for everything else.
 	 */
-	message[1] = (debounce & 0xff) | (mmc_cd & 0x03);
+	message[0] = (debounce & 0xff) | (mmc_cd & 0x03);
 	debounce >>= 8;
-	message[2] = (debounce & 0xff);
+	message[1] = (debounce & 0xff);
 	debounce >>= 8;
-	message[3] = (debounce & 0x03);
+	message[2] = (debounce & 0x03);
 
 	return twl_i2c_write(TWL4030_MODULE_GPIO, message,
 				REG_GPIO_DEBEN1, 3);
diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c
new file mode 100644
index 0000000..1377299
--- /dev/null
+++ b/drivers/gpio/gpio-viperboard.c
@@ -0,0 +1,517 @@
+/*
+ *  Nano River Technologies viperboard GPIO lib driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/gpio.h>
+
+#include <linux/mfd/viperboard.h>
+
+#define VPRBRD_GPIOA_CLK_1MHZ		0
+#define VPRBRD_GPIOA_CLK_100KHZ		1
+#define VPRBRD_GPIOA_CLK_10KHZ		2
+#define VPRBRD_GPIOA_CLK_1KHZ		3
+#define VPRBRD_GPIOA_CLK_100HZ		4
+#define VPRBRD_GPIOA_CLK_10HZ		5
+
+#define VPRBRD_GPIOA_FREQ_DEFAULT	1000
+
+#define VPRBRD_GPIOA_CMD_CONT		0x00
+#define VPRBRD_GPIOA_CMD_PULSE		0x01
+#define VPRBRD_GPIOA_CMD_PWM		0x02
+#define VPRBRD_GPIOA_CMD_SETOUT		0x03
+#define VPRBRD_GPIOA_CMD_SETIN		0x04
+#define VPRBRD_GPIOA_CMD_SETINT		0x05
+#define VPRBRD_GPIOA_CMD_GETIN		0x06
+
+#define VPRBRD_GPIOB_CMD_SETDIR		0x00
+#define VPRBRD_GPIOB_CMD_SETVAL		0x01
+
+struct vprbrd_gpioa_msg {
+	u8 cmd;
+	u8 clk;
+	u8 offset;
+	u8 t1;
+	u8 t2;
+	u8 invert;
+	u8 pwmlevel;
+	u8 outval;
+	u8 risefall;
+	u8 answer;
+	u8 __fill;
+} __packed;
+
+struct vprbrd_gpiob_msg {
+	u8 cmd;
+	u16 val;
+	u16 mask;
+} __packed;
+
+struct vprbrd_gpio {
+	struct gpio_chip gpioa; /* gpio a related things */
+	u32 gpioa_out;
+	u32 gpioa_val;
+	struct gpio_chip gpiob; /* gpio b related things */
+	u32 gpiob_out;
+	u32 gpiob_val;
+	struct vprbrd *vb;
+};
+
+/* gpioa sampling clock module parameter */
+static unsigned char gpioa_clk;
+static unsigned int gpioa_freq = VPRBRD_GPIOA_FREQ_DEFAULT;
+module_param(gpioa_freq, uint, 0);
+MODULE_PARM_DESC(gpioa_freq,
+	"gpio-a sampling freq in Hz (default is 1000Hz) valid values: 10, 100, 1000, 10000, 100000, 1000000");
+
+/* ----- begin of gpio a chip -------------------------------------------- */
+
+static int vprbrd_gpioa_get(struct gpio_chip *chip,
+		unsigned offset)
+{
+	int ret, answer, error = 0;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	/* if io is set to output, just return the saved value */
+	if (gpio->gpioa_out & (1 << offset))
+		return gpio->gpioa_val & (1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_GETIN;
+	gamsg->clk = 0x00;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = 0x00;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		error = -EREMOTEIO;
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_IN, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	answer = gamsg->answer & 0x01;
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		error = -EREMOTEIO;
+
+	if (error)
+		return error;
+
+	return answer;
+}
+
+static void vprbrd_gpioa_set(struct gpio_chip *chip,
+		unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	if (gpio->gpioa_out & (1 << offset)) {
+		if (value)
+			gpio->gpioa_val |= (1 << offset);
+		else
+			gpio->gpioa_val &= ~(1 << offset);
+
+		mutex_lock(&vb->lock);
+
+		gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT;
+		gamsg->clk = 0x00;
+		gamsg->offset = offset;
+		gamsg->t1 = 0x00;
+		gamsg->t2 = 0x00;
+		gamsg->invert = 0x00;
+		gamsg->pwmlevel = 0x00;
+		gamsg->outval = value;
+		gamsg->risefall = 0x00;
+		gamsg->answer = 0x00;
+		gamsg->__fill = 0x00;
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0),
+			VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT,
+			0x0000,	0x0000, gamsg,
+			sizeof(struct vprbrd_gpioa_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_gpioa_msg))
+			dev_err(chip->dev, "usb error setting pin value\n");
+	}
+}
+
+static int vprbrd_gpioa_direction_input(struct gpio_chip *chip,
+			unsigned offset)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	gpio->gpioa_out &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_SETIN;
+	gamsg->clk = gpioa_clk;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = 0x00;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static int vprbrd_gpioa_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	gpio->gpioa_out |= (1 << offset);
+	if (value)
+		gpio->gpioa_val |= (1 << offset);
+	else
+		gpio->gpioa_val &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT;
+	gamsg->clk = 0x00;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = value;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+/* ----- end of gpio a chip ---------------------------------------------- */
+
+/* ----- begin of gpio b chip -------------------------------------------- */
+
+static int vprbrd_gpiob_setdir(struct vprbrd *vb, unsigned offset,
+	unsigned dir)
+{
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+	int ret;
+
+	gbmsg->cmd = VPRBRD_GPIOB_CMD_SETDIR;
+	gbmsg->val = cpu_to_be16(dir << offset);
+	gbmsg->mask = cpu_to_be16(0x0001 << offset);
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gbmsg, sizeof(struct vprbrd_gpiob_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if (ret != sizeof(struct vprbrd_gpiob_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static int vprbrd_gpiob_get(struct gpio_chip *chip,
+		unsigned offset)
+{
+	int ret;
+	u16 val;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+
+	/* if io is set to output, just return the saved value */
+	if (gpio->gpiob_out & (1 << offset))
+		return gpio->gpiob_val & (1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_IN, 0x0000,
+		0x0000, gbmsg,	sizeof(struct vprbrd_gpiob_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	val = gbmsg->val;
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpiob_msg))
+		return ret;
+
+	/* cache the read values */
+	gpio->gpiob_val = be16_to_cpu(val);
+
+	return (gpio->gpiob_val >> offset) & 0x1;
+}
+
+static void vprbrd_gpiob_set(struct gpio_chip *chip,
+		unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+
+	if (gpio->gpiob_out & (1 << offset)) {
+		if (value)
+			gpio->gpiob_val |= (1 << offset);
+		else
+			gpio->gpiob_val &= ~(1 << offset);
+
+		mutex_lock(&vb->lock);
+
+		gbmsg->cmd = VPRBRD_GPIOB_CMD_SETVAL;
+		gbmsg->val = cpu_to_be16(value << offset);
+		gbmsg->mask = cpu_to_be16(0x0001 << offset);
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0),
+			VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT,
+			0x0000,	0x0000, gbmsg,
+			sizeof(struct vprbrd_gpiob_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_gpiob_msg))
+			dev_err(chip->dev, "usb error setting pin value\n");
+	}
+}
+
+static int vprbrd_gpiob_direction_input(struct gpio_chip *chip,
+			unsigned offset)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+
+	gpio->gpiob_out &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = vprbrd_gpiob_setdir(vb, offset, 0);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret)
+		dev_err(chip->dev, "usb error setting pin to input\n");
+
+	return ret;
+}
+
+static int vprbrd_gpiob_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+
+	gpio->gpiob_out |= (1 << offset);
+	if (value)
+		gpio->gpiob_val |= (1 << offset);
+	else
+		gpio->gpiob_val &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = vprbrd_gpiob_setdir(vb, offset, 1);
+	if (ret)
+		dev_err(chip->dev, "usb error setting pin to output\n");
+
+	mutex_unlock(&vb->lock);
+
+	vprbrd_gpiob_set(chip, offset, value);
+
+	return ret;
+}
+
+/* ----- end of gpio b chip ---------------------------------------------- */
+
+static int __devinit vprbrd_gpio_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_gpio *vb_gpio;
+	int ret;
+
+	vb_gpio = devm_kzalloc(&pdev->dev, sizeof(*vb_gpio), GFP_KERNEL);
+	if (vb_gpio == NULL)
+		return -ENOMEM;
+
+	vb_gpio->vb = vb;
+	/* registering gpio a */
+	vb_gpio->gpioa.label = "viperboard gpio a";
+	vb_gpio->gpioa.dev = &pdev->dev;
+	vb_gpio->gpioa.owner = THIS_MODULE;
+	vb_gpio->gpioa.base = -1;
+	vb_gpio->gpioa.ngpio = 16;
+	vb_gpio->gpioa.can_sleep = 1;
+	vb_gpio->gpioa.set = vprbrd_gpioa_set;
+	vb_gpio->gpioa.get = vprbrd_gpioa_get;
+	vb_gpio->gpioa.direction_input = vprbrd_gpioa_direction_input;
+	vb_gpio->gpioa.direction_output = vprbrd_gpioa_direction_output;
+	ret = gpiochip_add(&vb_gpio->gpioa);
+	if (ret < 0) {
+		dev_err(vb_gpio->gpioa.dev, "could not add gpio a\n");
+		goto err_gpioa;
+	}
+
+	/* registering gpio b */
+	vb_gpio->gpiob.label = "viperboard gpio b";
+	vb_gpio->gpiob.dev = &pdev->dev;
+	vb_gpio->gpiob.owner = THIS_MODULE;
+	vb_gpio->gpiob.base = -1;
+	vb_gpio->gpiob.ngpio = 16;
+	vb_gpio->gpiob.can_sleep = 1;
+	vb_gpio->gpiob.set = vprbrd_gpiob_set;
+	vb_gpio->gpiob.get = vprbrd_gpiob_get;
+	vb_gpio->gpiob.direction_input = vprbrd_gpiob_direction_input;
+	vb_gpio->gpiob.direction_output = vprbrd_gpiob_direction_output;
+	ret = gpiochip_add(&vb_gpio->gpiob);
+	if (ret < 0) {
+		dev_err(vb_gpio->gpiob.dev, "could not add gpio b\n");
+		goto err_gpiob;
+	}
+
+	platform_set_drvdata(pdev, vb_gpio);
+
+	return ret;
+
+err_gpiob:
+	if (gpiochip_remove(&vb_gpio->gpioa))
+		dev_err(vb_gpio->gpioa.dev, "could not remove gpio a\n");
+
+err_gpioa:
+	return ret;
+}
+
+static int __devexit vprbrd_gpio_remove(struct platform_device *pdev)
+{
+	struct vprbrd_gpio *vb_gpio = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = gpiochip_remove(&vb_gpio->gpiob);
+	if (ret == 0)
+		ret = gpiochip_remove(&vb_gpio->gpioa);
+
+	return ret;
+}
+
+static struct platform_driver vprbrd_gpio_driver = {
+	.driver.name	= "viperboard-gpio",
+	.driver.owner	= THIS_MODULE,
+	.probe		= vprbrd_gpio_probe,
+	.remove		= __devexit_p(vprbrd_gpio_remove),
+};
+
+static int __init vprbrd_gpio_init(void)
+{
+	switch (gpioa_freq) {
+	case 1000000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_1MHZ;
+		break;
+	case 100000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_100KHZ;
+		break;
+	case 10000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_10KHZ;
+		break;
+	case 1000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ;
+		break;
+	case 100:
+		gpioa_clk = VPRBRD_GPIOA_CLK_100HZ;
+		break;
+	case 10:
+		gpioa_clk = VPRBRD_GPIOA_CLK_10HZ;
+		break;
+	default:
+		pr_warn("invalid gpioa_freq (%d)\n", gpioa_freq);
+		gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ;
+	}
+
+	return platform_driver_register(&vprbrd_gpio_driver);
+}
+subsys_initcall(vprbrd_gpio_init);
+
+static void __exit vprbrd_gpio_exit(void)
+{
+	platform_driver_unregister(&vprbrd_gpio_driver);
+}
+module_exit(vprbrd_gpio_exit);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("GPIO driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-gpio");
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 18321b68b..983201b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -210,3 +210,5 @@
 source "drivers/gpu/drm/cirrus/Kconfig"
 
 source "drivers/gpu/drm/shmobile/Kconfig"
+
+source "drivers/gpu/drm/tegra/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 2ff5cef..6f58c81 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -8,7 +8,7 @@
 		drm_context.o drm_dma.o \
 		drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
 		drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \
-		drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \
+		drm_agpsupport.o drm_scatter.o drm_pci.o \
 		drm_platform.o drm_sysfs.o drm_hashtab.o drm_mm.o \
 		drm_crtc.o drm_modes.o drm_edid.o \
 		drm_info.o drm_debugfs.o drm_encoder_slave.o \
@@ -16,10 +16,11 @@
 
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
+drm-$(CONFIG_PCI) += ati_pcigart.o
 
 drm-usb-y   := drm_usb.o
 
-drm_kms_helper-y := drm_fb_helper.o drm_crtc_helper.o drm_dp_i2c_helper.o
+drm_kms_helper-y := drm_fb_helper.o drm_crtc_helper.o drm_dp_helper.o
 drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
 
@@ -48,4 +49,5 @@
 obj-$(CONFIG_DRM_UDL) += udl/
 obj-$(CONFIG_DRM_AST) += ast/
 obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
+obj-$(CONFIG_DRM_TEGRA) += tegra/
 obj-y			+= i2c/
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 1a026ac..3602731 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -186,11 +186,11 @@
 
 static int ast_bo_move(struct ttm_buffer_object *bo,
 		       bool evict, bool interruptible,
-		       bool no_wait_reserve, bool no_wait_gpu,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	return r;
 }
 
@@ -356,7 +356,7 @@
 
 	ret = ttm_bo_init(&ast->ttm.bdev, &astbo->bo, size,
 			  ttm_bo_type_device, &astbo->placement,
-			  align >> PAGE_SHIFT, 0, false, NULL, acc_size,
+			  align >> PAGE_SHIFT, false, NULL, acc_size,
 			  NULL, ast_bo_ttm_destroy);
 	if (ret)
 		return ret;
@@ -383,7 +383,7 @@
 	ast_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -406,7 +406,7 @@
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -431,7 +431,7 @@
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
 		DRM_ERROR("pushing to VRAM failed\n");
 		return ret;
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c
index 101e423..dcd1a8c 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -35,12 +35,15 @@
 };
 
 
-static void cirrus_kick_out_firmware_fb(struct pci_dev *pdev)
+static int cirrus_kick_out_firmware_fb(struct pci_dev *pdev)
 {
 	struct apertures_struct *ap;
 	bool primary = false;
 
 	ap = alloc_apertures(1);
+	if (!ap)
+		return -ENOMEM;
+
 	ap->ranges[0].base = pci_resource_start(pdev, 0);
 	ap->ranges[0].size = pci_resource_len(pdev, 0);
 
@@ -49,12 +52,18 @@
 #endif
 	remove_conflicting_framebuffers(ap, "cirrusdrmfb", primary);
 	kfree(ap);
+
+	return 0;
 }
 
 static int __devinit
 cirrus_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	cirrus_kick_out_firmware_fb(pdev);
+	int ret;
+
+	ret = cirrus_kick_out_firmware_fb(pdev);
+	if (ret)
+		return ret;
 
 	return drm_get_pci_dev(pdev, ent, &driver);
 }
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index bc83f83..1413a26 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -186,11 +186,11 @@
 
 static int cirrus_bo_move(struct ttm_buffer_object *bo,
 		       bool evict, bool interruptible,
-		       bool no_wait_reserve, bool no_wait_gpu,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	return r;
 }
 
@@ -361,7 +361,7 @@
 
 	ret = ttm_bo_init(&cirrus->ttm.bdev, &cirrusbo->bo, size,
 			  ttm_bo_type_device, &cirrusbo->placement,
-			  align >> PAGE_SHIFT, 0, false, NULL, acc_size,
+			  align >> PAGE_SHIFT, false, NULL, acc_size,
 			  NULL, cirrus_bo_ttm_destroy);
 	if (ret)
 		return ret;
@@ -388,7 +388,7 @@
 	cirrus_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -411,7 +411,7 @@
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -436,7 +436,7 @@
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
 		DRM_ERROR("pushing to VRAM failed\n");
 		return ret;
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index ef1b221..f2d667b 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -470,10 +470,8 @@
 {
 	struct drm_device *dev = crtc->dev;
 
-	if (crtc->gamma_store) {
-		kfree(crtc->gamma_store);
-		crtc->gamma_store = NULL;
-	}
+	kfree(crtc->gamma_store);
+	crtc->gamma_store = NULL;
 
 	drm_mode_object_put(dev, &crtc->base);
 	list_del(&crtc->head);
@@ -555,16 +553,17 @@
 	INIT_LIST_HEAD(&connector->probed_modes);
 	INIT_LIST_HEAD(&connector->modes);
 	connector->edid_blob_ptr = NULL;
+	connector->status = connector_status_unknown;
 
 	list_add_tail(&connector->head, &dev->mode_config.connector_list);
 	dev->mode_config.num_connector++;
 
 	if (connector_type != DRM_MODE_CONNECTOR_VIRTUAL)
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      dev->mode_config.edid_property,
 					      0);
 
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.dpms_property, 0);
 
  out:
@@ -2280,13 +2279,21 @@
 
 	for (i = 0; i < num_planes; i++) {
 		unsigned int width = r->width / (i != 0 ? hsub : 1);
+		unsigned int height = r->height / (i != 0 ? vsub : 1);
+		unsigned int cpp = drm_format_plane_cpp(r->pixel_format, i);
 
 		if (!r->handles[i]) {
 			DRM_DEBUG_KMS("no buffer object handle for plane %d\n", i);
 			return -EINVAL;
 		}
 
-		if (r->pitches[i] < drm_format_plane_cpp(r->pixel_format, i) * width) {
+		if ((uint64_t) width * cpp > UINT_MAX)
+			return -ERANGE;
+
+		if ((uint64_t) height * r->pitches[i] + r->offsets[i] > UINT_MAX)
+			return -ERANGE;
+
+		if (r->pitches[i] < width * cpp) {
 			DRM_DEBUG_KMS("bad pitch %u for plane %d\n", r->pitches[i], i);
 			return -EINVAL;
 		}
@@ -2323,6 +2330,11 @@
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
 
+	if (r->flags & ~DRM_MODE_FB_INTERLACED) {
+		DRM_DEBUG_KMS("bad framebuffer flags 0x%08x\n", r->flags);
+		return -EINVAL;
+	}
+
 	if ((config->min_width > r->width) || (r->width > config->max_width)) {
 		DRM_DEBUG_KMS("bad framebuffer width %d, should be >= %d && <= %d\n",
 			  r->width, config->min_width, config->max_width);
@@ -2916,27 +2928,6 @@
 }
 EXPORT_SYMBOL(drm_property_destroy);
 
-void drm_connector_attach_property(struct drm_connector *connector,
-			       struct drm_property *property, uint64_t init_val)
-{
-	drm_object_attach_property(&connector->base, property, init_val);
-}
-EXPORT_SYMBOL(drm_connector_attach_property);
-
-int drm_connector_property_set_value(struct drm_connector *connector,
-				  struct drm_property *property, uint64_t value)
-{
-	return drm_object_property_set_value(&connector->base, property, value);
-}
-EXPORT_SYMBOL(drm_connector_property_set_value);
-
-int drm_connector_property_get_value(struct drm_connector *connector,
-				  struct drm_property *property, uint64_t *val)
-{
-	return drm_object_property_get_value(&connector->base, property, val);
-}
-EXPORT_SYMBOL(drm_connector_property_get_value);
-
 void drm_object_attach_property(struct drm_mode_object *obj,
 				struct drm_property *property,
 				uint64_t init_val)
@@ -3173,15 +3164,17 @@
 	/* Delete edid, when there is none. */
 	if (!edid) {
 		connector->edid_blob_ptr = NULL;
-		ret = drm_connector_property_set_value(connector, dev->mode_config.edid_property, 0);
+		ret = drm_object_property_set_value(&connector->base, dev->mode_config.edid_property, 0);
 		return ret;
 	}
 
 	size = EDID_LENGTH * (1 + edid->extensions);
 	connector->edid_blob_ptr = drm_property_create_blob(connector->dev,
 							    size, edid);
+	if (!connector->edid_blob_ptr)
+		return -EINVAL;
 
-	ret = drm_connector_property_set_value(connector,
+	ret = drm_object_property_set_value(&connector->base,
 					       dev->mode_config.edid_property,
 					       connector->edid_blob_ptr->base.id);
 
@@ -3204,6 +3197,9 @@
 		for (i = 0; i < property->num_values; i++)
 			valid_mask |= (1ULL << property->values[i]);
 		return !(value & ~valid_mask);
+	} else if (property->flags & DRM_MODE_PROP_BLOB) {
+		/* Only the driver knows */
+		return true;
 	} else {
 		int i;
 		for (i = 0; i < property->num_values; i++)
@@ -3245,7 +3241,7 @@
 
 	/* store the property value if successful */
 	if (!ret)
-		drm_connector_property_set_value(connector, property, value);
+		drm_object_property_set_value(&connector->base, property, value);
 	return ret;
 }
 
@@ -3656,9 +3652,12 @@
 		if (encoder->funcs->reset)
 			encoder->funcs->reset(encoder);
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		connector->status = connector_status_unknown;
+
 		if (connector->funcs->reset)
 			connector->funcs->reset(connector);
+	}
 }
 EXPORT_SYMBOL(drm_mode_config_reset);
 
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 1227adf..7b2d378 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -39,6 +39,35 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_edid.h>
 
+/**
+ * drm_helper_move_panel_connectors_to_head() - move panels to the front in the
+ * 						connector list
+ * @dev: drm device to operate on
+ *
+ * Some userspace presumes that the first connected connector is the main
+ * display, where it's supposed to display e.g. the login screen. For
+ * laptops, this should be the main panel. Use this function to sort all
+ * (eDP/LVDS) panels to the front of the connector list, instead of
+ * painstakingly trying to initialize them in the right order.
+ */
+void drm_helper_move_panel_connectors_to_head(struct drm_device *dev)
+{
+	struct drm_connector *connector, *tmp;
+	struct list_head panel_list;
+
+	INIT_LIST_HEAD(&panel_list);
+
+	list_for_each_entry_safe(connector, tmp,
+				 &dev->mode_config.connector_list, head) {
+		if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS ||
+		    connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+			list_move_tail(&connector->head, &panel_list);
+	}
+
+	list_splice(&panel_list, &dev->mode_config.connector_list);
+}
+EXPORT_SYMBOL(drm_helper_move_panel_connectors_to_head);
+
 static bool drm_kms_helper_poll = true;
 module_param_named(poll, drm_kms_helper_poll, bool, 0600);
 
@@ -64,22 +93,21 @@
 
 /**
  * drm_helper_probe_single_connector_modes - get complete set of display modes
- * @dev: DRM device
+ * @connector: connector to probe
  * @maxX: max width for modes
  * @maxY: max height for modes
  *
  * LOCKING:
  * Caller must hold mode config lock.
  *
- * Based on @dev's mode_config layout, scan all the connectors and try to detect
- * modes on them.  Modes will first be added to the connector's probed_modes
- * list, then culled (based on validity and the @maxX, @maxY parameters) and
- * put into the normal modes list.
+ * Based on the helper callbacks implemented by @connector try to detect all
+ * valid modes.  Modes will first be added to the connector's probed_modes list,
+ * then culled (based on validity and the @maxX, @maxY parameters) and put into
+ * the normal modes list.
  *
- * Intended to be used either at bootup time or when major configuration
- * changes have occurred.
- *
- * FIXME: take into account monitor limits
+ * Intended to be used as a generic implementation of the ->probe() @connector
+ * callback for drivers that use the crtc helpers for output mode filtering and
+ * detection.
  *
  * RETURNS:
  * Number of modes found on @connector.
@@ -109,9 +137,14 @@
 			connector->funcs->force(connector);
 	} else {
 		connector->status = connector->funcs->detect(connector, true);
-		drm_kms_helper_poll_enable(dev);
 	}
 
+	/* Re-enable polling in case the global poll config changed. */
+	if (drm_kms_helper_poll != dev->mode_config.poll_running)
+		drm_kms_helper_poll_enable(dev);
+
+	dev->mode_config.poll_running = drm_kms_helper_poll;
+
 	if (connector->status == connector_status_disconnected) {
 		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] disconnected\n",
 			connector->base.id, drm_get_connector_name(connector));
@@ -325,17 +358,24 @@
 }
 
 /**
- * drm_crtc_set_mode - set a mode
+ * drm_crtc_helper_set_mode - internal helper to set a mode
  * @crtc: CRTC to program
  * @mode: mode to use
- * @x: width of mode
- * @y: height of mode
+ * @x: horizontal offset into the surface
+ * @y: vertical offset into the surface
+ * @old_fb: old framebuffer, for cleanup
  *
  * LOCKING:
  * Caller must hold mode config lock.
  *
  * Try to set @mode on @crtc.  Give @crtc and its associated connectors a chance
- * to fixup or reject the mode prior to trying to set it.
+ * to fixup or reject the mode prior to trying to set it. This is an internal
+ * helper that drivers could e.g. use to update properties that require the
+ * entire output pipe to be disabled and re-enabled in a new configuration. For
+ * example for changing whether audio is enabled on an HDMI link or for changing
+ * panel fitter or dither attributes. It is also called by the
+ * drm_crtc_helper_set_config() helper function to drive the mode setting
+ * sequence.
  *
  * RETURNS:
  * True if the mode was set successfully, or false otherwise.
@@ -491,20 +531,19 @@
 
 /**
  * drm_crtc_helper_set_config - set a new config from userspace
- * @crtc: CRTC to setup
- * @crtc_info: user provided configuration
- * @new_mode: new mode to set
- * @connector_set: set of connectors for the new config
- * @fb: new framebuffer
+ * @set: mode set configuration
  *
  * LOCKING:
  * Caller must hold mode config lock.
  *
- * Setup a new configuration, provided by the user in @crtc_info, and enable
- * it.
+ * Setup a new configuration, provided by the upper layers (either an ioctl call
+ * from userspace or internally e.g. from the fbdev support code) in @set, and
+ * enable it. This is the main helper function for drivers that implement
+ * kernel mode setting with the crtc helper functions and the assorted
+ * ->prepare(), ->modeset() and ->commit() helper callbacks.
  *
  * RETURNS:
- * Zero. (FIXME)
+ * Returns 0 on success, -ERRNO on failure.
  */
 int drm_crtc_helper_set_config(struct drm_mode_set *set)
 {
@@ -800,12 +839,14 @@
 }
 
 /**
- * drm_helper_connector_dpms
- * @connector affected connector
- * @mode DPMS mode
+ * drm_helper_connector_dpms() - connector dpms helper implementation
+ * @connector: affected connector
+ * @mode: DPMS mode
  *
- * Calls the low-level connector DPMS function, then
- * calls appropriate encoder and crtc DPMS functions as well
+ * This is the main helper function provided by the crtc helper framework for
+ * implementing the DPMS connector attribute. It computes the new desired DPMS
+ * state for all encoders and crtcs in the output mesh and calls the ->dpms()
+ * callback provided by the driver appropriately.
  */
 void drm_helper_connector_dpms(struct drm_connector *connector, int mode)
 {
@@ -918,6 +959,15 @@
 }
 EXPORT_SYMBOL(drm_helper_resume_force_mode);
 
+void drm_kms_helper_hotplug_event(struct drm_device *dev)
+{
+	/* send a uevent + call fbdev */
+	drm_sysfs_hotplug_event(dev);
+	if (dev->mode_config.funcs->output_poll_changed)
+		dev->mode_config.funcs->output_poll_changed(dev);
+}
+EXPORT_SYMBOL(drm_kms_helper_hotplug_event);
+
 #define DRM_OUTPUT_POLL_PERIOD (10*HZ)
 static void output_poll_execute(struct work_struct *work)
 {
@@ -933,20 +983,22 @@
 	mutex_lock(&dev->mode_config.mutex);
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 
-		/* if this is HPD or polled don't check it -
-		   TV out for instance */
-		if (!connector->polled)
+		/* Ignore forced connectors. */
+		if (connector->force)
 			continue;
 
-		else if (connector->polled & (DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT))
-			repoll = true;
+		/* Ignore HPD capable connectors and connectors where we don't
+		 * want any hotplug detection at all for polling. */
+		if (!connector->polled || connector->polled == DRM_CONNECTOR_POLL_HPD)
+			continue;
+
+		repoll = true;
 
 		old_status = connector->status;
 		/* if we are connected and don't want to poll for disconnect
 		   skip it */
 		if (old_status == connector_status_connected &&
-		    !(connector->polled & DRM_CONNECTOR_POLL_DISCONNECT) &&
-		    !(connector->polled & DRM_CONNECTOR_POLL_HPD))
+		    !(connector->polled & DRM_CONNECTOR_POLL_DISCONNECT))
 			continue;
 
 		connector->status = connector->funcs->detect(connector, false);
@@ -960,12 +1012,8 @@
 
 	mutex_unlock(&dev->mode_config.mutex);
 
-	if (changed) {
-		/* send a uevent + call fbdev */
-		drm_sysfs_hotplug_event(dev);
-		if (dev->mode_config.funcs->output_poll_changed)
-			dev->mode_config.funcs->output_poll_changed(dev);
-	}
+	if (changed)
+		drm_kms_helper_hotplug_event(dev);
 
 	if (repoll)
 		schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD);
@@ -988,7 +1036,8 @@
 		return;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		if (connector->polled)
+		if (connector->polled & (DRM_CONNECTOR_POLL_CONNECT |
+					 DRM_CONNECTOR_POLL_DISCONNECT))
 			poll = true;
 	}
 
@@ -1014,12 +1063,34 @@
 
 void drm_helper_hpd_irq_event(struct drm_device *dev)
 {
+	struct drm_connector *connector;
+	enum drm_connector_status old_status;
+	bool changed = false;
+
 	if (!dev->mode_config.poll_enabled)
 		return;
 
-	/* kill timer and schedule immediate execution, this doesn't block */
-	cancel_delayed_work(&dev->mode_config.output_poll_work);
-	if (drm_kms_helper_poll)
-		schedule_delayed_work(&dev->mode_config.output_poll_work, 0);
+	mutex_lock(&dev->mode_config.mutex);
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+
+		/* Only handle HPD capable connectors. */
+		if (!(connector->polled & DRM_CONNECTOR_POLL_HPD))
+			continue;
+
+		old_status = connector->status;
+
+		connector->status = connector->funcs->detect(connector, false);
+		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %d to %d\n",
+			      connector->base.id,
+			      drm_get_connector_name(connector),
+			      old_status, connector->status);
+		if (old_status != connector->status)
+			changed = true;
+	}
+
+	mutex_unlock(&dev->mode_config.mutex);
+
+	if (changed)
+		drm_kms_helper_hotplug_event(dev);
 }
 EXPORT_SYMBOL(drm_helper_hpd_irq_event);
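
drm_helper_hpd_irq_event() now re-detects DRM_CONNECTOR_POLL_HPD connectors synchronously and emits the uevent itself through the new drm_kms_helper_hotplug_event(), instead of kicking the poll worker. A hedged sketch of the intended call site; the work item and device structure are illustrative:

	struct my_device {
		struct drm_device *drm_dev;
		struct work_struct hotplug_work;
	};

	static void my_hotplug_work_func(struct work_struct *work)
	{
		struct my_device *mydev =
			container_of(work, struct my_device, hotplug_work);

		/* re-probes HPD-capable connectors, sends uevent on change */
		drm_helper_hpd_irq_event(mydev->drm_dev);
	}
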
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
new file mode 100644
index 0000000..89e1966
--- /dev/null
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright © 2009 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/i2c.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drmP.h>
+
+/**
+ * DOC: dp helpers
+ *
+ * These functions contain some common logic and helpers at various abstraction
+ * levels to deal with DisplayPort sink devices and related things like DP aux
+ * channel transfers, EDID reading over DP aux channels, decoding certain DPCD
+ * blocks, ...
+ */
+
+/* Run a single AUX_CH I2C transaction, writing/reading data as necessary */
+static int
+i2c_algo_dp_aux_transaction(struct i2c_adapter *adapter, int mode,
+			    uint8_t write_byte, uint8_t *read_byte)
+{
+	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
+	int ret;
+
+	ret = (*algo_data->aux_ch)(adapter, mode,
+				   write_byte, read_byte);
+	return ret;
+}
+
+/*
+ * I2C over AUX CH
+ */
+
+/*
+ * Send the address. If the I2C link is running, this 'restarts'
+ * the connection with the new address; this is used for doing
+ * a write followed by a read (as needed for DDC)
+ */
+static int
+i2c_algo_dp_aux_address(struct i2c_adapter *adapter, u16 address, bool reading)
+{
+	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
+	int mode = MODE_I2C_START;
+	int ret;
+
+	if (reading)
+		mode |= MODE_I2C_READ;
+	else
+		mode |= MODE_I2C_WRITE;
+	algo_data->address = address;
+	algo_data->running = true;
+	ret = i2c_algo_dp_aux_transaction(adapter, mode, 0, NULL);
+	return ret;
+}
+
+/*
+ * Stop the I2C transaction. This closes out the link, sending
+ * a bare address packet with the MOT bit turned off
+ */
+static void
+i2c_algo_dp_aux_stop(struct i2c_adapter *adapter, bool reading)
+{
+	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
+	int mode = MODE_I2C_STOP;
+
+	if (reading)
+		mode |= MODE_I2C_READ;
+	else
+		mode |= MODE_I2C_WRITE;
+	if (algo_data->running) {
+		(void) i2c_algo_dp_aux_transaction(adapter, mode, 0, NULL);
+		algo_data->running = false;
+	}
+}
+
+/*
+ * Write a single byte to the current I2C address; the
+ * I2C link must be running or this returns -EIO
+ */
+static int
+i2c_algo_dp_aux_put_byte(struct i2c_adapter *adapter, u8 byte)
+{
+	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
+	int ret;
+
+	if (!algo_data->running)
+		return -EIO;
+
+	ret = i2c_algo_dp_aux_transaction(adapter, MODE_I2C_WRITE, byte, NULL);
+	return ret;
+}
+
+/*
+ * Read a single byte from the current I2C address; the
+ * I2C link must be running or this returns -EIO
+ */
+static int
+i2c_algo_dp_aux_get_byte(struct i2c_adapter *adapter, u8 *byte_ret)
+{
+	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
+	int ret;
+
+	if (!algo_data->running)
+		return -EIO;
+
+	ret = i2c_algo_dp_aux_transaction(adapter, MODE_I2C_READ, 0, byte_ret);
+	return ret;
+}
+
+static int
+i2c_algo_dp_aux_xfer(struct i2c_adapter *adapter,
+		     struct i2c_msg *msgs,
+		     int num)
+{
+	int ret = 0;
+	bool reading = false;
+	int m;
+	int b;
+
+	for (m = 0; m < num; m++) {
+		u16 len = msgs[m].len;
+		u8 *buf = msgs[m].buf;
+		reading = (msgs[m].flags & I2C_M_RD) != 0;
+		ret = i2c_algo_dp_aux_address(adapter, msgs[m].addr, reading);
+		if (ret < 0)
+			break;
+		if (reading) {
+			for (b = 0; b < len; b++) {
+				ret = i2c_algo_dp_aux_get_byte(adapter, &buf[b]);
+				if (ret < 0)
+					break;
+			}
+		} else {
+			for (b = 0; b < len; b++) {
+				ret = i2c_algo_dp_aux_put_byte(adapter, buf[b]);
+				if (ret < 0)
+					break;
+			}
+		}
+		if (ret < 0)
+			break;
+	}
+	if (ret >= 0)
+		ret = num;
+	i2c_algo_dp_aux_stop(adapter, reading);
+	DRM_DEBUG_KMS("dp_aux_xfer return %d\n", ret);
+	return ret;
+}
+
+static u32
+i2c_algo_dp_aux_functionality(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL |
+	       I2C_FUNC_SMBUS_READ_BLOCK_DATA |
+	       I2C_FUNC_SMBUS_BLOCK_PROC_CALL |
+	       I2C_FUNC_10BIT_ADDR;
+}
+
+static const struct i2c_algorithm i2c_dp_aux_algo = {
+	.master_xfer	= i2c_algo_dp_aux_xfer,
+	.functionality	= i2c_algo_dp_aux_functionality,
+};
+
+static void
+i2c_dp_aux_reset_bus(struct i2c_adapter *adapter)
+{
+	(void) i2c_algo_dp_aux_address(adapter, 0, false);
+	(void) i2c_algo_dp_aux_stop(adapter, false);
+}
+
+static int
+i2c_dp_aux_prepare_bus(struct i2c_adapter *adapter)
+{
+	adapter->algo = &i2c_dp_aux_algo;
+	adapter->retries = 3;
+	i2c_dp_aux_reset_bus(adapter);
+	return 0;
+}
+
+/**
+ * i2c_dp_aux_add_bus() - register an i2c adapter using the aux ch helper
+ * @adapter: i2c adapter to register
+ *
+ * This registers an i2c adapter that uses the dp aux channel as its underlying
+ * transport. The driver needs to fill out the &i2c_algo_dp_aux_data structure
+ * and store it in the algo_data member of the @adapter argument. This will be
+ * used by the i2c over dp aux algorithm to drive the hardware.
+ *
+ * RETURNS:
+ * 0 on success, -ERRNO on failure.
+ */
+int
+i2c_dp_aux_add_bus(struct i2c_adapter *adapter)
+{
+	int error;
+
+	error = i2c_dp_aux_prepare_bus(adapter);
+	if (error)
+		return error;
+	error = i2c_add_adapter(adapter);
+	return error;
+}
+EXPORT_SYMBOL(i2c_dp_aux_add_bus);
+
+/* Helpers for DP link training */
+static u8 dp_link_status(u8 link_status[DP_LINK_STATUS_SIZE], int r)
+{
+	return link_status[r - DP_LANE0_1_STATUS];
+}
+
+static u8 dp_get_lane_status(u8 link_status[DP_LINK_STATUS_SIZE],
+			     int lane)
+{
+	int i = DP_LANE0_1_STATUS + (lane >> 1);
+	int s = (lane & 1) * 4;
+	u8 l = dp_link_status(link_status, i);
+	return (l >> s) & 0xf;
+}
+
+bool drm_dp_channel_eq_ok(u8 link_status[DP_LINK_STATUS_SIZE],
+			  int lane_count)
+{
+	u8 lane_align;
+	u8 lane_status;
+	int lane;
+
+	lane_align = dp_link_status(link_status,
+				    DP_LANE_ALIGN_STATUS_UPDATED);
+	if ((lane_align & DP_INTERLANE_ALIGN_DONE) == 0)
+		return false;
+	for (lane = 0; lane < lane_count; lane++) {
+		lane_status = dp_get_lane_status(link_status, lane);
+		if ((lane_status & DP_CHANNEL_EQ_BITS) != DP_CHANNEL_EQ_BITS)
+			return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL(drm_dp_channel_eq_ok);
+
+bool drm_dp_clock_recovery_ok(u8 link_status[DP_LINK_STATUS_SIZE],
+			      int lane_count)
+{
+	int lane;
+	u8 lane_status;
+
+	for (lane = 0; lane < lane_count; lane++) {
+		lane_status = dp_get_lane_status(link_status, lane);
+		if ((lane_status & DP_LANE_CR_DONE) == 0)
+			return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL(drm_dp_clock_recovery_ok);
+
+u8 drm_dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE],
+				     int lane)
+{
+	int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
+	int s = ((lane & 1) ?
+		 DP_ADJUST_VOLTAGE_SWING_LANE1_SHIFT :
+		 DP_ADJUST_VOLTAGE_SWING_LANE0_SHIFT);
+	u8 l = dp_link_status(link_status, i);
+
+	return ((l >> s) & 0x3) << DP_TRAIN_VOLTAGE_SWING_SHIFT;
+}
+EXPORT_SYMBOL(drm_dp_get_adjust_request_voltage);
+
+u8 drm_dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE],
+					  int lane)
+{
+	int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
+	int s = ((lane & 1) ?
+		 DP_ADJUST_PRE_EMPHASIS_LANE1_SHIFT :
+		 DP_ADJUST_PRE_EMPHASIS_LANE0_SHIFT);
+	u8 l = dp_link_status(link_status, i);
+
+	return ((l >> s) & 0x3) << DP_TRAIN_PRE_EMPHASIS_SHIFT;
+}
+EXPORT_SYMBOL(drm_dp_get_adjust_request_pre_emphasis);
+
+void drm_dp_link_train_clock_recovery_delay(u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+	if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0)
+		udelay(100);
+	else
+		mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4);
+}
+EXPORT_SYMBOL(drm_dp_link_train_clock_recovery_delay);
+
+void drm_dp_link_train_channel_eq_delay(u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+	if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0)
+		udelay(400);
+	else
+		mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4);
+}
+EXPORT_SYMBOL(drm_dp_link_train_channel_eq_delay);
+
+u8 drm_dp_link_rate_to_bw_code(int link_rate)
+{
+	switch (link_rate) {
+	case 162000:
+	default:
+		return DP_LINK_BW_1_62;
+	case 270000:
+		return DP_LINK_BW_2_7;
+	case 540000:
+		return DP_LINK_BW_5_4;
+	}
+}
+EXPORT_SYMBOL(drm_dp_link_rate_to_bw_code);
+
+int drm_dp_bw_code_to_link_rate(u8 link_bw)
+{
+	switch (link_bw) {
+	case DP_LINK_BW_1_62:
+	default:
+		return 162000;
+	case DP_LINK_BW_2_7:
+		return 270000;
+	case DP_LINK_BW_5_4:
+		return 540000;
+	}
+}
+EXPORT_SYMBOL(drm_dp_bw_code_to_link_rate);
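
Per the kernel-doc on i2c_dp_aux_add_bus() above, a driver fills a struct i2c_algo_dp_aux_data, points adapter->algo_data at it, and registers the adapter. A hedged registration sketch; my_aux_ch() and struct my_dp are illustrative, and a real implementation performs the AUX transaction against its hardware:

	struct my_dp {
		struct i2c_adapter adapter;
		struct i2c_algo_dp_aux_data algo;
	};

	static int my_aux_ch(struct i2c_adapter *adapter, int mode,
			     uint8_t write_byte, uint8_t *read_byte)
	{
		/* run one AUX transaction; negative errno on failure */
		return 0;
	}

	static int my_register_dp_i2c(struct my_dp *dp, struct device *parent)
	{
		dp->algo.running = false;
		dp->algo.address = 0;
		dp->algo.aux_ch = my_aux_ch;

		dp->adapter.owner = THIS_MODULE;
		dp->adapter.class = I2C_CLASS_DDC;
		dp->adapter.algo_data = &dp->algo;
		dp->adapter.dev.parent = parent;
		strlcpy(dp->adapter.name, "my-dp-aux",
			sizeof(dp->adapter.name));

		return i2c_dp_aux_add_bus(&dp->adapter);
	}
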
diff --git a/drivers/gpu/drm/drm_dp_i2c_helper.c b/drivers/gpu/drm/drm_dp_i2c_helper.c
deleted file mode 100644
index 7f246f2..0000000
--- a/drivers/gpu/drm/drm_dp_i2c_helper.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright © 2009 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that copyright
- * notice and this permission notice appear in supporting documentation, and
- * that the name of the copyright holders not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  The copyright holders make no representations
- * about the suitability of this software for any purpose.  It is provided "as
- * is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THIS SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/i2c.h>
-#include <drm/drm_dp_helper.h>
-#include <drm/drmP.h>
-
-/* Run a single AUX_CH I2C transaction, writing/reading data as necessary */
-static int
-i2c_algo_dp_aux_transaction(struct i2c_adapter *adapter, int mode,
-			    uint8_t write_byte, uint8_t *read_byte)
-{
-	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
-	int ret;
-	
-	ret = (*algo_data->aux_ch)(adapter, mode,
-				   write_byte, read_byte);
-	return ret;
-}
-
-/*
- * I2C over AUX CH
- */
-
-/*
- * Send the address. If the I2C link is running, this 'restarts'
- * the connection with the new address, this is used for doing
- * a write followed by a read (as needed for DDC)
- */
-static int
-i2c_algo_dp_aux_address(struct i2c_adapter *adapter, u16 address, bool reading)
-{
-	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
-	int mode = MODE_I2C_START;
-	int ret;
-
-	if (reading)
-		mode |= MODE_I2C_READ;
-	else
-		mode |= MODE_I2C_WRITE;
-	algo_data->address = address;
-	algo_data->running = true;
-	ret = i2c_algo_dp_aux_transaction(adapter, mode, 0, NULL);
-	return ret;
-}
-
-/*
- * Stop the I2C transaction. This closes out the link, sending
- * a bare address packet with the MOT bit turned off
- */
-static void
-i2c_algo_dp_aux_stop(struct i2c_adapter *adapter, bool reading)
-{
-	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
-	int mode = MODE_I2C_STOP;
-
-	if (reading)
-		mode |= MODE_I2C_READ;
-	else
-		mode |= MODE_I2C_WRITE;
-	if (algo_data->running) {
-		(void) i2c_algo_dp_aux_transaction(adapter, mode, 0, NULL);
-		algo_data->running = false;
-	}
-}
-
-/*
- * Write a single byte to the current I2C address, the
- * the I2C link must be running or this returns -EIO
- */
-static int
-i2c_algo_dp_aux_put_byte(struct i2c_adapter *adapter, u8 byte)
-{
-	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
-	int ret;
-
-	if (!algo_data->running)
-		return -EIO;
-
-	ret = i2c_algo_dp_aux_transaction(adapter, MODE_I2C_WRITE, byte, NULL);
-	return ret;
-}
-
-/*
- * Read a single byte from the current I2C address, the
- * I2C link must be running or this returns -EIO
- */
-static int
-i2c_algo_dp_aux_get_byte(struct i2c_adapter *adapter, u8 *byte_ret)
-{
-	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
-	int ret;
-
-	if (!algo_data->running)
-		return -EIO;
-
-	ret = i2c_algo_dp_aux_transaction(adapter, MODE_I2C_READ, 0, byte_ret);
-	return ret;
-}
-
-static int
-i2c_algo_dp_aux_xfer(struct i2c_adapter *adapter,
-		     struct i2c_msg *msgs,
-		     int num)
-{
-	int ret = 0;
-	bool reading = false;
-	int m;
-	int b;
-
-	for (m = 0; m < num; m++) {
-		u16 len = msgs[m].len;
-		u8 *buf = msgs[m].buf;
-		reading = (msgs[m].flags & I2C_M_RD) != 0;
-		ret = i2c_algo_dp_aux_address(adapter, msgs[m].addr, reading);
-		if (ret < 0)
-			break;
-		if (reading) {
-			for (b = 0; b < len; b++) {
-				ret = i2c_algo_dp_aux_get_byte(adapter, &buf[b]);
-				if (ret < 0)
-					break;
-			}
-		} else {
-			for (b = 0; b < len; b++) {
-				ret = i2c_algo_dp_aux_put_byte(adapter, buf[b]);
-				if (ret < 0)
-					break;
-			}
-		}
-		if (ret < 0)
-			break;
-	}
-	if (ret >= 0)
-		ret = num;
-	i2c_algo_dp_aux_stop(adapter, reading);
-	DRM_DEBUG_KMS("dp_aux_xfer return %d\n", ret);
-	return ret;
-}
-
-static u32
-i2c_algo_dp_aux_functionality(struct i2c_adapter *adapter)
-{
-	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL |
-	       I2C_FUNC_SMBUS_READ_BLOCK_DATA |
-	       I2C_FUNC_SMBUS_BLOCK_PROC_CALL |
-	       I2C_FUNC_10BIT_ADDR;
-}
-
-static const struct i2c_algorithm i2c_dp_aux_algo = {
-	.master_xfer	= i2c_algo_dp_aux_xfer,
-	.functionality	= i2c_algo_dp_aux_functionality,
-};
-
-static void
-i2c_dp_aux_reset_bus(struct i2c_adapter *adapter)
-{
-	(void) i2c_algo_dp_aux_address(adapter, 0, false);
-	(void) i2c_algo_dp_aux_stop(adapter, false);
-					   
-}
-
-static int
-i2c_dp_aux_prepare_bus(struct i2c_adapter *adapter)
-{
-	adapter->algo = &i2c_dp_aux_algo;
-	adapter->retries = 3;
-	i2c_dp_aux_reset_bus(adapter);
-	return 0;
-}
-
-int
-i2c_dp_aux_add_bus(struct i2c_adapter *adapter)
-{
-	int error;
-	
-	error = i2c_dp_aux_prepare_bus(adapter);
-	if (error)
-		return error;
-	error = i2c_add_adapter(adapter);
-	return error;
-}
-EXPORT_SYMBOL(i2c_dp_aux_add_bus);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index fadcd44..5a3770f 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -307,12 +307,9 @@
 
 static bool drm_edid_is_zero(u8 *in_edid, int length)
 {
-	int i;
-	u32 *raw_edid = (u32 *)in_edid;
+	if (memchr_inv(in_edid, 0, length))
+		return false;
 
-	for (i = 0; i < length / 4; i++)
-		if (*(raw_edid + i) != 0)
-			return false;
 	return true;
 }
 
@@ -1516,6 +1513,26 @@
 }
 EXPORT_SYMBOL(drm_find_cea_extension);
 
+/*
+ * Looks for a CEA mode matching given drm_display_mode.
+ * Returns its CEA Video ID code, or 0 if not found.
+ */
+u8 drm_match_cea_mode(struct drm_display_mode *to_match)
+{
+	struct drm_display_mode *cea_mode;
+	u8 mode;
+
+	for (mode = 0; mode < drm_num_cea_modes; mode++) {
+		cea_mode = (struct drm_display_mode *)&edid_cea_modes[mode];
+
+		if (drm_mode_equal(to_match, cea_mode))
+			return mode + 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_match_cea_mode);
+
 static int
 do_cea_modes (struct drm_connector *connector, u8 *db, u8 len)
 {
@@ -1622,7 +1639,7 @@
 	if (len >= 12)
 		connector->audio_latency[1] = db[12];
 
-	DRM_LOG_KMS("HDMI: DVI dual %d, "
+	DRM_DEBUG_KMS("HDMI: DVI dual %d, "
 		    "max TMDS clock %d, "
 		    "latency present %d %d, "
 		    "video latency %d %d, "
@@ -2062,3 +2079,22 @@
 	return num_modes;
 }
 EXPORT_SYMBOL(drm_add_modes_noedid);
+
+/**
+ * drm_mode_cea_vic - return the CEA-861 VIC of a given mode
+ * @mode: mode
+ *
+ * RETURNS:
+ * The VIC number, 0 in case it's not a CEA-861 mode.
+ */
+uint8_t drm_mode_cea_vic(const struct drm_display_mode *mode)
+{
+	uint8_t i;
+
+	for (i = 0; i < drm_num_cea_modes; i++)
+		if (drm_mode_equal(mode, &edid_cea_modes[i]))
+			return i + 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_cea_vic);
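
Both drm_match_cea_mode() and the new drm_mode_cea_vic() perform the same table lookup from a display mode to its CEA-861 Video ID Code. A hedged caller sketch for an HDMI encoder building an AVI infoframe; my_encoder and my_write_avi_infoframe() are illustrative:

	static void my_set_avi_vic(struct my_encoder *enc,
				   struct drm_display_mode *mode)
	{
		u8 vic = drm_match_cea_mode(mode);

		/* vic == 0 means the mode is not a CEA-861 mode */
		my_write_avi_infoframe(enc, vic);
	}
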
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 4d58d7e..954d175 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -27,6 +27,8 @@
  *      Dave Airlie <airlied@linux.ie>
  *      Jesse Barnes <jesse.barnes@intel.com>
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/sysrq.h>
 #include <linux/slab.h>
@@ -43,6 +45,15 @@
 
 static LIST_HEAD(kernel_fb_helper_list);
 
+/**
+ * DOC: fbdev helpers
+ *
+ * The fb helper functions are useful to provide an fbdev on top of a drm kernel
+ * mode setting driver. They can be used mostly independently from the crtc
+ * helper functions used by many drivers to implement the kernel mode setting
+ * interfaces.
+ */
+
 /* simple single crtc case helper function */
 int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper)
 {
@@ -95,10 +106,16 @@
 			if (mode->force) {
 				const char *s;
 				switch (mode->force) {
-				case DRM_FORCE_OFF: s = "OFF"; break;
-				case DRM_FORCE_ON_DIGITAL: s = "ON - dig"; break;
+				case DRM_FORCE_OFF:
+					s = "OFF";
+					break;
+				case DRM_FORCE_ON_DIGITAL:
+					s = "ON - dig";
+					break;
 				default:
-				case DRM_FORCE_ON: s = "ON"; break;
+				case DRM_FORCE_ON:
+					s = "ON";
+					break;
 				}
 
 				DRM_INFO("forcing %s connector %s\n",
@@ -265,7 +282,7 @@
 	if (panic_timeout < 0)
 		return 0;
 
-	printk(KERN_ERR "panic occurred, switching back to text console\n");
+	pr_err("panic occurred, switching back to text console\n");
 	return drm_fb_helper_force_kernel_mode();
 }
 EXPORT_SYMBOL(drm_fb_helper_panic);
@@ -331,7 +348,7 @@
 		for (j = 0; j < fb_helper->connector_count; j++) {
 			connector = fb_helper->connector_info[j]->connector;
 			connector->funcs->dpms(connector, dpms_mode);
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 				dev->mode_config.dpms_property, dpms_mode);
 		}
 	}
@@ -433,7 +450,7 @@
 	if (!list_empty(&fb_helper->kernel_fb_list)) {
 		list_del(&fb_helper->kernel_fb_list);
 		if (list_empty(&kernel_fb_helper_list)) {
-			printk(KERN_INFO "drm: unregistered panic notifier\n");
+			pr_info("drm: unregistered panic notifier\n");
 			atomic_notifier_chain_unregister(&panic_notifier_list,
 							 &paniced);
 			unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op);
@@ -724,9 +741,9 @@
 
 	/* if driver picks 8 or 16 by default use that
 	   for both depth/bpp */
-	if (preferred_bpp != sizes.surface_bpp) {
+	if (preferred_bpp != sizes.surface_bpp)
 		sizes.surface_depth = sizes.surface_bpp = preferred_bpp;
-	}
+
 	/* first up get a count of crtcs now in use and new min/maxes width/heights */
 	for (i = 0; i < fb_helper->connector_count; i++) {
 		struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i];
@@ -794,18 +811,16 @@
 	info = fb_helper->fbdev;
 
 	/* set the fb pointer */
-	for (i = 0; i < fb_helper->crtc_count; i++) {
+	for (i = 0; i < fb_helper->crtc_count; i++)
 		fb_helper->crtc_info[i].mode_set.fb = fb_helper->fb;
-	}
 
 	if (new_fb) {
 		info->var.pixclock = 0;
-		if (register_framebuffer(info) < 0) {
+		if (register_framebuffer(info) < 0)
 			return -EINVAL;
-		}
 
-		printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
-		       info->fix.id);
+		dev_info(fb_helper->dev->dev, "fb%d: %s frame buffer device\n",
+				info->node, info->fix.id);
 
 	} else {
 		drm_fb_helper_set_par(info);
@@ -814,7 +829,7 @@
 	/* Switch back to kernel console on panic */
 	/* multi card linked list maybe */
 	if (list_empty(&kernel_fb_helper_list)) {
-		printk(KERN_INFO "drm: registered panic notifier\n");
+		dev_info(fb_helper->dev->dev, "registered panic notifier\n");
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &paniced);
 		register_sysrq_key('v', &sysrq_drm_fb_helper_restore_op);
@@ -1002,11 +1017,11 @@
 {
 	bool enable;
 
-	if (strict) {
+	if (strict)
 		enable = connector->status == connector_status_connected;
-	} else {
+	else
 		enable = connector->status != connector_status_disconnected;
-	}
+
 	return enable;
 }
 
@@ -1191,9 +1206,8 @@
 	for (c = 0; c < fb_helper->crtc_count; c++) {
 		crtc = &fb_helper->crtc_info[c];
 
-		if ((encoder->possible_crtcs & (1 << c)) == 0) {
+		if ((encoder->possible_crtcs & (1 << c)) == 0)
 			continue;
-		}
 
 		for (o = 0; o < n; o++)
 			if (best_crtcs[o] == crtc)
@@ -1246,6 +1260,11 @@
 			sizeof(struct drm_display_mode *), GFP_KERNEL);
 	enabled = kcalloc(dev->mode_config.num_connector,
 			  sizeof(bool), GFP_KERNEL);
+	if (!crtcs || !modes || !enabled) {
+		DRM_ERROR("Memory allocation failed\n");
+		goto out;
+	}
+
 
 	drm_enable_connectors(fb_helper, enabled);
 
@@ -1284,6 +1303,7 @@
 		}
 	}
 
+out:
 	kfree(crtcs);
 	kfree(modes);
 	kfree(enabled);
@@ -1291,12 +1311,14 @@
 
 /**
  * drm_helper_initial_config - setup a sane initial connector configuration
- * @dev: DRM device
+ * @fb_helper: fb_helper device struct
+ * @bpp_sel: bpp value to use for the framebuffer configuration
  *
  * LOCKING:
- * Called at init time, must take mode config lock.
+ * Called at init time by the driver to set up the @fb_helper initial
+ * configuration, must take the mode config lock.
  *
- * Scan the CRTCs and connectors and try to put together an initial setup.
+ * Scans the CRTCs and connectors and tries to put together an initial setup.
  * At the moment, this is a cloned configuration across all heads with
  * a new framebuffer object as the backing store.
  *
@@ -1319,9 +1341,9 @@
 	/*
 	 * we shouldn't end up with no modes here.
 	 */
-	if (count == 0) {
-		printk(KERN_INFO "No connectors reported connected with modes\n");
-	}
+	if (count == 0)
+		dev_info(fb_helper->dev->dev, "No connectors reported connected with modes\n");
+
 	drm_setup_crtcs(fb_helper);
 
 	return drm_fb_helper_single_fb_probe(fb_helper, bpp_sel);
@@ -1330,7 +1352,7 @@
 
 /**
  * drm_fb_helper_hotplug_event - respond to a hotplug notification by
- *                               probing all the outputs attached to the fb.
+ *                               probing all the outputs attached to the fb
  * @fb_helper: the drm_fb_helper
  *
  * LOCKING:
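
The new DOC comment above is worth a concrete example: a KMS driver that wants
an fbdev typically runs the following sequence at init time (a sketch with
error handling omitted; priv, num_crtc and max_conn are hypothetical driver
state):

	struct drm_fb_helper *helper = &priv->fb_helper;

	drm_fb_helper_init(dev, helper, num_crtc, max_conn);
	drm_fb_helper_single_add_all_connectors(helper);
	drm_fb_helper_initial_config(helper, 32 /* preferred bpp */);
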
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index c3745c4..8025454 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -67,10 +67,8 @@
 	hashed_key = hash_long(key, ht->order);
 	DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each(list, h_list) {
-		entry = hlist_entry(list, struct drm_hash_item, head);
+	hlist_for_each_entry(entry, list, h_list, head)
 		DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key);
-	}
 }
 
 static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht,
@@ -83,8 +81,7 @@
 
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each(list, h_list) {
-		entry = hlist_entry(list, struct drm_hash_item, head);
+	hlist_for_each_entry(entry, list, h_list, head) {
 		if (entry->key == key)
 			return list;
 		if (entry->key > key)
@@ -93,6 +90,24 @@
 	return NULL;
 }
 
+static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht,
+					      unsigned long key)
+{
+	struct drm_hash_item *entry;
+	struct hlist_head *h_list;
+	struct hlist_node *list;
+	unsigned int hashed_key;
+
+	hashed_key = hash_long(key, ht->order);
+	h_list = &ht->table[hashed_key];
+	hlist_for_each_entry_rcu(entry, list, h_list, head) {
+		if (entry->key == key)
+			return list;
+		if (entry->key > key)
+			break;
+	}
+	return NULL;
+}
 
 int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 {
@@ -105,8 +120,7 @@
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
 	parent = NULL;
-	hlist_for_each(list, h_list) {
-		entry = hlist_entry(list, struct drm_hash_item, head);
+	hlist_for_each_entry(entry, list, h_list, head) {
 		if (entry->key == key)
 			return -EINVAL;
 		if (entry->key > key)
@@ -114,9 +128,9 @@
 		parent = list;
 	}
 	if (parent) {
-		hlist_add_after(parent, &item->head);
+		hlist_add_after_rcu(parent, &item->head);
 	} else {
-		hlist_add_head(&item->head, h_list);
+		hlist_add_head_rcu(&item->head, h_list);
 	}
 	return 0;
 }
@@ -156,7 +170,7 @@
 {
 	struct hlist_node *list;
 
-	list = drm_ht_find_key(ht, key);
+	list = drm_ht_find_key_rcu(ht, key);
 	if (!list)
 		return -EINVAL;
 
@@ -171,7 +185,7 @@
 
 	list = drm_ht_find_key(ht, key);
 	if (list) {
-		hlist_del_init(list);
+		hlist_del_init_rcu(list);
 		return 0;
 	}
 	return -EINVAL;
@@ -179,7 +193,7 @@
 
 int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 {
-	hlist_del_init(&item->head);
+	hlist_del_init_rcu(&item->head);
 	return 0;
 }
 EXPORT_SYMBOL(drm_ht_remove_item);
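
With the list traversals and insert/delete switched to the _rcu list
primitives, lookups can now run under rcu_read_lock() alone while updates keep
relying on the caller's external serialization. A minimal reader-side sketch,
assuming the converted lookup above is reached through drm_ht_find_item():

	struct drm_hash_item *item;
	int ret;

	rcu_read_lock();
	ret = drm_ht_find_item(ht, key, &item);
	/* item must not be dereferenced after the read-side section
	 * unless the caller holds its own reference on the object */
	rcu_read_unlock();
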
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 23dd975..e77bd8b 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -287,6 +287,9 @@
 		req->value |= dev->driver->prime_fd_to_handle ? DRM_PRIME_CAP_IMPORT : 0;
 		req->value |= dev->driver->prime_handle_to_fd ? DRM_PRIME_CAP_EXPORT : 0;
 		break;
+	case DRM_CAP_TIMESTAMP_MONOTONIC:
+		req->value = drm_timestamp_monotonic;
+		break;
 	default:
 		return -EINVAL;
 	}
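
Userspace can probe the new capability through libdrm's drmGetCap(), assuming
a libdrm new enough to carry the DRM_CAP_TIMESTAMP_MONOTONIC define; a minimal
sketch:

	#include <stdint.h>
	#include <xf86drm.h>

	static int has_monotonic_timestamps(int fd)
	{
		uint64_t cap = 0;

		if (drmGetCap(fd, DRM_CAP_TIMESTAMP_MONOTONIC, &cap))
			return 0;	/* older kernel: cap unknown */
		return cap == 1;	/* 1: CLOCK_MONOTONIC timestamps */
	}
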
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 3a3d0ce..19c01ca 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -106,6 +106,7 @@
 	s64 diff_ns;
 	int vblrc;
 	struct timeval tvblank;
+	int count = DRM_TIMESTAMP_MAXRETRIES;
 
 	/* Prevent vblank irq processing while disabling vblank irqs,
 	 * so no updates of timestamps or count can happen after we've
@@ -131,7 +132,10 @@
 	do {
 		dev->last_vblank[crtc] = dev->driver->get_vblank_counter(dev, crtc);
 		vblrc = drm_get_last_vbltimestamp(dev, crtc, &tvblank, 0);
-	} while (dev->last_vblank[crtc] != dev->driver->get_vblank_counter(dev, crtc));
+	} while (dev->last_vblank[crtc] != dev->driver->get_vblank_counter(dev, crtc) && (--count) && vblrc);
+
+	if (!count)
+		vblrc = 0;
 
 	/* Compute time difference to stored timestamp of last vblank
 	 * as updated by last invocation of drm_handle_vblank() in vblank irq.
@@ -576,7 +580,8 @@
 					  unsigned flags,
 					  struct drm_crtc *refcrtc)
 {
-	struct timeval stime, raw_time;
+	ktime_t stime, etime, mono_time_offset;
+	struct timeval tv_etime;
 	struct drm_display_mode *mode;
 	int vbl_status, vtotal, vdisplay;
 	int vpos, hpos, i;
@@ -625,13 +630,15 @@
 		preempt_disable();
 
 		/* Get system timestamp before query. */
-		do_gettimeofday(&stime);
+		stime = ktime_get();
 
 		/* Get vertical and horizontal scanout pos. vpos, hpos. */
 		vbl_status = dev->driver->get_scanout_position(dev, crtc, &vpos, &hpos);
 
 		/* Get system timestamp after query. */
-		do_gettimeofday(&raw_time);
+		etime = ktime_get();
+		if (!drm_timestamp_monotonic)
+			mono_time_offset = ktime_get_monotonic_offset();
 
 		preempt_enable();
 
@@ -642,7 +649,7 @@
 			return -EIO;
 		}
 
-		duration_ns = timeval_to_ns(&raw_time) - timeval_to_ns(&stime);
+		duration_ns = ktime_to_ns(etime) - ktime_to_ns(stime);
 
 		/* Accept result with <  max_error nsecs timing uncertainty. */
 		if (duration_ns <= (s64) *max_error)
@@ -689,14 +696,20 @@
 		vbl_status |= 0x8;
 	}
 
+	if (!drm_timestamp_monotonic)
+		etime = ktime_sub(etime, mono_time_offset);
+
+	/* save this only for debugging purposes */
+	tv_etime = ktime_to_timeval(etime);
 	/* Subtract time delta from raw timestamp to get final
 	 * vblank_time timestamp for end of vblank.
 	 */
-	*vblank_time = ns_to_timeval(timeval_to_ns(&raw_time) - delta_ns);
+	etime = ktime_sub_ns(etime, delta_ns);
+	*vblank_time = ktime_to_timeval(etime);
 
 	DRM_DEBUG("crtc %d : v %d p(%d,%d)@ %ld.%ld -> %ld.%ld [e %d us, %d rep]\n",
 		  crtc, (int)vbl_status, hpos, vpos,
-		  (long)raw_time.tv_sec, (long)raw_time.tv_usec,
+		  (long)tv_etime.tv_sec, (long)tv_etime.tv_usec,
 		  (long)vblank_time->tv_sec, (long)vblank_time->tv_usec,
 		  (int)duration_ns/1000, i);
 
@@ -708,6 +721,17 @@
 }
 EXPORT_SYMBOL(drm_calc_vbltimestamp_from_scanoutpos);
 
+static struct timeval get_drm_timestamp(void)
+{
+	ktime_t now;
+
+	now = ktime_get();
+	if (!drm_timestamp_monotonic)
+		now = ktime_sub(now, ktime_get_monotonic_offset());
+
+	return ktime_to_timeval(now);
+}
+
 /**
  * drm_get_last_vbltimestamp - retrieve raw timestamp for the most recent
  * vblank interval.
@@ -745,9 +769,9 @@
 	}
 
 	/* GPU high precision timestamp query unsupported or failed.
-	 * Return gettimeofday timestamp as best estimate.
+	 * Return current monotonic/gettimeofday timestamp as best estimate.
 	 */
-	do_gettimeofday(tvblank);
+	*tvblank = get_drm_timestamp();
 
 	return 0;
 }
@@ -802,6 +826,47 @@
 }
 EXPORT_SYMBOL(drm_vblank_count_and_time);
 
+static void send_vblank_event(struct drm_device *dev,
+		struct drm_pending_vblank_event *e,
+		unsigned long seq, struct timeval *now)
+{
+	WARN_ON_SMP(!spin_is_locked(&dev->event_lock));
+	e->event.sequence = seq;
+	e->event.tv_sec = now->tv_sec;
+	e->event.tv_usec = now->tv_usec;
+
+	list_add_tail(&e->base.link,
+		      &e->base.file_priv->event_list);
+	wake_up_interruptible(&e->base.file_priv->event_wait);
+	trace_drm_vblank_event_delivered(e->base.pid, e->pipe,
+					 e->event.sequence);
+}
+
+/**
+ * drm_send_vblank_event - helper to send vblank event after pageflip
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ * @e: the event to send
+ *
+ * Updates sequence # and timestamp on event, and sends it to userspace.
+ * Caller must hold event lock.
+ */
+void drm_send_vblank_event(struct drm_device *dev, int crtc,
+		struct drm_pending_vblank_event *e)
+{
+	struct timeval now;
+	unsigned int seq;
+	if (crtc >= 0) {
+		seq = drm_vblank_count_and_time(dev, crtc, &now);
+	} else {
+		seq = 0;
+
+		now = get_drm_timestamp();
+	}
+	send_vblank_event(dev, e, seq, &now);
+}
+EXPORT_SYMBOL(drm_send_vblank_event);
+
 /**
  * drm_update_vblank_count - update the master vblank counter
  * @dev: DRM device
@@ -936,6 +1001,13 @@
 }
 EXPORT_SYMBOL(drm_vblank_put);
 
+/**
+ * drm_vblank_off - disable vblank events on a CRTC
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ *
+ * Takes dev->vbl_lock and dev->event_lock internally; the caller
+ * must not hold them.
+ */
 void drm_vblank_off(struct drm_device *dev, int crtc)
 {
 	struct drm_pending_vblank_event *e, *t;
@@ -949,22 +1021,19 @@
 
 	/* Send any queued vblank events, lest the natives grow disquiet */
 	seq = drm_vblank_count_and_time(dev, crtc, &now);
+
+	spin_lock(&dev->event_lock);
 	list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) {
 		if (e->pipe != crtc)
 			continue;
 		DRM_DEBUG("Sending premature vblank event on disable: \
 			  wanted %d, current %d\n",
 			  e->event.sequence, seq);
-
-		e->event.sequence = seq;
-		e->event.tv_sec = now.tv_sec;
-		e->event.tv_usec = now.tv_usec;
+		list_del(&e->base.link);
 		drm_vblank_put(dev, e->pipe);
-		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
-		wake_up_interruptible(&e->base.file_priv->event_wait);
-		trace_drm_vblank_event_delivered(e->base.pid, e->pipe,
-						 e->event.sequence);
+		send_vblank_event(dev, e, seq, &now);
 	}
+	spin_unlock(&dev->event_lock);
 
 	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
@@ -1107,15 +1176,9 @@
 
 	e->event.sequence = vblwait->request.sequence;
 	if ((seq - vblwait->request.sequence) <= (1 << 23)) {
-		e->event.sequence = seq;
-		e->event.tv_sec = now.tv_sec;
-		e->event.tv_usec = now.tv_usec;
 		drm_vblank_put(dev, pipe);
-		list_add_tail(&e->base.link, &e->base.file_priv->event_list);
-		wake_up_interruptible(&e->base.file_priv->event_wait);
+		send_vblank_event(dev, e, seq, &now);
 		vblwait->reply.sequence = seq;
-		trace_drm_vblank_event_delivered(current->pid, pipe,
-						 vblwait->request.sequence);
 	} else {
 		/* drm_handle_vblank_events will call drm_vblank_put */
 		list_add_tail(&e->base.link, &dev->vblank_event_list);
@@ -1256,14 +1319,9 @@
 		DRM_DEBUG("vblank event on %d, current %d\n",
 			  e->event.sequence, seq);
 
-		e->event.sequence = seq;
-		e->event.tv_sec = now.tv_sec;
-		e->event.tv_usec = now.tv_usec;
+		list_del(&e->base.link);
 		drm_vblank_put(dev, e->pipe);
-		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
-		wake_up_interruptible(&e->base.file_priv->event_wait);
-		trace_drm_vblank_event_delivered(e->base.pid, e->pipe,
-						 e->event.sequence);
+		send_vblank_event(dev, e, seq, &now);
 	}
 
 	spin_unlock_irqrestore(&dev->event_lock, flags);
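
The new drm_send_vblank_event() lets drivers retire page-flip events without
open-coding the sequence/timestamp fill-in. A minimal sketch of a flip-done
handler using it (work is hypothetical per-flip driver state; note the
documented requirement that the event lock be held):

	unsigned long flags;

	spin_lock_irqsave(&dev->event_lock, flags);
	if (work->event)
		drm_send_vblank_event(dev, pipe, work->event);
	spin_unlock_irqrestore(&dev->event_lock, flags);
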
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 59450f3..d8da30e 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -46,7 +46,7 @@
  *
  * Describe @mode using DRM_DEBUG.
  */
-void drm_mode_debug_printmodeline(struct drm_display_mode *mode)
+void drm_mode_debug_printmodeline(const struct drm_display_mode *mode)
 {
 	DRM_DEBUG_KMS("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d "
 			"0x%x 0x%x\n",
@@ -558,7 +558,7 @@
  * RETURNS:
  * @mode->hdisplay
  */
-int drm_mode_width(struct drm_display_mode *mode)
+int drm_mode_width(const struct drm_display_mode *mode)
 {
 	return mode->hdisplay;
 
@@ -579,7 +579,7 @@
  * RETURNS:
  * @mode->vdisplay
  */
-int drm_mode_height(struct drm_display_mode *mode)
+int drm_mode_height(const struct drm_display_mode *mode)
 {
 	return mode->vdisplay;
 }
@@ -768,7 +768,7 @@
  * RETURNS:
  * True if the modes are equal, false otherwise.
  */
-bool drm_mode_equal(struct drm_display_mode *mode1, struct drm_display_mode *mode2)
+bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2)
 {
 	/* do clock check convert to PICOS so fb modes get matched
 	 * the same */
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index ba33144..754bc96 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -470,7 +470,7 @@
 {
 	struct pci_dev *root;
 	int pos;
-	u32 lnkcap, lnkcap2;
+	u32 lnkcap = 0, lnkcap2 = 0;
 
 	*mask = 0;
 	if (!dev->pdev)
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index c236fd2..200e104 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -46,16 +46,24 @@
 unsigned int drm_timestamp_precision = 20;  /* Default to 20 usecs. */
 EXPORT_SYMBOL(drm_timestamp_precision);
 
+/*
+ * Default to use monotonic timestamps for wait-for-vblank and page-flip
+ * complete events.
+ */
+unsigned int drm_timestamp_monotonic = 1;
+
 MODULE_AUTHOR(CORE_AUTHOR);
 MODULE_DESCRIPTION(CORE_DESC);
 MODULE_LICENSE("GPL and additional rights");
 MODULE_PARM_DESC(debug, "Enable debug output");
 MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs]");
 MODULE_PARM_DESC(timestamp_precision_usec, "Max. error on timestamps [usecs]");
+MODULE_PARM_DESC(timestamp_monotonic, "Use monotonic timestamps");
 
 module_param_named(debug, drm_debug, int, 0600);
 module_param_named(vblankoffdelay, drm_vblank_offdelay, int, 0600);
 module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600);
+module_param_named(timestamp_monotonic, drm_timestamp_monotonic, int, 0600);
 
 struct idr drm_minors_idr;
 
@@ -221,20 +229,20 @@
 	if (!file_priv->master)
 		return -EINVAL;
 
-	if (!file_priv->minor->master &&
-	    file_priv->minor->master != file_priv->master) {
-		mutex_lock(&dev->struct_mutex);
-		file_priv->minor->master = drm_master_get(file_priv->master);
-		file_priv->is_master = 1;
-		if (dev->driver->master_set) {
-			ret = dev->driver->master_set(dev, file_priv, false);
-			if (unlikely(ret != 0)) {
-				file_priv->is_master = 0;
-				drm_master_put(&file_priv->minor->master);
-			}
+	if (file_priv->minor->master)
+		return -EINVAL;
+
+	mutex_lock(&dev->struct_mutex);
+	file_priv->minor->master = drm_master_get(file_priv->master);
+	file_priv->is_master = 1;
+	if (dev->driver->master_set) {
+		ret = dev->driver->master_set(dev, file_priv, false);
+		if (unlikely(ret != 0)) {
+			file_priv->is_master = 0;
+			drm_master_put(&file_priv->minor->master);
 		}
-		mutex_unlock(&dev->struct_mutex);
 	}
+	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
@@ -492,10 +500,7 @@
 	drm_put_minor(&dev->primary);
 
 	list_del(&dev->driver_item);
-	if (dev->devname) {
-		kfree(dev->devname);
-		dev->devname = NULL;
-	}
+	kfree(dev->devname);
 	kfree(dev);
 }
 EXPORT_SYMBOL(drm_put_dev);
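
Since the parameter is registered with mode 0600, the old wall-clock behaviour
can be restored at runtime by writing 0 to
/sys/module/drm/parameters/timestamp_monotonic, or at boot with
drm.timestamp_monotonic=0; userspace can detect which clock is in use through
the DRM_CAP_TIMESTAMP_MONOTONIC capability added earlier in this series.
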
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 05cd8fe..0229665 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -182,7 +182,7 @@
 	uint64_t dpms_status;
 	int ret;
 
-	ret = drm_connector_property_get_value(connector,
+	ret = drm_object_property_get_value(&connector->base,
 					    dev->mode_config.dpms_property,
 					    &dpms_status);
 	if (ret)
@@ -277,7 +277,7 @@
 		return 0;
 	}
 
-	ret = drm_connector_property_get_value(connector, prop, &subconnector);
+	ret = drm_object_property_get_value(&connector->base, prop, &subconnector);
 	if (ret)
 		return 0;
 
@@ -318,7 +318,7 @@
 		return 0;
 	}
 
-	ret = drm_connector_property_get_value(connector, prop, &subconnector);
+	ret = drm_object_property_get_value(&connector->base, prop, &subconnector);
 	if (ret)
 		return 0;
 
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index fc345d4..1d1f1e5 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -10,6 +10,12 @@
 	  Choose this option if you have a Samsung SoC EXYNOS chipset.
 	  If M is selected the module will be called exynosdrm.
 
+config DRM_EXYNOS_IOMMU
+	bool "EXYNOS DRM IOMMU Support"
+	depends on DRM_EXYNOS && EXYNOS_IOMMU && ARM_DMA_USE_IOMMU
+	help
+	  Choose this option if you want to use IOMMU feature for DRM.
+
 config DRM_EXYNOS_DMABUF
 	bool "EXYNOS DRM DMABUF"
 	depends on DRM_EXYNOS
@@ -39,3 +45,27 @@
 	depends on DRM_EXYNOS && !VIDEO_SAMSUNG_S5P_G2D
 	help
 	  Choose this option if you want to use Exynos G2D for DRM.
+
+config DRM_EXYNOS_IPP
+	bool "Exynos DRM IPP"
+	depends on DRM_EXYNOS
+	help
+	  Choose this option if you want to use IPP feature for DRM.
+
+config DRM_EXYNOS_FIMC
+	bool "Exynos DRM FIMC"
+	depends on DRM_EXYNOS_IPP
+	help
+	  Choose this option if you want to use Exynos FIMC for DRM.
+
+config DRM_EXYNOS_ROTATOR
+	bool "Exynos DRM Rotator"
+	depends on DRM_EXYNOS_IPP
+	help
+	  Choose this option if you want to use Exynos Rotator for DRM.
+
+config DRM_EXYNOS_GSC
+	bool "Exynos DRM GSC"
+	depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5
+	help
+	  Choose this option if you want to use Exynos GSC for DRM.
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
index eb651ca..639b49e 100644
--- a/drivers/gpu/drm/exynos/Makefile
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -8,6 +8,7 @@
 		exynos_drm_buf.o exynos_drm_gem.o exynos_drm_core.o \
 		exynos_drm_plane.o
 
+exynosdrm-$(CONFIG_DRM_EXYNOS_IOMMU) += exynos_drm_iommu.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_DMABUF) += exynos_drm_dmabuf.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_FIMD)	+= exynos_drm_fimd.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI)	+= exynos_hdmi.o exynos_mixer.o \
@@ -15,5 +16,9 @@
 					   exynos_drm_hdmi.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI)	+= exynos_drm_vidi.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_G2D)	+= exynos_drm_g2d.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_IPP)	+= exynos_drm_ipp.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC)	+= exynos_drm_fimc.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR)	+= exynos_drm_rotator.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_GSC)	+= exynos_drm_gsc.o
 
 obj-$(CONFIG_DRM_EXYNOS)		+= exynosdrm.o
diff --git a/drivers/gpu/drm/exynos/exynos_ddc.c b/drivers/gpu/drm/exynos/exynos_ddc.c
index 37e6ec7..bef43e0 100644
--- a/drivers/gpu/drm/exynos/exynos_ddc.c
+++ b/drivers/gpu/drm/exynos/exynos_ddc.c
@@ -48,6 +48,7 @@
 	{ },
 };
 
+#ifdef CONFIG_OF
 static struct of_device_id hdmiddc_match_types[] = {
 	{
 		.compatible = "samsung,exynos5-hdmiddc",
@@ -55,12 +56,13 @@
 		/* end node */
 	}
 };
+#endif
 
 struct i2c_driver ddc_driver = {
 	.driver = {
 		.name = "exynos-hdmiddc",
 		.owner = THIS_MODULE,
-		.of_match_table = hdmiddc_match_types,
+		.of_match_table = of_match_ptr(hdmiddc_match_types),
 	},
 	.id_table	= ddc_idtable,
 	.probe		= s5p_ddc_probe,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.c b/drivers/gpu/drm/exynos/exynos_drm_buf.c
index 118c117..9601bad 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_buf.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.c
@@ -33,89 +33,64 @@
 static int lowlevel_buffer_allocate(struct drm_device *dev,
 		unsigned int flags, struct exynos_drm_gem_buf *buf)
 {
-	dma_addr_t start_addr;
-	unsigned int npages, i = 0;
-	struct scatterlist *sgl;
 	int ret = 0;
+	enum dma_attr attr;
+	unsigned int nr_pages;
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
-	if (IS_NONCONTIG_BUFFER(flags)) {
-		DRM_DEBUG_KMS("not support allocation type.\n");
-		return -EINVAL;
-	}
-
 	if (buf->dma_addr) {
 		DRM_DEBUG_KMS("already allocated.\n");
 		return 0;
 	}
 
-	if (buf->size >= SZ_1M) {
-		npages = buf->size >> SECTION_SHIFT;
-		buf->page_size = SECTION_SIZE;
-	} else if (buf->size >= SZ_64K) {
-		npages = buf->size >> 16;
-		buf->page_size = SZ_64K;
-	} else {
-		npages = buf->size >> PAGE_SHIFT;
-		buf->page_size = PAGE_SIZE;
-	}
+	init_dma_attrs(&buf->dma_attrs);
 
-	buf->sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
-	if (!buf->sgt) {
-		DRM_ERROR("failed to allocate sg table.\n");
-		return -ENOMEM;
-	}
+	/*
+	 * if EXYNOS_BO_CONTIG is set, a fully physically contiguous
+	 * memory region is allocated; otherwise the allocation is made
+	 * as physically contiguous as possible.
+	 */
+	if (flags & EXYNOS_BO_CONTIG)
+		dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &buf->dma_attrs);
 
-	ret = sg_alloc_table(buf->sgt, npages, GFP_KERNEL);
-	if (ret < 0) {
-		DRM_ERROR("failed to initialize sg table.\n");
-		kfree(buf->sgt);
-		buf->sgt = NULL;
-		return -ENOMEM;
-	}
+	/*
+	 * if EXYNOS_BO_WC or EXYNOS_BO_NONCACHABLE is set, use a
+	 * write-combined mapping; otherwise use a cachable mapping.
+	 */
+	if (flags & EXYNOS_BO_WC || !(flags & EXYNOS_BO_CACHABLE))
+		attr = DMA_ATTR_WRITE_COMBINE;
+	else
+		attr = DMA_ATTR_NON_CONSISTENT;
 
-	buf->kvaddr = dma_alloc_writecombine(dev->dev, buf->size,
-			&buf->dma_addr, GFP_KERNEL);
-	if (!buf->kvaddr) {
-		DRM_ERROR("failed to allocate buffer.\n");
-		ret = -ENOMEM;
-		goto err1;
-	}
+	dma_set_attr(attr, &buf->dma_attrs);
+	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &buf->dma_attrs);
 
-	buf->pages = kzalloc(sizeof(struct page) * npages, GFP_KERNEL);
+	buf->pages = dma_alloc_attrs(dev->dev, buf->size,
+			&buf->dma_addr, GFP_KERNEL, &buf->dma_attrs);
 	if (!buf->pages) {
-		DRM_ERROR("failed to allocate pages.\n");
+		DRM_ERROR("failed to allocate buffer.\n");
+		return -ENOMEM;
+	}
+
+	nr_pages = buf->size >> PAGE_SHIFT;
+	buf->sgt = drm_prime_pages_to_sg(buf->pages, nr_pages);
+	if (!buf->sgt) {
+		DRM_ERROR("failed to get sg table.\n");
 		ret = -ENOMEM;
-		goto err2;
+		goto err_free_attrs;
 	}
 
-	sgl = buf->sgt->sgl;
-	start_addr = buf->dma_addr;
-
-	while (i < npages) {
-		buf->pages[i] = phys_to_page(start_addr);
-		sg_set_page(sgl, buf->pages[i], buf->page_size, 0);
-		sg_dma_address(sgl) = start_addr;
-		start_addr += buf->page_size;
-		sgl = sg_next(sgl);
-		i++;
-	}
-
-	DRM_DEBUG_KMS("vaddr(0x%lx), dma_addr(0x%lx), size(0x%lx)\n",
-			(unsigned long)buf->kvaddr,
+	DRM_DEBUG_KMS("dma_addr(0x%lx), size(0x%lx)\n",
 			(unsigned long)buf->dma_addr,
 			buf->size);
 
 	return ret;
-err2:
-	dma_free_writecombine(dev->dev, buf->size, buf->kvaddr,
-			(dma_addr_t)buf->dma_addr);
+
+err_free_attrs:
+	dma_free_attrs(dev->dev, buf->size, buf->pages,
+			(dma_addr_t)buf->dma_addr, &buf->dma_attrs);
 	buf->dma_addr = (dma_addr_t)NULL;
-err1:
-	sg_free_table(buf->sgt);
-	kfree(buf->sgt);
-	buf->sgt = NULL;
 
 	return ret;
 }
@@ -125,23 +100,12 @@
 {
 	DRM_DEBUG_KMS("%s.\n", __FILE__);
 
-	/*
-	 * release only physically continuous memory and
-	 * non-continuous memory would be released by exynos
-	 * gem framework.
-	 */
-	if (IS_NONCONTIG_BUFFER(flags)) {
-		DRM_DEBUG_KMS("not support allocation type.\n");
-		return;
-	}
-
 	if (!buf->dma_addr) {
 		DRM_DEBUG_KMS("dma_addr is invalid.\n");
 		return;
 	}
 
-	DRM_DEBUG_KMS("vaddr(0x%lx), dma_addr(0x%lx), size(0x%lx)\n",
-			(unsigned long)buf->kvaddr,
+	DRM_DEBUG_KMS("dma_addr(0x%lx), size(0x%lx)\n",
 			(unsigned long)buf->dma_addr,
 			buf->size);
 
@@ -150,11 +114,8 @@
 	kfree(buf->sgt);
 	buf->sgt = NULL;
 
-	kfree(buf->pages);
-	buf->pages = NULL;
-
-	dma_free_writecombine(dev->dev, buf->size, buf->kvaddr,
-				(dma_addr_t)buf->dma_addr);
+	dma_free_attrs(dev->dev, buf->size, buf->pages,
+				(dma_addr_t)buf->dma_addr, &buf->dma_attrs);
 	buf->dma_addr = (dma_addr_t)NULL;
 }
 
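
The conversion above replaces the hand-rolled page array with the dma_attrs
allocator; the patch stores the returned cookie in buf->pages. A minimal
sketch of the allocate/free pairing it relies on (3.7-era struct dma_attrs
interface; dev and size are placeholders):

	struct dma_attrs attrs;
	dma_addr_t dma_addr;
	void *cookie;

	init_dma_attrs(&attrs);
	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
	/* with NO_KERNEL_MAPPING the return value is an opaque cookie,
	 * not a kernel virtual address */
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	cookie = dma_alloc_attrs(dev, size, &dma_addr, GFP_KERNEL, &attrs);
	/* ... the device uses the buffer through dma_addr ... */
	dma_free_attrs(dev, size, cookie, dma_addr, &attrs);
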
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.h b/drivers/gpu/drm/exynos/exynos_drm_buf.h
index 3388e4e..25cf162 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_buf.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.h
@@ -34,12 +34,12 @@
 void exynos_drm_fini_buf(struct drm_device *dev,
 				struct exynos_drm_gem_buf *buffer);
 
-/* allocate physical memory region and setup sgt and pages. */
+/* allocate physical memory region and setup sgt. */
 int exynos_drm_alloc_buf(struct drm_device *dev,
 				struct exynos_drm_gem_buf *buf,
 				unsigned int flags);
 
-/* release physical memory region, sgt and pages. */
+/* release physical memory region, and sgt. */
 void exynos_drm_free_buf(struct drm_device *dev,
 				unsigned int flags,
 				struct exynos_drm_gem_buf *buffer);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index fce245f..2efa4b0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -236,16 +236,21 @@
 			goto out;
 		}
 
+		spin_lock_irq(&dev->event_lock);
 		list_add_tail(&event->base.link,
 				&dev_priv->pageflip_event_list);
+		spin_unlock_irq(&dev->event_lock);
 
 		crtc->fb = fb;
 		ret = exynos_drm_crtc_mode_set_base(crtc, crtc->x, crtc->y,
 						    NULL);
 		if (ret) {
 			crtc->fb = old_fb;
+
+			spin_lock_irq(&dev->event_lock);
 			drm_vblank_put(dev, exynos_crtc->pipe);
 			list_del(&event->base.link);
+			spin_unlock_irq(&dev->event_lock);
 
 			goto out;
 		}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
index fae1f2e..61d5a84 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
@@ -30,70 +30,108 @@
 
 #include <linux/dma-buf.h>
 
-static struct sg_table *exynos_pages_to_sg(struct page **pages, int nr_pages,
-		unsigned int page_size)
+struct exynos_drm_dmabuf_attachment {
+	struct sg_table sgt;
+	enum dma_data_direction dir;
+};
+
+static int exynos_gem_attach_dma_buf(struct dma_buf *dmabuf,
+					struct device *dev,
+					struct dma_buf_attachment *attach)
 {
-	struct sg_table *sgt = NULL;
-	struct scatterlist *sgl;
-	int i, ret;
+	struct exynos_drm_dmabuf_attachment *exynos_attach;
 
-	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
-	if (!sgt)
-		goto out;
+	exynos_attach = kzalloc(sizeof(*exynos_attach), GFP_KERNEL);
+	if (!exynos_attach)
+		return -ENOMEM;
 
-	ret = sg_alloc_table(sgt, nr_pages, GFP_KERNEL);
-	if (ret)
-		goto err_free_sgt;
+	exynos_attach->dir = DMA_NONE;
+	attach->priv = exynos_attach;
 
-	if (page_size < PAGE_SIZE)
-		page_size = PAGE_SIZE;
+	return 0;
+}
 
-	for_each_sg(sgt->sgl, sgl, nr_pages, i)
-		sg_set_page(sgl, pages[i], page_size, 0);
+static void exynos_gem_detach_dma_buf(struct dma_buf *dmabuf,
+					struct dma_buf_attachment *attach)
+{
+	struct exynos_drm_dmabuf_attachment *exynos_attach = attach->priv;
+	struct sg_table *sgt;
 
-	return sgt;
+	if (!exynos_attach)
+		return;
 
-err_free_sgt:
-	kfree(sgt);
-	sgt = NULL;
-out:
-	return NULL;
+	sgt = &exynos_attach->sgt;
+
+	if (exynos_attach->dir != DMA_NONE)
+		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents,
+				exynos_attach->dir);
+
+	sg_free_table(sgt);
+	kfree(exynos_attach);
+	attach->priv = NULL;
 }
 
 static struct sg_table *
 		exynos_gem_map_dma_buf(struct dma_buf_attachment *attach,
 					enum dma_data_direction dir)
 {
+	struct exynos_drm_dmabuf_attachment *exynos_attach = attach->priv;
 	struct exynos_drm_gem_obj *gem_obj = attach->dmabuf->priv;
 	struct drm_device *dev = gem_obj->base.dev;
 	struct exynos_drm_gem_buf *buf;
+	struct scatterlist *rd, *wr;
 	struct sg_table *sgt = NULL;
-	unsigned int npages;
-	int nents;
+	unsigned int i;
+	int nents, ret;
 
 	DRM_DEBUG_PRIME("%s\n", __FILE__);
 
-	mutex_lock(&dev->struct_mutex);
+	if (WARN_ON(dir == DMA_NONE))
+		return ERR_PTR(-EINVAL);
+
+	/* just return current sgt if already requested. */
+	if (exynos_attach->dir == dir)
+		return &exynos_attach->sgt;
+
+	/* reattaching is not allowed. */
+	if (WARN_ON(exynos_attach->dir != DMA_NONE))
+		return ERR_PTR(-EBUSY);
 
 	buf = gem_obj->buffer;
+	if (!buf) {
+		DRM_ERROR("buffer is null.\n");
+		return ERR_PTR(-ENOMEM);
+	}
 
-	/* there should always be pages allocated. */
-	if (!buf->pages) {
-		DRM_ERROR("pages is null.\n");
+	sgt = &exynos_attach->sgt;
+
+	ret = sg_alloc_table(sgt, buf->sgt->orig_nents, GFP_KERNEL);
+	if (ret) {
+		DRM_ERROR("failed to alloc sgt.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+
+	rd = buf->sgt->sgl;
+	wr = sgt->sgl;
+	for (i = 0; i < sgt->orig_nents; ++i) {
+		sg_set_page(wr, sg_page(rd), rd->length, rd->offset);
+		rd = sg_next(rd);
+		wr = sg_next(wr);
+	}
+
+	nents = dma_map_sg(attach->dev, sgt->sgl, sgt->orig_nents, dir);
+	if (!nents) {
+		DRM_ERROR("failed to map sgl with iommu.\n");
+		sgt = ERR_PTR(-EIO);
 		goto err_unlock;
 	}
 
-	npages = buf->size / buf->page_size;
+	exynos_attach->dir = dir;
+	attach->priv = exynos_attach;
 
-	sgt = exynos_pages_to_sg(buf->pages, npages, buf->page_size);
-	if (!sgt) {
-		DRM_DEBUG_PRIME("exynos_pages_to_sg returned NULL!\n");
-		goto err_unlock;
-	}
-	nents = dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir);
-
-	DRM_DEBUG_PRIME("npages = %d buffer size = 0x%lx page_size = 0x%lx\n",
-			npages, buf->size, buf->page_size);
+	DRM_DEBUG_PRIME("buffer size = 0x%lx\n", buf->size);
 
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
@@ -104,10 +142,7 @@
 						struct sg_table *sgt,
 						enum dma_data_direction dir)
 {
-	dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
-	sg_free_table(sgt);
-	kfree(sgt);
-	sgt = NULL;
+	/* Nothing to do. */
 }
 
 static void exynos_dmabuf_release(struct dma_buf *dmabuf)
@@ -169,6 +204,8 @@
 }
 
 static struct dma_buf_ops exynos_dmabuf_ops = {
+	.attach			= exynos_gem_attach_dma_buf,
+	.detach			= exynos_gem_detach_dma_buf,
 	.map_dma_buf		= exynos_gem_map_dma_buf,
 	.unmap_dma_buf		= exynos_gem_unmap_dma_buf,
 	.kmap			= exynos_gem_dmabuf_kmap,
@@ -196,7 +233,6 @@
 	struct scatterlist *sgl;
 	struct exynos_drm_gem_obj *exynos_gem_obj;
 	struct exynos_drm_gem_buf *buffer;
-	struct page *page;
 	int ret;
 
 	DRM_DEBUG_PRIME("%s\n", __FILE__);
@@ -233,38 +269,27 @@
 		goto err_unmap_attach;
 	}
 
-	buffer->pages = kzalloc(sizeof(*page) * sgt->nents, GFP_KERNEL);
-	if (!buffer->pages) {
-		DRM_ERROR("failed to allocate pages.\n");
+	exynos_gem_obj = exynos_drm_gem_init(drm_dev, dma_buf->size);
+	if (!exynos_gem_obj) {
 		ret = -ENOMEM;
 		goto err_free_buffer;
 	}
 
-	exynos_gem_obj = exynos_drm_gem_init(drm_dev, dma_buf->size);
-	if (!exynos_gem_obj) {
-		ret = -ENOMEM;
-		goto err_free_pages;
-	}
-
 	sgl = sgt->sgl;
 
-	if (sgt->nents == 1) {
-		buffer->dma_addr = sg_dma_address(sgt->sgl);
-		buffer->size = sg_dma_len(sgt->sgl);
+	buffer->size = dma_buf->size;
+	buffer->dma_addr = sg_dma_address(sgl);
 
+	if (sgt->nents == 1) {
 		/* always physically continuous memory if sgt->nents is 1. */
 		exynos_gem_obj->flags |= EXYNOS_BO_CONTIG;
 	} else {
-		unsigned int i = 0;
-
-		buffer->dma_addr = sg_dma_address(sgl);
-		while (i < sgt->nents) {
-			buffer->pages[i] = sg_page(sgl);
-			buffer->size += sg_dma_len(sgl);
-			sgl = sg_next(sgl);
-			i++;
-		}
-
+		/*
+		 * this case could be CONTIG or NONCONTIG type but for now
+		 * it is set to NONCONTIG.
+		 * TODO: find a way for the exporter to notify the importer
+		 * of the type of its own buffer.
+		 */
 		exynos_gem_obj->flags |= EXYNOS_BO_NONCONTIG;
 	}
 
@@ -277,9 +302,6 @@
 
 	return &exynos_gem_obj->base;
 
-err_free_pages:
-	kfree(buffer->pages);
-	buffer->pages = NULL;
 err_free_buffer:
 	kfree(buffer);
 	buffer = NULL;
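
With map_dma_buf now caching the mapped sg_table in the attachment, a repeated
map in the same direction returns the cached table and unmap_dma_buf becomes a
no-op; the real teardown happens at detach time. From an importer's side the
usual dma-buf sequence is unchanged, as in this sketch (the surrounding object
names are placeholders):

	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	attach = dma_buf_attach(dmabuf, dev);
	sgt = dma_buf_map_attachment(attach, DMA_TO_DEVICE);
	/* ... device DMA through sgt ... */
	dma_buf_unmap_attachment(attach, sgt, DMA_TO_DEVICE);
	dma_buf_detach(dmabuf, attach);
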
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 1de7baa..e0a8e80 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -40,6 +40,8 @@
 #include "exynos_drm_vidi.h"
 #include "exynos_drm_dmabuf.h"
 #include "exynos_drm_g2d.h"
+#include "exynos_drm_ipp.h"
+#include "exynos_drm_iommu.h"
 
 #define DRIVER_NAME	"exynos"
 #define DRIVER_DESC	"Samsung SoC DRM"
@@ -49,6 +51,9 @@
 
 #define VBLANK_OFF_DELAY	50000
 
+/* platform device pointer for exynos drm device. */
+static struct platform_device *exynos_drm_pdev;
+
 static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
 {
 	struct exynos_drm_private *private;
@@ -66,6 +71,18 @@
 	INIT_LIST_HEAD(&private->pageflip_event_list);
 	dev->dev_private = (void *)private;
 
+	/*
+	 * create a mapping to manage the iommu table and set a pointer
+	 * to the iommu mapping structure in iommu_mapping of the private
+	 * data. this iommu_mapping can also be used to check whether
+	 * iommu is supported or not.
+	 */
+	ret = drm_create_iommu_mapping(dev);
+	if (ret < 0) {
+		DRM_ERROR("failed to create iommu mapping.\n");
+		goto err_crtc;
+	}
+
 	drm_mode_config_init(dev);
 
 	/* init kms poll for handling hpd */
@@ -80,7 +97,7 @@
 	for (nr = 0; nr < MAX_CRTC; nr++) {
 		ret = exynos_drm_crtc_create(dev, nr);
 		if (ret)
-			goto err_crtc;
+			goto err_release_iommu_mapping;
 	}
 
 	for (nr = 0; nr < MAX_PLANE; nr++) {
@@ -89,12 +106,12 @@
 
 		plane = exynos_plane_init(dev, possible_crtcs, false);
 		if (!plane)
-			goto err_crtc;
+			goto err_release_iommu_mapping;
 	}
 
 	ret = drm_vblank_init(dev, MAX_CRTC);
 	if (ret)
-		goto err_crtc;
+		goto err_release_iommu_mapping;
 
 	/*
 	 * probe sub drivers such as display controller and hdmi driver,
@@ -126,6 +143,8 @@
 	exynos_drm_device_unregister(dev);
 err_vblank:
 	drm_vblank_cleanup(dev);
+err_release_iommu_mapping:
+	drm_release_iommu_mapping(dev);
 err_crtc:
 	drm_mode_config_cleanup(dev);
 	kfree(private);
@@ -142,6 +161,8 @@
 	drm_vblank_cleanup(dev);
 	drm_kms_helper_poll_fini(dev);
 	drm_mode_config_cleanup(dev);
+
+	drm_release_iommu_mapping(dev);
 	kfree(dev->dev_private);
 
 	dev->dev_private = NULL;
@@ -229,6 +250,14 @@
 			exynos_g2d_set_cmdlist_ioctl, DRM_UNLOCKED | DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC,
 			exynos_g2d_exec_ioctl, DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_PROPERTY,
+			exynos_drm_ipp_get_property, DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_SET_PROPERTY,
+			exynos_drm_ipp_set_property, DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_QUEUE_BUF,
+			exynos_drm_ipp_queue_buf, DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_CMD_CTRL,
+			exynos_drm_ipp_cmd_ctrl, DRM_UNLOCKED | DRM_AUTH),
 };
 
 static const struct file_operations exynos_drm_driver_fops = {
@@ -279,6 +308,7 @@
 {
 	DRM_DEBUG_DRIVER("%s\n", __FILE__);
 
+	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 	exynos_drm_driver.num_ioctls = DRM_ARRAY_SIZE(exynos_ioctls);
 
 	return drm_platform_init(&exynos_drm_driver, pdev);
@@ -324,6 +354,10 @@
 	ret = platform_driver_register(&exynos_drm_common_hdmi_driver);
 	if (ret < 0)
 		goto out_common_hdmi;
+
+	ret = exynos_platform_device_hdmi_register();
+	if (ret < 0)
+		goto out_common_hdmi_dev;
 #endif
 
 #ifdef CONFIG_DRM_EXYNOS_VIDI
@@ -338,24 +372,80 @@
 		goto out_g2d;
 #endif
 
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+	ret = platform_driver_register(&fimc_driver);
+	if (ret < 0)
+		goto out_fimc;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_ROTATOR
+	ret = platform_driver_register(&rotator_driver);
+	if (ret < 0)
+		goto out_rotator;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_GSC
+	ret = platform_driver_register(&gsc_driver);
+	if (ret < 0)
+		goto out_gsc;
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_IPP
+	ret = platform_driver_register(&ipp_driver);
+	if (ret < 0)
+		goto out_ipp;
+#endif
+
 	ret = platform_driver_register(&exynos_drm_platform_driver);
 	if (ret < 0)
+		goto out_drm;
+
+	exynos_drm_pdev = platform_device_register_simple("exynos-drm", -1,
+				NULL, 0);
+	if (IS_ERR_OR_NULL(exynos_drm_pdev)) {
+		ret = PTR_ERR(exynos_drm_pdev);
 		goto out;
+	}
 
 	return 0;
 
 out:
+	platform_driver_unregister(&exynos_drm_platform_driver);
+
+out_drm:
+#ifdef CONFIG_DRM_EXYNOS_IPP
+	platform_driver_unregister(&ipp_driver);
+out_ipp:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_GSC
+	platform_driver_unregister(&gsc_driver);
+out_gsc:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_ROTATOR
+	platform_driver_unregister(&rotator_driver);
+out_rotator:
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+	platform_driver_unregister(&fimc_driver);
+out_fimc:
+#endif
+
 #ifdef CONFIG_DRM_EXYNOS_G2D
 	platform_driver_unregister(&g2d_driver);
 out_g2d:
 #endif
 
 #ifdef CONFIG_DRM_EXYNOS_VIDI
-out_vidi:
 	platform_driver_unregister(&vidi_driver);
+out_vidi:
 #endif
 
 #ifdef CONFIG_DRM_EXYNOS_HDMI
+	exynos_platform_device_hdmi_unregister();
+out_common_hdmi_dev:
 	platform_driver_unregister(&exynos_drm_common_hdmi_driver);
 out_common_hdmi:
 	platform_driver_unregister(&mixer_driver);
@@ -375,13 +465,32 @@
 {
 	DRM_DEBUG_DRIVER("%s\n", __FILE__);
 
+	platform_device_unregister(exynos_drm_pdev);
+
 	platform_driver_unregister(&exynos_drm_platform_driver);
 
+#ifdef CONFIG_DRM_EXYNOS_IPP
+	platform_driver_unregister(&ipp_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_GSC
+	platform_driver_unregister(&gsc_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_ROTATOR
+	platform_driver_unregister(&rotator_driver);
+#endif
+
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+	platform_driver_unregister(&fimc_driver);
+#endif
+
 #ifdef CONFIG_DRM_EXYNOS_G2D
 	platform_driver_unregister(&g2d_driver);
 #endif
 
 #ifdef CONFIG_DRM_EXYNOS_HDMI
+	exynos_platform_device_hdmi_unregister();
 	platform_driver_unregister(&exynos_drm_common_hdmi_driver);
 	platform_driver_unregister(&mixer_driver);
 	platform_driver_unregister(&hdmi_driver);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index a342310..f5a9774 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -74,8 +74,6 @@
  * @commit: apply hardware specific overlay data to registers.
  * @enable: enable hardware specific overlay.
  * @disable: disable hardware specific overlay.
- * @wait_for_vblank: wait for vblank interrupt to make sure that
- *	hardware overlay is disabled.
  */
 struct exynos_drm_overlay_ops {
 	void (*mode_set)(struct device *subdrv_dev,
@@ -83,7 +81,6 @@
 	void (*commit)(struct device *subdrv_dev, int zpos);
 	void (*enable)(struct device *subdrv_dev, int zpos);
 	void (*disable)(struct device *subdrv_dev, int zpos);
-	void (*wait_for_vblank)(struct device *subdrv_dev);
 };
 
 /*
@@ -110,7 +107,6 @@
  * @pixel_format: fourcc pixel format of this overlay
  * @dma_addr: array of bus(accessed by dma) address to the memory region
  *	      allocated for a overlay.
- * @vaddr: array of virtual memory addresss to this overlay.
  * @zpos: order of overlay layer(z position).
  * @default_win: a window to be enabled.
  * @color_key: color key on or off.
@@ -142,7 +138,6 @@
 	unsigned int pitch;
 	uint32_t pixel_format;
 	dma_addr_t dma_addr[MAX_FB_BUFFER];
-	void __iomem *vaddr[MAX_FB_BUFFER];
 	int zpos;
 
 	bool default_win;
@@ -186,6 +181,8 @@
  * @commit: set current hw specific display mode to hw.
  * @enable_vblank: specific driver callback for enabling vblank interrupt.
  * @disable_vblank: specific driver callback for disabling vblank interrupt.
+ * @wait_for_vblank: wait for vblank interrupt to make sure that
+ *	hardware overlay is updated.
  */
 struct exynos_drm_manager_ops {
 	void (*dpms)(struct device *subdrv_dev, int mode);
@@ -200,6 +197,7 @@
 	void (*commit)(struct device *subdrv_dev);
 	int (*enable_vblank)(struct device *subdrv_dev);
 	void (*disable_vblank)(struct device *subdrv_dev);
+	void (*wait_for_vblank)(struct device *subdrv_dev);
 };
 
 /*
@@ -231,16 +229,28 @@
 	struct device		*dev;
 	struct list_head	inuse_cmdlist;
 	struct list_head	event_list;
-	struct list_head	gem_list;
-	unsigned int		gem_nr;
+	struct list_head	userptr_list;
+};
+
+struct exynos_drm_ipp_private {
+	struct device	*dev;
+	struct list_head	event_list;
 };
 
 struct drm_exynos_file_private {
 	struct exynos_drm_g2d_private	*g2d_priv;
+	struct exynos_drm_ipp_private	*ipp_priv;
 };
 
 /*
  * Exynos drm private structure.
+ *
+ * @da_start: start address of the device address space.
+ *	with an iommu, the device address space starts from this address;
+ *	otherwise the default one is used.
+ * @da_space_size: size of the device address space.
+ *	if 0, the default value is used.
+ * @da_space_order: order of the device address space.
  */
 struct exynos_drm_private {
 	struct drm_fb_helper *fb_helper;
@@ -255,6 +265,10 @@
 	struct drm_crtc *crtc[MAX_CRTC];
 	struct drm_property *plane_zpos_property;
 	struct drm_property *crtc_mode_property;
+
+	unsigned long da_start;
+	unsigned long da_space_size;
+	unsigned long da_space_order;
 };
 
 /*
@@ -318,10 +332,25 @@
 int exynos_drm_subdrv_open(struct drm_device *dev, struct drm_file *file);
 void exynos_drm_subdrv_close(struct drm_device *dev, struct drm_file *file);
 
+/*
+ * this function registers exynos drm hdmi platform device. It ensures only one
+ * instance of the device is created.
+ */
+extern int exynos_platform_device_hdmi_register(void);
+
+/*
+ * this function unregisters exynos drm hdmi platform device if it exists.
+ */
+void exynos_platform_device_hdmi_unregister(void);
+
 extern struct platform_driver fimd_driver;
 extern struct platform_driver hdmi_driver;
 extern struct platform_driver mixer_driver;
 extern struct platform_driver exynos_drm_common_hdmi_driver;
 extern struct platform_driver vidi_driver;
 extern struct platform_driver g2d_driver;
+extern struct platform_driver fimc_driver;
+extern struct platform_driver rotator_driver;
+extern struct platform_driver gsc_driver;
+extern struct platform_driver ipp_driver;
 #endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
index f2df06c..3014852 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_encoder.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
@@ -234,6 +234,32 @@
 	exynos_encoder->dpms = DRM_MODE_DPMS_ON;
 }
 
+void exynos_drm_encoder_complete_scanout(struct drm_framebuffer *fb)
+{
+	struct exynos_drm_encoder *exynos_encoder;
+	struct exynos_drm_manager_ops *ops;
+	struct drm_device *dev = fb->dev;
+	struct drm_encoder *encoder;
+
+	/*
+	 * make sure that overlay data are updated to real hardware
+	 * for all encoders.
+	 */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		exynos_encoder = to_exynos_encoder(encoder);
+		ops = exynos_encoder->manager->ops;
+
+		/*
+		 * wait for vblank interrupt
+		 * - this makes sure that overlay data are updated to
+		 *	real hardware.
+		 */
+		if (ops->wait_for_vblank)
+			ops->wait_for_vblank(exynos_encoder->manager->dev);
+	}
+}
+
+
 static void exynos_drm_encoder_disable(struct drm_encoder *encoder)
 {
 	struct drm_plane *plane;
@@ -505,14 +531,4 @@
 
 	if (overlay_ops && overlay_ops->disable)
 		overlay_ops->disable(manager->dev, zpos);
-
-	/*
-	 * wait for vblank interrupt
-	 * - this makes sure that hardware overlay is disabled to avoid
-	 * for the dma accesses to memory after gem buffer was released
-	 * because the setting for disabling the overlay will be updated
-	 * at vsync.
-	 */
-	if (overlay_ops && overlay_ops->wait_for_vblank)
-		overlay_ops->wait_for_vblank(manager->dev);
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.h b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
index 6470d9d..88bb25a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_encoder.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
@@ -46,5 +46,6 @@
 void exynos_drm_encoder_plane_commit(struct drm_encoder *encoder, void *data);
 void exynos_drm_encoder_plane_enable(struct drm_encoder *encoder, void *data);
 void exynos_drm_encoder_plane_disable(struct drm_encoder *encoder, void *data);
+void exynos_drm_encoder_complete_scanout(struct drm_framebuffer *fb);
 
 #endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index 4ef4cd3..5426cc5 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -30,10 +30,13 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <uapi/drm/exynos_drm.h>
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
 #include "exynos_drm_gem.h"
+#include "exynos_drm_iommu.h"
+#include "exynos_drm_encoder.h"
 
 #define to_exynos_fb(x)	container_of(x, struct exynos_drm_fb, fb)
 
@@ -50,6 +53,32 @@
 	struct exynos_drm_gem_obj	*exynos_gem_obj[MAX_FB_BUFFER];
 };
 
+static int check_fb_gem_memory_type(struct drm_device *drm_dev,
+				struct exynos_drm_gem_obj *exynos_gem_obj)
+{
+	unsigned int flags;
+
+	/*
+	 * if the exynos drm driver supports iommu then the framebuffer
+	 * can use all the buffer types.
+	 */
+	if (is_drm_iommu_supported(drm_dev))
+		return 0;
+
+	flags = exynos_gem_obj->flags;
+
+	/*
+	 * without iommu support, physically non-continuous memory cannot
+	 * be used for a framebuffer.
+	 */
+	if (IS_NONCONTIG_BUFFER(flags)) {
+		DRM_ERROR("cannot use this gem memory type for fb.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void exynos_drm_fb_destroy(struct drm_framebuffer *fb)
 {
 	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
@@ -57,6 +86,9 @@
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
+	/* make sure that overlay data are updated before releasing fb. */
+	exynos_drm_encoder_complete_scanout(fb);
+
 	drm_framebuffer_cleanup(fb);
 
 	for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem_obj); i++) {
@@ -128,23 +160,32 @@
 			    struct drm_gem_object *obj)
 {
 	struct exynos_drm_fb *exynos_fb;
+	struct exynos_drm_gem_obj *exynos_gem_obj;
 	int ret;
 
+	exynos_gem_obj = to_exynos_gem_obj(obj);
+
+	ret = check_fb_gem_memory_type(dev, exynos_gem_obj);
+	if (ret < 0) {
+		DRM_ERROR("cannot use this gem memory type for fb.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL);
 	if (!exynos_fb) {
 		DRM_ERROR("failed to allocate exynos drm framebuffer\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
+	drm_helper_mode_fill_fb_struct(&exynos_fb->fb, mode_cmd);
+	exynos_fb->exynos_gem_obj[0] = exynos_gem_obj;
+
 	ret = drm_framebuffer_init(dev, &exynos_fb->fb, &exynos_drm_fb_funcs);
 	if (ret) {
 		DRM_ERROR("failed to initialize framebuffer\n");
 		return ERR_PTR(ret);
 	}
 
-	drm_helper_mode_fill_fb_struct(&exynos_fb->fb, mode_cmd);
-	exynos_fb->exynos_gem_obj[0] = to_exynos_gem_obj(obj);
-
 	return &exynos_fb->fb;
 }
 
@@ -190,9 +231,8 @@
 		      struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
-	struct drm_framebuffer *fb;
 	struct exynos_drm_fb *exynos_fb;
-	int i;
+	int i, ret;
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
@@ -202,30 +242,56 @@
 		return ERR_PTR(-ENOENT);
 	}
 
-	fb = exynos_drm_framebuffer_init(dev, mode_cmd, obj);
-	if (IS_ERR(fb)) {
-		drm_gem_object_unreference_unlocked(obj);
-		return fb;
+	exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL);
+	if (!exynos_fb) {
+		DRM_ERROR("failed to allocate exynos drm framebuffer\n");
+		return ERR_PTR(-ENOMEM);
 	}
 
-	exynos_fb = to_exynos_fb(fb);
+	drm_helper_mode_fill_fb_struct(&exynos_fb->fb, mode_cmd);
+	exynos_fb->exynos_gem_obj[0] = to_exynos_gem_obj(obj);
 	exynos_fb->buf_cnt = exynos_drm_format_num_buffers(mode_cmd);
 
 	DRM_DEBUG_KMS("buf_cnt = %d\n", exynos_fb->buf_cnt);
 
 	for (i = 1; i < exynos_fb->buf_cnt; i++) {
+		struct exynos_drm_gem_obj *exynos_gem_obj;
+		int ret;
+
 		obj = drm_gem_object_lookup(dev, file_priv,
 				mode_cmd->handles[i]);
 		if (!obj) {
 			DRM_ERROR("failed to lookup gem object\n");
-			exynos_drm_fb_destroy(fb);
+			kfree(exynos_fb);
 			return ERR_PTR(-ENOENT);
 		}
 
+		exynos_gem_obj = to_exynos_gem_obj(obj);
+
+		ret = check_fb_gem_memory_type(dev, exynos_gem_obj);
+		if (ret < 0) {
+			DRM_ERROR("cannot use this gem memory type for fb.\n");
+			kfree(exynos_fb);
+			return ERR_PTR(ret);
+		}
+
 		exynos_fb->exynos_gem_obj[i] = to_exynos_gem_obj(obj);
 	}
 
-	return fb;
+	ret = drm_framebuffer_init(dev, &exynos_fb->fb, &exynos_drm_fb_funcs);
+	if (ret) {
+		for (i = 0; i < exynos_fb->buf_cnt; i++) {
+			struct exynos_drm_gem_obj *gem_obj;
+
+			gem_obj = exynos_fb->exynos_gem_obj[i];
+			drm_gem_object_unreference_unlocked(&gem_obj->base);
+		}
+
+		kfree(exynos_fb);
+		return ERR_PTR(ret);
+	}
+
+	return &exynos_fb->fb;
 }
 
 struct exynos_drm_gem_buf *exynos_drm_fb_buffer(struct drm_framebuffer *fb,
@@ -243,9 +309,7 @@
 	if (!buffer)
 		return NULL;
 
-	DRM_DEBUG_KMS("vaddr = 0x%lx, dma_addr = 0x%lx\n",
-			(unsigned long)buffer->kvaddr,
-			(unsigned long)buffer->dma_addr);
+	DRM_DEBUG_KMS("dma_addr = 0x%lx\n", (unsigned long)buffer->dma_addr);
 
 	return buffer;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index e7466c4..f433eb7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -46,8 +46,38 @@
 	struct exynos_drm_gem_obj	*exynos_gem_obj;
 };
 
+static int exynos_drm_fb_mmap(struct fb_info *info,
+			struct vm_area_struct *vma)
+{
+	struct drm_fb_helper *helper = info->par;
+	struct exynos_drm_fbdev *exynos_fbd = to_exynos_fbdev(helper);
+	struct exynos_drm_gem_obj *exynos_gem_obj = exynos_fbd->exynos_gem_obj;
+	struct exynos_drm_gem_buf *buffer = exynos_gem_obj->buffer;
+	unsigned long vm_size;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+
+	vm_size = vma->vm_end - vma->vm_start;
+
+	if (vm_size > buffer->size)
+		return -EINVAL;
+
+	ret = dma_mmap_attrs(helper->dev->dev, vma, buffer->pages,
+		buffer->dma_addr, buffer->size, &buffer->dma_attrs);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 static struct fb_ops exynos_drm_fb_ops = {
 	.owner		= THIS_MODULE,
+	.fb_mmap        = exynos_drm_fb_mmap,
 	.fb_fillrect	= cfb_fillrect,
 	.fb_copyarea	= cfb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
@@ -79,6 +109,17 @@
 		return -EFAULT;
 	}
 
+	/* map pages into kernel virtual address space. */
+	if (!buffer->kvaddr) {
+		unsigned int nr_pages = buffer->size >> PAGE_SHIFT;
+		buffer->kvaddr = vmap(buffer->pages, nr_pages, VM_MAP,
+					pgprot_writecombine(PAGE_KERNEL));
+		if (!buffer->kvaddr) {
+			DRM_ERROR("failed to map pages to kernel space.\n");
+			return -EIO;
+		}
+	}
+
 	/* buffer count to framebuffer always is 1 at booting time. */
 	exynos_drm_fb_set_buf_cnt(fb, 1);
 
@@ -87,8 +128,8 @@
 
 	dev->mode_config.fb_base = (resource_size_t)buffer->dma_addr;
 	fbi->screen_base = buffer->kvaddr + offset;
-	fbi->fix.smem_start = (unsigned long)(page_to_phys(buffer->pages[0]) +
-				offset);
+	fbi->fix.smem_start = (unsigned long)
+			(page_to_phys(sg_page(buffer->sgt->sgl)) + offset);
 	fbi->screen_size = size;
 	fbi->fix.smem_len = size;
 
@@ -134,7 +175,7 @@
 	exynos_gem_obj = exynos_drm_gem_create(dev, 0, size);
 	if (IS_ERR(exynos_gem_obj)) {
 		ret = PTR_ERR(exynos_gem_obj);
-		goto out;
+		goto err_release_framebuffer;
 	}
 
 	exynos_fbdev->exynos_gem_obj = exynos_gem_obj;
@@ -144,7 +185,7 @@
 	if (IS_ERR_OR_NULL(helper->fb)) {
 		DRM_ERROR("failed to create drm framebuffer.\n");
 		ret = PTR_ERR(helper->fb);
-		goto out;
+		goto err_destroy_gem;
 	}
 
 	helper->fbdev = fbi;
@@ -156,14 +197,24 @@
 	ret = fb_alloc_cmap(&fbi->cmap, 256, 0);
 	if (ret) {
 		DRM_ERROR("failed to allocate cmap.\n");
-		goto out;
+		goto err_destroy_framebuffer;
 	}
 
 	ret = exynos_drm_fbdev_update(helper, helper->fb);
-	if (ret < 0) {
-		fb_dealloc_cmap(&fbi->cmap);
-		goto out;
-	}
+	if (ret < 0)
+		goto err_dealloc_cmap;
+
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+
+err_dealloc_cmap:
+	fb_dealloc_cmap(&fbi->cmap);
+err_destroy_framebuffer:
+	drm_framebuffer_cleanup(helper->fb);
+err_destroy_gem:
+	exynos_drm_gem_destroy(exynos_gem_obj);
+err_release_framebuffer:
+	framebuffer_release(fbi);
 
 /*
  * if failed, all resources allocated above would be released by
@@ -265,8 +316,13 @@
 static void exynos_drm_fbdev_destroy(struct drm_device *dev,
 				      struct drm_fb_helper *fb_helper)
 {
+	struct exynos_drm_fbdev *exynos_fbd = to_exynos_fbdev(fb_helper);
+	struct exynos_drm_gem_obj *exynos_gem_obj = exynos_fbd->exynos_gem_obj;
 	struct drm_framebuffer *fb;
 
+	if (exynos_gem_obj->buffer->kvaddr)
+		vunmap(exynos_gem_obj->buffer->kvaddr);
+
 	/* release drm framebuffer and real buffer */
 	if (fb_helper->fb && fb_helper->fb->funcs) {
 		fb = fb_helper->fb;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
new file mode 100644
index 0000000..61ea242
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -0,0 +1,2001 @@
+/*
+ * Copyright (C) 2012 Samsung Electronics Co.Ltd
+ * Authors:
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *	Jinyoung Jeon <jy0.jeon@samsung.com>
+ *	Sangmin Lee <lsmin.lee@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <plat/map-base.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "regs-fimc.h"
+#include "exynos_drm_ipp.h"
+#include "exynos_drm_fimc.h"
+
+/*
+ * FIMC stands for Fully Interactive Mobile Camera and
+ * supports image scaler/rotator and input/output DMA operations.
+ * input DMA reads image data from memory.
+ * output DMA writes image data to memory.
+ * FIMC supports image rotation and image effect functions.
+ *
+ * M2M operation : supports crop/scale/rotation/csc and so on.
+ * Memory ----> FIMC H/W ----> Memory.
+ * Writeback operation : supports cloned screen with FIMD.
+ * FIMD ----> FIMC H/W ----> Memory.
+ * Output operation : supports direct display using local path.
+ * Memory ----> FIMC H/W ----> FIMD.
+ */
+
+/*
+ * TODO
+ * 1. check suspend/resume api if needed.
+ * 2. need to check use case platform_device_id.
+ * 3. check src/dst size (width, height).
+ * 4. add a check_prepare api for the right register.
+ * 5. need to add supported list in prop_list.
+ * 6. check prescaler/scaler optimization.
+ */
+
+#define FIMC_MAX_DEVS	4
+#define FIMC_MAX_SRC	2
+#define FIMC_MAX_DST	32
+#define FIMC_SHFACTOR	10
+#define FIMC_BUF_STOP	1
+#define FIMC_BUF_START	2
+#define FIMC_REG_SZ		32
+#define FIMC_WIDTH_ITU_709	1280
+#define FIMC_REFRESH_MAX	60
+#define FIMC_REFRESH_MIN	12
+#define FIMC_CROP_MAX	8192
+#define FIMC_CROP_MIN	32
+#define FIMC_SCALE_MAX	4224
+#define FIMC_SCALE_MIN	32
+
+#define get_fimc_context(dev)	platform_get_drvdata(to_platform_device(dev))
+#define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
+					struct fimc_context, ippdrv);
+#define fimc_read(offset)		readl(ctx->regs + (offset))
+#define fimc_write(cfg, offset)	writel(cfg, ctx->regs + (offset))
+
+enum fimc_wb {
+	FIMC_WB_NONE,
+	FIMC_WB_A,
+	FIMC_WB_B,
+};
+
+/*
+ * A structure of scaler.
+ *
+ * @range: color range, narrow or wide.
+ * @bypass: scaler bypass mode.
+ * @up_h: horizontal scale up.
+ * @up_v: vertical scale up.
+ * @hratio: horizontal ratio.
+ * @vratio: vertical ratio.
+ */
+struct fimc_scaler {
+	bool	range;
+	bool	bypass;
+	bool	up_h;
+	bool	up_v;
+	u32	hratio;
+	u32	vratio;
+};
+
+/*
+ * A structure of scaler capability.
+ *
+ * See user manual table 43-1.
+ * @in_hori: scaler input horizontal size.
+ * @bypass: scaler bypass mode.
+ * @dst_h_wo_rot: target horizontal size without output rotation.
+ * @dst_h_rot: target horizontal size with output rotation.
+ * @rl_w_wo_rot: real width without input rotation.
+ * @rl_h_rot: real height with output rotation.
+ */
+struct fimc_capability {
+	/* scaler */
+	u32	in_hori;
+	u32	bypass;
+	/* output rotator */
+	u32	dst_h_wo_rot;
+	u32	dst_h_rot;
+	/* input rotator */
+	u32	rl_w_wo_rot;
+	u32	rl_h_rot;
+};
+
+/*
+ * A structure of fimc driver data.
+ *
+ * @parent_clk: name of parent clock.
+ */
+struct fimc_driverdata {
+	char	*parent_clk;
+};
+
+/*
+ * A structure of fimc context.
+ *
+ * @ippdrv: ipp driver instance, prepared at initialization.
+ * @regs_res: register resources.
+ * @regs: memory mapped io registers.
+ * @lock: locking of operations.
+ * @sclk_fimc_clk: fimc source clock.
+ * @fimc_clk: fimc clock.
+ * @wb_clk: writeback A clock.
+ * @wb_b_clk: writeback B clock.
+ * @sc: scaler information.
+ * @ddata: fimc driver data.
+ * @pol: polarity of writeback.
+ * @id: fimc id.
+ * @irq: irq number.
+ * @suspended: device suspended state.
+ */
+struct fimc_context {
+	struct exynos_drm_ippdrv	ippdrv;
+	struct resource	*regs_res;
+	void __iomem	*regs;
+	struct mutex	lock;
+	struct clk	*sclk_fimc_clk;
+	struct clk	*fimc_clk;
+	struct clk	*wb_clk;
+	struct clk	*wb_b_clk;
+	struct fimc_scaler	sc;
+	struct fimc_driverdata	*ddata;
+	struct exynos_drm_ipp_pol	pol;
+	int	id;
+	int	irq;
+	bool	suspended;
+};
+
+static void fimc_sw_reset(struct fimc_context *ctx, bool pattern)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:pattern[%d]\n", __func__, pattern);
+
+	cfg = fimc_read(EXYNOS_CISRCFMT);
+	cfg |= EXYNOS_CISRCFMT_ITU601_8BIT;
+	if (pattern)
+		cfg |= EXYNOS_CIGCTRL_TESTPATTERN_COLOR_BAR;
+
+	fimc_write(cfg, EXYNOS_CISRCFMT);
+
+	/* s/w reset */
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg |= (EXYNOS_CIGCTRL_SWRST);
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+
+	/* s/w reset complete */
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg &= ~EXYNOS_CIGCTRL_SWRST;
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+
+	/* reset sequence */
+	fimc_write(0x0, EXYNOS_CIFCNTSEQ);
+}
+
+static void fimc_set_camblk_fimd0_wb(struct fimc_context *ctx)
+{
+	u32 camblk_cfg;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	camblk_cfg = readl(SYSREG_CAMERA_BLK);
+	camblk_cfg &= ~(SYSREG_FIMD0WB_DEST_MASK);
+	camblk_cfg |= ctx->id << (SYSREG_FIMD0WB_DEST_SHIFT);
+
+	writel(camblk_cfg, SYSREG_CAMERA_BLK);
+}
+
+static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:wb[%d]\n", __func__, wb);
+
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK |
+		EXYNOS_CIGCTRL_SELCAM_ITU_MASK |
+		EXYNOS_CIGCTRL_SELCAM_MIPI_MASK |
+		EXYNOS_CIGCTRL_SELCAM_FIMC_MASK |
+		EXYNOS_CIGCTRL_SELWB_CAMIF_MASK |
+		EXYNOS_CIGCTRL_SELWRITEBACK_MASK);
+
+	switch (wb) {
+	case FIMC_WB_A:
+		cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A |
+			EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
+		break;
+	case FIMC_WB_B:
+		cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B |
+			EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
+		break;
+	case FIMC_WB_NONE:
+	default:
+		cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A |
+			EXYNOS_CIGCTRL_SELWRITEBACK_A |
+			EXYNOS_CIGCTRL_SELCAM_MIPI_A |
+			EXYNOS_CIGCTRL_SELCAM_FIMC_ITU);
+		break;
+	}
+
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+}
+
+static void fimc_set_polarity(struct fimc_context *ctx,
+		struct exynos_drm_ipp_pol *pol)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:inv_pclk[%d]inv_vsync[%d]\n",
+		__func__, pol->inv_pclk, pol->inv_vsync);
+	DRM_DEBUG_KMS("%s:inv_href[%d]inv_hsync[%d]\n",
+		__func__, pol->inv_href, pol->inv_hsync);
+
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg &= ~(EXYNOS_CIGCTRL_INVPOLPCLK | EXYNOS_CIGCTRL_INVPOLVSYNC |
+		 EXYNOS_CIGCTRL_INVPOLHREF | EXYNOS_CIGCTRL_INVPOLHSYNC);
+
+	if (pol->inv_pclk)
+		cfg |= EXYNOS_CIGCTRL_INVPOLPCLK;
+	if (pol->inv_vsync)
+		cfg |= EXYNOS_CIGCTRL_INVPOLVSYNC;
+	if (pol->inv_href)
+		cfg |= EXYNOS_CIGCTRL_INVPOLHREF;
+	if (pol->inv_hsync)
+		cfg |= EXYNOS_CIGCTRL_INVPOLHSYNC;
+
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+}
+
+static void fimc_handle_jpeg(struct fimc_context *ctx, bool enable)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:enable[%d]\n", __func__, enable);
+
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	if (enable)
+		cfg |= EXYNOS_CIGCTRL_CAM_JPEG;
+	else
+		cfg &= ~EXYNOS_CIGCTRL_CAM_JPEG;
+
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+}
+
+static void fimc_handle_irq(struct fimc_context *ctx, bool enable,
+		bool overflow, bool level)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:enable[%d]overflow[%d]level[%d]\n", __func__,
+			enable, overflow, level);
+
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	if (enable) {
+		cfg &= ~(EXYNOS_CIGCTRL_IRQ_OVFEN | EXYNOS_CIGCTRL_IRQ_LEVEL);
+		cfg |= EXYNOS_CIGCTRL_IRQ_ENABLE;
+		if (overflow)
+			cfg |= EXYNOS_CIGCTRL_IRQ_OVFEN;
+		if (level)
+			cfg |= EXYNOS_CIGCTRL_IRQ_LEVEL;
+	} else {
+		cfg &= ~(EXYNOS_CIGCTRL_IRQ_OVFEN | EXYNOS_CIGCTRL_IRQ_ENABLE);
+	}
+
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+}
+
+static void fimc_clear_irq(struct fimc_context *ctx)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg |= EXYNOS_CIGCTRL_IRQ_CLR;
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+}
+
+static bool fimc_check_ovf(struct fimc_context *ctx)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg, status, flag;
+
+	status = fimc_read(EXYNOS_CISTATUS);
+	flag = EXYNOS_CISTATUS_OVFIY | EXYNOS_CISTATUS_OVFICB |
+		EXYNOS_CISTATUS_OVFICR;
+
+	DRM_DEBUG_KMS("%s:flag[0x%x]\n", __func__, flag);
+
+	if (status & flag) {
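+		/* pulse the CLROVF bits to acknowledge and clear the overflow */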
+		cfg = fimc_read(EXYNOS_CIWDOFST);
+		cfg |= (EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
+			EXYNOS_CIWDOFST_CLROVFICR);
+
+		fimc_write(cfg, EXYNOS_CIWDOFST);
+
+		cfg = fimc_read(EXYNOS_CIWDOFST);
+		cfg &= ~(EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
+			EXYNOS_CIWDOFST_CLROVFICR);
+
+		fimc_write(cfg, EXYNOS_CIWDOFST);
+
+		dev_err(ippdrv->dev, "occured overflow at %d, status 0x%x.\n",
+			ctx->id, status);
+		return true;
+	}
+
+	return false;
+}
+
+static bool fimc_check_frame_end(struct fimc_context *ctx)
+{
+	u32 cfg;
+
+	cfg = fimc_read(EXYNOS_CISTATUS);
+
+	DRM_DEBUG_KMS("%s:cfg[0x%x]\n", __func__, cfg);
+
+	if (!(cfg & EXYNOS_CISTATUS_FRAMEEND))
+		return false;
+
+	cfg &= ~(EXYNOS_CISTATUS_FRAMEEND);
+	fimc_write(cfg, EXYNOS_CISTATUS);
+
+	return true;
+}
+
+static int fimc_get_buf_id(struct fimc_context *ctx)
+{
+	u32 cfg;
+	int frame_cnt, buf_id;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	cfg = fimc_read(EXYNOS_CISTATUS2);
+	frame_cnt = EXYNOS_CISTATUS2_GET_FRAMECOUNT_BEFORE(cfg);
+
+	if (frame_cnt == 0)
+		frame_cnt = EXYNOS_CISTATUS2_GET_FRAMECOUNT_PRESENT(cfg);
+
+	DRM_DEBUG_KMS("%s:present[%d]before[%d]\n", __func__,
+		EXYNOS_CISTATUS2_GET_FRAMECOUNT_PRESENT(cfg),
+		EXYNOS_CISTATUS2_GET_FRAMECOUNT_BEFORE(cfg));
+
+	if (frame_cnt == 0) {
+		DRM_ERROR("failed to get frame count.\n");
+		return -EIO;
+	}
+
+	buf_id = frame_cnt - 1;
+	DRM_DEBUG_KMS("%s:buf_id[%d]\n", __func__, buf_id);
+
+	return buf_id;
+}
+
+static void fimc_handle_lastend(struct fimc_context *ctx, bool enable)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:enable[%d]\n", __func__, enable);
+
+	cfg = fimc_read(EXYNOS_CIOCTRL);
+	if (enable)
+		cfg |= EXYNOS_CIOCTRL_LASTENDEN;
+	else
+		cfg &= ~EXYNOS_CIOCTRL_LASTENDEN;
+
+	fimc_write(cfg, EXYNOS_CIOCTRL);
+}
+
+
+static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:fmt[0x%x]\n", __func__, fmt);
+
+	/* RGB */
+	cfg = fimc_read(EXYNOS_CISCCTRL);
+	cfg &= ~EXYNOS_CISCCTRL_INRGB_FMT_RGB_MASK;
+
+	switch (fmt) {
+	case DRM_FORMAT_RGB565:
+		cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565;
+		fimc_write(cfg, EXYNOS_CISCCTRL);
+		return 0;
+	case DRM_FORMAT_RGB888:
+	case DRM_FORMAT_XRGB8888:
+		cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888;
+		fimc_write(cfg, EXYNOS_CISCCTRL);
+		return 0;
+	default:
+		/* bypass */
+		break;
+	}
+
+	/* YUV */
+	cfg = fimc_read(EXYNOS_MSCTRL);
+	cfg &= ~(EXYNOS_MSCTRL_ORDER2P_SHIFT_MASK |
+		EXYNOS_MSCTRL_C_INT_IN_2PLANE |
+		EXYNOS_MSCTRL_ORDER422_YCBYCR);
+
+	switch (fmt) {
+	case DRM_FORMAT_YUYV:
+		cfg |= EXYNOS_MSCTRL_ORDER422_YCBYCR;
+		break;
+	case DRM_FORMAT_YVYU:
+		cfg |= EXYNOS_MSCTRL_ORDER422_YCRYCB;
+		break;
+	case DRM_FORMAT_UYVY:
+		cfg |= EXYNOS_MSCTRL_ORDER422_CBYCRY;
+		break;
+	case DRM_FORMAT_VYUY:
+	case DRM_FORMAT_YUV444:
+		cfg |= EXYNOS_MSCTRL_ORDER422_CRYCBY;
+		break;
+	case DRM_FORMAT_NV21:
+	case DRM_FORMAT_NV61:
+		cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CRCB |
+			EXYNOS_MSCTRL_C_INT_IN_2PLANE);
+		break;
+	case DRM_FORMAT_YUV422:
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		cfg |= EXYNOS_MSCTRL_C_INT_IN_3PLANE;
+		break;
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_NV12MT:
+	case DRM_FORMAT_NV16:
+		cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR |
+			EXYNOS_MSCTRL_C_INT_IN_2PLANE);
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid source yuv order 0x%x.\n", fmt);
+		return -EINVAL;
+	}
+
+	fimc_write(cfg, EXYNOS_MSCTRL);
+
+	return 0;
+}
+
+static int fimc_src_set_fmt(struct device *dev, u32 fmt)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:fmt[0x%x]\n", __func__, fmt);
+
+	cfg = fimc_read(EXYNOS_MSCTRL);
+	cfg &= ~EXYNOS_MSCTRL_INFORMAT_RGB;
+
+	switch (fmt) {
+	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_RGB888:
+	case DRM_FORMAT_XRGB8888:
+		cfg |= EXYNOS_MSCTRL_INFORMAT_RGB;
+		break;
+	case DRM_FORMAT_YUV444:
+		cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420;
+		break;
+	case DRM_FORMAT_YUYV:
+	case DRM_FORMAT_YVYU:
+	case DRM_FORMAT_UYVY:
+	case DRM_FORMAT_VYUY:
+		cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR422_1PLANE;
+		break;
+	case DRM_FORMAT_NV16:
+	case DRM_FORMAT_NV61:
+	case DRM_FORMAT_YUV422:
+		cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR422;
+		break;
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_NV21:
+	case DRM_FORMAT_NV12MT:
+		cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420;
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid source format 0x%x.\n", fmt);
+		return -EINVAL;
+	}
+
+	fimc_write(cfg, EXYNOS_MSCTRL);
+
+	cfg = fimc_read(EXYNOS_CIDMAPARAM);
+	cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK;
+
+	if (fmt == DRM_FORMAT_NV12MT)
+		cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32;
+	else
+		cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
+
+	fimc_write(cfg, EXYNOS_CIDMAPARAM);
+
+	return fimc_src_set_fmt_order(ctx, fmt);
+}
+
+static int fimc_src_set_transf(struct device *dev,
+		enum drm_exynos_degree degree,
+		enum drm_exynos_flip flip, bool *swap)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg1, cfg2;
+
+	DRM_DEBUG_KMS("%s:degree[%d]flip[0x%x]\n", __func__,
+		degree, flip);
+
+	cfg1 = fimc_read(EXYNOS_MSCTRL);
+	cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR |
+		EXYNOS_MSCTRL_FLIP_Y_MIRROR);
+
+	cfg2 = fimc_read(EXYNOS_CITRGFMT);
+	cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
+
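+	/*
+	 * 180 degrees is implemented as X/Y mirroring and 270 degrees as
+	 * 90-degree rotation plus X/Y mirroring; an explicit flip request
+	 * cancels the matching mirror bit.
+	 */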
+	switch (degree) {
+	case EXYNOS_DRM_DEGREE_0:
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
+		break;
+	case EXYNOS_DRM_DEGREE_90:
+		cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
+		break;
+	case EXYNOS_DRM_DEGREE_180:
+		cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
+			EXYNOS_MSCTRL_FLIP_Y_MIRROR);
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
+		break;
+	case EXYNOS_DRM_DEGREE_270:
+		cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
+			EXYNOS_MSCTRL_FLIP_Y_MIRROR);
+		cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid degree value %d.\n", degree);
+		return -EINVAL;
+	}
+
+	fimc_write(cfg1, EXYNOS_MSCTRL);
+	fimc_write(cfg2, EXYNOS_CITRGFMT);
+	*swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0;
+
+	return 0;
+}
+
+static int fimc_set_window(struct fimc_context *ctx,
+		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+{
+	u32 cfg, h1, h2, v1, v2;
+
+	/* cropped image */
+	h1 = pos->x;
+	h2 = sz->hsize - pos->w - pos->x;
+	v1 = pos->y;
+	v2 = sz->vsize - pos->h - pos->y;
+
+	DRM_DEBUG_KMS("%s:x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n",
+		__func__, pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize);
+	DRM_DEBUG_KMS("%s:h1[%d]h2[%d]v1[%d]v2[%d]\n", __func__,
+		h1, h2, v1, v2);
+
+	/*
+	 * set window offset 1, 2 size
+	 * check figure 43-21 in user manual
+	 */
+	cfg = fimc_read(EXYNOS_CIWDOFST);
+	cfg &= ~(EXYNOS_CIWDOFST_WINHOROFST_MASK |
+		EXYNOS_CIWDOFST_WINVEROFST_MASK);
+	cfg |= (EXYNOS_CIWDOFST_WINHOROFST(h1) |
+		EXYNOS_CIWDOFST_WINVEROFST(v1));
+	cfg |= EXYNOS_CIWDOFST_WINOFSEN;
+	fimc_write(cfg, EXYNOS_CIWDOFST);
+
+	cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) |
+		EXYNOS_CIWDOFST2_WINVEROFST2(v2));
+	fimc_write(cfg, EXYNOS_CIWDOFST2);
+
+	return 0;
+}
+
+static int fimc_src_set_size(struct device *dev, int swap,
+		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct drm_exynos_pos img_pos = *pos;
+	struct drm_exynos_sz img_sz = *sz;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:swap[%d]hsize[%d]vsize[%d]\n",
+		__func__, swap, sz->hsize, sz->vsize);
+
+	/* original size */
+	cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) |
+		EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize));
+
+	fimc_write(cfg, EXYNOS_ORGISIZE);
+
+	DRM_DEBUG_KMS("%s:x[%d]y[%d]w[%d]h[%d]\n", __func__,
+		pos->x, pos->y, pos->w, pos->h);
+
+	if (swap) {
+		img_pos.w = pos->h;
+		img_pos.h = pos->w;
+		img_sz.hsize = sz->vsize;
+		img_sz.vsize = sz->hsize;
+	}
+
+	/* set input DMA image size */
+	cfg = fimc_read(EXYNOS_CIREAL_ISIZE);
+	cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK |
+		EXYNOS_CIREAL_ISIZE_WIDTH_MASK);
+	cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) |
+		EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h));
+	fimc_write(cfg, EXYNOS_CIREAL_ISIZE);
+
+	/*
+	 * set input FIFO image size
+	 * for now, we support only ITU601 8 bit mode
+	 */
+	cfg = (EXYNOS_CISRCFMT_ITU601_8BIT |
+		EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) |
+		EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize));
+	fimc_write(cfg, EXYNOS_CISRCFMT);
+
+	/* offset Y(RGB), Cb, Cr */
+	cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) |
+		EXYNOS_CIIYOFF_VERTICAL(img_pos.y));
+	fimc_write(cfg, EXYNOS_CIIYOFF);
+	cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) |
+		EXYNOS_CIICBOFF_VERTICAL(img_pos.y));
+	fimc_write(cfg, EXYNOS_CIICBOFF);
+	cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) |
+		EXYNOS_CIICROFF_VERTICAL(img_pos.y));
+	fimc_write(cfg, EXYNOS_CIICROFF);
+
+	return fimc_set_window(ctx, &img_pos, &img_sz);
+}
+
+static int fimc_src_set_addr(struct device *dev,
+		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_property *property;
+	struct drm_exynos_ipp_config *config;
+
+	if (!c_node) {
+		DRM_ERROR("failed to get c_node.\n");
+		return -EINVAL;
+	}
+
+	property = &c_node->property;
+	if (!property) {
+		DRM_ERROR("failed to get property.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]buf_id[%d]buf_type[%d]\n", __func__,
+		property->prop_id, buf_id, buf_type);
+
+	if (buf_id > FIMC_MAX_SRC) {
+		dev_info(ippdrv->dev, "inavlid buf_id %d.\n", buf_id);
+		return -ENOMEM;
+	}
+
+	/* address register set */
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		config = &property->config[EXYNOS_DRM_OPS_SRC];
+		fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
+			EXYNOS_CIIYSA(buf_id));
+
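+		/* YVU420 stores chroma in CR/CB order, so swap the planes */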
+		if (config->fmt == DRM_FORMAT_YVU420) {
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+				EXYNOS_CIICBSA(buf_id));
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+				EXYNOS_CIICRSA(buf_id));
+		} else {
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+				EXYNOS_CIICBSA(buf_id));
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+				EXYNOS_CIICRSA(buf_id));
+		}
+		break;
+	case IPP_BUF_DEQUEUE:
+		fimc_write(0x0, EXYNOS_CIIYSA(buf_id));
+		fimc_write(0x0, EXYNOS_CIICBSA(buf_id));
+		fimc_write(0x0, EXYNOS_CIICRSA(buf_id));
+		break;
+	default:
+		/* bypass */
+		break;
+	}
+
+	return 0;
+}
+
+static struct exynos_drm_ipp_ops fimc_src_ops = {
+	.set_fmt = fimc_src_set_fmt,
+	.set_transf = fimc_src_set_transf,
+	.set_size = fimc_src_set_size,
+	.set_addr = fimc_src_set_addr,
+};
+
+static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:fmt[0x%x]\n", __func__, fmt);
+
+	/* RGB */
+	cfg = fimc_read(EXYNOS_CISCCTRL);
+	cfg &= ~EXYNOS_CISCCTRL_OUTRGB_FMT_RGB_MASK;
+
+	switch (fmt) {
+	case DRM_FORMAT_RGB565:
+		cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565;
+		fimc_write(cfg, EXYNOS_CISCCTRL);
+		return 0;
+	case DRM_FORMAT_RGB888:
+		cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888;
+		fimc_write(cfg, EXYNOS_CISCCTRL);
+		return 0;
+	case DRM_FORMAT_XRGB8888:
+		cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 |
+			EXYNOS_CISCCTRL_EXTRGB_EXTENSION);
+		fimc_write(cfg, EXYNOS_CISCCTRL);
+		break;
+	default:
+		/* bypass */
+		break;
+	}
+
+	/* YUV */
+	cfg = fimc_read(EXYNOS_CIOCTRL);
+	cfg &= ~(EXYNOS_CIOCTRL_ORDER2P_MASK |
+		EXYNOS_CIOCTRL_ORDER422_MASK |
+		EXYNOS_CIOCTRL_YCBCR_PLANE_MASK);
+
+	switch (fmt) {
+	case DRM_FORMAT_XRGB8888:
+		cfg |= EXYNOS_CIOCTRL_ALPHA_OUT;
+		break;
+	case DRM_FORMAT_YUYV:
+		cfg |= EXYNOS_CIOCTRL_ORDER422_YCBYCR;
+		break;
+	case DRM_FORMAT_YVYU:
+		cfg |= EXYNOS_CIOCTRL_ORDER422_YCRYCB;
+		break;
+	case DRM_FORMAT_UYVY:
+		cfg |= EXYNOS_CIOCTRL_ORDER422_CBYCRY;
+		break;
+	case DRM_FORMAT_VYUY:
+		cfg |= EXYNOS_CIOCTRL_ORDER422_CRYCBY;
+		break;
+	case DRM_FORMAT_NV21:
+	case DRM_FORMAT_NV61:
+		cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CRCB;
+		cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE;
+		break;
+	case DRM_FORMAT_YUV422:
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		cfg |= EXYNOS_CIOCTRL_YCBCR_3PLANE;
+		break;
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_NV12MT:
+	case DRM_FORMAT_NV16:
+		cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR;
+		cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE;
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid target yuv order 0x%x.\n", fmt);
+		return -EINVAL;
+	}
+
+	fimc_write(cfg, EXYNOS_CIOCTRL);
+
+	return 0;
+}
+
+static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:fmt[0x%x]\n", __func__, fmt);
+
+	cfg = fimc_read(EXYNOS_CIEXTEN);
+
+	if (fmt == DRM_FORMAT_AYUV) {
+		cfg |= EXYNOS_CIEXTEN_YUV444_OUT;
+		fimc_write(cfg, EXYNOS_CIEXTEN);
+	} else {
+		cfg &= ~EXYNOS_CIEXTEN_YUV444_OUT;
+		fimc_write(cfg, EXYNOS_CIEXTEN);
+
+		cfg = fimc_read(EXYNOS_CITRGFMT);
+		cfg &= ~EXYNOS_CITRGFMT_OUTFORMAT_MASK;
+
+		switch (fmt) {
+		case DRM_FORMAT_RGB565:
+		case DRM_FORMAT_RGB888:
+		case DRM_FORMAT_XRGB8888:
+			cfg |= EXYNOS_CITRGFMT_OUTFORMAT_RGB;
+			break;
+		case DRM_FORMAT_YUYV:
+		case DRM_FORMAT_YVYU:
+		case DRM_FORMAT_UYVY:
+		case DRM_FORMAT_VYUY:
+			cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR422_1PLANE;
+			break;
+		case DRM_FORMAT_NV16:
+		case DRM_FORMAT_NV61:
+		case DRM_FORMAT_YUV422:
+			cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR422;
+			break;
+		case DRM_FORMAT_YUV420:
+		case DRM_FORMAT_YVU420:
+		case DRM_FORMAT_NV12:
+		case DRM_FORMAT_NV12MT:
+		case DRM_FORMAT_NV21:
+			cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420;
+			break;
+		default:
+			dev_err(ippdrv->dev, "inavlid target format 0x%x.\n",
+				fmt);
+			return -EINVAL;
+		}
+
+		fimc_write(cfg, EXYNOS_CITRGFMT);
+	}
+
+	cfg = fimc_read(EXYNOS_CIDMAPARAM);
+	cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK;
+
+	if (fmt == DRM_FORMAT_NV12MT)
+		cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32;
+	else
+		cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
+
+	fimc_write(cfg, EXYNOS_CIDMAPARAM);
+
+	return fimc_dst_set_fmt_order(ctx, fmt);
+}
+
+static int fimc_dst_set_transf(struct device *dev,
+		enum drm_exynos_degree degree,
+		enum drm_exynos_flip flip, bool *swap)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:degree[%d]flip[0x%x]\n", __func__,
+		degree, flip);
+
+	cfg = fimc_read(EXYNOS_CITRGFMT);
+	cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK;
+	cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
+
+	switch (degree) {
+	case EXYNOS_DRM_DEGREE_0:
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
+		break;
+	case EXYNOS_DRM_DEGREE_90:
+		cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
+		break;
+	case EXYNOS_DRM_DEGREE_180:
+		cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR |
+			EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
+		break;
+	case EXYNOS_DRM_DEGREE_270:
+		cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE |
+			EXYNOS_CITRGFMT_FLIP_X_MIRROR |
+			EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid degree value %d.\n", degree);
+		return -EINVAL;
+	}
+
+	fimc_write(cfg, EXYNOS_CITRGFMT);
+	*swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0;
+
+	return 0;
+}
+
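+/*
+ * The pre-scaler only supports power-of-two reductions up to 1/32, so
+ * pick the largest ratio not exceeding src/dst; shift is log2(ratio).
+ */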
+static int fimc_get_ratio_shift(u32 src, u32 dst, u32 *ratio, u32 *shift)
+{
+	DRM_DEBUG_KMS("%s:src[%d]dst[%d]\n", __func__, src, dst);
+
+	if (src >= dst * 64) {
+		DRM_ERROR("failed to make ratio and shift.\n");
+		return -EINVAL;
+	} else if (src >= dst * 32) {
+		*ratio = 32;
+		*shift = 5;
+	} else if (src >= dst * 16) {
+		*ratio = 16;
+		*shift = 4;
+	} else if (src >= dst * 8) {
+		*ratio = 8;
+		*shift = 3;
+	} else if (src >= dst * 4) {
+		*ratio = 4;
+		*shift = 2;
+	} else if (src >= dst * 2) {
+		*ratio = 2;
+		*shift = 1;
+	} else {
+		*ratio = 1;
+		*shift = 0;
+	}
+
+	return 0;
+}
+
+static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc,
+		struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg, cfg_ext, shfactor;
+	u32 pre_dst_width, pre_dst_height;
+	u32 pre_hratio, hfactor, pre_vratio, vfactor;
+	int ret = 0;
+	u32 src_w, src_h, dst_w, dst_h;
+
+	cfg_ext = fimc_read(EXYNOS_CITRGFMT);
+	if (cfg_ext & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) {
+		src_w = src->h;
+		src_h = src->w;
+	} else {
+		src_w = src->w;
+		src_h = src->h;
+	}
+
+	if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) {
+		dst_w = dst->h;
+		dst_h = dst->w;
+	} else {
+		dst_w = dst->w;
+		dst_h = dst->h;
+	}
+
+	ret = fimc_get_ratio_shift(src_w, dst_w, &pre_hratio, &hfactor);
+	if (ret) {
+		dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+		return ret;
+	}
+
+	ret = fimc_get_ratio_shift(src_h, dst_h, &pre_vratio, &vfactor);
+	if (ret) {
+		dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+		return ret;
+	}
+
+	pre_dst_width = src_w / pre_hratio;
+	pre_dst_height = src_h / pre_vratio;
+	DRM_DEBUG_KMS("%s:pre_dst_width[%d]pre_dst_height[%d]\n", __func__,
+		pre_dst_width, pre_dst_height);
+	DRM_DEBUG_KMS("%s:pre_hratio[%d]hfactor[%d]pre_vratio[%d]vfactor[%d]\n",
+		__func__, pre_hratio, hfactor, pre_vratio, vfactor);
+
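+	/*
+	 * main scaler ratio as a fixed-point value scaled by 2^14; the
+	 * pre-scaler already reduced the source by 2^hfactor (2^vfactor).
+	 */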
+	sc->hratio = (src_w << 14) / (dst_w << hfactor);
+	sc->vratio = (src_h << 14) / (dst_h << vfactor);
+	sc->up_h = (dst_w >= src_w);
+	sc->up_v = (dst_h >= src_h);
+	DRM_DEBUG_KMS("%s:hratio[%d]vratio[%d]up_h[%d]up_v[%d]\n",
+		__func__, sc->hratio, sc->vratio, sc->up_h, sc->up_v);
+
+	shfactor = FIMC_SHFACTOR - (hfactor + vfactor);
+	DRM_DEBUG_KMS("%s:shfactor[%d]\n", __func__, shfactor);
+
+	cfg = (EXYNOS_CISCPRERATIO_SHFACTOR(shfactor) |
+		EXYNOS_CISCPRERATIO_PREHORRATIO(pre_hratio) |
+		EXYNOS_CISCPRERATIO_PREVERRATIO(pre_vratio));
+	fimc_write(cfg, EXYNOS_CISCPRERATIO);
+
+	cfg = (EXYNOS_CISCPREDST_PREDSTWIDTH(pre_dst_width) |
+		EXYNOS_CISCPREDST_PREDSTHEIGHT(pre_dst_height));
+	fimc_write(cfg, EXYNOS_CISCPREDST);
+
+	return ret;
+}
+
+static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc)
+{
+	u32 cfg, cfg_ext;
+
+	DRM_DEBUG_KMS("%s:range[%d]bypass[%d]up_h[%d]up_v[%d]\n",
+		__func__, sc->range, sc->bypass, sc->up_h, sc->up_v);
+	DRM_DEBUG_KMS("%s:hratio[%d]vratio[%d]\n",
+		__func__, sc->hratio, sc->vratio);
+
+	cfg = fimc_read(EXYNOS_CISCCTRL);
+	cfg &= ~(EXYNOS_CISCCTRL_SCALERBYPASS |
+		EXYNOS_CISCCTRL_SCALEUP_H | EXYNOS_CISCCTRL_SCALEUP_V |
+		EXYNOS_CISCCTRL_MAIN_V_RATIO_MASK |
+		EXYNOS_CISCCTRL_MAIN_H_RATIO_MASK |
+		EXYNOS_CISCCTRL_CSCR2Y_WIDE |
+		EXYNOS_CISCCTRL_CSCY2R_WIDE);
+
+	if (sc->range)
+		cfg |= (EXYNOS_CISCCTRL_CSCR2Y_WIDE |
+			EXYNOS_CISCCTRL_CSCY2R_WIDE);
+	if (sc->bypass)
+		cfg |= EXYNOS_CISCCTRL_SCALERBYPASS;
+	if (sc->up_h)
+		cfg |= EXYNOS_CISCCTRL_SCALEUP_H;
+	if (sc->up_v)
+		cfg |= EXYNOS_CISCCTRL_SCALEUP_V;
+
+	cfg |= (EXYNOS_CISCCTRL_MAINHORRATIO((sc->hratio >> 6)) |
+		EXYNOS_CISCCTRL_MAINVERRATIO((sc->vratio >> 6)));
+	fimc_write(cfg, EXYNOS_CISCCTRL);
+
+	cfg_ext = fimc_read(EXYNOS_CIEXTEN);
+	cfg_ext &= ~EXYNOS_CIEXTEN_MAINHORRATIO_EXT_MASK;
+	cfg_ext &= ~EXYNOS_CIEXTEN_MAINVERRATIO_EXT_MASK;
+	cfg_ext |= (EXYNOS_CIEXTEN_MAINHORRATIO_EXT(sc->hratio) |
+		EXYNOS_CIEXTEN_MAINVERRATIO_EXT(sc->vratio));
+	fimc_write(cfg_ext, EXYNOS_CIEXTEN);
+}
+
+static int fimc_dst_set_size(struct device *dev, int swap,
+		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct drm_exynos_pos img_pos = *pos;
+	struct drm_exynos_sz img_sz = *sz;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:swap[%d]hsize[%d]vsize[%d]\n",
+		__func__, swap, sz->hsize, sz->vsize);
+
+	/* original size */
+	cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) |
+		EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize));
+
+	fimc_write(cfg, EXYNOS_ORGOSIZE);
+
+	DRM_DEBUG_KMS("%s:x[%d]y[%d]w[%d]h[%d]\n",
+		__func__, pos->x, pos->y, pos->w, pos->h);
+
+	/* CSC: use ITU-R BT.709 coefficients for HD widths, BT.601 otherwise */
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg &= ~EXYNOS_CIGCTRL_CSC_MASK;
+
+	if (sz->hsize >= FIMC_WIDTH_ITU_709)
+		cfg |= EXYNOS_CIGCTRL_CSC_ITU709;
+	else
+		cfg |= EXYNOS_CIGCTRL_CSC_ITU601;
+
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+
+	if (swap) {
+		img_pos.w = pos->h;
+		img_pos.h = pos->w;
+		img_sz.hsize = sz->vsize;
+		img_sz.vsize = sz->hsize;
+	}
+
+	/* target image size */
+	cfg = fimc_read(EXYNOS_CITRGFMT);
+	cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK |
+		EXYNOS_CITRGFMT_TARGETV_MASK);
+	cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) |
+		EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h));
+	fimc_write(cfg, EXYNOS_CITRGFMT);
+
+	/* target area */
+	cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h);
+	fimc_write(cfg, EXYNOS_CITAREA);
+
+	/* offset Y(RGB), Cb, Cr */
+	cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) |
+		EXYNOS_CIOYOFF_VERTICAL(img_pos.y));
+	fimc_write(cfg, EXYNOS_CIOYOFF);
+	cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) |
+		EXYNOS_CIOCBOFF_VERTICAL(img_pos.y));
+	fimc_write(cfg, EXYNOS_CIOCBOFF);
+	cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) |
+		EXYNOS_CIOCROFF_VERTICAL(img_pos.y));
+	fimc_write(cfg, EXYNOS_CIOCROFF);
+
+	return 0;
+}
+
+static int fimc_dst_get_buf_seq(struct fimc_context *ctx)
+{
+	u32 cfg, i, buf_num = 0;
+	u32 mask = 0x00000001;
+
+	cfg = fimc_read(EXYNOS_CIFCNTSEQ);
+
+	for (i = 0; i < FIMC_REG_SZ; i++)
+		if (cfg & (mask << i))
+			buf_num++;
+
+	DRM_DEBUG_KMS("%s:buf_num[%d]\n", __func__, buf_num);
+
+	return buf_num;
+}
+
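+/*
+ * Each bit of EXYNOS_CIFCNTSEQ enables one output DMA buffer, so
+ * fimc_dst_get_buf_seq() returns the number of enabled buffers.
+ */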
+static int fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	bool enable;
+	u32 cfg;
+	u32 mask = 0x00000001 << buf_id;
+	int ret = 0;
+
+	DRM_DEBUG_KMS("%s:buf_id[%d]buf_type[%d]\n", __func__,
+		buf_id, buf_type);
+
+	mutex_lock(&ctx->lock);
+
+	/* mask register set */
+	cfg = fimc_read(EXYNOS_CIFCNTSEQ);
+
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		enable = true;
+		break;
+	case IPP_BUF_DEQUEUE:
+		enable = false;
+		break;
+	default:
+		dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	/* sequence id */
+	cfg &= (~mask);
+	cfg |= (enable << buf_id);
+	fimc_write(cfg, EXYNOS_CIFCNTSEQ);
+
+	/* interrupt enable */
+	if (buf_type == IPP_BUF_ENQUEUE &&
+	    fimc_dst_get_buf_seq(ctx) >= FIMC_BUF_START)
+		fimc_handle_irq(ctx, true, false, true);
+
+	/* interrupt disable */
+	if (buf_type == IPP_BUF_DEQUEUE &&
+	    fimc_dst_get_buf_seq(ctx) <= FIMC_BUF_STOP)
+		fimc_handle_irq(ctx, false, false, true);
+
+err_unlock:
+	mutex_unlock(&ctx->lock);
+	return ret;
+}
+
+static int fimc_dst_set_addr(struct device *dev,
+		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_property *property;
+	struct drm_exynos_ipp_config *config;
+
+	if (!c_node) {
+		DRM_ERROR("failed to get c_node.\n");
+		return -EINVAL;
+	}
+
+	property = &c_node->property;
+	if (!property) {
+		DRM_ERROR("failed to get property.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]buf_id[%d]buf_type[%d]\n", __func__,
+		property->prop_id, buf_id, buf_type);
+
+	if (buf_id > FIMC_MAX_DST) {
+		dev_info(ippdrv->dev, "inavlid buf_id %d.\n", buf_id);
+		return -ENOMEM;
+	}
+
+	/* address register set */
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		config = &property->config[EXYNOS_DRM_OPS_DST];
+
+		fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
+			EXYNOS_CIOYSA(buf_id));
+
+		if (config->fmt == DRM_FORMAT_YVU420) {
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+				EXYNOS_CIOCBSA(buf_id));
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+				EXYNOS_CIOCRSA(buf_id));
+		} else {
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+				EXYNOS_CIOCBSA(buf_id));
+			fimc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+				EXYNOS_CIOCRSA(buf_id));
+		}
+		break;
+	case IPP_BUF_DEQUEUE:
+		fimc_write(0x0, EXYNOS_CIOYSA(buf_id));
+		fimc_write(0x0, EXYNOS_CIOCBSA(buf_id));
+		fimc_write(0x0, EXYNOS_CIOCRSA(buf_id));
+		break;
+	default:
+		/* bypass */
+		break;
+	}
+
+	return fimc_dst_set_buf_seq(ctx, buf_id, buf_type);
+}
+
+static struct exynos_drm_ipp_ops fimc_dst_ops = {
+	.set_fmt = fimc_dst_set_fmt,
+	.set_transf = fimc_dst_set_transf,
+	.set_size = fimc_dst_set_size,
+	.set_addr = fimc_dst_set_addr,
+};
+
+static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable)
+{
+	DRM_DEBUG_KMS("%s:enable[%d]\n", __func__, enable);
+
+	if (enable) {
+		clk_enable(ctx->sclk_fimc_clk);
+		clk_enable(ctx->fimc_clk);
+		clk_enable(ctx->wb_clk);
+		ctx->suspended = false;
+	} else {
+		clk_disable(ctx->sclk_fimc_clk);
+		clk_disable(ctx->fimc_clk);
+		clk_disable(ctx->wb_clk);
+		ctx->suspended = true;
+	}
+
+	return 0;
+}
+
+static irqreturn_t fimc_irq_handler(int irq, void *dev_id)
+{
+	struct fimc_context *ctx = dev_id;
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_event_work *event_work =
+		c_node->event_work;
+	int buf_id;
+
+	DRM_DEBUG_KMS("%s:fimc id[%d]\n", __func__, ctx->id);
+
+	fimc_clear_irq(ctx);
+	if (fimc_check_ovf(ctx))
+		return IRQ_NONE;
+
+	if (!fimc_check_frame_end(ctx))
+		return IRQ_NONE;
+
+	buf_id = fimc_get_buf_id(ctx);
+	if (buf_id < 0)
+		return IRQ_HANDLED;
+
+	DRM_DEBUG_KMS("%s:buf_id[%d]\n", __func__, buf_id);
+
+	if (fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE) < 0) {
+		DRM_ERROR("failed to dequeue.\n");
+		return IRQ_HANDLED;
+	}
+
+	event_work->ippdrv = ippdrv;
+	event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
+	queue_work(ippdrv->event_workq, (struct work_struct *)event_work);
+
+	return IRQ_HANDLED;
+}
+
+static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
+{
+	struct drm_exynos_ipp_prop_list *prop_list;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	prop_list = devm_kzalloc(ippdrv->dev, sizeof(*prop_list), GFP_KERNEL);
+	if (!prop_list) {
+		DRM_ERROR("failed to alloc property list.\n");
+		return -ENOMEM;
+	}
+
+	prop_list->version = 1;
+	prop_list->writeback = 1;
+	prop_list->refresh_min = FIMC_REFRESH_MIN;
+	prop_list->refresh_max = FIMC_REFRESH_MAX;
+	prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) |
+				(1 << EXYNOS_DRM_FLIP_VERTICAL) |
+				(1 << EXYNOS_DRM_FLIP_HORIZONTAL);
+	prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
+				(1 << EXYNOS_DRM_DEGREE_90) |
+				(1 << EXYNOS_DRM_DEGREE_180) |
+				(1 << EXYNOS_DRM_DEGREE_270);
+	prop_list->csc = 1;
+	prop_list->crop = 1;
+	prop_list->crop_max.hsize = FIMC_CROP_MAX;
+	prop_list->crop_max.vsize = FIMC_CROP_MAX;
+	prop_list->crop_min.hsize = FIMC_CROP_MIN;
+	prop_list->crop_min.vsize = FIMC_CROP_MIN;
+	prop_list->scale = 1;
+	prop_list->scale_max.hsize = FIMC_SCALE_MAX;
+	prop_list->scale_max.vsize = FIMC_SCALE_MAX;
+	prop_list->scale_min.hsize = FIMC_SCALE_MIN;
+	prop_list->scale_min.vsize = FIMC_SCALE_MIN;
+
+	ippdrv->prop_list = prop_list;
+
+	return 0;
+}
+
+static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip)
+{
+	switch (flip) {
+	case EXYNOS_DRM_FLIP_NONE:
+	case EXYNOS_DRM_FLIP_VERTICAL:
+	case EXYNOS_DRM_FLIP_HORIZONTAL:
+		return true;
+	default:
+		DRM_DEBUG_KMS("%s:invalid flip\n", __func__);
+		return false;
+	}
+}
+
+static int fimc_ippdrv_check_property(struct device *dev,
+		struct drm_exynos_ipp_property *property)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_prop_list *pp = ippdrv->prop_list;
+	struct drm_exynos_ipp_config *config;
+	struct drm_exynos_pos *pos;
+	struct drm_exynos_sz *sz;
+	bool swap;
+	int i;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	for_each_ipp_ops(i) {
+		if ((i == EXYNOS_DRM_OPS_SRC) &&
+			(property->cmd == IPP_CMD_WB))
+			continue;
+
+		config = &property->config[i];
+		pos = &config->pos;
+		sz = &config->sz;
+
+		/* check for flip */
+		if (!fimc_check_drm_flip(config->flip)) {
+			DRM_ERROR("invalid flip.\n");
+			goto err_property;
+		}
+
+		/* check for degree */
+		switch (config->degree) {
+		case EXYNOS_DRM_DEGREE_90:
+		case EXYNOS_DRM_DEGREE_270:
+			swap = true;
+			break;
+		case EXYNOS_DRM_DEGREE_0:
+		case EXYNOS_DRM_DEGREE_180:
+			swap = false;
+			break;
+		default:
+			DRM_ERROR("invalid degree.\n");
+			goto err_property;
+		}
+
+		/* check for buffer bound */
+		if ((pos->x + pos->w > sz->hsize) ||
+			(pos->y + pos->h > sz->vsize)) {
+			DRM_ERROR("out of buf bound.\n");
+			goto err_property;
+		}
+
+		/* check for crop */
+		if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
+			if (swap) {
+				if ((pos->h < pp->crop_min.hsize) ||
+					(sz->vsize > pp->crop_max.hsize) ||
+					(pos->w < pp->crop_min.vsize) ||
+					(sz->hsize > pp->crop_max.vsize)) {
+					DRM_ERROR("out of crop size.\n");
+					goto err_property;
+				}
+			} else {
+				if ((pos->w < pp->crop_min.hsize) ||
+					(sz->hsize > pp->crop_max.hsize) ||
+					(pos->h < pp->crop_min.vsize) ||
+					(sz->vsize > pp->crop_max.vsize)) {
+					DRM_ERROR("out of crop size.\n");
+					goto err_property;
+				}
+			}
+		}
+
+		/* check for scale */
+		if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
+			if (swap) {
+				if ((pos->h < pp->scale_min.hsize) ||
+					(sz->vsize > pp->scale_max.hsize) ||
+					(pos->w < pp->scale_min.vsize) ||
+					(sz->hsize > pp->scale_max.vsize)) {
+					DRM_ERROR("out of scale size.\n");
+					goto err_property;
+				}
+			} else {
+				if ((pos->w < pp->scale_min.hsize) ||
+					(sz->hsize > pp->scale_max.hsize) ||
+					(pos->h < pp->scale_min.vsize) ||
+					(sz->vsize > pp->scale_max.vsize)) {
+					DRM_ERROR("out of scale size.\n");
+					goto err_property;
+				}
+			}
+		}
+	}
+
+	return 0;
+
+err_property:
+	for_each_ipp_ops(i) {
+		if ((i == EXYNOS_DRM_OPS_SRC) &&
+			(property->cmd == IPP_CMD_WB))
+			continue;
+
+		config = &property->config[i];
+		pos = &config->pos;
+		sz = &config->sz;
+
+		DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
+			i ? "dst" : "src", config->flip, config->degree,
+			pos->x, pos->y, pos->w, pos->h,
+			sz->hsize, sz->vsize);
+	}
+
+	return -EINVAL;
+}
+
+static void fimc_clear_addr(struct fimc_context *ctx)
+{
+	int i;
+
+	DRM_DEBUG_KMS("%s:\n", __func__);
+
+	for (i = 0; i < FIMC_MAX_SRC; i++) {
+		fimc_write(0, EXYNOS_CIIYSA(i));
+		fimc_write(0, EXYNOS_CIICBSA(i));
+		fimc_write(0, EXYNOS_CIICRSA(i));
+	}
+
+	for (i = 0; i < FIMC_MAX_DST; i++) {
+		fimc_write(0, EXYNOS_CIOYSA(i));
+		fimc_write(0, EXYNOS_CIOCBSA(i));
+		fimc_write(0, EXYNOS_CIOCRSA(i));
+	}
+}
+
+static int fimc_ippdrv_reset(struct device *dev)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	/* reset h/w block */
+	fimc_sw_reset(ctx, false);
+
+	/* reset scaler capability */
+	memset(&ctx->sc, 0x0, sizeof(ctx->sc));
+
+	fimc_clear_addr(ctx);
+
+	return 0;
+}
+
+static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_property *property;
+	struct drm_exynos_ipp_config *config;
+	struct drm_exynos_pos	img_pos[EXYNOS_DRM_OPS_MAX];
+	struct drm_exynos_ipp_set_wb set_wb;
+	int ret, i;
+	u32 cfg0, cfg1;
+
+	DRM_DEBUG_KMS("%s:cmd[%d]\n", __func__, cmd);
+
+	if (!c_node) {
+		DRM_ERROR("failed to get c_node.\n");
+		return -EINVAL;
+	}
+
+	property = &c_node->property;
+	if (!property) {
+		DRM_ERROR("failed to get property.\n");
+		return -EINVAL;
+	}
+
+	fimc_handle_irq(ctx, true, false, true);
+
+	for_each_ipp_ops(i) {
+		config = &property->config[i];
+		img_pos[i] = config->pos;
+	}
+
+	ret = fimc_set_prescaler(ctx, &ctx->sc,
+		&img_pos[EXYNOS_DRM_OPS_SRC],
+		&img_pos[EXYNOS_DRM_OPS_DST]);
+	if (ret) {
+		dev_err(dev, "failed to set precalser.\n");
+		return ret;
+	}
+
+	/* if set true, the screen can be saved as jpeg */
+	fimc_handle_jpeg(ctx, false);
+	fimc_set_scaler(ctx, &ctx->sc);
+	fimc_set_polarity(ctx, &ctx->pol);
+
+	switch (cmd) {
+	case IPP_CMD_M2M:
+		fimc_set_type_ctrl(ctx, FIMC_WB_NONE);
+		fimc_handle_lastend(ctx, false);
+
+		/* setup dma */
+		cfg0 = fimc_read(EXYNOS_MSCTRL);
+		cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
+		cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
+		fimc_write(cfg0, EXYNOS_MSCTRL);
+		break;
+	case IPP_CMD_WB:
+		fimc_set_type_ctrl(ctx, FIMC_WB_A);
+		fimc_handle_lastend(ctx, true);
+
+		/* setup FIMD */
+		fimc_set_camblk_fimd0_wb(ctx);
+
+		set_wb.enable = 1;
+		set_wb.refresh = property->refresh_rate;
+		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
+		break;
+	case IPP_CMD_OUTPUT:
+	default:
+		ret = -EINVAL;
+		dev_err(dev, "invalid operations.\n");
+		return ret;
+	}
+
+	/* Reset status */
+	fimc_write(0x0, EXYNOS_CISTATUS);
+
+	cfg0 = fimc_read(EXYNOS_CIIMGCPT);
+	cfg0 |= EXYNOS_CIIMGCPT_IMGCPTEN_SC;
+
+	/* Scaler */
+	cfg1 = fimc_read(EXYNOS_CISCCTRL);
+	cfg1 &= ~EXYNOS_CISCCTRL_SCAN_MASK;
+	cfg1 |= (EXYNOS_CISCCTRL_PROGRESSIVE |
+		EXYNOS_CISCCTRL_SCALERSTART);
+
+	fimc_write(cfg1, EXYNOS_CISCCTRL);
+
+	/* Enable image capture*/
+	cfg0 |= EXYNOS_CIIMGCPT_IMGCPTEN;
+	fimc_write(cfg0, EXYNOS_CIIMGCPT);
+
+	/* Disable frame end irq */
+	cfg0 = fimc_read(EXYNOS_CIGCTRL);
+	cfg0 &= ~EXYNOS_CIGCTRL_IRQ_END_DISABLE;
+	fimc_write(cfg0, EXYNOS_CIGCTRL);
+
+	cfg0 = fimc_read(EXYNOS_CIOCTRL);
+	cfg0 &= ~EXYNOS_CIOCTRL_WEAVE_MASK;
+	fimc_write(cfg0, EXYNOS_CIOCTRL);
+
+	if (cmd == IPP_CMD_M2M) {
+		cfg0 = fimc_read(EXYNOS_MSCTRL);
+		cfg0 |= EXYNOS_MSCTRL_ENVID;
+		fimc_write(cfg0, EXYNOS_MSCTRL);
+
+		cfg0 = fimc_read(EXYNOS_MSCTRL);
+		cfg0 |= EXYNOS_MSCTRL_ENVID;
+		fimc_write(cfg0, EXYNOS_MSCTRL);
+	}
+
+	return 0;
+}
+
+static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct drm_exynos_ipp_set_wb set_wb = {0, 0};
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:cmd[%d]\n", __func__, cmd);
+
+	switch (cmd) {
+	case IPP_CMD_M2M:
+		/* Source clear */
+		cfg = fimc_read(EXYNOS_MSCTRL);
+		cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
+		cfg &= ~EXYNOS_MSCTRL_ENVID;
+		fimc_write(cfg, EXYNOS_MSCTRL);
+		break;
+	case IPP_CMD_WB:
+		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
+		break;
+	case IPP_CMD_OUTPUT:
+	default:
+		dev_err(dev, "invalid operations.\n");
+		break;
+	}
+
+	fimc_handle_irq(ctx, false, false, true);
+
+	/* reset sequence */
+	fimc_write(0x0, EXYNOS_CIFCNTSEQ);
+
+	/* Scaler disable */
+	cfg = fimc_read(EXYNOS_CISCCTRL);
+	cfg &= ~EXYNOS_CISCCTRL_SCALERSTART;
+	fimc_write(cfg, EXYNOS_CISCCTRL);
+
+	/* Disable image capture */
+	cfg = fimc_read(EXYNOS_CIIMGCPT);
+	cfg &= ~(EXYNOS_CIIMGCPT_IMGCPTEN_SC | EXYNOS_CIIMGCPT_IMGCPTEN);
+	fimc_write(cfg, EXYNOS_CIIMGCPT);
+
+	/* Enable frame end irq */
+	cfg = fimc_read(EXYNOS_CIGCTRL);
+	cfg |= EXYNOS_CIGCTRL_IRQ_END_DISABLE;
+	fimc_write(cfg, EXYNOS_CIGCTRL);
+}
+
+static int __devinit fimc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct fimc_context *ctx;
+	struct clk	*parent_clk;
+	struct resource *res;
+	struct exynos_drm_ippdrv *ippdrv;
+	struct exynos_drm_fimc_pdata *pdata;
+	struct fimc_driverdata *ddata;
+	int ret;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(dev, "no platform data specified.\n");
+		return -EINVAL;
+	}
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ddata = (struct fimc_driverdata *)
+		platform_get_device_id(pdev)->driver_data;
+
+	/* clock control */
+	ctx->sclk_fimc_clk = clk_get(dev, "sclk_fimc");
+	if (IS_ERR(ctx->sclk_fimc_clk)) {
+		dev_err(dev, "failed to get src fimc clock.\n");
+		ret = PTR_ERR(ctx->sclk_fimc_clk);
+		goto err_ctx;
+	}
+	clk_enable(ctx->sclk_fimc_clk);
+
+	ctx->fimc_clk = clk_get(dev, "fimc");
+	if (IS_ERR(ctx->fimc_clk)) {
+		dev_err(dev, "failed to get fimc clock.\n");
+		ret = PTR_ERR(ctx->fimc_clk);
+		clk_disable(ctx->sclk_fimc_clk);
+		clk_put(ctx->sclk_fimc_clk);
+		goto err_ctx;
+	}
+
+	ctx->wb_clk = clk_get(dev, "pxl_async0");
+	if (IS_ERR(ctx->wb_clk)) {
+		dev_err(dev, "failed to get writeback a clock.\n");
+		ret = PTR_ERR(ctx->wb_clk);
+		clk_disable(ctx->sclk_fimc_clk);
+		clk_put(ctx->sclk_fimc_clk);
+		clk_put(ctx->fimc_clk);
+		goto err_ctx;
+	}
+
+	ctx->wb_b_clk = clk_get(dev, "pxl_async1");
+	if (IS_ERR(ctx->wb_b_clk)) {
+		dev_err(dev, "failed to get writeback b clock.\n");
+		ret = PTR_ERR(ctx->wb_b_clk);
+		clk_disable(ctx->sclk_fimc_clk);
+		clk_put(ctx->sclk_fimc_clk);
+		clk_put(ctx->fimc_clk);
+		clk_put(ctx->wb_clk);
+		goto err_ctx;
+	}
+
+	parent_clk = clk_get(dev, ddata->parent_clk);
+
+	if (IS_ERR(parent_clk)) {
+		dev_err(dev, "failed to get parent clock.\n");
+		ret = PTR_ERR(parent_clk);
+		clk_disable(ctx->sclk_fimc_clk);
+		clk_put(ctx->sclk_fimc_clk);
+		clk_put(ctx->fimc_clk);
+		clk_put(ctx->wb_clk);
+		clk_put(ctx->wb_b_clk);
+		goto err_ctx;
+	}
+
+	if (clk_set_parent(ctx->sclk_fimc_clk, parent_clk)) {
+		dev_err(dev, "failed to set parent.\n");
+		ret = -EINVAL;
+		clk_put(parent_clk);
+		clk_disable(ctx->sclk_fimc_clk);
+		clk_put(ctx->sclk_fimc_clk);
+		clk_put(ctx->fimc_clk);
+		clk_put(ctx->wb_clk);
+		clk_put(ctx->wb_b_clk);
+		goto err_ctx;
+	}
+
+	clk_put(parent_clk);
+	clk_set_rate(ctx->sclk_fimc_clk, pdata->clk_rate);
+
+	/* resource memory */
+	ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!ctx->regs_res) {
+		dev_err(dev, "failed to find registers.\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs = devm_request_and_ioremap(dev, ctx->regs_res);
+	if (!ctx->regs) {
+		dev_err(dev, "failed to map registers.\n");
+		ret = -ENXIO;
+		goto err_clk;
+	}
+
+	/* resource irq */
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "failed to request irq resource.\n");
+		ret = -ENOENT;
+		goto err_get_regs;
+	}
+
+	ctx->irq = res->start;
+	ret = request_threaded_irq(ctx->irq, NULL, fimc_irq_handler,
+		IRQF_ONESHOT, "drm_fimc", ctx);
+	if (ret < 0) {
+		dev_err(dev, "failed to request irq.\n");
+		goto err_get_regs;
+	}
+
+	/* context initialization */
+	ctx->id = pdev->id;
+	ctx->pol = pdata->pol;
+	ctx->ddata = ddata;
+
+	ippdrv = &ctx->ippdrv;
+	ippdrv->dev = dev;
+	ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops;
+	ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops;
+	ippdrv->check_property = fimc_ippdrv_check_property;
+	ippdrv->reset = fimc_ippdrv_reset;
+	ippdrv->start = fimc_ippdrv_start;
+	ippdrv->stop = fimc_ippdrv_stop;
+	ret = fimc_init_prop_list(ippdrv);
+	if (ret < 0) {
+		dev_err(dev, "failed to init property list.\n");
+		goto err_get_irq;
+	}
+
+	DRM_DEBUG_KMS("%s:id[%d]ippdrv[0x%x]\n", __func__, ctx->id,
+		(int)ippdrv);
+
+	mutex_init(&ctx->lock);
+	platform_set_drvdata(pdev, ctx);
+
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
+	ret = exynos_drm_ippdrv_register(ippdrv);
+	if (ret < 0) {
+		dev_err(dev, "failed to register drm fimc device.\n");
+		goto err_ippdrv_register;
+	}
+
+	dev_info(&pdev->dev, "drm fimc registered successfully.\n");
+
+	return 0;
+
+err_ippdrv_register:
+	devm_kfree(dev, ippdrv->prop_list);
+	pm_runtime_disable(dev);
+err_get_irq:
+	free_irq(ctx->irq, ctx);
+err_get_regs:
+	devm_iounmap(dev, ctx->regs);
+err_clk:
+	clk_put(ctx->sclk_fimc_clk);
+	clk_put(ctx->fimc_clk);
+	clk_put(ctx->wb_clk);
+	clk_put(ctx->wb_b_clk);
+err_ctx:
+	devm_kfree(dev, ctx);
+	return ret;
+}
+
+static int __devexit fimc_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct fimc_context *ctx = get_fimc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+
+	devm_kfree(dev, ippdrv->prop_list);
+	exynos_drm_ippdrv_unregister(ippdrv);
+	mutex_destroy(&ctx->lock);
+
+	pm_runtime_set_suspended(dev);
+	pm_runtime_disable(dev);
+
+	free_irq(ctx->irq, ctx);
+	devm_iounmap(dev, ctx->regs);
+
+	clk_put(ctx->sclk_fimc_clk);
+	clk_put(ctx->fimc_clk);
+	clk_put(ctx->wb_clk);
+	clk_put(ctx->wb_b_clk);
+
+	devm_kfree(dev, ctx);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int fimc_suspend(struct device *dev)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, ctx->id);
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	return fimc_clk_ctrl(ctx, false);
+}
+
+static int fimc_resume(struct device *dev)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, ctx->id);
+
+	if (!pm_runtime_suspended(dev))
+		return fimc_clk_ctrl(ctx, true);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PM_RUNTIME
+static int fimc_runtime_suspend(struct device *dev)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, ctx->id);
+
+	return fimc_clk_ctrl(ctx, false);
+}
+
+static int fimc_runtime_resume(struct device *dev)
+{
+	struct fimc_context *ctx = get_fimc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, ctx->id);
+
+	return fimc_clk_ctrl(ctx, true);
+}
+#endif
+
+static struct fimc_driverdata exynos4210_fimc_data = {
+	.parent_clk = "mout_mpll",
+};
+
+static struct fimc_driverdata exynos4412_fimc_data = {
+	.parent_clk = "mout_mpll_user",
+};
+
+static struct platform_device_id fimc_driver_ids[] = {
+	{
+		.name		= "exynos4210-fimc",
+		.driver_data	= (unsigned long)&exynos4210_fimc_data,
+	}, {
+		.name		= "exynos4412-fimc",
+		.driver_data	= (unsigned long)&exynos4412_fimc_data,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(platform, fimc_driver_ids);
+
+static const struct dev_pm_ops fimc_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(fimc_suspend, fimc_resume)
+	SET_RUNTIME_PM_OPS(fimc_runtime_suspend, fimc_runtime_resume, NULL)
+};
+
+struct platform_driver fimc_driver = {
+	.probe		= fimc_probe,
+	.remove		= __devexit_p(fimc_remove),
+	.id_table	= fimc_driver_ids,
+	.driver		= {
+		.name	= "exynos-drm-fimc",
+		.owner	= THIS_MODULE,
+		.pm	= &fimc_pm_ops,
+	},
+};
+
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h
new file mode 100644
index 0000000..dc970fa
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *	Jinyoung Jeon <jy0.jeon@samsung.com>
+ *	Sangmin Lee <lsmin.lee@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_FIMC_H_
+#define _EXYNOS_DRM_FIMC_H_
+
+/*
+ * TODO
+ * FIMD output interface notifier callback.
+ */
+
+#endif /* _EXYNOS_DRM_FIMC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index e08478f..bf0d9ba 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 
 #include <video/samsung_fimd.h>
@@ -25,6 +26,7 @@
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fbdev.h"
 #include "exynos_drm_crtc.h"
+#include "exynos_drm_iommu.h"
 
 /*
 * FIMD stands for Fully Interactive Mobile Display and
@@ -78,10 +80,10 @@
 	unsigned int		fb_height;
 	unsigned int		bpp;
 	dma_addr_t		dma_addr;
-	void __iomem		*vaddr;
 	unsigned int		buf_offsize;
 	unsigned int		line_size;	/* bytes */
 	bool			enabled;
+	bool			resume;
 };
 
 struct fimd_context {
@@ -99,13 +101,34 @@
 	u32				vidcon1;
 	bool				suspended;
 	struct mutex			lock;
+	wait_queue_head_t		wait_vsync_queue;
+	atomic_t			wait_vsync_event;
 
 	struct exynos_drm_panel_info *panel;
 };
 
+#ifdef CONFIG_OF
+static const struct of_device_id fimd_driver_dt_match[] = {
+	{ .compatible = "samsung,exynos4-fimd",
+	  .data = &exynos4_fimd_driver_data },
+	{ .compatible = "samsung,exynos5-fimd",
+	  .data = &exynos5_fimd_driver_data },
+	{},
+};
+MODULE_DEVICE_TABLE(of, fimd_driver_dt_match);
+#endif
+
 static inline struct fimd_driver_data *drm_fimd_get_driver_data(
 	struct platform_device *pdev)
 {
+#ifdef CONFIG_OF
+	const struct of_device_id *of_id =
+			of_match_device(fimd_driver_dt_match, &pdev->dev);
+
+	if (of_id)
+		return (struct fimd_driver_data *)of_id->data;
+#endif
+
 	return (struct fimd_driver_data *)
 		platform_get_device_id(pdev)->driver_data;
 }
@@ -240,7 +263,9 @@
 
 	/* setup horizontal and vertical display size. */
 	val = VIDTCON2_LINEVAL(timing->yres - 1) |
-	       VIDTCON2_HOZVAL(timing->xres - 1);
+	       VIDTCON2_HOZVAL(timing->xres - 1) |
+	       VIDTCON2_LINEVAL_E(timing->yres - 1) |
+	       VIDTCON2_HOZVAL_E(timing->xres - 1);
 	writel(val, ctx->regs + driver_data->timing_base + VIDTCON2);
 
 	/* setup clock source, clock divider, enable dma. */
@@ -307,12 +332,32 @@
 	}
 }
 
+static void fimd_wait_for_vblank(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+
+	if (ctx->suspended)
+		return;
+
+	atomic_set(&ctx->wait_vsync_event, 1);
+
+	/*
+	 * wait for FIMD to signal the VSYNC interrupt, or return after a
+	 * timeout of 50ms (i.e. a 20Hz refresh rate).
+	 */
+	if (!wait_event_timeout(ctx->wait_vsync_queue,
+				!atomic_read(&ctx->wait_vsync_event),
+				DRM_HZ/20))
+		DRM_DEBUG_KMS("vblank wait timed out.\n");
+}
+
 static struct exynos_drm_manager_ops fimd_manager_ops = {
 	.dpms = fimd_dpms,
 	.apply = fimd_apply,
 	.commit = fimd_commit,
 	.enable_vblank = fimd_enable_vblank,
 	.disable_vblank = fimd_disable_vblank,
+	.wait_for_vblank = fimd_wait_for_vblank,
 };
 
 static void fimd_win_mode_set(struct device *dev,
@@ -351,7 +396,6 @@
 	win_data->fb_width = overlay->fb_width;
 	win_data->fb_height = overlay->fb_height;
 	win_data->dma_addr = overlay->dma_addr[0] + offset;
-	win_data->vaddr = overlay->vaddr[0] + offset;
 	win_data->bpp = overlay->bpp;
 	win_data->buf_offsize = (overlay->fb_width - overlay->crtc_width) *
 				(overlay->bpp >> 3);
@@ -361,9 +405,7 @@
 			win_data->offset_x, win_data->offset_y);
 	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
 			win_data->ovl_width, win_data->ovl_height);
-	DRM_DEBUG_KMS("paddr = 0x%lx, vaddr = 0x%lx\n",
-			(unsigned long)win_data->dma_addr,
-			(unsigned long)win_data->vaddr);
+	DRM_DEBUG_KMS("paddr = 0x%lx\n", (unsigned long)win_data->dma_addr);
 	DRM_DEBUG_KMS("fb_width = %d, crtc_width = %d\n",
 			overlay->fb_width, overlay->crtc_width);
 }
@@ -451,6 +493,8 @@
 	struct fimd_win_data *win_data;
 	int win = zpos;
 	unsigned long val, alpha, size;
+	unsigned int last_x;
+	unsigned int last_y;
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
@@ -496,24 +540,32 @@
 
 	/* buffer size */
 	val = VIDW_BUF_SIZE_OFFSET(win_data->buf_offsize) |
-		VIDW_BUF_SIZE_PAGEWIDTH(win_data->line_size);
+		VIDW_BUF_SIZE_PAGEWIDTH(win_data->line_size) |
+		VIDW_BUF_SIZE_OFFSET_E(win_data->buf_offsize) |
+		VIDW_BUF_SIZE_PAGEWIDTH_E(win_data->line_size);
 	writel(val, ctx->regs + VIDWx_BUF_SIZE(win, 0));
 
 	/* OSD position */
 	val = VIDOSDxA_TOPLEFT_X(win_data->offset_x) |
-		VIDOSDxA_TOPLEFT_Y(win_data->offset_y);
+		VIDOSDxA_TOPLEFT_Y(win_data->offset_y) |
+		VIDOSDxA_TOPLEFT_X_E(win_data->offset_x) |
+		VIDOSDxA_TOPLEFT_Y_E(win_data->offset_y);
 	writel(val, ctx->regs + VIDOSD_A(win));
 
-	val = VIDOSDxB_BOTRIGHT_X(win_data->offset_x +
-					win_data->ovl_width - 1) |
-		VIDOSDxB_BOTRIGHT_Y(win_data->offset_y +
-					win_data->ovl_height - 1);
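+	/* clamp at zero so a zero-sized window cannot underflow */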
+	last_x = win_data->offset_x + win_data->ovl_width;
+	if (last_x)
+		last_x--;
+	last_y = win_data->offset_y + win_data->ovl_height;
+	if (last_y)
+		last_y--;
+
+	val = VIDOSDxB_BOTRIGHT_X(last_x) | VIDOSDxB_BOTRIGHT_Y(last_y) |
+		VIDOSDxB_BOTRIGHT_X_E(last_x) | VIDOSDxB_BOTRIGHT_Y_E(last_y);
+
 	writel(val, ctx->regs + VIDOSD_B(win));
 
 	DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n",
-			win_data->offset_x, win_data->offset_y,
-			win_data->offset_x + win_data->ovl_width - 1,
-			win_data->offset_y + win_data->ovl_height - 1);
+			win_data->offset_x, win_data->offset_y, last_x, last_y);
 
 	/* hardware window 0 doesn't support alpha channel. */
 	if (win != 0) {
@@ -573,6 +625,12 @@
 
 	win_data = &ctx->win_data[win];
 
+	if (ctx->suspended) {
+		/* do not resume this window */
+		win_data->resume = false;
+		return;
+	}
+
 	/* protect windows */
 	val = readl(ctx->regs + SHADOWCON);
 	val |= SHADOWCON_WINx_PROTECT(win);
@@ -592,22 +650,10 @@
 	win_data->enabled = false;
 }
 
-static void fimd_wait_for_vblank(struct device *dev)
-{
-	struct fimd_context *ctx = get_fimd_context(dev);
-	int ret;
-
-	ret = wait_for((__raw_readl(ctx->regs + VIDCON1) &
-					VIDCON1_VSTATUS_VSYNC), 50);
-	if (ret < 0)
-		DRM_DEBUG_KMS("vblank wait timed out.\n");
-}
-
 static struct exynos_drm_overlay_ops fimd_overlay_ops = {
 	.mode_set = fimd_win_mode_set,
 	.commit = fimd_win_commit,
 	.disable = fimd_win_disable,
-	.wait_for_vblank = fimd_wait_for_vblank,
 };
 
 static struct exynos_drm_manager fimd_manager = {
@@ -623,7 +669,6 @@
 	struct drm_pending_vblank_event *e, *t;
 	struct timeval now;
 	unsigned long flags;
-	bool is_checked = false;
 
 	spin_lock_irqsave(&drm_dev->event_lock, flags);
 
@@ -633,8 +678,6 @@
 		if (crtc != e->pipe)
 			continue;
 
-		is_checked = true;
-
 		do_gettimeofday(&now);
 		e->event.sequence = 0;
 		e->event.tv_sec = now.tv_sec;
@@ -642,22 +685,7 @@
 
 		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
 		wake_up_interruptible(&e->base.file_priv->event_wait);
-	}
-
-	if (is_checked) {
-		/*
-		 * call drm_vblank_put only in case that drm_vblank_get was
-		 * called.
-		 */
-		if (atomic_read(&drm_dev->vblank_refcount[crtc]) > 0)
-			drm_vblank_put(drm_dev, crtc);
-
-		/*
-		 * don't off vblank if vblank_disable_allowed is 1,
-		 * because vblank would be off by timer handler.
-		 */
-		if (!drm_dev->vblank_disable_allowed)
-			drm_vblank_off(drm_dev, crtc);
+		drm_vblank_put(drm_dev, crtc);
 	}
 
 	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
@@ -684,6 +712,11 @@
 	drm_handle_vblank(drm_dev, manager->pipe);
 	fimd_finish_pageflip(drm_dev, manager->pipe);
 
+	/* set wait vsync event to zero and wake up queue. */
+	if (atomic_read(&ctx->wait_vsync_event)) {
+		atomic_set(&ctx->wait_vsync_event, 0);
+		DRM_WAKEUP(&ctx->wait_vsync_queue);
+	}
 out:
 	return IRQ_HANDLED;
 }
@@ -709,6 +742,10 @@
 	 */
 	drm_dev->vblank_disable_allowed = 1;
 
+	/* attach this sub driver to iommu mapping if supported. */
+	if (is_drm_iommu_supported(drm_dev))
+		drm_iommu_attach_device(drm_dev, dev);
+
 	return 0;
 }
 
@@ -716,7 +753,9 @@
 {
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
-	/* TODO. */
+	/* detach this sub driver from iommu mapping if supported. */
+	if (is_drm_iommu_supported(drm_dev))
+		drm_iommu_detach_device(drm_dev, dev);
 }
 
 static int fimd_calc_clkdiv(struct fimd_context *ctx,
@@ -805,11 +844,38 @@
 	return 0;
 }
 
+static void fimd_window_suspend(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int i;
+
+	for (i = 0; i < WINDOWS_NR; i++) {
+		win_data = &ctx->win_data[i];
+		win_data->resume = win_data->enabled;
+		fimd_win_disable(dev, i);
+	}
+	fimd_wait_for_vblank(dev);
+}
+
+static void fimd_window_resume(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int i;
+
+	for (i = 0; i < WINDOWS_NR; i++) {
+		win_data = &ctx->win_data[i];
+		win_data->enabled = win_data->resume;
+		win_data->resume = false;
+	}
+}
+
 static int fimd_activate(struct fimd_context *ctx, bool enable)
 {
+	struct device *dev = ctx->subdrv.dev;
 	if (enable) {
 		int ret;
-		struct device *dev = ctx->subdrv.dev;
 
 		ret = fimd_clock(ctx, true);
 		if (ret < 0)
@@ -820,7 +886,11 @@
 		/* if vblank was enabled status, enable it again. */
 		if (test_and_clear_bit(0, &ctx->irq_flags))
 			fimd_enable_vblank(dev);
+
+		fimd_window_resume(dev);
 	} else {
+		fimd_window_suspend(dev);
+
 		fimd_clock(ctx, false);
 		ctx->suspended = true;
 	}
@@ -857,18 +927,16 @@
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->bus_clk = clk_get(dev, "fimd");
+	ctx->bus_clk = devm_clk_get(dev, "fimd");
 	if (IS_ERR(ctx->bus_clk)) {
 		dev_err(dev, "failed to get bus clock\n");
-		ret = PTR_ERR(ctx->bus_clk);
-		goto err_clk_get;
+		return PTR_ERR(ctx->bus_clk);
 	}
 
-	ctx->lcd_clk = clk_get(dev, "sclk_fimd");
+	ctx->lcd_clk = devm_clk_get(dev, "sclk_fimd");
 	if (IS_ERR(ctx->lcd_clk)) {
 		dev_err(dev, "failed to get lcd clock\n");
-		ret = PTR_ERR(ctx->lcd_clk);
-		goto err_bus_clk;
+		return PTR_ERR(ctx->lcd_clk);
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -876,14 +944,13 @@
 	ctx->regs = devm_request_and_ioremap(&pdev->dev, res);
 	if (!ctx->regs) {
 		dev_err(dev, "failed to map registers\n");
-		ret = -ENXIO;
-		goto err_clk;
+		return -ENXIO;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!res) {
 		dev_err(dev, "irq request failed.\n");
-		goto err_clk;
+		return -ENXIO;
 	}
 
 	ctx->irq = res->start;
@@ -892,13 +959,15 @@
 							0, "drm_fimd", ctx);
 	if (ret) {
 		dev_err(dev, "irq request failed.\n");
-		goto err_clk;
+		return ret;
 	}
 
 	ctx->vidcon0 = pdata->vidcon0;
 	ctx->vidcon1 = pdata->vidcon1;
 	ctx->default_win = pdata->default_win;
 	ctx->panel = panel;
+	DRM_INIT_WAITQUEUE(&ctx->wait_vsync_queue);
+	atomic_set(&ctx->wait_vsync_event, 0);
 
 	subdrv = &ctx->subdrv;
 
@@ -926,17 +995,6 @@
 	exynos_drm_subdrv_register(subdrv);
 
 	return 0;
-
-err_clk:
-	clk_disable(ctx->lcd_clk);
-	clk_put(ctx->lcd_clk);
-
-err_bus_clk:
-	clk_disable(ctx->bus_clk);
-	clk_put(ctx->bus_clk);
-
-err_clk_get:
-	return ret;
 }
 
 static int __devexit fimd_remove(struct platform_device *pdev)
@@ -960,9 +1018,6 @@
 out:
 	pm_runtime_disable(dev);
 
-	clk_put(ctx->lcd_clk);
-	clk_put(ctx->bus_clk);
-
 	return 0;
 }
 
@@ -1056,5 +1111,6 @@
 		.name	= "exynos4-fb",
 		.owner	= THIS_MODULE,
 		.pm	= &fimd_pm_ops,
+		.of_match_table = of_match_ptr(fimd_driver_dt_match),
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index f7aab24..6ffa076 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -17,11 +17,14 @@
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-attrs.h>
 
 #include <drm/drmP.h>
 #include <drm/exynos_drm.h>
 #include "exynos_drm_drv.h"
 #include "exynos_drm_gem.h"
+#include "exynos_drm_iommu.h"
 
 #define G2D_HW_MAJOR_VER		4
 #define G2D_HW_MINOR_VER		1
@@ -92,11 +95,21 @@
 #define G2D_CMDLIST_POOL_SIZE		(G2D_CMDLIST_SIZE * G2D_CMDLIST_NUM)
 #define G2D_CMDLIST_DATA_NUM		(G2D_CMDLIST_SIZE / sizeof(u32) - 2)
 
+#define MAX_BUF_ADDR_NR			6
+
+/* maximum userptr buffer pool size is 64MB by default */
+#define MAX_POOL		(64 * 1024 * 1024)
+
+enum {
+	BUF_TYPE_GEM = 1,
+	BUF_TYPE_USERPTR,
+};
+
 /* cmdlist data structure */
 struct g2d_cmdlist {
-	u32	head;
-	u32	data[G2D_CMDLIST_DATA_NUM];
-	u32	last;	/* last data offset */
+	u32		head;
+	unsigned long	data[G2D_CMDLIST_DATA_NUM];
+	u32		last;	/* last data offset */
 };
 
 struct drm_exynos_pending_g2d_event {
@@ -104,15 +117,26 @@
 	struct drm_exynos_g2d_event	event;
 };
 
-struct g2d_gem_node {
+struct g2d_cmdlist_userptr {
 	struct list_head	list;
-	unsigned int		handle;
+	dma_addr_t		dma_addr;
+	unsigned long		userptr;
+	unsigned long		size;
+	struct page		**pages;
+	unsigned int		npages;
+	struct sg_table		*sgt;
+	struct vm_area_struct	*vma;
+	atomic_t		refcount;
+	bool			in_pool;
+	bool			out_of_list;
 };
 
 struct g2d_cmdlist_node {
 	struct list_head	list;
 	struct g2d_cmdlist	*cmdlist;
-	unsigned int		gem_nr;
+	unsigned int		map_nr;
+	unsigned long		handles[MAX_BUF_ADDR_NR];
+	unsigned int		obj_type[MAX_BUF_ADDR_NR];
 	dma_addr_t		dma_addr;
 
 	struct drm_exynos_pending_g2d_event	*event;
@@ -122,6 +146,7 @@
 	struct list_head	list;
 	struct list_head	run_cmdlist;
 	struct list_head	event_list;
+	struct drm_file		*filp;
 	pid_t			pid;
 	struct completion	complete;
 	int			async;
@@ -143,23 +168,33 @@
 	struct mutex			cmdlist_mutex;
 	dma_addr_t			cmdlist_pool;
 	void				*cmdlist_pool_virt;
+	struct dma_attrs		cmdlist_dma_attrs;
 
 	/* runqueue*/
 	struct g2d_runqueue_node	*runqueue_node;
 	struct list_head		runqueue;
 	struct mutex			runqueue_mutex;
 	struct kmem_cache		*runqueue_slab;
+
+	unsigned long			current_pool;
+	unsigned long			max_pool;
 };
 
 static int g2d_init_cmdlist(struct g2d_data *g2d)
 {
 	struct device *dev = g2d->dev;
 	struct g2d_cmdlist_node *node = g2d->cmdlist_node;
+	struct exynos_drm_subdrv *subdrv = &g2d->subdrv;
 	int nr;
 	int ret;
 
-	g2d->cmdlist_pool_virt = dma_alloc_coherent(dev, G2D_CMDLIST_POOL_SIZE,
-						&g2d->cmdlist_pool, GFP_KERNEL);
+	init_dma_attrs(&g2d->cmdlist_dma_attrs);
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &g2d->cmdlist_dma_attrs);
+
+	g2d->cmdlist_pool_virt = dma_alloc_attrs(subdrv->drm_dev->dev,
+						G2D_CMDLIST_POOL_SIZE,
+						&g2d->cmdlist_pool, GFP_KERNEL,
+						&g2d->cmdlist_dma_attrs);
 	if (!g2d->cmdlist_pool_virt) {
 		dev_err(dev, "failed to allocate dma memory\n");
 		return -ENOMEM;
@@ -184,18 +219,20 @@
 	return 0;
 
 err:
-	dma_free_coherent(dev, G2D_CMDLIST_POOL_SIZE, g2d->cmdlist_pool_virt,
-			g2d->cmdlist_pool);
+	dma_free_attrs(subdrv->drm_dev->dev, G2D_CMDLIST_POOL_SIZE,
+			g2d->cmdlist_pool_virt,
+			g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs);
 	return ret;
 }
 
 static void g2d_fini_cmdlist(struct g2d_data *g2d)
 {
-	struct device *dev = g2d->dev;
+	struct exynos_drm_subdrv *subdrv = &g2d->subdrv;
 
 	kfree(g2d->cmdlist_node);
-	dma_free_coherent(dev, G2D_CMDLIST_POOL_SIZE, g2d->cmdlist_pool_virt,
-			g2d->cmdlist_pool);
+	dma_free_attrs(subdrv->drm_dev->dev, G2D_CMDLIST_POOL_SIZE,
+			g2d->cmdlist_pool_virt,
+			g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs);
 }
 
 static struct g2d_cmdlist_node *g2d_get_cmdlist(struct g2d_data *g2d)
@@ -245,62 +282,300 @@
 		list_add_tail(&node->event->base.link, &g2d_priv->event_list);
 }
 
-static int g2d_get_cmdlist_gem(struct drm_device *drm_dev,
-			       struct drm_file *file,
-			       struct g2d_cmdlist_node *node)
+static void g2d_userptr_put_dma_addr(struct drm_device *drm_dev,
+					unsigned long obj,
+					bool force)
 {
-	struct drm_exynos_file_private *file_priv = file->driver_priv;
+	struct g2d_cmdlist_userptr *g2d_userptr =
+					(struct g2d_cmdlist_userptr *)obj;
+
+	if (!obj)
+		return;
+
+	if (force)
+		goto out;
+
+	atomic_dec(&g2d_userptr->refcount);
+
+	if (atomic_read(&g2d_userptr->refcount) > 0)
+		return;
+
+	if (g2d_userptr->in_pool)
+		return;
+
+out:
+	exynos_gem_unmap_sgt_from_dma(drm_dev, g2d_userptr->sgt,
+					DMA_BIDIRECTIONAL);
+
+	exynos_gem_put_pages_to_userptr(g2d_userptr->pages,
+					g2d_userptr->npages,
+					g2d_userptr->vma);
+
+	if (!g2d_userptr->out_of_list)
+		list_del_init(&g2d_userptr->list);
+
+	sg_free_table(g2d_userptr->sgt);
+	kfree(g2d_userptr->sgt);
+	g2d_userptr->sgt = NULL;
+
+	kfree(g2d_userptr->pages);
+	g2d_userptr->pages = NULL;
+	kfree(g2d_userptr);
+	g2d_userptr = NULL;
+}
+
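The put path above combines a manual refcount with two pool flags: a normal put only drops the count, and the mapping is actually torn down when the count reaches zero for an object that is not parked in the pool, or unconditionally when the caller forces it. The decision reduces to a small predicate; a sketch with illustrative names:

    #include <stdbool.h>

    /* returns true when the userptr mapping should really be torn down */
    static bool should_release(int refcount_after_dec, bool in_pool, bool force)
    {
            if (force)
                    return true;    /* pool flush: release unconditionally */
            if (refcount_after_dec > 0)
                    return false;   /* still referenced by another cmdlist */
            if (in_pool)
                    return false;   /* cached until the pool is flushed */
            return true;
    }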
+dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev,
+					unsigned long userptr,
+					unsigned long size,
+					struct drm_file *filp,
+					unsigned long *obj)
+{
+	struct drm_exynos_file_private *file_priv = filp->driver_priv;
 	struct exynos_drm_g2d_private *g2d_priv = file_priv->g2d_priv;
+	struct g2d_cmdlist_userptr *g2d_userptr;
+	struct g2d_data *g2d;
+	struct page **pages;
+	struct sg_table	*sgt;
+	struct vm_area_struct *vma;
+	unsigned long start, end;
+	unsigned int npages, offset;
+	int ret;
+
+	if (!size) {
+		DRM_ERROR("invalid userptr size.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	g2d = dev_get_drvdata(g2d_priv->dev);
+
+	/* check if userptr already exists in userptr_list. */
+	list_for_each_entry(g2d_userptr, &g2d_priv->userptr_list, list) {
+		if (g2d_userptr->userptr == userptr) {
+			/*
+			 * also check the size, since the same address may
+			 * have been registered with a different size.
+			 */
+			if (g2d_userptr->size == size) {
+				atomic_inc(&g2d_userptr->refcount);
+				*obj = (unsigned long)g2d_userptr;
+
+				return &g2d_userptr->dma_addr;
+			}
+
+			/*
+			 * the G2D DMA engine may still be accessing this
+			 * g2d_userptr memory region, so remove the object
+			 * from userptr_list so it cannot be looked up again,
+			 * and exclude it from the userptr pool so that it is
+			 * released once the DMA access completes.
+			 */
+			g2d_userptr->out_of_list = true;
+			g2d_userptr->in_pool = false;
+			list_del_init(&g2d_userptr->list);
+
+			break;
+		}
+	}
+
+	g2d_userptr = kzalloc(sizeof(*g2d_userptr), GFP_KERNEL);
+	if (!g2d_userptr) {
+		DRM_ERROR("failed to allocate g2d_userptr.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	atomic_set(&g2d_userptr->refcount, 1);
+
+	start = userptr & PAGE_MASK;
+	offset = userptr & ~PAGE_MASK;
+	end = PAGE_ALIGN(userptr + size);
+	npages = (end - start) >> PAGE_SHIFT;
+	g2d_userptr->npages = npages;
+
+	pages = kzalloc(npages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
+		DRM_ERROR("failed to allocate pages.\n");
+		kfree(g2d_userptr);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	vma = find_vma(current->mm, userptr);
+	if (!vma) {
+		DRM_ERROR("failed to get vm region.\n");
+		ret = -EFAULT;
+		goto err_free_pages;
+	}
+
+	if (vma->vm_end < userptr + size) {
+		DRM_ERROR("vma is too small.\n");
+		ret = -EFAULT;
+		goto err_free_pages;
+	}
+
+	g2d_userptr->vma = exynos_gem_get_vma(vma);
+	if (!g2d_userptr->vma) {
+		DRM_ERROR("failed to copy vma.\n");
+		ret = -ENOMEM;
+		goto err_free_pages;
+	}
+
+	g2d_userptr->size = size;
+
+	ret = exynos_gem_get_pages_from_userptr(start & PAGE_MASK,
+						npages, pages, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to get user pages from userptr.\n");
+		goto err_put_vma;
+	}
+
+	g2d_userptr->pages = pages;
+
+	sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
+	if (!sgt) {
+		DRM_ERROR("failed to allocate sg table.\n");
+		ret = -ENOMEM;
+		goto err_free_userptr;
+	}
+
+	ret = sg_alloc_table_from_pages(sgt, pages, npages, offset,
+					size, GFP_KERNEL);
+	if (ret < 0) {
+		DRM_ERROR("failed to get sgt from pages.\n");
+		goto err_free_sgt;
+	}
+
+	g2d_userptr->sgt = sgt;
+
+	ret = exynos_gem_map_sgt_with_dma(drm_dev, g2d_userptr->sgt,
+						DMA_BIDIRECTIONAL);
+	if (ret < 0) {
+		DRM_ERROR("failed to map sgt with dma region.\n");
+		goto err_free_sgt;
+	}
+
+	g2d_userptr->dma_addr = sgt->sgl[0].dma_address;
+	g2d_userptr->userptr = userptr;
+
+	list_add_tail(&g2d_userptr->list, &g2d_priv->userptr_list);
+
+	if (g2d->current_pool + (npages << PAGE_SHIFT) < g2d->max_pool) {
+		g2d->current_pool += npages << PAGE_SHIFT;
+		g2d_userptr->in_pool = true;
+	}
+
+	*obj = (unsigned long)g2d_userptr;
+
+	return &g2d_userptr->dma_addr;
+
+err_free_sgt:
+	sg_free_table(sgt);
+	kfree(sgt);
+	sgt = NULL;
+
+err_free_userptr:
+	exynos_gem_put_pages_to_userptr(g2d_userptr->pages,
+					g2d_userptr->npages,
+					g2d_userptr->vma);
+
+err_put_vma:
+	exynos_gem_put_vma(g2d_userptr->vma);
+
+err_free_pages:
+	kfree(pages);
+	kfree(g2d_userptr);
+	pages = NULL;
+	g2d_userptr = NULL;
+
+	return ERR_PTR(ret);
+}
+
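Condensed, the function above is the usual userptr import pipeline: resolve and validate the VMA, pin the backing pages, wrap them in a scatter-gather table, then map that table for device DMA. A skeletal sketch of the happy path, with error handling elided and the helper name invented for illustration:

    /* sketch only: pin a user buffer and map it for device DMA */
    static dma_addr_t pin_userptr_for_dma(struct drm_device *drm_dev,
                                          unsigned long userptr,
                                          unsigned long size,
                                          struct sg_table *sgt,
                                          struct page **pages)
    {
            unsigned long start = userptr & PAGE_MASK;
            unsigned int offset = userptr & ~PAGE_MASK;
            unsigned int npages =
                    (PAGE_ALIGN(userptr + size) - start) >> PAGE_SHIFT;
            struct vm_area_struct *vma;

            /* 1. the whole region must be covered by a single VMA */
            vma = find_vma(current->mm, userptr);
            if (!vma || vma->vm_end < userptr + size)
                    return 0;       /* caller treats 0 as failure here */

            /* 2. pin the backing pages (writable, forced) */
            get_user_pages(current, current->mm, start, npages,
                           1, 1, pages, NULL);

            /* 3. build the sg table, honouring the sub-page offset */
            sg_alloc_table_from_pages(sgt, pages, npages, offset,
                                      size, GFP_KERNEL);

            /* 4. map for the device; the first entry carries the address */
            dma_map_sg(drm_dev->dev, sgt->sgl, sgt->nents,
                       DMA_BIDIRECTIONAL);
            return sgt->sgl[0].dma_address;
    }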
+static void g2d_userptr_free_all(struct drm_device *drm_dev,
+					struct g2d_data *g2d,
+					struct drm_file *filp)
+{
+	struct drm_exynos_file_private *file_priv = filp->driver_priv;
+	struct exynos_drm_g2d_private *g2d_priv = file_priv->g2d_priv;
+	struct g2d_cmdlist_userptr *g2d_userptr, *n;
+
+	list_for_each_entry_safe(g2d_userptr, n, &g2d_priv->userptr_list, list)
+		if (g2d_userptr->in_pool)
+			g2d_userptr_put_dma_addr(drm_dev,
+						(unsigned long)g2d_userptr,
+						true);
+
+	g2d->current_pool = 0;
+}
+
+static int g2d_map_cmdlist_gem(struct g2d_data *g2d,
+				struct g2d_cmdlist_node *node,
+				struct drm_device *drm_dev,
+				struct drm_file *file)
+{
 	struct g2d_cmdlist *cmdlist = node->cmdlist;
-	dma_addr_t *addr;
 	int offset;
 	int i;
 
-	for (i = 0; i < node->gem_nr; i++) {
-		struct g2d_gem_node *gem_node;
-
-		gem_node = kzalloc(sizeof(*gem_node), GFP_KERNEL);
-		if (!gem_node) {
-			dev_err(g2d_priv->dev, "failed to allocate gem node\n");
-			return -ENOMEM;
-		}
+	for (i = 0; i < node->map_nr; i++) {
+		unsigned long handle;
+		dma_addr_t *addr;
 
 		offset = cmdlist->last - (i * 2 + 1);
-		gem_node->handle = cmdlist->data[offset];
+		handle = cmdlist->data[offset];
 
-		addr = exynos_drm_gem_get_dma_addr(drm_dev, gem_node->handle,
-						   file);
-		if (IS_ERR(addr)) {
-			node->gem_nr = i;
-			kfree(gem_node);
-			return PTR_ERR(addr);
+		if (node->obj_type[i] == BUF_TYPE_GEM) {
+			addr = exynos_drm_gem_get_dma_addr(drm_dev, handle,
+								file);
+			if (IS_ERR(addr)) {
+				node->map_nr = i;
+				return -EFAULT;
+			}
+		} else {
+			struct drm_exynos_g2d_userptr g2d_userptr;
+
+			if (copy_from_user(&g2d_userptr, (void __user *)handle,
+				sizeof(struct drm_exynos_g2d_userptr))) {
+				node->map_nr = i;
+				return -EFAULT;
+			}
+
+			addr = g2d_userptr_get_dma_addr(drm_dev,
+							g2d_userptr.userptr,
+							g2d_userptr.size,
+							file,
+							&handle);
+			if (IS_ERR(addr)) {
+				node->map_nr = i;
+				return -EFAULT;
+			}
 		}
 
 		cmdlist->data[offset] = *addr;
-		list_add_tail(&gem_node->list, &g2d_priv->gem_list);
-		g2d_priv->gem_nr++;
+		node->handles[i] = handle;
 	}
 
 	return 0;
 }
 
-static void g2d_put_cmdlist_gem(struct drm_device *drm_dev,
-				struct drm_file *file,
-				unsigned int nr)
+static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d,
+				  struct g2d_cmdlist_node *node,
+				  struct drm_file *filp)
 {
-	struct drm_exynos_file_private *file_priv = file->driver_priv;
-	struct exynos_drm_g2d_private *g2d_priv = file_priv->g2d_priv;
-	struct g2d_gem_node *node, *n;
+	struct exynos_drm_subdrv *subdrv = &g2d->subdrv;
+	int i;
 
-	list_for_each_entry_safe_reverse(node, n, &g2d_priv->gem_list, list) {
-		if (!nr)
-			break;
+	for (i = 0; i < node->map_nr; i++) {
+		unsigned long handle = node->handles[i];
 
-		exynos_drm_gem_put_dma_addr(drm_dev, node->handle, file);
-		list_del_init(&node->list);
-		kfree(node);
-		nr--;
+		if (node->obj_type[i] == BUF_TYPE_GEM)
+			exynos_drm_gem_put_dma_addr(subdrv->drm_dev, handle,
+							filp);
+		else
+			g2d_userptr_put_dma_addr(subdrv->drm_dev, handle,
+							false);
+
+		node->handles[i] = 0;
 	}
+
+	node->map_nr = 0;
 }
 
 static void g2d_dma_start(struct g2d_data *g2d,
@@ -337,10 +612,18 @@
 static void g2d_free_runqueue_node(struct g2d_data *g2d,
 				   struct g2d_runqueue_node *runqueue_node)
 {
+	struct g2d_cmdlist_node *node;
+
 	if (!runqueue_node)
 		return;
 
 	mutex_lock(&g2d->cmdlist_mutex);
+	/*
+	 * the commands in run_cmdlist have completed, so unmap all gem
+	 * objects in each command node to drop their references.
+	 */
+	list_for_each_entry(node, &runqueue_node->run_cmdlist, list)
+		g2d_unmap_cmdlist_gem(g2d, node, runqueue_node->filp);
 	list_splice_tail_init(&runqueue_node->run_cmdlist, &g2d->free_cmdlist);
 	mutex_unlock(&g2d->cmdlist_mutex);
 
@@ -430,15 +713,28 @@
 	return IRQ_HANDLED;
 }
 
-static int g2d_check_reg_offset(struct device *dev, struct g2d_cmdlist *cmdlist,
+static int g2d_check_reg_offset(struct device *dev,
+				struct g2d_cmdlist_node *node,
 				int nr, bool for_addr)
 {
+	struct g2d_cmdlist *cmdlist = node->cmdlist;
 	int reg_offset;
 	int index;
 	int i;
 
 	for (i = 0; i < nr; i++) {
 		index = cmdlist->last - 2 * (i + 1);
+
+		if (for_addr) {
+			/* check userptr buffer type. */
+			reg_offset = (cmdlist->data[index] &
+					~0x7fffffff) >> 31;
+			if (reg_offset) {
+				node->obj_type[i] = BUF_TYPE_USERPTR;
+				cmdlist->data[index] &= ~G2D_BUF_USERPTR;
+			}
+		}
+
 		reg_offset = cmdlist->data[index] & ~0xfffff000;
 
 		if (reg_offset < G2D_VALID_START || reg_offset > G2D_VALID_END)
@@ -455,6 +751,9 @@
 		case G2D_MSK_BASE_ADDR:
 			if (!for_addr)
 				goto err;
+
+			if (node->obj_type[i] != BUF_TYPE_USERPTR)
+				node->obj_type[i] = BUF_TYPE_GEM;
 			break;
 		default:
 			if (for_addr)
@@ -466,7 +765,7 @@
 	return 0;
 
 err:
-	dev_err(dev, "Bad register offset: 0x%x\n", cmdlist->data[index]);
+	dev_err(dev, "Bad register offset: 0x%lx\n", cmdlist->data[index]);
 	return -EINVAL;
 }
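The userptr detection above keys off bit 31 of the command word: when set, the entry is treated as a userptr descriptor pointer rather than a GEM handle, and the flag is masked off before the register offset is validated. In isolation the decode step looks like this (the flag value is assumed to be 1 << 31, matching how the driver masks it):

    #include <stdio.h>

    #define G2D_BUF_USERPTR (1UL << 31)     /* assumed flag encoding */

    int main(void)
    {
            unsigned long word = G2D_BUF_USERPTR | 0x5678; /* flag + payload */

            /* same test as the patch: isolate bit 31 */
            if ((word & ~0x7fffffffUL) >> 31)
                    printf("userptr buffer\n");

            word &= ~G2D_BUF_USERPTR;           /* strip the flag before use */
            printf("payload = 0x%lx\n", word);  /* 0x5678 */
            return 0;
    }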
 
@@ -566,7 +865,7 @@
 	}
 
 	/* Check size of cmdlist: last 2 is about G2D_BITBLT_START */
-	size = cmdlist->last + req->cmd_nr * 2 + req->cmd_gem_nr * 2 + 2;
+	size = cmdlist->last + req->cmd_nr * 2 + req->cmd_buf_nr * 2 + 2;
 	if (size > G2D_CMDLIST_DATA_NUM) {
 		dev_err(dev, "cmdlist size is too big\n");
 		ret = -EINVAL;
@@ -583,29 +882,29 @@
 	}
 	cmdlist->last += req->cmd_nr * 2;
 
-	ret = g2d_check_reg_offset(dev, cmdlist, req->cmd_nr, false);
+	ret = g2d_check_reg_offset(dev, node, req->cmd_nr, false);
 	if (ret < 0)
 		goto err_free_event;
 
-	node->gem_nr = req->cmd_gem_nr;
-	if (req->cmd_gem_nr) {
-		struct drm_exynos_g2d_cmd *cmd_gem;
+	node->map_nr = req->cmd_buf_nr;
+	if (req->cmd_buf_nr) {
+		struct drm_exynos_g2d_cmd *cmd_buf;
 
-		cmd_gem = (struct drm_exynos_g2d_cmd *)(uint32_t)req->cmd_gem;
+		cmd_buf = (struct drm_exynos_g2d_cmd *)(uint32_t)req->cmd_buf;
 
 		if (copy_from_user(cmdlist->data + cmdlist->last,
-					(void __user *)cmd_gem,
-					sizeof(*cmd_gem) * req->cmd_gem_nr)) {
+					(void __user *)cmd_buf,
+					sizeof(*cmd_buf) * req->cmd_buf_nr)) {
 			ret = -EFAULT;
 			goto err_free_event;
 		}
-		cmdlist->last += req->cmd_gem_nr * 2;
+		cmdlist->last += req->cmd_buf_nr * 2;
 
-		ret = g2d_check_reg_offset(dev, cmdlist, req->cmd_gem_nr, true);
+		ret = g2d_check_reg_offset(dev, node, req->cmd_buf_nr, true);
 		if (ret < 0)
 			goto err_free_event;
 
-		ret = g2d_get_cmdlist_gem(drm_dev, file, node);
+		ret = g2d_map_cmdlist_gem(g2d, node, drm_dev, file);
 		if (ret < 0)
 			goto err_unmap;
 	}
@@ -624,7 +923,7 @@
 	return 0;
 
 err_unmap:
-	g2d_put_cmdlist_gem(drm_dev, file, node->gem_nr);
+	g2d_unmap_cmdlist_gem(g2d, node, file);
 err_free_event:
 	if (node->event) {
 		spin_lock_irqsave(&drm_dev->event_lock, flags);
@@ -680,6 +979,7 @@
 
 	mutex_lock(&g2d->runqueue_mutex);
 	runqueue_node->pid = current->pid;
+	runqueue_node->filp = file;
 	list_add_tail(&runqueue_node->list, &g2d->runqueue);
 	if (!g2d->runqueue_node)
 		g2d_exec_runqueue(g2d);
@@ -696,6 +996,43 @@
 }
 EXPORT_SYMBOL_GPL(exynos_g2d_exec_ioctl);
 
+static int g2d_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
+{
+	struct g2d_data *g2d;
+	int ret;
+
+	g2d = dev_get_drvdata(dev);
+	if (!g2d)
+		return -EFAULT;
+
+	/* allocate dma-aware cmdlist buffer. */
+	ret = g2d_init_cmdlist(g2d);
+	if (ret < 0) {
+		dev_err(dev, "cmdlist init failed\n");
+		return ret;
+	}
+
+	if (!is_drm_iommu_supported(drm_dev))
+		return 0;
+
+	ret = drm_iommu_attach_device(drm_dev, dev);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable iommu.\n");
+		g2d_fini_cmdlist(g2d);
+	}
+
+	return ret;
+
+}
+
+static void g2d_subdrv_remove(struct drm_device *drm_dev, struct device *dev)
+{
+	if (!is_drm_iommu_supported(drm_dev))
+		return;
+
+	drm_iommu_detach_device(drm_dev, dev);
+}
+
 static int g2d_open(struct drm_device *drm_dev, struct device *dev,
 			struct drm_file *file)
 {
@@ -713,7 +1050,7 @@
 
 	INIT_LIST_HEAD(&g2d_priv->inuse_cmdlist);
 	INIT_LIST_HEAD(&g2d_priv->event_list);
-	INIT_LIST_HEAD(&g2d_priv->gem_list);
+	INIT_LIST_HEAD(&g2d_priv->userptr_list);
 
 	return 0;
 }
@@ -734,11 +1071,21 @@
 		return;
 
 	mutex_lock(&g2d->cmdlist_mutex);
-	list_for_each_entry_safe(node, n, &g2d_priv->inuse_cmdlist, list)
+	list_for_each_entry_safe(node, n, &g2d_priv->inuse_cmdlist, list) {
+		/*
+		 * unmap all gem objects that have not completed.
+		 *
+		 * if the current process was terminated forcibly, some
+		 * commands may remain in inuse_cmdlist, so unmap them here.
+		 */
+		g2d_unmap_cmdlist_gem(g2d, node, file);
 		list_move_tail(&node->list, &g2d->free_cmdlist);
+	}
 	mutex_unlock(&g2d->cmdlist_mutex);
 
-	g2d_put_cmdlist_gem(drm_dev, file, g2d_priv->gem_nr);
+	/* release all g2d_userptr in pool. */
+	g2d_userptr_free_all(drm_dev, g2d, file);
 
 	kfree(file_priv->g2d_priv);
 }
@@ -778,15 +1125,11 @@
 	mutex_init(&g2d->cmdlist_mutex);
 	mutex_init(&g2d->runqueue_mutex);
 
-	ret = g2d_init_cmdlist(g2d);
-	if (ret < 0)
-		goto err_destroy_workqueue;
-
-	g2d->gate_clk = clk_get(dev, "fimg2d");
+	g2d->gate_clk = devm_clk_get(dev, "fimg2d");
 	if (IS_ERR(g2d->gate_clk)) {
 		dev_err(dev, "failed to get gate clock\n");
 		ret = PTR_ERR(g2d->gate_clk);
-		goto err_fini_cmdlist;
+		goto err_destroy_workqueue;
 	}
 
 	pm_runtime_enable(dev);
@@ -814,10 +1157,14 @@
 		goto err_put_clk;
 	}
 
+	g2d->max_pool = MAX_POOL;
+
 	platform_set_drvdata(pdev, g2d);
 
 	subdrv = &g2d->subdrv;
 	subdrv->dev = dev;
+	subdrv->probe = g2d_subdrv_probe;
+	subdrv->remove = g2d_subdrv_remove;
 	subdrv->open = g2d_open;
 	subdrv->close = g2d_close;
 
@@ -834,9 +1181,6 @@
 
 err_put_clk:
 	pm_runtime_disable(dev);
-	clk_put(g2d->gate_clk);
-err_fini_cmdlist:
-	g2d_fini_cmdlist(g2d);
 err_destroy_workqueue:
 	destroy_workqueue(g2d->g2d_workq);
 err_destroy_slab:
@@ -857,7 +1201,6 @@
 	}
 
 	pm_runtime_disable(&pdev->dev);
-	clk_put(g2d->gate_clk);
 
 	g2d_fini_cmdlist(g2d);
 	destroy_workqueue(g2d->g2d_workq);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index d254556..d48183e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -83,157 +83,40 @@
 
 static unsigned long roundup_gem_size(unsigned long size, unsigned int flags)
 {
-	if (!IS_NONCONTIG_BUFFER(flags)) {
-		if (size >= SZ_1M)
-			return roundup(size, SECTION_SIZE);
-		else if (size >= SZ_64K)
-			return roundup(size, SZ_64K);
-		else
-			goto out;
-	}
-out:
+	/* TODO */
+
 	return roundup(size, PAGE_SIZE);
 }
 
-struct page **exynos_gem_get_pages(struct drm_gem_object *obj,
-						gfp_t gfpmask)
-{
-	struct page *p, **pages;
-	int i, npages;
-
-	npages = obj->size >> PAGE_SHIFT;
-
-	pages = drm_malloc_ab(npages, sizeof(struct page *));
-	if (pages == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	for (i = 0; i < npages; i++) {
-		p = alloc_page(gfpmask);
-		if (IS_ERR(p))
-			goto fail;
-		pages[i] = p;
-	}
-
-	return pages;
-
-fail:
-	while (--i)
-		__free_page(pages[i]);
-
-	drm_free_large(pages);
-	return ERR_CAST(p);
-}
-
-static void exynos_gem_put_pages(struct drm_gem_object *obj,
-					struct page **pages)
-{
-	int npages;
-
-	npages = obj->size >> PAGE_SHIFT;
-
-	while (--npages >= 0)
-		__free_page(pages[npages]);
-
-	drm_free_large(pages);
-}
-
-static int exynos_drm_gem_map_pages(struct drm_gem_object *obj,
+static int exynos_drm_gem_map_buf(struct drm_gem_object *obj,
 					struct vm_area_struct *vma,
 					unsigned long f_vaddr,
 					pgoff_t page_offset)
 {
 	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
 	struct exynos_drm_gem_buf *buf = exynos_gem_obj->buffer;
-	unsigned long pfn;
-
-	if (exynos_gem_obj->flags & EXYNOS_BO_NONCONTIG) {
-		if (!buf->pages)
-			return -EINTR;
-
-		pfn = page_to_pfn(buf->pages[page_offset++]);
-	} else
-		pfn = (buf->dma_addr >> PAGE_SHIFT) + page_offset;
-
-	return vm_insert_mixed(vma, f_vaddr, pfn);
-}
-
-static int exynos_drm_gem_get_pages(struct drm_gem_object *obj)
-{
-	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
-	struct exynos_drm_gem_buf *buf = exynos_gem_obj->buffer;
 	struct scatterlist *sgl;
-	struct page **pages;
-	unsigned int npages, i = 0;
-	int ret;
+	unsigned long pfn;
+	int i;
 
-	if (buf->pages) {
-		DRM_DEBUG_KMS("already allocated.\n");
+	if (!buf->sgt)
+		return -EINTR;
+
+	if (page_offset >= (buf->size >> PAGE_SHIFT)) {
+		DRM_ERROR("invalid page offset\n");
 		return -EINVAL;
 	}
 
-	pages = exynos_gem_get_pages(obj, GFP_HIGHUSER_MOVABLE);
-	if (IS_ERR(pages)) {
-		DRM_ERROR("failed to get pages.\n");
-		return PTR_ERR(pages);
-	}
-
-	npages = obj->size >> PAGE_SHIFT;
-	buf->page_size = PAGE_SIZE;
-
-	buf->sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
-	if (!buf->sgt) {
-		DRM_ERROR("failed to allocate sg table.\n");
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	ret = sg_alloc_table(buf->sgt, npages, GFP_KERNEL);
-	if (ret < 0) {
-		DRM_ERROR("failed to initialize sg table.\n");
-		ret = -EFAULT;
-		goto err1;
-	}
-
 	sgl = buf->sgt->sgl;
-
-	/* set all pages to sg list. */
-	while (i < npages) {
-		sg_set_page(sgl, pages[i], PAGE_SIZE, 0);
-		sg_dma_address(sgl) = page_to_phys(pages[i]);
-		i++;
-		sgl = sg_next(sgl);
+	for_each_sg(buf->sgt->sgl, sgl, buf->sgt->nents, i) {
+		if (page_offset < (sgl->length >> PAGE_SHIFT))
+			break;
+		page_offset -= (sgl->length >> PAGE_SHIFT);
 	}
 
-	/* add some codes for UNCACHED type here. TODO */
+	pfn = __phys_to_pfn(sg_phys(sgl)) + page_offset;
 
-	buf->pages = pages;
-	return ret;
-err1:
-	kfree(buf->sgt);
-	buf->sgt = NULL;
-err:
-	exynos_gem_put_pages(obj, pages);
-	return ret;
-
-}
-
-static void exynos_drm_gem_put_pages(struct drm_gem_object *obj)
-{
-	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
-	struct exynos_drm_gem_buf *buf = exynos_gem_obj->buffer;
-
-	/*
-	 * if buffer typs is EXYNOS_BO_NONCONTIG then release all pages
-	 * allocated at gem fault handler.
-	 */
-	sg_free_table(buf->sgt);
-	kfree(buf->sgt);
-	buf->sgt = NULL;
-
-	exynos_gem_put_pages(obj, buf->pages);
-	buf->pages = NULL;
-
-	/* add some codes for UNCACHED type here. TODO */
+	return vm_insert_mixed(vma, f_vaddr, pfn);
 }
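The rewritten fault handler walks the scatter-gather list to translate a linear page offset into the physical frame backing it: each entry spans length >> PAGE_SHIFT pages, and the offset is reduced entry by entry until it falls inside one. A standalone model of that walk, using plain arrays in place of an sg_table and assuming 4KB pages:

    #include <stdio.h>

    struct seg { unsigned long phys; unsigned long npages; };

    /* translate a page offset into a pfn across variable-length segments */
    static unsigned long offset_to_pfn(const struct seg *segs, int nents,
                                       unsigned long page_offset)
    {
            int i;

            for (i = 0; i < nents; i++) {
                    if (page_offset < segs[i].npages)
                            return (segs[i].phys >> 12) + page_offset;
                    page_offset -= segs[i].npages;
            }
            return 0;   /* out of range; the driver rejects this earlier */
    }

    int main(void)
    {
            const struct seg segs[] = { { 0x10000000, 4 }, { 0x20000000, 8 } };

            /* page 6 lands in the second segment, 2 pages in */
            printf("pfn = 0x%lx\n", offset_to_pfn(segs, 2, 6)); /* 0x20002 */
            return 0;
    }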
 
 static int exynos_drm_gem_handle_create(struct drm_gem_object *obj,
@@ -270,9 +153,6 @@
 
 	DRM_DEBUG_KMS("handle count = %d\n", atomic_read(&obj->handle_count));
 
-	if (!buf->pages)
-		return;
-
 	/*
 	 * do not release memory region from exporter.
 	 *
@@ -282,10 +162,7 @@
 	if (obj->import_attach)
 		goto out;
 
-	if (exynos_gem_obj->flags & EXYNOS_BO_NONCONTIG)
-		exynos_drm_gem_put_pages(obj);
-	else
-		exynos_drm_free_buf(obj->dev, exynos_gem_obj->flags, buf);
+	exynos_drm_free_buf(obj->dev, exynos_gem_obj->flags, buf);
 
 out:
 	exynos_drm_fini_buf(obj->dev, buf);
@@ -364,22 +241,10 @@
 	/* set memory type and cache attribute from user side. */
 	exynos_gem_obj->flags = flags;
 
-	/*
-	 * allocate all pages as desired size if user wants to allocate
-	 * physically non-continuous memory.
-	 */
-	if (flags & EXYNOS_BO_NONCONTIG) {
-		ret = exynos_drm_gem_get_pages(&exynos_gem_obj->base);
-		if (ret < 0) {
-			drm_gem_object_release(&exynos_gem_obj->base);
-			goto err_fini_buf;
-		}
-	} else {
-		ret = exynos_drm_alloc_buf(dev, buf, flags);
-		if (ret < 0) {
-			drm_gem_object_release(&exynos_gem_obj->base);
-			goto err_fini_buf;
-		}
+	ret = exynos_drm_alloc_buf(dev, buf, flags);
+	if (ret < 0) {
+		drm_gem_object_release(&exynos_gem_obj->base);
+		goto err_fini_buf;
 	}
 
 	return exynos_gem_obj;
@@ -412,14 +277,14 @@
 	return 0;
 }
 
-void *exynos_drm_gem_get_dma_addr(struct drm_device *dev,
+dma_addr_t *exynos_drm_gem_get_dma_addr(struct drm_device *dev,
 					unsigned int gem_handle,
-					struct drm_file *file_priv)
+					struct drm_file *filp)
 {
 	struct exynos_drm_gem_obj *exynos_gem_obj;
 	struct drm_gem_object *obj;
 
-	obj = drm_gem_object_lookup(dev, file_priv, gem_handle);
+	obj = drm_gem_object_lookup(dev, filp, gem_handle);
 	if (!obj) {
 		DRM_ERROR("failed to lookup gem object.\n");
 		return ERR_PTR(-EINVAL);
@@ -427,25 +292,17 @@
 
 	exynos_gem_obj = to_exynos_gem_obj(obj);
 
-	if (exynos_gem_obj->flags & EXYNOS_BO_NONCONTIG) {
-		DRM_DEBUG_KMS("not support NONCONTIG type.\n");
-		drm_gem_object_unreference_unlocked(obj);
-
-		/* TODO */
-		return ERR_PTR(-EINVAL);
-	}
-
 	return &exynos_gem_obj->buffer->dma_addr;
 }
 
 void exynos_drm_gem_put_dma_addr(struct drm_device *dev,
 					unsigned int gem_handle,
-					struct drm_file *file_priv)
+					struct drm_file *filp)
 {
 	struct exynos_drm_gem_obj *exynos_gem_obj;
 	struct drm_gem_object *obj;
 
-	obj = drm_gem_object_lookup(dev, file_priv, gem_handle);
+	obj = drm_gem_object_lookup(dev, filp, gem_handle);
 	if (!obj) {
 		DRM_ERROR("failed to lookup gem object.\n");
 		return;
@@ -453,14 +310,6 @@
 
 	exynos_gem_obj = to_exynos_gem_obj(obj);
 
-	if (exynos_gem_obj->flags & EXYNOS_BO_NONCONTIG) {
-		DRM_DEBUG_KMS("not support NONCONTIG type.\n");
-		drm_gem_object_unreference_unlocked(obj);
-
-		/* TODO */
-		return;
-	}
-
 	drm_gem_object_unreference_unlocked(obj);
 
 	/*
@@ -489,22 +338,57 @@
 			&args->offset);
 }
 
+static struct drm_file *exynos_drm_find_drm_file(struct drm_device *drm_dev,
+							struct file *filp)
+{
+	struct drm_file *file_priv;
+
+	mutex_lock(&drm_dev->struct_mutex);
+
+	/* find current process's drm_file from filelist. */
+	list_for_each_entry(file_priv, &drm_dev->filelist, lhead) {
+		if (file_priv->filp == filp) {
+			mutex_unlock(&drm_dev->struct_mutex);
+			return file_priv;
+		}
+	}
+
+	mutex_unlock(&drm_dev->struct_mutex);
+	WARN_ON(1);
+
+	return ERR_PTR(-EFAULT);
+}
+
 static int exynos_drm_gem_mmap_buffer(struct file *filp,
 				      struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = filp->private_data;
 	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+	struct drm_device *drm_dev = obj->dev;
 	struct exynos_drm_gem_buf *buffer;
-	unsigned long pfn, vm_size, usize, uaddr = vma->vm_start;
+	struct drm_file *file_priv;
+	unsigned long vm_size;
 	int ret;
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
 	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_private_data = obj;
+	vma->vm_ops = drm_dev->driver->gem_vm_ops;
+
+	/* restore it to driver's fops. */
+	filp->f_op = fops_get(drm_dev->driver->fops);
+
+	file_priv = exynos_drm_find_drm_file(drm_dev, filp);
+	if (IS_ERR(file_priv))
+		return PTR_ERR(file_priv);
+
+	/* restore it to drm_file. */
+	filp->private_data = file_priv;
 
 	update_vm_cache_attr(exynos_gem_obj, vma);
 
-	vm_size = usize = vma->vm_end - vma->vm_start;
+	vm_size = vma->vm_end - vma->vm_start;
 
 	/*
 	 * a buffer contains information about physically contiguous memory
@@ -516,41 +400,24 @@
 	if (vm_size > buffer->size)
 		return -EINVAL;
 
-	if (exynos_gem_obj->flags & EXYNOS_BO_NONCONTIG) {
-		int i = 0;
-
-		if (!buffer->pages)
-			return -EINVAL;
-
-		vma->vm_flags |= VM_MIXEDMAP;
-
-		do {
-			ret = vm_insert_page(vma, uaddr, buffer->pages[i++]);
-			if (ret) {
-				DRM_ERROR("failed to remap user space.\n");
-				return ret;
-			}
-
-			uaddr += PAGE_SIZE;
-			usize -= PAGE_SIZE;
-		} while (usize > 0);
-	} else {
-		/*
-		 * get page frame number to physical memory to be mapped
-		 * to user space.
-		 */
-		pfn = ((unsigned long)exynos_gem_obj->buffer->dma_addr) >>
-								PAGE_SHIFT;
-
-		DRM_DEBUG_KMS("pfn = 0x%lx\n", pfn);
-
-		if (remap_pfn_range(vma, vma->vm_start, pfn, vm_size,
-					vma->vm_page_prot)) {
-			DRM_ERROR("failed to remap pfn range.\n");
-			return -EAGAIN;
-		}
+	ret = dma_mmap_attrs(drm_dev->dev, vma, buffer->pages,
+				buffer->dma_addr, buffer->size,
+				&buffer->dma_attrs);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap.\n");
+		return ret;
 	}
 
+	/*
+	 * take a reference to this mapping of the object; the reference
+	 * is dropped by the corresponding vm_close call.
+	 */
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&drm_dev->struct_mutex);
+	drm_vm_open_locked(drm_dev, vma);
+	mutex_unlock(&drm_dev->struct_mutex);
+
 	return 0;
 }
 
@@ -578,16 +445,29 @@
 		return -EINVAL;
 	}
 
-	obj->filp->f_op = &exynos_drm_gem_fops;
-	obj->filp->private_data = obj;
+	/*
+	 * Set the specific mapper's fops; they will be restored to
+	 * dev->driver->fops by exynos_drm_gem_mmap_buffer. This is
+	 * used to invoke the specific mapper temporarily.
+	 */
+	file_priv->filp->f_op = &exynos_drm_gem_fops;
 
-	addr = vm_mmap(obj->filp, 0, args->size,
+	/*
+	 * Store the gem object in private_data so that the specific
+	 * mapper can retrieve it; it will be restored to the drm_file
+	 * by exynos_drm_gem_mmap_buffer.
+	 */
+	file_priv->filp->private_data = obj;
+
+	addr = vm_mmap(file_priv->filp, 0, args->size,
 			PROT_READ | PROT_WRITE, MAP_SHARED, 0);
 
 	drm_gem_object_unreference_unlocked(obj);
 
-	if (IS_ERR((void *)addr))
+	if (IS_ERR((void *)addr)) {
+		file_priv->filp->private_data = file_priv;
 		return PTR_ERR((void *)addr);
+	}
 
 	args->mapped = addr;
 
@@ -622,6 +502,129 @@
 	return 0;
 }
 
+struct vm_area_struct *exynos_gem_get_vma(struct vm_area_struct *vma)
+{
+	struct vm_area_struct *vma_copy;
+
+	vma_copy = kmalloc(sizeof(*vma_copy), GFP_KERNEL);
+	if (!vma_copy)
+		return NULL;
+
+	if (vma->vm_ops && vma->vm_ops->open)
+		vma->vm_ops->open(vma);
+
+	if (vma->vm_file)
+		get_file(vma->vm_file);
+
+	memcpy(vma_copy, vma, sizeof(*vma));
+
+	vma_copy->vm_mm = NULL;
+	vma_copy->vm_next = NULL;
+	vma_copy->vm_prev = NULL;
+
+	return vma_copy;
+}
+
+void exynos_gem_put_vma(struct vm_area_struct *vma)
+{
+	if (!vma)
+		return;
+
+	if (vma->vm_ops && vma->vm_ops->close)
+		vma->vm_ops->close(vma);
+
+	if (vma->vm_file)
+		fput(vma->vm_file);
+
+	kfree(vma);
+}
+
+int exynos_gem_get_pages_from_userptr(unsigned long start,
+						unsigned int npages,
+						struct page **pages,
+						struct vm_area_struct *vma)
+{
+	int get_npages;
+
+	/* the memory region was mapped with VM_PFNMAP. */
+	if (vma_is_io(vma)) {
+		unsigned int i;
+
+		for (i = 0; i < npages; ++i, start += PAGE_SIZE) {
+			unsigned long pfn;
+			int ret = follow_pfn(vma, start, &pfn);
+			if (ret)
+				return ret;
+
+			pages[i] = pfn_to_page(pfn);
+		}
+
+		if (i != npages) {
+			DRM_ERROR("failed to get user_pages.\n");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	get_npages = get_user_pages(current, current->mm, start,
+					npages, 1, 1, pages, NULL);
+	get_npages = max(get_npages, 0);
+	if (get_npages != npages) {
+		DRM_ERROR("failed to get user_pages.\n");
+		while (get_npages)
+			put_page(pages[--get_npages]);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+void exynos_gem_put_pages_to_userptr(struct page **pages,
+					unsigned int npages,
+					struct vm_area_struct *vma)
+{
+	if (!vma_is_io(vma)) {
+		unsigned int i;
+
+		for (i = 0; i < npages; i++) {
+			set_page_dirty_lock(pages[i]);
+
+			/*
+			 * undo the reference we took when populating
+			 * the table.
+			 */
+			put_page(pages[i]);
+		}
+	}
+}
+
+int exynos_gem_map_sgt_with_dma(struct drm_device *drm_dev,
+				struct sg_table *sgt,
+				enum dma_data_direction dir)
+{
+	int nents;
+
+	mutex_lock(&drm_dev->struct_mutex);
+
+	nents = dma_map_sg(drm_dev->dev, sgt->sgl, sgt->nents, dir);
+	if (!nents) {
+		DRM_ERROR("failed to map sgl with dma.\n");
+		mutex_unlock(&drm_dev->struct_mutex);
+		return nents;
+	}
+
+	mutex_unlock(&drm_dev->struct_mutex);
+	return 0;
+}
+
+void exynos_gem_unmap_sgt_from_dma(struct drm_device *drm_dev,
+				struct sg_table *sgt,
+				enum dma_data_direction dir)
+{
+	dma_unmap_sg(drm_dev->dev, sgt->sgl, sgt->nents, dir);
+}
+
 int exynos_drm_gem_init_object(struct drm_gem_object *obj)
 {
 	DRM_DEBUG_KMS("%s\n", __FILE__);
@@ -753,9 +756,9 @@
 
 	mutex_lock(&dev->struct_mutex);
 
-	ret = exynos_drm_gem_map_pages(obj, vma, f_vaddr, page_offset);
+	ret = exynos_drm_gem_map_buf(obj, vma, f_vaddr, page_offset);
 	if (ret < 0)
-		DRM_ERROR("failed to map pages.\n");
+		DRM_ERROR("failed to map a buffer with user.\n");
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 085b2a5..f11f2af 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -35,21 +35,27 @@
  * exynos drm gem buffer structure.
  *
  * @kvaddr: kernel virtual address to allocated memory region.
+ * @userptr: user space address.
  * @dma_addr: bus address(accessed by dma) to allocated memory region.
  *	- this address could be physical address without IOMMU and
  *	device address with IOMMU.
+ * @write: whether pages will be written to by the caller.
+ * @pages: Array of backing pages.
  * @sgt: sg table to transfer page data.
- * @pages: contain all pages to allocated memory region.
- * @page_size: could be 4K, 64K or 1MB.
  * @size: size of allocated memory region.
+ * @pfnmap: indicates whether the memory region from userptr is mapped
+ *	with VM_PFNMAP or not.
  */
 struct exynos_drm_gem_buf {
 	void __iomem		*kvaddr;
+	unsigned long		userptr;
 	dma_addr_t		dma_addr;
-	struct sg_table		*sgt;
+	struct dma_attrs	dma_attrs;
+	unsigned int		write;
 	struct page		**pages;
-	unsigned long		page_size;
+	struct sg_table		*sgt;
 	unsigned long		size;
+	bool			pfnmap;
 };
 
 /*
@@ -65,6 +71,7 @@
  *	or at framebuffer creation.
  * @size: size requested from user, in bytes and this size is aligned
  *	in page unit.
+ * @vma: a pointer to vm_area.
  * @flags: indicates the memory type and cache attribute of the allocated buffer.
  *
  * P.S. this object would be transferred to user as kms_bo.handle so
@@ -74,6 +81,7 @@
 	struct drm_gem_object		base;
 	struct exynos_drm_gem_buf	*buffer;
 	unsigned long			size;
+	struct vm_area_struct		*vma;
 	unsigned int			flags;
 };
 
@@ -104,9 +112,9 @@
  * other drivers such as 2d/3d acceleration drivers.
  * With this function call, the gem object's reference count is increased.
  */
-void *exynos_drm_gem_get_dma_addr(struct drm_device *dev,
+dma_addr_t *exynos_drm_gem_get_dma_addr(struct drm_device *dev,
 					unsigned int gem_handle,
-					struct drm_file *file_priv);
+					struct drm_file *filp);
 
 /*
  * put dma address from gem handle and this function could be used for
@@ -115,7 +123,7 @@
  */
 void exynos_drm_gem_put_dma_addr(struct drm_device *dev,
 					unsigned int gem_handle,
-					struct drm_file *file_priv);
+					struct drm_file *filp);
 
 /* get buffer offset to map to user space. */
 int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
@@ -128,6 +136,10 @@
 int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 
+/* map user-space memory allocated by malloc to pages. */
+int exynos_drm_gem_userptr_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_priv);
+
 /* get buffer information to memory region allocated by gem. */
 int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data,
 				      struct drm_file *file_priv);
@@ -163,4 +175,36 @@
 /* set vm_flags; the vm attribute can be changed to another one here. */
 int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
+static inline int vma_is_io(struct vm_area_struct *vma)
+{
+	return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));
+}
+
+/* get a copy of a virtual memory region. */
+struct vm_area_struct *exynos_gem_get_vma(struct vm_area_struct *vma);
+
+/* release a userspace virtual memory area. */
+void exynos_gem_put_vma(struct vm_area_struct *vma);
+
+/* get pages from user space. */
+int exynos_gem_get_pages_from_userptr(unsigned long start,
+						unsigned int npages,
+						struct page **pages,
+						struct vm_area_struct *vma);
+
+/* drop the reference to pages. */
+void exynos_gem_put_pages_to_userptr(struct page **pages,
+					unsigned int npages,
+					struct vm_area_struct *vma);
+
+/* map sgt with dma region. */
+int exynos_gem_map_sgt_with_dma(struct drm_device *drm_dev,
+				struct sg_table *sgt,
+				enum dma_data_direction dir);
+
+/* unmap sgt from dma region. */
+void exynos_gem_unmap_sgt_from_dma(struct drm_device *drm_dev,
+				struct sg_table *sgt,
+				enum dma_data_direction dir);
+
 #endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
new file mode 100644
index 0000000..5639353
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -0,0 +1,1870 @@
+/*
+ * Copyright (C) 2012 Samsung Electronics Co.Ltd
+ * Authors:
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *	Jinyoung Jeon <jy0.jeon@samsung.com>
+ *	Sangmin Lee <lsmin.lee@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <plat/map-base.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "regs-gsc.h"
+#include "exynos_drm_ipp.h"
+#include "exynos_drm_gsc.h"
+
+/*
+ * GSC stands for General SCaler and supports image scaling/rotation
+ * and input/output DMA operations. The input DMA reads image data from
+ * memory and the output DMA writes image data back to memory. GSC also
+ * supports image rotation and image effect functions.
+ *
+ * M2M operation : supports crop/scale/rotation/CSC and so on.
+ * Memory ----> GSC H/W ----> Memory.
+ * Writeback operation : supports cloned screen with FIMD.
+ * FIMD ----> GSC H/W ----> Memory.
+ * Output operation : supports direct display using local path.
+ * Memory ----> GSC H/W ----> FIMD, Mixer.
+ */
+
+/*
+ * TODO
+ * 1. check suspend/resume api if needed.
+ * 2. need to check use case platform_device_id.
+ * 3. check src/dst size (width, height).
+ * 4. add check_prepare api for correct register setup.
+ * 5. need to add supported list in prop_list.
+ * 6. check prescaler/scaler optimization.
+ */
+
+#define GSC_MAX_DEVS	4
+#define GSC_MAX_SRC		4
+#define GSC_MAX_DST		16
+#define GSC_RESET_TIMEOUT	50
+#define GSC_BUF_STOP	1
+#define GSC_BUF_START	2
+#define GSC_REG_SZ		16
+#define GSC_WIDTH_ITU_709	1280
+#define GSC_SC_UP_MAX_RATIO		65536
+#define GSC_SC_DOWN_RATIO_7_8		74898
+#define GSC_SC_DOWN_RATIO_6_8		87381
+#define GSC_SC_DOWN_RATIO_5_8		104857
+#define GSC_SC_DOWN_RATIO_4_8		131072
+#define GSC_SC_DOWN_RATIO_3_8		174762
+#define GSC_SC_DOWN_RATIO_2_8		262144
+#define GSC_REFRESH_MIN	12
+#define GSC_REFRESH_MAX	60
+#define GSC_CROP_MAX	8192
+#define GSC_CROP_MIN	32
+#define GSC_SCALE_MAX	4224
+#define GSC_SCALE_MIN	32
+#define GSC_COEF_RATIO	7
+#define GSC_COEF_PHASE	9
+#define GSC_COEF_ATTR	16
+#define GSC_COEF_H_8T	8
+#define GSC_COEF_V_4T	4
+#define GSC_COEF_DEPTH	3
+
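The GSC_SC_* ratio constants above are 16.16 fixed-point scale factors, ratio = (src << 16) / dst: 65536 is 1:1 and larger values mean stronger down-scaling. For example (8 << 16) / 6 = 87381, which is exactly GSC_SC_DOWN_RATIO_6_8 and marks the ~8:6 band of the coefficient tables below. A quick check:

    #include <stdio.h>

    int main(void)
    {
            int dst;

            /* ratio = (src << 16) / dst, with src fixed at 8 */
            for (dst = 8; dst >= 2; dst--)
                    printf("8:%d -> %lu\n", dst, (8UL << 16) / dst);
            /*
             * prints 65536, 74898, 87381, 104857, 131072, 174762, 262144
             * for 8:8 .. 8:2, matching the constants above
             */
            return 0;
    }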
+#define get_gsc_context(dev)	platform_get_drvdata(to_platform_device(dev))
+#define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
+					struct gsc_context, ippdrv);
+#define gsc_read(offset)		readl(ctx->regs + (offset))
+#define gsc_write(cfg, offset)	writel(cfg, ctx->regs + (offset))
+
+/*
+ * A structure of scaler.
+ *
+ * @range: narrow, wide.
+ * @pre_shfactor: pre scaler shift factor.
+ * @pre_hratio: horizontal ratio of the prescaler.
+ * @pre_vratio: vertical ratio of the prescaler.
+ * @main_hratio: the main scaler's horizontal ratio.
+ * @main_vratio: the main scaler's vertical ratio.
+ */
+struct gsc_scaler {
+	bool	range;
+	u32	pre_shfactor;
+	u32	pre_hratio;
+	u32	pre_vratio;
+	unsigned long main_hratio;
+	unsigned long main_vratio;
+};
+
+/*
+ * A structure of scaler capability.
+ *
+ * see the user manual, section 49.2 (features).
+ * @tile_w: tile mode or rotation width.
+ * @tile_h: tile mode or rotation height.
+ * @w: other cases width.
+ * @h: other cases height.
+ */
+struct gsc_capability {
+	/* tile or rotation */
+	u32	tile_w;
+	u32	tile_h;
+	/* other cases */
+	u32	w;
+	u32	h;
+};
+
+/*
+ * A structure of gsc context.
+ *
+ * @ippdrv: prepare initialization using ippdrv.
+ * @regs_res: register resources.
+ * @regs: memory mapped io registers.
+ * @lock: locking of operations.
+ * @gsc_clk: gsc gate clock.
+ * @sc: scaler information.
+ * @id: gsc id.
+ * @irq: irq number.
+ * @rotation: supports rotation of src.
+ * @suspended: qos operations.
+ */
+struct gsc_context {
+	struct exynos_drm_ippdrv	ippdrv;
+	struct resource	*regs_res;
+	void __iomem	*regs;
+	struct mutex	lock;
+	struct clk	*gsc_clk;
+	struct gsc_scaler	sc;
+	int	id;
+	int	irq;
+	bool	rotation;
+	bool	suspended;
+};
+
+/* 8-tap Filter Coefficient */
+static const int h_coef_8t[GSC_COEF_RATIO][GSC_COEF_ATTR][GSC_COEF_H_8T] = {
+	{	/* Ratio <= 65536 (~8:8) */
+		{  0,  0,   0, 128,   0,   0,  0,  0 },
+		{ -1,  2,  -6, 127,   7,  -2,  1,  0 },
+		{ -1,  4, -12, 125,  16,  -5,  1,  0 },
+		{ -1,  5, -15, 120,  25,  -8,  2,  0 },
+		{ -1,  6, -18, 114,  35, -10,  3, -1 },
+		{ -1,  6, -20, 107,  46, -13,  4, -1 },
+		{ -2,  7, -21,  99,  57, -16,  5, -1 },
+		{ -1,  6, -20,  89,  68, -18,  5, -1 },
+		{ -1,  6, -20,  79,  79, -20,  6, -1 },
+		{ -1,  5, -18,  68,  89, -20,  6, -1 },
+		{ -1,  5, -16,  57,  99, -21,  7, -2 },
+		{ -1,  4, -13,  46, 107, -20,  6, -1 },
+		{ -1,  3, -10,  35, 114, -18,  6, -1 },
+		{  0,  2,  -8,  25, 120, -15,  5, -1 },
+		{  0,  1,  -5,  16, 125, -12,  4, -1 },
+		{  0,  1,  -2,   7, 127,  -6,  2, -1 }
+	}, {	/* 65536 < Ratio <= 74898 (~8:7) */
+		{  3, -8,  14, 111,  13,  -8,  3,  0 },
+		{  2, -6,   7, 112,  21, -10,  3, -1 },
+		{  2, -4,   1, 110,  28, -12,  4, -1 },
+		{  1, -2,  -3, 106,  36, -13,  4, -1 },
+		{  1, -1,  -7, 103,  44, -15,  4, -1 },
+		{  1,  1, -11,  97,  53, -16,  4, -1 },
+		{  0,  2, -13,  91,  61, -16,  4, -1 },
+		{  0,  3, -15,  85,  69, -17,  4, -1 },
+		{  0,  3, -16,  77,  77, -16,  3,  0 },
+		{ -1,  4, -17,  69,  85, -15,  3,  0 },
+		{ -1,  4, -16,  61,  91, -13,  2,  0 },
+		{ -1,  4, -16,  53,  97, -11,  1,  1 },
+		{ -1,  4, -15,  44, 103,  -7, -1,  1 },
+		{ -1,  4, -13,  36, 106,  -3, -2,  1 },
+		{ -1,  4, -12,  28, 110,   1, -4,  2 },
+		{ -1,  3, -10,  21, 112,   7, -6,  2 }
+	}, {	/* 74898 < Ratio <= 87381 (~8:6) */
+		{ 2, -11,  25,  96, 25, -11,   2,  0 },
+		{ 2, -10,  19,  96, 31, -12,   2,  0 },
+		{ 2,  -9,  14,  94, 37, -12,   2,  0 },
+		{ 2,  -8,  10,  92, 43, -12,   1,  0 },
+		{ 2,  -7,   5,  90, 49, -12,   1,  0 },
+		{ 2,  -5,   1,  86, 55, -12,   0,  1 },
+		{ 2,  -4,  -2,  82, 61, -11,  -1,  1 },
+		{ 1,  -3,  -5,  77, 67,  -9,  -1,  1 },
+		{ 1,  -2,  -7,  72, 72,  -7,  -2,  1 },
+		{ 1,  -1,  -9,  67, 77,  -5,  -3,  1 },
+		{ 1,  -1, -11,  61, 82,  -2,  -4,  2 },
+		{ 1,   0, -12,  55, 86,   1,  -5,  2 },
+		{ 0,   1, -12,  49, 90,   5,  -7,  2 },
+		{ 0,   1, -12,  43, 92,  10,  -8,  2 },
+		{ 0,   2, -12,  37, 94,  14,  -9,  2 },
+		{ 0,   2, -12,  31, 96,  19, -10,  2 }
+	}, {	/* 87381 < Ratio <= 104857 (~8:5) */
+		{ -1,  -8, 33,  80, 33,  -8,  -1,  0 },
+		{ -1,  -8, 28,  80, 37,  -7,  -2,  1 },
+		{  0,  -8, 24,  79, 41,  -7,  -2,  1 },
+		{  0,  -8, 20,  78, 46,  -6,  -3,  1 },
+		{  0,  -8, 16,  76, 50,  -4,  -3,  1 },
+		{  0,  -7, 13,  74, 54,  -3,  -4,  1 },
+		{  1,  -7, 10,  71, 58,  -1,  -5,  1 },
+		{  1,  -6,  6,  68, 62,   1,  -5,  1 },
+		{  1,  -6,  4,  65, 65,   4,  -6,  1 },
+		{  1,  -5,  1,  62, 68,   6,  -6,  1 },
+		{  1,  -5, -1,  58, 71,  10,  -7,  1 },
+		{  1,  -4, -3,  54, 74,  13,  -7,  0 },
+		{  1,  -3, -4,  50, 76,  16,  -8,  0 },
+		{  1,  -3, -6,  46, 78,  20,  -8,  0 },
+		{  1,  -2, -7,  41, 79,  24,  -8,  0 },
+		{  1,  -2, -7,  37, 80,  28,  -8, -1 }
+	}, {	/* 104857 < Ratio <= 131072 (~8:4) */
+		{ -3,   0, 35,  64, 35,   0,  -3,  0 },
+		{ -3,  -1, 32,  64, 38,   1,  -3,  0 },
+		{ -2,  -2, 29,  63, 41,   2,  -3,  0 },
+		{ -2,  -3, 27,  63, 43,   4,  -4,  0 },
+		{ -2,  -3, 24,  61, 46,   6,  -4,  0 },
+		{ -2,  -3, 21,  60, 49,   7,  -4,  0 },
+		{ -1,  -4, 19,  59, 51,   9,  -4, -1 },
+		{ -1,  -4, 16,  57, 53,  12,  -4, -1 },
+		{ -1,  -4, 14,  55, 55,  14,  -4, -1 },
+		{ -1,  -4, 12,  53, 57,  16,  -4, -1 },
+		{ -1,  -4,  9,  51, 59,  19,  -4, -1 },
+		{  0,  -4,  7,  49, 60,  21,  -3, -2 },
+		{  0,  -4,  6,  46, 61,  24,  -3, -2 },
+		{  0,  -4,  4,  43, 63,  27,  -3, -2 },
+		{  0,  -3,  2,  41, 63,  29,  -2, -2 },
+		{  0,  -3,  1,  38, 64,  32,  -1, -3 }
+	}, {	/* 131072 < Ratio <= 174762 (~8:3) */
+		{ -1,   8, 33,  48, 33,   8,  -1,  0 },
+		{ -1,   7, 31,  49, 35,   9,  -1, -1 },
+		{ -1,   6, 30,  49, 36,  10,  -1, -1 },
+		{ -1,   5, 28,  48, 38,  12,  -1, -1 },
+		{ -1,   4, 26,  48, 39,  13,   0, -1 },
+		{ -1,   3, 24,  47, 41,  15,   0, -1 },
+		{ -1,   2, 23,  47, 42,  16,   0, -1 },
+		{ -1,   2, 21,  45, 43,  18,   1, -1 },
+		{ -1,   1, 19,  45, 45,  19,   1, -1 },
+		{ -1,   1, 18,  43, 45,  21,   2, -1 },
+		{ -1,   0, 16,  42, 47,  23,   2, -1 },
+		{ -1,   0, 15,  41, 47,  24,   3, -1 },
+		{ -1,   0, 13,  39, 48,  26,   4, -1 },
+		{ -1,  -1, 12,  38, 48,  28,   5, -1 },
+		{ -1,  -1, 10,  36, 49,  30,   6, -1 },
+		{ -1,  -1,  9,  35, 49,  31,   7, -1 }
+	}, {	/* 174762 < Ratio <= 262144 (~8:2) */
+		{  2,  13, 30,  38, 30,  13,   2,  0 },
+		{  2,  12, 29,  38, 30,  14,   3,  0 },
+		{  2,  11, 28,  38, 31,  15,   3,  0 },
+		{  2,  10, 26,  38, 32,  16,   4,  0 },
+		{  1,  10, 26,  37, 33,  17,   4,  0 },
+		{  1,   9, 24,  37, 34,  18,   5,  0 },
+		{  1,   8, 24,  37, 34,  19,   5,  0 },
+		{  1,   7, 22,  36, 35,  20,   6,  1 },
+		{  1,   6, 21,  36, 36,  21,   6,  1 },
+		{  1,   6, 20,  35, 36,  22,   7,  1 },
+		{  0,   5, 19,  34, 37,  24,   8,  1 },
+		{  0,   5, 18,  34, 37,  24,   9,  1 },
+		{  0,   4, 17,  33, 37,  26,  10,  1 },
+		{  0,   4, 16,  32, 38,  26,  10,  2 },
+		{  0,   3, 15,  31, 38,  28,  11,  2 },
+		{  0,   3, 14,  30, 38,  29,  12,  2 }
+	}
+};
+
+/* 4-tap Filter Coefficient */
+static const int v_coef_4t[GSC_COEF_RATIO][GSC_COEF_ATTR][GSC_COEF_V_4T] = {
+	{	/* Ratio <= 65536 (~8:8) */
+		{  0, 128,   0,  0 },
+		{ -4, 127,   5,  0 },
+		{ -6, 124,  11, -1 },
+		{ -8, 118,  19, -1 },
+		{ -8, 111,  27, -2 },
+		{ -8, 102,  37, -3 },
+		{ -8,  92,  48, -4 },
+		{ -7,  81,  59, -5 },
+		{ -6,  70,  70, -6 },
+		{ -5,  59,  81, -7 },
+		{ -4,  48,  92, -8 },
+		{ -3,  37, 102, -8 },
+		{ -2,  27, 111, -8 },
+		{ -1,  19, 118, -8 },
+		{ -1,  11, 124, -6 },
+		{  0,   5, 127, -4 }
+	}, {	/* 65536 < Ratio <= 74898 (~8:7) */
+		{  8, 112,   8,  0 },
+		{  4, 111,  14, -1 },
+		{  1, 109,  20, -2 },
+		{ -2, 105,  27, -2 },
+		{ -3, 100,  34, -3 },
+		{ -5,  93,  43, -3 },
+		{ -5,  86,  51, -4 },
+		{ -5,  77,  60, -4 },
+		{ -5,  69,  69, -5 },
+		{ -4,  60,  77, -5 },
+		{ -4,  51,  86, -5 },
+		{ -3,  43,  93, -5 },
+		{ -3,  34, 100, -3 },
+		{ -2,  27, 105, -2 },
+		{ -2,  20, 109,  1 },
+		{ -1,  14, 111,  4 }
+	}, {	/* 74898 < Ratio <= 87381 (~8:6) */
+		{ 16,  96,  16,  0 },
+		{ 12,  97,  21, -2 },
+		{  8,  96,  26, -2 },
+		{  5,  93,  32, -2 },
+		{  2,  89,  39, -2 },
+		{  0,  84,  46, -2 },
+		{ -1,  79,  53, -3 },
+		{ -2,  73,  59, -2 },
+		{ -2,  66,  66, -2 },
+		{ -2,  59,  73, -2 },
+		{ -3,  53,  79, -1 },
+		{ -2,  46,  84,  0 },
+		{ -2,  39,  89,  2 },
+		{ -2,  32,  93,  5 },
+		{ -2,  26,  96,  8 },
+		{ -2,  21,  97, 12 }
+	}, {	/* 87381 < Ratio <= 104857 (~8:5) */
+		{ 22,  84,  22,  0 },
+		{ 18,  85,  26, -1 },
+		{ 14,  84,  31, -1 },
+		{ 11,  82,  36, -1 },
+		{  8,  79,  42, -1 },
+		{  6,  76,  47, -1 },
+		{  4,  72,  52,  0 },
+		{  2,  68,  58,  0 },
+		{  1,  63,  63,  1 },
+		{  0,  58,  68,  2 },
+		{  0,  52,  72,  4 },
+		{ -1,  47,  76,  6 },
+		{ -1,  42,  79,  8 },
+		{ -1,  36,  82, 11 },
+		{ -1,  31,  84, 14 },
+		{ -1,  26,  85, 18 }
+	}, {	/* 104857 < Ratio <= 131072 (~8:4) */
+		{ 26,  76,  26,  0 },
+		{ 22,  76,  30,  0 },
+		{ 19,  75,  34,  0 },
+		{ 16,  73,  38,  1 },
+		{ 13,  71,  43,  1 },
+		{ 10,  69,  47,  2 },
+		{  8,  66,  51,  3 },
+		{  6,  63,  55,  4 },
+		{  5,  59,  59,  5 },
+		{  4,  55,  63,  6 },
+		{  3,  51,  66,  8 },
+		{  2,  47,  69, 10 },
+		{  1,  43,  71, 13 },
+		{  1,  38,  73, 16 },
+		{  0,  34,  75, 19 },
+		{  0,  30,  76, 22 }
+	}, {	/* 131072 < Ratio <= 174762 (~8:3) */
+		{ 29,  70,  29,  0 },
+		{ 26,  68,  32,  2 },
+		{ 23,  67,  36,  2 },
+		{ 20,  66,  39,  3 },
+		{ 17,  65,  43,  3 },
+		{ 15,  63,  46,  4 },
+		{ 12,  61,  50,  5 },
+		{ 10,  58,  53,  7 },
+		{  8,  56,  56,  8 },
+		{  7,  53,  58, 10 },
+		{  5,  50,  61, 12 },
+		{  4,  46,  63, 15 },
+		{  3,  43,  65, 17 },
+		{  3,  39,  66, 20 },
+		{  2,  36,  67, 23 },
+		{  2,  32,  68, 26 }
+	}, {	/* 174762 < Ratio <= 262144 (~8:2) */
+		{ 32,  64,  32,  0 },
+		{ 28,  63,  34,  3 },
+		{ 25,  62,  37,  4 },
+		{ 22,  62,  40,  4 },
+		{ 19,  61,  43,  5 },
+		{ 17,  59,  46,  6 },
+		{ 15,  58,  48,  7 },
+		{ 13,  55,  51,  9 },
+		{ 11,  53,  53, 11 },
+		{  9,  51,  55, 13 },
+		{  7,  48,  58, 15 },
+		{  6,  46,  59, 17 },
+		{  5,  43,  61, 19 },
+		{  4,  40,  62, 22 },
+		{  4,  37,  62, 25 },
+		{  3,  34,  63, 28 }
+	}
+};
+
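Each row of these coefficient tables is one polyphase filter phase; the entries in a row sum to 128, so the filtered sample is the coefficient-weighted sum of neighbouring pixels normalized by a right shift of 7. A small model of applying one 4-tap phase (illustrative values, not driver code):

    #include <stdio.h>

    /* apply one 4-tap phase; coefficients sum to 128, hence >> 7 */
    static int filter_4t(const int coef[4], const int px[4])
    {
            int acc = 0, i;

            for (i = 0; i < 4; i++)
                    acc += coef[i] * px[i];
            return acc >> 7;
    }

    int main(void)
    {
            const int phase0[4] = { 0, 128, 0, 0 };   /* identity phase */
            const int phase8[4] = { -6, 70, 70, -6 }; /* midpoint phase */
            const int px[4]     = { 10, 20, 30, 40 };

            printf("%d\n", filter_4t(phase0, px)); /* 20: centre pixel */
            printf("%d\n", filter_4t(phase8, px)); /* 25: blend of 20 and 30 */
            return 0;
    }

The negative outer taps give the sharper response of a windowed-sinc style filter; the identity row in each table corresponds to phase 0, where the output pixel sits exactly on an input pixel.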
+static int gsc_sw_reset(struct gsc_context *ctx)
+{
+	u32 cfg;
+	int count = GSC_RESET_TIMEOUT;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	/* s/w reset */
+	cfg = (GSC_SW_RESET_SRESET);
+	gsc_write(cfg, GSC_SW_RESET);
+
+	/* wait for the s/w reset to complete */
+	while (count--) {
+		cfg = gsc_read(GSC_SW_RESET);
+		if (!cfg)
+			break;
+		usleep_range(1000, 2000);
+	}
+
+	if (cfg) {
+		DRM_ERROR("failed to reset gsc h/w.\n");
+		return -EBUSY;
+	}
+
+	/* reset sequence */
+	cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK);
+	cfg |= (GSC_IN_BASE_ADDR_MASK |
+		GSC_IN_BASE_ADDR_PINGPONG(0));
+	gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK);
+	gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK);
+	gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK);
+
+	cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK);
+	cfg |= (GSC_OUT_BASE_ADDR_MASK |
+		GSC_OUT_BASE_ADDR_PINGPONG(0));
+	gsc_write(cfg, GSC_OUT_BASE_ADDR_Y_MASK);
+	gsc_write(cfg, GSC_OUT_BASE_ADDR_CB_MASK);
+	gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK);
+
+	return 0;
+}
+
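gsc_sw_reset above follows the standard bounded-poll idiom for self-clearing reset bits: write the reset bit, re-read the register up to N times with a short sleep between polls, and report -EBUSY if the bit never clears. Extracted into a generic helper, the idiom is roughly this (a kernel-flavoured sketch, not driver code):

    #include <linux/io.h>
    #include <linux/delay.h>
    #include <linux/errno.h>

    /* poll a register until it self-clears, or give up after 'retries' */
    static int wait_reg_clear(void __iomem *reg, int retries)
    {
            while (retries--) {
                    if (!readl(reg))
                            return 0;
                    usleep_range(1000, 2000);   /* 1-2ms between polls */
            }
            return -EBUSY;
    }

usleep_range() is preferred over udelay()/msleep() in this delay range because it lets the timer subsystem coalesce wakeups while keeping the latency bounded.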
+static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable)
+{
+	u32 gscblk_cfg;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	gscblk_cfg = readl(SYSREG_GSCBLK_CFG1);
+
+	if (enable)
+		gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) |
+				GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) |
+				GSC_BLK_SW_RESET_WB_DEST(ctx->id);
+	else
+		gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id);
+
+	writel(gscblk_cfg, SYSREG_GSCBLK_CFG1);
+}
+
+static void gsc_handle_irq(struct gsc_context *ctx, bool enable,
+		bool overflow, bool done)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:enable[%d]overflow[%d]level[%d]\n", __func__,
+			enable, overflow, done);
+
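+	/*
+	 * A set mask bit disables its interrupt source: start with both
+	 * sources masked, then clear only the bits requested below.
+	 */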
+	cfg = gsc_read(GSC_IRQ);
+	cfg |= (GSC_IRQ_OR_MASK | GSC_IRQ_FRMDONE_MASK);
+
+	if (enable)
+		cfg |= GSC_IRQ_ENABLE;
+	else
+		cfg &= ~GSC_IRQ_ENABLE;
+
+	if (overflow)
+		cfg &= ~GSC_IRQ_OR_MASK;
+	else
+		cfg |= GSC_IRQ_OR_MASK;
+
+	if (done)
+		cfg &= ~GSC_IRQ_FRMDONE_MASK;
+	else
+		cfg |= GSC_IRQ_FRMDONE_MASK;
+
+	gsc_write(cfg, GSC_IRQ);
+}
+
+static int gsc_src_set_fmt(struct device *dev, u32 fmt)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:fmt[0x%x]\n", __func__, fmt);
+
+	cfg = gsc_read(GSC_IN_CON);
+	cfg &= ~(GSC_IN_RGB_TYPE_MASK | GSC_IN_YUV422_1P_ORDER_MASK |
+		 GSC_IN_CHROMA_ORDER_MASK | GSC_IN_FORMAT_MASK |
+		 GSC_IN_TILE_TYPE_MASK | GSC_IN_TILE_MODE |
+		 GSC_IN_CHROM_STRIDE_SEL_MASK | GSC_IN_RB_SWAP_MASK);
+
+	switch (fmt) {
+	case DRM_FORMAT_RGB565:
+		cfg |= GSC_IN_RGB565;
+		break;
+	case DRM_FORMAT_XRGB8888:
+		cfg |= GSC_IN_XRGB8888;
+		break;
+	case DRM_FORMAT_BGRX8888:
+		cfg |= (GSC_IN_XRGB8888 | GSC_IN_RB_SWAP);
+		break;
+	case DRM_FORMAT_YUYV:
+		cfg |= (GSC_IN_YUV422_1P |
+			GSC_IN_YUV422_1P_ORDER_LSB_Y |
+			GSC_IN_CHROMA_ORDER_CBCR);
+		break;
+	case DRM_FORMAT_YVYU:
+		cfg |= (GSC_IN_YUV422_1P |
+			GSC_IN_YUV422_1P_ORDER_LSB_Y |
+			GSC_IN_CHROMA_ORDER_CRCB);
+		break;
+	case DRM_FORMAT_UYVY:
+		cfg |= (GSC_IN_YUV422_1P |
+			GSC_IN_YUV422_1P_OEDER_LSB_C |
+			GSC_IN_CHROMA_ORDER_CBCR);
+		break;
+	case DRM_FORMAT_VYUY:
+		cfg |= (GSC_IN_YUV422_1P |
+			GSC_IN_YUV422_1P_OEDER_LSB_C |
+			GSC_IN_CHROMA_ORDER_CRCB);
+		break;
+	case DRM_FORMAT_NV21:
+	case DRM_FORMAT_NV61:
+		cfg |= (GSC_IN_CHROMA_ORDER_CRCB |
+			GSC_IN_YUV420_2P);
+		break;
+	case DRM_FORMAT_YUV422:
+		cfg |= GSC_IN_YUV422_3P;
+		break;
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		cfg |= GSC_IN_YUV420_3P;
+		break;
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_NV16:
+		cfg |= (GSC_IN_CHROMA_ORDER_CBCR |
+			GSC_IN_YUV420_2P);
+		break;
+	case DRM_FORMAT_NV12MT:
+		cfg |= (GSC_IN_TILE_C_16x8 | GSC_IN_TILE_MODE);
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid target yuv order 0x%x.\n", fmt);
+		return -EINVAL;
+	}
+
+	gsc_write(cfg, GSC_IN_CON);
+
+	return 0;
+}
+
+static int gsc_src_set_transf(struct device *dev,
+		enum drm_exynos_degree degree,
+		enum drm_exynos_flip flip, bool *swap)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:degree[%d]flip[0x%x]\n", __func__,
+		degree, flip);
+
+	cfg = gsc_read(GSC_IN_CON);
+	cfg &= ~GSC_IN_ROT_MASK;
+
+	switch (degree) {
+	case EXYNOS_DRM_DEGREE_0:
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg |= GSC_IN_ROT_XFLIP;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg |= GSC_IN_ROT_YFLIP;
+		break;
+	case EXYNOS_DRM_DEGREE_90:
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg |= GSC_IN_ROT_90_XFLIP;
+		else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg |= GSC_IN_ROT_90_YFLIP;
+		else
+			cfg |= GSC_IN_ROT_90;
+		break;
+	case EXYNOS_DRM_DEGREE_180:
+		cfg |= GSC_IN_ROT_180;
+		break;
+	case EXYNOS_DRM_DEGREE_270:
+		cfg |= GSC_IN_ROT_270;
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid degree value %d.\n", degree);
+		return -EINVAL;
+	}
+
+	gsc_write(cfg, GSC_IN_CON);
+
+	ctx->rotation = cfg &
+		(GSC_IN_ROT_90 | GSC_IN_ROT_270) ? 1 : 0;
+	*swap = ctx->rotation;
+
+	return 0;
+}
+
+static int gsc_src_set_size(struct device *dev, int swap,
+		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct drm_exynos_pos img_pos = *pos;
+	struct gsc_scaler *sc = &ctx->sc;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
+		__func__, swap, pos->x, pos->y, pos->w, pos->h);
+
+	if (swap) {
+		img_pos.w = pos->h;
+		img_pos.h = pos->w;
+	}
+
+	/* pixel offset */
+	cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) |
+		GSC_SRCIMG_OFFSET_Y(img_pos.y));
+	gsc_write(cfg, GSC_SRCIMG_OFFSET);
+
+	/* cropped size */
+	cfg = (GSC_CROPPED_WIDTH(img_pos.w) |
+		GSC_CROPPED_HEIGHT(img_pos.h));
+	gsc_write(cfg, GSC_CROPPED_SIZE);
+
+	DRM_DEBUG_KMS("%s:hsize[%d]vsize[%d]\n",
+		__func__, sz->hsize, sz->vsize);
+
+	/* original size */
+	cfg = gsc_read(GSC_SRCIMG_SIZE);
+	cfg &= ~(GSC_SRCIMG_HEIGHT_MASK |
+		GSC_SRCIMG_WIDTH_MASK);
+
+	cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) |
+		GSC_SRCIMG_HEIGHT(sz->vsize));
+
+	gsc_write(cfg, GSC_SRCIMG_SIZE);
+
+	cfg = gsc_read(GSC_IN_CON);
+	cfg &= ~GSC_IN_RGB_TYPE_MASK;
+
+	DRM_DEBUG_KMS("%s:width[%d]range[%d]\n",
+		__func__, pos->w, sc->range);
+
+	if (pos->w >= GSC_WIDTH_ITU_709)
+		if (sc->range)
+			cfg |= GSC_IN_RGB_HD_WIDE;
+		else
+			cfg |= GSC_IN_RGB_HD_NARROW;
+	else
+		if (sc->range)
+			cfg |= GSC_IN_RGB_SD_WIDE;
+		else
+			cfg |= GSC_IN_RGB_SD_NARROW;
+
+	gsc_write(cfg, GSC_IN_CON);
+
+	return 0;
+}
+
+static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	bool masked;
+	u32 cfg;
+	u32 mask = 0x00000001 << buf_id;
+
+	DRM_DEBUG_KMS("%s:buf_id[%d]buf_type[%d]\n", __func__,
+		buf_id, buf_type);
+
+	/* mask register set */
+	cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK);
+
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		masked = false;
+		break;
+	case IPP_BUF_DEQUEUE:
+		masked = true;
+		break;
+	default:
+		dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
+		return -EINVAL;
+	}
+
+	/* update the buffer mask bit: set marks dequeued, clear enqueued */
+	cfg &= ~mask;
+	cfg |= masked << buf_id;
+	gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK);
+	gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK);
+	gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK);
+
+	return 0;
+}
+
+static int gsc_src_set_addr(struct device *dev,
+		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_property *property;
+
+	if (!c_node) {
+		DRM_ERROR("failed to get c_node.\n");
+		return -EFAULT;
+	}
+
+	property = &c_node->property;
+	if (!property) {
+		DRM_ERROR("failed to get property.\n");
+		return -EFAULT;
+	}
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]buf_id[%d]buf_type[%d]\n", __func__,
+		property->prop_id, buf_id, buf_type);
+
+	if (buf_id >= GSC_MAX_SRC) {
+		dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
+		return -EINVAL;
+	}
+
+	/* address register set */
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
+			GSC_IN_BASE_ADDR_Y(buf_id));
+		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+			GSC_IN_BASE_ADDR_CB(buf_id));
+		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+			GSC_IN_BASE_ADDR_CR(buf_id));
+		break;
+	case IPP_BUF_DEQUEUE:
+		gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id));
+		gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id));
+		gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id));
+		break;
+	default:
+		/* bypass */
+		break;
+	}
+
+	return gsc_src_set_buf_seq(ctx, buf_id, buf_type);
+}
+
+static struct exynos_drm_ipp_ops gsc_src_ops = {
+	.set_fmt = gsc_src_set_fmt,
+	.set_transf = gsc_src_set_transf,
+	.set_size = gsc_src_set_size,
+	.set_addr = gsc_src_set_addr,
+};
+
+static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:fmt[0x%x]\n", __func__, fmt);
+
+	cfg = gsc_read(GSC_OUT_CON);
+	cfg &= ~(GSC_OUT_RGB_TYPE_MASK | GSC_OUT_YUV422_1P_ORDER_MASK |
+		 GSC_OUT_CHROMA_ORDER_MASK | GSC_OUT_FORMAT_MASK |
+		 GSC_OUT_CHROM_STRIDE_SEL_MASK | GSC_OUT_RB_SWAP_MASK |
+		 GSC_OUT_GLOBAL_ALPHA_MASK);
+
+	switch (fmt) {
+	case DRM_FORMAT_RGB565:
+		cfg |= GSC_OUT_RGB565;
+		break;
+	case DRM_FORMAT_XRGB8888:
+		cfg |= GSC_OUT_XRGB8888;
+		break;
+	case DRM_FORMAT_BGRX8888:
+		cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP);
+		break;
+	case DRM_FORMAT_YUYV:
+		cfg |= (GSC_OUT_YUV422_1P |
+			GSC_OUT_YUV422_1P_ORDER_LSB_Y |
+			GSC_OUT_CHROMA_ORDER_CBCR);
+		break;
+	case DRM_FORMAT_YVYU:
+		cfg |= (GSC_OUT_YUV422_1P |
+			GSC_OUT_YUV422_1P_ORDER_LSB_Y |
+			GSC_OUT_CHROMA_ORDER_CRCB);
+		break;
+	case DRM_FORMAT_UYVY:
+		cfg |= (GSC_OUT_YUV422_1P |
+			GSC_OUT_YUV422_1P_OEDER_LSB_C |
+			GSC_OUT_CHROMA_ORDER_CBCR);
+		break;
+	case DRM_FORMAT_VYUY:
+		cfg |= (GSC_OUT_YUV422_1P |
+			GSC_OUT_YUV422_1P_OEDER_LSB_C |
+			GSC_OUT_CHROMA_ORDER_CRCB);
+		break;
+	case DRM_FORMAT_NV21:
+	case DRM_FORMAT_NV61:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_2P);
+		break;
+	case DRM_FORMAT_YUV422:
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		cfg |= GSC_OUT_YUV420_3P;
+		break;
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_NV16:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR |
+			GSC_OUT_YUV420_2P);
+		break;
+	case DRM_FORMAT_NV12MT:
+		cfg |= (GSC_OUT_TILE_C_16x8 | GSC_OUT_TILE_MODE);
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid target yuv order 0x%x.\n", fmt);
+		return -EINVAL;
+	}
+
+	gsc_write(cfg, GSC_OUT_CON);
+
+	return 0;
+}
+
+static int gsc_dst_set_transf(struct device *dev,
+		enum drm_exynos_degree degree,
+		enum drm_exynos_flip flip, bool *swap)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:degree[%d]flip[0x%x]\n", __func__,
+		degree, flip);
+
+	cfg = gsc_read(GSC_IN_CON);
+	cfg &= ~GSC_IN_ROT_MASK;
+
+	switch (degree) {
+	case EXYNOS_DRM_DEGREE_0:
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg |= GSC_IN_ROT_XFLIP;
+		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg |= GSC_IN_ROT_YFLIP;
+		break;
+	case EXYNOS_DRM_DEGREE_90:
+		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+			cfg |= GSC_IN_ROT_90_XFLIP;
+		else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+			cfg |= GSC_IN_ROT_90_YFLIP;
+		else
+			cfg |= GSC_IN_ROT_90;
+		break;
+	case EXYNOS_DRM_DEGREE_180:
+		cfg |= GSC_IN_ROT_180;
+		break;
+	case EXYNOS_DRM_DEGREE_270:
+		cfg |= GSC_IN_ROT_270;
+		break;
+	default:
+		dev_err(ippdrv->dev, "inavlid degree value %d.\n", degree);
+		return -EINVAL;
+	}
+
+	gsc_write(cfg, GSC_IN_CON);
+
+	ctx->rotation = cfg &
+		(GSC_IN_ROT_90 | GSC_IN_ROT_270) ? 1 : 0;
+	*swap = ctx->rotation;
+
+	return 0;
+}
+
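+/*
+ * Pick the power-of-two pre-scaling ratio (1, 2 or 4) applied before the
+ * main scaler; total downscaling beyond 8:1 is rejected.
+ */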
+static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio)
+{
+	DRM_DEBUG_KMS("%s:src[%d]dst[%d]\n", __func__, src, dst);
+
+	if (src >= dst * 8) {
+		DRM_ERROR("failed to make ratio and shift.\n");
+		return -EINVAL;
+	} else if (src >= dst * 4)
+		*ratio = 4;
+	else if (src >= dst * 2)
+		*ratio = 2;
+	else
+		*ratio = 1;
+
+	return 0;
+}
+
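+/*
+ * The pre-scaler shift factor is log2(hratio * vratio):
+ * 4x4 -> 4, 4x2/2x4 -> 3, 4x1/1x4/2x2 -> 2, 1x1 -> 0, all others -> 1.
+ */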
+static void gsc_get_prescaler_shfactor(u32 hratio, u32 vratio, u32 *shfactor)
+{
+	if (hratio == 4 && vratio == 4)
+		*shfactor = 4;
+	else if ((hratio == 4 && vratio == 2) ||
+		 (hratio == 2 && vratio == 4))
+		*shfactor = 3;
+	else if ((hratio == 4 && vratio == 1) ||
+		 (hratio == 1 && vratio == 4) ||
+		 (hratio == 2 && vratio == 2))
+		*shfactor = 2;
+	else if (hratio == 1 && vratio == 1)
+		*shfactor = 0;
+	else
+		*shfactor = 1;
+}
+
+static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc,
+		struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	u32 cfg;
+	u32 src_w, src_h, dst_w, dst_h;
+	int ret = 0;
+
+	src_w = src->w;
+	src_h = src->h;
+
+	if (ctx->rotation) {
+		dst_w = dst->h;
+		dst_h = dst->w;
+	} else {
+		dst_w = dst->w;
+		dst_h = dst->h;
+	}
+
+	ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio);
+	if (ret) {
+		dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+		return ret;
+	}
+
+	ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio);
+	if (ret) {
+		dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+		return ret;
+	}
+
+	DRM_DEBUG_KMS("%s:pre_hratio[%d]pre_vratio[%d]\n",
+		__func__, sc->pre_hratio, sc->pre_vratio);
+
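+	/* main scaling ratios in 16.16 fixed-point (source / destination) */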
+	sc->main_hratio = (src_w << 16) / dst_w;
+	sc->main_vratio = (src_h << 16) / dst_h;
+
+	DRM_DEBUG_KMS("%s:main_hratio[%ld]main_vratio[%ld]\n",
+		__func__, sc->main_hratio, sc->main_vratio);
+
+	gsc_get_prescaler_shfactor(sc->pre_hratio, sc->pre_vratio,
+		&sc->pre_shfactor);
+
+	DRM_DEBUG_KMS("%s:pre_shfactor[%d]\n", __func__,
+		sc->pre_shfactor);
+
+	cfg = (GSC_PRESC_SHFACTOR(sc->pre_shfactor) |
+		GSC_PRESC_H_RATIO(sc->pre_hratio) |
+		GSC_PRESC_V_RATIO(sc->pre_vratio));
+	gsc_write(cfg, GSC_PRE_SCALE_RATIO);
+
+	return ret;
+}
+
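+/*
+ * Load the horizontal 8-tap polyphase coefficients, selecting the filter
+ * bank for the main ratio: bank 0 covers upscaling, banks 1-6 the 7/8
+ * down to below-3/8 downscaling ranges.
+ */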
+static void gsc_set_h_coef(struct gsc_context *ctx, unsigned long main_hratio)
+{
+	int i, j, k, sc_ratio;
+
+	if (main_hratio <= GSC_SC_UP_MAX_RATIO)
+		sc_ratio = 0;
+	else if (main_hratio <= GSC_SC_DOWN_RATIO_7_8)
+		sc_ratio = 1;
+	else if (main_hratio <= GSC_SC_DOWN_RATIO_6_8)
+		sc_ratio = 2;
+	else if (main_hratio <= GSC_SC_DOWN_RATIO_5_8)
+		sc_ratio = 3;
+	else if (main_hratio <= GSC_SC_DOWN_RATIO_4_8)
+		sc_ratio = 4;
+	else if (main_hratio <= GSC_SC_DOWN_RATIO_3_8)
+		sc_ratio = 5;
+	else
+		sc_ratio = 6;
+
+	for (i = 0; i < GSC_COEF_PHASE; i++)
+		for (j = 0; j < GSC_COEF_H_8T; j++)
+			for (k = 0; k < GSC_COEF_DEPTH; k++)
+				gsc_write(h_coef_8t[sc_ratio][i][j],
+					GSC_HCOEF(i, j, k));
+}
+
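+/*
+ * Load the vertical 4-tap coefficients; the bank selection mirrors the
+ * horizontal path.
+ */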
+static void gsc_set_v_coef(struct gsc_context *ctx, unsigned long main_vratio)
+{
+	int i, j, k, sc_ratio;
+
+	if (main_vratio <= GSC_SC_UP_MAX_RATIO)
+		sc_ratio = 0;
+	else if (main_vratio <= GSC_SC_DOWN_RATIO_7_8)
+		sc_ratio = 1;
+	else if (main_vratio <= GSC_SC_DOWN_RATIO_6_8)
+		sc_ratio = 2;
+	else if (main_vratio <= GSC_SC_DOWN_RATIO_5_8)
+		sc_ratio = 3;
+	else if (main_vratio <= GSC_SC_DOWN_RATIO_4_8)
+		sc_ratio = 4;
+	else if (main_vratio <= GSC_SC_DOWN_RATIO_3_8)
+		sc_ratio = 5;
+	else
+		sc_ratio = 6;
+
+	for (i = 0; i < GSC_COEF_PHASE; i++)
+		for (j = 0; j < GSC_COEF_V_4T; j++)
+			for (k = 0; k < GSC_COEF_DEPTH; k++)
+				gsc_write(v_coef_4t[sc_ratio][i][j],
+					GSC_VCOEF(i, j, k));
+}
+
+static void gsc_set_scaler(struct gsc_context *ctx, struct gsc_scaler *sc)
+{
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:main_hratio[%ld]main_vratio[%ld]\n",
+		__func__, sc->main_hratio, sc->main_vratio);
+
+	gsc_set_h_coef(ctx, sc->main_hratio);
+	cfg = GSC_MAIN_H_RATIO_VALUE(sc->main_hratio);
+	gsc_write(cfg, GSC_MAIN_H_RATIO);
+
+	gsc_set_v_coef(ctx, sc->main_vratio);
+	cfg = GSC_MAIN_V_RATIO_VALUE(sc->main_vratio);
+	gsc_write(cfg, GSC_MAIN_V_RATIO);
+}
+
+static int gsc_dst_set_size(struct device *dev, int swap,
+		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct drm_exynos_pos img_pos = *pos;
+	struct gsc_scaler *sc = &ctx->sc;
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
+		__func__, swap, pos->x, pos->y, pos->w, pos->h);
+
+	if (swap) {
+		img_pos.w = pos->h;
+		img_pos.h = pos->w;
+	}
+
+	/* pixel offset */
+	cfg = (GSC_DSTIMG_OFFSET_X(pos->x) |
+		GSC_DSTIMG_OFFSET_Y(pos->y));
+	gsc_write(cfg, GSC_DSTIMG_OFFSET);
+
+	/* scaled size */
+	cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h));
+	gsc_write(cfg, GSC_SCALED_SIZE);
+
+	DRM_DEBUG_KMS("%s:hsize[%d]vsize[%d]\n",
+		__func__, sz->hsize, sz->vsize);
+
+	/* original size */
+	cfg = gsc_read(GSC_DSTIMG_SIZE);
+	cfg &= ~(GSC_DSTIMG_HEIGHT_MASK |
+		GSC_DSTIMG_WIDTH_MASK);
+	cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) |
+		GSC_DSTIMG_HEIGHT(sz->vsize));
+	gsc_write(cfg, GSC_DSTIMG_SIZE);
+
+	cfg = gsc_read(GSC_OUT_CON);
+	cfg &= ~GSC_OUT_RGB_TYPE_MASK;
+
+	DRM_DEBUG_KMS("%s:width[%d]range[%d]\n",
+		__func__, pos->w, sc->range);
+
+	if (pos->w >= GSC_WIDTH_ITU_709)
+		if (sc->range)
+			cfg |= GSC_OUT_RGB_HD_WIDE;
+		else
+			cfg |= GSC_OUT_RGB_HD_NARROW;
+	else
+		if (sc->range)
+			cfg |= GSC_OUT_RGB_SD_WIDE;
+		else
+			cfg |= GSC_OUT_RGB_SD_NARROW;
+
+	gsc_write(cfg, GSC_OUT_CON);
+
+	return 0;
+}
+
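+/*
+ * Count the enqueued destination buffers; a set mask bit marks a
+ * dequeued slot.
+ */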
+static int gsc_dst_get_buf_seq(struct gsc_context *ctx)
+{
+	u32 cfg, i, buf_num = GSC_REG_SZ;
+	u32 mask = 0x00000001;
+
+	cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK);
+
+	for (i = 0; i < GSC_REG_SZ; i++)
+		if (cfg & (mask << i))
+			buf_num--;
+
+	DRM_DEBUG_KMS("%s:buf_num[%d]\n", __func__, buf_num);
+
+	return buf_num;
+}
+
+static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	bool masked;
+	u32 cfg;
+	u32 mask = 0x00000001 << buf_id;
+	int ret = 0;
+
+	DRM_DEBUG_KMS("%s:buf_id[%d]buf_type[%d]\n", __func__,
+		buf_id, buf_type);
+
+	mutex_lock(&ctx->lock);
+
+	/* mask register set */
+	cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK);
+
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		masked = false;
+		break;
+	case IPP_BUF_DEQUEUE:
+		masked = true;
+		break;
+	default:
+		dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	/* update the buffer mask bit: set marks dequeued, clear enqueued */
+	cfg &= ~mask;
+	cfg |= masked << buf_id;
+	gsc_write(cfg, GSC_OUT_BASE_ADDR_Y_MASK);
+	gsc_write(cfg, GSC_OUT_BASE_ADDR_CB_MASK);
+	gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK);
+
+	/* interrupt enable */
+	if (buf_type == IPP_BUF_ENQUEUE &&
+	    gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START)
+		gsc_handle_irq(ctx, true, false, true);
+
+	/* interrupt disable */
+	if (buf_type == IPP_BUF_DEQUEUE &&
+	    gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP)
+		gsc_handle_irq(ctx, false, false, true);
+
+err_unlock:
+	mutex_unlock(&ctx->lock);
+	return ret;
+}
+
+static int gsc_dst_set_addr(struct device *dev,
+		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_property *property;
+
+	if (!c_node) {
+		DRM_ERROR("failed to get c_node.\n");
+		return -EFAULT;
+	}
+
+	property = &c_node->property;
+	if (!property) {
+		DRM_ERROR("failed to get property.\n");
+		return -EFAULT;
+	}
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]buf_id[%d]buf_type[%d]\n", __func__,
+		property->prop_id, buf_id, buf_type);
+
+	if (buf_id >= GSC_MAX_DST) {
+		dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
+		return -EINVAL;
+	}
+
+	/* address register set */
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
+			GSC_OUT_BASE_ADDR_Y(buf_id));
+		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
+			GSC_OUT_BASE_ADDR_CB(buf_id));
+		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
+			GSC_OUT_BASE_ADDR_CR(buf_id));
+		break;
+	case IPP_BUF_DEQUEUE:
+		gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id));
+		gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id));
+		gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id));
+		break;
+	default:
+		/* bypass */
+		break;
+	}
+
+	return gsc_dst_set_buf_seq(ctx, buf_id, buf_type);
+}
+
+static struct exynos_drm_ipp_ops gsc_dst_ops = {
+	.set_fmt = gsc_dst_set_fmt,
+	.set_transf = gsc_dst_set_transf,
+	.set_size = gsc_dst_set_size,
+	.set_addr = gsc_dst_set_addr,
+};
+
+static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable)
+{
+	DRM_DEBUG_KMS("%s:enable[%d]\n", __func__, enable);
+
+	if (enable) {
+		clk_enable(ctx->gsc_clk);
+		ctx->suspended = false;
+	} else {
+		clk_disable(ctx->gsc_clk);
+		ctx->suspended = true;
+	}
+
+	return 0;
+}
+
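+/*
+ * Scan forward from the hardware's current index for the first unmasked
+ * (still enqueued) source buffer and dequeue it.
+ */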
+static int gsc_get_src_buf_index(struct gsc_context *ctx)
+{
+	u32 cfg, curr_index, i;
+	u32 buf_id = GSC_MAX_SRC;
+	int ret;
+
+	DRM_DEBUG_KMS("%s:gsc id[%d]\n", __func__, ctx->id);
+
+	cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK);
+	curr_index = GSC_IN_CURR_GET_INDEX(cfg);
+
+	for (i = curr_index; i < GSC_MAX_SRC; i++) {
+		if (!((cfg >> i) & 0x1)) {
+			buf_id = i;
+			break;
+		}
+	}
+
+	if (buf_id == GSC_MAX_SRC) {
+		DRM_ERROR("failed to get in buffer index.\n");
+		return -EINVAL;
+	}
+
+	ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
+	if (ret < 0) {
+		DRM_ERROR("failed to dequeue.\n");
+		return ret;
+	}
+
+	DRM_DEBUG_KMS("%s:cfg[0x%x]curr_index[%d]buf_id[%d]\n", __func__, cfg,
+		curr_index, buf_id);
+
+	return buf_id;
+}
+
+static int gsc_get_dst_buf_index(struct gsc_context *ctx)
+{
+	u32 cfg, curr_index, i;
+	u32 buf_id = GSC_MAX_DST;
+	int ret;
+
+	DRM_DEBUG_KMS("%s:gsc id[%d]\n", __func__, ctx->id);
+
+	cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK);
+	curr_index = GSC_OUT_CURR_GET_INDEX(cfg);
+
+	for (i = curr_index; i < GSC_MAX_DST; i++) {
+		if (!((cfg >> i) & 0x1)) {
+			buf_id = i;
+			break;
+		}
+	}
+
+	if (buf_id == GSC_MAX_DST) {
+		DRM_ERROR("failed to get out buffer index.\n");
+		return -EINVAL;
+	}
+
+	ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
+	if (ret < 0) {
+		DRM_ERROR("failed to dequeue.\n");
+		return ret;
+	}
+
+	DRM_DEBUG_KMS("%s:cfg[0x%x]curr_index[%d]buf_id[%d]\n", __func__, cfg,
+		curr_index, buf_id);
+
+	return buf_id;
+}
+
+static irqreturn_t gsc_irq_handler(int irq, void *dev_id)
+{
+	struct gsc_context *ctx = dev_id;
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_event_work *event_work =
+		c_node->event_work;
+	u32 status;
+	int buf_id[EXYNOS_DRM_OPS_MAX];
+
+	DRM_DEBUG_KMS("%s:gsc id[%d]\n", __func__, ctx->id);
+
+	status = gsc_read(GSC_IRQ);
+	if (status & GSC_IRQ_STATUS_OR_IRQ) {
+		dev_err(ippdrv->dev, "occured overflow at %d, status 0x%x.\n",
+			ctx->id, status);
+		return IRQ_NONE;
+	}
+
+	if (status & GSC_IRQ_STATUS_OR_FRM_DONE) {
+		dev_dbg(ippdrv->dev, "occured frame done at %d, status 0x%x.\n",
+			ctx->id, status);
+
+		buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx);
+		if (buf_id[EXYNOS_DRM_OPS_SRC] < 0)
+			return IRQ_HANDLED;
+
+		buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx);
+		if (buf_id[EXYNOS_DRM_OPS_DST] < 0)
+			return IRQ_HANDLED;
+
+		DRM_DEBUG_KMS("%s:buf_id_src[%d]buf_id_dst[%d]\n", __func__,
+			buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]);
+
+		event_work->ippdrv = ippdrv;
+		event_work->buf_id[EXYNOS_DRM_OPS_SRC] =
+			buf_id[EXYNOS_DRM_OPS_SRC];
+		event_work->buf_id[EXYNOS_DRM_OPS_DST] =
+			buf_id[EXYNOS_DRM_OPS_DST];
+		queue_work(ippdrv->event_workq,
+			(struct work_struct *)event_work);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
+{
+	struct drm_exynos_ipp_prop_list *prop_list;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	prop_list = devm_kzalloc(ippdrv->dev, sizeof(*prop_list), GFP_KERNEL);
+	if (!prop_list) {
+		DRM_ERROR("failed to alloc property list.\n");
+		return -ENOMEM;
+	}
+
+	prop_list->version = 1;
+	prop_list->writeback = 1;
+	prop_list->refresh_min = GSC_REFRESH_MIN;
+	prop_list->refresh_max = GSC_REFRESH_MAX;
+	prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
+				(1 << EXYNOS_DRM_FLIP_HORIZONTAL);
+	prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
+				(1 << EXYNOS_DRM_DEGREE_90) |
+				(1 << EXYNOS_DRM_DEGREE_180) |
+				(1 << EXYNOS_DRM_DEGREE_270);
+	prop_list->csc = 1;
+	prop_list->crop = 1;
+	prop_list->crop_max.hsize = GSC_CROP_MAX;
+	prop_list->crop_max.vsize = GSC_CROP_MAX;
+	prop_list->crop_min.hsize = GSC_CROP_MIN;
+	prop_list->crop_min.vsize = GSC_CROP_MIN;
+	prop_list->scale = 1;
+	prop_list->scale_max.hsize = GSC_SCALE_MAX;
+	prop_list->scale_max.vsize = GSC_SCALE_MAX;
+	prop_list->scale_min.hsize = GSC_SCALE_MIN;
+	prop_list->scale_min.vsize = GSC_SCALE_MIN;
+
+	ippdrv->prop_list = prop_list;
+
+	return 0;
+}
+
+static inline bool gsc_check_drm_flip(enum drm_exynos_flip flip)
+{
+	switch (flip) {
+	case EXYNOS_DRM_FLIP_NONE:
+	case EXYNOS_DRM_FLIP_VERTICAL:
+	case EXYNOS_DRM_FLIP_HORIZONTAL:
+	case EXYNOS_DRM_FLIP_VERTICAL | EXYNOS_DRM_FLIP_HORIZONTAL:
+		return true;
+	default:
+		DRM_DEBUG_KMS("%s:invalid flip\n", __func__);
+		return false;
+	}
+}
+
+static int gsc_ippdrv_check_property(struct device *dev,
+		struct drm_exynos_ipp_property *property)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_prop_list *pp = ippdrv->prop_list;
+	struct drm_exynos_ipp_config *config;
+	struct drm_exynos_pos *pos;
+	struct drm_exynos_sz *sz;
+	bool swap;
+	int i;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	for_each_ipp_ops(i) {
+		if ((i == EXYNOS_DRM_OPS_SRC) &&
+			(property->cmd == IPP_CMD_WB))
+			continue;
+
+		config = &property->config[i];
+		pos = &config->pos;
+		sz = &config->sz;
+
+		/* check for flip */
+		if (!gsc_check_drm_flip(config->flip)) {
+			DRM_ERROR("invalid flip.\n");
+			goto err_property;
+		}
+
+		/* check for degree */
+		switch (config->degree) {
+		case EXYNOS_DRM_DEGREE_90:
+		case EXYNOS_DRM_DEGREE_270:
+			swap = true;
+			break;
+		case EXYNOS_DRM_DEGREE_0:
+		case EXYNOS_DRM_DEGREE_180:
+			swap = false;
+			break;
+		default:
+			DRM_ERROR("invalid degree.\n");
+			goto err_property;
+		}
+
+		/* check for buffer bound */
+		if ((pos->x + pos->w > sz->hsize) ||
+			(pos->y + pos->h > sz->vsize)) {
+			DRM_ERROR("out of buf bound.\n");
+			goto err_property;
+		}
+
+		/* check for crop */
+		if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
+			if (swap) {
+				if ((pos->h < pp->crop_min.hsize) ||
+					(sz->vsize > pp->crop_max.hsize) ||
+					(pos->w < pp->crop_min.vsize) ||
+					(sz->hsize > pp->crop_max.vsize)) {
+					DRM_ERROR("out of crop size.\n");
+					goto err_property;
+				}
+			} else {
+				if ((pos->w < pp->crop_min.hsize) ||
+					(sz->hsize > pp->crop_max.hsize) ||
+					(pos->h < pp->crop_min.vsize) ||
+					(sz->vsize > pp->crop_max.vsize)) {
+					DRM_ERROR("out of crop size.\n");
+					goto err_property;
+				}
+			}
+		}
+
+		/* check for scale */
+		if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
+			if (swap) {
+				if ((pos->h < pp->scale_min.hsize) ||
+					(sz->vsize > pp->scale_max.hsize) ||
+					(pos->w < pp->scale_min.vsize) ||
+					(sz->hsize > pp->scale_max.vsize)) {
+					DRM_ERROR("out of scale size.\n");
+					goto err_property;
+				}
+			} else {
+				if ((pos->w < pp->scale_min.hsize) ||
+					(sz->hsize > pp->scale_max.hsize) ||
+					(pos->h < pp->scale_min.vsize) ||
+					(sz->vsize > pp->scale_max.vsize)) {
+					DRM_ERROR("out of scale size.\n");
+					goto err_property;
+				}
+			}
+		}
+	}
+
+	return 0;
+
+err_property:
+	for_each_ipp_ops(i) {
+		if ((i == EXYNOS_DRM_OPS_SRC) &&
+			(property->cmd == IPP_CMD_WB))
+			continue;
+
+		config = &property->config[i];
+		pos = &config->pos;
+		sz = &config->sz;
+
+		DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
+			i ? "dst" : "src", config->flip, config->degree,
+			pos->x, pos->y, pos->w, pos->h,
+			sz->hsize, sz->vsize);
+	}
+
+	return -EINVAL;
+}
+
+static int gsc_ippdrv_reset(struct device *dev)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct gsc_scaler *sc = &ctx->sc;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	/* reset h/w block */
+	ret = gsc_sw_reset(ctx);
+	if (ret < 0) {
+		dev_err(dev, "failed to reset hardware.\n");
+		return ret;
+	}
+
+	/* scaler setting */
+	memset(&ctx->sc, 0x0, sizeof(ctx->sc));
+	sc->range = true;
+
+	return 0;
+}
+
+static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_property *property;
+	struct drm_exynos_ipp_config *config;
+	struct drm_exynos_pos	img_pos[EXYNOS_DRM_OPS_MAX];
+	struct drm_exynos_ipp_set_wb set_wb;
+	u32 cfg;
+	int ret, i;
+
+	DRM_DEBUG_KMS("%s:cmd[%d]\n", __func__, cmd);
+
+	if (!c_node) {
+		DRM_ERROR("failed to get c_node.\n");
+		return -EINVAL;
+	}
+
+	property = &c_node->property;
+	if (!property) {
+		DRM_ERROR("failed to get property.\n");
+		return -EINVAL;
+	}
+
+	gsc_handle_irq(ctx, true, false, true);
+
+	for_each_ipp_ops(i) {
+		config = &property->config[i];
+		img_pos[i] = config->pos;
+	}
+
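+	/* set up the input/output data paths for the requested command */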
+	switch (cmd) {
+	case IPP_CMD_M2M:
+		/* enable one shot */
+		cfg = gsc_read(GSC_ENABLE);
+		cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK |
+			GSC_ENABLE_CLK_GATE_MODE_MASK);
+		cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT;
+		gsc_write(cfg, GSC_ENABLE);
+
+		/* src dma memory */
+		cfg = gsc_read(GSC_IN_CON);
+		cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
+		cfg |= GSC_IN_PATH_MEMORY;
+		gsc_write(cfg, GSC_IN_CON);
+
+		/* dst dma memory */
+		cfg = gsc_read(GSC_OUT_CON);
+		cfg |= GSC_OUT_PATH_MEMORY;
+		gsc_write(cfg, GSC_OUT_CON);
+		break;
+	case IPP_CMD_WB:
+		set_wb.enable = 1;
+		set_wb.refresh = property->refresh_rate;
+		gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
+		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
+
+		/* src local path */
+		cfg = gsc_read(GSC_IN_CON);
+		cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
+		cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB);
+		gsc_write(cfg, GSC_IN_CON);
+
+		/* dst dma memory */
+		cfg = gsc_read(GSC_OUT_CON);
+		cfg |= GSC_OUT_PATH_MEMORY;
+		gsc_write(cfg, GSC_OUT_CON);
+		break;
+	case IPP_CMD_OUTPUT:
+		/* src dma memory */
+		cfg = gsc_read(GSC_IN_CON);
+		cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
+		cfg |= GSC_IN_PATH_MEMORY;
+		gsc_write(cfg, GSC_IN_CON);
+
+		/* dst local path */
+		cfg = gsc_read(GSC_OUT_CON);
+		cfg |= GSC_OUT_PATH_MEMORY;
+		gsc_write(cfg, GSC_OUT_CON);
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(dev, "invalid operations.\n");
+		return ret;
+	}
+
+	ret = gsc_set_prescaler(ctx, &ctx->sc,
+		&img_pos[EXYNOS_DRM_OPS_SRC],
+		&img_pos[EXYNOS_DRM_OPS_DST]);
+	if (ret) {
+		dev_err(dev, "failed to set precalser.\n");
+		return ret;
+	}
+
+	gsc_set_scaler(ctx, &ctx->sc);
+
+	cfg = gsc_read(GSC_ENABLE);
+	cfg |= GSC_ENABLE_ON;
+	gsc_write(cfg, GSC_ENABLE);
+
+	return 0;
+}
+
+static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct drm_exynos_ipp_set_wb set_wb = {0, 0};
+	u32 cfg;
+
+	DRM_DEBUG_KMS("%s:cmd[%d]\n", __func__, cmd);
+
+	switch (cmd) {
+	case IPP_CMD_M2M:
+		/* bypass */
+		break;
+	case IPP_CMD_WB:
+		gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
+		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
+		break;
+	case IPP_CMD_OUTPUT:
+	default:
+		dev_err(dev, "invalid operations.\n");
+		break;
+	}
+
+	gsc_handle_irq(ctx, false, false, true);
+
+	/* reset sequence */
+	gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK);
+	gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK);
+	gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK);
+
+	cfg = gsc_read(GSC_ENABLE);
+	cfg &= ~GSC_ENABLE_ON;
+	gsc_write(cfg, GSC_ENABLE);
+}
+
+static int __devinit gsc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct gsc_context *ctx;
+	struct resource *res;
+	struct exynos_drm_ippdrv *ippdrv;
+	int ret;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	/* clock control */
+	ctx->gsc_clk = clk_get(dev, "gscl");
+	if (IS_ERR(ctx->gsc_clk)) {
+		dev_err(dev, "failed to get gsc clock.\n");
+		ret = PTR_ERR(ctx->gsc_clk);
+		goto err_ctx;
+	}
+
+	/* resource memory */
+	ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!ctx->regs_res) {
+		dev_err(dev, "failed to find registers.\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs = devm_request_and_ioremap(dev, ctx->regs_res);
+	if (!ctx->regs) {
+		dev_err(dev, "failed to map registers.\n");
+		ret = -ENXIO;
+		goto err_clk;
+	}
+
+	/* resource irq */
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "failed to request irq resource.\n");
+		ret = -ENOENT;
+		goto err_get_regs;
+	}
+
+	ctx->irq = res->start;
+	ret = request_threaded_irq(ctx->irq, NULL, gsc_irq_handler,
+		IRQF_ONESHOT, "drm_gsc", ctx);
+	if (ret < 0) {
+		dev_err(dev, "failed to request irq.\n");
+		goto err_get_regs;
+	}
+
+	/* context initialization */
+	ctx->id = pdev->id;
+
+	ippdrv = &ctx->ippdrv;
+	ippdrv->dev = dev;
+	ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops;
+	ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops;
+	ippdrv->check_property = gsc_ippdrv_check_property;
+	ippdrv->reset = gsc_ippdrv_reset;
+	ippdrv->start = gsc_ippdrv_start;
+	ippdrv->stop = gsc_ippdrv_stop;
+	ret = gsc_init_prop_list(ippdrv);
+	if (ret < 0) {
+		dev_err(dev, "failed to init property list.\n");
+		goto err_get_irq;
+	}
+
+	DRM_DEBUG_KMS("%s:id[%d]ippdrv[0x%x]\n", __func__, ctx->id,
+		(int)ippdrv);
+
+	mutex_init(&ctx->lock);
+	platform_set_drvdata(pdev, ctx);
+
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
+	ret = exynos_drm_ippdrv_register(ippdrv);
+	if (ret < 0) {
+		dev_err(dev, "failed to register drm gsc device.\n");
+		goto err_ippdrv_register;
+	}
+
+	dev_info(&pdev->dev, "drm gsc registered successfully.\n");
+
+	return 0;
+
+err_ippdrv_register:
+	devm_kfree(dev, ippdrv->prop_list);
+	pm_runtime_disable(dev);
+err_get_irq:
+	free_irq(ctx->irq, ctx);
+err_get_regs:
+	devm_iounmap(dev, ctx->regs);
+err_clk:
+	clk_put(ctx->gsc_clk);
+err_ctx:
+	devm_kfree(dev, ctx);
+	return ret;
+}
+
+static int __devexit gsc_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct gsc_context *ctx = get_gsc_context(dev);
+	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+
+	devm_kfree(dev, ippdrv->prop_list);
+	exynos_drm_ippdrv_unregister(ippdrv);
+	mutex_destroy(&ctx->lock);
+
+	pm_runtime_set_suspended(dev);
+	pm_runtime_disable(dev);
+
+	free_irq(ctx->irq, ctx);
+	devm_iounmap(dev, ctx->regs);
+
+	clk_put(ctx->gsc_clk);
+
+	devm_kfree(dev, ctx);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int gsc_suspend(struct device *dev)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, ctx->id);
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	return gsc_clk_ctrl(ctx, false);
+}
+
+static int gsc_resume(struct device *dev)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, ctx->id);
+
+	if (!pm_runtime_suspended(dev))
+		return gsc_clk_ctrl(ctx, true);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PM_RUNTIME
+static int gsc_runtime_suspend(struct device *dev)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, ctx->id);
+
+	return gsc_clk_ctrl(ctx, false);
+}
+
+static int gsc_runtime_resume(struct device *dev)
+{
+	struct gsc_context *ctx = get_gsc_context(dev);
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __FILE__, ctx->id);
+
+	return gsc_clk_ctrl(ctx, true);
+}
+#endif
+
+static const struct dev_pm_ops gsc_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(gsc_suspend, gsc_resume)
+	SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
+};
+
+struct platform_driver gsc_driver = {
+	.probe		= gsc_probe,
+	.remove		= __devexit_p(gsc_remove),
+	.driver		= {
+		.name	= "exynos-drm-gsc",
+		.owner	= THIS_MODULE,
+		.pm	= &gsc_pm_ops,
+	},
+};
+
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h
new file mode 100644
index 0000000..b3c3bc6
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *	Jinyoung Jeon <jy0.jeon@samsung.com>
+ *	Sangmin Lee <lsmin.lee@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_GSC_H_
+#define _EXYNOS_DRM_GSC_H_
+
+/*
+ * TODO
+ * FIMD output interface notifier callback.
+ * Mixer output interface notifier callback.
+ */
+
+#endif /* _EXYNOS_DRM_GSC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_hdmi.c b/drivers/gpu/drm/exynos/exynos_drm_hdmi.c
index c3b9e2b..55793c4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_hdmi.c
@@ -29,6 +29,9 @@
 #define get_ctx_from_subdrv(subdrv)	container_of(subdrv,\
 					struct drm_hdmi_context, subdrv);
 
+/* platform device pointer for common drm hdmi device. */
+static struct platform_device *exynos_drm_hdmi_pdev;
+
 /* Common hdmi subdrv needs to access the hdmi and mixer through context.
 * These should be initialized by the respective drivers */
 static struct exynos_drm_hdmi_context *hdmi_ctx;
@@ -46,6 +49,25 @@
 	bool	enabled[MIXER_WIN_NR];
 };
 
+int exynos_platform_device_hdmi_register(void)
+{
+	if (exynos_drm_hdmi_pdev)
+		return -EEXIST;
+
+	exynos_drm_hdmi_pdev = platform_device_register_simple(
+			"exynos-drm-hdmi", -1, NULL, 0);
+	if (IS_ERR_OR_NULL(exynos_drm_hdmi_pdev))
+		return PTR_ERR(exynos_drm_hdmi_pdev);
+
+	return 0;
+}
+
+void exynos_platform_device_hdmi_unregister(void)
+{
+	if (exynos_drm_hdmi_pdev)
+		platform_device_unregister(exynos_drm_hdmi_pdev);
+}
+
 void exynos_hdmi_drv_attach(struct exynos_drm_hdmi_context *ctx)
 {
 	if (ctx)
@@ -157,6 +179,16 @@
 		return mixer_ops->disable_vblank(ctx->mixer_ctx->ctx);
 }
 
+static void drm_hdmi_wait_for_vblank(struct device *subdrv_dev)
+{
+	struct drm_hdmi_context *ctx = to_context(subdrv_dev);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (mixer_ops && mixer_ops->wait_for_vblank)
+		mixer_ops->wait_for_vblank(ctx->mixer_ctx->ctx);
+}
+
 static void drm_hdmi_mode_fixup(struct device *subdrv_dev,
 				struct drm_connector *connector,
 				const struct drm_display_mode *mode,
@@ -238,6 +270,7 @@
 	.apply = drm_hdmi_apply,
 	.enable_vblank = drm_hdmi_enable_vblank,
 	.disable_vblank = drm_hdmi_disable_vblank,
+	.wait_for_vblank = drm_hdmi_wait_for_vblank,
 	.mode_fixup = drm_hdmi_mode_fixup,
 	.mode_set = drm_hdmi_mode_set,
 	.get_max_resol = drm_hdmi_get_max_resol,
@@ -291,21 +324,10 @@
 	ctx->enabled[win] = false;
 }
 
-static void drm_mixer_wait_for_vblank(struct device *subdrv_dev)
-{
-	struct drm_hdmi_context *ctx = to_context(subdrv_dev);
-
-	DRM_DEBUG_KMS("%s\n", __FILE__);
-
-	if (mixer_ops && mixer_ops->wait_for_vblank)
-		mixer_ops->wait_for_vblank(ctx->mixer_ctx->ctx);
-}
-
 static struct exynos_drm_overlay_ops drm_hdmi_overlay_ops = {
 	.mode_set = drm_mixer_mode_set,
 	.commit = drm_mixer_commit,
 	.disable = drm_mixer_disable,
-	.wait_for_vblank = drm_mixer_wait_for_vblank,
 };
 
 static struct exynos_drm_manager hdmi_manager = {
@@ -346,9 +368,23 @@
 	ctx->hdmi_ctx->drm_dev = drm_dev;
 	ctx->mixer_ctx->drm_dev = drm_dev;
 
+	if (mixer_ops->iommu_on)
+		mixer_ops->iommu_on(ctx->mixer_ctx->ctx, true);
+
 	return 0;
 }
 
+static void hdmi_subdrv_remove(struct drm_device *drm_dev, struct device *dev)
+{
+	struct drm_hdmi_context *ctx;
+	struct exynos_drm_subdrv *subdrv = to_subdrv(dev);
+
+	ctx = get_ctx_from_subdrv(subdrv);
+
+	if (mixer_ops->iommu_on)
+		mixer_ops->iommu_on(ctx->mixer_ctx->ctx, false);
+}
+
 static int __devinit exynos_drm_hdmi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -368,6 +404,7 @@
 	subdrv->dev = dev;
 	subdrv->manager = &hdmi_manager;
 	subdrv->probe = hdmi_subdrv_probe;
+	subdrv->remove = hdmi_subdrv_remove;
 
 	platform_set_drvdata(pdev, subdrv);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_hdmi.h b/drivers/gpu/drm/exynos/exynos_drm_hdmi.h
index 2da5ffd..fcc3093 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_hdmi.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_hdmi.h
@@ -62,12 +62,13 @@
 
 struct exynos_mixer_ops {
 	/* manager */
+	int (*iommu_on)(void *ctx, bool enable);
 	int (*enable_vblank)(void *ctx, int pipe);
 	void (*disable_vblank)(void *ctx);
+	void (*wait_for_vblank)(void *ctx);
 	void (*dpms)(void *ctx, int mode);
 
 	/* overlay */
-	void (*wait_for_vblank)(void *ctx);
 	void (*win_mode_set)(void *ctx, struct exynos_drm_overlay *overlay);
 	void (*win_commit)(void *ctx, int zpos);
 	void (*win_disable)(void *ctx, int zpos);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.c b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
new file mode 100644
index 0000000..2482b7f
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
@@ -0,0 +1,150 @@
+/* exynos_drm_iommu.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drmP.h>
+#include <drm/exynos_drm.h>
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/kref.h>
+
+#include <asm/dma-iommu.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
+
+/*
+ * drm_create_iommu_mapping - create a mapping structure
+ *
+ * @drm_dev: DRM device
+ */
+int drm_create_iommu_mapping(struct drm_device *drm_dev)
+{
+	struct dma_iommu_mapping *mapping = NULL;
+	struct exynos_drm_private *priv = drm_dev->dev_private;
+	struct device *dev = drm_dev->dev;
+
+	if (!priv->da_start)
+		priv->da_start = EXYNOS_DEV_ADDR_START;
+	if (!priv->da_space_size)
+		priv->da_space_size = EXYNOS_DEV_ADDR_SIZE;
+	if (!priv->da_space_order)
+		priv->da_space_order = EXYNOS_DEV_ADDR_ORDER;
+
+	mapping = arm_iommu_create_mapping(&platform_bus_type, priv->da_start,
+						priv->da_space_size,
+						priv->da_space_order);
+	if (IS_ERR(mapping))
+		return PTR_ERR(mapping);
+
+	dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms),
+					GFP_KERNEL);
+	dma_set_max_seg_size(dev, 0xffffffffu);
+	dev->archdata.mapping = mapping;
+
+	return 0;
+}
+
+/*
+ * drm_release_iommu_mapping - release iommu mapping structure
+ *
+ * @drm_dev: DRM device
+ *
+ * if mapping->kref becomes 0 then all things related to iommu mapping
+ * will be released
+ */
+void drm_release_iommu_mapping(struct drm_device *drm_dev)
+{
+	struct device *dev = drm_dev->dev;
+
+	arm_iommu_release_mapping(dev->archdata.mapping);
+}
+
+/*
+ * drm_iommu_attach_device- attach device to iommu mapping
+ *
+ * @drm_dev: DRM device
+ * @subdrv_dev: device to be attach
+ *
+ * This function should be called by sub drivers to attach it to iommu
+ * mapping.
+ */
+int drm_iommu_attach_device(struct drm_device *drm_dev,
+				struct device *subdrv_dev)
+{
+	struct device *dev = drm_dev->dev;
+	int ret;
+
+	if (!dev->archdata.mapping) {
+		DRM_ERROR("iommu_mapping is null.\n");
+		return -EFAULT;
+	}
+
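+	/* allocate dma_parms and lift the default DMA segment size limit */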
+	subdrv_dev->dma_parms = devm_kzalloc(subdrv_dev,
+					sizeof(*subdrv_dev->dma_parms),
+					GFP_KERNEL);
+	dma_set_max_seg_size(subdrv_dev, 0xffffffffu);
+
+	ret = arm_iommu_attach_device(subdrv_dev, dev->archdata.mapping);
+	if (ret < 0) {
+		DRM_DEBUG_KMS("failed iommu attach.\n");
+		return ret;
+	}
+
+	/*
+	 * Set dma_ops on the drm_device only once.
+	 *
+	 * The DMA mapping API needs a device object to allocate physical
+	 * memory and map it through the IOMMU table.
+	 * Once the IOMMU attach succeeds, the sub driver picks up the
+	 * IOMMU dma_ops, and all sub drivers share the same dma_ops.
+	 */
+	if (!dev->archdata.dma_ops)
+		dev->archdata.dma_ops = subdrv_dev->archdata.dma_ops;
+
+	return 0;
+}
+
+/*
+ * drm_iommu_detach_device -detach device address space mapping from device
+ *
+ * @drm_dev: DRM device
+ * @subdrv_dev: device to be detached
+ *
+ * This function should be called by sub drivers to detach it from iommu
+ * mapping
+ */
+void drm_iommu_detach_device(struct drm_device *drm_dev,
+				struct device *subdrv_dev)
+{
+	struct device *dev = drm_dev->dev;
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+
+	if (!mapping || !mapping->domain)
+		return;
+
+	iommu_detach_device(mapping->domain, subdrv_dev);
+	drm_release_iommu_mapping(drm_dev);
+}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
new file mode 100644
index 0000000..18a0ca1
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
@@ -0,0 +1,85 @@
+/* exynos_drm_iommu.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_IOMMU_H_
+#define _EXYNOS_DRM_IOMMU_H_
+
+#define EXYNOS_DEV_ADDR_START	0x20000000
+#define EXYNOS_DEV_ADDR_SIZE	0x40000000
+#define EXYNOS_DEV_ADDR_ORDER	0x4
+
+#ifdef CONFIG_DRM_EXYNOS_IOMMU
+
+int drm_create_iommu_mapping(struct drm_device *drm_dev);
+
+void drm_release_iommu_mapping(struct drm_device *drm_dev);
+
+int drm_iommu_attach_device(struct drm_device *drm_dev,
+				struct device *subdrv_dev);
+
+void drm_iommu_detach_device(struct drm_device *drm_dev,
+				struct device *subdrv_dev);
+
+static inline bool is_drm_iommu_supported(struct drm_device *drm_dev)
+{
+#ifdef CONFIG_ARM_DMA_USE_IOMMU
+	struct device *dev = drm_dev->dev;
+
+	return dev->archdata.mapping ? true : false;
+#else
+	return false;
+#endif
+}
+
+#else
+
+struct dma_iommu_mapping;
+static inline int drm_create_iommu_mapping(struct drm_device *drm_dev)
+{
+	return 0;
+}
+
+static inline void drm_release_iommu_mapping(struct drm_device *drm_dev)
+{
+}
+
+static inline int drm_iommu_attach_device(struct drm_device *drm_dev,
+						struct device *subdrv_dev)
+{
+	return 0;
+}
+
+static inline void drm_iommu_detach_device(struct drm_device *drm_dev,
+						struct device *subdrv_dev)
+{
+}
+
+static inline bool is_drm_iommu_supported(struct drm_device *drm_dev)
+{
+	return false;
+}
+
+#endif
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
new file mode 100644
index 0000000..49eebe9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -0,0 +1,2060 @@
+/*
+ * Copyright (C) 2012 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *	Jinyoung Jeon <jy0.jeon@samsung.com>
+ *	Sangmin Lee <lsmin.lee@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <plat/map-base.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_ipp.h"
+#include "exynos_drm_iommu.h"
+
+/*
+ * IPP stands for Image Post Processing and supports image
+ * scaler/rotator and input/output DMA operations using FIMC,
+ * GSC, Rotator, and so on.
+ * IPP is an integrated device driver for hardware blocks that
+ * share these capabilities.
+ */
+
+/*
+ * TODO
+ * 1. expand command control id.
+ * 2. integrate property and config.
+ * 3. remove the send_event id check routine.
+ * 4. compare send_event ids if needed.
+ * 5. free the subdrv_remove notifier callback list if needed.
+ * 6. check subdrv_open for multi-open handling.
+ * 7. implement power and sysmmu control in power_on.
+ */
+
+#define get_ipp_context(dev)	platform_get_drvdata(to_platform_device(dev))
+#define ipp_is_m2m_cmd(c)	(c == IPP_CMD_M2M)
+
+/*
+ * A structure of event.
+ *
+ * @base: base of event.
+ * @event: ipp event.
+ */
+struct drm_exynos_ipp_send_event {
+	struct drm_pending_event	base;
+	struct drm_exynos_ipp_event	event;
+};
+
+/*
+ * A structure of memory node.
+ *
+ * @list: list head to memory queue information.
+ * @ops_id: id of operations.
+ * @prop_id: id of property.
+ * @buf_id: id of buffer.
+ * @buf_info: gem objects and dma address, size.
+ * @filp: a pointer to drm_file.
+ */
+struct drm_exynos_ipp_mem_node {
+	struct list_head	list;
+	enum drm_exynos_ops_id	ops_id;
+	u32	prop_id;
+	u32	buf_id;
+	struct drm_exynos_ipp_buf_info	buf_info;
+	struct drm_file		*filp;
+};
+
+/*
+ * A structure of ipp context.
+ *
+ * @subdrv: prepare initialization using subdrv.
+ * @ipp_lock: lock for synchronization of access to ipp_idr.
+ * @prop_lock: lock for synchronization of access to prop_idr.
+ * @ipp_idr: ipp driver idr.
+ * @prop_idr: property idr.
+ * @event_workq: event work queue.
+ * @cmd_workq: command work queue.
+ */
+struct ipp_context {
+	struct exynos_drm_subdrv	subdrv;
+	struct mutex	ipp_lock;
+	struct mutex	prop_lock;
+	struct idr	ipp_idr;
+	struct idr	prop_idr;
+	struct workqueue_struct	*event_workq;
+	struct workqueue_struct	*cmd_workq;
+};
+
+static LIST_HEAD(exynos_drm_ippdrv_list);
+static DEFINE_MUTEX(exynos_drm_ippdrv_lock);
+static BLOCKING_NOTIFIER_HEAD(exynos_drm_ippnb_list);
+
+int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv)
+{
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!ippdrv)
+		return -EINVAL;
+
+	mutex_lock(&exynos_drm_ippdrv_lock);
+	list_add_tail(&ippdrv->drv_list, &exynos_drm_ippdrv_list);
+	mutex_unlock(&exynos_drm_ippdrv_lock);
+
+	return 0;
+}
+
+int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv)
+{
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!ippdrv)
+		return -EINVAL;
+
+	mutex_lock(&exynos_drm_ippdrv_lock);
+	list_del(&ippdrv->drv_list);
+	mutex_unlock(&exynos_drm_ippdrv_lock);
+
+	return 0;
+}
+
+static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj,
+		u32 *idp)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
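+	/*
+	 * idr_pre_get() only reserves memory; another allocator may consume
+	 * it before idr_get_new_above() runs, so retry on -EAGAIN.
+	 */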
+again:
+	/* ensure there is space available to allocate a handle */
+	if (idr_pre_get(id_idr, GFP_KERNEL) == 0) {
+		DRM_ERROR("failed to get idr.\n");
+		return -ENOMEM;
+	}
+
+	/* do the allocation under our mutexlock */
+	mutex_lock(lock);
+	ret = idr_get_new_above(id_idr, obj, 1, (int *)idp);
+	mutex_unlock(lock);
+	if (ret == -EAGAIN)
+		goto again;
+
+	return ret;
+}
+
+static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id)
+{
+	void *obj;
+
+	DRM_DEBUG_KMS("%s:id[%d]\n", __func__, id);
+
+	mutex_lock(lock);
+
+	/* find object using handle */
+	obj = idr_find(id_idr, id);
+	if (!obj) {
+		DRM_ERROR("failed to find object.\n");
+		mutex_unlock(lock);
+		return ERR_PTR(-ENODEV);
+	}
+
+	mutex_unlock(lock);
+
+	return obj;
+}
+
+static inline bool ipp_check_dedicated(struct exynos_drm_ippdrv *ippdrv,
+		enum drm_exynos_ipp_cmd	cmd)
+{
+	/*
+	 * Treat the driver as unavailable if it is dedicated, or if a
+	 * WB/OUTPUT command is requested while the device is powered on.
+	 */
+	if (ippdrv->dedicated || (!ipp_is_m2m_cmd(cmd) &&
+	    !pm_runtime_suspended(ippdrv->dev)))
+		return true;
+
+	return false;
+}
+
+static struct exynos_drm_ippdrv *ipp_find_driver(struct ipp_context *ctx,
+		struct drm_exynos_ipp_property *property)
+{
+	struct exynos_drm_ippdrv *ippdrv;
+	u32 ipp_id = property->ipp_id;
+
+	DRM_DEBUG_KMS("%s:ipp_id[%d]\n", __func__, ipp_id);
+
+	if (ipp_id) {
+		/* find ipp driver using idr */
+		ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock,
+			ipp_id);
+		if (IS_ERR_OR_NULL(ippdrv)) {
+			DRM_ERROR("not found ipp%d driver.\n", ipp_id);
+			return ippdrv;
+		}
+
+		/*
+		 * WB and OUTPUT operations do not support multi-operation,
+		 * so mark the driver dedicated in the set property ioctl
+		 * and clear the dedicated flag once the driver finishes.
+		 */
+		if (ipp_check_dedicated(ippdrv, property->cmd)) {
+			DRM_ERROR("already used choose device.\n");
+			return ERR_PTR(-EBUSY);
+		}
+
+		/*
+		 * Checking the property is necessary to pick the correct
+		 * device, because ipp drivers have different capabilities.
+		 */
+		if (ippdrv->check_property &&
+		    ippdrv->check_property(ippdrv->dev, property)) {
+			DRM_ERROR("not support property.\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		return ippdrv;
+	} else {
+		/*
+		 * The user application did not set an ipp_id, so the ipp
+		 * subsystem searches the whole driver list for a suitable
+		 * driver.
+		 */
+		list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
+			if (ipp_check_dedicated(ippdrv, property->cmd)) {
+				DRM_DEBUG_KMS("%s:used device.\n", __func__);
+				continue;
+			}
+
+			if (ippdrv->check_property &&
+			    ippdrv->check_property(ippdrv->dev, property)) {
+				DRM_DEBUG_KMS("%s:not support property.\n",
+					__func__);
+				continue;
+			}
+
+			return ippdrv;
+		}
+
+		DRM_ERROR("not support ipp driver operations.\n");
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
+static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id)
+{
+	struct exynos_drm_ippdrv *ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node;
+	int count = 0;
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]\n", __func__, prop_id);
+
+	if (list_empty(&exynos_drm_ippdrv_list)) {
+		DRM_DEBUG_KMS("%s:ippdrv_list is empty.\n", __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	/*
+	 * Search for the ipp driver that owns this prop_id handle.
+	 * The ipp subsystem needs this lookup in several paths, e.g.
+	 * the PAUSE state, buffer queueing and command control.
+	 */
+	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
+		DRM_DEBUG_KMS("%s:count[%d]ippdrv[0x%x]\n", __func__,
+			count++, (int)ippdrv);
+
+		if (!list_empty(&ippdrv->cmd_list)) {
+			list_for_each_entry(c_node, &ippdrv->cmd_list, list)
+				if (c_node->property.prop_id == prop_id)
+					return ippdrv;
+		}
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
+int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_exynos_file_private *file_priv = file->driver_priv;
+	struct exynos_drm_ipp_private *priv = file_priv->ipp_priv;
+	struct device *dev = priv->dev;
+	struct ipp_context *ctx = get_ipp_context(dev);
+	struct drm_exynos_ipp_prop_list *prop_list = data;
+	struct exynos_drm_ippdrv *ippdrv;
+	int count = 0;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!ctx) {
+		DRM_ERROR("invalid context.\n");
+		return -EINVAL;
+	}
+
+	if (!prop_list) {
+		DRM_ERROR("invalid property parameter.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:ipp_id[%d]\n", __func__, prop_list->ipp_id);
+
+	if (!prop_list->ipp_id) {
+		list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list)
+			count++;
+		/*
+		 * Report the ippdrv count to the user application:
+		 * first the application queries the driver count, then it
+		 * queries each driver's capability by ipp_id.
+		 */
+		prop_list->count = count;
+	} else {
+		/*
+		 * Report the capabilities of the driver selected by ipp_id.
+		 * Some devices do not support the wb or output interfaces,
+		 * so the user application uses this ioctl to detect the
+		 * correct ipp driver.
+		 */
+		ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock,
+						prop_list->ipp_id);
+		if (!ippdrv) {
+			DRM_ERROR("not found ipp%d driver.\n",
+					prop_list->ipp_id);
+			return -EINVAL;
+		}
+
+		if (ippdrv->prop_list)
+			*prop_list = *ippdrv->prop_list;
+	}
+
+	return 0;
+}
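+
+/*
+ * Illustrative userspace sketch of the two-step query above; the ioctl
+ * name and contiguous id allocation starting at 1 are assumptions made
+ * for illustration only:
+ *
+ *	struct drm_exynos_ipp_prop_list list = { .ipp_id = 0 };
+ *
+ *	ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_PROPERTY, &list);
+ *	for (id = 1; id <= list.count; id++) {
+ *		list.ipp_id = id;
+ *		ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_PROPERTY, &list);
+ *	}
+ */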
+
+static void ipp_print_property(struct drm_exynos_ipp_property *property,
+		int idx)
+{
+	struct drm_exynos_ipp_config *config = &property->config[idx];
+	struct drm_exynos_pos *pos = &config->pos;
+	struct drm_exynos_sz *sz = &config->sz;
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]ops[%s]fmt[0x%x]\n",
+		__func__, property->prop_id, idx ? "dst" : "src", config->fmt);
+
+	DRM_DEBUG_KMS("%s:pos[%d %d %d %d]sz[%d %d]f[%d]r[%d]\n",
+		__func__, pos->x, pos->y, pos->w, pos->h,
+		sz->hsize, sz->vsize, config->flip, config->degree);
+}
+
+static int ipp_find_and_set_property(struct drm_exynos_ipp_property *property)
+{
+	struct exynos_drm_ippdrv *ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node;
+	u32 prop_id = property->prop_id;
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]\n", __func__, prop_id);
+
+	ippdrv = ipp_find_drv_by_handle(prop_id);
+	if (IS_ERR_OR_NULL(ippdrv)) {
+		DRM_ERROR("failed to get ipp driver.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Find the command node in the ippdrv command list using prop_id.
+	 * When the matching command node is found, store the new property
+	 * information in it.
+	 */
+	list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
+		if ((c_node->property.prop_id == prop_id) &&
+		    (c_node->state == IPP_STATE_STOP)) {
+			DRM_DEBUG_KMS("%s:found cmd[%d]ippdrv[0x%x]\n",
+				__func__, property->cmd, (int)ippdrv);
+
+			c_node->property = *property;
+			return 0;
+		}
+	}
+
+	DRM_ERROR("failed to search property.\n");
+
+	return -EINVAL;
+}
+
+static struct drm_exynos_ipp_cmd_work *ipp_create_cmd_work(void)
+{
+	struct drm_exynos_ipp_cmd_work *cmd_work;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	cmd_work = kzalloc(sizeof(*cmd_work), GFP_KERNEL);
+	if (!cmd_work) {
+		DRM_ERROR("failed to alloc cmd_work.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	INIT_WORK((struct work_struct *)cmd_work, ipp_sched_cmd);
+
+	return cmd_work;
+}
+
+static struct drm_exynos_ipp_event_work *ipp_create_event_work(void)
+{
+	struct drm_exynos_ipp_event_work *event_work;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	event_work = kzalloc(sizeof(*event_work), GFP_KERNEL);
+	if (!event_work) {
+		DRM_ERROR("failed to alloc event_work.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	INIT_WORK((struct work_struct *)event_work, ipp_sched_event);
+
+	return event_work;
+}
+
+int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_exynos_file_private *file_priv = file->driver_priv;
+	struct exynos_drm_ipp_private *priv = file_priv->ipp_priv;
+	struct device *dev = priv->dev;
+	struct ipp_context *ctx = get_ipp_context(dev);
+	struct drm_exynos_ipp_property *property = data;
+	struct exynos_drm_ippdrv *ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node;
+	int ret, i;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!ctx) {
+		DRM_ERROR("invalid context.\n");
+		return -EINVAL;
+	}
+
+	if (!property) {
+		DRM_ERROR("invalid property parameter.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * This is log print for user application property.
+	 * user application set various property.
+	 */
+	for_each_ipp_ops(i)
+		ipp_print_property(property, i);
+
+	/*
+	 * The set property ioctl normally generates a new prop_id, but a
+	 * prop_id may already have been assigned by an earlier set property
+	 * call, e.g. in the PAUSE state. In that case the existing prop_id
+	 * is found and reused instead of allocating a new one.
+	 */
+	if (property->prop_id) {
+		DRM_DEBUG_KMS("%s:prop_id[%d]\n", __func__, property->prop_id);
+		return ipp_find_and_set_property(property);
+	}
+
+	/* find ipp driver using ipp id */
+	ippdrv = ipp_find_driver(ctx, property);
+	if (IS_ERR_OR_NULL(ippdrv)) {
+		DRM_ERROR("failed to get ipp driver.\n");
+		return -EINVAL;
+	}
+
+	/* allocate command node */
+	c_node = kzalloc(sizeof(*c_node), GFP_KERNEL);
+	if (!c_node) {
+		DRM_ERROR("failed to allocate map node.\n");
+		return -ENOMEM;
+	}
+
+	/* create property id */
+	ret = ipp_create_id(&ctx->prop_idr, &ctx->prop_lock, c_node,
+		&property->prop_id);
+	if (ret) {
+		DRM_ERROR("failed to create id.\n");
+		goto err_clear;
+	}
+
+	DRM_DEBUG_KMS("%s:created prop_id[%d]cmd[%d]ippdrv[0x%x]\n",
+		__func__, property->prop_id, property->cmd, (int)ippdrv);
+
+	/* store the property information in private data */
+	c_node->priv = priv;
+	c_node->property = *property;
+	c_node->state = IPP_STATE_IDLE;
+
+	c_node->start_work = ipp_create_cmd_work();
+	if (IS_ERR_OR_NULL(c_node->start_work)) {
+		DRM_ERROR("failed to create start work.\n");
+		goto err_clear;
+	}
+
+	c_node->stop_work = ipp_create_cmd_work();
+	if (IS_ERR_OR_NULL(c_node->stop_work)) {
+		DRM_ERROR("failed to create stop work.\n");
+		goto err_free_start;
+	}
+
+	c_node->event_work = ipp_create_event_work();
+	if (IS_ERR_OR_NULL(c_node->event_work)) {
+		DRM_ERROR("failed to create event work.\n");
+		goto err_free_stop;
+	}
+
+	mutex_init(&c_node->cmd_lock);
+	mutex_init(&c_node->mem_lock);
+	mutex_init(&c_node->event_lock);
+
+	init_completion(&c_node->start_complete);
+	init_completion(&c_node->stop_complete);
+
+	for_each_ipp_ops(i)
+		INIT_LIST_HEAD(&c_node->mem_list[i]);
+
+	INIT_LIST_HEAD(&c_node->event_list);
+	list_splice_init(&priv->event_list, &c_node->event_list);
+	list_add_tail(&c_node->list, &ippdrv->cmd_list);
+
+	/* make dedicated state without m2m */
+	if (!ipp_is_m2m_cmd(property->cmd))
+		ippdrv->dedicated = true;
+
+	return 0;
+
+err_free_stop:
+	kfree(c_node->stop_work);
+err_free_start:
+	kfree(c_node->start_work);
+err_clear:
+	kfree(c_node);
+	return ret;
+}
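+
+/*
+ * Note: the start/stop/event works above are preallocated at set
+ * property time rather than when they are queued; the event work in
+ * particular is later queued from hard irq context (see e.g.
+ * rotator_irq_handler()), where allocation would be awkward.
+ */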
+
+static void ipp_clean_cmd_node(struct drm_exynos_ipp_cmd_node *c_node)
+{
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	/* delete list */
+	list_del(&c_node->list);
+
+	/* destroy mutex */
+	mutex_destroy(&c_node->cmd_lock);
+	mutex_destroy(&c_node->mem_lock);
+	mutex_destroy(&c_node->event_lock);
+
+	/* free command node */
+	kfree(c_node->start_work);
+	kfree(c_node->stop_work);
+	kfree(c_node->event_work);
+	kfree(c_node);
+}
+
+static int ipp_check_mem_list(struct drm_exynos_ipp_cmd_node *c_node)
+{
+	struct drm_exynos_ipp_property *property = &c_node->property;
+	struct drm_exynos_ipp_mem_node *m_node;
+	struct list_head *head;
+	int ret, i, count[EXYNOS_DRM_OPS_MAX] = { 0, };
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	mutex_lock(&c_node->mem_lock);
+
+	for_each_ipp_ops(i) {
+		/* source/destination memory list */
+		head = &c_node->mem_list[i];
+
+		if (list_empty(head)) {
+			DRM_DEBUG_KMS("%s:%s memory empty.\n", __func__,
+				i ? "dst" : "src");
+			continue;
+		}
+
+		/* find memory node entry */
+		list_for_each_entry(m_node, head, list) {
+			DRM_DEBUG_KMS("%s:%s,count[%d]m_node[0x%x]\n", __func__,
+				i ? "dst" : "src", count[i], (int)m_node);
+			count[i]++;
+		}
+	}
+
+	DRM_DEBUG_KMS("%s:min[%d]max[%d]\n", __func__,
+		min(count[EXYNOS_DRM_OPS_SRC], count[EXYNOS_DRM_OPS_DST]),
+		max(count[EXYNOS_DRM_OPS_SRC], count[EXYNOS_DRM_OPS_DST]));
+
+	/*
+	 * M2M operations need paired src/dst memory addresses, so the
+	 * minimum of the src and dst counts is used. Other operations do
+	 * not use paired memory, so the maximum count is used instead
+	 * (see the worked example after this function).
+	 */
+	if (ipp_is_m2m_cmd(property->cmd))
+		ret = min(count[EXYNOS_DRM_OPS_SRC],
+			count[EXYNOS_DRM_OPS_DST]);
+	else
+		ret = max(count[EXYNOS_DRM_OPS_SRC],
+			count[EXYNOS_DRM_OPS_DST]);
+
+	mutex_unlock(&c_node->mem_lock);
+
+	return ret;
+}
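+
+/*
+ * Worked example for the min/max rule above: with 3 src and 2 dst
+ * buffers queued, an M2M command can run min(3, 2) = 2 paired
+ * operations, while a WB command with 0 src and 2 dst buffers gets
+ * max(0, 2) = 2 because only the destination side is used.
+ */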
+
+static struct drm_exynos_ipp_mem_node
+		*ipp_find_mem_node(struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_queue_buf *qbuf)
+{
+	struct drm_exynos_ipp_mem_node *m_node;
+	struct list_head *head;
+	int count = 0;
+
+	DRM_DEBUG_KMS("%s:buf_id[%d]\n", __func__, qbuf->buf_id);
+
+	/* source/destination memory list */
+	head = &c_node->mem_list[qbuf->ops_id];
+
+	/* find memory node from memory list */
+	list_for_each_entry(m_node, head, list) {
+		DRM_DEBUG_KMS("%s:count[%d]m_node[0x%x]\n",
+			__func__, count++, (int)m_node);
+
+		/* compare buffer id */
+		if (m_node->buf_id == qbuf->buf_id)
+			return m_node;
+	}
+
+	return NULL;
+}
+
+static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv,
+		struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_mem_node *m_node)
+{
+	struct exynos_drm_ipp_ops *ops = NULL;
+	int ret = 0;
+
+	DRM_DEBUG_KMS("%s:node[0x%x]\n", __func__, (int)m_node);
+
+	if (!m_node) {
+		DRM_ERROR("invalid queue node.\n");
+		return -EFAULT;
+	}
+
+	mutex_lock(&c_node->mem_lock);
+
+	DRM_DEBUG_KMS("%s:ops_id[%d]\n", __func__, m_node->ops_id);
+
+	/* get operations callback */
+	ops = ippdrv->ops[m_node->ops_id];
+	if (!ops) {
+		DRM_ERROR("not support ops.\n");
+		ret = -EFAULT;
+		goto err_unlock;
+	}
+
+	/* set address and enable irq */
+	if (ops->set_addr) {
+		ret = ops->set_addr(ippdrv->dev, &m_node->buf_info,
+			m_node->buf_id, IPP_BUF_ENQUEUE);
+		if (ret) {
+			DRM_ERROR("failed to set addr.\n");
+			goto err_unlock;
+		}
+	}
+
+err_unlock:
+	mutex_unlock(&c_node->mem_lock);
+	return ret;
+}
+
+static struct drm_exynos_ipp_mem_node
+		*ipp_get_mem_node(struct drm_device *drm_dev,
+		struct drm_file *file,
+		struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_queue_buf *qbuf)
+{
+	struct drm_exynos_ipp_mem_node *m_node;
+	struct drm_exynos_ipp_buf_info buf_info;
+	void *addr;
+	int i;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	mutex_lock(&c_node->mem_lock);
+
+	m_node = kzalloc(sizeof(*m_node), GFP_KERNEL);
+	if (!m_node) {
+		DRM_ERROR("failed to allocate queue node.\n");
+		goto err_unlock;
+	}
+
+	/* clear base address for error handling */
+	memset(&buf_info, 0x0, sizeof(buf_info));
+
+	/* operations, buffer id */
+	m_node->ops_id = qbuf->ops_id;
+	m_node->prop_id = qbuf->prop_id;
+	m_node->buf_id = qbuf->buf_id;
+
+	DRM_DEBUG_KMS("%s:m_node[0x%x]ops_id[%d]\n", __func__,
+		(int)m_node, qbuf->ops_id);
+	DRM_DEBUG_KMS("%s:prop_id[%d]buf_id[%d]\n", __func__,
+		qbuf->prop_id, m_node->buf_id);
+
+	for_each_ipp_planar(i) {
+		DRM_DEBUG_KMS("%s:i[%d]handle[0x%x]\n", __func__,
+			i, qbuf->handle[i]);
+
+		/* get dma address by handle */
+		if (qbuf->handle[i]) {
+			addr = exynos_drm_gem_get_dma_addr(drm_dev,
+					qbuf->handle[i], file);
+			if (IS_ERR(addr)) {
+				DRM_ERROR("failed to get addr.\n");
+				goto err_clear;
+			}
+
+			buf_info.handles[i] = qbuf->handle[i];
+			buf_info.base[i] = *(dma_addr_t *) addr;
+			DRM_DEBUG_KMS("%s:i[%d]base[0x%x]hd[0x%x]\n",
+				__func__, i, buf_info.base[i],
+				(int)buf_info.handles[i]);
+		}
+	}
+
+	m_node->filp = file;
+	m_node->buf_info = buf_info;
+	list_add_tail(&m_node->list, &c_node->mem_list[qbuf->ops_id]);
+
+	mutex_unlock(&c_node->mem_lock);
+	return m_node;
+
+err_clear:
+	kfree(m_node);
+err_unlock:
+	mutex_unlock(&c_node->mem_lock);
+	return ERR_PTR(-EFAULT);
+}
+
+static int ipp_put_mem_node(struct drm_device *drm_dev,
+		struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_mem_node *m_node)
+{
+	int i;
+
+	DRM_DEBUG_KMS("%s:node[0x%x]\n", __func__, (int)m_node);
+
+	if (!m_node) {
+		DRM_ERROR("invalid dequeue node.\n");
+		return -EFAULT;
+	}
+
+	if (list_empty(&m_node->list)) {
+		DRM_ERROR("empty memory node.\n");
+		return -ENOMEM;
+	}
+
+	mutex_lock(&c_node->mem_lock);
+
+	DRM_DEBUG_KMS("%s:ops_id[%d]\n", __func__, m_node->ops_id);
+
+	/* put gem buffer */
+	for_each_ipp_planar(i) {
+		unsigned long handle = m_node->buf_info.handles[i];
+		if (handle)
+			exynos_drm_gem_put_dma_addr(drm_dev, handle,
+							m_node->filp);
+	}
+
+	/* delete list in queue */
+	list_del(&m_node->list);
+	kfree(m_node);
+
+	mutex_unlock(&c_node->mem_lock);
+
+	return 0;
+}
+
+static void ipp_free_event(struct drm_pending_event *event)
+{
+	kfree(event);
+}
+
+static int ipp_get_event(struct drm_device *drm_dev,
+		struct drm_file *file,
+		struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_queue_buf *qbuf)
+{
+	struct drm_exynos_ipp_send_event *e;
+	unsigned long flags;
+
+	DRM_DEBUG_KMS("%s:ops_id[%d]buf_id[%d]\n", __func__,
+		qbuf->ops_id, qbuf->buf_id);
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+
+	if (!e) {
+		DRM_ERROR("failed to allocate event.\n");
+		spin_lock_irqsave(&drm_dev->event_lock, flags);
+		file->event_space += sizeof(e->event);
+		spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+		return -ENOMEM;
+	}
+
+	/* make event */
+	e->event.base.type = DRM_EXYNOS_IPP_EVENT;
+	e->event.base.length = sizeof(e->event);
+	e->event.user_data = qbuf->user_data;
+	e->event.prop_id = qbuf->prop_id;
+	e->event.buf_id[EXYNOS_DRM_OPS_DST] = qbuf->buf_id;
+	e->base.event = &e->event.base;
+	e->base.file_priv = file;
+	e->base.destroy = ipp_free_event;
+	list_add_tail(&e->base.link, &c_node->event_list);
+
+	return 0;
+}
+
+static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_queue_buf *qbuf)
+{
+	struct drm_exynos_ipp_send_event *e, *te;
+	int count = 0;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (list_empty(&c_node->event_list)) {
+		DRM_DEBUG_KMS("%s:event_list is empty.\n", __func__);
+		return;
+	}
+
+	list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
+		DRM_DEBUG_KMS("%s:count[%d]e[0x%x]\n",
+			__func__, count++, (int)e);
+
+		/*
+		 * qbuf == NULL means delete all events: stop operations
+		 * want to clear the whole event list. Otherwise only the
+		 * event with the matching buf_id is deleted.
+		 */
+		if (!qbuf) {
+			/* delete list */
+			list_del(&e->base.link);
+			kfree(e);
+		}
+
+		/* compare buffer id */
+		if (qbuf && (qbuf->buf_id ==
+		    e->event.buf_id[EXYNOS_DRM_OPS_DST])) {
+			/* delete list */
+			list_del(&e->base.link);
+			kfree(e);
+			return;
+		}
+	}
+}
+
+void ipp_handle_cmd_work(struct device *dev,
+		struct exynos_drm_ippdrv *ippdrv,
+		struct drm_exynos_ipp_cmd_work *cmd_work,
+		struct drm_exynos_ipp_cmd_node *c_node)
+{
+	struct ipp_context *ctx = get_ipp_context(dev);
+
+	cmd_work->ippdrv = ippdrv;
+	cmd_work->c_node = c_node;
+	queue_work(ctx->cmd_workq, (struct work_struct *)cmd_work);
+}
+
+static int ipp_queue_buf_with_run(struct device *dev,
+		struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_mem_node *m_node,
+		struct drm_exynos_ipp_queue_buf *qbuf)
+{
+	struct exynos_drm_ippdrv *ippdrv;
+	struct drm_exynos_ipp_property *property;
+	struct exynos_drm_ipp_ops *ops;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	ippdrv = ipp_find_drv_by_handle(qbuf->prop_id);
+	if (IS_ERR_OR_NULL(ippdrv)) {
+		DRM_ERROR("failed to get ipp driver.\n");
+		return -EFAULT;
+	}
+
+	ops = ippdrv->ops[qbuf->ops_id];
+	if (!ops) {
+		DRM_ERROR("failed to get ops.\n");
+		return -EFAULT;
+	}
+
+	property = &c_node->property;
+
+	if (c_node->state != IPP_STATE_START) {
+		DRM_DEBUG_KMS("%s:bypass for invalid state.\n" , __func__);
+		return 0;
+	}
+
+	if (!ipp_check_mem_list(c_node)) {
+		DRM_DEBUG_KMS("%s:empty memory.\n", __func__);
+		return 0;
+	}
+
+	/*
+	 * Once the destination buffer is set and the clock is enabled,
+	 * m2m operations need to be started from queue_buf.
+	 */
+	if (ipp_is_m2m_cmd(property->cmd)) {
+		struct drm_exynos_ipp_cmd_work *cmd_work = c_node->start_work;
+
+		cmd_work->ctrl = IPP_CTRL_PLAY;
+		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
+	} else {
+		ret = ipp_set_mem_node(ippdrv, c_node, m_node);
+		if (ret) {
+			DRM_ERROR("failed to set m node.\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void ipp_clean_queue_buf(struct drm_device *drm_dev,
+		struct drm_exynos_ipp_cmd_node *c_node,
+		struct drm_exynos_ipp_queue_buf *qbuf)
+{
+	struct drm_exynos_ipp_mem_node *m_node, *tm_node;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!list_empty(&c_node->mem_list[qbuf->ops_id])) {
+		/* delete list */
+		list_for_each_entry_safe(m_node, tm_node,
+			&c_node->mem_list[qbuf->ops_id], list) {
+			if (m_node->buf_id == qbuf->buf_id &&
+			    m_node->ops_id == qbuf->ops_id)
+				ipp_put_mem_node(drm_dev, c_node, m_node);
+		}
+	}
+}
+
+int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_exynos_file_private *file_priv = file->driver_priv;
+	struct exynos_drm_ipp_private *priv = file_priv->ipp_priv;
+	struct device *dev = priv->dev;
+	struct ipp_context *ctx = get_ipp_context(dev);
+	struct drm_exynos_ipp_queue_buf *qbuf = data;
+	struct drm_exynos_ipp_cmd_node *c_node;
+	struct drm_exynos_ipp_mem_node *m_node;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!qbuf) {
+		DRM_ERROR("invalid buf parameter.\n");
+		return -EINVAL;
+	}
+
+	if (qbuf->ops_id >= EXYNOS_DRM_OPS_MAX) {
+		DRM_ERROR("invalid ops parameter.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]ops_id[%s]buf_id[%d]buf_type[%d]\n",
+		__func__, qbuf->prop_id, qbuf->ops_id ? "dst" : "src",
+		qbuf->buf_id, qbuf->buf_type);
+
+	/* find command node */
+	c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
+		qbuf->prop_id);
+	if (!c_node) {
+		DRM_ERROR("failed to get command node.\n");
+		return -EFAULT;
+	}
+
+	/* buffer control */
+	switch (qbuf->buf_type) {
+	case IPP_BUF_ENQUEUE:
+		/* get memory node */
+		m_node = ipp_get_mem_node(drm_dev, file, c_node, qbuf);
+		if (IS_ERR(m_node)) {
+			DRM_ERROR("failed to get m_node.\n");
+			return PTR_ERR(m_node);
+		}
+
+		/*
+		 * First get an event for the destination buffer, then,
+		 * in the M2M case, run the operation with the destination
+		 * buffer if needed.
+		 */
+		if (qbuf->ops_id == EXYNOS_DRM_OPS_DST) {
+			/* get event for destination buffer */
+			ret = ipp_get_event(drm_dev, file, c_node, qbuf);
+			if (ret) {
+				DRM_ERROR("failed to get event.\n");
+				goto err_clean_node;
+			}
+
+			/*
+			 * The M2M case runs play control for the streaming
+			 * feature. Other cases just set the address and
+			 * wait.
+			 */
+			ret = ipp_queue_buf_with_run(dev, c_node, m_node, qbuf);
+			if (ret) {
+				DRM_ERROR("failed to run command.\n");
+				goto err_clean_node;
+			}
+		}
+		break;
+	case IPP_BUF_DEQUEUE:
+		mutex_lock(&c_node->cmd_lock);
+
+		/* put event for destination buffer */
+		if (qbuf->ops_id == EXYNOS_DRM_OPS_DST)
+			ipp_put_event(c_node, qbuf);
+
+		ipp_clean_queue_buf(drm_dev, c_node, qbuf);
+
+		mutex_unlock(&c_node->cmd_lock);
+		break;
+	default:
+		DRM_ERROR("invalid buffer control.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+
+err_clean_node:
+	DRM_ERROR("clean memory nodes.\n");
+
+	ipp_clean_queue_buf(drm_dev, c_node, qbuf);
+	return ret;
+}
+
+static bool exynos_drm_ipp_check_valid(struct device *dev,
+		enum drm_exynos_ipp_ctrl ctrl, enum drm_exynos_ipp_state state)
+{
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (ctrl != IPP_CTRL_PLAY) {
+		if (pm_runtime_suspended(dev)) {
+			DRM_ERROR("pm:runtime_suspended.\n");
+			goto err_status;
+		}
+	}
+
+	switch (ctrl) {
+	case IPP_CTRL_PLAY:
+		if (state != IPP_STATE_IDLE)
+			goto err_status;
+		break;
+	case IPP_CTRL_STOP:
+		if (state == IPP_STATE_STOP)
+			goto err_status;
+		break;
+	case IPP_CTRL_PAUSE:
+		if (state != IPP_STATE_START)
+			goto err_status;
+		break;
+	case IPP_CTRL_RESUME:
+		if (state != IPP_STATE_STOP)
+			goto err_status;
+		break;
+	default:
+		DRM_ERROR("invalid state.\n");
+		goto err_status;
+		break;
+	}
+
+	return true;
+
+err_status:
+	DRM_ERROR("invalid status:ctrl[%d]state[%d]\n", ctrl, state);
+	return false;
+}
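+
+/*
+ * Summary of the checks above: PLAY is only valid from IDLE, PAUSE
+ * only from START, RESUME only from STOP, and STOP from any state
+ * except STOP itself. Controls other than PLAY additionally require
+ * the device not to be runtime suspended.
+ */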
+
+int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_exynos_file_private *file_priv = file->driver_priv;
+	struct exynos_drm_ipp_private *priv = file_priv->ipp_priv;
+	struct exynos_drm_ippdrv *ippdrv = NULL;
+	struct device *dev = priv->dev;
+	struct ipp_context *ctx = get_ipp_context(dev);
+	struct drm_exynos_ipp_cmd_ctrl *cmd_ctrl = data;
+	struct drm_exynos_ipp_cmd_work *cmd_work;
+	struct drm_exynos_ipp_cmd_node *c_node;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!ctx) {
+		DRM_ERROR("invalid context.\n");
+		return -EINVAL;
+	}
+
+	if (!cmd_ctrl) {
+		DRM_ERROR("invalid control parameter.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:ctrl[%d]prop_id[%d]\n", __func__,
+		cmd_ctrl->ctrl, cmd_ctrl->prop_id);
+
+	ippdrv = ipp_find_drv_by_handle(cmd_ctrl->prop_id);
+	if (IS_ERR(ippdrv)) {
+		DRM_ERROR("failed to get ipp driver.\n");
+		return PTR_ERR(ippdrv);
+	}
+
+	c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
+		cmd_ctrl->prop_id);
+	if (!c_node) {
+		DRM_ERROR("invalid command node list.\n");
+		return -EINVAL;
+	}
+
+	if (!exynos_drm_ipp_check_valid(ippdrv->dev, cmd_ctrl->ctrl,
+	    c_node->state)) {
+		DRM_ERROR("invalid state.\n");
+		return -EINVAL;
+	}
+
+	switch (cmd_ctrl->ctrl) {
+	case IPP_CTRL_PLAY:
+		if (pm_runtime_suspended(ippdrv->dev))
+			pm_runtime_get_sync(ippdrv->dev);
+		c_node->state = IPP_STATE_START;
+
+		cmd_work = c_node->start_work;
+		cmd_work->ctrl = cmd_ctrl->ctrl;
+		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
+		break;
+	case IPP_CTRL_STOP:
+		cmd_work = c_node->stop_work;
+		cmd_work->ctrl = cmd_ctrl->ctrl;
+		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
+
+		if (!wait_for_completion_timeout(&c_node->stop_complete,
+		    msecs_to_jiffies(300))) {
+			DRM_ERROR("timeout stop:prop_id[%d]\n",
+				c_node->property.prop_id);
+		}
+
+		c_node->state = IPP_STATE_STOP;
+		ippdrv->dedicated = false;
+		ipp_clean_cmd_node(c_node);
+
+		if (list_empty(&ippdrv->cmd_list))
+			pm_runtime_put_sync(ippdrv->dev);
+		break;
+	case IPP_CTRL_PAUSE:
+		cmd_work = c_node->stop_work;
+		cmd_work->ctrl = cmd_ctrl->ctrl;
+		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
+
+		if (!wait_for_completion_timeout(&c_node->stop_complete,
+		    msecs_to_jiffies(200))) {
+			DRM_ERROR("timeout stop:prop_id[%d]\n",
+				c_node->property.prop_id);
+		}
+
+		c_node->state = IPP_STATE_STOP;
+		break;
+	case IPP_CTRL_RESUME:
+		c_node->state = IPP_STATE_START;
+		cmd_work = c_node->start_work;
+		cmd_work->ctrl = cmd_ctrl->ctrl;
+		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
+		break;
+	default:
+		DRM_ERROR("could not support this state currently.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:done ctrl[%d]prop_id[%d]\n", __func__,
+		cmd_ctrl->ctrl, cmd_ctrl->prop_id);
+
+	return 0;
+}
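+
+/*
+ * Typical command lifecycle implied by the ioctls above: set property
+ * creates a command node, queue buf enqueues src/dst buffers, cmd ctrl
+ * PLAY schedules start_work on the command thread, and cmd ctrl STOP
+ * schedules stop_work, waits up to 300 msec for stop_complete and then
+ * cleans up the command node.
+ */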
+
+int exynos_drm_ippnb_register(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(
+		&exynos_drm_ippnb_list, nb);
+}
+
+int exynos_drm_ippnb_unregister(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(
+		&exynos_drm_ippnb_list, nb);
+}
+
+int exynos_drm_ippnb_send_event(unsigned long val, void *v)
+{
+	return blocking_notifier_call_chain(
+		&exynos_drm_ippnb_list, val, v);
+}
+
+static int ipp_set_property(struct exynos_drm_ippdrv *ippdrv,
+		struct drm_exynos_ipp_property *property)
+{
+	struct exynos_drm_ipp_ops *ops = NULL;
+	bool swap = false;
+	int ret, i;
+
+	if (!property) {
+		DRM_ERROR("invalid property parameter.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]\n", __func__, property->prop_id);
+
+	/* reset h/w block */
+	if (ippdrv->reset &&
+	    ippdrv->reset(ippdrv->dev)) {
+		DRM_ERROR("failed to reset.\n");
+		return -EINVAL;
+	}
+
+	/* set source,destination operations */
+	for_each_ipp_ops(i) {
+		struct drm_exynos_ipp_config *config =
+			&property->config[i];
+
+		ops = ippdrv->ops[i];
+		if (!ops || !config) {
+			DRM_ERROR("not support ops and config.\n");
+			return -EINVAL;
+		}
+
+		/* set format */
+		if (ops->set_fmt) {
+			ret = ops->set_fmt(ippdrv->dev, config->fmt);
+			if (ret) {
+				DRM_ERROR("not support format.\n");
+				return ret;
+			}
+		}
+
+		/* set transform for rotation, flip */
+		if (ops->set_transf) {
+			ret = ops->set_transf(ippdrv->dev, config->degree,
+				config->flip, &swap);
+			if (ret) {
+				DRM_ERROR("not support tranf.\n");
+				return -EINVAL;
+			}
+		}
+
+		/* set size */
+		if (ops->set_size) {
+			ret = ops->set_size(ippdrv->dev, swap, &config->pos,
+				&config->sz);
+			if (ret) {
+				DRM_ERROR("not support size.\n");
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
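+
+/*
+ * Note on ordering above: the hardware block is reset once, then for
+ * each of the src and dst operations the format, the transform (which
+ * may set *swap when rotation exchanges width and height) and finally
+ * the size are programmed, with the size call consuming the swap flag.
+ */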
+
+static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv,
+		struct drm_exynos_ipp_cmd_node *c_node)
+{
+	struct drm_exynos_ipp_mem_node *m_node;
+	struct drm_exynos_ipp_property *property = &c_node->property;
+	struct list_head *head;
+	int ret, i;
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]\n", __func__, property->prop_id);
+
+	/* store command info in ippdrv */
+	ippdrv->cmd = c_node;
+
+	if (!ipp_check_mem_list(c_node)) {
+		DRM_DEBUG_KMS("%s:empty memory.\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* set current property in ippdrv */
+	ret = ipp_set_property(ippdrv, property);
+	if (ret) {
+		DRM_ERROR("failed to set property.\n");
+		ippdrv->cmd = NULL;
+		return ret;
+	}
+
+	/* check command */
+	switch (property->cmd) {
+	case IPP_CMD_M2M:
+		for_each_ipp_ops(i) {
+			/* source/destination memory list */
+			head = &c_node->mem_list[i];
+
+			m_node = list_first_entry(head,
+				struct drm_exynos_ipp_mem_node, list);
+			if (!m_node) {
+				DRM_ERROR("failed to get node.\n");
+				ret = -EFAULT;
+				return ret;
+			}
+
+			DRM_DEBUG_KMS("%s:m_node[0x%x]\n",
+				__func__, (int)m_node);
+
+			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
+			if (ret) {
+				DRM_ERROR("failed to set m node.\n");
+				return ret;
+			}
+		}
+		break;
+	case IPP_CMD_WB:
+		/* destination memory list */
+		head = &c_node->mem_list[EXYNOS_DRM_OPS_DST];
+
+		list_for_each_entry(m_node, head, list) {
+			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
+			if (ret) {
+				DRM_ERROR("failed to set m node.\n");
+				return ret;
+			}
+		}
+		break;
+	case IPP_CMD_OUTPUT:
+		/* source memory list */
+		head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
+
+		list_for_each_entry(m_node, head, list) {
+			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
+			if (ret) {
+				DRM_ERROR("failed to set m node.\n");
+				return ret;
+			}
+		}
+		break;
+	default:
+		DRM_ERROR("invalid operations.\n");
+		return -EINVAL;
+	}
+
+	DRM_DEBUG_KMS("%s:cmd[%d]\n", __func__, property->cmd);
+
+	/* start operations */
+	if (ippdrv->start) {
+		ret = ippdrv->start(ippdrv->dev, property->cmd);
+		if (ret) {
+			DRM_ERROR("failed to start ops.\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int ipp_stop_property(struct drm_device *drm_dev,
+		struct exynos_drm_ippdrv *ippdrv,
+		struct drm_exynos_ipp_cmd_node *c_node)
+{
+	struct drm_exynos_ipp_mem_node *m_node, *tm_node;
+	struct drm_exynos_ipp_property *property = &c_node->property;
+	struct list_head *head;
+	int ret = 0, i;
+
+	DRM_DEBUG_KMS("%s:prop_id[%d]\n", __func__, property->prop_id);
+
+	/* put event */
+	ipp_put_event(c_node, NULL);
+
+	/* check command */
+	switch (property->cmd) {
+	case IPP_CMD_M2M:
+		for_each_ipp_ops(i) {
+			/* source/destination memory list */
+			head = &c_node->mem_list[i];
+
+			if (list_empty(head)) {
+				DRM_DEBUG_KMS("%s:mem_list is empty.\n",
+					__func__);
+				break;
+			}
+
+			list_for_each_entry_safe(m_node, tm_node,
+				head, list) {
+				ret = ipp_put_mem_node(drm_dev, c_node,
+					m_node);
+				if (ret) {
+					DRM_ERROR("failed to put m_node.\n");
+					goto err_clear;
+				}
+			}
+		}
+		break;
+	case IPP_CMD_WB:
+		/* destination memory list */
+		head = &c_node->mem_list[EXYNOS_DRM_OPS_DST];
+
+		if (list_empty(head)) {
+			DRM_DEBUG_KMS("%s:mem_list is empty.\n", __func__);
+			break;
+		}
+
+		list_for_each_entry_safe(m_node, tm_node, head, list) {
+			ret = ipp_put_mem_node(drm_dev, c_node, m_node);
+			if (ret) {
+				DRM_ERROR("failed to put m_node.\n");
+				goto err_clear;
+			}
+		}
+		break;
+	case IPP_CMD_OUTPUT:
+		/* source memory list */
+		head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
+
+		if (list_empty(head)) {
+			DRM_DEBUG_KMS("%s:mem_list is empty.\n", __func__);
+			break;
+		}
+
+		list_for_each_entry_safe(m_node, tm_node, head, list) {
+			ret = ipp_put_mem_node(drm_dev, c_node, m_node);
+			if (ret) {
+				DRM_ERROR("failed to put m_node.\n");
+				goto err_clear;
+			}
+		}
+		break;
+	default:
+		DRM_ERROR("invalid operations.\n");
+		ret = -EINVAL;
+		goto err_clear;
+	}
+
+err_clear:
+	/* stop operations */
+	if (ippdrv->stop)
+		ippdrv->stop(ippdrv->dev, property->cmd);
+
+	return ret;
+}
+
+void ipp_sched_cmd(struct work_struct *work)
+{
+	struct drm_exynos_ipp_cmd_work *cmd_work =
+		(struct drm_exynos_ipp_cmd_work *)work;
+	struct exynos_drm_ippdrv *ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node;
+	struct drm_exynos_ipp_property *property;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	ippdrv = cmd_work->ippdrv;
+	if (!ippdrv) {
+		DRM_ERROR("invalid ippdrv list.\n");
+		return;
+	}
+
+	c_node = cmd_work->c_node;
+	if (!c_node) {
+		DRM_ERROR("invalid command node list.\n");
+		return;
+	}
+
+	mutex_lock(&c_node->cmd_lock);
+
+	property = &c_node->property;
+	if (!property) {
+		DRM_ERROR("failed to get property:prop_id[%d]\n",
+			c_node->property.prop_id);
+		goto err_unlock;
+	}
+
+	switch (cmd_work->ctrl) {
+	case IPP_CTRL_PLAY:
+	case IPP_CTRL_RESUME:
+		ret = ipp_start_property(ippdrv, c_node);
+		if (ret) {
+			DRM_ERROR("failed to start property:prop_id[%d]\n",
+				c_node->property.prop_id);
+			goto err_unlock;
+		}
+
+		/*
+		 * The M2M case waits for completion of the transfer:
+		 * it performs single unit operations on a queue of
+		 * buffers, so each data transfer has to complete before
+		 * the next one is started.
+		 */
+		if (ipp_is_m2m_cmd(property->cmd)) {
+			if (!wait_for_completion_timeout
+			    (&c_node->start_complete, msecs_to_jiffies(200))) {
+				DRM_ERROR("timeout event:prop_id[%d]\n",
+					c_node->property.prop_id);
+				goto err_unlock;
+			}
+		}
+		break;
+	case IPP_CTRL_STOP:
+	case IPP_CTRL_PAUSE:
+		ret = ipp_stop_property(ippdrv->drm_dev, ippdrv,
+			c_node);
+		if (ret) {
+			DRM_ERROR("failed to stop property.\n");
+			goto err_unlock;
+		}
+
+		complete(&c_node->stop_complete);
+		break;
+	default:
+		DRM_ERROR("unknown control type\n");
+		break;
+	}
+
+	DRM_DEBUG_KMS("%s:ctrl[%d] done.\n", __func__, cmd_work->ctrl);
+
+err_unlock:
+	mutex_unlock(&c_node->cmd_lock);
+}
+
+static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv,
+		struct drm_exynos_ipp_cmd_node *c_node, int *buf_id)
+{
+	struct drm_device *drm_dev = ippdrv->drm_dev;
+	struct drm_exynos_ipp_property *property = &c_node->property;
+	struct drm_exynos_ipp_mem_node *m_node;
+	struct drm_exynos_ipp_queue_buf qbuf;
+	struct drm_exynos_ipp_send_event *e;
+	struct list_head *head;
+	struct timeval now;
+	unsigned long flags;
+	u32 tbuf_id[EXYNOS_DRM_OPS_MAX] = {0, };
+	int ret, i;
+
+	for_each_ipp_ops(i)
+		DRM_DEBUG_KMS("%s:%s buf_id[%d]\n", __func__,
+			i ? "dst" : "src", buf_id[i]);
+
+	if (!drm_dev) {
+		DRM_ERROR("failed to get drm_dev.\n");
+		return -EINVAL;
+	}
+
+	if (!property) {
+		DRM_ERROR("failed to get property.\n");
+		return -EINVAL;
+	}
+
+	if (list_empty(&c_node->event_list)) {
+		DRM_DEBUG_KMS("%s:event list is empty.\n", __func__);
+		return 0;
+	}
+
+	if (!ipp_check_mem_list(c_node)) {
+		DRM_DEBUG_KMS("%s:empty memory.\n", __func__);
+		return 0;
+	}
+
+	/* check command */
+	switch (property->cmd) {
+	case IPP_CMD_M2M:
+		for_each_ipp_ops(i) {
+			/* source/destination memory list */
+			head = &c_node->mem_list[i];
+
+			m_node = list_first_entry(head,
+				struct drm_exynos_ipp_mem_node, list);
+			if (!m_node) {
+				DRM_ERROR("empty memory node.\n");
+				return -ENOMEM;
+			}
+
+			tbuf_id[i] = m_node->buf_id;
+			DRM_DEBUG_KMS("%s:%s buf_id[%d]\n", __func__,
+				i ? "dst" : "src", tbuf_id[i]);
+
+			ret = ipp_put_mem_node(drm_dev, c_node, m_node);
+			if (ret)
+				DRM_ERROR("failed to put m_node.\n");
+		}
+		break;
+	case IPP_CMD_WB:
+		/* clear buf for finding */
+		memset(&qbuf, 0x0, sizeof(qbuf));
+		qbuf.ops_id = EXYNOS_DRM_OPS_DST;
+		qbuf.buf_id = buf_id[EXYNOS_DRM_OPS_DST];
+
+		/* get memory node entry */
+		m_node = ipp_find_mem_node(c_node, &qbuf);
+		if (!m_node) {
+			DRM_ERROR("empty memory node.\n");
+			return -ENOMEM;
+		}
+
+		tbuf_id[EXYNOS_DRM_OPS_DST] = m_node->buf_id;
+
+		ret = ipp_put_mem_node(drm_dev, c_node, m_node);
+		if (ret)
+			DRM_ERROR("failed to put m_node.\n");
+		break;
+	case IPP_CMD_OUTPUT:
+		/* source memory list */
+		head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
+
+		m_node = list_first_entry(head,
+			struct drm_exynos_ipp_mem_node, list);
+		if (!m_node) {
+			DRM_ERROR("empty memory node.\n");
+			return -ENOMEM;
+		}
+
+		tbuf_id[EXYNOS_DRM_OPS_SRC] = m_node->buf_id;
+
+		ret = ipp_put_mem_node(drm_dev, c_node, m_node);
+		if (ret)
+			DRM_ERROR("failed to put m_node.\n");
+		break;
+	default:
+		DRM_ERROR("invalid operations.\n");
+		return -EINVAL;
+	}
+
+	if (tbuf_id[EXYNOS_DRM_OPS_DST] != buf_id[EXYNOS_DRM_OPS_DST])
+		DRM_ERROR("failed to match buf_id[%d %d]prop_id[%d]\n",
+			tbuf_id[1], buf_id[1], property->prop_id);
+
+	/*
+	 * The command node keeps an event list for destination buffers.
+	 * When a destination buffer is enqueued to the memory list, an
+	 * event is created and linked to the tail of the event list,
+	 * so the first event corresponds to the first enqueued buffer.
+	 */
+	e = list_first_entry(&c_node->event_list,
+		struct drm_exynos_ipp_send_event, base.link);
+
+	if (!e) {
+		DRM_ERROR("empty event.\n");
+		return -EINVAL;
+	}
+
+	do_gettimeofday(&now);
+	DRM_DEBUG_KMS("%s:tv_sec[%ld]tv_usec[%ld]\n"
+		, __func__, now.tv_sec, now.tv_usec);
+	e->event.tv_sec = now.tv_sec;
+	e->event.tv_usec = now.tv_usec;
+	e->event.prop_id = property->prop_id;
+
+	/* set buffer id about source destination */
+	for_each_ipp_ops(i)
+		e->event.buf_id[i] = tbuf_id[i];
+
+	spin_lock_irqsave(&drm_dev->event_lock, flags);
+	list_move_tail(&e->base.link, &e->base.file_priv->event_list);
+	wake_up_interruptible(&e->base.file_priv->event_wait);
+	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+
+	DRM_DEBUG_KMS("%s:done cmd[%d]prop_id[%d]buf_id[%d]\n", __func__,
+		property->cmd, property->prop_id, tbuf_id[EXYNOS_DRM_OPS_DST]);
+
+	return 0;
+}
+
+void ipp_sched_event(struct work_struct *work)
+{
+	struct drm_exynos_ipp_event_work *event_work =
+		(struct drm_exynos_ipp_event_work *)work;
+	struct exynos_drm_ippdrv *ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node;
+	int ret;
+
+	if (!event_work) {
+		DRM_ERROR("failed to get event_work.\n");
+		return;
+	}
+
+	DRM_DEBUG_KMS("%s:buf_id[%d]\n", __func__,
+		event_work->buf_id[EXYNOS_DRM_OPS_DST]);
+
+	ippdrv = event_work->ippdrv;
+	if (!ippdrv) {
+		DRM_ERROR("failed to get ipp driver.\n");
+		return;
+	}
+
+	c_node = ippdrv->cmd;
+	if (!c_node) {
+		DRM_ERROR("failed to get command node.\n");
+		return;
+	}
+
+	/*
+	 * IPP synchronizes the command thread with the event thread.
+	 * If userland closes the device immediately, the event thread
+	 * still completes the start_complete event so that the command
+	 * thread can finish instead of blocking.
+	 */
+	if (c_node->state != IPP_STATE_START) {
+		DRM_DEBUG_KMS("%s:bypass state[%d]prop_id[%d]\n",
+			__func__, c_node->state, c_node->property.prop_id);
+		goto err_completion;
+	}
+
+	mutex_lock(&c_node->event_lock);
+
+	ret = ipp_send_event(ippdrv, c_node, event_work->buf_id);
+	if (ret) {
+		DRM_ERROR("failed to send event.\n");
+		goto err_completion;
+	}
+
+err_completion:
+	if (ipp_is_m2m_cmd(c_node->property.cmd))
+		complete(&c_node->start_complete);
+
+	mutex_unlock(&c_node->event_lock);
+}
+
+static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
+{
+	struct ipp_context *ctx = get_ipp_context(dev);
+	struct exynos_drm_ippdrv *ippdrv;
+	int ret, count = 0;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	/* get ipp driver entry */
+	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
+		ippdrv->drm_dev = drm_dev;
+
+		ret = ipp_create_id(&ctx->ipp_idr, &ctx->ipp_lock, ippdrv,
+			&ippdrv->ipp_id);
+		if (ret) {
+			DRM_ERROR("failed to create id.\n");
+			goto err_idr;
+		}
+
+		DRM_DEBUG_KMS("%s:count[%d]ippdrv[0x%x]ipp_id[%d]\n", __func__,
+			count++, (int)ippdrv, ippdrv->ipp_id);
+
+		if (ippdrv->ipp_id == 0) {
+			DRM_ERROR("failed to get ipp_id[%d]\n",
+				ippdrv->ipp_id);
+			goto err_idr;
+		}
+
+		/* store parent device for node */
+		ippdrv->parent_dev = dev;
+
+		/* store event work queue and handler */
+		ippdrv->event_workq = ctx->event_workq;
+		ippdrv->sched_event = ipp_sched_event;
+		INIT_LIST_HEAD(&ippdrv->cmd_list);
+
+		if (is_drm_iommu_supported(drm_dev)) {
+			ret = drm_iommu_attach_device(drm_dev, ippdrv->dev);
+			if (ret) {
+				DRM_ERROR("failed to activate iommu\n");
+				goto err_iommu;
+			}
+		}
+	}
+
+	return 0;
+
+err_iommu:
+	/* get ipp driver entry */
+	list_for_each_entry_reverse(ippdrv, &exynos_drm_ippdrv_list, drv_list)
+		if (is_drm_iommu_supported(drm_dev))
+			drm_iommu_detach_device(drm_dev, ippdrv->dev);
+
+err_idr:
+	idr_remove_all(&ctx->ipp_idr);
+	idr_remove_all(&ctx->prop_idr);
+	idr_destroy(&ctx->ipp_idr);
+	idr_destroy(&ctx->prop_idr);
+	return ret;
+}
+
+static void ipp_subdrv_remove(struct drm_device *drm_dev, struct device *dev)
+{
+	struct exynos_drm_ippdrv *ippdrv;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	/* get ipp driver entry */
+	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
+		if (is_drm_iommu_supported(drm_dev))
+			drm_iommu_detach_device(drm_dev, ippdrv->dev);
+
+		ippdrv->drm_dev = NULL;
+		exynos_drm_ippdrv_unregister(ippdrv);
+	}
+}
+
+static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev,
+		struct drm_file *file)
+{
+	struct drm_exynos_file_private *file_priv = file->driver_priv;
+	struct exynos_drm_ipp_private *priv;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		DRM_ERROR("failed to allocate priv.\n");
+		return -ENOMEM;
+	}
+	priv->dev = dev;
+	file_priv->ipp_priv = priv;
+
+	INIT_LIST_HEAD(&priv->event_list);
+
+	DRM_DEBUG_KMS("%s:done priv[0x%x]\n", __func__, (int)priv);
+
+	return 0;
+}
+
+static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev,
+		struct drm_file *file)
+{
+	struct drm_exynos_file_private *file_priv = file->driver_priv;
+	struct exynos_drm_ipp_private *priv = file_priv->ipp_priv;
+	struct exynos_drm_ippdrv *ippdrv = NULL;
+	struct drm_exynos_ipp_cmd_node *c_node, *tc_node;
+	int count = 0;
+
+	DRM_DEBUG_KMS("%s:for priv[0x%x]\n", __func__, (int)priv);
+
+	if (list_empty(&exynos_drm_ippdrv_list)) {
+		DRM_DEBUG_KMS("%s:ippdrv_list is empty.\n", __func__);
+		goto err_clear;
+	}
+
+	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
+		if (list_empty(&ippdrv->cmd_list))
+			continue;
+
+		list_for_each_entry_safe(c_node, tc_node,
+			&ippdrv->cmd_list, list) {
+			DRM_DEBUG_KMS("%s:count[%d]ippdrv[0x%x]\n",
+				__func__, count++, (int)ippdrv);
+
+			if (c_node->priv == priv) {
+				/*
+				 * Userland went into an abnormal state, e.g.
+				 * the process was killed and the file closed,
+				 * so the stop cmd ctrl was never called.
+				 * Perform the stop operation here instead.
+				 */
+				if (c_node->state == IPP_STATE_START) {
+					ipp_stop_property(drm_dev, ippdrv,
+						c_node);
+					c_node->state = IPP_STATE_STOP;
+				}
+
+				ippdrv->dedicated = false;
+				ipp_clean_cmd_node(c_node);
+				if (list_empty(&ippdrv->cmd_list))
+					pm_runtime_put_sync(ippdrv->dev);
+			}
+		}
+	}
+
+err_clear:
+	kfree(priv);
+	return;
+}
+
+static int __devinit ipp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ipp_context *ctx;
+	struct exynos_drm_subdrv *subdrv;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	mutex_init(&ctx->ipp_lock);
+	mutex_init(&ctx->prop_lock);
+
+	idr_init(&ctx->ipp_idr);
+	idr_init(&ctx->prop_idr);
+
+	/*
+	 * Create a single-threaded workqueue for ipp events.
+	 * IPP drivers queue their event_work on this thread, and the
+	 * event thread then delivers the event to the user process.
+	 */
+	ctx->event_workq = create_singlethread_workqueue("ipp_event");
+	if (!ctx->event_workq) {
+		dev_err(dev, "failed to create event workqueue\n");
+		ret = -EINVAL;
+		goto err_clear;
+	}
+
+	/*
+	 * Create a single-threaded workqueue for ipp commands.
+	 * A user process creates a command node with the set property
+	 * ioctl, builds a start_work and sends it to this thread, which
+	 * then starts the property.
+	 */
+	ctx->cmd_workq = create_singlethread_workqueue("ipp_cmd");
+	if (!ctx->cmd_workq) {
+		dev_err(dev, "failed to create cmd workqueue\n");
+		ret = -EINVAL;
+		goto err_event_workq;
+	}
+
+	/* set sub driver information */
+	subdrv = &ctx->subdrv;
+	subdrv->dev = dev;
+	subdrv->probe = ipp_subdrv_probe;
+	subdrv->remove = ipp_subdrv_remove;
+	subdrv->open = ipp_subdrv_open;
+	subdrv->close = ipp_subdrv_close;
+
+	platform_set_drvdata(pdev, ctx);
+
+	ret = exynos_drm_subdrv_register(subdrv);
+	if (ret < 0) {
+		DRM_ERROR("failed to register drm ipp device.\n");
+		goto err_cmd_workq;
+	}
+
+	dev_info(&pdev->dev, "drm ipp registered successfully.\n");
+
+	return 0;
+
+err_cmd_workq:
+	destroy_workqueue(ctx->cmd_workq);
+err_event_workq:
+	destroy_workqueue(ctx->event_workq);
+err_clear:
+	kfree(ctx);
+	return ret;
+}
+
+static int __devexit ipp_remove(struct platform_device *pdev)
+{
+	struct ipp_context *ctx = platform_get_drvdata(pdev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	/* unregister sub driver */
+	exynos_drm_subdrv_unregister(&ctx->subdrv);
+
+	/* remove,destroy ipp idr */
+	idr_remove_all(&ctx->ipp_idr);
+	idr_remove_all(&ctx->prop_idr);
+	idr_destroy(&ctx->ipp_idr);
+	idr_destroy(&ctx->prop_idr);
+
+	mutex_destroy(&ctx->ipp_lock);
+	mutex_destroy(&ctx->prop_lock);
+
+	/* destroy command, event work queue */
+	destroy_workqueue(ctx->cmd_workq);
+	destroy_workqueue(ctx->event_workq);
+
+	kfree(ctx);
+
+	return 0;
+}
+
+static int ipp_power_ctrl(struct ipp_context *ctx, bool enable)
+{
+	DRM_DEBUG_KMS("%s:enable[%d]\n", __func__, enable);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int ipp_suspend(struct device *dev)
+{
+	struct ipp_context *ctx = get_ipp_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	return ipp_power_ctrl(ctx, false);
+}
+
+static int ipp_resume(struct device *dev)
+{
+	struct ipp_context *ctx = get_ipp_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!pm_runtime_suspended(dev))
+		return ipp_power_ctrl(ctx, true);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PM_RUNTIME
+static int ipp_runtime_suspend(struct device *dev)
+{
+	struct ipp_context *ctx = get_ipp_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	return ipp_power_ctrl(ctx, false);
+}
+
+static int ipp_runtime_resume(struct device *dev)
+{
+	struct ipp_context *ctx = get_ipp_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	return ipp_power_ctrl(ctx, true);
+}
+#endif
+
+static const struct dev_pm_ops ipp_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(ipp_suspend, ipp_resume)
+	SET_RUNTIME_PM_OPS(ipp_runtime_suspend, ipp_runtime_resume, NULL)
+};
+
+struct platform_driver ipp_driver = {
+	.probe		= ipp_probe,
+	.remove		= __devexit_p(ipp_remove),
+	.driver		= {
+		.name	= "exynos-drm-ipp",
+		.owner	= THIS_MODULE,
+		.pm	= &ipp_pm_ops,
+	},
+};
+
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
new file mode 100644
index 0000000..28ffac9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *	Jinyoung Jeon <jy0.jeon@samsung.com>
+ *	Sangmin Lee <lsmin.lee@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_IPP_H_
+#define _EXYNOS_DRM_IPP_H_
+
+#define for_each_ipp_ops(pos)	\
+	for (pos = 0; pos < EXYNOS_DRM_OPS_MAX; pos++)
+#define for_each_ipp_planar(pos)	\
+	for (pos = 0; pos < EXYNOS_DRM_PLANAR_MAX; pos++)
+
+#define IPP_GET_LCD_WIDTH	_IOR('F', 302, int)
+#define IPP_GET_LCD_HEIGHT	_IOR('F', 303, int)
+#define IPP_SET_WRITEBACK	_IOW('F', 304, u32)
+
+/* definition of state */
+enum drm_exynos_ipp_state {
+	IPP_STATE_IDLE,
+	IPP_STATE_START,
+	IPP_STATE_STOP,
+};
+
+/*
+ * A structure of command work information.
+ * @work: work structure.
+ * @ippdrv: current work ippdrv.
+ * @c_node: command node information.
+ * @ctrl: command control.
+ */
+struct drm_exynos_ipp_cmd_work {
+	struct work_struct	work;
+	struct exynos_drm_ippdrv	*ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node;
+	enum drm_exynos_ipp_ctrl	ctrl;
+};
+
+/*
+ * A structure of command node.
+ *
+ * @priv: IPP private information.
+ * @list: list head to command queue information.
+ * @event_list: list head of event.
+ * @mem_list: list head to source,destination memory queue information.
+ * @cmd_lock: lock for synchronization of access to ioctl.
+ * @mem_lock: lock for synchronization of access to memory nodes.
+ * @event_lock: lock for synchronization of access to scheduled event.
+ * @start_complete: completion of start of command.
+ * @stop_complete: completion of stop of command.
+ * @property: property information.
+ * @start_work: start command work structure.
+ * @stop_work: stop command work structure.
+ * @event_work: event work structure.
+ * @state: state of command node.
+ */
+struct drm_exynos_ipp_cmd_node {
+	struct exynos_drm_ipp_private *priv;
+	struct list_head	list;
+	struct list_head	event_list;
+	struct list_head	mem_list[EXYNOS_DRM_OPS_MAX];
+	struct mutex	cmd_lock;
+	struct mutex	mem_lock;
+	struct mutex	event_lock;
+	struct completion	start_complete;
+	struct completion	stop_complete;
+	struct drm_exynos_ipp_property	property;
+	struct drm_exynos_ipp_cmd_work *start_work;
+	struct drm_exynos_ipp_cmd_work *stop_work;
+	struct drm_exynos_ipp_event_work *event_work;
+	enum drm_exynos_ipp_state	state;
+};
+
+/*
+ * A structure of buffer information.
+ *
+ * @handles: Y, Cb, Cr each gem object handle.
+ * @base: Y, Cb, Cr each planar address.
+ */
+struct drm_exynos_ipp_buf_info {
+	unsigned long	handles[EXYNOS_DRM_PLANAR_MAX];
+	dma_addr_t	base[EXYNOS_DRM_PLANAR_MAX];
+};
+
+/*
+ * A structure of wb setting information.
+ *
+ * @enable: enable flag for wb.
+ * @refresh: HZ of the refresh rate.
+ */
+struct drm_exynos_ipp_set_wb {
+	__u32	enable;
+	__u32	refresh;
+};
+
+/*
+ * A structure of event work information.
+ *
+ * @work: work structure.
+ * @ippdrv: current work ippdrv.
+ * @buf_id: id of src, dst buffer.
+ */
+struct drm_exynos_ipp_event_work {
+	struct work_struct	work;
+	struct exynos_drm_ippdrv *ippdrv;
+	u32	buf_id[EXYNOS_DRM_OPS_MAX];
+};
+
+/*
+ * A structure of source,destination operations.
+ *
+ * @set_fmt: set format of image.
+ * @set_transf: set transform(rotations, flip).
+ * @set_size: set size of region.
+ * @set_addr: set address for dma.
+ */
+struct exynos_drm_ipp_ops {
+	int (*set_fmt)(struct device *dev, u32 fmt);
+	int (*set_transf)(struct device *dev,
+		enum drm_exynos_degree degree,
+		enum drm_exynos_flip flip, bool *swap);
+	int (*set_size)(struct device *dev, int swap,
+		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz);
+	int (*set_addr)(struct device *dev,
+		 struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
+		enum drm_exynos_ipp_buf_type buf_type);
+};
+
+/*
+ * A structure of ipp driver.
+ *
+ * @drv_list: list head for registered sub driver information.
+ * @parent_dev: parent device information.
+ * @dev: platform device.
+ * @drm_dev: drm device.
+ * @ipp_id: id of ipp driver.
+ * @dedicated: dedicated ipp device.
+ * @ops: source, destination operations.
+ * @event_workq: event work queue.
+ * @cmd: current command information.
+ * @cmd_list: list head for command information.
+ * @prop_list: property information of the current ipp driver.
+ * @check_property: check property about format, size, buffer.
+ * @reset: reset ipp block.
+ * @start: ipp each device start.
+ * @stop: ipp each device stop.
+ * @sched_event: work schedule handler.
+ */
+struct exynos_drm_ippdrv {
+	struct list_head	drv_list;
+	struct device	*parent_dev;
+	struct device	*dev;
+	struct drm_device	*drm_dev;
+	u32	ipp_id;
+	bool	dedicated;
+	struct exynos_drm_ipp_ops	*ops[EXYNOS_DRM_OPS_MAX];
+	struct workqueue_struct	*event_workq;
+	struct drm_exynos_ipp_cmd_node *cmd;
+	struct list_head	cmd_list;
+	struct drm_exynos_ipp_prop_list *prop_list;
+
+	int (*check_property)(struct device *dev,
+		struct drm_exynos_ipp_property *property);
+	int (*reset)(struct device *dev);
+	int (*start)(struct device *dev, enum drm_exynos_ipp_cmd cmd);
+	void (*stop)(struct device *dev, enum drm_exynos_ipp_cmd cmd);
+	void (*sched_event)(struct work_struct *work);
+};
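+
+/*
+ * A minimal registration sketch (all "my_" names are hypothetical):
+ * a hardware driver fills in an ippdrv and registers it, typically
+ * from its probe routine, as the rotator driver does.
+ *
+ *	static struct exynos_drm_ipp_ops my_src_ops = { ... };
+ *	static struct exynos_drm_ippdrv my_ippdrv;
+ *
+ *	my_ippdrv.dev = &pdev->dev;
+ *	my_ippdrv.ops[EXYNOS_DRM_OPS_SRC] = &my_src_ops;
+ *	my_ippdrv.check_property = my_check_property;
+ *	exynos_drm_ippdrv_register(&my_ippdrv);
+ */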
+
+#ifdef CONFIG_DRM_EXYNOS_IPP
+extern int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv);
+extern int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv);
+extern int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data,
+					 struct drm_file *file);
+extern int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
+					 struct drm_file *file);
+extern int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data,
+					 struct drm_file *file);
+extern int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data,
+					 struct drm_file *file);
+extern int exynos_drm_ippnb_register(struct notifier_block *nb);
+extern int exynos_drm_ippnb_unregister(struct notifier_block *nb);
+extern int exynos_drm_ippnb_send_event(unsigned long val, void *v);
+extern void ipp_sched_cmd(struct work_struct *work);
+extern void ipp_sched_event(struct work_struct *work);
+
+#else
+static inline int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv)
+{
+	return -ENODEV;
+}
+
+static inline int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv)
+{
+	return -ENODEV;
+}
+
+static inline int exynos_drm_ipp_get_property(struct drm_device *drm_dev,
+						void *data,
+						struct drm_file *file_priv)
+{
+	return -ENOTTY;
+}
+
+static inline int exynos_drm_ipp_set_property(struct drm_device *drm_dev,
+						void *data,
+						struct drm_file *file_priv)
+{
+	return -ENOTTY;
+}
+
+static inline int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev,
+						void *data,
+						struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+static inline int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev,
+						void *data,
+						struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+static inline int exynos_drm_ippnb_register(struct notifier_block *nb)
+{
+	return -ENODEV;
+}
+
+static inline int exynos_drm_ippnb_unregister(struct notifier_block *nb)
+{
+	return -ENODEV;
+}
+
+static inline int exynos_drm_ippnb_send_event(unsigned long val, void *v)
+{
+	return -ENOTTY;
+}
+#endif
+
+#endif /* _EXYNOS_DRM_IPP_H_ */
+
diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c
index 862ca1e..83efc66 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_plane.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c
@@ -40,7 +40,7 @@
  * CRTC ----------------
  *      ^ start        ^ end
  *
- * There are six cases from a to b.
+ * There are six cases from a to f.
  *
  *             <----- SCREEN ----->
  *             0                 last
@@ -93,11 +93,9 @@
 		}
 
 		overlay->dma_addr[i] = buffer->dma_addr;
-		overlay->vaddr[i] = buffer->kvaddr;
 
-		DRM_DEBUG_KMS("buffer: %d, vaddr = 0x%lx, dma_addr = 0x%lx\n",
-				i, (unsigned long)overlay->vaddr[i],
-				(unsigned long)overlay->dma_addr[i]);
+		DRM_DEBUG_KMS("buffer: %d, dma_addr = 0x%lx\n",
+				i, (unsigned long)overlay->dma_addr[i]);
 	}
 
 	actual_w = exynos_plane_get_size(crtc_x, crtc_w, crtc->mode.hdisplay);
@@ -106,16 +104,12 @@
 	if (crtc_x < 0) {
 		if (actual_w)
 			src_x -= crtc_x;
-		else
-			src_x += crtc_w;
 		crtc_x = 0;
 	}
 
 	if (crtc_y < 0) {
 		if (actual_h)
 			src_y -= crtc_y;
-		else
-			src_y += crtc_h;
 		crtc_y = 0;
 	}
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
new file mode 100644
index 0000000..1c23660
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright (C) 2012 Samsung Electronics Co.Ltd
+ * Authors:
+ *	YoungJun Cho <yj44.cho@samsung.com>
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "regs-rotator.h"
+#include "exynos_drm.h"
+#include "exynos_drm_ipp.h"
+
+/*
+ * Rotator supports image crop/rotation and input/output DMA operations.
+ * Input DMA reads image data from memory and output DMA writes image
+ * data back to memory.
+ *
+ * M2M operation : supports crop/scale/rotation/csc and so on.
+ * Memory ----> Rotator H/W ----> Memory.
+ */
+
+/*
+ * TODO
+ * 1. check suspend/resume api if needed.
+ * 2. need to check use case platform_device_id.
+ * 3. check src/dst size width, height.
+ * 4. need to add supported list in prop_list.
+ */
+
+#define get_rot_context(dev)	platform_get_drvdata(to_platform_device(dev))
+#define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
+					struct rot_context, ippdrv)
+#define rot_read(offset)		readl(rot->regs + (offset))
+#define rot_write(cfg, offset)	writel(cfg, rot->regs + (offset))
+
+enum rot_irq_status {
+	ROT_IRQ_STATUS_COMPLETE	= 8,
+	ROT_IRQ_STATUS_ILLEGAL	= 9,
+};
+
+/*
+ * A structure of limitation.
+ *
+ * @min_w: minimum width.
+ * @min_h: minimum height.
+ * @max_w: maximum width.
+ * @max_h: maximum height.
+ * @align: align size.
+ */
+struct rot_limit {
+	u32	min_w;
+	u32	min_h;
+	u32	max_w;
+	u32	max_h;
+	u32	align;
+};
+
+/*
+ * A structure of limitation table.
+ *
+ * @ycbcr420_2p: case of YUV.
+ * @rgb888: case of RGB.
+ */
+struct rot_limit_table {
+	struct rot_limit	ycbcr420_2p;
+	struct rot_limit	rgb888;
+};
+
+/*
+ * A structure of rotator context.
+ * @ippdrv: embedded ipp driver structure used for registration.
+ * @regs_res: register resources.
+ * @regs: memory mapped io registers.
+ * @clock: rotator gate clock.
+ * @limit_tbl: limitation of rotator.
+ * @irq: irq number.
+ * @cur_buf_id: current operation buffer id.
+ * @suspended: suspended state.
+ */
+struct rot_context {
+	struct exynos_drm_ippdrv	ippdrv;
+	struct resource	*regs_res;
+	void __iomem	*regs;
+	struct clk	*clock;
+	struct rot_limit_table	*limit_tbl;
+	int	irq;
+	int	cur_buf_id[EXYNOS_DRM_OPS_MAX];
+	bool	suspended;
+};
+
+static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
+{
+	u32 val = rot_read(ROT_CONFIG);
+
+	if (enable)
+		val |= ROT_CONFIG_IRQ;
+	else
+		val &= ~ROT_CONFIG_IRQ;
+
+	rot_write(val, ROT_CONFIG);
+}
+
+static u32 rotator_reg_get_fmt(struct rot_context *rot)
+{
+	u32 val = rot_read(ROT_CONTROL);
+
+	val &= ROT_CONTROL_FMT_MASK;
+
+	return val;
+}
+
+static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
+{
+	u32 val = rot_read(ROT_STATUS);
+
+	val = ROT_STATUS_IRQ(val);
+
+	if (val == ROT_STATUS_IRQ_VAL_COMPLETE)
+		return ROT_IRQ_STATUS_COMPLETE;
+
+	return ROT_IRQ_STATUS_ILLEGAL;
+}
+
+static irqreturn_t rotator_irq_handler(int irq, void *arg)
+{
+	struct rot_context *rot = arg;
+	struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
+	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->cmd;
+	struct drm_exynos_ipp_event_work *event_work = c_node->event_work;
+	enum rot_irq_status irq_status;
+	u32 val;
+
+	/* Get execution result */
+	irq_status = rotator_reg_get_irq_status(rot);
+
+	/* clear status */
+	val = rot_read(ROT_STATUS);
+	val |= ROT_STATUS_IRQ_PENDING((u32)irq_status);
+	rot_write(val, ROT_STATUS);
+
+	if (irq_status == ROT_IRQ_STATUS_COMPLETE) {
+		event_work->ippdrv = ippdrv;
+		event_work->buf_id[EXYNOS_DRM_OPS_DST] =
+			rot->cur_buf_id[EXYNOS_DRM_OPS_DST];
+		queue_work(ippdrv->event_workq,
+			(struct work_struct *)event_work);
+	} else
+		DRM_ERROR("the SFR is set illegally\n");
+
+	return IRQ_HANDLED;
+}
+
+static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize,
+		u32 *vsize)
+{
+	struct rot_limit_table *limit_tbl = rot->limit_tbl;
+	struct rot_limit *limit;
+	u32 mask, val;
+
+	/* Get size limit */
+	if (fmt == ROT_CONTROL_FMT_RGB888)
+		limit = &limit_tbl->rgb888;
+	else
+		limit = &limit_tbl->ycbcr420_2p;
+
+	/* Get mask for rounding to nearest aligned val */
+	mask = ~((1 << limit->align) - 1);
+
+	/* Set aligned width */
+	val = ROT_ALIGN(*hsize, limit->align, mask);
+	if (val < limit->min_w)
+		*hsize = ROT_MIN(limit->min_w, mask);
+	else if (val > limit->max_w)
+		*hsize = ROT_MAX(limit->max_w, mask);
+	else
+		*hsize = val;
+
+	/* Set aligned height */
+	val = ROT_ALIGN(*vsize, limit->align, mask);
+	if (val < limit->min_h)
+		*vsize = ROT_MIN(limit->min_h, mask);
+	else if (val > limit->max_h)
+		*vsize = ROT_MAX(limit->max_h, mask);
+	else
+		*vsize = val;
+}
+
+static int rotator_src_set_fmt(struct device *dev, u32 fmt)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	u32 val;
+
+	val = rot_read(ROT_CONTROL);
+	val &= ~ROT_CONTROL_FMT_MASK;
+
+	switch (fmt) {
+	case DRM_FORMAT_NV12:
+		val |= ROT_CONTROL_FMT_YCBCR420_2P;
+		break;
+	case DRM_FORMAT_XRGB8888:
+		val |= ROT_CONTROL_FMT_RGB888;
+		break;
+	default:
+		DRM_ERROR("invalid image format\n");
+		return -EINVAL;
+	}
+
+	rot_write(val, ROT_CONTROL);
+
+	return 0;
+}
+
+static inline bool rotator_check_reg_fmt(u32 fmt)
+{
+	if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) ||
+	    (fmt == ROT_CONTROL_FMT_RGB888))
+		return true;
+
+	return false;
+}
+
+static int rotator_src_set_size(struct device *dev, int swap,
+		struct drm_exynos_pos *pos,
+		struct drm_exynos_sz *sz)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	u32 fmt, hsize, vsize;
+	u32 val;
+
+	/* Get format */
+	fmt = rotator_reg_get_fmt(rot);
+	if (!rotator_check_reg_fmt(fmt)) {
+		DRM_ERROR("%s:invalid format.\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Align buffer size */
+	hsize = sz->hsize;
+	vsize = sz->vsize;
+	rotator_align_size(rot, fmt, &hsize, &vsize);
+
+	/* Set buffer size configuration */
+	val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+	rot_write(val, ROT_SRC_BUF_SIZE);
+
+	/* Set crop image position configuration */
+	val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+	rot_write(val, ROT_SRC_CROP_POS);
+	val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w);
+	rot_write(val, ROT_SRC_CROP_SIZE);
+
+	return 0;
+}
+
+static int rotator_src_set_addr(struct device *dev,
+		struct drm_exynos_ipp_buf_info *buf_info,
+		u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
+	u32 val, fmt, hsize, vsize;
+	int i;
+
+	/* Set current buf_id */
+	rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id;
+
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		/* Set address configuration */
+		for_each_ipp_planar(i)
+			addr[i] = buf_info->base[i];
+
+		/* Get format */
+		fmt = rotator_reg_get_fmt(rot);
+		if (!rotator_check_reg_fmt(fmt)) {
+			DRM_ERROR("%s:invalid format.\n", __func__);
+			return -EINVAL;
+		}
+
+		/* Re-set cb planar for NV12 format */
+		if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
+		    !addr[EXYNOS_DRM_PLANAR_CB]) {
+
+			val = rot_read(ROT_SRC_BUF_SIZE);
+			hsize = ROT_GET_BUF_SIZE_W(val);
+			vsize = ROT_GET_BUF_SIZE_H(val);
+
+			/* Set cb planar */
+			addr[EXYNOS_DRM_PLANAR_CB] =
+				addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
+		}
+
+		for_each_ipp_planar(i)
+			rot_write(addr[i], ROT_SRC_BUF_ADDR(i));
+		break;
+	case IPP_BUF_DEQUEUE:
+		for_each_ipp_planar(i)
+			rot_write(0x0, ROT_SRC_BUF_ADDR(i));
+		break;
+	default:
+		/* Nothing to do */
+		break;
+	}
+
+	return 0;
+}
+
+static int rotator_dst_set_transf(struct device *dev,
+		enum drm_exynos_degree degree,
+		enum drm_exynos_flip flip, bool *swap)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	u32 val;
+
+	/* Set transform configuration */
+	val = rot_read(ROT_CONTROL);
+	val &= ~ROT_CONTROL_FLIP_MASK;
+
+	switch (flip) {
+	case EXYNOS_DRM_FLIP_VERTICAL:
+		val |= ROT_CONTROL_FLIP_VERTICAL;
+		break;
+	case EXYNOS_DRM_FLIP_HORIZONTAL:
+		val |= ROT_CONTROL_FLIP_HORIZONTAL;
+		break;
+	default:
+		/* Flip None */
+		break;
+	}
+
+	val &= ~ROT_CONTROL_ROT_MASK;
+
+	switch (degree) {
+	case EXYNOS_DRM_DEGREE_90:
+		val |= ROT_CONTROL_ROT_90;
+		break;
+	case EXYNOS_DRM_DEGREE_180:
+		val |= ROT_CONTROL_ROT_180;
+		break;
+	case EXYNOS_DRM_DEGREE_270:
+		val |= ROT_CONTROL_ROT_270;
+		break;
+	default:
+		/* Rotation 0 Degree */
+		break;
+	}
+
+	rot_write(val, ROT_CONTROL);
+
+	/* Check degree for setting buffer size swap */
+	if ((degree == EXYNOS_DRM_DEGREE_90) ||
+	    (degree == EXYNOS_DRM_DEGREE_270))
+		*swap = true;
+	else
+		*swap = false;
+
+	return 0;
+}
+
+static int rotator_dst_set_size(struct device *dev, int swap,
+		struct drm_exynos_pos *pos,
+		struct drm_exynos_sz *sz)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	u32 val, fmt, hsize, vsize;
+
+	/* Get format */
+	fmt = rotator_reg_get_fmt(rot);
+	if (!rotator_check_reg_fmt(fmt)) {
+		DRM_ERROR("%s:invalid format.\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Align buffer size */
+	hsize = sz->hsize;
+	vsize = sz->vsize;
+	rotator_align_size(rot, fmt, &hsize, &vsize);
+
+	/* Set buffer size configuration */
+	val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+	rot_write(val, ROT_DST_BUF_SIZE);
+
+	/* Set crop image position configuration */
+	val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+	rot_write(val, ROT_DST_CROP_POS);
+
+	return 0;
+}
+
+static int rotator_dst_set_addr(struct device *dev,
+		struct drm_exynos_ipp_buf_info *buf_info,
+		u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
+	u32 val, fmt, hsize, vsize;
+	int i;
+
+	/* Set current buf_id */
+	rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
+
+	switch (buf_type) {
+	case IPP_BUF_ENQUEUE:
+		/* Set address configuration */
+		for_each_ipp_planar(i)
+			addr[i] = buf_info->base[i];
+
+		/* Get format */
+		fmt = rotator_reg_get_fmt(rot);
+		if (!rotator_check_reg_fmt(fmt)) {
+			DRM_ERROR("%s:invalid format.\n", __func__);
+			return -EINVAL;
+		}
+
+		/* Re-set cb planar for NV12 format */
+		if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
+		    !addr[EXYNOS_DRM_PLANAR_CB]) {
+			/* Get buf size */
+			val = rot_read(ROT_DST_BUF_SIZE);
+
+			hsize = ROT_GET_BUF_SIZE_W(val);
+			vsize = ROT_GET_BUF_SIZE_H(val);
+
+			/* Set cb planar */
+			addr[EXYNOS_DRM_PLANAR_CB] =
+				addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
+		}
+
+		for_each_ipp_planar(i)
+			rot_write(addr[i], ROT_DST_BUF_ADDR(i));
+		break;
+	case IPP_BUF_DEQUEUE:
+		for_each_ipp_planar(i)
+			rot_write(0x0, ROT_DST_BUF_ADDR(i));
+		break;
+	default:
+		/* Nothing to do */
+		break;
+	}
+
+	return 0;
+}
+
+static struct exynos_drm_ipp_ops rot_src_ops = {
+	.set_fmt	=	rotator_src_set_fmt,
+	.set_size	=	rotator_src_set_size,
+	.set_addr	=	rotator_src_set_addr,
+};
+
+static struct exynos_drm_ipp_ops rot_dst_ops = {
+	.set_transf	=	rotator_dst_set_transf,
+	.set_size	=	rotator_dst_set_size,
+	.set_addr	=	rotator_dst_set_addr,
+};
+
+static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
+{
+	struct drm_exynos_ipp_prop_list *prop_list;
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	prop_list = devm_kzalloc(ippdrv->dev, sizeof(*prop_list), GFP_KERNEL);
+	if (!prop_list) {
+		DRM_ERROR("failed to alloc property list.\n");
+		return -ENOMEM;
+	}
+
+	prop_list->version = 1;
+	prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
+				(1 << EXYNOS_DRM_FLIP_HORIZONTAL);
+	prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
+				(1 << EXYNOS_DRM_DEGREE_90) |
+				(1 << EXYNOS_DRM_DEGREE_180) |
+				(1 << EXYNOS_DRM_DEGREE_270);
+	prop_list->csc = 0;
+	prop_list->crop = 0;
+	prop_list->scale = 0;
+
+	ippdrv->prop_list = prop_list;
+
+	return 0;
+}
+
+static inline bool rotator_check_drm_fmt(u32 fmt)
+{
+	switch (fmt) {
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_NV12:
+		return true;
+	default:
+		DRM_DEBUG_KMS("%s:not support format\n", __func__);
+		return false;
+	}
+}
+
+static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip)
+{
+	switch (flip) {
+	case EXYNOS_DRM_FLIP_NONE:
+	case EXYNOS_DRM_FLIP_VERTICAL:
+	case EXYNOS_DRM_FLIP_HORIZONTAL:
+		return true;
+	default:
+		DRM_DEBUG_KMS("%s:invalid flip\n", __func__);
+		return false;
+	}
+}
+
+static int rotator_ippdrv_check_property(struct device *dev,
+		struct drm_exynos_ipp_property *property)
+{
+	struct drm_exynos_ipp_config *src_config =
+					&property->config[EXYNOS_DRM_OPS_SRC];
+	struct drm_exynos_ipp_config *dst_config =
+					&property->config[EXYNOS_DRM_OPS_DST];
+	struct drm_exynos_pos *src_pos = &src_config->pos;
+	struct drm_exynos_pos *dst_pos = &dst_config->pos;
+	struct drm_exynos_sz *src_sz = &src_config->sz;
+	struct drm_exynos_sz *dst_sz = &dst_config->sz;
+	bool swap = false;
+
+	/* Check format configuration */
+	if (src_config->fmt != dst_config->fmt) {
+		DRM_DEBUG_KMS("%s:not support csc feature\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!rotator_check_drm_fmt(dst_config->fmt)) {
+		DRM_DEBUG_KMS("%s:invalid format\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Check transform configuration */
+	if (src_config->degree != EXYNOS_DRM_DEGREE_0) {
+		DRM_DEBUG_KMS("%s:not support source-side rotation\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	switch (dst_config->degree) {
+	case EXYNOS_DRM_DEGREE_90:
+	case EXYNOS_DRM_DEGREE_270:
+		swap = true;
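+		/* fall through */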
+	case EXYNOS_DRM_DEGREE_0:
+	case EXYNOS_DRM_DEGREE_180:
+		/* No problem */
+		break;
+	default:
+		DRM_DEBUG_KMS("%s:invalid degree\n", __func__);
+		return -EINVAL;
+	}
+
+	if (src_config->flip != EXYNOS_DRM_FLIP_NONE) {
+		DRM_DEBUG_KMS("%s:not support source-side flip\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!rotator_check_drm_flip(dst_config->flip)) {
+		DRM_DEBUG_KMS("%s:invalid flip\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Check size configuration */
+	if ((src_pos->x + src_pos->w > src_sz->hsize) ||
+		(src_pos->y + src_pos->h > src_sz->vsize)) {
+		DRM_DEBUG_KMS("%s:out of source buffer bound\n", __func__);
+		return -EINVAL;
+	}
+
+	if (swap) {
+		if ((dst_pos->x + dst_pos->h > dst_sz->vsize) ||
+			(dst_pos->y + dst_pos->w > dst_sz->hsize)) {
+			DRM_DEBUG_KMS("%s:out of destination buffer bound\n",
+				__func__);
+			return -EINVAL;
+		}
+
+		if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) {
+			DRM_DEBUG_KMS("%s:not support scale feature\n",
+				__func__);
+			return -EINVAL;
+		}
+	} else {
+		if ((dst_pos->x + dst_pos->w > dst_sz->hsize) ||
+			(dst_pos->y + dst_pos->h > dst_sz->vsize)) {
+			DRM_DEBUG_KMS("%s:out of destination buffer bound\n",
+				__func__);
+			return -EINVAL;
+		}
+
+		if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) {
+			DRM_DEBUG_KMS("%s:not support scale feature\n",
+				__func__);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	u32 val;
+
+	if (rot->suspended) {
+		DRM_ERROR("suspended state\n");
+		return -EPERM;
+	}
+
+	if (cmd != IPP_CMD_M2M) {
+		DRM_ERROR("not support cmd: %d\n", cmd);
+		return -EINVAL;
+	}
+
+	/* Set interrupt enable */
+	rotator_reg_set_irq(rot, true);
+
+	val = rot_read(ROT_CONTROL);
+	val |= ROT_CONTROL_START;
+
+	rot_write(val, ROT_CONTROL);
+
+	return 0;
+}
+
+static int __devinit rotator_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rot_context *rot;
+	struct exynos_drm_ippdrv *ippdrv;
+	int ret;
+
+	rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL);
+	if (!rot) {
+		dev_err(dev, "failed to allocate rot\n");
+		return -ENOMEM;
+	}
+
+	rot->limit_tbl = (struct rot_limit_table *)
+				platform_get_device_id(pdev)->driver_data;
+
+	rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!rot->regs_res) {
+		dev_err(dev, "failed to find registers\n");
+		ret = -ENOENT;
+		goto err_get_resource;
+	}
+
+	rot->regs = devm_request_and_ioremap(dev, rot->regs_res);
+	if (!rot->regs) {
+		dev_err(dev, "failed to map register\n");
+		ret = -ENXIO;
+		goto err_get_resource;
+	}
+
+	rot->irq = platform_get_irq(pdev, 0);
+	if (rot->irq < 0) {
+		dev_err(dev, "failed to get irq\n");
+		ret = rot->irq;
+		goto err_get_irq;
+	}
+
+	ret = request_threaded_irq(rot->irq, NULL, rotator_irq_handler,
+			IRQF_ONESHOT, "drm_rotator", rot);
+	if (ret < 0) {
+		dev_err(dev, "failed to request irq\n");
+		goto err_get_irq;
+	}
+
+	rot->clock = clk_get(dev, "rotator");
+	if (IS_ERR_OR_NULL(rot->clock)) {
+		dev_err(dev, "failed to get clock\n");
+		ret = PTR_ERR(rot->clock);
+		goto err_clk_get;
+	}
+
+	pm_runtime_enable(dev);
+
+	ippdrv = &rot->ippdrv;
+	ippdrv->dev = dev;
+	ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops;
+	ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops;
+	ippdrv->check_property = rotator_ippdrv_check_property;
+	ippdrv->start = rotator_ippdrv_start;
+	ret = rotator_init_prop_list(ippdrv);
+	if (ret < 0) {
+		dev_err(dev, "failed to init property list.\n");
+		goto err_ippdrv_register;
+	}
+
+	DRM_DEBUG_KMS("%s:ippdrv[0x%x]\n", __func__, (int)ippdrv);
+
+	platform_set_drvdata(pdev, rot);
+
+	ret = exynos_drm_ippdrv_register(ippdrv);
+	if (ret < 0) {
+		dev_err(dev, "failed to register drm rotator device\n");
+		goto err_ippdrv_register;
+	}
+
+	dev_info(dev, "The exynos rotator is probed successfully\n");
+
+	return 0;
+
+err_ippdrv_register:
+	devm_kfree(dev, ippdrv->prop_list);
+	pm_runtime_disable(dev);
+	clk_put(rot->clock);
+err_clk_get:
+	free_irq(rot->irq, rot);
+err_get_irq:
+	devm_iounmap(dev, rot->regs);
+err_get_resource:
+	devm_kfree(dev, rot);
+	return ret;
+}
+
+static int __devexit rotator_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rot_context *rot = dev_get_drvdata(dev);
+	struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
+
+	devm_kfree(dev, ippdrv->prop_list);
+	exynos_drm_ippdrv_unregister(ippdrv);
+
+	pm_runtime_disable(dev);
+	clk_put(rot->clock);
+
+	free_irq(rot->irq, rot);
+	devm_iounmap(dev, rot->regs);
+
+	devm_kfree(dev, rot);
+
+	return 0;
+}
+
+struct rot_limit_table rot_limit_tbl = {
+	.ycbcr420_2p = {
+		.min_w = 32,
+		.min_h = 32,
+		.max_w = SZ_32K,
+		.max_h = SZ_32K,
+		.align = 3,
+	},
+	.rgb888 = {
+		.min_w = 8,
+		.min_h = 8,
+		.max_w = SZ_8K,
+		.max_h = SZ_8K,
+		.align = 2,
+	},
+};
+
+struct platform_device_id rotator_driver_ids[] = {
+	{
+		.name		= "exynos-rot",
+		.driver_data	= (unsigned long)&rot_limit_tbl,
+	},
+	{},
+};
+
+static int rotator_clk_ctrl(struct rot_context *rot, bool enable)
+{
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (enable) {
+		clk_enable(rot->clock);
+		rot->suspended = false;
+	} else {
+		clk_disable(rot->clock);
+		rot->suspended = true;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int rotator_suspend(struct device *dev)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	return rotator_clk_ctrl(rot, false);
+}
+
+static int rotator_resume(struct device *dev)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	if (!pm_runtime_suspended(dev))
+		return rotator_clk_ctrl(rot, true);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PM_RUNTIME
+static int rotator_runtime_suspend(struct device *dev)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	return rotator_clk_ctrl(rot, false);
+}
+
+static int rotator_runtime_resume(struct device *dev)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+
+	DRM_DEBUG_KMS("%s\n", __func__);
+
+	return rotator_clk_ctrl(rot, true);
+}
+#endif
+
+static const struct dev_pm_ops rotator_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(rotator_suspend, rotator_resume)
+	SET_RUNTIME_PM_OPS(rotator_runtime_suspend, rotator_runtime_resume,
+									NULL)
+};
+
+struct platform_driver rotator_driver = {
+	.probe		= rotator_probe,
+	.remove		= __devexit_p(rotator_remove),
+	.id_table	= rotator_driver_ids,
+	.driver		= {
+		.name	= "exynos-rot",
+		.owner	= THIS_MODULE,
+		.pm	= &rotator_pm_ops,
+	},
+};
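
rotator_align_size() above rounds the requested width/height to the hardware's
alignment and clamps the result against the per-format limits in rot_limit_tbl.
A minimal user-space sketch of that computation; the exact semantics of the
ROT_ALIGN()/ROT_MIN()/ROT_MAX() macros live in regs-rotator.h (not shown in
this hunk), so the rounding behavior below is an assumption, not the kernel's
definition:

#include <stdio.h>

/* sketch only: round-to-nearest and clamp semantics are assumed */
static unsigned int align_size(unsigned int size, unsigned int align,
			       unsigned int min, unsigned int max)
{
	unsigned int mask = ~((1u << align) - 1);
	/* round to the nearest multiple of (1 << align) */
	unsigned int val = (size + (1u << (align - 1))) & mask;

	if (val < min)
		return (min + ~mask) & mask;	/* round min up */
	if (val > max)
		return max & mask;		/* round max down */
	return val;
}

int main(void)
{
	/* RGB888 limits from rot_limit_tbl: min 8, max 8K, align 2 */
	printf("%u\n", align_size(1275, 2, 8, 8192));	/* prints 1276 */
	return 0;
}
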
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.h b/drivers/gpu/drm/exynos/exynos_drm_rotator.h
new file mode 100644
index 0000000..a2d7a14
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	YoungJun Cho <yj44.cho@samsung.com>
+ *	Eunchul Kim <chulspro.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef	_EXYNOS_DRM_ROTATOR_H_
+#define	_EXYNOS_DRM_ROTATOR_H_
+
+/* TODO */
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index e4b8a8f..99bfc38 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -39,7 +39,6 @@
 	unsigned int		fb_height;
 	unsigned int		bpp;
 	dma_addr_t		dma_addr;
-	void __iomem		*vaddr;
 	unsigned int		buf_offsize;
 	unsigned int		line_size;	/* bytes */
 	bool			enabled;
@@ -294,7 +293,6 @@
 	win_data->fb_width = overlay->fb_width;
 	win_data->fb_height = overlay->fb_height;
 	win_data->dma_addr = overlay->dma_addr[0] + offset;
-	win_data->vaddr = overlay->vaddr[0] + offset;
 	win_data->bpp = overlay->bpp;
 	win_data->buf_offsize = (overlay->fb_width - overlay->crtc_width) *
 				(overlay->bpp >> 3);
@@ -309,9 +307,7 @@
 			win_data->offset_x, win_data->offset_y);
 	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
 			win_data->ovl_width, win_data->ovl_height);
-	DRM_DEBUG_KMS("paddr = 0x%lx, vaddr = 0x%lx\n",
-			(unsigned long)win_data->dma_addr,
-			(unsigned long)win_data->vaddr);
+	DRM_DEBUG_KMS("paddr = 0x%lx\n", (unsigned long)win_data->dma_addr);
 	DRM_DEBUG_KMS("fb_width = %d, crtc_width = %d\n",
 			overlay->fb_width, overlay->crtc_width);
 }
@@ -382,7 +378,6 @@
 	struct drm_pending_vblank_event *e, *t;
 	struct timeval now;
 	unsigned long flags;
-	bool is_checked = false;
 
 	spin_lock_irqsave(&drm_dev->event_lock, flags);
 
@@ -392,8 +387,6 @@
 		if (crtc != e->pipe)
 			continue;
 
-		is_checked = true;
-
 		do_gettimeofday(&now);
 		e->event.sequence = 0;
 		e->event.tv_sec = now.tv_sec;
@@ -401,22 +394,7 @@
 
 		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
 		wake_up_interruptible(&e->base.file_priv->event_wait);
-	}
-
-	if (is_checked) {
-		/*
-		 * call drm_vblank_put only in case that drm_vblank_get was
-		 * called.
-		 */
-		if (atomic_read(&drm_dev->vblank_refcount[crtc]) > 0)
-			drm_vblank_put(drm_dev, crtc);
-
-		/*
-		 * don't off vblank if vblank_disable_allowed is 1,
-		 * because vblank would be off by timer handler.
-		 */
-		if (!drm_dev->vblank_disable_allowed)
-			drm_vblank_off(drm_dev, crtc);
+		drm_vblank_put(drm_dev, crtc);
 	}
 
 	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
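
The hunk above drops the is_checked/vblank_refcount bookkeeping and simply
calls drm_vblank_put() once per completed event. That balances because every
queued page flip takes one vblank reference via drm_vblank_get(); a toy model
of the invariant (vblank_get()/vblank_put() are stand-ins for the DRM calls,
not the real API):

#include <assert.h>

static int vblank_refcount;

static void vblank_get(void) { vblank_refcount++; }	/* per queued flip */
static void vblank_put(void) { vblank_refcount--; }	/* per completed event */

int main(void)
{
	int i, pending = 3;

	for (i = 0; i < pending; i++)
		vblank_get();

	/* the completion loop drops exactly one reference per event */
	for (i = 0; i < pending; i++)
		vblank_put();

	assert(vblank_refcount == 0);
	return 0;
}
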
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 2c115f8..2c46b6c 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -50,6 +50,29 @@
 #define MAX_HEIGHT		1080
 #define get_hdmi_context(dev)	platform_get_drvdata(to_platform_device(dev))
 
+/* AVI header and aspect ratio */
+#define HDMI_AVI_VERSION		0x02
+#define HDMI_AVI_LENGTH		0x0D
+#define AVI_PIC_ASPECT_RATIO_16_9	(2 << 4)
+#define AVI_SAME_AS_PIC_ASPECT_RATIO	8
+
+/* AUI header info */
+#define HDMI_AUI_VERSION	0x01
+#define HDMI_AUI_LENGTH	0x0A
+
+/* HDMI InfoFrame types used to configure the HDMI output packets: AUI and AVI */
+enum HDMI_PACKET_TYPE {
+	/* refer to Table 5-8 Packet Type in HDMI specification v1.4a */
+	/* InfoFrame packet type */
+	HDMI_PACKET_TYPE_INFOFRAME = 0x80,
+	/* Vendor-Specific InfoFrame */
+	HDMI_PACKET_TYPE_VSI = HDMI_PACKET_TYPE_INFOFRAME + 1,
+	/* Auxiliary Video information InfoFrame */
+	HDMI_PACKET_TYPE_AVI = HDMI_PACKET_TYPE_INFOFRAME + 2,
+	/* Audio information InfoFrame */
+	HDMI_PACKET_TYPE_AUI = HDMI_PACKET_TYPE_INFOFRAME + 4
+};
+
 enum hdmi_type {
 	HDMI_TYPE13,
 	HDMI_TYPE14,
@@ -74,6 +97,7 @@
 	struct mutex			hdmi_mutex;
 
 	void __iomem			*regs;
+	void				*parent_ctx;
 	int				external_irq;
 	int				internal_irq;
 
@@ -84,7 +108,6 @@
 	int cur_conf;
 
 	struct hdmi_resources		res;
-	void				*parent_ctx;
 
 	int				hpd_gpio;
 
@@ -182,6 +205,7 @@
 	int height;
 	int vrefresh;
 	bool interlace;
+	int cea_video_id;
 	const u8 *hdmiphy_data;
 	const struct hdmi_v13_preset_conf *conf;
 };
@@ -353,15 +377,20 @@
 };
 
 static const struct hdmi_v13_conf hdmi_v13_confs[] = {
-	{ 1280, 720, 60, false, hdmiphy_v13_conf74_25, &hdmi_v13_conf_720p60 },
-	{ 1280, 720, 50, false, hdmiphy_v13_conf74_25, &hdmi_v13_conf_720p60 },
-	{ 720, 480, 60, false, hdmiphy_v13_conf27_027, &hdmi_v13_conf_480p },
-	{ 1920, 1080, 50, true, hdmiphy_v13_conf74_25, &hdmi_v13_conf_1080i50 },
-	{ 1920, 1080, 50, false, hdmiphy_v13_conf148_5,
-				 &hdmi_v13_conf_1080p50 },
-	{ 1920, 1080, 60, true, hdmiphy_v13_conf74_25, &hdmi_v13_conf_1080i60 },
-	{ 1920, 1080, 60, false, hdmiphy_v13_conf148_5,
-				 &hdmi_v13_conf_1080p60 },
+	{ 1280, 720, 60, false, 4, hdmiphy_v13_conf74_25,
+			&hdmi_v13_conf_720p60 },
+	{ 1280, 720, 50, false, 19, hdmiphy_v13_conf74_25,
+			&hdmi_v13_conf_720p60 },
+	{ 720, 480, 60, false, 3, hdmiphy_v13_conf27_027,
+			&hdmi_v13_conf_480p },
+	{ 1920, 1080, 50, true, 20, hdmiphy_v13_conf74_25,
+			&hdmi_v13_conf_1080i50 },
+	{ 1920, 1080, 50, false, 31, hdmiphy_v13_conf148_5,
+			&hdmi_v13_conf_1080p50 },
+	{ 1920, 1080, 60, true, 5, hdmiphy_v13_conf74_25,
+			&hdmi_v13_conf_1080i60 },
+	{ 1920, 1080, 60, false, 16, hdmiphy_v13_conf148_5,
+			&hdmi_v13_conf_1080p60 },
 };
 
 /* HDMI Version 1.4 */
@@ -479,6 +508,7 @@
 	int height;
 	int vrefresh;
 	bool interlace;
+	int cea_video_id;
 	const u8 *hdmiphy_data;
 	const struct hdmi_preset_conf *conf;
 };
@@ -934,16 +964,21 @@
 };
 
 static const struct hdmi_conf hdmi_confs[] = {
-	{ 720, 480, 60, false, hdmiphy_conf27_027, &hdmi_conf_480p60 },
-	{ 1280, 720, 50, false, hdmiphy_conf74_25, &hdmi_conf_720p50 },
-	{ 1280, 720, 60, false, hdmiphy_conf74_25, &hdmi_conf_720p60 },
-	{ 1920, 1080, 50, true, hdmiphy_conf74_25, &hdmi_conf_1080i50 },
-	{ 1920, 1080, 60, true, hdmiphy_conf74_25, &hdmi_conf_1080i60 },
-	{ 1920, 1080, 30, false, hdmiphy_conf74_176, &hdmi_conf_1080p30 },
-	{ 1920, 1080, 50, false, hdmiphy_conf148_5, &hdmi_conf_1080p50 },
-	{ 1920, 1080, 60, false, hdmiphy_conf148_5, &hdmi_conf_1080p60 },
+	{ 720, 480, 60, false, 3, hdmiphy_conf27_027, &hdmi_conf_480p60 },
+	{ 1280, 720, 50, false, 19, hdmiphy_conf74_25, &hdmi_conf_720p50 },
+	{ 1280, 720, 60, false, 4, hdmiphy_conf74_25, &hdmi_conf_720p60 },
+	{ 1920, 1080, 50, true, 20, hdmiphy_conf74_25, &hdmi_conf_1080i50 },
+	{ 1920, 1080, 60, true, 5, hdmiphy_conf74_25, &hdmi_conf_1080i60 },
+	{ 1920, 1080, 30, false, 34, hdmiphy_conf74_176, &hdmi_conf_1080p30 },
+	{ 1920, 1080, 50, false, 31, hdmiphy_conf148_5, &hdmi_conf_1080p50 },
+	{ 1920, 1080, 60, false, 16, hdmiphy_conf148_5, &hdmi_conf_1080p60 },
 };
 
+struct hdmi_infoframe {
+	enum HDMI_PACKET_TYPE type;
+	u8 ver;
+	u8 len;
+};
 
 static inline u32 hdmi_reg_read(struct hdmi_context *hdata, u32 reg_id)
 {
@@ -1267,6 +1302,88 @@
 	return hdmi_v14_conf_index(mode);
 }
 
+static u8 hdmi_chksum(struct hdmi_context *hdata,
+			u32 start, u8 len, u32 hdr_sum)
+{
+	int i;
+
+	/*
+	 * hdr_sum: header0 + header1 + header2
+	 * start: start address of packet byte1
+	 * len: packet bytes - 1
+	 */
+	for (i = 0; i < len; ++i)
+		hdr_sum += 0xff & hdmi_reg_read(hdata, start + i * 4);
+
+	/* return 2's complement of 8 bit hdr_sum */
+	return (u8)(~(hdr_sum & 0xff) + 1);
+}
+
+static void hdmi_reg_infoframe(struct hdmi_context *hdata,
+			struct hdmi_infoframe *infoframe)
+{
+	u32 hdr_sum;
+	u8 chksum;
+	u32 aspect_ratio;
+	u32 mod;
+	u32 vic;
+
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	mod = hdmi_reg_read(hdata, HDMI_MODE_SEL);
+	if (hdata->dvi_mode) {
+		hdmi_reg_writeb(hdata, HDMI_VSI_CON,
+				HDMI_VSI_CON_DO_NOT_TRANSMIT);
+		hdmi_reg_writeb(hdata, HDMI_AVI_CON,
+				HDMI_AVI_CON_DO_NOT_TRANSMIT);
+		hdmi_reg_writeb(hdata, HDMI_AUI_CON, HDMI_AUI_CON_NO_TRAN);
+		return;
+	}
+
+	switch (infoframe->type) {
+	case HDMI_PACKET_TYPE_AVI:
+		hdmi_reg_writeb(hdata, HDMI_AVI_CON, HDMI_AVI_CON_EVERY_VSYNC);
+		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER0, infoframe->type);
+		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER1, infoframe->ver);
+		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER2, infoframe->len);
+		hdr_sum = infoframe->type + infoframe->ver + infoframe->len;
+
+		/* Output format hardcoded to zero: RGB/YCbCr selection */
+		hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(1), 0 << 5 |
+			AVI_ACTIVE_FORMAT_VALID |
+			AVI_UNDERSCANNED_DISPLAY_VALID);
+
+		aspect_ratio = AVI_PIC_ASPECT_RATIO_16_9;
+
+		hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(2), aspect_ratio |
+				AVI_SAME_AS_PIC_ASPECT_RATIO);
+
+		if (hdata->type == HDMI_TYPE13)
+			vic = hdmi_v13_confs[hdata->cur_conf].cea_video_id;
+		else
+			vic = hdmi_confs[hdata->cur_conf].cea_video_id;
+
+		hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(4), vic);
+
+		chksum = hdmi_chksum(hdata, HDMI_AVI_BYTE(1),
+					infoframe->len, hdr_sum);
+		DRM_DEBUG_KMS("AVI checksum = 0x%x\n", chksum);
+		hdmi_reg_writeb(hdata, HDMI_AVI_CHECK_SUM, chksum);
+		break;
+	case HDMI_PACKET_TYPE_AUI:
+		hdmi_reg_writeb(hdata, HDMI_AUI_CON, 0x02);
+		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER0, infoframe->type);
+		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER1, infoframe->ver);
+		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER2, infoframe->len);
+		hdr_sum = infoframe->type + infoframe->ver + infoframe->len;
+		chksum = hdmi_chksum(hdata, HDMI_AUI_BYTE(1),
+					infoframe->len, hdr_sum);
+		DRM_DEBUG_KMS("AUI checksum = 0x%x\n", chksum);
+		hdmi_reg_writeb(hdata, HDMI_AUI_CHECK_SUM, chksum);
+		break;
+	default:
+		break;
+	}
+}
+
 static bool hdmi_is_connected(void *ctx)
 {
 	struct hdmi_context *hdata = ctx;
@@ -1293,6 +1410,7 @@
 		DRM_DEBUG_KMS("%s : width[%d] x height[%d]\n",
 			(hdata->dvi_mode ? "dvi monitor" : "hdmi monitor"),
 			raw_edid->width_cm, raw_edid->height_cm);
+		kfree(raw_edid);
 	} else {
 		return -ENODEV;
 	}
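
hdmi_chksum() above implements the InfoFrame checksum rule: the checksum byte
is chosen so that the three header bytes plus the payload plus the checksum
itself sum to zero modulo 256. A self-contained sketch of the same rule (the
header values match HDMI_PACKET_TYPE_AVI, HDMI_AVI_VERSION and HDMI_AVI_LENGTH
above; the payload bytes are illustrative, not real hardware values):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static uint8_t infoframe_chksum(const uint8_t *hdr, size_t hdr_len,
				const uint8_t *body, size_t body_len)
{
	unsigned int sum = 0;
	size_t i;

	for (i = 0; i < hdr_len; i++)
		sum += hdr[i];
	for (i = 0; i < body_len; i++)
		sum += body[i];

	/* two's complement of the low 8 bits of the running sum */
	return (uint8_t)(~(sum & 0xff) + 1);
}

int main(void)
{
	/* AVI InfoFrame header: type 0x82, version 0x02, length 0x0D */
	uint8_t hdr[] = { 0x82, 0x02, 0x0d };
	uint8_t body[13] = { 0x10, 0x28, 0x00, 0x04 };	/* rest zero */
	uint8_t sum = infoframe_chksum(hdr, 3, body, 13);
	unsigned int total = 0x82 + 0x02 + 0x0d + 0x10 + 0x28 + 0x04 + sum;

	assert((total & 0xff) == 0);	/* header + payload + checksum == 0 */
	return 0;
}
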
@@ -1541,6 +1659,8 @@
 
 static void hdmi_conf_init(struct hdmi_context *hdata)
 {
+	struct hdmi_infoframe infoframe;
+
 	/* disable HPD interrupts */
 	hdmi_reg_writemask(hdata, HDMI_INTC_CON, 0, HDMI_INTC_EN_GLOBAL |
 		HDMI_INTC_EN_HPD_PLUG | HDMI_INTC_EN_HPD_UNPLUG);
@@ -1575,9 +1695,17 @@
 		hdmi_reg_writeb(hdata, HDMI_V13_AUI_CON, 0x02);
 		hdmi_reg_writeb(hdata, HDMI_V13_ACR_CON, 0x04);
 	} else {
+		infoframe.type = HDMI_PACKET_TYPE_AVI;
+		infoframe.ver = HDMI_AVI_VERSION;
+		infoframe.len = HDMI_AVI_LENGTH;
+		hdmi_reg_infoframe(hdata, &infoframe);
+
+		infoframe.type = HDMI_PACKET_TYPE_AUI;
+		infoframe.ver = HDMI_AUI_VERSION;
+		infoframe.len = HDMI_AUI_LENGTH;
+		hdmi_reg_infoframe(hdata, &infoframe);
+
 		/* enable AVI packet every vsync, fixes purple line problem */
-		hdmi_reg_writeb(hdata, HDMI_AVI_CON, 0x02);
-		hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(1), 2 << 5);
 		hdmi_reg_writemask(hdata, HDMI_CON_1, 2, 3 << 5);
 	}
 }
@@ -1875,6 +2003,24 @@
 	mdelay(10);
 }
 
+static void hdmiphy_poweron(struct hdmi_context *hdata)
+{
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	if (hdata->type == HDMI_TYPE14)
+		hdmi_reg_writemask(hdata, HDMI_PHY_CON_0, 0,
+			HDMI_PHY_POWER_OFF_EN);
+}
+
+static void hdmiphy_poweroff(struct hdmi_context *hdata)
+{
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	if (hdata->type == HDMI_TYPE14)
+		hdmi_reg_writemask(hdata, HDMI_PHY_CON_0, ~0,
+			HDMI_PHY_POWER_OFF_EN);
+}
+
 static void hdmiphy_conf_apply(struct hdmi_context *hdata)
 {
 	const u8 *hdmiphy_data;
@@ -1978,9 +2124,18 @@
 			index = hdmi_v14_conf_index(m);
 
 		if (index >= 0) {
+			struct drm_mode_object base;
+			struct list_head head;
+
 			DRM_INFO("desired mode doesn't exist so\n");
 			DRM_INFO("use the most suitable mode among modes.\n");
+
+			/* preserve display mode header while copying. */
+			head = adjusted_mode->head;
+			base = adjusted_mode->base;
 			memcpy(adjusted_mode, m, sizeof(*m));
+			adjusted_mode->head = head;
+			adjusted_mode->base = base;
 			break;
 		}
 	}
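
The mode_fixup hunk above saves adjusted_mode->head and ->base around the
memcpy() because copying the whole display mode would clobber the list linkage
and DRM object bookkeeping embedded in it. The same pitfall in miniature, with
an invented two-field struct:

#include <assert.h>
#include <string.h>

struct mode {
	int clock;		/* payload we want copied */
	struct mode *next;	/* bookkeeping that must survive */
};

int main(void)
{
	struct mode other = { 0, NULL };
	struct mode tmpl = { 148500, NULL };
	struct mode adj = { 74250, &other };
	struct mode *saved = adj.next;

	memcpy(&adj, &tmpl, sizeof(tmpl));	/* clobbers adj.next */
	adj.next = saved;			/* restore, as the hunk does */

	assert(adj.clock == 148500 && adj.next == &other);
	return 0;
}
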
@@ -2034,12 +2189,12 @@
 
 	mutex_unlock(&hdata->hdmi_mutex);
 
-	pm_runtime_get_sync(hdata->dev);
-
 	regulator_bulk_enable(res->regul_count, res->regul_bulk);
 	clk_enable(res->hdmiphy);
 	clk_enable(res->hdmi);
 	clk_enable(res->sclk_hdmi);
+
+	hdmiphy_poweron(hdata);
 }
 
 static void hdmi_poweroff(struct hdmi_context *hdata)
@@ -2058,14 +2213,13 @@
 	 * its reset state seems to meet the condition.
 	 */
 	hdmiphy_conf_reset(hdata);
+	hdmiphy_poweroff(hdata);
 
 	clk_disable(res->sclk_hdmi);
 	clk_disable(res->hdmi);
 	clk_disable(res->hdmiphy);
 	regulator_bulk_disable(res->regul_count, res->regul_bulk);
 
-	pm_runtime_put_sync(hdata->dev);
-
 	mutex_lock(&hdata->hdmi_mutex);
 
 	hdata->powered = false;
@@ -2078,16 +2232,18 @@
 {
 	struct hdmi_context *hdata = ctx;
 
-	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+	DRM_DEBUG_KMS("[%d] %s mode %d\n", __LINE__, __func__, mode);
 
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
-		hdmi_poweron(hdata);
+		if (pm_runtime_suspended(hdata->dev))
+			pm_runtime_get_sync(hdata->dev);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		hdmi_poweroff(hdata);
+		if (!pm_runtime_suspended(hdata->dev))
+			pm_runtime_put_sync(hdata->dev);
 		break;
 	default:
 		DRM_DEBUG_KMS("unknown dpms mode: %d\n", mode);
@@ -2166,27 +2322,27 @@
 	memset(res, 0, sizeof(*res));
 
 	/* get clocks, power */
-	res->hdmi = clk_get(dev, "hdmi");
+	res->hdmi = devm_clk_get(dev, "hdmi");
 	if (IS_ERR_OR_NULL(res->hdmi)) {
 		DRM_ERROR("failed to get clock 'hdmi'\n");
 		goto fail;
 	}
-	res->sclk_hdmi = clk_get(dev, "sclk_hdmi");
+	res->sclk_hdmi = devm_clk_get(dev, "sclk_hdmi");
 	if (IS_ERR_OR_NULL(res->sclk_hdmi)) {
 		DRM_ERROR("failed to get clock 'sclk_hdmi'\n");
 		goto fail;
 	}
-	res->sclk_pixel = clk_get(dev, "sclk_pixel");
+	res->sclk_pixel = devm_clk_get(dev, "sclk_pixel");
 	if (IS_ERR_OR_NULL(res->sclk_pixel)) {
 		DRM_ERROR("failed to get clock 'sclk_pixel'\n");
 		goto fail;
 	}
-	res->sclk_hdmiphy = clk_get(dev, "sclk_hdmiphy");
+	res->sclk_hdmiphy = devm_clk_get(dev, "sclk_hdmiphy");
 	if (IS_ERR_OR_NULL(res->sclk_hdmiphy)) {
 		DRM_ERROR("failed to get clock 'sclk_hdmiphy'\n");
 		goto fail;
 	}
-	res->hdmiphy = clk_get(dev, "hdmiphy");
+	res->hdmiphy = devm_clk_get(dev, "hdmiphy");
 	if (IS_ERR_OR_NULL(res->hdmiphy)) {
 		DRM_ERROR("failed to get clock 'hdmiphy'\n");
 		goto fail;
@@ -2194,7 +2350,7 @@
 
 	clk_set_parent(res->sclk_hdmi, res->sclk_pixel);
 
-	res->regul_bulk = kzalloc(ARRAY_SIZE(supply) *
+	res->regul_bulk = devm_kzalloc(dev, ARRAY_SIZE(supply) *
 		sizeof(res->regul_bulk[0]), GFP_KERNEL);
 	if (!res->regul_bulk) {
 		DRM_ERROR("failed to get memory for regulators\n");
@@ -2204,7 +2360,7 @@
 		res->regul_bulk[i].supply = supply[i];
 		res->regul_bulk[i].consumer = NULL;
 	}
-	ret = regulator_bulk_get(dev, ARRAY_SIZE(supply), res->regul_bulk);
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(supply), res->regul_bulk);
 	if (ret) {
 		DRM_ERROR("failed to get regulators\n");
 		goto fail;
@@ -2217,28 +2373,6 @@
 	return -ENODEV;
 }
 
-static int hdmi_resources_cleanup(struct hdmi_context *hdata)
-{
-	struct hdmi_resources *res = &hdata->res;
-
-	regulator_bulk_free(res->regul_count, res->regul_bulk);
-	/* kfree is NULL-safe */
-	kfree(res->regul_bulk);
-	if (!IS_ERR_OR_NULL(res->hdmiphy))
-		clk_put(res->hdmiphy);
-	if (!IS_ERR_OR_NULL(res->sclk_hdmiphy))
-		clk_put(res->sclk_hdmiphy);
-	if (!IS_ERR_OR_NULL(res->sclk_pixel))
-		clk_put(res->sclk_pixel);
-	if (!IS_ERR_OR_NULL(res->sclk_hdmi))
-		clk_put(res->sclk_hdmi);
-	if (!IS_ERR_OR_NULL(res->hdmi))
-		clk_put(res->hdmi);
-	memset(res, 0, sizeof(*res));
-
-	return 0;
-}
-
 static struct i2c_client *hdmi_ddc, *hdmi_hdmiphy;
 
 void hdmi_attach_ddc_client(struct i2c_client *ddc)
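
The devm_clk_get()/devm_kzalloc()/devm_regulator_bulk_get() conversions above
are what let hdmi_resources_cleanup() and most of the probe error labels
disappear: device-managed allocations are released in one pass when probe
fails or the device is removed. A toy model of that idea (toy_dev,
devm_register() and devm_release_all() are invented names, not the kernel's
devres API):

#include <stdio.h>

struct toy_dev {
	void (*release[8])(void);
	int nr;
};

static void devm_register(struct toy_dev *dev, void (*release)(void))
{
	dev->release[dev->nr++] = release;
}

static void devm_release_all(struct toy_dev *dev)
{
	while (dev->nr)
		dev->release[--dev->nr]();	/* reverse order, like devres */
}

static void put_clk(void) { printf("clk_put()\n"); }

int main(void)
{
	struct toy_dev dev = { .nr = 0 };

	devm_register(&dev, put_clk);		/* devm_clk_get() analogue */
	devm_register(&dev, put_clk);

	/* probe fails or device is removed: one call frees everything */
	devm_release_all(&dev);
	return 0;
}
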
@@ -2306,6 +2440,7 @@
 	}
 };
 
+#ifdef CONFIG_OF
 static struct of_device_id hdmi_match_types[] = {
 	{
 		.compatible = "samsung,exynos5-hdmi",
@@ -2314,6 +2449,7 @@
 		/* end node */
 	}
 };
+#endif
 
 static int __devinit hdmi_probe(struct platform_device *pdev)
 {
@@ -2366,6 +2502,8 @@
 		const struct of_device_id *match;
 		match = of_match_node(of_match_ptr(hdmi_match_types),
 					pdev->dev.of_node);
+		if (match == NULL)
+			return -ENODEV;
 		hdata->type = (enum hdmi_type)match->data;
 	} else {
 		hdata->type = (enum hdmi_type)platform_get_device_id
@@ -2378,36 +2516,32 @@
 	ret = hdmi_resources_init(hdata);
 
 	if (ret) {
-		ret = -EINVAL;
 		DRM_ERROR("hdmi_resources_init failed\n");
-		goto err_data;
+		return -EINVAL;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		DRM_ERROR("failed to find registers\n");
-		ret = -ENOENT;
-		goto err_resource;
+		return -ENOENT;
 	}
 
 	hdata->regs = devm_request_and_ioremap(&pdev->dev, res);
 	if (!hdata->regs) {
 		DRM_ERROR("failed to map registers\n");
-		ret = -ENXIO;
-		goto err_resource;
+		return -ENXIO;
 	}
 
-	ret = gpio_request(hdata->hpd_gpio, "HPD");
+	ret = devm_gpio_request(&pdev->dev, hdata->hpd_gpio, "HPD");
 	if (ret) {
 		DRM_ERROR("failed to request HPD gpio\n");
-		goto err_resource;
+		return ret;
 	}
 
 	/* DDC i2c driver */
 	if (i2c_add_driver(&ddc_driver)) {
 		DRM_ERROR("failed to register ddc i2c driver\n");
-		ret = -ENOENT;
-		goto err_gpio;
+		return -ENOENT;
 	}
 
 	hdata->ddc_port = hdmi_ddc;
@@ -2470,11 +2604,6 @@
 	i2c_del_driver(&hdmiphy_driver);
 err_ddc:
 	i2c_del_driver(&ddc_driver);
-err_gpio:
-	gpio_free(hdata->hpd_gpio);
-err_resource:
-	hdmi_resources_cleanup(hdata);
-err_data:
 	return ret;
 }
 
@@ -2491,9 +2620,6 @@
 	free_irq(hdata->internal_irq, hdata);
 	free_irq(hdata->external_irq, hdata);
 
-	gpio_free(hdata->hpd_gpio);
-
-	hdmi_resources_cleanup(hdata);
 
 	/* hdmiphy i2c driver */
 	i2c_del_driver(&hdmiphy_driver);
@@ -2509,6 +2635,8 @@
 	struct exynos_drm_hdmi_context *ctx = get_hdmi_context(dev);
 	struct hdmi_context *hdata = ctx->ctx;
 
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
 	disable_irq(hdata->internal_irq);
 	disable_irq(hdata->external_irq);
 
@@ -2516,6 +2644,11 @@
 	if (ctx->drm_dev)
 		drm_helper_hpd_irq_event(ctx->drm_dev);
 
+	if (pm_runtime_suspended(dev)) {
+		DRM_DEBUG_KMS("%s : Already suspended\n", __func__);
+		return 0;
+	}
+
 	hdmi_poweroff(hdata);
 
 	return 0;
@@ -2526,13 +2659,52 @@
 	struct exynos_drm_hdmi_context *ctx = get_hdmi_context(dev);
 	struct hdmi_context *hdata = ctx->ctx;
 
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	hdata->hpd = gpio_get_value(hdata->hpd_gpio);
+
 	enable_irq(hdata->external_irq);
 	enable_irq(hdata->internal_irq);
+
+	if (!pm_runtime_suspended(dev)) {
+		DRM_DEBUG_KMS("%s : Already resumed\n", __func__);
+		return 0;
+	}
+
+	hdmi_poweron(hdata);
+
 	return 0;
 }
 #endif
 
-static SIMPLE_DEV_PM_OPS(hdmi_pm_ops, hdmi_suspend, hdmi_resume);
+#ifdef CONFIG_PM_RUNTIME
+static int hdmi_runtime_suspend(struct device *dev)
+{
+	struct exynos_drm_hdmi_context *ctx = get_hdmi_context(dev);
+	struct hdmi_context *hdata = ctx->ctx;
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	hdmi_poweroff(hdata);
+
+	return 0;
+}
+
+static int hdmi_runtime_resume(struct device *dev)
+{
+	struct exynos_drm_hdmi_context *ctx = get_hdmi_context(dev);
+	struct hdmi_context *hdata = ctx->ctx;
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	hdmi_poweron(hdata);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops hdmi_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(hdmi_suspend, hdmi_resume)
+	SET_RUNTIME_PM_OPS(hdmi_runtime_suspend, hdmi_runtime_resume, NULL)
+};
 
 struct platform_driver hdmi_driver = {
 	.probe		= hdmi_probe,
@@ -2542,6 +2714,6 @@
 		.name	= "exynos-hdmi",
 		.owner	= THIS_MODULE,
 		.pm	= &hdmi_pm_ops,
-		.of_match_table = hdmi_match_types,
+		.of_match_table = of_match_ptr(hdmi_match_types),
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos_hdmiphy.c b/drivers/gpu/drm/exynos/exynos_hdmiphy.c
index 27d1720..6206056 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmiphy.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmiphy.c
@@ -46,6 +46,7 @@
 	{ },
 };
 
+#ifdef CONFIG_OF
 static struct of_device_id hdmiphy_match_types[] = {
 	{
 		.compatible = "samsung,exynos5-hdmiphy",
@@ -53,12 +54,13 @@
 		/* end node */
 	}
 };
+#endif
 
 struct i2c_driver hdmiphy_driver = {
 	.driver = {
 		.name	= "exynos-hdmiphy",
 		.owner	= THIS_MODULE,
-		.of_match_table = hdmiphy_match_types,
+		.of_match_table = of_match_ptr(hdmiphy_match_types),
 	},
 	.id_table = hdmiphy_id,
 	.probe		= hdmiphy_probe,
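
Both OF match tables above are now guarded by #ifdef CONFIG_OF and referenced
via of_match_ptr(), whose kernel definition collapses the reference to NULL on
non-DT builds; without the guard the tables would be defined but unused there.
A stand-alone sketch (hdmi_match_types here is a plain string standing in for
the real table):

#include <stdio.h>

/* the <linux/of.h> definition, reproduced for illustration */
#ifdef CONFIG_OF
#define of_match_ptr(ptr)	(ptr)
#else
#define of_match_ptr(ptr)	NULL
#endif

static const char *hdmi_match_types = "samsung,exynos5-hdmi";

int main(void)
{
	const char *tbl = of_match_ptr(hdmi_match_types);

	/* prints the table on DT builds, "(null table)" otherwise */
	printf("%s\n", tbl ? tbl : "(null table)");
	return 0;
}
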
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index e7fbb82..21db895 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -36,14 +36,13 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_hdmi.h"
+#include "exynos_drm_iommu.h"
 
 #define get_mixer_context(dev)	platform_get_drvdata(to_platform_device(dev))
 
 struct hdmi_win_data {
 	dma_addr_t		dma_addr;
-	void __iomem		*vaddr;
 	dma_addr_t		chroma_dma_addr;
-	void __iomem		*chroma_vaddr;
 	uint32_t		pixel_format;
 	unsigned int		bpp;
 	unsigned int		crtc_x;
@@ -59,6 +58,8 @@
 	unsigned int		mode_width;
 	unsigned int		mode_height;
 	unsigned int		scan_flags;
+	bool			enabled;
+	bool			resume;
 };
 
 struct mixer_resources {
@@ -80,6 +81,7 @@
 
 struct mixer_context {
 	struct device		*dev;
+	struct drm_device	*drm_dev;
 	int			pipe;
 	bool			interlace;
 	bool			powered;
@@ -90,6 +92,9 @@
 	struct mixer_resources	mixer_res;
 	struct hdmi_win_data	win_data[MIXER_WIN_NR];
 	enum mixer_version_id	mxr_ver;
+	void			*parent_ctx;
+	wait_queue_head_t	wait_vsync_queue;
+	atomic_t		wait_vsync_event;
 };
 
 struct mixer_drv_data {
@@ -665,58 +670,22 @@
 	spin_unlock_irqrestore(&res->reg_slock, flags);
 }
 
-static void mixer_poweron(struct mixer_context *ctx)
+static int mixer_iommu_on(void *ctx, bool enable)
 {
-	struct mixer_resources *res = &ctx->mixer_res;
+	struct exynos_drm_hdmi_context *drm_hdmi_ctx;
+	struct mixer_context *mdata = ctx;
+	struct drm_device *drm_dev;
 
-	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+	drm_hdmi_ctx = mdata->parent_ctx;
+	drm_dev = drm_hdmi_ctx->drm_dev;
 
-	mutex_lock(&ctx->mixer_mutex);
-	if (ctx->powered) {
-		mutex_unlock(&ctx->mixer_mutex);
-		return;
+	if (is_drm_iommu_supported(drm_dev)) {
+		if (enable)
+			return drm_iommu_attach_device(drm_dev, mdata->dev);
+
+		drm_iommu_detach_device(drm_dev, mdata->dev);
 	}
-	ctx->powered = true;
-	mutex_unlock(&ctx->mixer_mutex);
-
-	pm_runtime_get_sync(ctx->dev);
-
-	clk_enable(res->mixer);
-	if (ctx->vp_enabled) {
-		clk_enable(res->vp);
-		clk_enable(res->sclk_mixer);
-	}
-
-	mixer_reg_write(res, MXR_INT_EN, ctx->int_en);
-	mixer_win_reset(ctx);
-}
-
-static void mixer_poweroff(struct mixer_context *ctx)
-{
-	struct mixer_resources *res = &ctx->mixer_res;
-
-	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
-
-	mutex_lock(&ctx->mixer_mutex);
-	if (!ctx->powered)
-		goto out;
-	mutex_unlock(&ctx->mixer_mutex);
-
-	ctx->int_en = mixer_reg_read(res, MXR_INT_EN);
-
-	clk_disable(res->mixer);
-	if (ctx->vp_enabled) {
-		clk_disable(res->vp);
-		clk_disable(res->sclk_mixer);
-	}
-
-	pm_runtime_put_sync(ctx->dev);
-
-	mutex_lock(&ctx->mixer_mutex);
-	ctx->powered = false;
-
-out:
-	mutex_unlock(&ctx->mixer_mutex);
+	return 0;
 }
 
 static int mixer_enable_vblank(void *ctx, int pipe)
@@ -746,39 +715,6 @@
 	mixer_reg_writemask(res, MXR_INT_EN, 0, MXR_INT_EN_VSYNC);
 }
 
-static void mixer_dpms(void *ctx, int mode)
-{
-	struct mixer_context *mixer_ctx = ctx;
-
-	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
-
-	switch (mode) {
-	case DRM_MODE_DPMS_ON:
-		mixer_poweron(mixer_ctx);
-		break;
-	case DRM_MODE_DPMS_STANDBY:
-	case DRM_MODE_DPMS_SUSPEND:
-	case DRM_MODE_DPMS_OFF:
-		mixer_poweroff(mixer_ctx);
-		break;
-	default:
-		DRM_DEBUG_KMS("unknown dpms mode: %d\n", mode);
-		break;
-	}
-}
-
-static void mixer_wait_for_vblank(void *ctx)
-{
-	struct mixer_context *mixer_ctx = ctx;
-	struct mixer_resources *res = &mixer_ctx->mixer_res;
-	int ret;
-
-	ret = wait_for((mixer_reg_read(res, MXR_INT_STATUS) &
-				MXR_INT_STATUS_VSYNC), 50);
-	if (ret < 0)
-		DRM_DEBUG_KMS("vblank wait timed out.\n");
-}
-
 static void mixer_win_mode_set(void *ctx,
 			      struct exynos_drm_overlay *overlay)
 {
@@ -811,9 +747,7 @@
 	win_data = &mixer_ctx->win_data[win];
 
 	win_data->dma_addr = overlay->dma_addr[0];
-	win_data->vaddr = overlay->vaddr[0];
 	win_data->chroma_dma_addr = overlay->dma_addr[1];
-	win_data->chroma_vaddr = overlay->vaddr[1];
 	win_data->pixel_format = overlay->pixel_format;
 	win_data->bpp = overlay->bpp;
 
@@ -845,6 +779,8 @@
 		vp_video_buffer(mixer_ctx, win);
 	else
 		mixer_graph_buffer(mixer_ctx, win);
+
+	mixer_ctx->win_data[win].enabled = true;
 }
 
 static void mixer_win_disable(void *ctx, int win)
@@ -855,6 +791,14 @@
 
 	DRM_DEBUG_KMS("[%d] %s, win: %d\n", __LINE__, __func__, win);
 
+	mutex_lock(&mixer_ctx->mixer_mutex);
+	if (!mixer_ctx->powered) {
+		mutex_unlock(&mixer_ctx->mixer_mutex);
+		mixer_ctx->win_data[win].resume = false;
+		return;
+	}
+	mutex_unlock(&mixer_ctx->mixer_mutex);
+
 	spin_lock_irqsave(&res->reg_slock, flags);
 	mixer_vsync_set_update(mixer_ctx, false);
 
@@ -862,16 +806,144 @@
 
 	mixer_vsync_set_update(mixer_ctx, true);
 	spin_unlock_irqrestore(&res->reg_slock, flags);
+
+	mixer_ctx->win_data[win].enabled = false;
+}
+
+static void mixer_wait_for_vblank(void *ctx)
+{
+	struct mixer_context *mixer_ctx = ctx;
+
+	mutex_lock(&mixer_ctx->mixer_mutex);
+	if (!mixer_ctx->powered) {
+		mutex_unlock(&mixer_ctx->mixer_mutex);
+		return;
+	}
+	mutex_unlock(&mixer_ctx->mixer_mutex);
+
+	atomic_set(&mixer_ctx->wait_vsync_event, 1);
+
+	/*
+	 * Wait for the MIXER to signal a VSYNC interrupt, or return
+	 * after a 50ms timeout (i.e. a 20Hz refresh rate).
+	 */
+	if (!wait_event_timeout(mixer_ctx->wait_vsync_queue,
+				!atomic_read(&mixer_ctx->wait_vsync_event),
+				DRM_HZ/20))
+		DRM_DEBUG_KMS("vblank wait timed out.\n");
+}
+
+static void mixer_window_suspend(struct mixer_context *ctx)
+{
+	struct hdmi_win_data *win_data;
+	int i;
+
+	for (i = 0; i < MIXER_WIN_NR; i++) {
+		win_data = &ctx->win_data[i];
+		win_data->resume = win_data->enabled;
+		mixer_win_disable(ctx, i);
+	}
+	mixer_wait_for_vblank(ctx);
+}
+
+static void mixer_window_resume(struct mixer_context *ctx)
+{
+	struct hdmi_win_data *win_data;
+	int i;
+
+	for (i = 0; i < MIXER_WIN_NR; i++) {
+		win_data = &ctx->win_data[i];
+		win_data->enabled = win_data->resume;
+		win_data->resume = false;
+	}
+}
+
+static void mixer_poweron(struct mixer_context *ctx)
+{
+	struct mixer_resources *res = &ctx->mixer_res;
+
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	mutex_lock(&ctx->mixer_mutex);
+	if (ctx->powered) {
+		mutex_unlock(&ctx->mixer_mutex);
+		return;
+	}
+	ctx->powered = true;
+	mutex_unlock(&ctx->mixer_mutex);
+
+	clk_enable(res->mixer);
+	if (ctx->vp_enabled) {
+		clk_enable(res->vp);
+		clk_enable(res->sclk_mixer);
+	}
+
+	mixer_reg_write(res, MXR_INT_EN, ctx->int_en);
+	mixer_win_reset(ctx);
+
+	mixer_window_resume(ctx);
+}
+
+static void mixer_poweroff(struct mixer_context *ctx)
+{
+	struct mixer_resources *res = &ctx->mixer_res;
+
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	mutex_lock(&ctx->mixer_mutex);
+	if (!ctx->powered)
+		goto out;
+	mutex_unlock(&ctx->mixer_mutex);
+
+	mixer_window_suspend(ctx);
+
+	ctx->int_en = mixer_reg_read(res, MXR_INT_EN);
+
+	clk_disable(res->mixer);
+	if (ctx->vp_enabled) {
+		clk_disable(res->vp);
+		clk_disable(res->sclk_mixer);
+	}
+
+	mutex_lock(&ctx->mixer_mutex);
+	ctx->powered = false;
+
+out:
+	mutex_unlock(&ctx->mixer_mutex);
+}
+
+static void mixer_dpms(void *ctx, int mode)
+{
+	struct mixer_context *mixer_ctx = ctx;
+
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		if (pm_runtime_suspended(mixer_ctx->dev))
+			pm_runtime_get_sync(mixer_ctx->dev);
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		if (!pm_runtime_suspended(mixer_ctx->dev))
+			pm_runtime_put_sync(mixer_ctx->dev);
+		break;
+	default:
+		DRM_DEBUG_KMS("unknown dpms mode: %d\n", mode);
+		break;
+	}
 }
 
 static struct exynos_mixer_ops mixer_ops = {
 	/* manager */
+	.iommu_on		= mixer_iommu_on,
 	.enable_vblank		= mixer_enable_vblank,
 	.disable_vblank		= mixer_disable_vblank,
+	.wait_for_vblank	= mixer_wait_for_vblank,
 	.dpms			= mixer_dpms,
 
 	/* overlay */
-	.wait_for_vblank	= mixer_wait_for_vblank,
 	.win_mode_set		= mixer_win_mode_set,
 	.win_commit		= mixer_win_commit,
 	.win_disable		= mixer_win_disable,
@@ -884,7 +956,6 @@
 	struct drm_pending_vblank_event *e, *t;
 	struct timeval now;
 	unsigned long flags;
-	bool is_checked = false;
 
 	spin_lock_irqsave(&drm_dev->event_lock, flags);
 
@@ -894,7 +965,6 @@
 		if (crtc != e->pipe)
 			continue;
 
-		is_checked = true;
 		do_gettimeofday(&now);
 		e->event.sequence = 0;
 		e->event.tv_sec = now.tv_sec;
@@ -902,16 +972,9 @@
 
 		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
 		wake_up_interruptible(&e->base.file_priv->event_wait);
+		drm_vblank_put(drm_dev, crtc);
 	}
 
-	if (is_checked)
-		/*
-		 * call drm_vblank_put only in case that drm_vblank_get was
-		 * called.
-		 */
-		if (atomic_read(&drm_dev->vblank_refcount[crtc]) > 0)
-			drm_vblank_put(drm_dev, crtc);
-
 	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
 }
 
@@ -944,6 +1007,12 @@
 
 		drm_handle_vblank(drm_hdmi_ctx->drm_dev, ctx->pipe);
 		mixer_finish_pageflip(drm_hdmi_ctx->drm_dev, ctx->pipe);
+
+		/* set wait vsync event to zero and wake up queue. */
+		if (atomic_read(&ctx->wait_vsync_event)) {
+			atomic_set(&ctx->wait_vsync_event, 0);
+			DRM_WAKEUP(&ctx->wait_vsync_queue);
+		}
 	}
 
 out:
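
mixer_wait_for_vblank() and the VSYNC branch of the interrupt handler above
form a flag-plus-waitqueue handshake: the waiter sets wait_vsync_event and
sleeps up to 50ms; the interrupt clears the flag and wakes the queue. A
stand-alone sketch of the same handshake using pthreads in place of
wait_event_timeout()/DRM_WAKEUP():

#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int wait_vsync_event;

/* waiter: mirrors mixer_wait_for_vblank() above */
static void wait_for_vblank(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_nsec += 50 * 1000000;		/* 50ms, i.e. DRM_HZ/20 */
	if (ts.tv_nsec >= 1000000000) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000000000;
	}

	pthread_mutex_lock(&lock);
	wait_vsync_event = 1;
	while (wait_vsync_event) {
		if (pthread_cond_timedwait(&cond, &lock, &ts)) {
			printf("vblank wait timed out\n");
			break;
		}
	}
	pthread_mutex_unlock(&lock);
}

/* "interrupt": mirrors the VSYNC branch of mixer_irq_handler() */
static void *vsync_irq(void *arg)
{
	(void)arg;
	usleep(10000);				/* vsync fires after 10ms */
	pthread_mutex_lock(&lock);
	wait_vsync_event = 0;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t irq;

	pthread_create(&irq, NULL, vsync_irq, NULL);
	wait_for_vblank();
	pthread_join(irq, NULL);
	return 0;
}
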
@@ -971,57 +1040,45 @@
 
 	spin_lock_init(&mixer_res->reg_slock);
 
-	mixer_res->mixer = clk_get(dev, "mixer");
+	mixer_res->mixer = devm_clk_get(dev, "mixer");
 	if (IS_ERR_OR_NULL(mixer_res->mixer)) {
 		dev_err(dev, "failed to get clock 'mixer'\n");
-		ret = -ENODEV;
-		goto fail;
+		return -ENODEV;
 	}
 
-	mixer_res->sclk_hdmi = clk_get(dev, "sclk_hdmi");
+	mixer_res->sclk_hdmi = devm_clk_get(dev, "sclk_hdmi");
 	if (IS_ERR_OR_NULL(mixer_res->sclk_hdmi)) {
 		dev_err(dev, "failed to get clock 'sclk_hdmi'\n");
-		ret = -ENODEV;
-		goto fail;
+		return -ENODEV;
 	}
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		dev_err(dev, "get memory resource failed.\n");
-		ret = -ENXIO;
-		goto fail;
+		return -ENXIO;
 	}
 
 	mixer_res->mixer_regs = devm_ioremap(&pdev->dev, res->start,
 							resource_size(res));
 	if (mixer_res->mixer_regs == NULL) {
 		dev_err(dev, "register mapping failed.\n");
-		ret = -ENXIO;
-		goto fail;
+		return -ENXIO;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (res == NULL) {
 		dev_err(dev, "get interrupt resource failed.\n");
-		ret = -ENXIO;
-		goto fail;
+		return -ENXIO;
 	}
 
 	ret = devm_request_irq(&pdev->dev, res->start, mixer_irq_handler,
 							0, "drm_mixer", ctx);
 	if (ret) {
 		dev_err(dev, "request interrupt failed.\n");
-		goto fail;
+		return ret;
 	}
 	mixer_res->irq = res->start;
 
 	return 0;
-
-fail:
-	if (!IS_ERR_OR_NULL(mixer_res->sclk_hdmi))
-		clk_put(mixer_res->sclk_hdmi);
-	if (!IS_ERR_OR_NULL(mixer_res->mixer))
-		clk_put(mixer_res->mixer);
-	return ret;
 }
 
 static int __devinit vp_resources_init(struct exynos_drm_hdmi_context *ctx,
@@ -1031,25 +1088,21 @@
 	struct device *dev = &pdev->dev;
 	struct mixer_resources *mixer_res = &mixer_ctx->mixer_res;
 	struct resource *res;
-	int ret;
 
-	mixer_res->vp = clk_get(dev, "vp");
+	mixer_res->vp = devm_clk_get(dev, "vp");
 	if (IS_ERR_OR_NULL(mixer_res->vp)) {
 		dev_err(dev, "failed to get clock 'vp'\n");
-		ret = -ENODEV;
-		goto fail;
+		return -ENODEV;
 	}
-	mixer_res->sclk_mixer = clk_get(dev, "sclk_mixer");
+	mixer_res->sclk_mixer = devm_clk_get(dev, "sclk_mixer");
 	if (IS_ERR_OR_NULL(mixer_res->sclk_mixer)) {
 		dev_err(dev, "failed to get clock 'sclk_mixer'\n");
-		ret = -ENODEV;
-		goto fail;
+		return -ENODEV;
 	}
-	mixer_res->sclk_dac = clk_get(dev, "sclk_dac");
+	mixer_res->sclk_dac = devm_clk_get(dev, "sclk_dac");
 	if (IS_ERR_OR_NULL(mixer_res->sclk_dac)) {
 		dev_err(dev, "failed to get clock 'sclk_dac'\n");
-		ret = -ENODEV;
-		goto fail;
+		return -ENODEV;
 	}
 
 	if (mixer_res->sclk_hdmi)
@@ -1058,28 +1111,17 @@
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	if (res == NULL) {
 		dev_err(dev, "get memory resource failed.\n");
-		ret = -ENXIO;
-		goto fail;
+		return -ENXIO;
 	}
 
 	mixer_res->vp_regs = devm_ioremap(&pdev->dev, res->start,
 							resource_size(res));
 	if (mixer_res->vp_regs == NULL) {
 		dev_err(dev, "register mapping failed.\n");
-		ret = -ENXIO;
-		goto fail;
+		return -ENXIO;
 	}
 
 	return 0;
-
-fail:
-	if (!IS_ERR_OR_NULL(mixer_res->sclk_dac))
-		clk_put(mixer_res->sclk_dac);
-	if (!IS_ERR_OR_NULL(mixer_res->sclk_mixer))
-		clk_put(mixer_res->sclk_mixer);
-	if (!IS_ERR_OR_NULL(mixer_res->vp))
-		clk_put(mixer_res->vp);
-	return ret;
 }
 
 static struct mixer_drv_data exynos5_mxr_drv_data = {
@@ -1149,9 +1191,12 @@
 	}
 
 	ctx->dev = &pdev->dev;
+	ctx->parent_ctx = (void *)drm_hdmi_ctx;
 	drm_hdmi_ctx->ctx = (void *)ctx;
 	ctx->vp_enabled = drv->is_vp_enabled;
 	ctx->mxr_ver = drv->version;
+	DRM_INIT_WAITQUEUE(&ctx->wait_vsync_queue);
+	atomic_set(&ctx->wait_vsync_event, 0);
 
 	platform_set_drvdata(pdev, drm_hdmi_ctx);
 
@@ -1202,13 +1247,66 @@
 	struct exynos_drm_hdmi_context *drm_hdmi_ctx = get_mixer_context(dev);
 	struct mixer_context *ctx = drm_hdmi_ctx->ctx;
 
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	if (pm_runtime_suspended(dev)) {
+		DRM_DEBUG_KMS("%s : Already suspended\n", __func__);
+		return 0;
+	}
+
 	mixer_poweroff(ctx);
 
 	return 0;
 }
+
+static int mixer_resume(struct device *dev)
+{
+	struct exynos_drm_hdmi_context *drm_hdmi_ctx = get_mixer_context(dev);
+	struct mixer_context *ctx = drm_hdmi_ctx->ctx;
+
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	if (!pm_runtime_suspended(dev)) {
+		DRM_DEBUG_KMS("%s : Already resumed\n", __func__);
+		return 0;
+	}
+
+	mixer_poweron(ctx);
+
+	return 0;
+}
 #endif
 
-static SIMPLE_DEV_PM_OPS(mixer_pm_ops, mixer_suspend, NULL);
+#ifdef CONFIG_PM_RUNTIME
+static int mixer_runtime_suspend(struct device *dev)
+{
+	struct exynos_drm_hdmi_context *drm_hdmi_ctx = get_mixer_context(dev);
+	struct mixer_context *ctx = drm_hdmi_ctx->ctx;
+
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	mixer_poweroff(ctx);
+
+	return 0;
+}
+
+static int mixer_runtime_resume(struct device *dev)
+{
+	struct exynos_drm_hdmi_context *drm_hdmi_ctx = get_mixer_context(dev);
+	struct mixer_context *ctx = drm_hdmi_ctx->ctx;
+
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+
+	mixer_poweron(ctx);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops mixer_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mixer_suspend, mixer_resume)
+	SET_RUNTIME_PM_OPS(mixer_runtime_suspend, mixer_runtime_resume, NULL)
+};
 
 struct platform_driver mixer_driver = {
 	.driver = {
diff --git a/drivers/gpu/drm/exynos/regs-fimc.h b/drivers/gpu/drm/exynos/regs-fimc.h
new file mode 100644
index 0000000..b4f9ca1
--- /dev/null
+++ b/drivers/gpu/drm/exynos/regs-fimc.h
@@ -0,0 +1,669 @@
+/* drivers/gpu/drm/exynos/regs-fimc.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * Register definition file for Samsung Camera Interface (FIMC) driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXYNOS_REGS_FIMC_H
+#define EXYNOS_REGS_FIMC_H
+
+/*
+ * Register part
+ */
+/* Input source format */
+#define EXYNOS_CISRCFMT		(0x00)
+/* Window offset */
+#define EXYNOS_CIWDOFST		(0x04)
+/* Global control */
+#define EXYNOS_CIGCTRL		(0x08)
+/* Window offset 2 */
+#define EXYNOS_CIWDOFST2	(0x14)
+/* Y 1st frame start address for output DMA */
+#define EXYNOS_CIOYSA1		(0x18)
+/* Y 2nd frame start address for output DMA */
+#define EXYNOS_CIOYSA2		(0x1c)
+/* Y 3rd frame start address for output DMA */
+#define EXYNOS_CIOYSA3		(0x20)
+/* Y 4th frame start address for output DMA */
+#define EXYNOS_CIOYSA4		(0x24)
+/* Cb 1st frame start address for output DMA */
+#define EXYNOS_CIOCBSA1		(0x28)
+/* Cb 2nd frame start address for output DMA */
+#define EXYNOS_CIOCBSA2		(0x2c)
+/* Cb 3rd frame start address for output DMA */
+#define EXYNOS_CIOCBSA3		(0x30)
+/* Cb 4th frame start address for output DMA */
+#define EXYNOS_CIOCBSA4		(0x34)
+/* Cr 1st frame start address for output DMA */
+#define EXYNOS_CIOCRSA1		(0x38)
+/* Cr 2nd frame start address for output DMA */
+#define EXYNOS_CIOCRSA2		(0x3c)
+/* Cr 3rd frame start address for output DMA */
+#define EXYNOS_CIOCRSA3		(0x40)
+/* Cr 4th frame start address for output DMA */
+#define EXYNOS_CIOCRSA4		(0x44)
+/* Target image format */
+#define EXYNOS_CITRGFMT		(0x48)
+/* Output DMA control */
+#define EXYNOS_CIOCTRL		(0x4c)
+/* Pre-scaler control 1 */
+#define EXYNOS_CISCPRERATIO	(0x50)
+/* Pre-scaler control 2 */
+#define EXYNOS_CISCPREDST		(0x54)
+/* Main scaler control */
+#define EXYNOS_CISCCTRL		(0x58)
+/* Target area */
+#define EXYNOS_CITAREA		(0x5c)
+/* Status */
+#define EXYNOS_CISTATUS		(0x64)
+/* Status2 */
+#define EXYNOS_CISTATUS2		(0x68)
+/* Image capture enable command */
+#define EXYNOS_CIIMGCPT		(0xc0)
+/* Capture sequence */
+#define EXYNOS_CICPTSEQ		(0xc4)
+/* Image effects */
+#define EXYNOS_CIIMGEFF		(0xd0)
+/* Y frame start address for input DMA */
+#define EXYNOS_CIIYSA0		(0xd4)
+/* Cb frame start address for input DMA */
+#define EXYNOS_CIICBSA0		(0xd8)
+/* Cr frame start address for input DMA */
+#define EXYNOS_CIICRSA0		(0xdc)
+/* Input DMA Y Line Skip */
+#define EXYNOS_CIILINESKIP_Y	(0xec)
+/* Input DMA Cb Line Skip */
+#define EXYNOS_CIILINESKIP_CB	(0xf0)
+/* Input DMA Cr Line Skip */
+#define EXYNOS_CIILINESKIP_CR	(0xf4)
+/* Real input DMA image size */
+#define EXYNOS_CIREAL_ISIZE	(0xf8)
+/* Input DMA control */
+#define EXYNOS_MSCTRL		(0xfc)
+/* Y frame start address for input DMA */
+#define EXYNOS_CIIYSA1		(0x144)
+/* Cb frame start address for input DMA */
+#define EXYNOS_CIICBSA1		(0x148)
+/* Cr frame start address for input DMA */
+#define EXYNOS_CIICRSA1		(0x14c)
+/* Output DMA Y offset */
+#define EXYNOS_CIOYOFF		(0x168)
+/* Output DMA CB offset */
+#define EXYNOS_CIOCBOFF		(0x16c)
+/* Output DMA CR offset */
+#define EXYNOS_CIOCROFF		(0x170)
+/* Input DMA Y offset */
+#define EXYNOS_CIIYOFF		(0x174)
+/* Input DMA CB offset */
+#define EXYNOS_CIICBOFF		(0x178)
+/* Input DMA CR offset */
+#define EXYNOS_CIICROFF		(0x17c)
+/* Input DMA original image size */
+#define EXYNOS_ORGISIZE		(0x180)
+/* Output DMA original image size */
+#define EXYNOS_ORGOSIZE		(0x184)
+/* Real output DMA image size */
+#define EXYNOS_CIEXTEN		(0x188)
+/* DMA parameter */
+#define EXYNOS_CIDMAPARAM		(0x18c)
+/* MIPI CSI image format */
+#define EXYNOS_CSIIMGFMT		(0x194)
+/* FIMC Clock Source Select */
+#define EXYNOS_MISC_FIMC		(0x198)
+
+/* Add for FIMC v5.1 */
+/* Output Frame Buffer Sequence */
+#define EXYNOS_CIFCNTSEQ		(0x1fc)
+/* Y 5th frame start address for output DMA */
+#define EXYNOS_CIOYSA5		(0x200)
+/* Y 6th frame start address for output DMA */
+#define EXYNOS_CIOYSA6		(0x204)
+/* Y 7th frame start address for output DMA */
+#define EXYNOS_CIOYSA7		(0x208)
+/* Y 8th frame start address for output DMA */
+#define EXYNOS_CIOYSA8		(0x20c)
+/* Y 9th frame start address for output DMA */
+#define EXYNOS_CIOYSA9		(0x210)
+/* Y 10th frame start address for output DMA */
+#define EXYNOS_CIOYSA10		(0x214)
+/* Y 11th frame start address for output DMA */
+#define EXYNOS_CIOYSA11		(0x218)
+/* Y 12th frame start address for output DMA */
+#define EXYNOS_CIOYSA12		(0x21c)
+/* Y 13th frame start address for output DMA */
+#define EXYNOS_CIOYSA13		(0x220)
+/* Y 14th frame start address for output DMA */
+#define EXYNOS_CIOYSA14		(0x224)
+/* Y 15th frame start address for output DMA */
+#define EXYNOS_CIOYSA15		(0x228)
+/* Y 16th frame start address for output DMA */
+#define EXYNOS_CIOYSA16		(0x22c)
+/* Y 17th frame start address for output DMA */
+#define EXYNOS_CIOYSA17		(0x230)
+/* Y 18th frame start address for output DMA */
+#define EXYNOS_CIOYSA18		(0x234)
+/* Y 19th frame start address for output DMA */
+#define EXYNOS_CIOYSA19		(0x238)
+/* Y 20th frame start address for output DMA */
+#define EXYNOS_CIOYSA20		(0x23c)
+/* Y 21st frame start address for output DMA */
+#define EXYNOS_CIOYSA21		(0x240)
+/* Y 22nd frame start address for output DMA */
+#define EXYNOS_CIOYSA22		(0x244)
+/* Y 23rd frame start address for output DMA */
+#define EXYNOS_CIOYSA23		(0x248)
+/* Y 24th frame start address for output DMA */
+#define EXYNOS_CIOYSA24		(0x24c)
+/* Y 25th frame start address for output DMA */
+#define EXYNOS_CIOYSA25		(0x250)
+/* Y 26th frame start address for output DMA */
+#define EXYNOS_CIOYSA26		(0x254)
+/* Y 27th frame start address for output DMA */
+#define EXYNOS_CIOYSA27		(0x258)
+/* Y 28th frame start address for output DMA */
+#define EXYNOS_CIOYSA28		(0x25c)
+/* Y 29th frame start address for output DMA */
+#define EXYNOS_CIOYSA29		(0x260)
+/* Y 30th frame start address for output DMA */
+#define EXYNOS_CIOYSA30		(0x264)
+/* Y 31st frame start address for output DMA */
+#define EXYNOS_CIOYSA31		(0x268)
+/* Y 32nd frame start address for output DMA */
+#define EXYNOS_CIOYSA32		(0x26c)
+
+/* CB 5th frame start address for output DMA */
+#define EXYNOS_CIOCBSA5		(0x270)
+/* CB 6th frame start address for output DMA */
+#define EXYNOS_CIOCBSA6		(0x274)
+/* CB 7th frame start address for output DMA */
+#define EXYNOS_CIOCBSA7		(0x278)
+/* CB 8th frame start address for output DMA */
+#define EXYNOS_CIOCBSA8		(0x27c)
+/* CB 9th frame start address for output DMA */
+#define EXYNOS_CIOCBSA9		(0x280)
+/* CB 10th frame start address for output DMA */
+#define EXYNOS_CIOCBSA10		(0x284)
+/* CB 11th frame start address for output DMA */
+#define EXYNOS_CIOCBSA11		(0x288)
+/* CB 12th frame start address for output DMA */
+#define EXYNOS_CIOCBSA12		(0x28c)
+/* CB 13th frame start address for output DMA */
+#define EXYNOS_CIOCBSA13		(0x290)
+/* CB 14th frame start address for output DMA */
+#define EXYNOS_CIOCBSA14		(0x294)
+/* CB 15th frame start address for output DMA */
+#define EXYNOS_CIOCBSA15		(0x298)
+/* CB 16th frame start address for output DMA */
+#define EXYNOS_CIOCBSA16		(0x29c)
+/* CB 17th frame start address for output DMA */
+#define EXYNOS_CIOCBSA17		(0x2a0)
+/* CB 18th frame start address for output DMA */
+#define EXYNOS_CIOCBSA18		(0x2a4)
+/* CB 19th frame start address for output DMA */
+#define EXYNOS_CIOCBSA19		(0x2a8)
+/* CB 20th frame start address for output DMA */
+#define EXYNOS_CIOCBSA20		(0x2ac)
+/* CB 21st frame start address for output DMA */
+#define EXYNOS_CIOCBSA21		(0x2b0)
+/* CB 22nd frame start address for output DMA */
+#define EXYNOS_CIOCBSA22		(0x2b4)
+/* CB 23rd frame start address for output DMA */
+#define EXYNOS_CIOCBSA23		(0x2b8)
+/* CB 24th frame start address for output DMA */
+#define EXYNOS_CIOCBSA24		(0x2bc)
+/* CB 25th frame start address for output DMA */
+#define EXYNOS_CIOCBSA25		(0x2c0)
+/* CB 26th frame start address for output DMA */
+#define EXYNOS_CIOCBSA26		(0x2c4)
+/* CB 27th frame start address for output DMA */
+#define EXYNOS_CIOCBSA27		(0x2c8)
+/* CB 28th frame start address for output DMA */
+#define EXYNOS_CIOCBSA28		(0x2cc)
+/* CB 29th frame start address for output DMA */
+#define EXYNOS_CIOCBSA29		(0x2d0)
+/* CB 30th frame start address for output DMA */
+#define EXYNOS_CIOCBSA30		(0x2d4)
+/* CB 31st frame start address for output DMA */
+#define EXYNOS_CIOCBSA31		(0x2d8)
+/* CB 32nd frame start address for output DMA */
+#define EXYNOS_CIOCBSA32		(0x2dc)
+
+/* CR 5th frame start address for output DMA */
+#define EXYNOS_CIOCRSA5		(0x2e0)
+/* CR 6th frame start address for output DMA */
+#define EXYNOS_CIOCRSA6		(0x2e4)
+/* CR 7th frame start address for output DMA */
+#define EXYNOS_CIOCRSA7		(0x2e8)
+/* CR 8th frame start address for output DMA */
+#define EXYNOS_CIOCRSA8		(0x2ec)
+/* CR 9th frame start address for output DMA */
+#define EXYNOS_CIOCRSA9		(0x2f0)
+/* CR 10th frame start address for output DMA */
+#define EXYNOS_CIOCRSA10		(0x2f4)
+/* CR 11th frame start address for output DMA */
+#define EXYNOS_CIOCRSA11		(0x2f8)
+/* CR 12th frame start address for output DMA */
+#define EXYNOS_CIOCRSA12		(0x2fc)
+/* CR 13th frame start address for output DMA */
+#define EXYNOS_CIOCRSA13		(0x300)
+/* CR 14th frame start address for output DMA */
+#define EXYNOS_CIOCRSA14		(0x304)
+/* CR 15th frame start address for output DMA */
+#define EXYNOS_CIOCRSA15		(0x308)
+/* CR 16th frame start address for output DMA */
+#define EXYNOS_CIOCRSA16		(0x30c)
+/* CR 17th frame start address for output DMA */
+#define EXYNOS_CIOCRSA17		(0x310)
+/* CR 18th frame start address for output DMA */
+#define EXYNOS_CIOCRSA18		(0x314)
+/* CR 19th frame start address for output DMA */
+#define EXYNOS_CIOCRSA19		(0x318)
+/* CR 20th frame start address for output DMA */
+#define EXYNOS_CIOCRSA20		(0x31c)
+/* CR 21st frame start address for output DMA */
+#define EXYNOS_CIOCRSA21		(0x320)
+/* CR 22nd frame start address for output DMA */
+#define EXYNOS_CIOCRSA22		(0x324)
+/* CR 23rd frame start address for output DMA */
+#define EXYNOS_CIOCRSA23		(0x328)
+/* CR 24th frame start address for output DMA */
+#define EXYNOS_CIOCRSA24		(0x32c)
+/* CR 25th frame start address for output DMA */
+#define EXYNOS_CIOCRSA25		(0x330)
+/* CR 26th frame start address for output DMA */
+#define EXYNOS_CIOCRSA26		(0x334)
+/* CR 27th frame start address for output DMA */
+#define EXYNOS_CIOCRSA27		(0x338)
+/* CR 28th frame start address for output DMA */
+#define EXYNOS_CIOCRSA28		(0x33c)
+/* CR 29th frame start address for output DMA */
+#define EXYNOS_CIOCRSA29		(0x340)
+/* CR 30th frame start address for output DMA */
+#define EXYNOS_CIOCRSA30		(0x344)
+/* CR 31st frame start address for output DMA */
+#define EXYNOS_CIOCRSA31		(0x348)
+/* CR 32nd frame start address for output DMA */
+#define EXYNOS_CIOCRSA32		(0x34c)
+
+/*
+ * Macro part
+ */
+/* frame start address 1 ~ 4, 5 ~ 32 */
+/* Number of default ping-pong buffers for output DMA */
+#define DEF_PP		4
+#define EXYNOS_CIOYSA(__x)		\
+	(((__x) < DEF_PP) ?	\
+	 (EXYNOS_CIOYSA1  + (__x) * 4) : \
+	(EXYNOS_CIOYSA5  + ((__x) - DEF_PP) * 4))
+#define EXYNOS_CIOCBSA(__x)	\
+	(((__x) < DEF_PP) ?	\
+	 (EXYNOS_CIOCBSA1 + (__x) * 4) : \
+	(EXYNOS_CIOCBSA5 + ((__x) - DEF_PP) * 4))
+#define EXYNOS_CIOCRSA(__x)	\
+	(((__x) < DEF_PP) ?	\
+	 (EXYNOS_CIOCRSA1 + (__x) * 4) : \
+	(EXYNOS_CIOCRSA5 + ((__x) - DEF_PP) * 4))
+/* Number of default ping-pong buffers for input DMA */
+#define DEF_IPP		1
+#define EXYNOS_CIIYSA(__x)		\
+	(((__x) < DEF_IPP) ?	\
+	 (EXYNOS_CIIYSA0) : (EXYNOS_CIIYSA1))
+#define EXYNOS_CIICBSA(__x)	\
+	(((__x) < DEF_IPP) ?	\
+	 (EXYNOS_CIICBSA0) : (EXYNOS_CIICBSA1))
+#define EXYNOS_CIICRSA(__x)	\
+	(((__x) < DEF_IPP) ?	\
+	 (EXYNOS_CIICRSA0) : (EXYNOS_CIICRSA1))
+
+#define EXYNOS_CISRCFMT_SOURCEHSIZE(x)		((x) << 16)
+#define EXYNOS_CISRCFMT_SOURCEVSIZE(x)		((x) << 0)
+
+#define EXYNOS_CIWDOFST_WINHOROFST(x)		((x) << 16)
+#define EXYNOS_CIWDOFST_WINVEROFST(x)		((x) << 0)
+
+#define EXYNOS_CIWDOFST2_WINHOROFST2(x)		((x) << 16)
+#define EXYNOS_CIWDOFST2_WINVEROFST2(x)		((x) << 0)
+
+#define EXYNOS_CITRGFMT_TARGETHSIZE(x)		(((x) & 0x1fff) << 16)
+#define EXYNOS_CITRGFMT_TARGETVSIZE(x)		(((x) & 0x1fff) << 0)
+
+#define EXYNOS_CISCPRERATIO_SHFACTOR(x)		((x) << 28)
+#define EXYNOS_CISCPRERATIO_PREHORRATIO(x)		((x) << 16)
+#define EXYNOS_CISCPRERATIO_PREVERRATIO(x)		((x) << 0)
+
+#define EXYNOS_CISCPREDST_PREDSTWIDTH(x)		((x) << 16)
+#define EXYNOS_CISCPREDST_PREDSTHEIGHT(x)		((x) << 0)
+
+#define EXYNOS_CISCCTRL_MAINHORRATIO(x)		((x) << 16)
+#define EXYNOS_CISCCTRL_MAINVERRATIO(x)		((x) << 0)
+
+#define EXYNOS_CITAREA_TARGET_AREA(x)		((x) << 0)
+
+#define EXYNOS_CISTATUS_GET_FRAME_COUNT(x)		(((x) >> 26) & 0x3)
+#define EXYNOS_CISTATUS_GET_FRAME_END(x)		(((x) >> 17) & 0x1)
+#define EXYNOS_CISTATUS_GET_LAST_CAPTURE_END(x)	(((x) >> 16) & 0x1)
+#define EXYNOS_CISTATUS_GET_LCD_STATUS(x)		(((x) >> 9) & 0x1)
+#define EXYNOS_CISTATUS_GET_ENVID_STATUS(x)	(((x) >> 8) & 0x1)
+
+#define EXYNOS_CISTATUS2_GET_FRAMECOUNT_BEFORE(x)	(((x) >> 7) & 0x3f)
+#define EXYNOS_CISTATUS2_GET_FRAMECOUNT_PRESENT(x)	((x) & 0x3f)
+
+#define EXYNOS_CIIMGEFF_FIN(x)			(((x) & 0x7) << 26)
+#define EXYNOS_CIIMGEFF_PAT_CB(x)			((x) << 13)
+#define EXYNOS_CIIMGEFF_PAT_CR(x)			((x) << 0)
+
+#define EXYNOS_CIILINESKIP(x)			(((x) & 0xf) << 24)
+
+#define EXYNOS_CIREAL_ISIZE_HEIGHT(x)		((x) << 16)
+#define EXYNOS_CIREAL_ISIZE_WIDTH(x)		((x) << 0)
+
+#define EXYNOS_MSCTRL_SUCCESSIVE_COUNT(x)		((x) << 24)
+#define EXYNOS_MSCTRL_GET_INDMA_STATUS(x)		((x) & 0x1)
+
+#define EXYNOS_CIOYOFF_VERTICAL(x)			((x) << 16)
+#define EXYNOS_CIOYOFF_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_CIOCBOFF_VERTICAL(x)		((x) << 16)
+#define EXYNOS_CIOCBOFF_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_CIOCROFF_VERTICAL(x)		((x) << 16)
+#define EXYNOS_CIOCROFF_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_CIIYOFF_VERTICAL(x)			((x) << 16)
+#define EXYNOS_CIIYOFF_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_CIICBOFF_VERTICAL(x)		((x) << 16)
+#define EXYNOS_CIICBOFF_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_CIICROFF_VERTICAL(x)		((x) << 16)
+#define EXYNOS_CIICROFF_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_ORGISIZE_VERTICAL(x)		((x) << 16)
+#define EXYNOS_ORGISIZE_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_ORGOSIZE_VERTICAL(x)		((x) << 16)
+#define EXYNOS_ORGOSIZE_HORIZONTAL(x)		((x) << 0)
+
+#define EXYNOS_CIEXTEN_TARGETH_EXT(x)		((((x) & 0x2000) >> 13) << 26)
+#define EXYNOS_CIEXTEN_TARGETV_EXT(x)		((((x) & 0x2000) >> 13) << 24)
+#define EXYNOS_CIEXTEN_MAINHORRATIO_EXT(x)		(((x) & 0x3F) << 10)
+#define EXYNOS_CIEXTEN_MAINVERRATIO_EXT(x)		((x) & 0x3F)
+
+/*
+ * Bit definition part
+ */
+/* Source format register */
+#define EXYNOS_CISRCFMT_ITU601_8BIT		(1 << 31)
+#define EXYNOS_CISRCFMT_ITU656_8BIT		(0 << 31)
+#define EXYNOS_CISRCFMT_ITU601_16BIT		(1 << 29)
+#define EXYNOS_CISRCFMT_ORDER422_YCBYCR		(0 << 14)
+#define EXYNOS_CISRCFMT_ORDER422_YCRYCB		(1 << 14)
+#define EXYNOS_CISRCFMT_ORDER422_CBYCRY		(2 << 14)
+#define EXYNOS_CISRCFMT_ORDER422_CRYCBY		(3 << 14)
+/* ITU601 16bit only */
+#define EXYNOS_CISRCFMT_ORDER422_Y4CBCRCBCR	(0 << 14)
+/* ITU601 16bit only */
+#define EXYNOS_CISRCFMT_ORDER422_Y4CRCBCRCB	(1 << 14)
+
+/* Window offset register */
+#define EXYNOS_CIWDOFST_WINOFSEN			(1 << 31)
+#define EXYNOS_CIWDOFST_CLROVFIY			(1 << 30)
+#define EXYNOS_CIWDOFST_CLROVRLB			(1 << 29)
+#define EXYNOS_CIWDOFST_WINHOROFST_MASK		(0x7ff << 16)
+#define EXYNOS_CIWDOFST_CLROVFICB			(1 << 15)
+#define EXYNOS_CIWDOFST_CLROVFICR			(1 << 14)
+#define EXYNOS_CIWDOFST_WINVEROFST_MASK		(0xfff << 0)
+
+/* Global control register */
+#define EXYNOS_CIGCTRL_SWRST			(1 << 31)
+#define EXYNOS_CIGCTRL_CAMRST_A			(1 << 30)
+#define EXYNOS_CIGCTRL_SELCAM_ITU_B		(0 << 29)
+#define EXYNOS_CIGCTRL_SELCAM_ITU_A		(1 << 29)
+#define EXYNOS_CIGCTRL_SELCAM_ITU_MASK		(1 << 29)
+#define EXYNOS_CIGCTRL_TESTPATTERN_NORMAL		(0 << 27)
+#define EXYNOS_CIGCTRL_TESTPATTERN_COLOR_BAR	(1 << 27)
+#define EXYNOS_CIGCTRL_TESTPATTERN_HOR_INC		(2 << 27)
+#define EXYNOS_CIGCTRL_TESTPATTERN_VER_INC		(3 << 27)
+#define EXYNOS_CIGCTRL_TESTPATTERN_MASK		(3 << 27)
+#define EXYNOS_CIGCTRL_TESTPATTERN_SHIFT		(27)
+#define EXYNOS_CIGCTRL_INVPOLPCLK			(1 << 26)
+#define EXYNOS_CIGCTRL_INVPOLVSYNC			(1 << 25)
+#define EXYNOS_CIGCTRL_INVPOLHREF			(1 << 24)
+#define EXYNOS_CIGCTRL_IRQ_OVFEN			(1 << 22)
+#define EXYNOS_CIGCTRL_HREF_MASK			(1 << 21)
+#define EXYNOS_CIGCTRL_IRQ_EDGE			(0 << 20)
+#define EXYNOS_CIGCTRL_IRQ_LEVEL			(1 << 20)
+#define EXYNOS_CIGCTRL_IRQ_CLR			(1 << 19)
+#define EXYNOS_CIGCTRL_IRQ_END_DISABLE		(1 << 18)
+#define EXYNOS_CIGCTRL_IRQ_DISABLE			(0 << 16)
+#define EXYNOS_CIGCTRL_IRQ_ENABLE			(1 << 16)
+#define EXYNOS_CIGCTRL_SHADOW_DISABLE		(1 << 12)
+#define EXYNOS_CIGCTRL_CAM_JPEG			(1 << 8)
+#define EXYNOS_CIGCTRL_SELCAM_MIPI_B		(0 << 7)
+#define EXYNOS_CIGCTRL_SELCAM_MIPI_A		(1 << 7)
+#define EXYNOS_CIGCTRL_SELCAM_MIPI_MASK		(1 << 7)
+#define EXYNOS_CIGCTRL_SELWB_CAMIF_CAMERA	(0 << 6)
+#define EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK	(1 << 6)
+#define EXYNOS_CIGCTRL_SELWRITEBACK_MASK		(1 << 10)
+#define EXYNOS_CIGCTRL_SELWRITEBACK_A		(1 << 10)
+#define EXYNOS_CIGCTRL_SELWRITEBACK_B		(0 << 10)
+#define EXYNOS_CIGCTRL_SELWB_CAMIF_MASK		(1 << 6)
+#define EXYNOS_CIGCTRL_CSC_ITU601			(0 << 5)
+#define EXYNOS_CIGCTRL_CSC_ITU709			(1 << 5)
+#define EXYNOS_CIGCTRL_CSC_MASK			(1 << 5)
+#define EXYNOS_CIGCTRL_INVPOLHSYNC			(1 << 4)
+#define EXYNOS_CIGCTRL_SELCAM_FIMC_ITU		(0 << 3)
+#define EXYNOS_CIGCTRL_SELCAM_FIMC_MIPI		(1 << 3)
+#define EXYNOS_CIGCTRL_SELCAM_FIMC_MASK		(1 << 3)
+#define EXYNOS_CIGCTRL_PROGRESSIVE			(0 << 0)
+#define EXYNOS_CIGCTRL_INTERLACE			(1 << 0)
+
+/* Window offset2 register */
+#define EXYNOS_CIWDOFST_WINHOROFST2_MASK		(0xfff << 16)
+#define EXYNOS_CIWDOFST_WINVEROFST2_MASK		(0xfff << 0)
+
+/* Target format register */
+#define EXYNOS_CITRGFMT_INROT90_CLOCKWISE		(1 << 31)
+#define EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420		(0 << 29)
+#define EXYNOS_CITRGFMT_OUTFORMAT_YCBCR422		(1 << 29)
+#define EXYNOS_CITRGFMT_OUTFORMAT_YCBCR422_1PLANE	(2 << 29)
+#define EXYNOS_CITRGFMT_OUTFORMAT_RGB		(3 << 29)
+#define EXYNOS_CITRGFMT_OUTFORMAT_MASK		(3 << 29)
+#define EXYNOS_CITRGFMT_FLIP_SHIFT			(14)
+#define EXYNOS_CITRGFMT_FLIP_NORMAL		(0 << 14)
+#define EXYNOS_CITRGFMT_FLIP_X_MIRROR		(1 << 14)
+#define EXYNOS_CITRGFMT_FLIP_Y_MIRROR		(2 << 14)
+#define EXYNOS_CITRGFMT_FLIP_180			(3 << 14)
+#define EXYNOS_CITRGFMT_FLIP_MASK			(3 << 14)
+#define EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE		(1 << 13)
+#define EXYNOS_CITRGFMT_TARGETV_MASK		(0x1fff << 0)
+#define EXYNOS_CITRGFMT_TARGETH_MASK		(0x1fff << 16)
+
+/* Output DMA control register */
+#define EXYNOS_CIOCTRL_WEAVE_OUT			(1 << 31)
+#define EXYNOS_CIOCTRL_WEAVE_MASK			(1 << 31)
+#define EXYNOS_CIOCTRL_LASTENDEN			(1 << 30)
+#define EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR		(0 << 24)
+#define EXYNOS_CIOCTRL_ORDER2P_LSB_CRCB		(1 << 24)
+#define EXYNOS_CIOCTRL_ORDER2P_MSB_CRCB		(2 << 24)
+#define EXYNOS_CIOCTRL_ORDER2P_MSB_CBCR		(3 << 24)
+#define EXYNOS_CIOCTRL_ORDER2P_SHIFT		(24)
+#define EXYNOS_CIOCTRL_ORDER2P_MASK		(3 << 24)
+#define EXYNOS_CIOCTRL_YCBCR_3PLANE		(0 << 3)
+#define EXYNOS_CIOCTRL_YCBCR_2PLANE		(1 << 3)
+#define EXYNOS_CIOCTRL_YCBCR_PLANE_MASK		(1 << 3)
+#define EXYNOS_CIOCTRL_LASTIRQ_ENABLE		(1 << 2)
+#define EXYNOS_CIOCTRL_ALPHA_OUT			(0xff << 4)
+#define EXYNOS_CIOCTRL_ORDER422_YCBYCR		(0 << 0)
+#define EXYNOS_CIOCTRL_ORDER422_YCRYCB		(1 << 0)
+#define EXYNOS_CIOCTRL_ORDER422_CBYCRY		(2 << 0)
+#define EXYNOS_CIOCTRL_ORDER422_CRYCBY		(3 << 0)
+#define EXYNOS_CIOCTRL_ORDER422_MASK		(3 << 0)
+
+/* Main scaler control register */
+#define EXYNOS_CISCCTRL_SCALERBYPASS		(1 << 31)
+#define EXYNOS_CISCCTRL_SCALEUP_H			(1 << 30)
+#define EXYNOS_CISCCTRL_SCALEUP_V			(1 << 29)
+#define EXYNOS_CISCCTRL_CSCR2Y_NARROW		(0 << 28)
+#define EXYNOS_CISCCTRL_CSCR2Y_WIDE		(1 << 28)
+#define EXYNOS_CISCCTRL_CSCY2R_NARROW		(0 << 27)
+#define EXYNOS_CISCCTRL_CSCY2R_WIDE		(1 << 27)
+#define EXYNOS_CISCCTRL_LCDPATHEN_FIFO		(1 << 26)
+#define EXYNOS_CISCCTRL_PROGRESSIVE		(0 << 25)
+#define EXYNOS_CISCCTRL_INTERLACE			(1 << 25)
+#define EXYNOS_CISCCTRL_SCAN_MASK			(1 << 25)
+#define EXYNOS_CISCCTRL_SCALERSTART		(1 << 15)
+#define EXYNOS_CISCCTRL_INRGB_FMT_RGB565		(0 << 13)
+#define EXYNOS_CISCCTRL_INRGB_FMT_RGB666		(1 << 13)
+#define EXYNOS_CISCCTRL_INRGB_FMT_RGB888		(2 << 13)
+#define EXYNOS_CISCCTRL_INRGB_FMT_RGB_MASK		(3 << 13)
+#define EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565		(0 << 11)
+#define EXYNOS_CISCCTRL_OUTRGB_FMT_RGB666		(1 << 11)
+#define EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888		(2 << 11)
+#define EXYNOS_CISCCTRL_OUTRGB_FMT_RGB_MASK	(3 << 11)
+#define EXYNOS_CISCCTRL_EXTRGB_NORMAL		(0 << 10)
+#define EXYNOS_CISCCTRL_EXTRGB_EXTENSION		(1 << 10)
+#define EXYNOS_CISCCTRL_ONE2ONE			(1 << 9)
+#define EXYNOS_CISCCTRL_MAIN_V_RATIO_MASK		(0x1ff << 0)
+#define EXYNOS_CISCCTRL_MAIN_H_RATIO_MASK		(0x1ff << 16)
+
+/* Status register */
+#define EXYNOS_CISTATUS_OVFIY			(1 << 31)
+#define EXYNOS_CISTATUS_OVFICB			(1 << 30)
+#define EXYNOS_CISTATUS_OVFICR			(1 << 29)
+#define EXYNOS_CISTATUS_VSYNC			(1 << 28)
+#define EXYNOS_CISTATUS_SCALERSTART		(1 << 26)
+#define EXYNOS_CISTATUS_WINOFSTEN			(1 << 25)
+#define EXYNOS_CISTATUS_IMGCPTEN			(1 << 22)
+#define EXYNOS_CISTATUS_IMGCPTENSC			(1 << 21)
+#define EXYNOS_CISTATUS_VSYNC_A			(1 << 20)
+#define EXYNOS_CISTATUS_VSYNC_B			(1 << 19)
+#define EXYNOS_CISTATUS_OVRLB			(1 << 18)
+#define EXYNOS_CISTATUS_FRAMEEND			(1 << 17)
+#define EXYNOS_CISTATUS_LASTCAPTUREEND		(1 << 16)
+#define EXYNOS_CISTATUS_VVALID_A			(1 << 15)
+#define EXYNOS_CISTATUS_VVALID_B			(1 << 14)
+
+/* Image capture enable register */
+#define EXYNOS_CIIMGCPT_IMGCPTEN			(1 << 31)
+#define EXYNOS_CIIMGCPT_IMGCPTEN_SC		(1 << 30)
+#define EXYNOS_CIIMGCPT_CPT_FREN_ENABLE		(1 << 25)
+#define EXYNOS_CIIMGCPT_CPT_FRMOD_EN		(0 << 18)
+#define EXYNOS_CIIMGCPT_CPT_FRMOD_CNT		(1 << 18)
+
+/* Image effects register */
+#define EXYNOS_CIIMGEFF_IE_DISABLE			(0 << 30)
+#define EXYNOS_CIIMGEFF_IE_ENABLE			(1 << 30)
+#define EXYNOS_CIIMGEFF_IE_SC_BEFORE		(0 << 29)
+#define EXYNOS_CIIMGEFF_IE_SC_AFTER		(1 << 29)
+#define EXYNOS_CIIMGEFF_FIN_BYPASS			(0 << 26)
+#define EXYNOS_CIIMGEFF_FIN_ARBITRARY		(1 << 26)
+#define EXYNOS_CIIMGEFF_FIN_NEGATIVE		(2 << 26)
+#define EXYNOS_CIIMGEFF_FIN_ARTFREEZE		(3 << 26)
+#define EXYNOS_CIIMGEFF_FIN_EMBOSSING		(4 << 26)
+#define EXYNOS_CIIMGEFF_FIN_SILHOUETTE		(5 << 26)
+#define EXYNOS_CIIMGEFF_FIN_MASK			(7 << 26)
+#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK		((0xff << 13) | (0xff << 0))
+
+/* Real input DMA size register */
+#define EXYNOS_CIREAL_ISIZE_AUTOLOAD_ENABLE	(1 << 31)
+#define EXYNOS_CIREAL_ISIZE_ADDR_CH_DISABLE	(1 << 30)
+#define EXYNOS_CIREAL_ISIZE_HEIGHT_MASK		(0x3FFF << 16)
+#define EXYNOS_CIREAL_ISIZE_WIDTH_MASK		(0x3FFF << 0)
+
+/* Input DMA control register */
+#define EXYNOS_MSCTRL_FIELD_MASK			(1 << 31)
+#define EXYNOS_MSCTRL_FIELD_WEAVE			(1 << 31)
+#define EXYNOS_MSCTRL_FIELD_NORMAL			(0 << 31)
+#define EXYNOS_MSCTRL_BURST_CNT			(24)
+#define EXYNOS_MSCTRL_BURST_CNT_MASK		(0xf << 24)
+#define EXYNOS_MSCTRL_ORDER2P_LSB_CBCR		(0 << 16)
+#define EXYNOS_MSCTRL_ORDER2P_LSB_CRCB		(1 << 16)
+#define EXYNOS_MSCTRL_ORDER2P_MSB_CRCB		(2 << 16)
+#define EXYNOS_MSCTRL_ORDER2P_MSB_CBCR		(3 << 16)
+#define EXYNOS_MSCTRL_ORDER2P_SHIFT		(16)
+#define EXYNOS_MSCTRL_ORDER2P_SHIFT_MASK		(0x3 << 16)
+#define EXYNOS_MSCTRL_C_INT_IN_3PLANE		(0 << 15)
+#define EXYNOS_MSCTRL_C_INT_IN_2PLANE		(1 << 15)
+#define EXYNOS_MSCTRL_FLIP_SHIFT			(13)
+#define EXYNOS_MSCTRL_FLIP_NORMAL			(0 << 13)
+#define EXYNOS_MSCTRL_FLIP_X_MIRROR		(1 << 13)
+#define EXYNOS_MSCTRL_FLIP_Y_MIRROR		(2 << 13)
+#define EXYNOS_MSCTRL_FLIP_180			(3 << 13)
+#define EXYNOS_MSCTRL_FLIP_MASK			(3 << 13)
+#define EXYNOS_MSCTRL_ORDER422_CRYCBY		(0 << 4)
+#define EXYNOS_MSCTRL_ORDER422_YCRYCB		(1 << 4)
+#define EXYNOS_MSCTRL_ORDER422_CBYCRY		(2 << 4)
+#define EXYNOS_MSCTRL_ORDER422_YCBYCR		(3 << 4)
+#define EXYNOS_MSCTRL_INPUT_EXTCAM			(0 << 3)
+#define EXYNOS_MSCTRL_INPUT_MEMORY			(1 << 3)
+#define EXYNOS_MSCTRL_INPUT_MASK			(1 << 3)
+#define EXYNOS_MSCTRL_INFORMAT_YCBCR420		(0 << 1)
+#define EXYNOS_MSCTRL_INFORMAT_YCBCR422		(1 << 1)
+#define EXYNOS_MSCTRL_INFORMAT_YCBCR422_1PLANE	(2 << 1)
+#define EXYNOS_MSCTRL_INFORMAT_RGB			(3 << 1)
+#define EXYNOS_MSCTRL_ENVID			(1 << 0)
+
+/* DMA parameter register */
+#define EXYNOS_CIDMAPARAM_R_MODE_LINEAR		(0 << 29)
+#define EXYNOS_CIDMAPARAM_R_MODE_CONFTILE		(1 << 29)
+#define EXYNOS_CIDMAPARAM_R_MODE_16X16		(2 << 29)
+#define EXYNOS_CIDMAPARAM_R_MODE_64X32		(3 << 29)
+#define EXYNOS_CIDMAPARAM_R_MODE_MASK		(3 << 29)
+#define EXYNOS_CIDMAPARAM_R_TILE_HSIZE_64		(0 << 24)
+#define EXYNOS_CIDMAPARAM_R_TILE_HSIZE_128		(1 << 24)
+#define EXYNOS_CIDMAPARAM_R_TILE_HSIZE_256		(2 << 24)
+#define EXYNOS_CIDMAPARAM_R_TILE_HSIZE_512		(3 << 24)
+#define EXYNOS_CIDMAPARAM_R_TILE_HSIZE_1024	(4 << 24)
+#define EXYNOS_CIDMAPARAM_R_TILE_HSIZE_2048	(5 << 24)
+#define EXYNOS_CIDMAPARAM_R_TILE_HSIZE_4096	(6 << 24)
+#define EXYNOS_CIDMAPARAM_R_TILE_VSIZE_1		(0 << 20)
+#define EXYNOS_CIDMAPARAM_R_TILE_VSIZE_2		(1 << 20)
+#define EXYNOS_CIDMAPARAM_R_TILE_VSIZE_4		(2 << 20)
+#define EXYNOS_CIDMAPARAM_R_TILE_VSIZE_8		(3 << 20)
+#define EXYNOS_CIDMAPARAM_R_TILE_VSIZE_16		(4 << 20)
+#define EXYNOS_CIDMAPARAM_R_TILE_VSIZE_32		(5 << 20)
+#define EXYNOS_CIDMAPARAM_W_MODE_LINEAR		(0 << 13)
+#define EXYNOS_CIDMAPARAM_W_MODE_CONFTILE		(1 << 13)
+#define EXYNOS_CIDMAPARAM_W_MODE_16X16		(2 << 13)
+#define EXYNOS_CIDMAPARAM_W_MODE_64X32		(3 << 13)
+#define EXYNOS_CIDMAPARAM_W_MODE_MASK		(3 << 13)
+#define EXYNOS_CIDMAPARAM_W_TILE_HSIZE_64		(0 << 8)
+#define EXYNOS_CIDMAPARAM_W_TILE_HSIZE_128		(1 << 8)
+#define EXYNOS_CIDMAPARAM_W_TILE_HSIZE_256		(2 << 8)
+#define EXYNOS_CIDMAPARAM_W_TILE_HSIZE_512		(3 << 8)
+#define EXYNOS_CIDMAPARAM_W_TILE_HSIZE_1024	(4 << 8)
+#define EXYNOS_CIDMAPARAM_W_TILE_HSIZE_2048	(5 << 8)
+#define EXYNOS_CIDMAPARAM_W_TILE_HSIZE_4096	(6 << 8)
+#define EXYNOS_CIDMAPARAM_W_TILE_VSIZE_1		(0 << 4)
+#define EXYNOS_CIDMAPARAM_W_TILE_VSIZE_2		(1 << 4)
+#define EXYNOS_CIDMAPARAM_W_TILE_VSIZE_4		(2 << 4)
+#define EXYNOS_CIDMAPARAM_W_TILE_VSIZE_8		(3 << 4)
+#define EXYNOS_CIDMAPARAM_W_TILE_VSIZE_16		(4 << 4)
+#define EXYNOS_CIDMAPARAM_W_TILE_VSIZE_32		(5 << 4)
+
+/* Gathering Extension register */
+#define EXYNOS_CIEXTEN_TARGETH_EXT_MASK		(1 << 26)
+#define EXYNOS_CIEXTEN_TARGETV_EXT_MASK		(1 << 24)
+#define EXYNOS_CIEXTEN_MAINHORRATIO_EXT_MASK	(0x3F << 10)
+#define EXYNOS_CIEXTEN_MAINVERRATIO_EXT_MASK	(0x3F)
+#define EXYNOS_CIEXTEN_YUV444_OUT			(1 << 22)
+
+/* FIMC Clock Source Select register */
+#define EXYNOS_CLKSRC_HCLK				(0 << 1)
+#define EXYNOS_CLKSRC_HCLK_MASK			(1 << 1)
+#define EXYNOS_CLKSRC_SCLK				(1 << 1)
+
+/* SYSREG for FIMC writeback */
+#define SYSREG_CAMERA_BLK			(S3C_VA_SYS + 0x0218)
+#define SYSREG_ISP_BLK				(S3C_VA_SYS + 0x020c)
+#define SYSREG_FIMD0WB_DEST_MASK	(0x3 << 23)
+#define SYSREG_FIMD0WB_DEST_SHIFT	23
+
+#endif /* EXYNOS_REGS_FIMC_H */
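
A minimal usage sketch for the ping-pong address macros above (not part of the patch; fimc_set_output_buf() and its regs argument are hypothetical, only the EXYNOS_CIO*SA() macros come from this header):

	/* Program the n-th (0-based) output buffer.  Indices 0..3 map to
	 * CIOYSA1..CIOYSA4 and 4..31 to CIOYSA5..CIOYSA32, following the
	 * DEF_PP split encoded in the macros.
	 */
	static void fimc_set_output_buf(void __iomem *regs, int n,
					u32 y, u32 cb, u32 cr)
	{
		writel(y, regs + EXYNOS_CIOYSA(n));
		writel(cb, regs + EXYNOS_CIOCBSA(n));
		writel(cr, regs + EXYNOS_CIOCRSA(n));
	}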
diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h
new file mode 100644
index 0000000..9ad5927
--- /dev/null
+++ b/drivers/gpu/drm/exynos/regs-gsc.h
@@ -0,0 +1,284 @@
+/* linux/drivers/gpu/drm/exynos/regs-gsc.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * Register definition file for Samsung G-Scaler driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXYNOS_REGS_GSC_H_
+#define EXYNOS_REGS_GSC_H_
+
+/* G-Scaler enable */
+#define GSC_ENABLE			0x00
+#define GSC_ENABLE_PP_UPDATE_TIME_MASK	(1 << 9)
+#define GSC_ENABLE_PP_UPDATE_TIME_CURR	(0 << 9)
+#define GSC_ENABLE_PP_UPDATE_TIME_EOPAS	(1 << 9)
+#define GSC_ENABLE_CLK_GATE_MODE_MASK	(1 << 8)
+#define GSC_ENABLE_CLK_GATE_MODE_FREE	(1 << 8)
+#define GSC_ENABLE_IPC_MODE_MASK	(1 << 7)
+#define GSC_ENABLE_NORM_MODE		(0 << 7)
+#define GSC_ENABLE_IPC_MODE		(1 << 7)
+#define GSC_ENABLE_PP_UPDATE_MODE_MASK	(1 << 6)
+#define GSC_ENABLE_PP_UPDATE_FIRE_MODE	(1 << 6)
+#define GSC_ENABLE_IN_PP_UPDATE		(1 << 5)
+#define GSC_ENABLE_ON_CLEAR_MASK	(1 << 4)
+#define GSC_ENABLE_ON_CLEAR_ONESHOT	(1 << 4)
+#define GSC_ENABLE_QOS_ENABLE		(1 << 3)
+#define GSC_ENABLE_OP_STATUS		(1 << 2)
+#define GSC_ENABLE_SFR_UPDATE		(1 << 1)
+#define GSC_ENABLE_ON			(1 << 0)
+
+/* G-Scaler S/W reset */
+#define GSC_SW_RESET			0x04
+#define GSC_SW_RESET_SRESET		(1 << 0)
+
+/* G-Scaler IRQ */
+#define GSC_IRQ				0x08
+#define GSC_IRQ_STATUS_OR_IRQ		(1 << 17)
+#define GSC_IRQ_STATUS_OR_FRM_DONE	(1 << 16)
+#define GSC_IRQ_OR_MASK			(1 << 2)
+#define GSC_IRQ_FRMDONE_MASK		(1 << 1)
+#define GSC_IRQ_ENABLE			(1 << 0)
+
+/* G-Scaler input control */
+#define GSC_IN_CON			0x10
+#define GSC_IN_CHROM_STRIDE_SEL_MASK	(1 << 20)
+#define GSC_IN_CHROM_STRIDE_SEPAR	(1 << 20)
+#define GSC_IN_RB_SWAP_MASK		(1 << 19)
+#define GSC_IN_RB_SWAP			(1 << 19)
+#define GSC_IN_ROT_MASK			(7 << 16)
+#define GSC_IN_ROT_270			(7 << 16)
+#define GSC_IN_ROT_90_YFLIP		(6 << 16)
+#define GSC_IN_ROT_90_XFLIP		(5 << 16)
+#define GSC_IN_ROT_90			(4 << 16)
+#define GSC_IN_ROT_180			(3 << 16)
+#define GSC_IN_ROT_YFLIP		(2 << 16)
+#define GSC_IN_ROT_XFLIP		(1 << 16)
+#define GSC_IN_RGB_TYPE_MASK		(3 << 14)
+#define GSC_IN_RGB_HD_WIDE		(3 << 14)
+#define GSC_IN_RGB_HD_NARROW		(2 << 14)
+#define GSC_IN_RGB_SD_WIDE		(1 << 14)
+#define GSC_IN_RGB_SD_NARROW		(0 << 14)
+#define GSC_IN_YUV422_1P_ORDER_MASK	(1 << 13)
+#define GSC_IN_YUV422_1P_ORDER_LSB_Y	(0 << 13)
+#define GSC_IN_YUV422_1P_ORDER_LSB_C	(1 << 13)
+#define GSC_IN_CHROMA_ORDER_MASK	(1 << 12)
+#define GSC_IN_CHROMA_ORDER_CBCR	(0 << 12)
+#define GSC_IN_CHROMA_ORDER_CRCB	(1 << 12)
+#define GSC_IN_FORMAT_MASK		(7 << 8)
+#define GSC_IN_XRGB8888			(0 << 8)
+#define GSC_IN_RGB565			(1 << 8)
+#define GSC_IN_YUV420_2P		(2 << 8)
+#define GSC_IN_YUV420_3P		(3 << 8)
+#define GSC_IN_YUV422_1P		(4 << 8)
+#define GSC_IN_YUV422_2P		(5 << 8)
+#define GSC_IN_YUV422_3P		(6 << 8)
+#define GSC_IN_TILE_TYPE_MASK		(1 << 4)
+#define GSC_IN_TILE_C_16x8		(0 << 4)
+#define GSC_IN_TILE_C_16x16		(1 << 4)
+#define GSC_IN_TILE_MODE		(1 << 3)
+#define GSC_IN_LOCAL_SEL_MASK		(3 << 1)
+#define GSC_IN_LOCAL_CAM3		(3 << 1)
+#define GSC_IN_LOCAL_FIMD_WB		(2 << 1)
+#define GSC_IN_LOCAL_CAM1		(1 << 1)
+#define GSC_IN_LOCAL_CAM0		(0 << 1)
+#define GSC_IN_PATH_MASK		(1 << 0)
+#define GSC_IN_PATH_LOCAL		(1 << 0)
+#define GSC_IN_PATH_MEMORY		(0 << 0)
+
+/* G-Scaler source image size */
+#define GSC_SRCIMG_SIZE			0x14
+#define GSC_SRCIMG_HEIGHT_MASK		(0x1fff << 16)
+#define GSC_SRCIMG_HEIGHT(x)		((x) << 16)
+#define GSC_SRCIMG_WIDTH_MASK		(0x3fff << 0)
+#define GSC_SRCIMG_WIDTH(x)		((x) << 0)
+
+/* G-Scaler source image offset */
+#define GSC_SRCIMG_OFFSET		0x18
+#define GSC_SRCIMG_OFFSET_Y_MASK	(0x1fff << 16)
+#define GSC_SRCIMG_OFFSET_Y(x)		((x) << 16)
+#define GSC_SRCIMG_OFFSET_X_MASK	(0x1fff << 0)
+#define GSC_SRCIMG_OFFSET_X(x)		((x) << 0)
+
+/* G-Scaler cropped source image size */
+#define GSC_CROPPED_SIZE		0x1C
+#define GSC_CROPPED_HEIGHT_MASK		(0x1fff << 16)
+#define GSC_CROPPED_HEIGHT(x)		((x) << 16)
+#define GSC_CROPPED_WIDTH_MASK		(0x1fff << 0)
+#define GSC_CROPPED_WIDTH(x)		((x) << 0)
+
+/* G-Scaler output control */
+#define GSC_OUT_CON			0x20
+#define GSC_OUT_GLOBAL_ALPHA_MASK	(0xff << 24)
+#define GSC_OUT_GLOBAL_ALPHA(x)		((x) << 24)
+#define GSC_OUT_CHROM_STRIDE_SEL_MASK	(1 << 13)
+#define GSC_OUT_CHROM_STRIDE_SEPAR	(1 << 13)
+#define GSC_OUT_RB_SWAP_MASK		(1 << 12)
+#define GSC_OUT_RB_SWAP			(1 << 12)
+#define GSC_OUT_RGB_TYPE_MASK		(3 << 10)
+#define GSC_OUT_RGB_HD_NARROW		(3 << 10)
+#define GSC_OUT_RGB_HD_WIDE		(2 << 10)
+#define GSC_OUT_RGB_SD_NARROW		(1 << 10)
+#define GSC_OUT_RGB_SD_WIDE		(0 << 10)
+#define GSC_OUT_YUV422_1P_ORDER_MASK	(1 << 9)
+#define GSC_OUT_YUV422_1P_ORDER_LSB_Y	(0 << 9)
+#define GSC_OUT_YUV422_1P_ORDER_LSB_C	(1 << 9)
+#define GSC_OUT_CHROMA_ORDER_MASK	(1 << 8)
+#define GSC_OUT_CHROMA_ORDER_CBCR	(0 << 8)
+#define GSC_OUT_CHROMA_ORDER_CRCB	(1 << 8)
+#define GSC_OUT_FORMAT_MASK		(7 << 4)
+#define GSC_OUT_XRGB8888		(0 << 4)
+#define GSC_OUT_RGB565			(1 << 4)
+#define GSC_OUT_YUV420_2P		(2 << 4)
+#define GSC_OUT_YUV420_3P		(3 << 4)
+#define GSC_OUT_YUV422_1P		(4 << 4)
+#define GSC_OUT_YUV422_2P		(5 << 4)
+#define GSC_OUT_YUV444			(7 << 4)
+#define GSC_OUT_TILE_TYPE_MASK		(1 << 2)
+#define GSC_OUT_TILE_C_16x8		(0 << 2)
+#define GSC_OUT_TILE_C_16x16		(1 << 2)
+#define GSC_OUT_TILE_MODE		(1 << 1)
+#define GSC_OUT_PATH_MASK		(1 << 0)
+#define GSC_OUT_PATH_LOCAL		(1 << 0)
+#define GSC_OUT_PATH_MEMORY		(0 << 0)
+
+/* G-Scaler scaled destination image size */
+#define GSC_SCALED_SIZE			0x24
+#define GSC_SCALED_HEIGHT_MASK		(0x1fff << 16)
+#define GSC_SCALED_HEIGHT(x)		((x) << 16)
+#define GSC_SCALED_WIDTH_MASK		(0x1fff << 0)
+#define GSC_SCALED_WIDTH(x)		((x) << 0)
+
+/* G-Scaler pre scale ratio */
+#define GSC_PRE_SCALE_RATIO		0x28
+#define GSC_PRESC_SHFACTOR_MASK		(7 << 28)
+#define GSC_PRESC_SHFACTOR(x)		((x) << 28)
+#define GSC_PRESC_V_RATIO_MASK		(7 << 16)
+#define GSC_PRESC_V_RATIO(x)		((x) << 16)
+#define GSC_PRESC_H_RATIO_MASK		(7 << 0)
+#define GSC_PRESC_H_RATIO(x)		((x) << 0)
+
+/* G-Scaler main scale horizontal ratio */
+#define GSC_MAIN_H_RATIO		0x2C
+#define GSC_MAIN_H_RATIO_MASK		(0xfffff << 0)
+#define GSC_MAIN_H_RATIO_VALUE(x)	((x) << 0)
+
+/* G-Scaler main scale vertical ratio */
+#define GSC_MAIN_V_RATIO		0x30
+#define GSC_MAIN_V_RATIO_MASK		(0xfffff << 0)
+#define GSC_MAIN_V_RATIO_VALUE(x)	((x) << 0)
+
+/* G-Scaler input chrominance stride */
+#define GSC_IN_CHROM_STRIDE		0x3C
+#define GSC_IN_CHROM_STRIDE_MASK	(0x3fff << 0)
+#define GSC_IN_CHROM_STRIDE_VALUE(x)	((x) << 0)
+
+/* G-Scaler destination image size */
+#define GSC_DSTIMG_SIZE			0x40
+#define GSC_DSTIMG_HEIGHT_MASK		(0x1fff << 16)
+#define GSC_DSTIMG_HEIGHT(x)		((x) << 16)
+#define GSC_DSTIMG_WIDTH_MASK		(0x1fff << 0)
+#define GSC_DSTIMG_WIDTH(x)		((x) << 0)
+
+/* G-Scaler destination image offset */
+#define GSC_DSTIMG_OFFSET		0x44
+#define GSC_DSTIMG_OFFSET_Y_MASK	(0x1fff << 16)
+#define GSC_DSTIMG_OFFSET_Y(x)		((x) << 16)
+#define GSC_DSTIMG_OFFSET_X_MASK	(0x1fff << 0)
+#define GSC_DSTIMG_OFFSET_X(x)		((x) << 0)
+
+/* G-Scaler output chrominance stride */
+#define GSC_OUT_CHROM_STRIDE		0x48
+#define GSC_OUT_CHROM_STRIDE_MASK	(0x3fff << 0)
+#define GSC_OUT_CHROM_STRIDE_VALUE(x)	((x) << 0)
+
+/* G-Scaler input y address mask */
+#define GSC_IN_BASE_ADDR_Y_MASK		0x4C
+/* G-Scaler input y base address */
+#define GSC_IN_BASE_ADDR_Y(n)		(0x50 + (n) * 0x4)
+/* G-Scaler input y base current address */
+#define GSC_IN_BASE_ADDR_Y_CUR(n)	(0x60 + (n) * 0x4)
+
+/* G-Scaler input cb address mask */
+#define GSC_IN_BASE_ADDR_CB_MASK	0x7C
+/* G-Scaler input cb base address */
+#define GSC_IN_BASE_ADDR_CB(n)		(0x80 + (n) * 0x4)
+/* G-Scaler input cb base current address */
+#define GSC_IN_BASE_ADDR_CB_CUR(n)	(0x90 + (n) * 0x4)
+
+/* G-Scaler input cr address mask */
+#define GSC_IN_BASE_ADDR_CR_MASK	0xAC
+/* G-Scaler input cr base address */
+#define GSC_IN_BASE_ADDR_CR(n)		(0xB0 + (n) * 0x4)
+/* G-Scaler input cr base current address */
+#define GSC_IN_BASE_ADDR_CR_CUR(n)	(0xC0 + (n) * 0x4)
+
+/* G-Scaler input address mask */
+#define GSC_IN_CURR_ADDR_INDEX	(0xf << 24)
+#define GSC_IN_CURR_GET_INDEX(x)	((x) >> 24)
+#define GSC_IN_BASE_ADDR_PINGPONG(x)	((x) << 16)
+#define GSC_IN_BASE_ADDR_MASK		(0xff << 0)
+
+/* G-Scaler output y address mask */
+#define GSC_OUT_BASE_ADDR_Y_MASK	0x10C
+/* G-Scaler output y base address */
+#define GSC_OUT_BASE_ADDR_Y(n)		(0x110 + (n) * 0x4)
+
+/* G-Scaler output cb address mask */
+#define GSC_OUT_BASE_ADDR_CB_MASK	0x15C
+/* G-Scaler output cb base address */
+#define GSC_OUT_BASE_ADDR_CB(n)		(0x160 + (n) * 0x4)
+
+/* G-Scaler output cr address mask */
+#define GSC_OUT_BASE_ADDR_CR_MASK	0x1AC
+/* G-Scaler output cr base address */
+#define GSC_OUT_BASE_ADDR_CR(n)		(0x1B0 + (n) * 0x4)
+
+/* G-Scaler output address mask */
+#define GSC_OUT_CURR_ADDR_INDEX		(0xf << 24)
+#define GSC_OUT_CURR_GET_INDEX(x)	((x) >> 24)
+#define GSC_OUT_BASE_ADDR_PINGPONG(x)	((x) << 16)
+#define GSC_OUT_BASE_ADDR_MASK		(0xffff << 0)
+
+/* G-Scaler horizontal scaling filter */
+#define GSC_HCOEF(n, s, x)	(0x300 + (n) * 0x4 + (s) * 0x30 + (x) * 0x300)
+
+/* G-Scaler vertical scaling filter */
+#define GSC_VCOEF(n, s, x)	(0x200 + (n) * 0x4 + (s) * 0x30 + (x) * 0x300)
+
+/* G-Scaler BUS control */
+#define GSC_BUSCON			0xA78
+#define GSC_BUSCON_INT_TIME_MASK	(1 << 8)
+#define GSC_BUSCON_INT_DATA_TRANS	(0 << 8)
+#define GSC_BUSCON_INT_AXI_RESPONSE	(1 << 8)
+#define GSC_BUSCON_AWCACHE(x)		((x) << 4)
+#define GSC_BUSCON_ARCACHE(x)		((x) << 0)
+
+/* G-Scaler V position */
+#define GSC_VPOSITION			0xA7C
+#define GSC_VPOS_F(x)			((x) << 0)
+
+/* G-Scaler clock initial count */
+#define GSC_CLK_INIT_COUNT		0xC00
+#define GSC_CLK_GATE_MODE_INIT_CNT(x)	((x) << 0)
+
+/* G-Scaler clock snoop count */
+#define GSC_CLK_SNOOP_COUNT		0xC04
+#define GSC_CLK_GATE_MODE_SNOOP_CNT(x)	((x) << 0)
+
+/* SYSCON. GSCBLK_CFG */
+#define SYSREG_GSCBLK_CFG1		(S3C_VA_SYS + 0x0224)
+#define GSC_BLK_DISP1WB_DEST(x)		((x) << 10)
+#define GSC_BLK_SW_RESET_WB_DEST(x)	(1 << (18 + (x)))
+#define GSC_BLK_PXLASYNC_LO_MASK_WB(x)	(0 << (14 + (x)))
+#define GSC_BLK_GSCL_WB_IN_SRC_SEL(x)	(1 << (2 * (x)))
+#define SYSREG_GSCBLK_CFG2		(S3C_VA_SYS + 0x2000)
+#define PXLASYNC_LO_MASK_CAMIF_GSCL(x)	(1 << (x))
+
+#endif /* EXYNOS_REGS_GSC_H_ */
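
Purely for illustration (no G-Scaler driver code is part of this hunk), the input-control fields above combine in the usual clear-mask-then-set style; the regs pointer below is a hypothetical ioremapped register base:

	/* Select memory input, 2-plane YUV420 with CbCr chroma order,
	 * rotated 90 degrees; clear each field's mask before setting it.
	 */
	u32 cfg = readl(regs + GSC_IN_CON);

	cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_FORMAT_MASK |
		 GSC_IN_CHROMA_ORDER_MASK | GSC_IN_ROT_MASK);
	cfg |= GSC_IN_PATH_MEMORY | GSC_IN_YUV420_2P |
	       GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_ROT_90;
	writel(cfg, regs + GSC_IN_CON);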
diff --git a/drivers/gpu/drm/exynos/regs-hdmi.h b/drivers/gpu/drm/exynos/regs-hdmi.h
index 9cc7c5e..ef1b3eb 100644
--- a/drivers/gpu/drm/exynos/regs-hdmi.h
+++ b/drivers/gpu/drm/exynos/regs-hdmi.h
@@ -176,6 +176,11 @@
 #define HDMI_PHY_CMU			HDMI_CTRL_BASE(0x007C)
 #define HDMI_CORE_RSTOUT		HDMI_CTRL_BASE(0x0080)
 
+/* PHY Control bit definition */
+
+/* HDMI_PHY_CON_0 */
+#define HDMI_PHY_POWER_OFF_EN		(1 << 0)
+
 /* Video related registers */
 #define HDMI_YMAX			HDMI_CORE_BASE(0x0060)
 #define HDMI_YMIN			HDMI_CORE_BASE(0x0064)
@@ -298,14 +303,14 @@
 #define HDMI_AVI_HEADER1		HDMI_CORE_BASE(0x0714)
 #define HDMI_AVI_HEADER2		HDMI_CORE_BASE(0x0718)
 #define HDMI_AVI_CHECK_SUM		HDMI_CORE_BASE(0x071C)
-#define HDMI_AVI_BYTE(n)		HDMI_CORE_BASE(0x0720 + 4 * (n))
+#define HDMI_AVI_BYTE(n)		HDMI_CORE_BASE(0x0720 + 4 * (n-1))
 
 #define HDMI_AUI_CON			HDMI_CORE_BASE(0x0800)
 #define HDMI_AUI_HEADER0		HDMI_CORE_BASE(0x0810)
 #define HDMI_AUI_HEADER1		HDMI_CORE_BASE(0x0814)
 #define HDMI_AUI_HEADER2		HDMI_CORE_BASE(0x0818)
 #define HDMI_AUI_CHECK_SUM		HDMI_CORE_BASE(0x081C)
-#define HDMI_AUI_BYTE(n)		HDMI_CORE_BASE(0x0820 + 4 * (n))
+#define HDMI_AUI_BYTE(n)		HDMI_CORE_BASE(0x0820 + 4 * (n-1))
 
 #define HDMI_MPG_CON			HDMI_CORE_BASE(0x0900)
 #define HDMI_MPG_CHECK_SUM		HDMI_CORE_BASE(0x091C)
@@ -338,6 +343,19 @@
 #define HDMI_AN_SEED_2			HDMI_CORE_BASE(0x0E60)
 #define HDMI_AN_SEED_3			HDMI_CORE_BASE(0x0E64)
 
+/* AVI bit definition */
+#define HDMI_AVI_CON_DO_NOT_TRANSMIT	(0 << 1)
+#define HDMI_AVI_CON_EVERY_VSYNC	(1 << 1)
+
+#define AVI_ACTIVE_FORMAT_VALID	(1 << 4)
+#define AVI_UNDERSCANNED_DISPLAY_VALID	(1 << 1)
+
+/* AUI bit definition */
+#define HDMI_AUI_CON_NO_TRAN		(0 << 0)
+
+/* VSI bit definition */
+#define HDMI_VSI_CON_DO_NOT_TRANSMIT	(0 << 0)
+
 /* HDCP related registers */
 #define HDMI_HDCP_SHA1(n)		HDMI_CORE_BASE(0x7000 + 4 * (n))
 #define HDMI_HDCP_KSV_LIST(n)		HDMI_CORE_BASE(0x7050 + 4 * (n))
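
The HDMI_AVI_BYTE()/HDMI_AUI_BYTE() edits in this file switch the macros from 0-based to 1-based indexing, so the argument now matches the infoframe data-byte numbering; concretely:

	/* With the new definition:
	 *   HDMI_AVI_BYTE(1) = HDMI_CORE_BASE(0x0720 + 4 * 0) -> 0x0720
	 *   HDMI_AVI_BYTE(2) = HDMI_CORE_BASE(0x0720 + 4 * 1) -> 0x0724
	 * so callers can pass 1-based AVI data byte numbers directly.
	 */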
diff --git a/drivers/gpu/drm/exynos/regs-rotator.h b/drivers/gpu/drm/exynos/regs-rotator.h
new file mode 100644
index 0000000..a09ac6e
--- /dev/null
+++ b/drivers/gpu/drm/exynos/regs-rotator.h
@@ -0,0 +1,73 @@
+/* drivers/gpu/drm/exynos/regs-rotator.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * Register definition file for Samsung Rotator Interface (Rotator) driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXYNOS_REGS_ROTATOR_H
+#define EXYNOS_REGS_ROTATOR_H
+
+/* Configuration */
+#define ROT_CONFIG			0x00
+#define ROT_CONFIG_IRQ			(3 << 8)
+
+/* Image Control */
+#define ROT_CONTROL			0x10
+#define ROT_CONTROL_PATTERN_WRITE	(1 << 16)
+#define ROT_CONTROL_FMT_YCBCR420_2P	(1 << 8)
+#define ROT_CONTROL_FMT_RGB888		(6 << 8)
+#define ROT_CONTROL_FMT_MASK		(7 << 8)
+#define ROT_CONTROL_FLIP_VERTICAL	(2 << 6)
+#define ROT_CONTROL_FLIP_HORIZONTAL	(3 << 6)
+#define ROT_CONTROL_FLIP_MASK		(3 << 6)
+#define ROT_CONTROL_ROT_90		(1 << 4)
+#define ROT_CONTROL_ROT_180		(2 << 4)
+#define ROT_CONTROL_ROT_270		(3 << 4)
+#define ROT_CONTROL_ROT_MASK		(3 << 4)
+#define ROT_CONTROL_START		(1 << 0)
+
+/* Status */
+#define ROT_STATUS			0x20
+#define ROT_STATUS_IRQ_PENDING(x)	(1 << (x))
+#define ROT_STATUS_IRQ(x)		(((x) >> 8) & 0x3)
+#define ROT_STATUS_IRQ_VAL_COMPLETE	1
+#define ROT_STATUS_IRQ_VAL_ILLEGAL	2
+
+/* Buffer Address */
+#define ROT_SRC_BUF_ADDR(n)		(0x30 + ((n) << 2))
+#define ROT_DST_BUF_ADDR(n)		(0x50 + ((n) << 2))
+
+/* Buffer Size */
+#define ROT_SRC_BUF_SIZE		0x3c
+#define ROT_DST_BUF_SIZE		0x5c
+#define ROT_SET_BUF_SIZE_H(x)		((x) << 16)
+#define ROT_SET_BUF_SIZE_W(x)		((x) << 0)
+#define ROT_GET_BUF_SIZE_H(x)		((x) >> 16)
+#define ROT_GET_BUF_SIZE_W(x)		((x) & 0xffff)
+
+/* Crop Position */
+#define ROT_SRC_CROP_POS		0x40
+#define ROT_DST_CROP_POS		0x60
+#define ROT_CROP_POS_Y(x)		((x) << 16)
+#define ROT_CROP_POS_X(x)		((x) << 0)
+
+/* Source Crop Size */
+#define ROT_SRC_CROP_SIZE		0x44
+#define ROT_SRC_CROP_SIZE_H(x)		((x) << 16)
+#define ROT_SRC_CROP_SIZE_W(x)		((x) << 0)
+
+/* Round to nearest aligned value */
+#define ROT_ALIGN(x, align, mask)	(((x) + (1 << ((align) - 1))) & (mask))
+/* Minimum limit value */
+#define ROT_MIN(min, mask)		(((min) + ~(mask)) & (mask))
+/* Maximum limit value */
+#define ROT_MAX(max, mask)		((max) & (mask))
+
+#endif /* EXYNOS_REGS_ROTATOR_H */
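
A worked example of the rounding macros above, assuming a 16-pixel alignment (align = 4, mask = ~0xf; the concrete values are illustrative only):

	/* Round to the nearest multiple of 16:
	 *   ROT_ALIGN(100, 4, ~0xf) = (100 + 8) & ~0xf = 96
	 * Round a minimum limit up and a maximum limit down:
	 *   ROT_MIN(100, ~0xf) = (100 + 0xf) & ~0xf = 112
	 *   ROT_MAX(100, ~0xf) = 100 & ~0xf = 96
	 */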
diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 1ceca3d..23e14e9 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -523,7 +523,7 @@
 
 		dev_priv->force_audio_property = prop;
 	}
-	drm_connector_attach_property(connector, prop, 0);
+	drm_object_attach_property(&connector->base, prop, 0);
 }
 
 
@@ -553,7 +553,7 @@
 		dev_priv->broadcast_rgb_property = prop;
 	}
 
-	drm_connector_attach_property(connector, prop, 0);
+	drm_object_attach_property(&connector->base, prop, 0);
 }
 
 /* Cedarview */
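
The two hunks above, and the matching ones in the gma500 files that follow, are the same mechanical API conversion: the drm_connector_*_property() helpers are replaced by the generic drm_object_*() calls, which operate on the connector's embedded mode object; schematically:

	/* Old helper, now removed:
	 *   drm_connector_attach_property(connector, prop, 0);
	 * New call: pass the embedded struct drm_mode_object instead.
	 */
	drm_object_attach_property(&connector->base, prop, 0);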
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index e3a3978..51044cc 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -1650,7 +1650,7 @@
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	int ret;
 
-	ret = drm_connector_property_set_value(connector, property, val);
+	ret = drm_object_property_set_value(&connector->base, property, val);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 7272a46..e223b50 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -185,14 +185,14 @@
 			return -1;
 		}
 
-		if (drm_connector_property_get_value(connector,
+		if (drm_object_property_get_value(&connector->base,
 							property, &curValue))
 			return -1;
 
 		if (curValue == value)
 			return 0;
 
-		if (drm_connector_property_set_value(connector,
+		if (drm_object_property_set_value(&connector->base,
 							property, value))
 			return -1;
 
@@ -341,7 +341,7 @@
 	connector->interlace_allowed = false;
 	connector->doublescan_allowed = false;
 
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.scaling_mode_property,
 				      DRM_MODE_SCALE_FULLSCREEN);
 
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index b362dd3..d81dbc3 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -479,7 +479,7 @@
 			return -1;
 		}
 
-		if (drm_connector_property_get_value(connector,
+		if (drm_object_property_get_value(&connector->base,
 						     property,
 						     &curValue))
 			return -1;
@@ -487,7 +487,7 @@
 		if (curValue == value)
 			return 0;
 
-		if (drm_connector_property_set_value(connector,
+		if (drm_object_property_set_value(&connector->base,
 							property,
 							value))
 			return -1;
@@ -502,7 +502,7 @@
 				return -1;
 		}
 	} else if (!strcmp(property->name, "backlight") && encoder) {
-		if (drm_connector_property_set_value(connector,
+		if (drm_object_property_set_value(&connector->base,
 							property,
 							value))
 			return -1;
@@ -671,10 +671,10 @@
 	connector->doublescan_allowed = false;
 
 	/*Attach connector properties*/
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.scaling_mode_property,
 				      DRM_MODE_SCALE_FULLSCREEN);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev_priv->backlight_property,
 				      BRIGHTNESS_MAX_LEVEL);
 
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index 32dba2a..2d4ab48 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -265,13 +265,13 @@
 			goto set_prop_error;
 		}
 
-		if (drm_connector_property_get_value(connector, property, &val))
+		if (drm_object_property_get_value(&connector->base, property, &val))
 			goto set_prop_error;
 
 		if (val == value)
 			goto set_prop_done;
 
-		if (drm_connector_property_set_value(connector,
+		if (drm_object_property_set_value(&connector->base,
 							property, value))
 			goto set_prop_error;
 
@@ -296,7 +296,7 @@
 			}
 		}
 	} else if (!strcmp(property->name, "backlight") && encoder) {
-		if (drm_connector_property_set_value(connector, property,
+		if (drm_object_property_set_value(&connector->base, property,
 									value))
 			goto set_prop_error;
 		else
@@ -506,7 +506,7 @@
 
 	dev_dbg(dev->dev, "init DSI output on pipe %d\n", pipe);
 
-	if (!dev || ((pipe != 0) && (pipe != 2))) {
+	if (pipe != 0 && pipe != 2) {
 		DRM_ERROR("Invalid parameter\n");
 		return;
 	}
@@ -572,10 +572,10 @@
 	connector->doublescan_allowed = false;
 
 	/*attach properties*/
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				dev->mode_config.scaling_mode_property,
 				DRM_MODE_SCALE_FULLSCREEN);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				dev_priv->backlight_property,
 				MDFLD_DSI_BRIGHTNESS_MAX_LEVEL);
 
diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c
index dec6a9a..74485dc 100644
--- a/drivers/gpu/drm/gma500/mdfld_intel_display.c
+++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c
@@ -820,7 +820,7 @@
 	REG_WRITE(map->pos, 0);
 
 	if (psb_intel_encoder)
-		drm_connector_property_get_value(connector,
+		drm_object_property_get_value(&connector->base,
 			dev->mode_config.scaling_mode_property, &scalingType);
 
 	if (scalingType == DRM_MODE_SCALE_NO_SCALE) {
diff --git a/drivers/gpu/drm/gma500/oaktrail.h b/drivers/gpu/drm/gma500/oaktrail.h
index f2f9f38..30adbbe2 100644
--- a/drivers/gpu/drm/gma500/oaktrail.h
+++ b/drivers/gpu/drm/gma500/oaktrail.h
@@ -249,3 +249,9 @@
 extern void oaktrail_hdmi_save(struct drm_device *dev);
 extern void oaktrail_hdmi_restore(struct drm_device *dev);
 extern void oaktrail_hdmi_init(struct drm_device *dev, struct psb_intel_mode_device *mode_dev);
+extern int oaktrail_crtc_hdmi_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
+						struct drm_display_mode *adjusted_mode, int x, int y,
+						struct drm_framebuffer *old_fb);
+extern void oaktrail_crtc_hdmi_dpms(struct drm_crtc *crtc, int mode);
diff --git a/drivers/gpu/drm/gma500/oaktrail_crtc.c b/drivers/gpu/drm/gma500/oaktrail_crtc.c
index cdafd2a..3071526 100644
--- a/drivers/gpu/drm/gma500/oaktrail_crtc.c
+++ b/drivers/gpu/drm/gma500/oaktrail_crtc.c
@@ -168,6 +168,11 @@
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	u32 temp;
 
+	if (pipe == 1) {
+		oaktrail_crtc_hdmi_dpms(crtc, mode);
+		return;
+	}
+
 	if (!gma_power_begin(dev, true))
 		return;
 
@@ -302,6 +307,9 @@
 	uint64_t scalingType = DRM_MODE_SCALE_FULLSCREEN;
 	struct drm_connector *connector;
 
+	if (pipe == 1)
+		return oaktrail_crtc_hdmi_mode_set(crtc, mode, adjusted_mode, x, y, old_fb);
+
 	if (!gma_power_begin(dev, true))
 		return 0;
 
@@ -343,7 +351,7 @@
 		  (mode->crtc_vdisplay - 1));
 
 	if (psb_intel_encoder)
-		drm_connector_property_get_value(connector,
+		drm_object_property_get_value(&connector->base,
 			dev->mode_config.scaling_mode_property, &scalingType);
 
 	if (scalingType == DRM_MODE_SCALE_NO_SCALE) {
diff --git a/drivers/gpu/drm/gma500/oaktrail_device.c b/drivers/gpu/drm/gma500/oaktrail_device.c
index 010b8207..08747fd 100644
--- a/drivers/gpu/drm/gma500/oaktrail_device.c
+++ b/drivers/gpu/drm/gma500/oaktrail_device.c
@@ -544,7 +544,7 @@
 	.accel_2d = 1,
 	.pipes = 2,
 	.crtcs = 2,
-	.hdmi_mask = (1 << 0),
+	.hdmi_mask = (1 << 1),
 	.lvds_mask = (1 << 0),
 	.cursor_needs_phys = 0,
 	.sgx_offset = MRST_SGX_OFFSET,
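
Reading this together with the pipe == 1 paths added to oaktrail_crtc.c above, the hdmi_mask change moves HDMI from pipe 0 to pipe 1; a schematic gloss of the bitmap (an interpretation, not part of the patch):

	/* hdmi_mask is a per-pipe bitmap:
	 *   (1 << 0) = 0x1 -> pipe 0, which lvds_mask already claims
	 *   (1 << 1) = 0x2 -> pipe 1, where the HDMI encoder actually sits
	 */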
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index 69e51e90..f036f1f 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -155,6 +155,345 @@
 	HDMI_READ(HDMI_HCR);
 }
 
+static void wait_for_vblank(struct drm_device *dev)
+{
+	/* Wait for 20ms, i.e. one cycle at 50hz. */
+	mdelay(20);
+}
+
+static unsigned int htotal_calculate(struct drm_display_mode *mode)
+{
+	u32 htotal, new_crtc_htotal;
+
+	htotal = (mode->crtc_hdisplay - 1) | ((mode->crtc_htotal - 1) << 16);
+
+	/*
+	 * 1024 x 768  new_crtc_htotal = 0x1024;
+	 * 1280 x 1024 new_crtc_htotal = 0x0c34;
+	 */
+	new_crtc_htotal = (mode->crtc_htotal - 1) * 200 * 1000 / mode->clock;
+
+	DRM_DEBUG_KMS("new crtc htotal 0x%04x\n", new_crtc_htotal);
+	return (mode->crtc_hdisplay - 1) | (new_crtc_htotal << 16);
+}
+
+static void oaktrail_hdmi_find_dpll(struct drm_crtc *crtc, int target,
+				int refclk, struct oaktrail_hdmi_clock *best_clock)
+{
+	int np_min, np_max, nr_min, nr_max;
+	int np, nr, nf;
+
+	np_min = DIV_ROUND_UP(oaktrail_hdmi_limit.vco.min, target * 10);
+	np_max = oaktrail_hdmi_limit.vco.max / (target * 10);
+	if (np_min < oaktrail_hdmi_limit.np.min)
+		np_min = oaktrail_hdmi_limit.np.min;
+	if (np_max > oaktrail_hdmi_limit.np.max)
+		np_max = oaktrail_hdmi_limit.np.max;
+
+	nr_min = DIV_ROUND_UP((refclk * 1000), (target * 10 * np_max));
+	nr_max = DIV_ROUND_UP((refclk * 1000), (target * 10 * np_min));
+	if (nr_min < oaktrail_hdmi_limit.nr.min)
+		nr_min = oaktrail_hdmi_limit.nr.min;
+	if (nr_max > oaktrail_hdmi_limit.nr.max)
+		nr_max = oaktrail_hdmi_limit.nr.max;
+
+	np = DIV_ROUND_UP((refclk * 1000), (target * 10 * nr_max));
+	nr = DIV_ROUND_UP((refclk * 1000), (target * 10 * np));
+	nf = DIV_ROUND_CLOSEST((target * 10 * np * nr), refclk);
+	DRM_DEBUG_KMS("np, nr, nf %d %d %d\n", np, nr, nf);
+
+	/*
+	 * 1024 x 768  np = 1; nr = 0x26; nf = 0x0fd8000;
+	 * 1280 x 1024 np = 1; nr = 0x17; nf = 0x1034000;
+	 */
+	best_clock->np = np;
+	best_clock->nr = nr - 1;
+	best_clock->nf = (nf << 14);
+}
+
+static void scu_busy_loop(void __iomem *scu_base)
+{
+	u32 status = 0;
+	u32 loop_count = 0;
+
+	status = readl(scu_base + 0x04);
+	while (status & 1) {
+		udelay(1); /* SCU processing time is a few microseconds */
+		status = readl(scu_base + 0x04);
+		loop_count++;
+		/* Break out if the SCU doesn't clear the busy bit after many retries */
+		if (loop_count > 1000) {
+			DRM_DEBUG_KMS("SCU IPC timed out");
+			return;
+		}
+	}
+}
+
+/*
+ *	You don't want to know, you really really don't want to know....
+ *
+ *	This is magic. However it's safe magic because of the way the platform
+ *	works and it is necessary magic.
+ */
+static void oaktrail_hdmi_reset(struct drm_device *dev)
+{
+	void __iomem *base;
+	unsigned long scu_ipc_mmio = 0xff11c000UL;
+	int scu_len = 1024;
+
+	base = ioremap((resource_size_t)scu_ipc_mmio, scu_len);
+	if (base == NULL) {
+		DRM_ERROR("failed to map scu mmio\n");
+		return;
+	}
+
+	/* scu ipc: assert hdmi controller reset */
+	writel(0xff11d118, base + 0x0c);
+	writel(0x7fffffdf, base + 0x80);
+	writel(0x42005, base + 0x0);
+	scu_busy_loop(base);
+
+	/* scu ipc: de-assert hdmi controller reset */
+	writel(0xff11d118, base + 0x0c);
+	writel(0x7fffffff, base + 0x80);
+	writel(0x42005, base + 0x0);
+	scu_busy_loop(base);
+
+	iounmap(base);
+}
+
+int oaktrail_crtc_hdmi_mode_set(struct drm_crtc *crtc,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode,
+			    int x, int y,
+			    struct drm_framebuffer *old_fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct oaktrail_hdmi_dev *hdmi_dev = dev_priv->hdmi_priv;
+	int pipe = 1;
+	int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
+	int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
+	int hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
+	int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
+	int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
+	int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
+	int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE;
+	int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS;
+	int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC;
+	int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
+	int refclk;
+	struct oaktrail_hdmi_clock clock;
+	u32 dspcntr, pipeconf, dpll, temp;
+	int dspcntr_reg = DSPBCNTR;
+
+	if (!gma_power_begin(dev, true))
+		return 0;
+
+	/* Disable the VGA plane that we never use */
+	REG_WRITE(VGACNTRL, VGA_DISP_DISABLE);
+
+	/* Disable dpll if necessary */
+	dpll = REG_READ(DPLL_CTRL);
+	if ((dpll & DPLL_PWRDN) == 0) {
+		REG_WRITE(DPLL_CTRL, dpll | (DPLL_PWRDN | DPLL_RESET));
+		REG_WRITE(DPLL_DIV_CTRL, 0x00000000);
+		REG_WRITE(DPLL_STATUS, 0x1);
+	}
+	udelay(150);
+
+	/* Reset controller */
+	oaktrail_hdmi_reset(dev);
+
+	/* program and enable dpll */
+	refclk = 25000;
+	oaktrail_hdmi_find_dpll(crtc, adjusted_mode->clock, refclk, &clock);
+
+	/* Set the DPLL */
+	dpll = REG_READ(DPLL_CTRL);
+	dpll &= ~DPLL_PDIV_MASK;
+	dpll &= ~(DPLL_PWRDN | DPLL_RESET);
+	REG_WRITE(DPLL_CTRL, 0x00000008);
+	REG_WRITE(DPLL_DIV_CTRL, ((clock.nf << 6) | clock.nr));
+	REG_WRITE(DPLL_ADJUST, ((clock.nf >> 14) - 1));
+	REG_WRITE(DPLL_CTRL, (dpll | (clock.np << DPLL_PDIV_SHIFT) | DPLL_ENSTAT | DPLL_DITHEN));
+	REG_WRITE(DPLL_UPDATE, 0x80000000);
+	REG_WRITE(DPLL_CLK_ENABLE, 0x80050102);
+	udelay(150);
+
+	/* configure HDMI */
+	HDMI_WRITE(0x1004, 0x1fd);
+	HDMI_WRITE(0x2000, 0x1);
+	HDMI_WRITE(0x2008, 0x0);
+	HDMI_WRITE(0x3130, 0x8);
+	HDMI_WRITE(0x101c, 0x1800810);
+
+	temp = htotal_calculate(adjusted_mode);
+	REG_WRITE(htot_reg, temp);
+	REG_WRITE(hblank_reg, (adjusted_mode->crtc_hblank_start - 1) | ((adjusted_mode->crtc_hblank_end - 1) << 16));
+	REG_WRITE(hsync_reg, (adjusted_mode->crtc_hsync_start - 1) | ((adjusted_mode->crtc_hsync_end - 1) << 16));
+	REG_WRITE(vtot_reg, (adjusted_mode->crtc_vdisplay - 1) | ((adjusted_mode->crtc_vtotal - 1) << 16));
+	REG_WRITE(vblank_reg, (adjusted_mode->crtc_vblank_start - 1) | ((adjusted_mode->crtc_vblank_end - 1) << 16));
+	REG_WRITE(vsync_reg, (adjusted_mode->crtc_vsync_start - 1) | ((adjusted_mode->crtc_vsync_end - 1) << 16));
+	REG_WRITE(pipesrc_reg, ((mode->crtc_hdisplay - 1) << 16) |  (mode->crtc_vdisplay - 1));
+
+	REG_WRITE(PCH_HTOTAL_B, (adjusted_mode->crtc_hdisplay - 1) | ((adjusted_mode->crtc_htotal - 1) << 16));
+	REG_WRITE(PCH_HBLANK_B, (adjusted_mode->crtc_hblank_start - 1) | ((adjusted_mode->crtc_hblank_end - 1) << 16));
+	REG_WRITE(PCH_HSYNC_B, (adjusted_mode->crtc_hsync_start - 1) | ((adjusted_mode->crtc_hsync_end - 1) << 16));
+	REG_WRITE(PCH_VTOTAL_B, (adjusted_mode->crtc_vdisplay - 1) | ((adjusted_mode->crtc_vtotal - 1) << 16));
+	REG_WRITE(PCH_VBLANK_B, (adjusted_mode->crtc_vblank_start - 1) | ((adjusted_mode->crtc_vblank_end - 1) << 16));
+	REG_WRITE(PCH_VSYNC_B, (adjusted_mode->crtc_vsync_start - 1) | ((adjusted_mode->crtc_vsync_end - 1) << 16));
+	REG_WRITE(PCH_PIPEBSRC, ((mode->crtc_hdisplay - 1) << 16) |  (mode->crtc_vdisplay - 1));
+
+	temp = adjusted_mode->crtc_hblank_end - adjusted_mode->crtc_hblank_start;
+	HDMI_WRITE(HDMI_HBLANK_A, ((adjusted_mode->crtc_hdisplay - 1) << 16) |  temp);
+
+	REG_WRITE(dspsize_reg, ((mode->vdisplay - 1) << 16) | (mode->hdisplay - 1));
+	REG_WRITE(dsppos_reg, 0);
+
+	/* Flush the plane changes */
+	{
+		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+		crtc_funcs->mode_set_base(crtc, x, y, old_fb);
+	}
+
+	/* Set up the display plane register */
+	dspcntr = REG_READ(dspcntr_reg);
+	dspcntr |= DISPPLANE_GAMMA_ENABLE;
+	dspcntr |= DISPPLANE_SEL_PIPE_B;
+	dspcntr |= DISPLAY_PLANE_ENABLE;
+
+	/* setup pipeconf */
+	pipeconf = REG_READ(pipeconf_reg);
+	pipeconf |= PIPEACONF_ENABLE;
+
+	REG_WRITE(pipeconf_reg, pipeconf);
+	REG_READ(pipeconf_reg);
+
+	REG_WRITE(PCH_PIPEBCONF, pipeconf);
+	REG_READ(PCH_PIPEBCONF);
+	wait_for_vblank(dev);
+
+	REG_WRITE(dspcntr_reg, dspcntr);
+	wait_for_vblank(dev);
+
+	gma_power_end(dev);
+
+	return 0;
+}
+
+void oaktrail_crtc_hdmi_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct drm_device *dev = crtc->dev;
+	u32 temp;
+
+	DRM_DEBUG_KMS("%s %d\n", __func__, mode);
+
+	switch (mode) {
+	case DRM_MODE_DPMS_OFF:
+		REG_WRITE(VGACNTRL, 0x80000000);
+
+		/* Disable plane */
+		temp = REG_READ(DSPBCNTR);
+		if ((temp & DISPLAY_PLANE_ENABLE) != 0) {
+			REG_WRITE(DSPBCNTR, temp & ~DISPLAY_PLANE_ENABLE);
+			REG_READ(DSPBCNTR);
+			/* Flush the plane changes */
+			REG_WRITE(DSPBSURF, REG_READ(DSPBSURF));
+			REG_READ(DSPBSURF);
+		}
+
+		/* Disable pipe B */
+		temp = REG_READ(PIPEBCONF);
+		if ((temp & PIPEACONF_ENABLE) != 0) {
+			REG_WRITE(PIPEBCONF, temp & ~PIPEACONF_ENABLE);
+			REG_READ(PIPEBCONF);
+		}
+
+		/* Disable LNW Pipes, etc */
+		temp = REG_READ(PCH_PIPEBCONF);
+		if ((temp & PIPEACONF_ENABLE) != 0) {
+			REG_WRITE(PCH_PIPEBCONF, temp & ~PIPEACONF_ENABLE);
+			REG_READ(PCH_PIPEBCONF);
+		}
+
+		/* wait for pipe off */
+		udelay(150);
+
+		/* Disable dpll */
+		temp = REG_READ(DPLL_CTRL);
+		if ((temp & DPLL_PWRDN) == 0) {
+			REG_WRITE(DPLL_CTRL, temp | (DPLL_PWRDN | DPLL_RESET));
+			REG_WRITE(DPLL_STATUS, 0x1);
+		}
+
+		/* wait for dpll off */
+		udelay(150);
+
+		break;
+	case DRM_MODE_DPMS_ON:
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+		/* Enable dpll */
+		temp = REG_READ(DPLL_CTRL);
+		if ((temp & DPLL_PWRDN) != 0) {
+			REG_WRITE(DPLL_CTRL, temp & ~(DPLL_PWRDN | DPLL_RESET));
+			temp = REG_READ(DPLL_CLK_ENABLE);
+			REG_WRITE(DPLL_CLK_ENABLE, temp | DPLL_EN_DISP | DPLL_SEL_HDMI | DPLL_EN_HDMI);
+			REG_READ(DPLL_CLK_ENABLE);
+		}
+		/* wait for dpll warm up */
+		udelay(150);
+
+		/* Enable pipe B */
+		temp = REG_READ(PIPEBCONF);
+		if ((temp & PIPEACONF_ENABLE) == 0) {
+			REG_WRITE(PIPEBCONF, temp | PIPEACONF_ENABLE);
+			REG_READ(PIPEBCONF);
+		}
+
+		/* Enable LNW Pipe B */
+		temp = REG_READ(PCH_PIPEBCONF);
+		if ((temp & PIPEACONF_ENABLE) == 0) {
+			REG_WRITE(PCH_PIPEBCONF, temp | PIPEACONF_ENABLE);
+			REG_READ(PCH_PIPEBCONF);
+		}
+
+		wait_for_vblank(dev);
+
+		/* Enable plane */
+		temp = REG_READ(DSPBCNTR);
+		if ((temp & DISPLAY_PLANE_ENABLE) == 0) {
+			REG_WRITE(DSPBCNTR, temp | DISPLAY_PLANE_ENABLE);
+			/* Flush the plane changes */
+			REG_WRITE(DSPBSURF, REG_READ(DSPBSURF));
+			REG_READ(DSPBSURF);
+		}
+
+		psb_intel_crtc_load_lut(crtc);
+	}
+
+	/* DSPARB */
+	REG_WRITE(DSPARB, 0x00003fbf);
+
+	/* FW1 */
+	REG_WRITE(0x70034, 0x3f880a0a);
+
+	/* FW2 */
+	REG_WRITE(0x70038, 0x0b060808);
+
+	/* FW4 */
+	REG_WRITE(0x70050, 0x08030404);
+
+	/* FW5 */
+	REG_WRITE(0x70054, 0x04040404);
+
+	/* LNC Chicken Bits - Squawk! */
+	REG_WRITE(0x70400, 0x4000);
+
+	return;
+}
+
 static void oaktrail_hdmi_dpms(struct drm_encoder *encoder, int mode)
 {
 	static int dpms_mode = -1;
@@ -233,13 +572,15 @@
 
 static int oaktrail_hdmi_get_modes(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct i2c_adapter *i2c_adap;
 	struct edid *edid;
-	struct drm_display_mode *mode, *t;
-	int i = 0, ret = 0;
+	int ret = 0;
 
+	/*
+	 *	FIXME: We need to figure this lot out. In theory we can
+	 *	read the EDID somehow but I've yet to find working reference
+	 *	code.
+	 */
 	i2c_adap = i2c_get_adapter(3);
 	if (i2c_adap == NULL) {
 		DRM_ERROR("No ddc adapter available!\n");
@@ -253,17 +594,7 @@
 		drm_mode_connector_update_edid_property(connector, edid);
 		ret = drm_add_edid_modes(connector, edid);
 	}
-
-	/*
-	 * prune modes that require frame buffer bigger than stolen mem
-	 */
-	list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
-		if ((mode->hdisplay * mode->vdisplay * 4) >= dev_priv->vram_stolen_size) {
-			i++;
-			drm_mode_remove(connector, mode);
-		}
-	}
-	return ret - i;
+	return ret;
 }
 
 static void oaktrail_hdmi_mode_set(struct drm_encoder *encoder,
@@ -349,6 +680,7 @@
 	connector->interlace_allowed = false;
 	connector->doublescan_allowed = false;
 	drm_sysfs_connector_add(connector);
+	dev_info(dev->dev, "HDMI initialised.\n");
 
 	return;
 
@@ -403,6 +735,9 @@
 
 	dev_priv->hdmi_priv = hdmi_dev;
 	oaktrail_hdmi_audio_disable(dev);
+
+	dev_info(dev->dev, "HDMI hardware present.\n");
+
 	return;
 
 free:
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c
index 558c77f..325013a 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c
@@ -133,8 +133,8 @@
 		return;
 	}
 
-	drm_connector_property_get_value(
-		connector,
+	drm_object_property_get_value(
+		&connector->base,
 		dev->mode_config.scaling_mode_property,
 		&v);
 
@@ -363,10 +363,10 @@
 	connector->interlace_allowed = false;
 	connector->doublescan_allowed = false;
 
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					dev->mode_config.scaling_mode_property,
 					DRM_MODE_SCALE_FULLSCREEN);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					dev_priv->backlight_property,
 					BRIGHTNESS_MAX_LEVEL);
 
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 2a4c3a9..9fa5fa2 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -603,7 +603,7 @@
 			goto set_prop_error;
 		}
 
-		if (drm_connector_property_get_value(connector,
+		if (drm_object_property_get_value(&connector->base,
 						     property,
 						     &curval))
 			goto set_prop_error;
@@ -611,7 +611,7 @@
 		if (curval == value)
 			goto set_prop_done;
 
-		if (drm_connector_property_set_value(connector,
+		if (drm_object_property_set_value(&connector->base,
 							property,
 							value))
 			goto set_prop_error;
@@ -626,7 +626,7 @@
 				goto set_prop_error;
 		}
 	} else if (!strcmp(property->name, "backlight")) {
-		if (drm_connector_property_set_value(connector,
+		if (drm_object_property_set_value(&connector->base,
 							property,
 							value))
 			goto set_prop_error;
@@ -746,10 +746,10 @@
 	connector->doublescan_allowed = false;
 
 	/*Attach connector properties*/
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.scaling_mode_property,
 				      DRM_MODE_SCALE_FULLSCREEN);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev_priv->backlight_property,
 				      BRIGHTNESS_MAX_LEVEL);
 
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index fc92927..a4cc777a 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -1694,7 +1694,7 @@
 	uint8_t cmd;
 	int ret;
 
-	ret = drm_connector_property_set_value(connector, property, val);
+	ret = drm_object_property_set_value(&connector->base, property, val);
 	if (ret)
 		return ret;
 
@@ -1749,7 +1749,7 @@
 	} else if (IS_TV_OR_LVDS(psb_intel_sdvo_connector)) {
 		temp_value = val;
 		if (psb_intel_sdvo_connector->left == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 psb_intel_sdvo_connector->right, val);
 			if (psb_intel_sdvo_connector->left_margin == temp_value)
 				return 0;
@@ -1761,7 +1761,7 @@
 			cmd = SDVO_CMD_SET_OVERSCAN_H;
 			goto set_value;
 		} else if (psb_intel_sdvo_connector->right == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 psb_intel_sdvo_connector->left, val);
 			if (psb_intel_sdvo_connector->right_margin == temp_value)
 				return 0;
@@ -1773,7 +1773,7 @@
 			cmd = SDVO_CMD_SET_OVERSCAN_H;
 			goto set_value;
 		} else if (psb_intel_sdvo_connector->top == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 psb_intel_sdvo_connector->bottom, val);
 			if (psb_intel_sdvo_connector->top_margin == temp_value)
 				return 0;
@@ -1785,7 +1785,7 @@
 			cmd = SDVO_CMD_SET_OVERSCAN_V;
 			goto set_value;
 		} else if (psb_intel_sdvo_connector->bottom == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 psb_intel_sdvo_connector->top, val);
 			if (psb_intel_sdvo_connector->bottom_margin == temp_value)
 				return 0;
@@ -2286,7 +2286,7 @@
 				i, tv_format_names[psb_intel_sdvo_connector->tv_format_supported[i]]);
 
 	psb_intel_sdvo->tv_format_index = psb_intel_sdvo_connector->tv_format_supported[0];
-	drm_connector_attach_property(&psb_intel_sdvo_connector->base.base,
+	drm_object_attach_property(&psb_intel_sdvo_connector->base.base.base,
 				      psb_intel_sdvo_connector->tv_format, 0);
 	return true;
 
@@ -2302,7 +2302,7 @@
 		psb_intel_sdvo_connector->name = \
 			drm_property_create_range(dev, 0, #name, 0, data_value[0]); \
 		if (!psb_intel_sdvo_connector->name) return false; \
-		drm_connector_attach_property(connector, \
+		drm_object_attach_property(&connector->base, \
 					      psb_intel_sdvo_connector->name, \
 					      psb_intel_sdvo_connector->cur_##name); \
 		DRM_DEBUG_KMS(#name ": max %d, default %d, current %d\n", \
@@ -2339,7 +2339,7 @@
 		if (!psb_intel_sdvo_connector->left)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      psb_intel_sdvo_connector->left,
 					      psb_intel_sdvo_connector->left_margin);
 
@@ -2348,7 +2348,7 @@
 		if (!psb_intel_sdvo_connector->right)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      psb_intel_sdvo_connector->right,
 					      psb_intel_sdvo_connector->right_margin);
 		DRM_DEBUG_KMS("h_overscan: max %d, "
@@ -2375,7 +2375,7 @@
 		if (!psb_intel_sdvo_connector->top)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      psb_intel_sdvo_connector->top,
 					      psb_intel_sdvo_connector->top_margin);
 
@@ -2384,7 +2384,7 @@
 		if (!psb_intel_sdvo_connector->bottom)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      psb_intel_sdvo_connector->bottom,
 					      psb_intel_sdvo_connector->bottom_margin);
 		DRM_DEBUG_KMS("v_overscan: max %d, "
@@ -2416,7 +2416,7 @@
 		if (!psb_intel_sdvo_connector->dot_crawl)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      psb_intel_sdvo_connector->dot_crawl,
 					      psb_intel_sdvo_connector->cur_dot_crawl);
 		DRM_DEBUG_KMS("dot crawl: current %d\n", response);
diff --git a/drivers/gpu/drm/i2c/ch7006_drv.c b/drivers/gpu/drm/i2c/ch7006_drv.c
index 599099f..b865d07 100644
--- a/drivers/gpu/drm/i2c/ch7006_drv.c
+++ b/drivers/gpu/drm/i2c/ch7006_drv.c
@@ -214,7 +214,7 @@
 	else
 		priv->subconnector = DRM_MODE_SUBCONNECTOR_Unknown;
 
-	drm_connector_property_set_value(connector,
+	drm_object_property_set_value(&connector->base,
 			encoder->dev->mode_config.tv_subconnector_property,
 							priv->subconnector);
 
@@ -254,23 +254,23 @@
 
 	priv->scale_property = drm_property_create_range(dev, 0, "scale", 0, 2);
 
-	drm_connector_attach_property(connector, conf->tv_select_subconnector_property,
+	drm_object_attach_property(&connector->base, conf->tv_select_subconnector_property,
 				      priv->select_subconnector);
-	drm_connector_attach_property(connector, conf->tv_subconnector_property,
+	drm_object_attach_property(&connector->base, conf->tv_subconnector_property,
 				      priv->subconnector);
-	drm_connector_attach_property(connector, conf->tv_left_margin_property,
+	drm_object_attach_property(&connector->base, conf->tv_left_margin_property,
 				      priv->hmargin);
-	drm_connector_attach_property(connector, conf->tv_bottom_margin_property,
+	drm_object_attach_property(&connector->base, conf->tv_bottom_margin_property,
 				      priv->vmargin);
-	drm_connector_attach_property(connector, conf->tv_mode_property,
+	drm_object_attach_property(&connector->base, conf->tv_mode_property,
 				      priv->norm);
-	drm_connector_attach_property(connector, conf->tv_brightness_property,
+	drm_object_attach_property(&connector->base, conf->tv_brightness_property,
 				      priv->brightness);
-	drm_connector_attach_property(connector, conf->tv_contrast_property,
+	drm_object_attach_property(&connector->base, conf->tv_contrast_property,
 				      priv->contrast);
-	drm_connector_attach_property(connector, conf->tv_flicker_reduction_property,
+	drm_object_attach_property(&connector->base, conf->tv_flicker_reduction_property,
 				      priv->flicker);
-	drm_connector_attach_property(connector, priv->scale_property,
+	drm_object_attach_property(&connector->base, priv->scale_property,
 				      priv->scale);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index dde8b50..e6a11ca8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -317,7 +317,7 @@
 			seq_printf(m, "No flip due on pipe %c (plane %c)\n",
 				   pipe, plane);
 		} else {
-			if (!work->pending) {
+			if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 				seq_printf(m, "Flip queued on pipe %c (plane %c)\n",
 					   pipe, plane);
 			} else {
@@ -328,7 +328,7 @@
 				seq_printf(m, "Stall check enabled, ");
 			else
 				seq_printf(m, "Stall check waiting for page flip ioctl, ");
-			seq_printf(m, "%d prepares\n", work->pending);
+			seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
 
 			if (work->old_fb_obj) {
 				struct drm_i915_gem_object *obj = work->old_fb_obj;
@@ -655,10 +655,12 @@
 	if (INTEL_INFO(dev)->gen >= 6) {
 		seq_printf(m, "  RC PSMI: 0x%08x\n", error->rc_psmi[ring]);
 		seq_printf(m, "  FAULT_REG: 0x%08x\n", error->fault_reg[ring]);
-		seq_printf(m, "  SYNC_0: 0x%08x\n",
-			   error->semaphore_mboxes[ring][0]);
-		seq_printf(m, "  SYNC_1: 0x%08x\n",
-			   error->semaphore_mboxes[ring][1]);
+		seq_printf(m, "  SYNC_0: 0x%08x [last synced 0x%08x]\n",
+			   error->semaphore_mboxes[ring][0],
+			   error->semaphore_seqno[ring][0]);
+		seq_printf(m, "  SYNC_1: 0x%08x [last synced 0x%08x]\n",
+			   error->semaphore_mboxes[ring][1],
+			   error->semaphore_seqno[ring][1]);
 	}
 	seq_printf(m, "  seqno: 0x%08x\n", error->seqno[ring]);
 	seq_printf(m, "  waiting: %s\n", yesno(error->waiting[ring]));
@@ -1068,7 +1070,7 @@
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 rpmodectl1, gt_core_status, rcctl1;
+	u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
 	unsigned forcewake_count;
 	int count=0, ret;
 
@@ -1097,6 +1099,9 @@
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
 	rcctl1 = I915_READ(GEN6_RC_CONTROL);
 	mutex_unlock(&dev->struct_mutex);
+	mutex_lock(&dev_priv->rps.hw_lock);
+	sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	seq_printf(m, "Video Turbo Mode: %s\n",
 		   yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
@@ -1148,6 +1153,12 @@
 	seq_printf(m, "RC6++ residency since boot: %u\n",
 		   I915_READ(GEN6_GT_GFX_RC6pp));
 
+	seq_printf(m, "RC6   voltage: %dmV\n",
+		   GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
+	seq_printf(m, "RC6+  voltage: %dmV\n",
+		   GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
+	seq_printf(m, "RC6++ voltage: %dmV\n",
+		   GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
 	return 0;
 }
 
@@ -1273,7 +1284,7 @@
 		return 0;
 	}
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
 	if (ret)
 		return ret;
 
@@ -1282,19 +1293,14 @@
 	for (gpu_freq = dev_priv->rps.min_delay;
 	     gpu_freq <= dev_priv->rps.max_delay;
 	     gpu_freq++) {
-		I915_WRITE(GEN6_PCODE_DATA, gpu_freq);
-		I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY |
-			   GEN6_PCODE_READ_MIN_FREQ_TABLE);
-		if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) &
-			      GEN6_PCODE_READY) == 0, 10)) {
-			DRM_ERROR("pcode read of freq table timed out\n");
-			continue;
-		}
-		ia_freq = I915_READ(GEN6_PCODE_DATA);
+		ia_freq = gpu_freq;
+		sandybridge_pcode_read(dev_priv,
+				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
+				       &ia_freq);
 		seq_printf(m, "%d\t\t%d\n", gpu_freq * GT_FREQUENCY_MULTIPLIER, ia_freq * 100);
 	}
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return 0;
 }
@@ -1398,15 +1404,15 @@
 	if (ret)
 		return ret;
 
-	if (dev_priv->pwrctx) {
+	if (dev_priv->ips.pwrctx) {
 		seq_printf(m, "power context ");
-		describe_obj(m, dev_priv->pwrctx);
+		describe_obj(m, dev_priv->ips.pwrctx);
 		seq_printf(m, "\n");
 	}
 
-	if (dev_priv->renderctx) {
+	if (dev_priv->ips.renderctx) {
 		seq_printf(m, "render context ");
-		describe_obj(m, dev_priv->renderctx);
+		describe_obj(m, dev_priv->ips.renderctx);
 		seq_printf(m, "\n");
 	}
 
@@ -1711,13 +1717,13 @@
 	if (!(IS_GEN6(dev) || IS_GEN7(dev)))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
 	if (ret)
 		return ret;
 
 	len = snprintf(buf, sizeof(buf),
 		       "max freq: %d\n", dev_priv->rps.max_delay * GT_FREQUENCY_MULTIPLIER);
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	if (len > sizeof(buf))
 		len = sizeof(buf);
@@ -1752,7 +1758,7 @@
 
 	DRM_DEBUG_DRIVER("Manually setting max freq to %d\n", val);
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
 	if (ret)
 		return ret;
 
@@ -1762,7 +1768,7 @@
 	dev_priv->rps.max_delay = val / GT_FREQUENCY_MULTIPLIER;
 
 	gen6_set_rps(dev, val / GT_FREQUENCY_MULTIPLIER);
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return cnt;
 }
@@ -1787,13 +1793,13 @@
 	if (!(IS_GEN6(dev) || IS_GEN7(dev)))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
 	if (ret)
 		return ret;
 
 	len = snprintf(buf, sizeof(buf),
 		       "min freq: %d\n", dev_priv->rps.min_delay * GT_FREQUENCY_MULTIPLIER);
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	if (len > sizeof(buf))
 		len = sizeof(buf);
@@ -1826,7 +1832,7 @@
 
 	DRM_DEBUG_DRIVER("Manually setting min freq to %d\n", val);
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
 	if (ret)
 		return ret;
 
@@ -1836,7 +1842,7 @@
 	dev_priv->rps.min_delay = val / GT_FREQUENCY_MULTIPLIER;
 
 	gen6_set_rps(dev, val / GT_FREQUENCY_MULTIPLIER);
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return cnt;
 }
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 61ae104..8f63cd5 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -104,32 +104,6 @@
 }
 
 /**
- * Sets up the hardware status page for devices that need a physical address
- * in the register.
- */
-static int i915_init_phys_hws(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-
-	/* Program Hardware Status Page */
-	dev_priv->status_page_dmah =
-		drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE);
-
-	if (!dev_priv->status_page_dmah) {
-		DRM_ERROR("Can not allocate hardware status page\n");
-		return -ENOMEM;
-	}
-
-	memset_io((void __force __iomem *)dev_priv->status_page_dmah->vaddr,
-		  0, PAGE_SIZE);
-
-	i915_write_hws_pga(dev);
-
-	DRM_DEBUG_DRIVER("Enabled hardware status page\n");
-	return 0;
-}
-
-/**
  * Frees the hardware status page, whether it's a physical address or a virtual
  * address set up by the X Server.
  */
@@ -167,7 +141,7 @@
 
 	ring->head = I915_READ_HEAD(ring) & HEAD_ADDR;
 	ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-	ring->space = ring->head - (ring->tail + 8);
+	ring->space = ring->head - (ring->tail + I915_RING_FREE_SPACE);
 	if (ring->space < 0)
 		ring->space += ring->size;
 
@@ -451,16 +425,16 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 
-	dev_priv->counter++;
-	if (dev_priv->counter > 0x7FFFFFFFUL)
-		dev_priv->counter = 0;
+	dev_priv->dri1.counter++;
+	if (dev_priv->dri1.counter > 0x7FFFFFFFUL)
+		dev_priv->dri1.counter = 0;
 	if (master_priv->sarea_priv)
-		master_priv->sarea_priv->last_enqueue = dev_priv->counter;
+		master_priv->sarea_priv->last_enqueue = dev_priv->dri1.counter;
 
 	if (BEGIN_LP_RING(4) == 0) {
 		OUT_RING(MI_STORE_DWORD_INDEX);
 		OUT_RING(I915_BREADCRUMB_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-		OUT_RING(dev_priv->counter);
+		OUT_RING(dev_priv->dri1.counter);
 		OUT_RING(0);
 		ADVANCE_LP_RING();
 	}
@@ -602,12 +576,12 @@
 
 	ADVANCE_LP_RING();
 
-	master_priv->sarea_priv->last_enqueue = dev_priv->counter++;
+	master_priv->sarea_priv->last_enqueue = dev_priv->dri1.counter++;
 
 	if (BEGIN_LP_RING(4) == 0) {
 		OUT_RING(MI_STORE_DWORD_INDEX);
 		OUT_RING(I915_BREADCRUMB_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-		OUT_RING(dev_priv->counter);
+		OUT_RING(dev_priv->dri1.counter);
 		OUT_RING(0);
 		ADVANCE_LP_RING();
 	}
@@ -618,10 +592,8 @@
 
 static int i915_quiescent(struct drm_device *dev)
 {
-	struct intel_ring_buffer *ring = LP_RING(dev->dev_private);
-
 	i915_kernel_lost_context(dev);
-	return intel_wait_ring_idle(ring);
+	return intel_ring_idle(LP_RING(dev->dev_private));
 }
 
 static int i915_flush_ioctl(struct drm_device *dev, void *data,
@@ -775,21 +747,21 @@
 
 	DRM_DEBUG_DRIVER("\n");
 
-	dev_priv->counter++;
-	if (dev_priv->counter > 0x7FFFFFFFUL)
-		dev_priv->counter = 1;
+	dev_priv->dri1.counter++;
+	if (dev_priv->dri1.counter > 0x7FFFFFFFUL)
+		dev_priv->dri1.counter = 1;
 	if (master_priv->sarea_priv)
-		master_priv->sarea_priv->last_enqueue = dev_priv->counter;
+		master_priv->sarea_priv->last_enqueue = dev_priv->dri1.counter;
 
 	if (BEGIN_LP_RING(4) == 0) {
 		OUT_RING(MI_STORE_DWORD_INDEX);
 		OUT_RING(I915_BREADCRUMB_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-		OUT_RING(dev_priv->counter);
+		OUT_RING(dev_priv->dri1.counter);
 		OUT_RING(MI_USER_INTERRUPT);
 		ADVANCE_LP_RING();
 	}
 
-	return dev_priv->counter;
+	return dev_priv->dri1.counter;
 }
 
 static int i915_wait_irq(struct drm_device * dev, int irq_nr)
@@ -820,7 +792,7 @@
 
 	if (ret == -EBUSY) {
 		DRM_ERROR("EBUSY -- rec: %d emitted: %d\n",
-			  READ_BREADCRUMB(dev_priv), (int)dev_priv->counter);
+			  READ_BREADCRUMB(dev_priv), (int)dev_priv->dri1.counter);
 	}
 
 	return ret;
@@ -1014,6 +986,9 @@
 	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
 		value = 1;
 		break;
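+	/* Secure batches execute with elevated privileges, so only
+	 * report support to clients holding CAP_SYS_ADMIN. */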
+	case I915_PARAM_HAS_SECURE_BATCHES:
+		value = capable(CAP_SYS_ADMIN);
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
@@ -1068,7 +1043,7 @@
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	drm_i915_hws_addr_t *hws = data;
-	struct intel_ring_buffer *ring = LP_RING(dev_priv);
+	struct intel_ring_buffer *ring;
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return -ENODEV;
@@ -1088,6 +1063,7 @@
 
 	DRM_DEBUG_DRIVER("set status page addr 0x%08x\n", (u32)hws->addr);
 
+	ring = LP_RING(dev_priv);
 	ring->status_page.gfx_addr = hws->addr & (0x1ffff<<12);
 
 	dev_priv->dri1.gfx_hws_cpu_addr =
@@ -1326,6 +1302,8 @@
 
 	intel_modeset_gem_init(dev);
 
+	INIT_WORK(&dev_priv->console_resume_work, intel_console_resume);
+
 	ret = drm_irq_install(dev);
 	if (ret)
 		goto cleanup_gem;
@@ -1491,19 +1469,9 @@
 		goto free_priv;
 	}
 
-	ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
-	if (!ret) {
-		DRM_ERROR("failed to set up gmch\n");
-		ret = -EIO;
+	ret = i915_gem_gtt_init(dev);
+	if (ret)
 		goto put_bridge;
-	}
-
-	dev_priv->mm.gtt = intel_gtt_get();
-	if (!dev_priv->mm.gtt) {
-		DRM_ERROR("Failed to initialize GTT\n");
-		ret = -ENODEV;
-		goto put_gmch;
-	}
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		i915_kick_out_firmware_fb(dev_priv);
@@ -1590,18 +1558,10 @@
 	intel_setup_gmbus(dev);
 	intel_opregion_setup(dev);
 
-	/* Make sure the bios did its job and set up vital registers */
 	intel_setup_bios(dev);
 
 	i915_gem_load(dev);
 
-	/* Init HWS */
-	if (!I915_NEED_GFX_HWS(dev)) {
-		ret = i915_init_phys_hws(dev);
-		if (ret)
-			goto out_gem_unload;
-	}
-
 	/* On the 945G/GM, the chipset reports the MSI capability on the
 	 * integrated graphics even though the support isn't actually there
 	 * according to the published specs.  It doesn't appear to function
@@ -1621,6 +1581,8 @@
 	spin_lock_init(&dev_priv->rps.lock);
 	spin_lock_init(&dev_priv->dpio_lock);
 
+	mutex_init(&dev_priv->rps.hw_lock);
+
 	if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
 		dev_priv->num_pipe = 3;
 	else if (IS_MOBILE(dev) || !IS_GEN2(dev))
@@ -1678,7 +1640,7 @@
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_gmch:
-	intel_gmch_remove();
+	i915_gem_gtt_fini(dev);
 put_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
 free_priv:
@@ -1721,6 +1683,7 @@
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		intel_fbdev_fini(dev);
 		intel_modeset_cleanup(dev);
+		cancel_work_sync(&dev_priv->console_resume_work);
 
 		/*
 		 * free the memory space allocated for the child device
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6770ee6..530db83 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,11 +47,11 @@
 unsigned int i915_fbpercrtc __always_unused = 0;
 module_param_named(fbpercrtc, i915_fbpercrtc, int, 0400);
 
-int i915_panel_ignore_lid __read_mostly = 0;
+int i915_panel_ignore_lid __read_mostly = 1;
 module_param_named(panel_ignore_lid, i915_panel_ignore_lid, int, 0600);
 MODULE_PARM_DESC(panel_ignore_lid,
-		"Override lid status (0=autodetect [default], 1=lid open, "
-		"-1=lid closed)");
+		"Override lid status (0=autodetect, 1=autodetect disabled [default], "
+		"-1=force lid closed, -2=force lid open)");
 
 unsigned int i915_powersave __read_mostly = 1;
 module_param_named(powersave, i915_powersave, int, 0600);
@@ -396,12 +396,6 @@
 MODULE_DEVICE_TABLE(pci, pciidlist);
 #endif
 
-#define INTEL_PCH_DEVICE_ID_MASK	0xff00
-#define INTEL_PCH_IBX_DEVICE_ID_TYPE	0x3b00
-#define INTEL_PCH_CPT_DEVICE_ID_TYPE	0x1c00
-#define INTEL_PCH_PPT_DEVICE_ID_TYPE	0x1e00
-#define INTEL_PCH_LPT_DEVICE_ID_TYPE	0x8c00
-
 void intel_detect_pch(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -416,26 +410,36 @@
 	pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
 	if (pch) {
 		if (pch->vendor == PCI_VENDOR_ID_INTEL) {
-			int id;
+			unsigned short id;
 			id = pch->device & INTEL_PCH_DEVICE_ID_MASK;
+			dev_priv->pch_id = id;
 
 			if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_IBX;
 				dev_priv->num_pch_pll = 2;
 				DRM_DEBUG_KMS("Found Ibex Peak PCH\n");
+				WARN_ON(!IS_GEN5(dev));
 			} else if (id == INTEL_PCH_CPT_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_CPT;
 				dev_priv->num_pch_pll = 2;
 				DRM_DEBUG_KMS("Found CougarPoint PCH\n");
+				WARN_ON(!(IS_GEN6(dev) || IS_IVYBRIDGE(dev)));
 			} else if (id == INTEL_PCH_PPT_DEVICE_ID_TYPE) {
 				/* PantherPoint is CPT compatible */
 				dev_priv->pch_type = PCH_CPT;
 				dev_priv->num_pch_pll = 2;
 				DRM_DEBUG_KMS("Found PatherPoint PCH\n");
+				WARN_ON(!(IS_GEN6(dev) || IS_IVYBRIDGE(dev)));
 			} else if (id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_LPT;
 				dev_priv->num_pch_pll = 0;
 				DRM_DEBUG_KMS("Found LynxPoint PCH\n");
+				WARN_ON(!IS_HASWELL(dev));
+			} else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
+				dev_priv->pch_type = PCH_LPT;
+				dev_priv->num_pch_pll = 0;
+				DRM_DEBUG_KMS("Found LynxPoint LP PCH\n");
+				WARN_ON(!IS_HASWELL(dev));
 			}
 			BUG_ON(dev_priv->num_pch_pll > I915_NUM_PLLS);
 		}
@@ -477,6 +481,8 @@
 			return error;
 		}
 
+		cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
+
 		intel_modeset_disable(dev);
 
 		drm_irq_uninstall(dev);
@@ -526,24 +532,29 @@
 	return 0;
 }
 
-static int i915_drm_thaw(struct drm_device *dev)
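+/* Runs from console_resume_work when the console lock could not be
+ * taken without blocking during thaw. */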
+void intel_console_resume(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, struct drm_i915_private,
+			     console_resume_work);
+	struct drm_device *dev = dev_priv->dev;
+
+	console_lock();
+	intel_fbdev_set_suspend(dev, 0);
+	console_unlock();
+}
+
+static int __i915_drm_thaw(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int error = 0;
 
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		mutex_lock(&dev->struct_mutex);
-		i915_gem_restore_gtt_mappings(dev);
-		mutex_unlock(&dev->struct_mutex);
-	}
-
 	i915_restore_state(dev);
 	intel_opregion_setup(dev);
 
 	/* KMS EnterVT equivalent */
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
-			ironlake_init_pch_refclk(dev);
+		intel_init_pch_refclk(dev);
 
 		mutex_lock(&dev->struct_mutex);
 		dev_priv->mm.suspended = 0;
@@ -552,8 +563,7 @@
 		mutex_unlock(&dev->struct_mutex);
 
 		intel_modeset_init_hw(dev);
-		intel_modeset_setup_hw_state(dev);
-		drm_mode_config_reset(dev);
+		intel_modeset_setup_hw_state(dev, false);
 		drm_irq_install(dev);
 	}
 
@@ -561,14 +571,41 @@
 
 	dev_priv->modeset_on_lid = 0;
 
-	console_lock();
-	intel_fbdev_set_suspend(dev, 0);
-	console_unlock();
+	/*
+	 * The console lock can be pretty contended on resume due
+	 * to all the printk activity.  Try to keep it out of the hot
+	 * path of resume if possible.
+	 */
+	if (console_trylock()) {
+		intel_fbdev_set_suspend(dev, 0);
+		console_unlock();
+	} else {
+		schedule_work(&dev_priv->console_resume_work);
+	}
+
+	return error;
+}
+
+static int i915_drm_thaw(struct drm_device *dev)
+{
+	int error = 0;
+
+	intel_gt_reset(dev);
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		mutex_lock(&dev->struct_mutex);
+		i915_gem_restore_gtt_mappings(dev);
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	__i915_drm_thaw(dev);
+
 	return error;
 }
 
 int i915_resume(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
@@ -579,7 +616,20 @@
 
 	pci_set_master(dev->pdev);
 
-	ret = i915_drm_thaw(dev);
+	intel_gt_reset(dev);
+
+	/*
+	 * Platforms with opregion should have a sane BIOS; older ones (gen3 and
+	 * earlier) need this since the BIOS might clear all our scratch PTEs.
+	 */
+	if (drm_core_check_feature(dev, DRIVER_MODESET) &&
+	    !dev_priv->opregion.header) {
+		mutex_lock(&dev->struct_mutex);
+		i915_gem_restore_gtt_mappings(dev);
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	ret = __i915_drm_thaw(dev);
 	if (ret)
 		return ret;
 
@@ -833,7 +883,7 @@
 	struct intel_device_info *intel_info =
 		(struct intel_device_info *) ent->driver_data;
 
-	if (intel_info->is_haswell || intel_info->is_valleyview)
+	if (intel_info->is_valleyview)
 		if(!i915_preliminary_hw_support) {
 			DRM_ERROR("Preliminary hardware support disabled\n");
 			return -ENODEV;
@@ -1140,12 +1190,40 @@
 	if (reg == GEN6_GDRST)
 		return false;
 
+	switch (reg) {
+	case _3D_CHICKEN3:
+	case IVB_CHICKEN3:
+	case GEN7_COMMON_SLICE_CHICKEN1:
+	case GEN7_L3CNTLREG1:
+	case GEN7_L3_CHICKEN_MODE_REGISTER:
+	case GEN7_ROW_CHICKEN2:
+	case GEN7_L3SQCREG4:
+	case GEN7_SQ_CHICKEN_MBCUNIT_CONFIG:
+	case GEN7_HALF_SLICE_CHICKEN1:
+	case GEN6_MBCTL:
+	case GEN6_UCGCTL2:
+		return false;
+	default:
+		break;
+	}
+
 	return true;
 }
 
+static void
+ilk_dummy_write(struct drm_i915_private *dev_priv)
+{
+	/* WaIssueDummyWriteToWakeupFromRC6: Issue a dummy write to wake up the
+	 * chip from rc6 before touching it for real. MI_MODE is masked, hence
+	 * harmless to write 0 into. */
+	I915_WRITE_NOTRACE(MI_MODE, 0);
+}
+
 #define __i915_read(x, y) \
 u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
 	u##x val = 0; \
+	if (IS_GEN5(dev_priv->dev)) \
+		ilk_dummy_write(dev_priv); \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
 		unsigned long irqflags; \
 		spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
@@ -1177,6 +1255,12 @@
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
+	if (IS_GEN5(dev_priv->dev)) \
+		ilk_dummy_write(dev_priv); \
+	if (IS_HASWELL(dev_priv->dev) && (I915_READ_NOTRACE(GEN7_ERR_INT) & ERR_INT_MMIO_UNCLAIMED)) { \
+		DRM_ERROR("Unknown unclaimed register before writing to %x\n", reg); \
+		I915_WRITE_NOTRACE(GEN7_ERR_INT, ERR_INT_MMIO_UNCLAIMED); \
+	} \
 	if (IS_VALLEYVIEW(dev_priv->dev) && IS_DISPLAYREG(reg)) { \
 		write##y(val, dev_priv->regs + reg + 0x180000);		\
 	} else {							\
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f511fa2..557843d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -58,6 +58,14 @@
 };
 #define pipe_name(p) ((p) + 'A')
 
+enum transcoder {
+	TRANSCODER_A = 0,
+	TRANSCODER_B,
+	TRANSCODER_C,
+	TRANSCODER_EDP = 0xF,
+};
+#define transcoder_name(t) ((t) + 'A')
+
 enum plane {
 	PLANE_A = 0,
 	PLANE_B,
@@ -93,6 +101,12 @@
 };
 #define I915_NUM_PLLS 2
 
+struct intel_ddi_plls {
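+	/* Reference counts for the SPLL/WRPLL clock sources shared by
+	 * the DDI ports. */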
+	int spll_refcount;
+	int wrpll1_refcount;
+	int wrpll2_refcount;
+};
+
 /* Interface history:
  *
  * 1.1: Original.
@@ -123,14 +137,6 @@
 	struct drm_i915_gem_object *cur_obj;
 };
 
-struct mem_block {
-	struct mem_block *next;
-	struct mem_block *prev;
-	int start;
-	int size;
-	struct drm_file *file_priv; /* NULL: free, -1: heap, other: real files */
-};
-
 struct opregion_header;
 struct opregion_acpi;
 struct opregion_swsci;
@@ -191,6 +197,7 @@
 	u32 instdone[I915_NUM_RINGS];
 	u32 acthd[I915_NUM_RINGS];
 	u32 semaphore_mboxes[I915_NUM_RINGS][I915_NUM_RINGS - 1];
+	u32 semaphore_seqno[I915_NUM_RINGS][I915_NUM_RINGS - 1];
 	u32 rc_psmi[I915_NUM_RINGS]; /* sleep state */
 	/* our own tracking of ring head and tail */
 	u32 cpu_ring_head[I915_NUM_RINGS];
@@ -251,6 +258,7 @@
 				 uint32_t sprite_width, int pixel_size);
 	void (*update_linetime_wm)(struct drm_device *dev, int pipe,
 				 struct drm_display_mode *mode);
+	void (*modeset_global_resources)(struct drm_device *dev);
 	int (*crtc_mode_set)(struct drm_crtc *crtc,
 			     struct drm_display_mode *mode,
 			     struct drm_display_mode *adjusted_mode,
@@ -263,7 +271,6 @@
 			  struct drm_crtc *crtc);
 	void (*fdi_link_train)(struct drm_crtc *crtc);
 	void (*init_clock_gating)(struct drm_device *dev);
-	void (*init_pch_clock_gating)(struct drm_device *dev);
 	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
 			  struct drm_framebuffer *fb,
 			  struct drm_i915_gem_object *obj);
@@ -338,6 +345,7 @@
 #define I915_PPGTT_PD_ENTRIES 512
 #define I915_PPGTT_PT_ENTRIES 1024
 struct i915_hw_ppgtt {
+	struct drm_device *dev;
 	unsigned num_pd_entries;
 	struct page **pt_pages;
 	uint32_t pd_offset;
@@ -374,6 +382,11 @@
 	PCH_LPT,	/* Lynxpoint PCH */
 };
 
+enum intel_sbi_destination {
+	SBI_ICLK,
+	SBI_MPHY,
+};
+
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
@@ -383,154 +396,18 @@
 
 struct intel_gmbus {
 	struct i2c_adapter adapter;
-	bool force_bit;
+	u32 force_bit;
 	u32 reg0;
 	u32 gpio_reg;
 	struct i2c_algo_bit_data bit_algo;
 	struct drm_i915_private *dev_priv;
 };
 
-typedef struct drm_i915_private {
-	struct drm_device *dev;
-
-	const struct intel_device_info *info;
-
-	int relative_constants_mode;
-
-	void __iomem *regs;
-
-	struct drm_i915_gt_funcs gt;
-	/** gt_fifo_count and the subsequent register write are synchronized
-	 * with dev->struct_mutex. */
-	unsigned gt_fifo_count;
-	/** forcewake_count is protected by gt_lock */
-	unsigned forcewake_count;
-	/** gt_lock is also taken in irq contexts. */
-	struct spinlock gt_lock;
-
-	struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
-
-	/** gmbus_mutex protects against concurrent usage of the single hw gmbus
-	 * controller on different i2c buses. */
-	struct mutex gmbus_mutex;
-
-	/**
-	 * Base address of the gmbus and gpio block.
-	 */
-	uint32_t gpio_mmio_base;
-
-	struct pci_dev *bridge_dev;
-	struct intel_ring_buffer ring[I915_NUM_RINGS];
-	uint32_t next_seqno;
-
-	drm_dma_handle_t *status_page_dmah;
-	uint32_t counter;
-	struct drm_i915_gem_object *pwrctx;
-	struct drm_i915_gem_object *renderctx;
-
-	struct resource mch_res;
-
-	atomic_t irq_received;
-
-	/* protects the irq masks */
-	spinlock_t irq_lock;
-
-	/* DPIO indirect register protection */
-	spinlock_t dpio_lock;
-
-	/** Cached value of IMR to avoid reads in updating the bitfield */
-	u32 pipestat[2];
-	u32 irq_mask;
-	u32 gt_irq_mask;
-	u32 pch_irq_mask;
-
-	u32 hotplug_supported_mask;
-	struct work_struct hotplug_work;
-
-	int num_pipe;
-	int num_pch_pll;
-
-	/* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-	struct timer_list hangcheck_timer;
-	int hangcheck_count;
-	uint32_t last_acthd[I915_NUM_RINGS];
-	uint32_t prev_instdone[I915_NUM_INSTDONE_REG];
-
-	unsigned int stop_rings;
-
-	unsigned long cfb_size;
-	unsigned int cfb_fb;
-	enum plane cfb_plane;
-	int cfb_y;
-	struct intel_fbc_work *fbc_work;
-
-	struct intel_opregion opregion;
-
-	/* overlay */
-	struct intel_overlay *overlay;
-	bool sprite_scaling_enabled;
-
-	/* LVDS info */
-	int backlight_level;  /* restore backlight to this value */
-	bool backlight_enabled;
-	struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
-	struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
-
-	/* Feature bits from the VBIOS */
-	unsigned int int_tv_support:1;
-	unsigned int lvds_dither:1;
-	unsigned int lvds_vbt:1;
-	unsigned int int_crt_support:1;
-	unsigned int lvds_use_ssc:1;
-	unsigned int display_clock_mode:1;
-	int lvds_ssc_freq;
-	unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */
-	unsigned int lvds_val; /* used for checking LVDS channel mode */
-	struct {
-		int rate;
-		int lanes;
-		int preemphasis;
-		int vswing;
-
-		bool initialized;
-		bool support;
-		int bpp;
-		struct edp_power_seq pps;
-	} edp;
-	bool no_aux_handshake;
-
-	struct notifier_block lid_notifier;
-
-	int crt_ddc_pin;
-	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
-	int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
-	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
-
-	unsigned int fsb_freq, mem_freq, is_ddr3;
-
-	spinlock_t error_lock;
-	/* Protected by dev->error_lock. */
-	struct drm_i915_error_state *first_error;
-	struct work_struct error_work;
-	struct completion error_completion;
-	struct workqueue_struct *wq;
-
-	/* Display functions */
-	struct drm_i915_display_funcs display;
-
-	/* PCH chipset type */
-	enum intel_pch pch_type;
-
-	unsigned long quirks;
-
-	/* Register state */
-	bool modeset_on_lid;
+struct i915_suspend_saved_registers {
 	u8 saveLBB;
 	u32 saveDSPACNTR;
 	u32 saveDSPBCNTR;
 	u32 saveDSPARB;
-	u32 saveHWS;
 	u32 savePIPEACONF;
 	u32 savePIPEBCONF;
 	u32 savePIPEASRC;
@@ -676,10 +553,206 @@
 	u32 savePIPEB_LINK_N1;
 	u32 saveMCHBAR_RENDER_STANDBY;
 	u32 savePCH_PORT_HOTPLUG;
+};
+
+struct intel_gen6_power_mgmt {
+	struct work_struct work;
+	u32 pm_iir;
+	/* lock - irqsave spinlock that protects the work_struct and
+	 * pm_iir. */
+	spinlock_t lock;
+
+	/* The variables below and all the rps hw state are protected by
+	 * dev->struct_mutex. */
+	u8 cur_delay;
+	u8 min_delay;
+	u8 max_delay;
+
+	struct delayed_work delayed_resume_work;
+
+	/*
+	 * Protects RPS/RC6 register access and PCU communication.
+	 * Must be taken after struct_mutex if nested.
+	 */
+	struct mutex hw_lock;
+};
+
+struct intel_ilk_power_mgmt {
+	u8 cur_delay;
+	u8 min_delay;
+	u8 max_delay;
+	u8 fmax;
+	u8 fstart;
+
+	u64 last_count1;
+	unsigned long last_time1;
+	unsigned long chipset_power;
+	u64 last_count2;
+	struct timespec last_time2;
+	unsigned long gfx_power;
+	u8 corr;
+
+	int c_m;
+	int r_t;
+
+	struct drm_i915_gem_object *pwrctx;
+	struct drm_i915_gem_object *renderctx;
+};
+
+struct i915_dri1_state {
+	unsigned allow_batchbuffer : 1;
+	u32 __iomem *gfx_hws_cpu_addr;
+
+	unsigned int cpp;
+	int back_offset;
+	int front_offset;
+	int current_page;
+	int page_flipping;
+
+	uint32_t counter;
+};
+
+struct intel_l3_parity {
+	u32 *remap_info;
+	struct work_struct error_work;
+};
+
+typedef struct drm_i915_private {
+	struct drm_device *dev;
+
+	const struct intel_device_info *info;
+
+	int relative_constants_mode;
+
+	void __iomem *regs;
+
+	struct drm_i915_gt_funcs gt;
+	/** gt_fifo_count and the subsequent register write are synchronized
+	 * with dev->struct_mutex. */
+	unsigned gt_fifo_count;
+	/** forcewake_count is protected by gt_lock */
+	unsigned forcewake_count;
+	/** gt_lock is also taken in irq contexts. */
+	struct spinlock gt_lock;
+
+	struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
+
+	/** gmbus_mutex protects against concurrent usage of the single hw gmbus
+	 * controller on different i2c buses. */
+	struct mutex gmbus_mutex;
+
+	/**
+	 * Base address of the gmbus and gpio block.
+	 */
+	uint32_t gpio_mmio_base;
+
+	struct pci_dev *bridge_dev;
+	struct intel_ring_buffer ring[I915_NUM_RINGS];
+	uint32_t next_seqno;
+
+	drm_dma_handle_t *status_page_dmah;
+	struct resource mch_res;
+
+	atomic_t irq_received;
+
+	/* protects the irq masks */
+	spinlock_t irq_lock;
+
+	/* DPIO indirect register protection */
+	spinlock_t dpio_lock;
+
+	/** Cached value of IMR to avoid reads in updating the bitfield */
+	u32 pipestat[2];
+	u32 irq_mask;
+	u32 gt_irq_mask;
+	u32 pch_irq_mask;
+
+	u32 hotplug_supported_mask;
+	struct work_struct hotplug_work;
+
+	int num_pipe;
+	int num_pch_pll;
+
+	/* For hangcheck timer */
+#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
+#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
+	struct timer_list hangcheck_timer;
+	int hangcheck_count;
+	uint32_t last_acthd[I915_NUM_RINGS];
+	uint32_t prev_instdone[I915_NUM_INSTDONE_REG];
+
+	unsigned int stop_rings;
+
+	unsigned long cfb_size;
+	unsigned int cfb_fb;
+	enum plane cfb_plane;
+	int cfb_y;
+	struct intel_fbc_work *fbc_work;
+
+	struct intel_opregion opregion;
+
+	/* overlay */
+	struct intel_overlay *overlay;
+	bool sprite_scaling_enabled;
+
+	/* LVDS info */
+	int backlight_level;  /* restore backlight to this value */
+	bool backlight_enabled;
+	struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
+	struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
+
+	/* Feature bits from the VBIOS */
+	unsigned int int_tv_support:1;
+	unsigned int lvds_dither:1;
+	unsigned int lvds_vbt:1;
+	unsigned int int_crt_support:1;
+	unsigned int lvds_use_ssc:1;
+	unsigned int display_clock_mode:1;
+	int lvds_ssc_freq;
+	unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */
+	unsigned int lvds_val; /* used for checking LVDS channel mode */
+	struct {
+		int rate;
+		int lanes;
+		int preemphasis;
+		int vswing;
+
+		bool initialized;
+		bool support;
+		int bpp;
+		struct edp_power_seq pps;
+	} edp;
+	bool no_aux_handshake;
+
+	int crt_ddc_pin;
+	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
+	int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
+	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
+
+	unsigned int fsb_freq, mem_freq, is_ddr3;
+
+	spinlock_t error_lock;
+	/* Protected by dev->error_lock. */
+	struct drm_i915_error_state *first_error;
+	struct work_struct error_work;
+	struct completion error_completion;
+	struct workqueue_struct *wq;
+
+	/* Display functions */
+	struct drm_i915_display_funcs display;
+
+	/* PCH chipset type */
+	enum intel_pch pch_type;
+	unsigned short pch_id;
+
+	unsigned long quirks;
+
+	/* Register state */
+	bool modeset_on_lid;
 
 	struct {
 		/** Bridge to intel-gtt-ko */
-		const struct intel_gtt *gtt;
+		struct intel_gtt *gtt;
 		/** Memory allocator for GTT stolen memory */
 		struct drm_mm stolen;
 		/** Memory allocator for GTT */
@@ -706,8 +779,6 @@
 		/** PPGTT used for aliasing the PPGTT with the GTT */
 		struct i915_hw_ppgtt *aliasing_ppgtt;
 
-		u32 *l3_remap_info;
-
 		struct shrinker inactive_shrinker;
 
 		/**
@@ -785,19 +856,6 @@
 		u32 object_count;
 	} mm;
 
-	/* Old dri1 support infrastructure, beware the dragons ya fools entering
-	 * here! */
-	struct {
-		unsigned allow_batchbuffer : 1;
-		u32 __iomem *gfx_hws_cpu_addr;
-
-		unsigned int cpp;
-		int back_offset;
-		int front_offset;
-		int current_page;
-		int page_flipping;
-	} dri1;
-
 	/* Kernel Modesetting */
 
 	struct sdvo_device_mapping sdvo_mappings[2];
@@ -811,6 +869,7 @@
 	wait_queue_head_t pending_flip_queue;
 
 	struct intel_pch_pll pch_plls[I915_NUM_PLLS];
+	struct intel_ddi_plls ddi_plls;
 
 	/* Reclocking support */
 	bool render_reclock_avail;
@@ -820,46 +879,17 @@
 	u16 orig_clock;
 	int child_dev_num;
 	struct child_device_config *child_dev;
-	struct drm_connector *int_lvds_connector;
-	struct drm_connector *int_edp_connector;
 
 	bool mchbar_need_disable;
 
-	/* gen6+ rps state */
-	struct {
-		struct work_struct work;
-		u32 pm_iir;
-		/* lock - irqsave spinlock that protectects the work_struct and
-		 * pm_iir. */
-		spinlock_t lock;
+	struct intel_l3_parity l3_parity;
 
-		/* The below variables an all the rps hw state are protected by
-		 * dev->struct mutext. */
-		u8 cur_delay;
-		u8 min_delay;
-		u8 max_delay;
-	} rps;
+	/* gen6+ rps state */
+	struct intel_gen6_power_mgmt rps;
 
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
-	struct {
-		u8 cur_delay;
-		u8 min_delay;
-		u8 max_delay;
-		u8 fmax;
-		u8 fstart;
-
-		u64 last_count1;
-		unsigned long last_time1;
-		unsigned long chipset_power;
-		u64 last_count2;
-		struct timespec last_time2;
-		unsigned long gfx_power;
-		u8 corr;
-
-		int c_m;
-		int r_t;
-	} ips;
+	struct intel_ilk_power_mgmt ips;
 
 	enum no_fbc_reason no_fbc_reason;
 
@@ -871,14 +901,27 @@
 	/* list of fbdev register on this device */
 	struct intel_fbdev *fbdev;
 
+	/*
+	 * The console may be contended at resume, but we don't
+	 * want resume to block on it.
+	 */
+	struct work_struct console_resume_work;
+
 	struct backlight_device *backlight;
 
 	struct drm_property *broadcast_rgb_property;
 	struct drm_property *force_audio_property;
 
-	struct work_struct parity_error_work;
 	bool hw_contexts_disabled;
 	uint32_t hw_context_size;
+
+	bool fdi_rx_polarity_reversed;
+
+	struct i915_suspend_saved_registers regfile;
+
+	/* Old dri1 support infrastructure, beware the dragons ya fools entering
+	 * here! */
+	struct i915_dri1_state dri1;
 } drm_i915_private_t;
 
 /* Iterate over initialised rings */
@@ -1120,9 +1163,14 @@
 #define IS_IRONLAKE_D(dev)	((dev)->pci_device == 0x0042)
 #define IS_IRONLAKE_M(dev)	((dev)->pci_device == 0x0046)
 #define IS_IVYBRIDGE(dev)	(INTEL_INFO(dev)->is_ivybridge)
+#define IS_IVB_GT1(dev)		((dev)->pci_device == 0x0156 || \
+				 (dev)->pci_device == 0x0152 ||	\
+				 (dev)->pci_device == 0x015a)
 #define IS_VALLEYVIEW(dev)	(INTEL_INFO(dev)->is_valleyview)
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
+#define IS_ULT(dev)		(IS_HASWELL(dev) && \
+				 ((dev)->pci_device & 0xFF00) == 0x0A00)
 
 /*
  * The genX designation typically refers to the render engine, so render
@@ -1168,6 +1216,13 @@
 
 #define HAS_PIPE_CONTROL(dev) (INTEL_INFO(dev)->gen >= 5)
 
+#define INTEL_PCH_DEVICE_ID_MASK		0xff00
+#define INTEL_PCH_IBX_DEVICE_ID_TYPE		0x3b00
+#define INTEL_PCH_CPT_DEVICE_ID_TYPE		0x1c00
+#define INTEL_PCH_PPT_DEVICE_ID_TYPE		0x1e00
+#define INTEL_PCH_LPT_DEVICE_ID_TYPE		0x8c00
+#define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE		0x9c00
+
 #define INTEL_PCH_TYPE(dev) (((struct drm_i915_private *)(dev)->dev_private)->pch_type)
 #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
@@ -1250,6 +1305,7 @@
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 
+extern void intel_console_resume(struct work_struct *work);
 
 /* i915_irq.c */
 void i915_hangcheck_elapsed(unsigned long data);
@@ -1257,6 +1313,7 @@
 
 extern void intel_irq_init(struct drm_device *dev);
 extern void intel_gt_init(struct drm_device *dev);
+extern void intel_gt_reset(struct drm_device *dev);
 
 void i915_error_state_free(struct kref *error_ref);
 
@@ -1368,8 +1425,7 @@
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_ring_buffer *to);
 void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-				    struct intel_ring_buffer *ring,
-				    u32 seqno);
+				    struct intel_ring_buffer *ring);
 
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
@@ -1387,7 +1443,7 @@
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
-u32 i915_gem_next_request_seqno(struct intel_ring_buffer *ring);
+extern int i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 
 int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
@@ -1499,6 +1555,14 @@
 			      unsigned long start,
 			      unsigned long mappable_end,
 			      unsigned long end);
+int i915_gem_gtt_init(struct drm_device *dev);
+void i915_gem_gtt_fini(struct drm_device *dev);
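+/* Only pre-gen6 parts need an explicit chipset flush after CPU writes;
+ * on newer generations this is a no-op. */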
+static inline void i915_gem_chipset_flush(struct drm_device *dev)
+{
+	if (INTEL_INFO(dev)->gen < 6)
+		intel_gtt_chipset_flush();
+}
+
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
@@ -1595,11 +1659,12 @@
 extern void intel_modeset_gem_init(struct drm_device *dev);
 extern void intel_modeset_cleanup(struct drm_device *dev);
 extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
-extern void intel_modeset_setup_hw_state(struct drm_device *dev);
+extern void intel_modeset_setup_hw_state(struct drm_device *dev,
+					 bool force_restore);
 extern bool intel_fbc_enabled(struct drm_device *dev);
 extern void intel_disable_fbc(struct drm_device *dev);
 extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
-extern void ironlake_init_pch_refclk(struct drm_device *dev);
+extern void intel_init_pch_refclk(struct drm_device *dev);
 extern void gen6_set_rps(struct drm_device *dev, u8 val);
 extern void intel_detect_pch(struct drm_device *dev);
 extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
@@ -1628,6 +1693,9 @@
 void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
 int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val);
+int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val);
+
 #define __i915_read(x, y) \
 	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9b285da..742206e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -845,12 +845,12 @@
 		 * domain anymore. */
 		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 			i915_gem_clflush_object(obj);
-			intel_gtt_chipset_flush();
+			i915_gem_chipset_flush(dev);
 		}
 	}
 
 	if (needs_clflush_after)
-		intel_gtt_chipset_flush();
+		i915_gem_chipset_flush(dev);
 
 	return ret;
 }
@@ -1345,30 +1345,17 @@
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
 	/* Now bind it into the GTT if needed */
-	if (!obj->map_and_fenceable) {
-		ret = i915_gem_object_unbind(obj);
-		if (ret)
-			goto unlock;
-	}
-	if (!obj->gtt_space) {
-		ret = i915_gem_object_bind_to_gtt(obj, 0, true, false);
-		if (ret)
-			goto unlock;
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, write);
-		if (ret)
-			goto unlock;
-	}
-
-	if (!obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
-
-	ret = i915_gem_object_get_fence(obj);
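+	/* Pinning binds the object into the GTT and creates the global
+	 * mapping if needed. */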
+	ret = i915_gem_object_pin(obj, 0, true, false);
 	if (ret)
 		goto unlock;
 
-	if (i915_gem_object_is_inactive(obj))
-		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	ret = i915_gem_object_set_to_gtt_domain(obj, write);
+	if (ret)
+		goto unpin;
+
+	ret = i915_gem_object_get_fence(obj);
+	if (ret)
+		goto unpin;
 
 	obj->fault_mappable = true;
 
@@ -1377,6 +1364,8 @@
 
 	/* Finally, remap it using the new GTT offset */
 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+unpin:
+	i915_gem_object_unpin(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
 out:
@@ -1707,10 +1696,14 @@
 	if (obj->pages_pin_count)
 		return -EBUSY;
 
+	/* ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early. */
+	list_del(&obj->gtt_list);
+
 	ops->put_pages(obj);
 	obj->pages = NULL;
 
-	list_del(&obj->gtt_list);
 	if (i915_gem_object_is_purgeable(obj))
 		i915_gem_object_truncate(obj);
 
@@ -1868,11 +1861,11 @@
 
 void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_ring_buffer *ring,
-			       u32 seqno)
+			       struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
 	obj->ring = ring;
@@ -1933,26 +1926,54 @@
 	WARN_ON(i915_verify_lists(dev));
 }
 
-static u32
-i915_gem_get_seqno(struct drm_device *dev)
+static int
+i915_gem_handle_seqno_wrap(struct drm_device *dev)
 {
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	u32 seqno = dev_priv->next_seqno;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	int ret, i, j;
 
-	/* reserve 0 for non-seqno */
-	if (++dev_priv->next_seqno == 0)
-		dev_priv->next_seqno = 1;
+	/* The hardware uses various monotonic 32-bit counters; if we
+	 * detect that they will wrap around, we need to idle the GPU
+	 * and reset those counters.
+	 */
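+	/* ret doubles as a flag here: it becomes non-zero if any ring
+	 * still tracks a semaphore sync point from another ring. */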
+	ret = 0;
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
+			ret |= ring->sync_seqno[j] != 0;
+	}
+	if (ret == 0)
+		return ret;
 
-	return seqno;
+	ret = i915_gpu_idle(dev);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(dev);
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
+			ring->sync_seqno[j] = 0;
+	}
+
+	return 0;
 }
 
-u32
-i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
+int
+i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 {
-	if (ring->outstanding_lazy_request == 0)
-		ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	return ring->outstanding_lazy_request;
+	/* reserve 0 for non-seqno */
+	if (dev_priv->next_seqno == 0) {
+		int ret = i915_gem_handle_seqno_wrap(dev);
+		if (ret)
+			return ret;
+
+		dev_priv->next_seqno = 1;
+	}
+
+	*seqno = dev_priv->next_seqno++;
+	return 0;
 }
 
 int
@@ -1963,7 +1984,6 @@
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
 	u32 request_ring_position;
-	u32 seqno;
 	int was_empty;
 	int ret;
 
@@ -1982,7 +2002,6 @@
 	if (request == NULL)
 		return -ENOMEM;
 
-	seqno = i915_gem_next_request_seqno(ring);
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
@@ -1991,15 +2010,13 @@
 	 */
 	request_ring_position = intel_ring_get_tail(ring);
 
-	ret = ring->add_request(ring, &seqno);
+	ret = ring->add_request(ring);
 	if (ret) {
 		kfree(request);
 		return ret;
 	}
 
-	trace_i915_gem_request_add(ring, seqno);
-
-	request->seqno = seqno;
+	request->seqno = intel_ring_get_seqno(ring);
 	request->ring = ring;
 	request->tail = request_ring_position;
 	request->emitted_jiffies = jiffies;
@@ -2017,23 +2034,24 @@
 		spin_unlock(&file_priv->mm.lock);
 	}
 
+	trace_i915_gem_request_add(ring, request->seqno);
 	ring->outstanding_lazy_request = 0;
 
 	if (!dev_priv->mm.suspended) {
 		if (i915_enable_hangcheck) {
 			mod_timer(&dev_priv->hangcheck_timer,
-				  jiffies +
-				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+				  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 		}
 		if (was_empty) {
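+			/* Round up to a whole second so the retire timer can
+			 * coalesce with other wakeups and save power. */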
 			queue_delayed_work(dev_priv->wq,
-					   &dev_priv->mm.retire_work, HZ);
+					   &dev_priv->mm.retire_work,
+					   round_jiffies_up_relative(HZ));
 			intel_mark_busy(dev_priv->dev);
 		}
 	}
 
 	if (out_seqno)
-		*out_seqno = seqno;
+		*out_seqno = request->seqno;
 	return 0;
 }
 
@@ -2131,7 +2149,6 @@
 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 {
 	uint32_t seqno;
-	int i;
 
 	if (list_empty(&ring->request_list))
 		return;
@@ -2140,10 +2157,6 @@
 
 	seqno = ring->get_seqno(ring, true);
 
-	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
-		if (seqno >= ring->sync_seqno[i])
-			ring->sync_seqno[i] = 0;
-
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
 
@@ -2218,7 +2231,8 @@
 
 	/* Come back later if the device is busy... */
 	if (!mutex_trylock(&dev->struct_mutex)) {
-		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
+				   round_jiffies_up_relative(HZ));
 		return;
 	}
 
@@ -2236,7 +2250,8 @@
 	}
 
 	if (!dev_priv->mm.suspended && !idle)
-		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
+				   round_jiffies_up_relative(HZ));
 	if (idle)
 		intel_mark_idle(dev);
 
@@ -2386,7 +2401,11 @@
 
 	ret = to->sync_to(to, from, seqno);
 	if (!ret)
-		from->sync_seqno[idx] = seqno;
+		/* We use last_read_seqno because sync_to()
+		 * might have just caused seqno wrap under
+		 * the radar.
+		 */
+		from->sync_seqno[idx] = obj->last_read_seqno;
 
 	return ret;
 }
@@ -2469,14 +2488,6 @@
 	return 0;
 }
 
-static int i915_ring_idle(struct intel_ring_buffer *ring)
-{
-	if (list_empty(&ring->active_list))
-		return 0;
-
-	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
-}
-
 int i915_gpu_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2489,7 +2500,7 @@
 		if (ret)
 			return ret;
 
-		ret = i915_ring_idle(ring);
+		ret = intel_ring_idle(ring);
 		if (ret)
 			return ret;
 	}
@@ -2923,13 +2934,14 @@
 	if (ret)
 		return ret;
 
+	i915_gem_object_pin_pages(obj);
+
  search_free:
 	if (map_and_fenceable)
-		free_space =
-			drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
-							  size, alignment, obj->cache_level,
-							  0, dev_priv->mm.gtt_mappable_end,
-							  false);
+		free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
+							       size, alignment, obj->cache_level,
+							       0, dev_priv->mm.gtt_mappable_end,
+							       false);
 	else
 		free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
 						      size, alignment, obj->cache_level,
@@ -2937,60 +2949,60 @@
 
 	if (free_space != NULL) {
 		if (map_and_fenceable)
-			obj->gtt_space =
+			free_space =
 				drm_mm_get_block_range_generic(free_space,
 							       size, alignment, obj->cache_level,
 							       0, dev_priv->mm.gtt_mappable_end,
 							       false);
 		else
-			obj->gtt_space =
+			free_space =
 				drm_mm_get_block_generic(free_space,
 							 size, alignment, obj->cache_level,
 							 false);
 	}
-	if (obj->gtt_space == NULL) {
+	if (free_space == NULL) {
 		ret = i915_gem_evict_something(dev, size, alignment,
 					       obj->cache_level,
 					       map_and_fenceable,
 					       nonblocking);
-		if (ret)
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
 			return ret;
+		}
 
 		goto search_free;
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(dev,
-					      obj->gtt_space,
+					      free_space,
 					      obj->cache_level))) {
-		drm_mm_put_block(obj->gtt_space);
-		obj->gtt_space = NULL;
+		i915_gem_object_unpin_pages(obj);
+		drm_mm_put_block(free_space);
 		return -EINVAL;
 	}
 
-
 	ret = i915_gem_gtt_prepare_object(obj);
 	if (ret) {
-		drm_mm_put_block(obj->gtt_space);
-		obj->gtt_space = NULL;
+		i915_gem_object_unpin_pages(obj);
+		drm_mm_put_block(free_space);
 		return ret;
 	}
 
-	if (!dev_priv->mm.aliasing_ppgtt)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
-
 	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_space = free_space;
+	obj->gtt_offset = free_space->start;
 
 	fenceable =
-		obj->gtt_space->size == fence_size &&
-		(obj->gtt_space->start & (fence_alignment - 1)) == 0;
+		free_space->size == fence_size &&
+		(free_space->start & (fence_alignment - 1)) == 0;
 
 	mappable =
 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
 
 	obj->map_and_fenceable = mappable && fenceable;
 
+	i915_gem_object_unpin_pages(obj);
 	trace_i915_gem_object_bind(obj, map_and_fenceable);
 	i915_gem_verify_gtt(dev);
 	return 0;
@@ -3059,7 +3071,7 @@
 		return;
 
 	i915_gem_clflush_object(obj);
-	intel_gtt_chipset_flush();
+	i915_gem_chipset_flush(obj->base.dev);
 	old_write_domain = obj->base.write_domain;
 	obj->base.write_domain = 0;
 
@@ -3454,11 +3466,16 @@
 	}
 
 	if (obj->gtt_space == NULL) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+
 		ret = i915_gem_object_bind_to_gtt(obj, alignment,
 						  map_and_fenceable,
 						  nonblocking);
 		if (ret)
 			return ret;
+
+		if (!dev_priv->mm.aliasing_ppgtt)
+			i915_gem_gtt_bind_object(obj, obj->cache_level);
 	}
 
 	if (!obj->has_global_gtt_mapping && map_and_fenceable)
@@ -3832,7 +3849,7 @@
 	if (!IS_IVYBRIDGE(dev))
 		return;
 
-	if (!dev_priv->mm.l3_remap_info)
+	if (!dev_priv->l3_parity.remap_info)
 		return;
 
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
@@ -3841,12 +3858,12 @@
 
 	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
 		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
-		if (remap && remap != dev_priv->mm.l3_remap_info[i/4])
+		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
 			DRM_DEBUG("0x%x was already programmed to %x\n",
 				  GEN7_L3LOG_BASE + i, remap);
-		if (remap && !dev_priv->mm.l3_remap_info[i/4])
+		if (remap && !dev_priv->l3_parity.remap_info[i/4])
 			DRM_DEBUG_DRIVER("Clearing remapped register\n");
-		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->mm.l3_remap_info[i/4]);
+		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
 	}
 
 	/* Make sure all the writes land before disabling dop clock gating */
@@ -3876,68 +3893,6 @@
 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
 }
 
-void i915_gem_init_ppgtt(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t pd_offset;
-	struct intel_ring_buffer *ring;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	uint32_t __iomem *pd_addr;
-	uint32_t pd_entry;
-	int i;
-
-	if (!dev_priv->mm.aliasing_ppgtt)
-		return;
-
-
-	pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		dma_addr_t pt_addr;
-
-		if (dev_priv->mm.gtt->needs_dmar)
-			pt_addr = ppgtt->pt_dma_addr[i];
-		else
-			pt_addr = page_to_phys(ppgtt->pt_pages[i]);
-
-		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
-		pd_entry |= GEN6_PDE_VALID;
-
-		writel(pd_entry, pd_addr + i);
-	}
-	readl(pd_addr);
-
-	pd_offset = ppgtt->pd_offset;
-	pd_offset /= 64; /* in cachelines, */
-	pd_offset <<= 16;
-
-	if (INTEL_INFO(dev)->gen == 6) {
-		uint32_t ecochk, gab_ctl, ecobits;
-
-		ecobits = I915_READ(GAC_ECO_BITS); 
-		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
-
-		gab_ctl = I915_READ(GAB_CTL);
-		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
-
-		ecochk = I915_READ(GAM_ECOCHK);
-		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
-				       ECOCHK_PPGTT_CACHE64B);
-		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	} else if (INTEL_INFO(dev)->gen >= 7) {
-		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
-		/* GFX_MODE is per-ring on gen7+ */
-	}
-
-	for_each_ring(ring, dev_priv, i) {
-		if (INTEL_INFO(dev)->gen >= 7)
-			I915_WRITE(RING_MODE_GEN7(ring),
-				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-
-		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
-	}
-}
-
 static bool
 intel_enable_blt(struct drm_device *dev)
 {
@@ -3960,7 +3915,7 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
-	if (!intel_enable_gtt())
+	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
 
 	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
@@ -4295,7 +4250,7 @@
 			page_cache_release(page);
 		}
 	}
-	intel_gtt_chipset_flush();
+	i915_gem_chipset_flush(dev);
 
 	obj->phys_obj->cur_obj = NULL;
 	obj->phys_obj = NULL;
@@ -4382,7 +4337,7 @@
 			return -EFAULT;
 	}
 
-	intel_gtt_chipset_flush();
+	i915_gem_chipset_flush(dev);
 	return 0;
 }
 
@@ -4407,6 +4362,19 @@
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
+{
+	if (!mutex_is_locked(mutex))
+		return false;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
+	return mutex->owner == task;
+#else
+	/* Since UP may be pre-empted, we cannot assume that we own the lock */
+	return false;
+#endif
+}
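mutex_is_locked_by() exists because the shrinker can be entered from direct reclaim while the current task already holds struct_mutex (for instance during an allocation made under the lock); a plain trylock failure would then skip shrinking exactly when memory is tightest. A userspace sketch of the trylock-or-already-mine pattern — owner tracking is explicit and unsynchronized here, where the kernel version leans on mutex->owner under CONFIG_SMP/CONFIG_DEBUG_MUTEXES:

#include <pthread.h>
#include <stdbool.h>

struct owned_mutex {
	pthread_mutex_t lock;
	pthread_t owner;
	bool held;
};

/* Returns false if another task holds the lock; otherwise runs the scan,
 * unlocking only if we took the lock here rather than re-entering. */
static bool shrink(struct owned_mutex *m)
{
	bool unlock = true;

	if (pthread_mutex_trylock(&m->lock) != 0) {
		if (!m->held || !pthread_equal(m->owner, pthread_self()))
			return false;
		unlock = false;	/* direct-reclaim re-entry under our own lock */
	} else {
		m->owner = pthread_self();
		m->held = true;
	}

	/* ... walk the inactive list and free what we can ... */

	if (unlock) {
		m->held = false;
		pthread_mutex_unlock(&m->lock);
	}
	return true;
}

int main(void)
{
	struct owned_mutex m = { .lock = PTHREAD_MUTEX_INITIALIZER };

	return shrink(&m) ? 0 : 1;
}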
+
 static int
 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 {
@@ -4417,10 +4385,15 @@
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_i915_gem_object *obj;
 	int nr_to_scan = sc->nr_to_scan;
+	bool unlock = true;
 	int cnt;
 
-	if (!mutex_trylock(&dev->struct_mutex))
-		return 0;
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		if (!mutex_is_locked_by(&dev->struct_mutex, current))
+			return 0;
+
+		unlock = false;
+	}
 
 	if (nr_to_scan) {
 		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
@@ -4436,6 +4409,7 @@
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
 
-	mutex_unlock(&dev->struct_mutex);
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
 	return cnt;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 05ed42f..a3f06bc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -146,7 +146,7 @@
 	struct i915_hw_context *ctx;
 	int ret, id;
 
-	ctx = kzalloc(sizeof(struct drm_i915_file_private), GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -410,9 +410,8 @@
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from_obj != NULL) {
-		u32 seqno = i915_gem_next_request_seqno(ring);
 		from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_gem_object_move_to_active(from_obj, ring, seqno);
+		i915_gem_object_move_to_active(from_obj, ring);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3eea143..ee8f97f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -128,15 +128,6 @@
 					 target_i915_obj->cache_level);
 	}
 
-	/* The target buffer should have appeared before us in the
-	 * exec_object list, so it should have a GTT space bound by now.
-	 */
-	if (unlikely(target_offset == 0)) {
-		DRM_DEBUG("No GTT space found for object %d\n",
-			  reloc->target_handle);
-		return ret;
-	}
-
 	/* Validate that the target is in a valid r/w GPU domain */
 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 		DRM_DEBUG("reloc with multiple write domains: "
@@ -672,7 +663,7 @@
 	}
 
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
-		intel_gtt_chipset_flush();
+		i915_gem_chipset_flush(ring->dev);
 
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
@@ -722,8 +713,7 @@
 
 static void
 i915_gem_execbuffer_move_to_active(struct list_head *objects,
-				   struct intel_ring_buffer *ring,
-				   u32 seqno)
+				   struct intel_ring_buffer *ring)
 {
 	struct drm_i915_gem_object *obj;
 
@@ -735,10 +725,10 @@
 		obj->base.write_domain = obj->base.pending_write_domain;
 		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
-		i915_gem_object_move_to_active(obj, ring, seqno);
+		i915_gem_object_move_to_active(obj, ring);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
-			obj->last_write_seqno = seqno;
+			obj->last_write_seqno = intel_ring_get_seqno(ring);
 			if (obj->pin_count) /* check for potential scanout */
 				intel_mark_fb_busy(obj);
 		}
@@ -798,8 +788,8 @@
 	struct intel_ring_buffer *ring;
 	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 exec_start, exec_len;
-	u32 seqno;
 	u32 mask;
+	u32 flags;
 	int ret, mode, i;
 
 	if (!i915_gem_check_execbuffer(args)) {
@@ -811,6 +801,14 @@
 	if (ret)
 		return ret;
 
+	flags = 0;
+	if (args->flags & I915_EXEC_SECURE) {
+		if (!file->is_master || !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		flags |= I915_DISPATCH_SECURE;
+	}
+
 	switch (args->flags & I915_EXEC_RING_MASK) {
 	case I915_EXEC_DEFAULT:
 	case I915_EXEC_RENDER:
@@ -983,26 +981,17 @@
 	}
 	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
+	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+	 * batch" bit. Hence we need to pin secure batches into the global gtt.
+	 * hsw should have this fixed, but let's be paranoid and do it
+	 * unconditionally for now. */
+	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
+		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
+
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
 	if (ret)
 		goto err;
 
-	seqno = i915_gem_next_request_seqno(ring);
-	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
-		if (seqno < ring->sync_seqno[i]) {
-			/* The GPU can not handle its semaphore value wrapping,
-			 * so every billion or so execbuffers, we need to stall
-			 * the GPU in order to reset the counters.
-			 */
-			ret = i915_gpu_idle(dev);
-			if (ret)
-				goto err;
-			i915_gem_retire_requests(dev);
-
-			BUG_ON(ring->sync_seqno[i]);
-		}
-	}
-
 	ret = i915_switch_context(ring, file, ctx_id);
 	if (ret)
 		goto err;
@@ -1028,8 +1017,6 @@
 			goto err;
 	}
 
-	trace_i915_gem_ring_dispatch(ring, seqno);
-
 	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
 	exec_len = args->batch_len;
 	if (cliprects) {
@@ -1040,17 +1027,22 @@
 				goto err;
 
 			ret = ring->dispatch_execbuffer(ring,
-							exec_start, exec_len);
+							exec_start, exec_len,
+							flags);
 			if (ret)
 				goto err;
 		}
 	} else {
-		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
+		ret = ring->dispatch_execbuffer(ring,
+						exec_start, exec_len,
+						flags);
 		if (ret)
 			goto err;
 	}
 
-	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
+	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
+
+	i915_gem_execbuffer_move_to_active(&objects, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring);
 
 err:
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index df470b5..2c150de 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -28,19 +28,67 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+typedef uint32_t gtt_pte_t;
+
+/* PPGTT stuff */
+#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
+
+#define GEN6_PDE_VALID			(1 << 0)
+/* gen6+ has bit 11-4 for physical addr bit 39-32 */
+#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+
+#define GEN6_PTE_VALID			(1 << 0)
+#define GEN6_PTE_UNCACHED		(1 << 1)
+#define HSW_PTE_UNCACHED		(0)
+#define GEN6_PTE_CACHE_LLC		(2 << 1)
+#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
+#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+
+static inline gtt_pte_t pte_encode(struct drm_device *dev,
+				   dma_addr_t addr,
+				   enum i915_cache_level level)
+{
+	gtt_pte_t pte = GEN6_PTE_VALID;
+	pte |= GEN6_PTE_ADDR_ENCODE(addr);
+
+	switch (level) {
+	case I915_CACHE_LLC_MLC:
+		/* Haswell doesn't set L3 this way */
+		if (IS_HASWELL(dev))
+			pte |= GEN6_PTE_CACHE_LLC;
+		else
+			pte |= GEN6_PTE_CACHE_LLC_MLC;
+		break;
+	case I915_CACHE_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_NONE:
+		if (IS_HASWELL(dev))
+			pte |= HSW_PTE_UNCACHED;
+		else
+			pte |= GEN6_PTE_UNCACHED;
+		break;
+	default:
+		BUG();
+	}
+
+	return pte;
+}
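GEN6_GTT_ADDR_ENCODE() makes a 40-bit physical address fit in a 32-bit PTE: bits 31:12 stay in place (the low 12 bits are free on 4K-aligned pages) and bits 39:32 are folded down into PTE bits 11:4. A standalone check of the arithmetic with a made-up address:

#include <stdint.h>
#include <stdio.h>

#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
#define GEN6_PTE_VALID			(1 << 0)

int main(void)
{
	uint64_t addr = 0x1234567000ULL;	/* 4K aligned, bits 39:32 = 0x12 */
	uint32_t pte = (uint32_t)GEN6_GTT_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	/* bits 31:12 keep 0x34567, bits 11:4 carry 0x12, bit 0 is valid */
	printf("pte = 0x%08x\n", pte);	/* pte = 0x34567121 */
	return 0;
}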
+
 /* PPGTT support for Sandybridge/Gen6 and later */
 static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
-	uint32_t *pt_vaddr;
-	uint32_t scratch_pte;
+	gtt_pte_t *pt_vaddr;
+	gtt_pte_t scratch_pte;
 	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = GEN6_PTE_ADDR_ENCODE(ppgtt->scratch_page_dma_addr);
-	scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC;
+	scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
+				 I915_CACHE_LLC);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -77,6 +125,7 @@
 	if (!ppgtt)
 		return ret;
 
+	ppgtt->dev = dev;
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
@@ -118,7 +167,7 @@
 	i915_ppgtt_clear_range(ppgtt, 0,
 			       ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
 
-	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(uint32_t);
+	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
 
 	dev_priv->mm.aliasing_ppgtt = ppgtt;
 
@@ -168,9 +217,9 @@
 static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
 					 const struct sg_table *pages,
 					 unsigned first_entry,
-					 uint32_t pte_flags)
+					 enum i915_cache_level cache_level)
 {
-	uint32_t *pt_vaddr, pte;
+	gtt_pte_t *pt_vaddr;
 	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned i, j, m, segment_len;
@@ -188,8 +237,8 @@
 
 		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
 			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
-			pt_vaddr[j] = pte | pte_flags;
+			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
+						 cache_level);
 
 			/* grab the next page */
 			if (++m == segment_len) {
@@ -213,29 +262,10 @@
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	uint32_t pte_flags = GEN6_PTE_VALID;
-
-	switch (cache_level) {
-	case I915_CACHE_LLC_MLC:
-		pte_flags |= GEN6_PTE_CACHE_LLC_MLC;
-		break;
-	case I915_CACHE_LLC:
-		pte_flags |= GEN6_PTE_CACHE_LLC;
-		break;
-	case I915_CACHE_NONE:
-		if (IS_HASWELL(obj->base.dev))
-			pte_flags |= HSW_PTE_UNCACHED;
-		else
-			pte_flags |= GEN6_PTE_UNCACHED;
-		break;
-	default:
-		BUG();
-	}
-
 	i915_ppgtt_insert_sg_entries(ppgtt,
 				     obj->pages,
 				     obj->gtt_space->start >> PAGE_SHIFT,
-				     pte_flags);
+				     cache_level);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
@@ -246,23 +276,65 @@
 			       obj->base.size >> PAGE_SHIFT);
 }
 
-/* XXX kill agp_type! */
-static unsigned int cache_level_to_agp_type(struct drm_device *dev,
-					    enum i915_cache_level cache_level)
+void i915_gem_init_ppgtt(struct drm_device *dev)
 {
-	switch (cache_level) {
-	case I915_CACHE_LLC_MLC:
-		if (INTEL_INFO(dev)->gen >= 6)
-			return AGP_USER_CACHED_MEMORY_LLC_MLC;
-		/* Older chipsets do not have this extra level of CPU
-		 * cacheing, so fallthrough and request the PTE simply
-		 * as cached.
-		 */
-	case I915_CACHE_LLC:
-		return AGP_USER_CACHED_MEMORY;
-	default:
-	case I915_CACHE_NONE:
-		return AGP_USER_MEMORY;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	uint32_t pd_offset;
+	struct intel_ring_buffer *ring;
+	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+	uint32_t __iomem *pd_addr;
+	uint32_t pd_entry;
+	int i;
+
+	if (!dev_priv->mm.aliasing_ppgtt)
+		return;
+
+	pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
+	for (i = 0; i < ppgtt->num_pd_entries; i++) {
+		dma_addr_t pt_addr;
+
+		if (dev_priv->mm.gtt->needs_dmar)
+			pt_addr = ppgtt->pt_dma_addr[i];
+		else
+			pt_addr = page_to_phys(ppgtt->pt_pages[i]);
+
+		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
+		pd_entry |= GEN6_PDE_VALID;
+
+		writel(pd_entry, pd_addr + i);
+	}
+	readl(pd_addr);
+
+	pd_offset = ppgtt->pd_offset;
+	pd_offset /= 64; /* in cachelines, */
+	pd_offset <<= 16;
+
+	if (INTEL_INFO(dev)->gen == 6) {
+		uint32_t ecochk, gab_ctl, ecobits;
+
+		ecobits = I915_READ(GAC_ECO_BITS);
+		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
+
+		gab_ctl = I915_READ(GAB_CTL);
+		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+		ecochk = I915_READ(GAM_ECOCHK);
+		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
+				       ECOCHK_PPGTT_CACHE64B);
+		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	} else if (INTEL_INFO(dev)->gen >= 7) {
+		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
+		/* GFX_MODE is per-ring on gen7+ */
+	}
+
+	for_each_ring(ring, dev_priv, i) {
+		if (INTEL_INFO(dev)->gen >= 7)
+			I915_WRITE(RING_MODE_GEN7(ring),
+				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+
+		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
 	}
 }
 
@@ -288,13 +360,40 @@
 		dev_priv->mm.interruptible = interruptible;
 }
 
+
+static void i915_ggtt_clear_range(struct drm_device *dev,
+				 unsigned first_entry,
+				 unsigned num_entries)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	gtt_pte_t scratch_pte;
+	gtt_pte_t __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry;
+	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	int i;
+
+	if (INTEL_INFO(dev)->gen < 6) {
+		intel_gtt_clear_range(first_entry, num_entries);
+		return;
+	}
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
+	for (i = 0; i < num_entries; i++)
+		iowrite32(scratch_pte, &gtt_base[i]);
+	readl(gtt_base);
+}
+
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 
 	/* First fill our portion of the GTT with scratch pages */
-	intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
+	i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE,
 			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
@@ -302,7 +401,7 @@
 		i915_gem_gtt_bind_object(obj, obj->cache_level);
 	}
 
-	intel_gtt_chipset_flush();
+	i915_gem_chipset_flush(dev);
 }
 
 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
@@ -318,21 +417,76 @@
 	return 0;
 }
 
+/*
+ * Binds an object into the global gtt with the specified cache level. The object
+ * will be accessible to the GPU via commands whose operands reference offsets
+ * within the global GTT as well as accessible by the CPU through the GMADR
+ * mapped BAR (dev_priv->mm.gtt->gtt).
+ */
+static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
+				  enum i915_cache_level level)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct sg_table *st = obj->pages;
+	struct scatterlist *sg = st->sgl;
+	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
+	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry;
+	int unused, i = 0;
+	unsigned int len, m = 0;
+	dma_addr_t addr;
+
+	for_each_sg(st->sgl, sg, st->nents, unused) {
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
+		for (m = 0; m < len; m++) {
+			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+			iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
+			i++;
+		}
+	}
+
+	BUG_ON(i > max_entries);
+	BUG_ON(i != obj->base.size / PAGE_SIZE);
+
+	/* XXX: This serves as a posting read to make sure that the PTE has
+	 * actually been updated. There is some concern that even though
+	 * registers and PTEs are within the same BAR, they may be subject to
+	 * different NUMA access patterns. Therefore, even with the way we assume
+	 * hardware should work, we must keep this posting read for paranoia.
+	 */
+	if (i != 0)
+		WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));
+
+	/* This next bit makes the above posting read even more important. We
+	 * want to flush the TLBs only after we're certain all the PTE updates
+	 * have finished.
+	 */
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
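The XXX comment above describes the write-then-read-back idiom: writes through an ioremap_wc() mapping may be posted or combined, so reading the last PTE slot back forces them to land before the explicit TLB flush is triggered. A minimal userspace model of the idiom — names are hypothetical, and volatile is only a stand-in for real MMIO accessor semantics:

#include <stdint.h>

static void write_ptes_and_flush(volatile uint32_t *gtt_entries,
				 const uint32_t *ptes, int n,
				 volatile uint32_t *flush_reg)
{
	int i;

	for (i = 0; i < n; i++)
		gtt_entries[i] = ptes[i];	/* iowrite32() in the driver */

	if (n != 0)
		(void)gtt_entries[n - 1];	/* posting read: drain the WC writes */

	*flush_reg = 1;			/* GFX_FLSH_CNTL_EN analogue */
	(void)*flush_reg;		/* POSTING_READ() analogue */
}

int main(void)
{
	uint32_t gtt[4], ptes[4] = { 1, 2, 3, 4 }, flush = 0;

	write_ptes_and_flush(gtt, ptes, 4, &flush);
	return 0;
}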
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
+	if (INTEL_INFO(dev)->gen < 6) {
+		unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+			AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+		intel_gtt_insert_sg_entries(obj->pages,
+					    obj->gtt_space->start >> PAGE_SHIFT,
+					    flags);
+	} else {
+		gen6_ggtt_bind_object(obj, cache_level);
+	}
 
-	intel_gtt_insert_sg_entries(obj->pages,
-				    obj->gtt_space->start >> PAGE_SHIFT,
-				    agp_type);
 	obj->has_global_gtt_mapping = 1;
 }
 
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
-	intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
+	i915_ggtt_clear_range(obj->base.dev,
+			      obj->gtt_space->start >> PAGE_SHIFT,
 			      obj->base.size >> PAGE_SHIFT);
 
 	obj->has_global_gtt_mapping = 0;
@@ -390,5 +544,165 @@
 	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
 
 	/* ... but ensure that we clear the entire range. */
-	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
+	i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE);
+}
+
+static int setup_scratch_page(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct page *page;
+	dma_addr_t dma_addr;
+
+	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
+	if (page == NULL)
+		return -ENOMEM;
+	get_page(page);
+	set_pages_uc(page, 1);
+
+#ifdef CONFIG_INTEL_IOMMU
+	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
+				PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(dev->pdev, dma_addr))
+		return -EINVAL;
+#else
+	dma_addr = page_to_phys(page);
+#endif
+	dev_priv->mm.gtt->scratch_page = page;
+	dev_priv->mm.gtt->scratch_page_dma = dma_addr;
+
+	return 0;
+}
+
+static void teardown_scratch_page(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	set_pages_wb(dev_priv->mm.gtt->scratch_page, 1);
+	pci_unmap_page(dev->pdev, dev_priv->mm.gtt->scratch_page_dma,
+		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	put_page(dev_priv->mm.gtt->scratch_page);
+	__free_page(dev_priv->mm.gtt->scratch_page);
+}
+
+static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+	return snb_gmch_ctl << 20;
+}
+
+static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl)
+{
+	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
+	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
+	return snb_gmch_ctl << 25; /* 32 MB units */
+}
+
+static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
+{
+	static const int stolen_decoder[] = {
+		0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352};
+	snb_gmch_ctl >>= IVB_GMCH_GMS_SHIFT;
+	snb_gmch_ctl &= IVB_GMCH_GMS_MASK;
+	return stolen_decoder[snb_gmch_ctl] << 20;
+}
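The three helpers above decode the SNB_GMCH_CTRL config word: GGMS (bits 9:8) selects the GTT table size in 1MB units, GMS (bits 7:3) selects stolen memory in 32MB units on gen6, and gen7 goes through a lookup table instead. A standalone check with a made-up register value:

#include <stdio.h>

#define SNB_GMCH_GGMS_SHIFT	8
#define SNB_GMCH_GGMS_MASK	0x3
#define SNB_GMCH_GMS_SHIFT	3
#define SNB_GMCH_GMS_MASK	0x1f

int main(void)
{
	unsigned short snb_gmch_ctl = (2 << SNB_GMCH_GGMS_SHIFT) |
				      (1 << SNB_GMCH_GMS_SHIFT);
	unsigned int gtt = ((snb_gmch_ctl >> SNB_GMCH_GGMS_SHIFT) &
			    SNB_GMCH_GGMS_MASK) << 20;
	unsigned int stolen = ((snb_gmch_ctl >> SNB_GMCH_GMS_SHIFT) &
			       SNB_GMCH_GMS_MASK) << 25;

	/* prints GTT = 2MB, stolen = 32MB */
	printf("GTT = %uMB, stolen = %uMB\n", gtt >> 20, stolen >> 20);
	return 0;
}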
+
+int i915_gem_gtt_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	phys_addr_t gtt_bus_addr;
+	u16 snb_gmch_ctl;
+	int ret;
+
+	/* On modern platforms we need not worry ourselves with the legacy
+	 * hostbridge query stuff. Skip it entirely.
+	 */
+	if (INTEL_INFO(dev)->gen < 6) {
+		ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
+		if (!ret) {
+			DRM_ERROR("failed to set up gmch\n");
+			return -EIO;
+		}
+
+		dev_priv->mm.gtt = intel_gtt_get();
+		if (!dev_priv->mm.gtt) {
+			DRM_ERROR("Failed to initialize GTT\n");
+			intel_gmch_remove();
+			return -ENODEV;
+		}
+		return 0;
+	}
+
+	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+	if (!dev_priv->mm.gtt)
+		return -ENOMEM;
+
+	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
+		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
+
+#ifdef CONFIG_INTEL_IOMMU
+	dev_priv->mm.gtt->needs_dmar = 1;
+#endif
+
+	/* For GEN6+ the PTEs for the ggtt live at a 2MB offset into BAR0 */
+	gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
+	dev_priv->mm.gtt->gma_bus_addr = pci_resource_start(dev->pdev, 2);
+
+	/* i9xx_setup */
+	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+	dev_priv->mm.gtt->gtt_total_entries =
+		gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t);
+	if (INTEL_INFO(dev)->gen < 7)
+		dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
+	else
+		dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl);
+
+	dev_priv->mm.gtt->gtt_mappable_entries = pci_resource_len(dev->pdev, 2) >> PAGE_SHIFT;
+	/* 64/512MB is the current min/max we actually know of, but this is just a
+	 * coarse sanity check.
+	 */
+	if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 ||
+	    dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) {
+		DRM_ERROR("Unknown GMADR entries (%d)\n",
+			  dev_priv->mm.gtt->gtt_mappable_entries);
+		ret = -ENXIO;
+		goto err_out;
+	}
+
+	ret = setup_scratch_page(dev);
+	if (ret) {
+		DRM_ERROR("Scratch setup failed\n");
+		goto err_out;
+	}
+
+	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
+					   dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
+	if (!dev_priv->mm.gtt->gtt) {
+		DRM_ERROR("Failed to map the gtt page table\n");
+		teardown_scratch_page(dev);
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
+	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
+	DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
+	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
+	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
+
+	return 0;
+
+err_out:
+	kfree(dev_priv->mm.gtt);
+	if (INTEL_INFO(dev)->gen < 6)
+		intel_gmch_remove();
+	return ret;
+}
+
+void i915_gem_gtt_fini(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	iounmap(dev_priv->mm.gtt->gtt);
+	teardown_scratch_page(dev);
+	if (INTEL_INFO(dev)->gen < 6)
+		intel_gmch_remove();
+	kfree(dev_priv->mm.gtt);
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 32e1bda..a4dc97f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -122,7 +122,10 @@
 i915_pipe_enabled(struct drm_device *dev, int pipe)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	return I915_READ(PIPECONF(pipe)) & PIPECONF_ENABLE;
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
+
+	return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_ENABLE;
 }
 
 /* Called from drm generic code, passed a 'crtc', which
@@ -182,6 +185,8 @@
 	int vbl_start, vbl_end, htotal, vtotal;
 	bool in_vbl = true;
 	int ret = 0;
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
 
 	if (!i915_pipe_enabled(dev, pipe)) {
 		DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled "
@@ -190,7 +195,7 @@
 	}
 
 	/* Get vtotal. */
-	vtotal = 1 + ((I915_READ(VTOTAL(pipe)) >> 16) & 0x1fff);
+	vtotal = 1 + ((I915_READ(VTOTAL(cpu_transcoder)) >> 16) & 0x1fff);
 
 	if (INTEL_INFO(dev)->gen >= 4) {
 		/* No obvious pixelcount register. Only query vertical
@@ -210,13 +215,13 @@
 		 */
 		position = (I915_READ(PIPEFRAMEPIXEL(pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT;
 
-		htotal = 1 + ((I915_READ(HTOTAL(pipe)) >> 16) & 0x1fff);
+		htotal = 1 + ((I915_READ(HTOTAL(cpu_transcoder)) >> 16) & 0x1fff);
 		*vpos = position / htotal;
 		*hpos = position - (*vpos * htotal);
 	}
 
 	/* Query vblank area. */
-	vbl = I915_READ(VBLANK(pipe));
+	vbl = I915_READ(VBLANK(cpu_transcoder));
 
 	/* Test position against vblank region. */
 	vbl_start = vbl & 0x1fff;
@@ -352,8 +357,7 @@
 	if (i915_enable_hangcheck) {
 		dev_priv->hangcheck_count = 0;
 		mod_timer(&dev_priv->hangcheck_timer,
-			  jiffies +
-			  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+			  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 	}
 }
 
@@ -374,7 +378,7 @@
 	if ((pm_iir & GEN6_PM_DEFERRED_EVENTS) == 0)
 		return;
 
-	mutex_lock(&dev_priv->dev->struct_mutex);
+	mutex_lock(&dev_priv->rps.hw_lock);
 
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD)
 		new_delay = dev_priv->rps.cur_delay + 1;
@@ -389,7 +393,7 @@
 		gen6_set_rps(dev_priv->dev, new_delay);
 	}
 
-	mutex_unlock(&dev_priv->dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
 
@@ -405,7 +409,7 @@
 static void ivybridge_parity_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
-						    parity_error_work);
+						    l3_parity.error_work);
 	u32 error_status, row, bank, subbank;
 	char *parity_event[5];
 	uint32_t misccpctl;
@@ -469,7 +473,7 @@
 	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
-	queue_work(dev_priv->wq, &dev_priv->parity_error_work);
+	queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
 }
 
 static void snb_gt_irq_handler(struct drm_device *dev,
@@ -520,7 +524,7 @@
 	queue_work(dev_priv->wq, &dev_priv->rps.work);
 }
 
-static irqreturn_t valleyview_irq_handler(DRM_IRQ_ARGS)
+static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -606,6 +610,9 @@
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int pipe;
 
+	if (pch_iir & SDE_HOTPLUG_MASK)
+		queue_work(dev_priv->wq, &dev_priv->hotplug_work);
+
 	if (pch_iir & SDE_AUDIO_POWER_MASK)
 		DRM_DEBUG_DRIVER("PCH audio power change on port %d\n",
 				 (pch_iir & SDE_AUDIO_POWER_MASK) >>
@@ -646,6 +653,9 @@
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int pipe;
 
+	if (pch_iir & SDE_HOTPLUG_MASK_CPT)
+		queue_work(dev_priv->wq, &dev_priv->hotplug_work);
+
 	if (pch_iir & SDE_AUDIO_POWER_MASK_CPT)
 		DRM_DEBUG_DRIVER("PCH audio power change on port %d\n",
 				 (pch_iir & SDE_AUDIO_POWER_MASK_CPT) >>
@@ -670,7 +680,7 @@
 					 I915_READ(FDI_RX_IIR(pipe)));
 }
 
-static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS)
+static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -709,8 +719,6 @@
 		if (de_iir & DE_PCH_EVENT_IVB) {
 			u32 pch_iir = I915_READ(SDEIIR);
 
-			if (pch_iir & SDE_HOTPLUG_MASK_CPT)
-				queue_work(dev_priv->wq, &dev_priv->hotplug_work);
 			cpt_irq_handler(dev, pch_iir);
 
 			/* clear PCH hotplug event before clear CPU irq */
@@ -745,13 +753,12 @@
 		notify_ring(dev, &dev_priv->ring[VCS]);
 }
 
-static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS)
+static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int ret = IRQ_NONE;
 	u32 de_iir, gt_iir, de_ier, pch_iir, pm_iir;
-	u32 hotplug_mask;
 
 	atomic_inc(&dev_priv->irq_received);
 
@@ -769,11 +776,6 @@
 	    (!IS_GEN6(dev) || pm_iir == 0))
 		goto done;
 
-	if (HAS_PCH_CPT(dev))
-		hotplug_mask = SDE_HOTPLUG_MASK_CPT;
-	else
-		hotplug_mask = SDE_HOTPLUG_MASK;
-
 	ret = IRQ_HANDLED;
 
 	if (IS_GEN5(dev))
@@ -802,8 +804,6 @@
 
 	/* check event from PCH */
 	if (de_iir & DE_PCH_EVENT) {
-		if (pch_iir & hotplug_mask)
-			queue_work(dev_priv->wq, &dev_priv->hotplug_work);
 		if (HAS_PCH_CPT(dev))
 			cpt_irq_handler(dev, pch_iir);
 		else
@@ -1120,6 +1120,8 @@
 			= I915_READ(RING_SYNC_0(ring->mmio_base));
 		error->semaphore_mboxes[ring->id][1]
 			= I915_READ(RING_SYNC_1(ring->mmio_base));
+		error->semaphore_seqno[ring->id][0] = ring->sync_seqno[0];
+		error->semaphore_seqno[ring->id][1] = ring->sync_seqno[1];
 	}
 
 	if (INTEL_INFO(dev)->gen >= 4) {
@@ -1464,7 +1466,9 @@
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
 
-	if (work == NULL || work->pending || !work->enable_stall_check) {
+	if (work == NULL ||
+	    atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE ||
+	    !work->enable_stall_check) {
 		/* Either the pending flip IRQ arrived, or we're too early. Don't check */
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
@@ -1751,7 +1755,7 @@
 repeat:
 	/* Reset timer case chip hangs without another request being added */
 	mod_timer(&dev_priv->hangcheck_timer,
-		  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+		  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 }
 
 /* drm_dma.h hooks
@@ -1956,6 +1960,7 @@
 	u32 enable_mask;
 	u32 hotplug_en = I915_READ(PORT_HOTPLUG_EN);
 	u32 pipestat_enable = PLANE_FLIP_DONE_INT_EN_VLV;
+	u32 render_irqs;
 	u16 msid;
 
 	enable_mask = I915_DISPLAY_PORT_INTERRUPT;
@@ -1995,21 +2000,12 @@
 	I915_WRITE(VLV_IIR, 0xffffffff);
 	I915_WRITE(VLV_IIR, 0xffffffff);
 
-	dev_priv->gt_irq_mask = ~0;
-
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-	I915_WRITE(GTIER, GT_GEN6_BLT_FLUSHDW_NOTIFY_INTERRUPT |
-		   GT_GEN6_BLT_CS_ERROR_INTERRUPT |
-		   GT_GEN6_BLT_USER_INTERRUPT |
-		   GT_GEN6_BSD_USER_INTERRUPT |
-		   GT_GEN6_BSD_CS_ERROR_INTERRUPT |
-		   GT_GEN7_L3_PARITY_ERROR_INTERRUPT |
-		   GT_PIPE_NOTIFY |
-		   GT_RENDER_CS_ERROR_INTERRUPT |
-		   GT_SYNC_STATUS |
-		   GT_USER_INTERRUPT);
+
+	render_irqs = GT_USER_INTERRUPT | GEN6_BSD_USER_INTERRUPT |
+		GEN6_BLITTER_USER_INTERRUPT;
+	I915_WRITE(GTIER, render_irqs);
 	POSTING_READ(GTIER);
 
 	/* ack & enable invalid PTE error interrupts */
@@ -2019,7 +2015,6 @@
 #endif
 
 	I915_WRITE(VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE);
-#if 0 /* FIXME: check register definitions; some have moved */
 	/* Note HDMI and DP share bits */
 	if (dev_priv->hotplug_supported_mask & HDMIB_HOTPLUG_INT_STATUS)
 		hotplug_en |= HDMIB_HOTPLUG_INT_EN;
@@ -2027,15 +2022,14 @@
 		hotplug_en |= HDMIC_HOTPLUG_INT_EN;
 	if (dev_priv->hotplug_supported_mask & HDMID_HOTPLUG_INT_STATUS)
 		hotplug_en |= HDMID_HOTPLUG_INT_EN;
-	if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS)
+	if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS_I915)
 		hotplug_en |= SDVOC_HOTPLUG_INT_EN;
-	if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS)
+	if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS_I915)
 		hotplug_en |= SDVOB_HOTPLUG_INT_EN;
 	if (dev_priv->hotplug_supported_mask & CRT_HOTPLUG_INT_STATUS) {
 		hotplug_en |= CRT_HOTPLUG_INT_EN;
 		hotplug_en |= CRT_HOTPLUG_VOLTAGE_COMPARE_50;
 	}
-#endif
 
 	I915_WRITE(PORT_HOTPLUG_EN, hotplug_en);
 
@@ -2129,7 +2123,7 @@
 	return 0;
 }
 
-static irqreturn_t i8xx_irq_handler(DRM_IRQ_ARGS)
+static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -2307,7 +2301,7 @@
 	return 0;
 }
 
-static irqreturn_t i915_irq_handler(DRM_IRQ_ARGS)
+static irqreturn_t i915_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -2545,7 +2539,7 @@
 	return 0;
 }
 
-static irqreturn_t i965_irq_handler(DRM_IRQ_ARGS)
+static irqreturn_t i965_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -2691,7 +2685,7 @@
 	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
 	INIT_WORK(&dev_priv->error_work, i915_error_work_func);
 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
-	INIT_WORK(&dev_priv->parity_error_work, ivybridge_parity_work);
+	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
 	dev->driver->get_vblank_counter = i915_get_vblank_counter;
 	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a4162dd..3f75cfa 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -26,6 +26,7 @@
 #define _I915_REG_H_
 
 #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a)))
+#define _TRANSCODER(tran, a, b) ((a) + (tran)*((b)-(a)))
 
 #define _PORT(port, a, b) ((a) + (port)*((b)-(a)))
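The new _TRANSCODER() macro is the same two-anchor interpolation trick as _PIPE() and _PORT(): given the instance-0 and instance-1 register addresses, instance n lands at a + n*(b-a), which also reaches a third instance whenever the hardware keeps the stride. A standalone check with representative A/B addresses (illustrative values, not taken from this header):

#include <stdio.h>

#define _TRANSCODER(tran, a, b) ((a) + (tran)*((b)-(a)))

#define _HTOTAL_A	0x60000
#define _HTOTAL_B	0x61000
#define HTOTAL(trans)	_TRANSCODER(trans, _HTOTAL_A, _HTOTAL_B)

int main(void)
{
	printf("HTOTAL(0) = 0x%x\n", HTOTAL(0));	/* 0x60000 */
	printf("HTOTAL(1) = 0x%x\n", HTOTAL(1));	/* 0x61000 */
	printf("HTOTAL(2) = 0x%x\n", HTOTAL(2));	/* 0x62000, transcoder C */
	return 0;
}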
 
@@ -40,6 +41,14 @@
  */
 #define INTEL_GMCH_CTRL		0x52
 #define INTEL_GMCH_VGA_DISABLE  (1 << 1)
+#define SNB_GMCH_CTRL		0x50
+#define    SNB_GMCH_GGMS_SHIFT	8 /* GTT Graphics Memory Size */
+#define    SNB_GMCH_GGMS_MASK	0x3
+#define    SNB_GMCH_GMS_SHIFT   3 /* Graphics Mode Select */
+#define    SNB_GMCH_GMS_MASK    0x1f
+#define    IVB_GMCH_GMS_SHIFT   4
+#define    IVB_GMCH_GMS_MASK    0xf
+
 
 /* PCI config space */
 
@@ -105,23 +114,6 @@
 #define  GEN6_GRDOM_MEDIA		(1 << 2)
 #define  GEN6_GRDOM_BLT			(1 << 3)
 
-/* PPGTT stuff */
-#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
-
-#define GEN6_PDE_VALID			(1 << 0)
-#define GEN6_PDE_LARGE_PAGE		(2 << 0) /* use 32kb pages */
-/* gen6+ has bit 11-4 for physical addr bit 39-32 */
-#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-
-#define GEN6_PTE_VALID			(1 << 0)
-#define GEN6_PTE_UNCACHED		(1 << 1)
-#define HSW_PTE_UNCACHED		(0)
-#define GEN6_PTE_CACHE_LLC		(2 << 1)
-#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
-#define GEN6_PTE_CACHE_BITS		(3 << 1)
-#define GEN6_PTE_GFDT			(1 << 3)
-#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
-
 #define RING_PP_DIR_BASE(ring)		((ring)->mmio_base+0x228)
 #define RING_PP_DIR_BASE_READ(ring)	((ring)->mmio_base+0x518)
 #define RING_PP_DIR_DCLV(ring)		((ring)->mmio_base+0x220)
@@ -241,11 +233,18 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*x-1)
 #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
-#define   MI_INVALIDATE_TLB	(1<<18)
-#define   MI_INVALIDATE_BSD	(1<<7)
+#define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
+#define   MI_INVALIDATE_TLB		(1<<18)
+#define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_INVALIDATE_BSD		(1<<7)
+#define   MI_FLUSH_DW_USE_GTT		(1<<2)
+#define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
-#define   MI_BATCH_NON_SECURE	(1)
-#define   MI_BATCH_NON_SECURE_I965 (1<<8)
+#define   MI_BATCH_NON_SECURE		(1)
+/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
+#define   MI_BATCH_NON_SECURE_I965	(1<<8)
+#define   MI_BATCH_PPGTT_HSW		(1<<8)
+#define   MI_BATCH_NON_SECURE_HSW	(1<<13)
 #define MI_BATCH_BUFFER_START	MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6+ */
@@ -369,6 +368,7 @@
 #define   DPIO_PLL_MODESEL_SHIFT	24 /* 3 bits */
 #define   DPIO_BIAS_CURRENT_CTL_SHIFT	21 /* 3 bits, always 0x7 */
 #define   DPIO_PLL_REFCLK_SEL_SHIFT	16 /* 2 bits */
+#define   DPIO_PLL_REFCLK_SEL_MASK	3
 #define   DPIO_DRIVER_CTL_SHIFT		12 /* always set to 0x8 */
 #define   DPIO_CLK_BIAS_CTL_SHIFT	8 /* always set to 0x5 */
 #define _DPIO_REFSFR_B			0x8034
@@ -384,6 +384,9 @@
 
 #define DPIO_FASTCLK_DISABLE		0x8100
 
+#define DPIO_DATA_CHANNEL1		0x8220
+#define DPIO_DATA_CHANNEL2		0x8420
+
 /*
  * Fence registers
  */
@@ -521,6 +524,7 @@
  */
 # define _3D_CHICKEN2_WM_READ_PIPELINED			(1 << 14)
 #define _3D_CHICKEN3	0x02090
+#define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL		(1 << 10)
 #define  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL		(1 << 5)
 
 #define MI_MODE		0x0209c
@@ -547,6 +551,8 @@
 #define IIR		0x020a4
 #define IMR		0x020a8
 #define ISR		0x020ac
+#define VLV_GUNIT_CLOCK_GATE	0x182060
+#define   GCFG_DIS		(1<<8)
 #define VLV_IIR_RW	0x182084
 #define VLV_IER		0x1820a0
 #define VLV_IIR		0x1820a4
@@ -661,6 +667,7 @@
 #define   MI_ARB_DISPLAY_PRIORITY_B_A		(1 << 0)	/* display B > display A */
 
 #define CACHE_MODE_0	0x02120 /* 915+ only */
+#define   CM0_PIPELINED_RENDER_FLUSH_DISABLE (1<<8)
 #define   CM0_IZ_OPT_DISABLE      (1<<6)
 #define   CM0_ZR_OPT_DISABLE      (1<<5)
 #define	  CM0_STC_EVICT_DISABLE_LRA_SNB	(1<<5)
@@ -670,6 +677,8 @@
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
 #define BB_ADDR		0x02140 /* 8 bytes */
 #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
+#define GFX_FLSH_CNTL_GEN6	0x101008
+#define   GFX_FLSH_CNTL_EN	(1<<0)
 #define ECOSKPD		0x021d0
 #define   ECO_GATING_CX_ONLY	(1<<3)
 #define   ECO_FLIP_DONE		(1<<0)
@@ -1559,14 +1568,14 @@
 #define _VSYNCSHIFT_B	0x61028
 
 
-#define HTOTAL(pipe) _PIPE(pipe, _HTOTAL_A, _HTOTAL_B)
-#define HBLANK(pipe) _PIPE(pipe, _HBLANK_A, _HBLANK_B)
-#define HSYNC(pipe) _PIPE(pipe, _HSYNC_A, _HSYNC_B)
-#define VTOTAL(pipe) _PIPE(pipe, _VTOTAL_A, _VTOTAL_B)
-#define VBLANK(pipe) _PIPE(pipe, _VBLANK_A, _VBLANK_B)
-#define VSYNC(pipe) _PIPE(pipe, _VSYNC_A, _VSYNC_B)
+#define HTOTAL(trans) _TRANSCODER(trans, _HTOTAL_A, _HTOTAL_B)
+#define HBLANK(trans) _TRANSCODER(trans, _HBLANK_A, _HBLANK_B)
+#define HSYNC(trans) _TRANSCODER(trans, _HSYNC_A, _HSYNC_B)
+#define VTOTAL(trans) _TRANSCODER(trans, _VTOTAL_A, _VTOTAL_B)
+#define VBLANK(trans) _TRANSCODER(trans, _VBLANK_A, _VBLANK_B)
+#define VSYNC(trans) _TRANSCODER(trans, _VSYNC_A, _VSYNC_B)
 #define BCLRPAT(pipe) _PIPE(pipe, _BCLRPAT_A, _BCLRPAT_B)
-#define VSYNCSHIFT(pipe) _PIPE(pipe, _VSYNCSHIFT_A, _VSYNCSHIFT_B)
+#define VSYNCSHIFT(trans) _TRANSCODER(trans, _VSYNCSHIFT_A, _VSYNCSHIFT_B)
 
 /* VGA port control */
 #define ADPA			0x61100
@@ -2641,6 +2650,7 @@
 #define   PIPECONF_GAMMA		(1<<24)
 #define   PIPECONF_FORCE_BORDER	(1<<25)
 #define   PIPECONF_INTERLACE_MASK	(7 << 21)
+#define   PIPECONF_INTERLACE_MASK_HSW	(3 << 21)
 /* Note that pre-gen3 does not support interlaced display directly. Panel
  * fitting must be disabled on pre-ilk for interlaced. */
 #define   PIPECONF_PROGRESSIVE			(0 << 21)
@@ -2711,7 +2721,7 @@
 #define   PIPE_12BPC				(3 << 5)
 
 #define PIPESRC(pipe) _PIPE(pipe, _PIPEASRC, _PIPEBSRC)
-#define PIPECONF(pipe) _PIPE(pipe, _PIPEACONF, _PIPEBCONF)
+#define PIPECONF(tran) _TRANSCODER(tran, _PIPEACONF, _PIPEBCONF)
 #define PIPEDSL(pipe)  _PIPE(pipe, _PIPEADSL, _PIPEBDSL)
 #define PIPEFRAME(pipe) _PIPE(pipe, _PIPEAFRAMEHIGH, _PIPEBFRAMEHIGH)
 #define PIPEFRAMEPIXEL(pipe)  _PIPE(pipe, _PIPEAFRAMEPIXEL, _PIPEBFRAMEPIXEL)
@@ -2998,12 +3008,19 @@
 #define   DISPPLANE_GAMMA_ENABLE		(1<<30)
 #define   DISPPLANE_GAMMA_DISABLE		0
 #define   DISPPLANE_PIXFORMAT_MASK		(0xf<<26)
+#define   DISPPLANE_YUV422			(0x0<<26)
 #define   DISPPLANE_8BPP			(0x2<<26)
-#define   DISPPLANE_15_16BPP			(0x4<<26)
-#define   DISPPLANE_16BPP			(0x5<<26)
-#define   DISPPLANE_32BPP_NO_ALPHA		(0x6<<26)
-#define   DISPPLANE_32BPP			(0x7<<26)
-#define   DISPPLANE_32BPP_30BIT_NO_ALPHA	(0xa<<26)
+#define   DISPPLANE_BGRA555			(0x3<<26)
+#define   DISPPLANE_BGRX555			(0x4<<26)
+#define   DISPPLANE_BGRX565			(0x5<<26)
+#define   DISPPLANE_BGRX888			(0x6<<26)
+#define   DISPPLANE_BGRA888			(0x7<<26)
+#define   DISPPLANE_RGBX101010			(0x8<<26)
+#define   DISPPLANE_RGBA101010			(0x9<<26)
+#define   DISPPLANE_BGRX101010			(0xa<<26)
+#define   DISPPLANE_RGBX161616			(0xc<<26)
+#define   DISPPLANE_RGBX888			(0xe<<26)
+#define   DISPPLANE_RGBA888			(0xf<<26)
 #define   DISPPLANE_STEREO_ENABLE		(1<<25)
 #define   DISPPLANE_STEREO_DISABLE		0
 #define   DISPPLANE_SEL_PIPE_SHIFT		24
@@ -3024,6 +3041,8 @@
 #define _DSPASIZE		0x70190
 #define _DSPASURF		0x7019C /* 965+ only */
 #define _DSPATILEOFF		0x701A4 /* 965+ only */
+#define _DSPAOFFSET		0x701A4 /* HSW */
+#define _DSPASURFLIVE		0x701AC
 
 #define DSPCNTR(plane) _PIPE(plane, _DSPACNTR, _DSPBCNTR)
 #define DSPADDR(plane) _PIPE(plane, _DSPAADDR, _DSPBADDR)
@@ -3033,6 +3052,8 @@
 #define DSPSURF(plane) _PIPE(plane, _DSPASURF, _DSPBSURF)
 #define DSPTILEOFF(plane) _PIPE(plane, _DSPATILEOFF, _DSPBTILEOFF)
 #define DSPLINOFF(plane) DSPADDR(plane)
+#define DSPOFFSET(plane) _PIPE(plane, _DSPAOFFSET, _DSPBOFFSET)
+#define DSPSURFLIVE(plane) _PIPE(plane, _DSPASURFLIVE, _DSPBSURFLIVE)
 
 /* Display/Sprite base address macros */
 #define DISP_BASEADDR_MASK	(0xfffff000)
@@ -3078,6 +3099,8 @@
 #define _DSPBSIZE		0x71190
 #define _DSPBSURF		0x7119C
 #define _DSPBTILEOFF		0x711A4
+#define _DSPBOFFSET		0x711A4
+#define _DSPBSURFLIVE		0x711AC
 
 /* Sprite A control */
 #define _DVSACNTR		0x72180
@@ -3143,6 +3166,7 @@
 #define DVSTILEOFF(pipe) _PIPE(pipe, _DVSATILEOFF, _DVSBTILEOFF)
 #define DVSKEYVAL(pipe) _PIPE(pipe, _DVSAKEYVAL, _DVSBKEYVAL)
 #define DVSKEYMSK(pipe) _PIPE(pipe, _DVSAKEYMSK, _DVSBKEYMSK)
+#define DVSSURFLIVE(pipe) _PIPE(pipe, _DVSASURFLIVE, _DVSBSURFLIVE)
 
 #define _SPRA_CTL		0x70280
 #define   SPRITE_ENABLE			(1<<31)
@@ -3177,6 +3201,8 @@
 #define _SPRA_SURF		0x7029c
 #define _SPRA_KEYMAX		0x702a0
 #define _SPRA_TILEOFF		0x702a4
+#define _SPRA_OFFSET		0x702a4
+#define _SPRA_SURFLIVE		0x702ac
 #define _SPRA_SCALE		0x70304
 #define   SPRITE_SCALE_ENABLE	(1<<31)
 #define   SPRITE_FILTER_MASK	(3<<29)
@@ -3197,6 +3223,8 @@
 #define _SPRB_SURF		0x7129c
 #define _SPRB_KEYMAX		0x712a0
 #define _SPRB_TILEOFF		0x712a4
+#define _SPRB_OFFSET		0x712a4
+#define _SPRB_SURFLIVE		0x712ac
 #define _SPRB_SCALE		0x71304
 #define _SPRB_GAMC		0x71400
 
@@ -3210,8 +3238,10 @@
 #define SPRSURF(pipe) _PIPE(pipe, _SPRA_SURF, _SPRB_SURF)
 #define SPRKEYMAX(pipe) _PIPE(pipe, _SPRA_KEYMAX, _SPRB_KEYMAX)
 #define SPRTILEOFF(pipe) _PIPE(pipe, _SPRA_TILEOFF, _SPRB_TILEOFF)
+#define SPROFFSET(pipe) _PIPE(pipe, _SPRA_OFFSET, _SPRB_OFFSET)
 #define SPRSCALE(pipe) _PIPE(pipe, _SPRA_SCALE, _SPRB_SCALE)
 #define SPRGAMC(pipe) _PIPE(pipe, _SPRA_GAMC, _SPRB_GAMC)
+#define SPRSURFLIVE(pipe) _PIPE(pipe, _SPRA_SURFLIVE, _SPRB_SURFLIVE)
 
 /* VBIOS regs */
 #define VGACNTRL		0x71400
@@ -3246,12 +3276,6 @@
 #define DISPLAY_PORT_PLL_BIOS_1         0x46010
 #define DISPLAY_PORT_PLL_BIOS_2         0x46014
 
-#define PCH_DSPCLK_GATE_D	0x42020
-# define DPFCUNIT_CLOCK_GATE_DISABLE		(1 << 9)
-# define DPFCRUNIT_CLOCK_GATE_DISABLE		(1 << 8)
-# define DPFDUNIT_CLOCK_GATE_DISABLE		(1 << 7)
-# define DPARBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
-
 #define PCH_3DCGDIS0		0x46020
 # define MARIUNIT_CLOCK_GATE_DISABLE		(1 << 18)
 # define SVSMUNIT_CLOCK_GATE_DISABLE		(1 << 1)
@@ -3301,20 +3325,22 @@
 #define _PIPEB_LINK_M2           0x61048
 #define _PIPEB_LINK_N2           0x6104c
 
-#define PIPE_DATA_M1(pipe) _PIPE(pipe, _PIPEA_DATA_M1, _PIPEB_DATA_M1)
-#define PIPE_DATA_N1(pipe) _PIPE(pipe, _PIPEA_DATA_N1, _PIPEB_DATA_N1)
-#define PIPE_DATA_M2(pipe) _PIPE(pipe, _PIPEA_DATA_M2, _PIPEB_DATA_M2)
-#define PIPE_DATA_N2(pipe) _PIPE(pipe, _PIPEA_DATA_N2, _PIPEB_DATA_N2)
-#define PIPE_LINK_M1(pipe) _PIPE(pipe, _PIPEA_LINK_M1, _PIPEB_LINK_M1)
-#define PIPE_LINK_N1(pipe) _PIPE(pipe, _PIPEA_LINK_N1, _PIPEB_LINK_N1)
-#define PIPE_LINK_M2(pipe) _PIPE(pipe, _PIPEA_LINK_M2, _PIPEB_LINK_M2)
-#define PIPE_LINK_N2(pipe) _PIPE(pipe, _PIPEA_LINK_N2, _PIPEB_LINK_N2)
+#define PIPE_DATA_M1(tran) _TRANSCODER(tran, _PIPEA_DATA_M1, _PIPEB_DATA_M1)
+#define PIPE_DATA_N1(tran) _TRANSCODER(tran, _PIPEA_DATA_N1, _PIPEB_DATA_N1)
+#define PIPE_DATA_M2(tran) _TRANSCODER(tran, _PIPEA_DATA_M2, _PIPEB_DATA_M2)
+#define PIPE_DATA_N2(tran) _TRANSCODER(tran, _PIPEA_DATA_N2, _PIPEB_DATA_N2)
+#define PIPE_LINK_M1(tran) _TRANSCODER(tran, _PIPEA_LINK_M1, _PIPEB_LINK_M1)
+#define PIPE_LINK_N1(tran) _TRANSCODER(tran, _PIPEA_LINK_N1, _PIPEB_LINK_N1)
+#define PIPE_LINK_M2(tran) _TRANSCODER(tran, _PIPEA_LINK_M2, _PIPEB_LINK_M2)
+#define PIPE_LINK_N2(tran) _TRANSCODER(tran, _PIPEA_LINK_N2, _PIPEB_LINK_N2)
 
 /* CPU panel fitter */
 /* IVB+ has 3 fitters, 0 is 7x5 capable, the other two only 3x3 */
 #define _PFA_CTL_1               0x68080
 #define _PFB_CTL_1               0x68880
 #define  PF_ENABLE              (1<<31)
+#define  PF_PIPE_SEL_MASK_IVB	(3<<29)
+#define  PF_PIPE_SEL_IVB(pipe)	((pipe)<<29)
 #define  PF_FILTER_MASK		(3<<23)
 #define  PF_FILTER_PROGRAMMED	(0<<23)
 #define  PF_FILTER_MED_3x3	(1<<23)
@@ -3423,15 +3449,13 @@
 #define  ILK_HDCP_DISABLE		(1<<25)
 #define  ILK_eDP_A_DISABLE		(1<<24)
 #define  ILK_DESKTOP			(1<<23)
-#define ILK_DSPCLK_GATE		0x42020
-#define  IVB_VRHUNIT_CLK_GATE	(1<<28)
-#define  ILK_DPARB_CLK_GATE	(1<<5)
-#define  ILK_DPFD_CLK_GATE	(1<<7)
 
-/* According to spec this bit 7/8/9 of 0x42020 should be set to enable FBC */
-#define   ILK_CLK_FBC		(1<<7)
-#define   ILK_DPFC_DIS1		(1<<8)
-#define   ILK_DPFC_DIS2		(1<<9)
+#define ILK_DSPCLK_GATE_D			0x42020
+#define   ILK_VRHUNIT_CLOCK_GATE_DISABLE	(1 << 28)
+#define   ILK_DPFCUNIT_CLOCK_GATE_DISABLE	(1 << 9)
+#define   ILK_DPFCRUNIT_CLOCK_GATE_DISABLE	(1 << 8)
+#define   ILK_DPFDUNIT_CLOCK_GATE_ENABLE	(1 << 7)
+#define   ILK_DPARBUNIT_CLOCK_GATE_ENABLE	(1 << 5)
 
 #define IVB_CHICKEN3	0x4200c
 # define CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE	(1 << 5)
@@ -3447,14 +3471,21 @@
 
 #define GEN7_L3CNTLREG1				0xB01C
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C4FFF8C
+#define  GEN7_L3AGDIS				(1<<19)
 
 #define GEN7_L3_CHICKEN_MODE_REGISTER		0xB030
 #define  GEN7_WA_L3_CHICKEN_MODE				0x20000000
 
+#define GEN7_L3SQCREG4				0xb034
+#define  L3SQ_URB_READ_CAM_MATCH_DISABLE	(1<<27)
+
 /* WaCatErrorRejectionIssue */
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
 
+#define HSW_FUSE_STRAP		0x42014
+#define  HSW_CDCLK_LIMIT	(1 << 24)
+
 /* PCH */
 
 /* south display engine interrupt: IBX */
@@ -3686,7 +3717,7 @@
 #define TVIDEO_DIP_DATA(pipe) _PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B)
 #define TVIDEO_DIP_GCP(pipe) _PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B)
 
-#define VLV_VIDEO_DIP_CTL_A		0x60220
+#define VLV_VIDEO_DIP_CTL_A		0x60200
 #define VLV_VIDEO_DIP_DATA_A		0x60208
 #define VLV_VIDEO_DIP_GDCP_PAYLOAD_A	0x60210
 
@@ -3795,18 +3826,26 @@
 #define  TRANS_6BPC             (2<<5)
 #define  TRANS_12BPC            (3<<5)
 
+#define _TRANSA_CHICKEN1	 0xf0060
+#define _TRANSB_CHICKEN1	 0xf1060
+#define TRANS_CHICKEN1(pipe) _PIPE(pipe, _TRANSA_CHICKEN1, _TRANSB_CHICKEN1)
+#define  TRANS_CHICKEN1_DP0UNIT_GC_DISABLE	(1<<4)
 #define _TRANSA_CHICKEN2	 0xf0064
 #define _TRANSB_CHICKEN2	 0xf1064
 #define TRANS_CHICKEN2(pipe) _PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2)
-#define   TRANS_AUTOTRAIN_GEN_STALL_DIS	(1<<31)
+#define  TRANS_CHICKEN2_TIMING_OVERRIDE		(1<<31)
+
 
 #define SOUTH_CHICKEN1		0xc2000
 #define  FDIA_PHASE_SYNC_SHIFT_OVR	19
 #define  FDIA_PHASE_SYNC_SHIFT_EN	18
-#define FDI_PHASE_SYNC_OVR(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_OVR - ((pipe) * 2)))
-#define FDI_PHASE_SYNC_EN(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_EN - ((pipe) * 2)))
+#define  FDI_PHASE_SYNC_OVR(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_OVR - ((pipe) * 2)))
+#define  FDI_PHASE_SYNC_EN(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_EN - ((pipe) * 2)))
+#define  FDI_BC_BIFURCATION_SELECT	(1 << 12)
 #define SOUTH_CHICKEN2		0xc2004
-#define  DPLS_EDP_PPS_FIX_DIS	(1<<0)
+#define  FDI_MPHY_IOSFSB_RESET_STATUS	(1<<13)
+#define  FDI_MPHY_IOSFSB_RESET_CTL	(1<<12)
+#define  DPLS_EDP_PPS_FIX_DIS		(1<<0)
 
 #define _FDI_RXA_CHICKEN         0xc200c
 #define _FDI_RXB_CHICKEN         0xc2010
@@ -3816,6 +3855,7 @@
 
 #define SOUTH_DSPCLK_GATE_D	0xc2020
 #define  PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1<<29)
+#define  PCH_LP_PARTITION_LEVEL_DISABLE  (1<<12)
 
 /* CPU: FDI_TX */
 #define _FDI_TXA_CTL             0x60100
@@ -3877,6 +3917,7 @@
 #define  FDI_FS_ERRC_ENABLE		(1<<27)
 #define  FDI_FE_ERRC_ENABLE		(1<<26)
 #define  FDI_DP_PORT_WIDTH_X8           (7<<19)
+#define  FDI_RX_POLARITY_REVERSED_LPT	(1<<16)
 #define  FDI_8BPC                       (0<<16)
 #define  FDI_10BPC                      (1<<16)
 #define  FDI_6BPC                       (2<<16)
@@ -3901,16 +3942,21 @@
 #define  FDI_PORT_WIDTH_2X_LPT			(1<<19)
 #define  FDI_PORT_WIDTH_1X_LPT			(0<<19)
 
-#define _FDI_RXA_MISC            0xf0010
-#define _FDI_RXB_MISC            0xf1010
+#define _FDI_RXA_MISC			0xf0010
+#define _FDI_RXB_MISC			0xf1010
+#define  FDI_RX_PWRDN_LANE1_MASK	(3<<26)
+#define  FDI_RX_PWRDN_LANE1_VAL(x)	((x)<<26)
+#define  FDI_RX_PWRDN_LANE0_MASK	(3<<24)
+#define  FDI_RX_PWRDN_LANE0_VAL(x)	((x)<<24)
+#define  FDI_RX_TP1_TO_TP2_48		(2<<20)
+#define  FDI_RX_TP1_TO_TP2_64		(3<<20)
+#define  FDI_RX_FDI_DELAY_90		(0x90<<0)
+#define FDI_RX_MISC(pipe) _PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
+
 #define _FDI_RXA_TUSIZE1         0xf0030
 #define _FDI_RXA_TUSIZE2         0xf0038
 #define _FDI_RXB_TUSIZE1         0xf1030
 #define _FDI_RXB_TUSIZE2         0xf1038
-#define  FDI_RX_TP1_TO_TP2_48	(2<<20)
-#define  FDI_RX_TP1_TO_TP2_64	(3<<20)
-#define  FDI_RX_FDI_DELAY_90	(0x90<<0)
-#define FDI_RX_MISC(pipe) _PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
 #define FDI_RX_TUSIZE1(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE1, _FDI_RXB_TUSIZE1)
 #define FDI_RX_TUSIZE2(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE2, _FDI_RXB_TUSIZE2)
 
@@ -4003,6 +4049,11 @@
 #define  PANEL_LIGHT_ON_DELAY_SHIFT	0
 
 #define PCH_PP_OFF_DELAYS	0xc720c
+#define  PANEL_POWER_PORT_SELECT_MASK	(0x3 << 30)
+#define  PANEL_POWER_PORT_LVDS		(0 << 30)
+#define  PANEL_POWER_PORT_DP_A		(1 << 30)
+#define  PANEL_POWER_PORT_DP_C		(2 << 30)
+#define  PANEL_POWER_PORT_DP_D		(3 << 30)
 #define  PANEL_POWER_DOWN_DELAY_MASK	(0x1fff0000)
 #define  PANEL_POWER_DOWN_DELAY_SHIFT	16
 #define  PANEL_LIGHT_OFF_DELAY_MASK	(0x1fff)
@@ -4050,7 +4101,7 @@
 #define TRANS_DP_CTL_A		0xe0300
 #define TRANS_DP_CTL_B		0xe1300
 #define TRANS_DP_CTL_C		0xe2300
-#define TRANS_DP_CTL(pipe)	(TRANS_DP_CTL_A + (pipe) * 0x01000)
+#define TRANS_DP_CTL(pipe)	_PIPE(pipe, TRANS_DP_CTL_A, TRANS_DP_CTL_B)
 #define  TRANS_DP_OUTPUT_ENABLE	(1<<31)
 #define  TRANS_DP_PORT_SEL_B	(0<<29)
 #define  TRANS_DP_PORT_SEL_C	(1<<29)
@@ -4108,6 +4159,8 @@
 #define  FORCEWAKE_ACK_HSW			0x130044
 #define  FORCEWAKE_ACK				0x130090
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
+#define   FORCEWAKE_KERNEL			0x1
+#define   FORCEWAKE_USER			0x2
 #define  FORCEWAKE_MT_ACK			0x130040
 #define  ECOBUS					0xa180
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
@@ -4220,6 +4273,10 @@
 #define   GEN6_READ_OC_PARAMS			0xc
 #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
 #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
+#define	  GEN6_PCODE_WRITE_RC6VIDS		0x4
+#define	  GEN6_PCODE_READ_RC6VIDS		0x5
+#define   GEN6_ENCODE_RC6_VID(mv)		(((mv) - 245) / 5)
+#define   GEN6_DECODE_RC6_VID(vids)		(((vids) * 5) + 245)
 #define GEN6_PCODE_DATA				0x138128
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 
@@ -4251,6 +4308,15 @@
 #define GEN7_L3LOG_BASE			0xB070
 #define GEN7_L3LOG_SIZE			0x80
 
+#define GEN7_HALF_SLICE_CHICKEN1	0xe100 /* IVB GT1 + VLV */
+#define GEN7_HALF_SLICE_CHICKEN1_GT2	0xf100
+#define   GEN7_MAX_PS_THREAD_DEP		(8<<12)
+#define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE	(1<<3)
+
+#define GEN7_ROW_CHICKEN2		0xe4f4
+#define GEN7_ROW_CHICKEN2_GT2		0xf4f4
+#define   DOP_CLOCK_GATING_DISABLE	(1<<0)
+
 #define G4X_AUD_VID_DID			0x62020
 #define INTEL_AUDIO_DEVCL		0x808629FB
 #define INTEL_AUDIO_DEVBLC		0x80862801
@@ -4380,33 +4446,39 @@
 #define HSW_PWR_WELL_CTL6			0x45414
 
 /* Per-pipe DDI Function Control */
-#define PIPE_DDI_FUNC_CTL_A		0x60400
-#define PIPE_DDI_FUNC_CTL_B		0x61400
-#define PIPE_DDI_FUNC_CTL_C		0x62400
-#define PIPE_DDI_FUNC_CTL_EDP		0x6F400
-#define DDI_FUNC_CTL(pipe) _PIPE(pipe, PIPE_DDI_FUNC_CTL_A, \
-				       PIPE_DDI_FUNC_CTL_B)
-#define  PIPE_DDI_FUNC_ENABLE		(1<<31)
+#define TRANS_DDI_FUNC_CTL_A		0x60400
+#define TRANS_DDI_FUNC_CTL_B		0x61400
+#define TRANS_DDI_FUNC_CTL_C		0x62400
+#define TRANS_DDI_FUNC_CTL_EDP		0x6F400
+#define TRANS_DDI_FUNC_CTL(tran) _TRANSCODER(tran, TRANS_DDI_FUNC_CTL_A, \
+						   TRANS_DDI_FUNC_CTL_B)
+#define  TRANS_DDI_FUNC_ENABLE		(1<<31)
 /* Those bits are ignored by pipe EDP since it can only connect to DDI A */
-#define  PIPE_DDI_PORT_MASK		(7<<28)
-#define  PIPE_DDI_SELECT_PORT(x)	((x)<<28)
-#define  PIPE_DDI_MODE_SELECT_MASK	(7<<24)
-#define  PIPE_DDI_MODE_SELECT_HDMI	(0<<24)
-#define  PIPE_DDI_MODE_SELECT_DVI	(1<<24)
-#define  PIPE_DDI_MODE_SELECT_DP_SST	(2<<24)
-#define  PIPE_DDI_MODE_SELECT_DP_MST	(3<<24)
-#define  PIPE_DDI_MODE_SELECT_FDI	(4<<24)
-#define  PIPE_DDI_BPC_MASK		(7<<20)
-#define  PIPE_DDI_BPC_8			(0<<20)
-#define  PIPE_DDI_BPC_10		(1<<20)
-#define  PIPE_DDI_BPC_6			(2<<20)
-#define  PIPE_DDI_BPC_12		(3<<20)
-#define  PIPE_DDI_PVSYNC		(1<<17)
-#define  PIPE_DDI_PHSYNC		(1<<16)
-#define  PIPE_DDI_BFI_ENABLE		(1<<4)
-#define  PIPE_DDI_PORT_WIDTH_X1		(0<<1)
-#define  PIPE_DDI_PORT_WIDTH_X2		(1<<1)
-#define  PIPE_DDI_PORT_WIDTH_X4		(3<<1)
+#define  TRANS_DDI_PORT_MASK		(7<<28)
+#define  TRANS_DDI_SELECT_PORT(x)	((x)<<28)
+#define  TRANS_DDI_PORT_NONE		(0<<28)
+#define  TRANS_DDI_MODE_SELECT_MASK	(7<<24)
+#define  TRANS_DDI_MODE_SELECT_HDMI	(0<<24)
+#define  TRANS_DDI_MODE_SELECT_DVI	(1<<24)
+#define  TRANS_DDI_MODE_SELECT_DP_SST	(2<<24)
+#define  TRANS_DDI_MODE_SELECT_DP_MST	(3<<24)
+#define  TRANS_DDI_MODE_SELECT_FDI	(4<<24)
+#define  TRANS_DDI_BPC_MASK		(7<<20)
+#define  TRANS_DDI_BPC_8		(0<<20)
+#define  TRANS_DDI_BPC_10		(1<<20)
+#define  TRANS_DDI_BPC_6		(2<<20)
+#define  TRANS_DDI_BPC_12		(3<<20)
+#define  TRANS_DDI_PVSYNC		(1<<17)
+#define  TRANS_DDI_PHSYNC		(1<<16)
+#define  TRANS_DDI_EDP_INPUT_MASK	(7<<12)
+#define  TRANS_DDI_EDP_INPUT_A_ON	(0<<12)
+#define  TRANS_DDI_EDP_INPUT_A_ONOFF	(4<<12)
+#define  TRANS_DDI_EDP_INPUT_B_ONOFF	(5<<12)
+#define  TRANS_DDI_EDP_INPUT_C_ONOFF	(6<<12)
+#define  TRANS_DDI_BFI_ENABLE		(1<<4)
+#define  TRANS_DDI_PORT_WIDTH_X1	(0<<1)
+#define  TRANS_DDI_PORT_WIDTH_X2	(1<<1)
+#define  TRANS_DDI_PORT_WIDTH_X4	(3<<1)
 
 /* DisplayPort Transport Control */
 #define DP_TP_CTL_A			0x64040
@@ -4420,12 +4492,16 @@
 #define  DP_TP_CTL_LINK_TRAIN_MASK		(7<<8)
 #define  DP_TP_CTL_LINK_TRAIN_PAT1		(0<<8)
 #define  DP_TP_CTL_LINK_TRAIN_PAT2		(1<<8)
+#define  DP_TP_CTL_LINK_TRAIN_PAT3		(4<<8)
+#define  DP_TP_CTL_LINK_TRAIN_IDLE		(2<<8)
 #define  DP_TP_CTL_LINK_TRAIN_NORMAL		(3<<8)
+#define  DP_TP_CTL_SCRAMBLE_DISABLE		(1<<7)
 
 /* DisplayPort Transport Status */
 #define DP_TP_STATUS_A			0x64044
 #define DP_TP_STATUS_B			0x64144
 #define DP_TP_STATUS(port) _PORT(port, DP_TP_STATUS_A, DP_TP_STATUS_B)
+#define  DP_TP_STATUS_IDLE_DONE		(1<<25)
 #define  DP_TP_STATUS_AUTOTRAIN_DONE	(1<<12)
 
 /* DDI Buffer Control */
@@ -4444,6 +4520,7 @@
 #define  DDI_BUF_EMP_800MV_3_5DB_HSW		(8<<24)   /* Sel8 */
 #define  DDI_BUF_EMP_MASK			(0xf<<24)
 #define  DDI_BUF_IS_IDLE			(1<<7)
+#define  DDI_A_4_LANES				(1<<4)
 #define  DDI_PORT_WIDTH_X1			(0<<1)
 #define  DDI_PORT_WIDTH_X2			(1<<1)
 #define  DDI_PORT_WIDTH_X4			(3<<1)
@@ -4460,6 +4537,10 @@
 #define SBI_ADDR			0xC6000
 #define SBI_DATA			0xC6004
 #define SBI_CTL_STAT			0xC6008
+#define  SBI_CTL_DEST_ICLK		(0x0<<16)
+#define  SBI_CTL_DEST_MPHY		(0x1<<16)
+#define  SBI_CTL_OP_IORD		(0x2<<8)
+#define  SBI_CTL_OP_IOWR		(0x3<<8)
 #define  SBI_CTL_OP_CRRD		(0x6<<8)
 #define  SBI_CTL_OP_CRWR		(0x7<<8)
 #define  SBI_RESPONSE_FAIL		(0x1<<1)
@@ -4477,10 +4558,12 @@
 #define   SBI_SSCDIVINTPHASE_PROPAGATE		(1<<0)
 #define  SBI_SSCCTL				0x020c
 #define  SBI_SSCCTL6				0x060C
+#define   SBI_SSCCTL_PATHALT			(1<<3)
 #define   SBI_SSCCTL_DISABLE			(1<<0)
 #define  SBI_SSCAUXDIV6				0x0610
 #define   SBI_SSCAUXDIV_FINALDIV2SEL(x)		((x)<<4)
 #define  SBI_DBUFF0				0x2a00
+#define   SBI_DBUFF0_ENABLE			(1<<0)
 
 /* LPT PIXCLK_GATE */
 #define PIXCLK_GATE			0xC6020
@@ -4490,8 +4573,8 @@
 /* SPLL */
 #define SPLL_CTL			0x46020
 #define  SPLL_PLL_ENABLE		(1<<31)
-#define  SPLL_PLL_SCC			(1<<28)
-#define  SPLL_PLL_NON_SCC		(2<<28)
+#define  SPLL_PLL_SSC			(1<<28)
+#define  SPLL_PLL_NON_SSC		(2<<28)
 #define  SPLL_PLL_FREQ_810MHz		(0<<26)
 #define  SPLL_PLL_FREQ_1350MHz		(1<<26)
 
@@ -4500,7 +4583,7 @@
 #define WRPLL_CTL2			0x46060
 #define  WRPLL_PLL_ENABLE		(1<<31)
 #define  WRPLL_PLL_SELECT_SSC		(0x01<<28)
-#define  WRPLL_PLL_SELECT_NON_SCC	(0x02<<28)
+#define  WRPLL_PLL_SELECT_NON_SSC	(0x02<<28)
 #define  WRPLL_PLL_SELECT_LCPLL_2700	(0x03<<28)
 /* WRPLL divider programming */
 #define  WRPLL_DIVIDER_REFERENCE(x)	((x)<<0)
@@ -4517,21 +4600,36 @@
 #define  PORT_CLK_SEL_SPLL		(3<<29)
 #define  PORT_CLK_SEL_WRPLL1		(4<<29)
 #define  PORT_CLK_SEL_WRPLL2		(5<<29)
+#define  PORT_CLK_SEL_NONE		(7<<29)
 
-/* Pipe clock selection */
-#define PIPE_CLK_SEL_A			0x46140
-#define PIPE_CLK_SEL_B			0x46144
-#define PIPE_CLK_SEL(pipe) _PIPE(pipe, PIPE_CLK_SEL_A, PIPE_CLK_SEL_B)
-/* For each pipe, we need to select the corresponding port clock */
-#define  PIPE_CLK_SEL_DISABLED		(0x0<<29)
-#define  PIPE_CLK_SEL_PORT(x)		((x+1)<<29)
+/* Transcoder clock selection */
+#define TRANS_CLK_SEL_A			0x46140
+#define TRANS_CLK_SEL_B			0x46144
+#define TRANS_CLK_SEL(tran) _TRANSCODER(tran, TRANS_CLK_SEL_A, TRANS_CLK_SEL_B)
+/* For each transcoder, we need to select the corresponding port clock */
+#define  TRANS_CLK_SEL_DISABLED		(0x0<<29)
+#define  TRANS_CLK_SEL_PORT(x)		(((x)+1)<<29)
+
+#define _TRANSA_MSA_MISC		0x60410
+#define _TRANSB_MSA_MISC		0x61410
+#define TRANS_MSA_MISC(tran) _TRANSCODER(tran, _TRANSA_MSA_MISC, \
+					       _TRANSB_MSA_MISC)
+#define  TRANS_MSA_SYNC_CLK		(1<<0)
+#define  TRANS_MSA_6_BPC		(0<<5)
+#define  TRANS_MSA_8_BPC		(1<<5)
+#define  TRANS_MSA_10_BPC		(2<<5)
+#define  TRANS_MSA_12_BPC		(3<<5)
+#define  TRANS_MSA_16_BPC		(4<<5)
 
 /* LCPLL Control */
 #define LCPLL_CTL			0x130040
 #define  LCPLL_PLL_DISABLE		(1<<31)
 #define  LCPLL_PLL_LOCK			(1<<30)
+#define  LCPLL_CLK_FREQ_MASK		(3<<26)
+#define  LCPLL_CLK_FREQ_450		(0<<26)
 #define  LCPLL_CD_CLOCK_DISABLE		(1<<25)
 #define  LCPLL_CD2X_CLOCK_DISABLE	(1<<23)
+#define  LCPLL_CD_SOURCE_FCLK		(1<<21)
 
 /* Pipe WM_LINETIME - watermark line time */
 #define PIPE_WM_LINETIME_A		0x45270
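
The _PIPE()/_TRANSCODER() helpers leaned on throughout the register block above exploit the fixed stride between per-instance register copies: instance N lives at A + N * (B - A), so only the first two addresses ever need to be spelled out. A minimal stand-alone sketch of the idiom (assuming the in-tree definitions of the two helpers):

#include <stdio.h>

#define _PIPE(pipe, a, b)       ((a) + (pipe) * ((b) - (a)))
#define _TRANSCODER(tran, a, b) ((a) + (tran) * ((b) - (a)))

#define TRANS_CLK_SEL_A 0x46140
#define TRANS_CLK_SEL_B 0x46144
#define TRANS_CLK_SEL(tran) _TRANSCODER(tran, TRANS_CLK_SEL_A, TRANS_CLK_SEL_B)

int main(void)
{
	int tran;

	/* Stride 0x4 is inferred from instances A and B, so a third
	 * transcoder comes out at 0x46148 without a new #define. */
	for (tran = 0; tran < 3; tran++)
		printf("TRANS_CLK_SEL(%d) = 0x%05x\n", tran, TRANS_CLK_SEL(tran));
	return 0;
}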
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 5854bdd..63d4d30 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -60,9 +60,9 @@
 		reg = (pipe == PIPE_A) ? _LGC_PALETTE_A : _LGC_PALETTE_B;
 
 	if (pipe == PIPE_A)
-		array = dev_priv->save_palette_a;
+		array = dev_priv->regfile.save_palette_a;
 	else
-		array = dev_priv->save_palette_b;
+		array = dev_priv->regfile.save_palette_b;
 
 	for (i = 0; i < 256; i++)
 		array[i] = I915_READ(reg + (i << 2));
@@ -82,9 +82,9 @@
 		reg = (pipe == PIPE_A) ? _LGC_PALETTE_A : _LGC_PALETTE_B;
 
 	if (pipe == PIPE_A)
-		array = dev_priv->save_palette_a;
+		array = dev_priv->regfile.save_palette_a;
 	else
-		array = dev_priv->save_palette_b;
+		array = dev_priv->regfile.save_palette_b;
 
 	for (i = 0; i < 256; i++)
 		I915_WRITE(reg + (i << 2), array[i]);
@@ -131,11 +131,11 @@
 	u16 cr_index, cr_data, st01;
 
 	/* VGA color palette registers */
-	dev_priv->saveDACMASK = I915_READ8(VGA_DACMASK);
+	dev_priv->regfile.saveDACMASK = I915_READ8(VGA_DACMASK);
 
 	/* MSR bits */
-	dev_priv->saveMSR = I915_READ8(VGA_MSR_READ);
-	if (dev_priv->saveMSR & VGA_MSR_CGA_MODE) {
+	dev_priv->regfile.saveMSR = I915_READ8(VGA_MSR_READ);
+	if (dev_priv->regfile.saveMSR & VGA_MSR_CGA_MODE) {
 		cr_index = VGA_CR_INDEX_CGA;
 		cr_data = VGA_CR_DATA_CGA;
 		st01 = VGA_ST01_CGA;
@@ -150,35 +150,35 @@
 			   i915_read_indexed(dev, cr_index, cr_data, 0x11) &
 			   (~0x80));
 	for (i = 0; i <= 0x24; i++)
-		dev_priv->saveCR[i] =
+		dev_priv->regfile.saveCR[i] =
 			i915_read_indexed(dev, cr_index, cr_data, i);
 	/* Make sure we don't turn off CR group 0 writes */
-	dev_priv->saveCR[0x11] &= ~0x80;
+	dev_priv->regfile.saveCR[0x11] &= ~0x80;
 
 	/* Attribute controller registers */
 	I915_READ8(st01);
-	dev_priv->saveAR_INDEX = I915_READ8(VGA_AR_INDEX);
+	dev_priv->regfile.saveAR_INDEX = I915_READ8(VGA_AR_INDEX);
 	for (i = 0; i <= 0x14; i++)
-		dev_priv->saveAR[i] = i915_read_ar(dev, st01, i, 0);
+		dev_priv->regfile.saveAR[i] = i915_read_ar(dev, st01, i, 0);
 	I915_READ8(st01);
-	I915_WRITE8(VGA_AR_INDEX, dev_priv->saveAR_INDEX);
+	I915_WRITE8(VGA_AR_INDEX, dev_priv->regfile.saveAR_INDEX);
 	I915_READ8(st01);
 
 	/* Graphics controller registers */
 	for (i = 0; i < 9; i++)
-		dev_priv->saveGR[i] =
+		dev_priv->regfile.saveGR[i] =
 			i915_read_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, i);
 
-	dev_priv->saveGR[0x10] =
+	dev_priv->regfile.saveGR[0x10] =
 		i915_read_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, 0x10);
-	dev_priv->saveGR[0x11] =
+	dev_priv->regfile.saveGR[0x11] =
 		i915_read_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, 0x11);
-	dev_priv->saveGR[0x18] =
+	dev_priv->regfile.saveGR[0x18] =
 		i915_read_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, 0x18);
 
 	/* Sequencer registers */
 	for (i = 0; i < 8; i++)
-		dev_priv->saveSR[i] =
+		dev_priv->regfile.saveSR[i] =
 			i915_read_indexed(dev, VGA_SR_INDEX, VGA_SR_DATA, i);
 }
 
@@ -189,8 +189,8 @@
 	u16 cr_index, cr_data, st01;
 
 	/* MSR bits */
-	I915_WRITE8(VGA_MSR_WRITE, dev_priv->saveMSR);
-	if (dev_priv->saveMSR & VGA_MSR_CGA_MODE) {
+	I915_WRITE8(VGA_MSR_WRITE, dev_priv->regfile.saveMSR);
+	if (dev_priv->regfile.saveMSR & VGA_MSR_CGA_MODE) {
 		cr_index = VGA_CR_INDEX_CGA;
 		cr_data = VGA_CR_DATA_CGA;
 		st01 = VGA_ST01_CGA;
@@ -203,36 +203,36 @@
 	/* Sequencer registers, don't write SR07 */
 	for (i = 0; i < 7; i++)
 		i915_write_indexed(dev, VGA_SR_INDEX, VGA_SR_DATA, i,
-				   dev_priv->saveSR[i]);
+				   dev_priv->regfile.saveSR[i]);
 
 	/* CRT controller regs */
 	/* Enable CR group 0 writes */
-	i915_write_indexed(dev, cr_index, cr_data, 0x11, dev_priv->saveCR[0x11]);
+	i915_write_indexed(dev, cr_index, cr_data, 0x11, dev_priv->regfile.saveCR[0x11]);
 	for (i = 0; i <= 0x24; i++)
-		i915_write_indexed(dev, cr_index, cr_data, i, dev_priv->saveCR[i]);
+		i915_write_indexed(dev, cr_index, cr_data, i, dev_priv->regfile.saveCR[i]);
 
 	/* Graphics controller regs */
 	for (i = 0; i < 9; i++)
 		i915_write_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, i,
-				   dev_priv->saveGR[i]);
+				   dev_priv->regfile.saveGR[i]);
 
 	i915_write_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, 0x10,
-			   dev_priv->saveGR[0x10]);
+			   dev_priv->regfile.saveGR[0x10]);
 	i915_write_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, 0x11,
-			   dev_priv->saveGR[0x11]);
+			   dev_priv->regfile.saveGR[0x11]);
 	i915_write_indexed(dev, VGA_GR_INDEX, VGA_GR_DATA, 0x18,
-			   dev_priv->saveGR[0x18]);
+			   dev_priv->regfile.saveGR[0x18]);
 
 	/* Attribute controller registers */
 	I915_READ8(st01); /* switch back to index mode */
 	for (i = 0; i <= 0x14; i++)
-		i915_write_ar(dev, st01, i, dev_priv->saveAR[i], 0);
+		i915_write_ar(dev, st01, i, dev_priv->regfile.saveAR[i], 0);
 	I915_READ8(st01); /* switch back to index mode */
-	I915_WRITE8(VGA_AR_INDEX, dev_priv->saveAR_INDEX | 0x20);
+	I915_WRITE8(VGA_AR_INDEX, dev_priv->regfile.saveAR_INDEX | 0x20);
 	I915_READ8(st01);
 
 	/* VGA color palette registers */
-	I915_WRITE8(VGA_DACMASK, dev_priv->saveDACMASK);
+	I915_WRITE8(VGA_DACMASK, dev_priv->regfile.saveDACMASK);
 }
 
 static void i915_save_modeset_reg(struct drm_device *dev)
@@ -244,156 +244,162 @@
 		return;
 
 	/* Cursor state */
-	dev_priv->saveCURACNTR = I915_READ(_CURACNTR);
-	dev_priv->saveCURAPOS = I915_READ(_CURAPOS);
-	dev_priv->saveCURABASE = I915_READ(_CURABASE);
-	dev_priv->saveCURBCNTR = I915_READ(_CURBCNTR);
-	dev_priv->saveCURBPOS = I915_READ(_CURBPOS);
-	dev_priv->saveCURBBASE = I915_READ(_CURBBASE);
+	dev_priv->regfile.saveCURACNTR = I915_READ(_CURACNTR);
+	dev_priv->regfile.saveCURAPOS = I915_READ(_CURAPOS);
+	dev_priv->regfile.saveCURABASE = I915_READ(_CURABASE);
+	dev_priv->regfile.saveCURBCNTR = I915_READ(_CURBCNTR);
+	dev_priv->regfile.saveCURBPOS = I915_READ(_CURBPOS);
+	dev_priv->regfile.saveCURBBASE = I915_READ(_CURBBASE);
 	if (IS_GEN2(dev))
-		dev_priv->saveCURSIZE = I915_READ(CURSIZE);
+		dev_priv->regfile.saveCURSIZE = I915_READ(CURSIZE);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->savePCH_DREF_CONTROL = I915_READ(PCH_DREF_CONTROL);
-		dev_priv->saveDISP_ARB_CTL = I915_READ(DISP_ARB_CTL);
+		dev_priv->regfile.savePCH_DREF_CONTROL = I915_READ(PCH_DREF_CONTROL);
+		dev_priv->regfile.saveDISP_ARB_CTL = I915_READ(DISP_ARB_CTL);
 	}
 
 	/* Pipe & plane A info */
-	dev_priv->savePIPEACONF = I915_READ(_PIPEACONF);
-	dev_priv->savePIPEASRC = I915_READ(_PIPEASRC);
+	dev_priv->regfile.savePIPEACONF = I915_READ(_PIPEACONF);
+	dev_priv->regfile.savePIPEASRC = I915_READ(_PIPEASRC);
 	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->saveFPA0 = I915_READ(_PCH_FPA0);
-		dev_priv->saveFPA1 = I915_READ(_PCH_FPA1);
-		dev_priv->saveDPLL_A = I915_READ(_PCH_DPLL_A);
+		dev_priv->regfile.saveFPA0 = I915_READ(_PCH_FPA0);
+		dev_priv->regfile.saveFPA1 = I915_READ(_PCH_FPA1);
+		dev_priv->regfile.saveDPLL_A = I915_READ(_PCH_DPLL_A);
 	} else {
-		dev_priv->saveFPA0 = I915_READ(_FPA0);
-		dev_priv->saveFPA1 = I915_READ(_FPA1);
-		dev_priv->saveDPLL_A = I915_READ(_DPLL_A);
+		dev_priv->regfile.saveFPA0 = I915_READ(_FPA0);
+		dev_priv->regfile.saveFPA1 = I915_READ(_FPA1);
+		dev_priv->regfile.saveDPLL_A = I915_READ(_DPLL_A);
 	}
 	if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev))
-		dev_priv->saveDPLL_A_MD = I915_READ(_DPLL_A_MD);
-	dev_priv->saveHTOTAL_A = I915_READ(_HTOTAL_A);
-	dev_priv->saveHBLANK_A = I915_READ(_HBLANK_A);
-	dev_priv->saveHSYNC_A = I915_READ(_HSYNC_A);
-	dev_priv->saveVTOTAL_A = I915_READ(_VTOTAL_A);
-	dev_priv->saveVBLANK_A = I915_READ(_VBLANK_A);
-	dev_priv->saveVSYNC_A = I915_READ(_VSYNC_A);
+		dev_priv->regfile.saveDPLL_A_MD = I915_READ(_DPLL_A_MD);
+	dev_priv->regfile.saveHTOTAL_A = I915_READ(_HTOTAL_A);
+	dev_priv->regfile.saveHBLANK_A = I915_READ(_HBLANK_A);
+	dev_priv->regfile.saveHSYNC_A = I915_READ(_HSYNC_A);
+	dev_priv->regfile.saveVTOTAL_A = I915_READ(_VTOTAL_A);
+	dev_priv->regfile.saveVBLANK_A = I915_READ(_VBLANK_A);
+	dev_priv->regfile.saveVSYNC_A = I915_READ(_VSYNC_A);
 	if (!HAS_PCH_SPLIT(dev))
-		dev_priv->saveBCLRPAT_A = I915_READ(_BCLRPAT_A);
+		dev_priv->regfile.saveBCLRPAT_A = I915_READ(_BCLRPAT_A);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->savePIPEA_DATA_M1 = I915_READ(_PIPEA_DATA_M1);
-		dev_priv->savePIPEA_DATA_N1 = I915_READ(_PIPEA_DATA_N1);
-		dev_priv->savePIPEA_LINK_M1 = I915_READ(_PIPEA_LINK_M1);
-		dev_priv->savePIPEA_LINK_N1 = I915_READ(_PIPEA_LINK_N1);
+		dev_priv->regfile.savePIPEA_DATA_M1 = I915_READ(_PIPEA_DATA_M1);
+		dev_priv->regfile.savePIPEA_DATA_N1 = I915_READ(_PIPEA_DATA_N1);
+		dev_priv->regfile.savePIPEA_LINK_M1 = I915_READ(_PIPEA_LINK_M1);
+		dev_priv->regfile.savePIPEA_LINK_N1 = I915_READ(_PIPEA_LINK_N1);
 
-		dev_priv->saveFDI_TXA_CTL = I915_READ(_FDI_TXA_CTL);
-		dev_priv->saveFDI_RXA_CTL = I915_READ(_FDI_RXA_CTL);
+		dev_priv->regfile.saveFDI_TXA_CTL = I915_READ(_FDI_TXA_CTL);
+		dev_priv->regfile.saveFDI_RXA_CTL = I915_READ(_FDI_RXA_CTL);
 
-		dev_priv->savePFA_CTL_1 = I915_READ(_PFA_CTL_1);
-		dev_priv->savePFA_WIN_SZ = I915_READ(_PFA_WIN_SZ);
-		dev_priv->savePFA_WIN_POS = I915_READ(_PFA_WIN_POS);
+		dev_priv->regfile.savePFA_CTL_1 = I915_READ(_PFA_CTL_1);
+		dev_priv->regfile.savePFA_WIN_SZ = I915_READ(_PFA_WIN_SZ);
+		dev_priv->regfile.savePFA_WIN_POS = I915_READ(_PFA_WIN_POS);
 
-		dev_priv->saveTRANSACONF = I915_READ(_TRANSACONF);
-		dev_priv->saveTRANS_HTOTAL_A = I915_READ(_TRANS_HTOTAL_A);
-		dev_priv->saveTRANS_HBLANK_A = I915_READ(_TRANS_HBLANK_A);
-		dev_priv->saveTRANS_HSYNC_A = I915_READ(_TRANS_HSYNC_A);
-		dev_priv->saveTRANS_VTOTAL_A = I915_READ(_TRANS_VTOTAL_A);
-		dev_priv->saveTRANS_VBLANK_A = I915_READ(_TRANS_VBLANK_A);
-		dev_priv->saveTRANS_VSYNC_A = I915_READ(_TRANS_VSYNC_A);
+		dev_priv->regfile.saveTRANSACONF = I915_READ(_TRANSACONF);
+		dev_priv->regfile.saveTRANS_HTOTAL_A = I915_READ(_TRANS_HTOTAL_A);
+		dev_priv->regfile.saveTRANS_HBLANK_A = I915_READ(_TRANS_HBLANK_A);
+		dev_priv->regfile.saveTRANS_HSYNC_A = I915_READ(_TRANS_HSYNC_A);
+		dev_priv->regfile.saveTRANS_VTOTAL_A = I915_READ(_TRANS_VTOTAL_A);
+		dev_priv->regfile.saveTRANS_VBLANK_A = I915_READ(_TRANS_VBLANK_A);
+		dev_priv->regfile.saveTRANS_VSYNC_A = I915_READ(_TRANS_VSYNC_A);
 	}
 
-	dev_priv->saveDSPACNTR = I915_READ(_DSPACNTR);
-	dev_priv->saveDSPASTRIDE = I915_READ(_DSPASTRIDE);
-	dev_priv->saveDSPASIZE = I915_READ(_DSPASIZE);
-	dev_priv->saveDSPAPOS = I915_READ(_DSPAPOS);
-	dev_priv->saveDSPAADDR = I915_READ(_DSPAADDR);
+	dev_priv->regfile.saveDSPACNTR = I915_READ(_DSPACNTR);
+	dev_priv->regfile.saveDSPASTRIDE = I915_READ(_DSPASTRIDE);
+	dev_priv->regfile.saveDSPASIZE = I915_READ(_DSPASIZE);
+	dev_priv->regfile.saveDSPAPOS = I915_READ(_DSPAPOS);
+	dev_priv->regfile.saveDSPAADDR = I915_READ(_DSPAADDR);
 	if (INTEL_INFO(dev)->gen >= 4) {
-		dev_priv->saveDSPASURF = I915_READ(_DSPASURF);
-		dev_priv->saveDSPATILEOFF = I915_READ(_DSPATILEOFF);
+		dev_priv->regfile.saveDSPASURF = I915_READ(_DSPASURF);
+		dev_priv->regfile.saveDSPATILEOFF = I915_READ(_DSPATILEOFF);
 	}
 	i915_save_palette(dev, PIPE_A);
-	dev_priv->savePIPEASTAT = I915_READ(_PIPEASTAT);
+	dev_priv->regfile.savePIPEASTAT = I915_READ(_PIPEASTAT);
 
 	/* Pipe & plane B info */
-	dev_priv->savePIPEBCONF = I915_READ(_PIPEBCONF);
-	dev_priv->savePIPEBSRC = I915_READ(_PIPEBSRC);
+	dev_priv->regfile.savePIPEBCONF = I915_READ(_PIPEBCONF);
+	dev_priv->regfile.savePIPEBSRC = I915_READ(_PIPEBSRC);
 	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->saveFPB0 = I915_READ(_PCH_FPB0);
-		dev_priv->saveFPB1 = I915_READ(_PCH_FPB1);
-		dev_priv->saveDPLL_B = I915_READ(_PCH_DPLL_B);
+		dev_priv->regfile.saveFPB0 = I915_READ(_PCH_FPB0);
+		dev_priv->regfile.saveFPB1 = I915_READ(_PCH_FPB1);
+		dev_priv->regfile.saveDPLL_B = I915_READ(_PCH_DPLL_B);
 	} else {
-		dev_priv->saveFPB0 = I915_READ(_FPB0);
-		dev_priv->saveFPB1 = I915_READ(_FPB1);
-		dev_priv->saveDPLL_B = I915_READ(_DPLL_B);
+		dev_priv->regfile.saveFPB0 = I915_READ(_FPB0);
+		dev_priv->regfile.saveFPB1 = I915_READ(_FPB1);
+		dev_priv->regfile.saveDPLL_B = I915_READ(_DPLL_B);
 	}
 	if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev))
-		dev_priv->saveDPLL_B_MD = I915_READ(_DPLL_B_MD);
-	dev_priv->saveHTOTAL_B = I915_READ(_HTOTAL_B);
-	dev_priv->saveHBLANK_B = I915_READ(_HBLANK_B);
-	dev_priv->saveHSYNC_B = I915_READ(_HSYNC_B);
-	dev_priv->saveVTOTAL_B = I915_READ(_VTOTAL_B);
-	dev_priv->saveVBLANK_B = I915_READ(_VBLANK_B);
-	dev_priv->saveVSYNC_B = I915_READ(_VSYNC_B);
+		dev_priv->regfile.saveDPLL_B_MD = I915_READ(_DPLL_B_MD);
+	dev_priv->regfile.saveHTOTAL_B = I915_READ(_HTOTAL_B);
+	dev_priv->regfile.saveHBLANK_B = I915_READ(_HBLANK_B);
+	dev_priv->regfile.saveHSYNC_B = I915_READ(_HSYNC_B);
+	dev_priv->regfile.saveVTOTAL_B = I915_READ(_VTOTAL_B);
+	dev_priv->regfile.saveVBLANK_B = I915_READ(_VBLANK_B);
+	dev_priv->regfile.saveVSYNC_B = I915_READ(_VSYNC_B);
 	if (!HAS_PCH_SPLIT(dev))
-		dev_priv->saveBCLRPAT_B = I915_READ(_BCLRPAT_B);
+		dev_priv->regfile.saveBCLRPAT_B = I915_READ(_BCLRPAT_B);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->savePIPEB_DATA_M1 = I915_READ(_PIPEB_DATA_M1);
-		dev_priv->savePIPEB_DATA_N1 = I915_READ(_PIPEB_DATA_N1);
-		dev_priv->savePIPEB_LINK_M1 = I915_READ(_PIPEB_LINK_M1);
-		dev_priv->savePIPEB_LINK_N1 = I915_READ(_PIPEB_LINK_N1);
+		dev_priv->regfile.savePIPEB_DATA_M1 = I915_READ(_PIPEB_DATA_M1);
+		dev_priv->regfile.savePIPEB_DATA_N1 = I915_READ(_PIPEB_DATA_N1);
+		dev_priv->regfile.savePIPEB_LINK_M1 = I915_READ(_PIPEB_LINK_M1);
+		dev_priv->regfile.savePIPEB_LINK_N1 = I915_READ(_PIPEB_LINK_N1);
 
-		dev_priv->saveFDI_TXB_CTL = I915_READ(_FDI_TXB_CTL);
-		dev_priv->saveFDI_RXB_CTL = I915_READ(_FDI_RXB_CTL);
+		dev_priv->regfile.saveFDI_TXB_CTL = I915_READ(_FDI_TXB_CTL);
+		dev_priv->regfile.saveFDI_RXB_CTL = I915_READ(_FDI_RXB_CTL);
 
-		dev_priv->savePFB_CTL_1 = I915_READ(_PFB_CTL_1);
-		dev_priv->savePFB_WIN_SZ = I915_READ(_PFB_WIN_SZ);
-		dev_priv->savePFB_WIN_POS = I915_READ(_PFB_WIN_POS);
+		dev_priv->regfile.savePFB_CTL_1 = I915_READ(_PFB_CTL_1);
+		dev_priv->regfile.savePFB_WIN_SZ = I915_READ(_PFB_WIN_SZ);
+		dev_priv->regfile.savePFB_WIN_POS = I915_READ(_PFB_WIN_POS);
 
-		dev_priv->saveTRANSBCONF = I915_READ(_TRANSBCONF);
-		dev_priv->saveTRANS_HTOTAL_B = I915_READ(_TRANS_HTOTAL_B);
-		dev_priv->saveTRANS_HBLANK_B = I915_READ(_TRANS_HBLANK_B);
-		dev_priv->saveTRANS_HSYNC_B = I915_READ(_TRANS_HSYNC_B);
-		dev_priv->saveTRANS_VTOTAL_B = I915_READ(_TRANS_VTOTAL_B);
-		dev_priv->saveTRANS_VBLANK_B = I915_READ(_TRANS_VBLANK_B);
-		dev_priv->saveTRANS_VSYNC_B = I915_READ(_TRANS_VSYNC_B);
+		dev_priv->regfile.saveTRANSBCONF = I915_READ(_TRANSBCONF);
+		dev_priv->regfile.saveTRANS_HTOTAL_B = I915_READ(_TRANS_HTOTAL_B);
+		dev_priv->regfile.saveTRANS_HBLANK_B = I915_READ(_TRANS_HBLANK_B);
+		dev_priv->regfile.saveTRANS_HSYNC_B = I915_READ(_TRANS_HSYNC_B);
+		dev_priv->regfile.saveTRANS_VTOTAL_B = I915_READ(_TRANS_VTOTAL_B);
+		dev_priv->regfile.saveTRANS_VBLANK_B = I915_READ(_TRANS_VBLANK_B);
+		dev_priv->regfile.saveTRANS_VSYNC_B = I915_READ(_TRANS_VSYNC_B);
 	}
 
-	dev_priv->saveDSPBCNTR = I915_READ(_DSPBCNTR);
-	dev_priv->saveDSPBSTRIDE = I915_READ(_DSPBSTRIDE);
-	dev_priv->saveDSPBSIZE = I915_READ(_DSPBSIZE);
-	dev_priv->saveDSPBPOS = I915_READ(_DSPBPOS);
-	dev_priv->saveDSPBADDR = I915_READ(_DSPBADDR);
+	dev_priv->regfile.saveDSPBCNTR = I915_READ(_DSPBCNTR);
+	dev_priv->regfile.saveDSPBSTRIDE = I915_READ(_DSPBSTRIDE);
+	dev_priv->regfile.saveDSPBSIZE = I915_READ(_DSPBSIZE);
+	dev_priv->regfile.saveDSPBPOS = I915_READ(_DSPBPOS);
+	dev_priv->regfile.saveDSPBADDR = I915_READ(_DSPBADDR);
 	if (INTEL_INFO(dev)->gen >= 4) {
-		dev_priv->saveDSPBSURF = I915_READ(_DSPBSURF);
-		dev_priv->saveDSPBTILEOFF = I915_READ(_DSPBTILEOFF);
+		dev_priv->regfile.saveDSPBSURF = I915_READ(_DSPBSURF);
+		dev_priv->regfile.saveDSPBTILEOFF = I915_READ(_DSPBTILEOFF);
 	}
 	i915_save_palette(dev, PIPE_B);
-	dev_priv->savePIPEBSTAT = I915_READ(_PIPEBSTAT);
+	dev_priv->regfile.savePIPEBSTAT = I915_READ(_PIPEBSTAT);
 
 	/* Fences */
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
 		for (i = 0; i < 16; i++)
-			dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8));
+			dev_priv->regfile.saveFENCE[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8));
 		break;
 	case 5:
 	case 4:
 		for (i = 0; i < 16; i++)
-			dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
+			dev_priv->regfile.saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
 		break;
 	case 3:
 		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 			for (i = 0; i < 8; i++)
-				dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
+				dev_priv->regfile.saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
 	case 2:
 		for (i = 0; i < 8; i++)
-			dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
+			dev_priv->regfile.saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
 		break;
 	}
 
+	/* CRT state */
+	if (HAS_PCH_SPLIT(dev))
+		dev_priv->regfile.saveADPA = I915_READ(PCH_ADPA);
+	else
+		dev_priv->regfile.saveADPA = I915_READ(ADPA);
+
 	return;
 }
 
@@ -412,20 +418,20 @@
 	case 7:
 	case 6:
 		for (i = 0; i < 16; i++)
-			I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), dev_priv->saveFENCE[i]);
+			I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), dev_priv->regfile.saveFENCE[i]);
 		break;
 	case 5:
 	case 4:
 		for (i = 0; i < 16; i++)
-			I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
+			I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->regfile.saveFENCE[i]);
 		break;
 	case 3:
 	case 2:
 		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 			for (i = 0; i < 8; i++)
-				I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
+				I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->regfile.saveFENCE[i+8]);
 		for (i = 0; i < 8; i++)
-			I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
+			I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->regfile.saveFENCE[i]);
 		break;
 	}
 
@@ -447,158 +453,164 @@
 	}
 
 	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(PCH_DREF_CONTROL, dev_priv->savePCH_DREF_CONTROL);
-		I915_WRITE(DISP_ARB_CTL, dev_priv->saveDISP_ARB_CTL);
+		I915_WRITE(PCH_DREF_CONTROL, dev_priv->regfile.savePCH_DREF_CONTROL);
+		I915_WRITE(DISP_ARB_CTL, dev_priv->regfile.saveDISP_ARB_CTL);
 	}
 
 	/* Pipe & plane A info */
 	/* Prime the clock */
-	if (dev_priv->saveDPLL_A & DPLL_VCO_ENABLE) {
-		I915_WRITE(dpll_a_reg, dev_priv->saveDPLL_A &
+	if (dev_priv->regfile.saveDPLL_A & DPLL_VCO_ENABLE) {
+		I915_WRITE(dpll_a_reg, dev_priv->regfile.saveDPLL_A &
 			   ~DPLL_VCO_ENABLE);
 		POSTING_READ(dpll_a_reg);
 		udelay(150);
 	}
-	I915_WRITE(fpa0_reg, dev_priv->saveFPA0);
-	I915_WRITE(fpa1_reg, dev_priv->saveFPA1);
+	I915_WRITE(fpa0_reg, dev_priv->regfile.saveFPA0);
+	I915_WRITE(fpa1_reg, dev_priv->regfile.saveFPA1);
 	/* Actually enable it */
-	I915_WRITE(dpll_a_reg, dev_priv->saveDPLL_A);
+	I915_WRITE(dpll_a_reg, dev_priv->regfile.saveDPLL_A);
 	POSTING_READ(dpll_a_reg);
 	udelay(150);
 	if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(_DPLL_A_MD, dev_priv->saveDPLL_A_MD);
+		I915_WRITE(_DPLL_A_MD, dev_priv->regfile.saveDPLL_A_MD);
 		POSTING_READ(_DPLL_A_MD);
 	}
 	udelay(150);
 
 	/* Restore mode */
-	I915_WRITE(_HTOTAL_A, dev_priv->saveHTOTAL_A);
-	I915_WRITE(_HBLANK_A, dev_priv->saveHBLANK_A);
-	I915_WRITE(_HSYNC_A, dev_priv->saveHSYNC_A);
-	I915_WRITE(_VTOTAL_A, dev_priv->saveVTOTAL_A);
-	I915_WRITE(_VBLANK_A, dev_priv->saveVBLANK_A);
-	I915_WRITE(_VSYNC_A, dev_priv->saveVSYNC_A);
+	I915_WRITE(_HTOTAL_A, dev_priv->regfile.saveHTOTAL_A);
+	I915_WRITE(_HBLANK_A, dev_priv->regfile.saveHBLANK_A);
+	I915_WRITE(_HSYNC_A, dev_priv->regfile.saveHSYNC_A);
+	I915_WRITE(_VTOTAL_A, dev_priv->regfile.saveVTOTAL_A);
+	I915_WRITE(_VBLANK_A, dev_priv->regfile.saveVBLANK_A);
+	I915_WRITE(_VSYNC_A, dev_priv->regfile.saveVSYNC_A);
 	if (!HAS_PCH_SPLIT(dev))
-		I915_WRITE(_BCLRPAT_A, dev_priv->saveBCLRPAT_A);
+		I915_WRITE(_BCLRPAT_A, dev_priv->regfile.saveBCLRPAT_A);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(_PIPEA_DATA_M1, dev_priv->savePIPEA_DATA_M1);
-		I915_WRITE(_PIPEA_DATA_N1, dev_priv->savePIPEA_DATA_N1);
-		I915_WRITE(_PIPEA_LINK_M1, dev_priv->savePIPEA_LINK_M1);
-		I915_WRITE(_PIPEA_LINK_N1, dev_priv->savePIPEA_LINK_N1);
+		I915_WRITE(_PIPEA_DATA_M1, dev_priv->regfile.savePIPEA_DATA_M1);
+		I915_WRITE(_PIPEA_DATA_N1, dev_priv->regfile.savePIPEA_DATA_N1);
+		I915_WRITE(_PIPEA_LINK_M1, dev_priv->regfile.savePIPEA_LINK_M1);
+		I915_WRITE(_PIPEA_LINK_N1, dev_priv->regfile.savePIPEA_LINK_N1);
 
-		I915_WRITE(_FDI_RXA_CTL, dev_priv->saveFDI_RXA_CTL);
-		I915_WRITE(_FDI_TXA_CTL, dev_priv->saveFDI_TXA_CTL);
+		I915_WRITE(_FDI_RXA_CTL, dev_priv->regfile.saveFDI_RXA_CTL);
+		I915_WRITE(_FDI_TXA_CTL, dev_priv->regfile.saveFDI_TXA_CTL);
 
-		I915_WRITE(_PFA_CTL_1, dev_priv->savePFA_CTL_1);
-		I915_WRITE(_PFA_WIN_SZ, dev_priv->savePFA_WIN_SZ);
-		I915_WRITE(_PFA_WIN_POS, dev_priv->savePFA_WIN_POS);
+		I915_WRITE(_PFA_CTL_1, dev_priv->regfile.savePFA_CTL_1);
+		I915_WRITE(_PFA_WIN_SZ, dev_priv->regfile.savePFA_WIN_SZ);
+		I915_WRITE(_PFA_WIN_POS, dev_priv->regfile.savePFA_WIN_POS);
 
-		I915_WRITE(_TRANSACONF, dev_priv->saveTRANSACONF);
-		I915_WRITE(_TRANS_HTOTAL_A, dev_priv->saveTRANS_HTOTAL_A);
-		I915_WRITE(_TRANS_HBLANK_A, dev_priv->saveTRANS_HBLANK_A);
-		I915_WRITE(_TRANS_HSYNC_A, dev_priv->saveTRANS_HSYNC_A);
-		I915_WRITE(_TRANS_VTOTAL_A, dev_priv->saveTRANS_VTOTAL_A);
-		I915_WRITE(_TRANS_VBLANK_A, dev_priv->saveTRANS_VBLANK_A);
-		I915_WRITE(_TRANS_VSYNC_A, dev_priv->saveTRANS_VSYNC_A);
+		I915_WRITE(_TRANSACONF, dev_priv->regfile.saveTRANSACONF);
+		I915_WRITE(_TRANS_HTOTAL_A, dev_priv->regfile.saveTRANS_HTOTAL_A);
+		I915_WRITE(_TRANS_HBLANK_A, dev_priv->regfile.saveTRANS_HBLANK_A);
+		I915_WRITE(_TRANS_HSYNC_A, dev_priv->regfile.saveTRANS_HSYNC_A);
+		I915_WRITE(_TRANS_VTOTAL_A, dev_priv->regfile.saveTRANS_VTOTAL_A);
+		I915_WRITE(_TRANS_VBLANK_A, dev_priv->regfile.saveTRANS_VBLANK_A);
+		I915_WRITE(_TRANS_VSYNC_A, dev_priv->regfile.saveTRANS_VSYNC_A);
 	}
 
 	/* Restore plane info */
-	I915_WRITE(_DSPASIZE, dev_priv->saveDSPASIZE);
-	I915_WRITE(_DSPAPOS, dev_priv->saveDSPAPOS);
-	I915_WRITE(_PIPEASRC, dev_priv->savePIPEASRC);
-	I915_WRITE(_DSPAADDR, dev_priv->saveDSPAADDR);
-	I915_WRITE(_DSPASTRIDE, dev_priv->saveDSPASTRIDE);
+	I915_WRITE(_DSPASIZE, dev_priv->regfile.saveDSPASIZE);
+	I915_WRITE(_DSPAPOS, dev_priv->regfile.saveDSPAPOS);
+	I915_WRITE(_PIPEASRC, dev_priv->regfile.savePIPEASRC);
+	I915_WRITE(_DSPAADDR, dev_priv->regfile.saveDSPAADDR);
+	I915_WRITE(_DSPASTRIDE, dev_priv->regfile.saveDSPASTRIDE);
 	if (INTEL_INFO(dev)->gen >= 4) {
-		I915_WRITE(_DSPASURF, dev_priv->saveDSPASURF);
-		I915_WRITE(_DSPATILEOFF, dev_priv->saveDSPATILEOFF);
+		I915_WRITE(_DSPASURF, dev_priv->regfile.saveDSPASURF);
+		I915_WRITE(_DSPATILEOFF, dev_priv->regfile.saveDSPATILEOFF);
 	}
 
-	I915_WRITE(_PIPEACONF, dev_priv->savePIPEACONF);
+	I915_WRITE(_PIPEACONF, dev_priv->regfile.savePIPEACONF);
 
 	i915_restore_palette(dev, PIPE_A);
 	/* Enable the plane */
-	I915_WRITE(_DSPACNTR, dev_priv->saveDSPACNTR);
+	I915_WRITE(_DSPACNTR, dev_priv->regfile.saveDSPACNTR);
 	I915_WRITE(_DSPAADDR, I915_READ(_DSPAADDR));
 
 	/* Pipe & plane B info */
-	if (dev_priv->saveDPLL_B & DPLL_VCO_ENABLE) {
-		I915_WRITE(dpll_b_reg, dev_priv->saveDPLL_B &
+	if (dev_priv->regfile.saveDPLL_B & DPLL_VCO_ENABLE) {
+		I915_WRITE(dpll_b_reg, dev_priv->regfile.saveDPLL_B &
 			   ~DPLL_VCO_ENABLE);
 		POSTING_READ(dpll_b_reg);
 		udelay(150);
 	}
-	I915_WRITE(fpb0_reg, dev_priv->saveFPB0);
-	I915_WRITE(fpb1_reg, dev_priv->saveFPB1);
+	I915_WRITE(fpb0_reg, dev_priv->regfile.saveFPB0);
+	I915_WRITE(fpb1_reg, dev_priv->regfile.saveFPB1);
 	/* Actually enable it */
-	I915_WRITE(dpll_b_reg, dev_priv->saveDPLL_B);
+	I915_WRITE(dpll_b_reg, dev_priv->regfile.saveDPLL_B);
 	POSTING_READ(dpll_b_reg);
 	udelay(150);
 	if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(_DPLL_B_MD, dev_priv->saveDPLL_B_MD);
+		I915_WRITE(_DPLL_B_MD, dev_priv->regfile.saveDPLL_B_MD);
 		POSTING_READ(_DPLL_B_MD);
 	}
 	udelay(150);
 
 	/* Restore mode */
-	I915_WRITE(_HTOTAL_B, dev_priv->saveHTOTAL_B);
-	I915_WRITE(_HBLANK_B, dev_priv->saveHBLANK_B);
-	I915_WRITE(_HSYNC_B, dev_priv->saveHSYNC_B);
-	I915_WRITE(_VTOTAL_B, dev_priv->saveVTOTAL_B);
-	I915_WRITE(_VBLANK_B, dev_priv->saveVBLANK_B);
-	I915_WRITE(_VSYNC_B, dev_priv->saveVSYNC_B);
+	I915_WRITE(_HTOTAL_B, dev_priv->regfile.saveHTOTAL_B);
+	I915_WRITE(_HBLANK_B, dev_priv->regfile.saveHBLANK_B);
+	I915_WRITE(_HSYNC_B, dev_priv->regfile.saveHSYNC_B);
+	I915_WRITE(_VTOTAL_B, dev_priv->regfile.saveVTOTAL_B);
+	I915_WRITE(_VBLANK_B, dev_priv->regfile.saveVBLANK_B);
+	I915_WRITE(_VSYNC_B, dev_priv->regfile.saveVSYNC_B);
 	if (!HAS_PCH_SPLIT(dev))
-		I915_WRITE(_BCLRPAT_B, dev_priv->saveBCLRPAT_B);
+		I915_WRITE(_BCLRPAT_B, dev_priv->regfile.saveBCLRPAT_B);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(_PIPEB_DATA_M1, dev_priv->savePIPEB_DATA_M1);
-		I915_WRITE(_PIPEB_DATA_N1, dev_priv->savePIPEB_DATA_N1);
-		I915_WRITE(_PIPEB_LINK_M1, dev_priv->savePIPEB_LINK_M1);
-		I915_WRITE(_PIPEB_LINK_N1, dev_priv->savePIPEB_LINK_N1);
+		I915_WRITE(_PIPEB_DATA_M1, dev_priv->regfile.savePIPEB_DATA_M1);
+		I915_WRITE(_PIPEB_DATA_N1, dev_priv->regfile.savePIPEB_DATA_N1);
+		I915_WRITE(_PIPEB_LINK_M1, dev_priv->regfile.savePIPEB_LINK_M1);
+		I915_WRITE(_PIPEB_LINK_N1, dev_priv->regfile.savePIPEB_LINK_N1);
 
-		I915_WRITE(_FDI_RXB_CTL, dev_priv->saveFDI_RXB_CTL);
-		I915_WRITE(_FDI_TXB_CTL, dev_priv->saveFDI_TXB_CTL);
+		I915_WRITE(_FDI_RXB_CTL, dev_priv->regfile.saveFDI_RXB_CTL);
+		I915_WRITE(_FDI_TXB_CTL, dev_priv->regfile.saveFDI_TXB_CTL);
 
-		I915_WRITE(_PFB_CTL_1, dev_priv->savePFB_CTL_1);
-		I915_WRITE(_PFB_WIN_SZ, dev_priv->savePFB_WIN_SZ);
-		I915_WRITE(_PFB_WIN_POS, dev_priv->savePFB_WIN_POS);
+		I915_WRITE(_PFB_CTL_1, dev_priv->regfile.savePFB_CTL_1);
+		I915_WRITE(_PFB_WIN_SZ, dev_priv->regfile.savePFB_WIN_SZ);
+		I915_WRITE(_PFB_WIN_POS, dev_priv->regfile.savePFB_WIN_POS);
 
-		I915_WRITE(_TRANSBCONF, dev_priv->saveTRANSBCONF);
-		I915_WRITE(_TRANS_HTOTAL_B, dev_priv->saveTRANS_HTOTAL_B);
-		I915_WRITE(_TRANS_HBLANK_B, dev_priv->saveTRANS_HBLANK_B);
-		I915_WRITE(_TRANS_HSYNC_B, dev_priv->saveTRANS_HSYNC_B);
-		I915_WRITE(_TRANS_VTOTAL_B, dev_priv->saveTRANS_VTOTAL_B);
-		I915_WRITE(_TRANS_VBLANK_B, dev_priv->saveTRANS_VBLANK_B);
-		I915_WRITE(_TRANS_VSYNC_B, dev_priv->saveTRANS_VSYNC_B);
+		I915_WRITE(_TRANSBCONF, dev_priv->regfile.saveTRANSBCONF);
+		I915_WRITE(_TRANS_HTOTAL_B, dev_priv->regfile.saveTRANS_HTOTAL_B);
+		I915_WRITE(_TRANS_HBLANK_B, dev_priv->regfile.saveTRANS_HBLANK_B);
+		I915_WRITE(_TRANS_HSYNC_B, dev_priv->regfile.saveTRANS_HSYNC_B);
+		I915_WRITE(_TRANS_VTOTAL_B, dev_priv->regfile.saveTRANS_VTOTAL_B);
+		I915_WRITE(_TRANS_VBLANK_B, dev_priv->regfile.saveTRANS_VBLANK_B);
+		I915_WRITE(_TRANS_VSYNC_B, dev_priv->regfile.saveTRANS_VSYNC_B);
 	}
 
 	/* Restore plane info */
-	I915_WRITE(_DSPBSIZE, dev_priv->saveDSPBSIZE);
-	I915_WRITE(_DSPBPOS, dev_priv->saveDSPBPOS);
-	I915_WRITE(_PIPEBSRC, dev_priv->savePIPEBSRC);
-	I915_WRITE(_DSPBADDR, dev_priv->saveDSPBADDR);
-	I915_WRITE(_DSPBSTRIDE, dev_priv->saveDSPBSTRIDE);
+	I915_WRITE(_DSPBSIZE, dev_priv->regfile.saveDSPBSIZE);
+	I915_WRITE(_DSPBPOS, dev_priv->regfile.saveDSPBPOS);
+	I915_WRITE(_PIPEBSRC, dev_priv->regfile.savePIPEBSRC);
+	I915_WRITE(_DSPBADDR, dev_priv->regfile.saveDSPBADDR);
+	I915_WRITE(_DSPBSTRIDE, dev_priv->regfile.saveDSPBSTRIDE);
 	if (INTEL_INFO(dev)->gen >= 4) {
-		I915_WRITE(_DSPBSURF, dev_priv->saveDSPBSURF);
-		I915_WRITE(_DSPBTILEOFF, dev_priv->saveDSPBTILEOFF);
+		I915_WRITE(_DSPBSURF, dev_priv->regfile.saveDSPBSURF);
+		I915_WRITE(_DSPBTILEOFF, dev_priv->regfile.saveDSPBTILEOFF);
 	}
 
-	I915_WRITE(_PIPEBCONF, dev_priv->savePIPEBCONF);
+	I915_WRITE(_PIPEBCONF, dev_priv->regfile.savePIPEBCONF);
 
 	i915_restore_palette(dev, PIPE_B);
 	/* Enable the plane */
-	I915_WRITE(_DSPBCNTR, dev_priv->saveDSPBCNTR);
+	I915_WRITE(_DSPBCNTR, dev_priv->regfile.saveDSPBCNTR);
 	I915_WRITE(_DSPBADDR, I915_READ(_DSPBADDR));
 
 	/* Cursor state */
-	I915_WRITE(_CURAPOS, dev_priv->saveCURAPOS);
-	I915_WRITE(_CURACNTR, dev_priv->saveCURACNTR);
-	I915_WRITE(_CURABASE, dev_priv->saveCURABASE);
-	I915_WRITE(_CURBPOS, dev_priv->saveCURBPOS);
-	I915_WRITE(_CURBCNTR, dev_priv->saveCURBCNTR);
-	I915_WRITE(_CURBBASE, dev_priv->saveCURBBASE);
+	I915_WRITE(_CURAPOS, dev_priv->regfile.saveCURAPOS);
+	I915_WRITE(_CURACNTR, dev_priv->regfile.saveCURACNTR);
+	I915_WRITE(_CURABASE, dev_priv->regfile.saveCURABASE);
+	I915_WRITE(_CURBPOS, dev_priv->regfile.saveCURBPOS);
+	I915_WRITE(_CURBCNTR, dev_priv->regfile.saveCURBCNTR);
+	I915_WRITE(_CURBBASE, dev_priv->regfile.saveCURBBASE);
 	if (IS_GEN2(dev))
-		I915_WRITE(CURSIZE, dev_priv->saveCURSIZE);
+		I915_WRITE(CURSIZE, dev_priv->regfile.saveCURSIZE);
+
+	/* CRT state */
+	if (HAS_PCH_SPLIT(dev))
+		I915_WRITE(PCH_ADPA, dev_priv->regfile.saveADPA);
+	else
+		I915_WRITE(ADPA, dev_priv->regfile.saveADPA);
 
 	return;
 }
@@ -608,89 +620,84 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* Display arbitration control */
-	dev_priv->saveDSPARB = I915_READ(DSPARB);
+	dev_priv->regfile.saveDSPARB = I915_READ(DSPARB);
 
 	/* This is only meaningful in non-KMS mode */
 	/* Don't save them in KMS mode */
 	i915_save_modeset_reg(dev);
 
-	/* CRT state */
-	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->saveADPA = I915_READ(PCH_ADPA);
-	} else {
-		dev_priv->saveADPA = I915_READ(ADPA);
-	}
-
 	/* LVDS state */
 	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->savePP_CONTROL = I915_READ(PCH_PP_CONTROL);
-		dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_PCH_CTL1);
-		dev_priv->saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_PCH_CTL2);
-		dev_priv->saveBLC_CPU_PWM_CTL = I915_READ(BLC_PWM_CPU_CTL);
-		dev_priv->saveBLC_CPU_PWM_CTL2 = I915_READ(BLC_PWM_CPU_CTL2);
-		dev_priv->saveLVDS = I915_READ(PCH_LVDS);
+		dev_priv->regfile.savePP_CONTROL = I915_READ(PCH_PP_CONTROL);
+		dev_priv->regfile.saveBLC_PWM_CTL = I915_READ(BLC_PWM_PCH_CTL1);
+		dev_priv->regfile.saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_PCH_CTL2);
+		dev_priv->regfile.saveBLC_CPU_PWM_CTL = I915_READ(BLC_PWM_CPU_CTL);
+		dev_priv->regfile.saveBLC_CPU_PWM_CTL2 = I915_READ(BLC_PWM_CPU_CTL2);
+		dev_priv->regfile.saveLVDS = I915_READ(PCH_LVDS);
 	} else {
-		dev_priv->savePP_CONTROL = I915_READ(PP_CONTROL);
-		dev_priv->savePFIT_PGM_RATIOS = I915_READ(PFIT_PGM_RATIOS);
-		dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL);
-		dev_priv->saveBLC_HIST_CTL = I915_READ(BLC_HIST_CTL);
+		dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL);
+		dev_priv->regfile.savePFIT_PGM_RATIOS = I915_READ(PFIT_PGM_RATIOS);
+		dev_priv->regfile.saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL);
+		dev_priv->regfile.saveBLC_HIST_CTL = I915_READ(BLC_HIST_CTL);
 		if (INTEL_INFO(dev)->gen >= 4)
-			dev_priv->saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_CTL2);
+			dev_priv->regfile.saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_CTL2);
 		if (IS_MOBILE(dev) && !IS_I830(dev))
-			dev_priv->saveLVDS = I915_READ(LVDS);
+			dev_priv->regfile.saveLVDS = I915_READ(LVDS);
 	}
 
 	if (!IS_I830(dev) && !IS_845G(dev) && !HAS_PCH_SPLIT(dev))
-		dev_priv->savePFIT_CONTROL = I915_READ(PFIT_CONTROL);
+		dev_priv->regfile.savePFIT_CONTROL = I915_READ(PFIT_CONTROL);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS);
-		dev_priv->savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS);
-		dev_priv->savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR);
+		dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS);
+		dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS);
+		dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR);
 	} else {
-		dev_priv->savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS);
-		dev_priv->savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS);
-		dev_priv->savePP_DIVISOR = I915_READ(PP_DIVISOR);
+		dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS);
+		dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS);
+		dev_priv->regfile.savePP_DIVISOR = I915_READ(PP_DIVISOR);
 	}
 
-	/* Display Port state */
-	if (SUPPORTS_INTEGRATED_DP(dev)) {
-		dev_priv->saveDP_B = I915_READ(DP_B);
-		dev_priv->saveDP_C = I915_READ(DP_C);
-		dev_priv->saveDP_D = I915_READ(DP_D);
-		dev_priv->savePIPEA_GMCH_DATA_M = I915_READ(_PIPEA_GMCH_DATA_M);
-		dev_priv->savePIPEB_GMCH_DATA_M = I915_READ(_PIPEB_GMCH_DATA_M);
-		dev_priv->savePIPEA_GMCH_DATA_N = I915_READ(_PIPEA_GMCH_DATA_N);
-		dev_priv->savePIPEB_GMCH_DATA_N = I915_READ(_PIPEB_GMCH_DATA_N);
-		dev_priv->savePIPEA_DP_LINK_M = I915_READ(_PIPEA_DP_LINK_M);
-		dev_priv->savePIPEB_DP_LINK_M = I915_READ(_PIPEB_DP_LINK_M);
-		dev_priv->savePIPEA_DP_LINK_N = I915_READ(_PIPEA_DP_LINK_N);
-		dev_priv->savePIPEB_DP_LINK_N = I915_READ(_PIPEB_DP_LINK_N);
+	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
+		/* Display Port state */
+		if (SUPPORTS_INTEGRATED_DP(dev)) {
+			dev_priv->regfile.saveDP_B = I915_READ(DP_B);
+			dev_priv->regfile.saveDP_C = I915_READ(DP_C);
+			dev_priv->regfile.saveDP_D = I915_READ(DP_D);
+			dev_priv->regfile.savePIPEA_GMCH_DATA_M = I915_READ(_PIPEA_GMCH_DATA_M);
+			dev_priv->regfile.savePIPEB_GMCH_DATA_M = I915_READ(_PIPEB_GMCH_DATA_M);
+			dev_priv->regfile.savePIPEA_GMCH_DATA_N = I915_READ(_PIPEA_GMCH_DATA_N);
+			dev_priv->regfile.savePIPEB_GMCH_DATA_N = I915_READ(_PIPEB_GMCH_DATA_N);
+			dev_priv->regfile.savePIPEA_DP_LINK_M = I915_READ(_PIPEA_DP_LINK_M);
+			dev_priv->regfile.savePIPEB_DP_LINK_M = I915_READ(_PIPEB_DP_LINK_M);
+			dev_priv->regfile.savePIPEA_DP_LINK_N = I915_READ(_PIPEA_DP_LINK_N);
+			dev_priv->regfile.savePIPEB_DP_LINK_N = I915_READ(_PIPEB_DP_LINK_N);
+		}
+		/* FIXME: save TV & SDVO state */
 	}
-	/* FIXME: save TV & SDVO state */
 
 	/* Only save FBC state on the platform that supports FBC */
 	if (I915_HAS_FBC(dev)) {
 		if (HAS_PCH_SPLIT(dev)) {
-			dev_priv->saveDPFC_CB_BASE = I915_READ(ILK_DPFC_CB_BASE);
+			dev_priv->regfile.saveDPFC_CB_BASE = I915_READ(ILK_DPFC_CB_BASE);
 		} else if (IS_GM45(dev)) {
-			dev_priv->saveDPFC_CB_BASE = I915_READ(DPFC_CB_BASE);
+			dev_priv->regfile.saveDPFC_CB_BASE = I915_READ(DPFC_CB_BASE);
 		} else {
-			dev_priv->saveFBC_CFB_BASE = I915_READ(FBC_CFB_BASE);
-			dev_priv->saveFBC_LL_BASE = I915_READ(FBC_LL_BASE);
-			dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2);
-			dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL);
+			dev_priv->regfile.saveFBC_CFB_BASE = I915_READ(FBC_CFB_BASE);
+			dev_priv->regfile.saveFBC_LL_BASE = I915_READ(FBC_LL_BASE);
+			dev_priv->regfile.saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2);
+			dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL);
 		}
 	}
 
 	/* VGA state */
-	dev_priv->saveVGA0 = I915_READ(VGA0);
-	dev_priv->saveVGA1 = I915_READ(VGA1);
-	dev_priv->saveVGA_PD = I915_READ(VGA_PD);
+	dev_priv->regfile.saveVGA0 = I915_READ(VGA0);
+	dev_priv->regfile.saveVGA1 = I915_READ(VGA1);
+	dev_priv->regfile.saveVGA_PD = I915_READ(VGA_PD);
 	if (HAS_PCH_SPLIT(dev))
-		dev_priv->saveVGACNTRL = I915_READ(CPU_VGACNTRL);
+		dev_priv->regfile.saveVGACNTRL = I915_READ(CPU_VGACNTRL);
 	else
-		dev_priv->saveVGACNTRL = I915_READ(VGACNTRL);
+		dev_priv->regfile.saveVGACNTRL = I915_READ(VGACNTRL);
 
 	i915_save_vga(dev);
 }
@@ -700,97 +707,95 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* Display arbitration */
-	I915_WRITE(DSPARB, dev_priv->saveDSPARB);
+	I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB);
 
-	/* Display port ratios (must be done before clock is set) */
-	if (SUPPORTS_INTEGRATED_DP(dev)) {
-		I915_WRITE(_PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M);
-		I915_WRITE(_PIPEB_GMCH_DATA_M, dev_priv->savePIPEB_GMCH_DATA_M);
-		I915_WRITE(_PIPEA_GMCH_DATA_N, dev_priv->savePIPEA_GMCH_DATA_N);
-		I915_WRITE(_PIPEB_GMCH_DATA_N, dev_priv->savePIPEB_GMCH_DATA_N);
-		I915_WRITE(_PIPEA_DP_LINK_M, dev_priv->savePIPEA_DP_LINK_M);
-		I915_WRITE(_PIPEB_DP_LINK_M, dev_priv->savePIPEB_DP_LINK_M);
-		I915_WRITE(_PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N);
-		I915_WRITE(_PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N);
+	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
+		/* Display port ratios (must be done before clock is set) */
+		if (SUPPORTS_INTEGRATED_DP(dev)) {
+			I915_WRITE(_PIPEA_GMCH_DATA_M, dev_priv->regfile.savePIPEA_GMCH_DATA_M);
+			I915_WRITE(_PIPEB_GMCH_DATA_M, dev_priv->regfile.savePIPEB_GMCH_DATA_M);
+			I915_WRITE(_PIPEA_GMCH_DATA_N, dev_priv->regfile.savePIPEA_GMCH_DATA_N);
+			I915_WRITE(_PIPEB_GMCH_DATA_N, dev_priv->regfile.savePIPEB_GMCH_DATA_N);
+			I915_WRITE(_PIPEA_DP_LINK_M, dev_priv->regfile.savePIPEA_DP_LINK_M);
+			I915_WRITE(_PIPEB_DP_LINK_M, dev_priv->regfile.savePIPEB_DP_LINK_M);
+			I915_WRITE(_PIPEA_DP_LINK_N, dev_priv->regfile.savePIPEA_DP_LINK_N);
+			I915_WRITE(_PIPEB_DP_LINK_N, dev_priv->regfile.savePIPEB_DP_LINK_N);
+		}
 	}
 
 	/* This is only meaningful in non-KMS mode */
 	/* Don't restore them in KMS mode */
 	i915_restore_modeset_reg(dev);
 
-	/* CRT state */
-	if (HAS_PCH_SPLIT(dev))
-		I915_WRITE(PCH_ADPA, dev_priv->saveADPA);
-	else
-		I915_WRITE(ADPA, dev_priv->saveADPA);
-
 	/* LVDS state */
 	if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev))
-		I915_WRITE(BLC_PWM_CTL2, dev_priv->saveBLC_PWM_CTL2);
+		I915_WRITE(BLC_PWM_CTL2, dev_priv->regfile.saveBLC_PWM_CTL2);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(PCH_LVDS, dev_priv->saveLVDS);
+		I915_WRITE(PCH_LVDS, dev_priv->regfile.saveLVDS);
 	} else if (IS_MOBILE(dev) && !IS_I830(dev))
-		I915_WRITE(LVDS, dev_priv->saveLVDS);
+		I915_WRITE(LVDS, dev_priv->regfile.saveLVDS);
 
 	if (!IS_I830(dev) && !IS_845G(dev) && !HAS_PCH_SPLIT(dev))
-		I915_WRITE(PFIT_CONTROL, dev_priv->savePFIT_CONTROL);
+		I915_WRITE(PFIT_CONTROL, dev_priv->regfile.savePFIT_CONTROL);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(BLC_PWM_PCH_CTL1, dev_priv->saveBLC_PWM_CTL);
-		I915_WRITE(BLC_PWM_PCH_CTL2, dev_priv->saveBLC_PWM_CTL2);
+		I915_WRITE(BLC_PWM_PCH_CTL1, dev_priv->regfile.saveBLC_PWM_CTL);
+		I915_WRITE(BLC_PWM_PCH_CTL2, dev_priv->regfile.saveBLC_PWM_CTL2);
 		/* NOTE: BLC_PWM_CPU_CTL must be written after BLC_PWM_CPU_CTL2;
 		 * otherwise we get blank eDP screen after S3 on some machines
 		 */
-		I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->saveBLC_CPU_PWM_CTL2);
-		I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL);
-		I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS);
-		I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS);
-		I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR);
-		I915_WRITE(PCH_PP_CONTROL, dev_priv->savePP_CONTROL);
+		I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->regfile.saveBLC_CPU_PWM_CTL2);
+		I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->regfile.saveBLC_CPU_PWM_CTL);
+		I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
+		I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
+		I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
+		I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
 		I915_WRITE(RSTDBYCTL,
-			   dev_priv->saveMCHBAR_RENDER_STANDBY);
+			   dev_priv->regfile.saveMCHBAR_RENDER_STANDBY);
 	} else {
-		I915_WRITE(PFIT_PGM_RATIOS, dev_priv->savePFIT_PGM_RATIOS);
-		I915_WRITE(BLC_PWM_CTL, dev_priv->saveBLC_PWM_CTL);
-		I915_WRITE(BLC_HIST_CTL, dev_priv->saveBLC_HIST_CTL);
-		I915_WRITE(PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS);
-		I915_WRITE(PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS);
-		I915_WRITE(PP_DIVISOR, dev_priv->savePP_DIVISOR);
-		I915_WRITE(PP_CONTROL, dev_priv->savePP_CONTROL);
+		I915_WRITE(PFIT_PGM_RATIOS, dev_priv->regfile.savePFIT_PGM_RATIOS);
+		I915_WRITE(BLC_PWM_CTL, dev_priv->regfile.saveBLC_PWM_CTL);
+		I915_WRITE(BLC_HIST_CTL, dev_priv->regfile.saveBLC_HIST_CTL);
+		I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
+		I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
+		I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
+		I915_WRITE(PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
 	}
 
-	/* Display Port state */
-	if (SUPPORTS_INTEGRATED_DP(dev)) {
-		I915_WRITE(DP_B, dev_priv->saveDP_B);
-		I915_WRITE(DP_C, dev_priv->saveDP_C);
-		I915_WRITE(DP_D, dev_priv->saveDP_D);
+	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
+		/* Display Port state */
+		if (SUPPORTS_INTEGRATED_DP(dev)) {
+			I915_WRITE(DP_B, dev_priv->regfile.saveDP_B);
+			I915_WRITE(DP_C, dev_priv->regfile.saveDP_C);
+			I915_WRITE(DP_D, dev_priv->regfile.saveDP_D);
+		}
+		/* FIXME: restore TV & SDVO state */
 	}
-	/* FIXME: restore TV & SDVO state */
 
 	/* only restore FBC info on the platform that supports FBC*/
 	intel_disable_fbc(dev);
 	if (I915_HAS_FBC(dev)) {
 		if (HAS_PCH_SPLIT(dev)) {
-			I915_WRITE(ILK_DPFC_CB_BASE, dev_priv->saveDPFC_CB_BASE);
+			I915_WRITE(ILK_DPFC_CB_BASE, dev_priv->regfile.saveDPFC_CB_BASE);
 		} else if (IS_GM45(dev)) {
-			I915_WRITE(DPFC_CB_BASE, dev_priv->saveDPFC_CB_BASE);
+			I915_WRITE(DPFC_CB_BASE, dev_priv->regfile.saveDPFC_CB_BASE);
 		} else {
-			I915_WRITE(FBC_CFB_BASE, dev_priv->saveFBC_CFB_BASE);
-			I915_WRITE(FBC_LL_BASE, dev_priv->saveFBC_LL_BASE);
-			I915_WRITE(FBC_CONTROL2, dev_priv->saveFBC_CONTROL2);
-			I915_WRITE(FBC_CONTROL, dev_priv->saveFBC_CONTROL);
+			I915_WRITE(FBC_CFB_BASE, dev_priv->regfile.saveFBC_CFB_BASE);
+			I915_WRITE(FBC_LL_BASE, dev_priv->regfile.saveFBC_LL_BASE);
+			I915_WRITE(FBC_CONTROL2, dev_priv->regfile.saveFBC_CONTROL2);
+			I915_WRITE(FBC_CONTROL, dev_priv->regfile.saveFBC_CONTROL);
 		}
 	}
 	/* VGA state */
 	if (HAS_PCH_SPLIT(dev))
-		I915_WRITE(CPU_VGACNTRL, dev_priv->saveVGACNTRL);
+		I915_WRITE(CPU_VGACNTRL, dev_priv->regfile.saveVGACNTRL);
 	else
-		I915_WRITE(VGACNTRL, dev_priv->saveVGACNTRL);
+		I915_WRITE(VGACNTRL, dev_priv->regfile.saveVGACNTRL);
 
-	I915_WRITE(VGA0, dev_priv->saveVGA0);
-	I915_WRITE(VGA1, dev_priv->saveVGA1);
-	I915_WRITE(VGA_PD, dev_priv->saveVGA_PD);
+	I915_WRITE(VGA0, dev_priv->regfile.saveVGA0);
+	I915_WRITE(VGA1, dev_priv->regfile.saveVGA1);
+	I915_WRITE(VGA_PD, dev_priv->regfile.saveVGA_PD);
 	POSTING_READ(VGA_PD);
 	udelay(150);
 
@@ -802,46 +807,45 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i;
 
-	pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
+	pci_read_config_byte(dev->pdev, LBB, &dev_priv->regfile.saveLBB);
 
 	mutex_lock(&dev->struct_mutex);
 
-	/* Hardware status page */
-	dev_priv->saveHWS = I915_READ(HWS_PGA);
-
 	i915_save_display(dev);
 
-	/* Interrupt state */
-	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->saveDEIER = I915_READ(DEIER);
-		dev_priv->saveDEIMR = I915_READ(DEIMR);
-		dev_priv->saveGTIER = I915_READ(GTIER);
-		dev_priv->saveGTIMR = I915_READ(GTIMR);
-		dev_priv->saveFDI_RXA_IMR = I915_READ(_FDI_RXA_IMR);
-		dev_priv->saveFDI_RXB_IMR = I915_READ(_FDI_RXB_IMR);
-		dev_priv->saveMCHBAR_RENDER_STANDBY =
-			I915_READ(RSTDBYCTL);
-		dev_priv->savePCH_PORT_HOTPLUG = I915_READ(PCH_PORT_HOTPLUG);
-	} else {
-		dev_priv->saveIER = I915_READ(IER);
-		dev_priv->saveIMR = I915_READ(IMR);
+	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
+		/* Interrupt state */
+		if (HAS_PCH_SPLIT(dev)) {
+			dev_priv->regfile.saveDEIER = I915_READ(DEIER);
+			dev_priv->regfile.saveDEIMR = I915_READ(DEIMR);
+			dev_priv->regfile.saveGTIER = I915_READ(GTIER);
+			dev_priv->regfile.saveGTIMR = I915_READ(GTIMR);
+			dev_priv->regfile.saveFDI_RXA_IMR = I915_READ(_FDI_RXA_IMR);
+			dev_priv->regfile.saveFDI_RXB_IMR = I915_READ(_FDI_RXB_IMR);
+			dev_priv->regfile.saveMCHBAR_RENDER_STANDBY =
+				I915_READ(RSTDBYCTL);
+			dev_priv->regfile.savePCH_PORT_HOTPLUG = I915_READ(PCH_PORT_HOTPLUG);
+		} else {
+			dev_priv->regfile.saveIER = I915_READ(IER);
+			dev_priv->regfile.saveIMR = I915_READ(IMR);
+		}
 	}
 
 	intel_disable_gt_powersave(dev);
 
 	/* Cache mode state */
-	dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
+	dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
 
 	/* Memory Arbitration state */
-	dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
+	dev_priv->regfile.saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
 
 	/* Scratch space */
 	for (i = 0; i < 16; i++) {
-		dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
-		dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
+		dev_priv->regfile.saveSWF0[i] = I915_READ(SWF00 + (i << 2));
+		dev_priv->regfile.saveSWF1[i] = I915_READ(SWF10 + (i << 2));
 	}
 	for (i = 0; i < 3; i++)
-		dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
+		dev_priv->regfile.saveSWF2[i] = I915_READ(SWF30 + (i << 2));
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -853,41 +857,40 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i;
 
-	pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
+	pci_write_config_byte(dev->pdev, LBB, dev_priv->regfile.saveLBB);
 
 	mutex_lock(&dev->struct_mutex);
 
-	/* Hardware status page */
-	I915_WRITE(HWS_PGA, dev_priv->saveHWS);
-
 	i915_restore_display(dev);
 
-	/* Interrupt state */
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(DEIER, dev_priv->saveDEIER);
-		I915_WRITE(DEIMR, dev_priv->saveDEIMR);
-		I915_WRITE(GTIER, dev_priv->saveGTIER);
-		I915_WRITE(GTIMR, dev_priv->saveGTIMR);
-		I915_WRITE(_FDI_RXA_IMR, dev_priv->saveFDI_RXA_IMR);
-		I915_WRITE(_FDI_RXB_IMR, dev_priv->saveFDI_RXB_IMR);
-		I915_WRITE(PCH_PORT_HOTPLUG, dev_priv->savePCH_PORT_HOTPLUG);
-	} else {
-		I915_WRITE(IER, dev_priv->saveIER);
-		I915_WRITE(IMR, dev_priv->saveIMR);
+	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
+		/* Interrupt state */
+		if (HAS_PCH_SPLIT(dev)) {
+			I915_WRITE(DEIER, dev_priv->regfile.saveDEIER);
+			I915_WRITE(DEIMR, dev_priv->regfile.saveDEIMR);
+			I915_WRITE(GTIER, dev_priv->regfile.saveGTIER);
+			I915_WRITE(GTIMR, dev_priv->regfile.saveGTIMR);
+			I915_WRITE(_FDI_RXA_IMR, dev_priv->regfile.saveFDI_RXA_IMR);
+			I915_WRITE(_FDI_RXB_IMR, dev_priv->regfile.saveFDI_RXB_IMR);
+			I915_WRITE(PCH_PORT_HOTPLUG, dev_priv->regfile.savePCH_PORT_HOTPLUG);
+		} else {
+			I915_WRITE(IER, dev_priv->regfile.saveIER);
+			I915_WRITE(IMR, dev_priv->regfile.saveIMR);
+		}
 	}
 
 	/* Cache mode state */
-	I915_WRITE(CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000);
+	I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 | 0xffff0000);
 
 	/* Memory arbitration state */
-	I915_WRITE(MI_ARB_STATE, dev_priv->saveMI_ARB_STATE | 0xffff0000);
+	I915_WRITE(MI_ARB_STATE, dev_priv->regfile.saveMI_ARB_STATE | 0xffff0000);
 
 	for (i = 0; i < 16; i++) {
-		I915_WRITE(SWF00 + (i << 2), dev_priv->saveSWF0[i]);
-		I915_WRITE(SWF10 + (i << 2), dev_priv->saveSWF1[i]);
+		I915_WRITE(SWF00 + (i << 2), dev_priv->regfile.saveSWF0[i]);
+		I915_WRITE(SWF10 + (i << 2), dev_priv->regfile.saveSWF1[i]);
 	}
 	for (i = 0; i < 3; i++)
-		I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]);
+		I915_WRITE(SWF30 + (i << 2), dev_priv->regfile.saveSWF2[i]);
 
 	mutex_unlock(&dev->struct_mutex);
 
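
The mechanical dev_priv->saveFOO -> dev_priv->regfile.saveFOO conversion above gathers every suspend/resume register snapshot into a single sub-struct, so the whole register file can be treated as one unit instead of fields scattered across the device struct. A hedged sketch of the shape of the refactor (the field subset is illustrative, not the driver's full struct):

#include <stdint.h>

struct i915_suspend_saved_registers {
	uint32_t saveDSPARB;
	uint32_t saveADPA;
	uint64_t saveFENCE[16];
	/* ... the rest of the snapshot fields ... */
};

struct drm_i915_private_sketch {
	/* ... unrelated runtime state ... */
	struct i915_suspend_saved_registers regfile;
};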
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 903eebd..9462081 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -97,7 +97,7 @@
 
 static int l3_access_valid(struct drm_device *dev, loff_t offset)
 {
-	if (!IS_IVYBRIDGE(dev))
+	if (!HAS_L3_GPU_CACHE(dev))
 		return -EPERM;
 
 	if (offset % 4 != 0)
@@ -162,7 +162,7 @@
 	if (ret)
 		return ret;
 
-	if (!dev_priv->mm.l3_remap_info) {
+	if (!dev_priv->l3_parity.remap_info) {
 		temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
 		if (!temp) {
 			mutex_unlock(&drm_dev->struct_mutex);
@@ -182,9 +182,9 @@
 	 * at this point it is left as a TODO.
 	*/
 	if (temp)
-		dev_priv->mm.l3_remap_info = temp;
+		dev_priv->l3_parity.remap_info = temp;
 
-	memcpy(dev_priv->mm.l3_remap_info + (offset/4),
+	memcpy(dev_priv->l3_parity.remap_info + (offset/4),
 	       buf + (offset/4),
 	       count);
 
@@ -211,12 +211,9 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
+	mutex_lock(&dev_priv->rps.hw_lock);
 	ret = dev_priv->rps.cur_delay * GT_FREQUENCY_MULTIPLIER;
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return snprintf(buf, PAGE_SIZE, "%d", ret);
 }
@@ -228,12 +225,9 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
+	mutex_lock(&dev_priv->rps.hw_lock);
 	ret = dev_priv->rps.max_delay * GT_FREQUENCY_MULTIPLIER;
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return snprintf(buf, PAGE_SIZE, "%d", ret);
 }
@@ -254,16 +248,14 @@
 
 	val /= GT_FREQUENCY_MULTIPLIER;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	mutex_lock(&dev_priv->rps.hw_lock);
 
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 	hw_max = (rp_state_cap & 0xff);
 	hw_min = ((rp_state_cap & 0xff0000) >> 16);
 
 	if (val < hw_min || val > hw_max || val < dev_priv->rps.min_delay) {
-		mutex_unlock(&dev->struct_mutex);
+		mutex_unlock(&dev_priv->rps.hw_lock);
 		return -EINVAL;
 	}
 
@@ -272,7 +264,7 @@
 
 	dev_priv->rps.max_delay = val;
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return count;
 }
@@ -284,12 +276,9 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
+	mutex_lock(&dev_priv->rps.hw_lock);
 	ret = dev_priv->rps.min_delay * GT_FREQUENCY_MULTIPLIER;
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return snprintf(buf, PAGE_SIZE, "%d", ret);
 }
@@ -310,16 +299,14 @@
 
 	val /= GT_FREQUENCY_MULTIPLIER;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	mutex_lock(&dev_priv->rps.hw_lock);
 
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 	hw_max = (rp_state_cap & 0xff);
 	hw_min = ((rp_state_cap & 0xff0000) >> 16);
 
 	if (val < hw_min || val > hw_max || val > dev_priv->rps.max_delay) {
-		mutex_unlock(&dev->struct_mutex);
+		mutex_unlock(&dev_priv->rps.hw_lock);
 		return -EINVAL;
 	}
 
@@ -328,7 +315,7 @@
 
 	dev_priv->rps.min_delay = val;
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	return count;
 
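
The sysfs hunks above drop the device-wide struct_mutex (taken interruptibly, so a read could fail with -EINTR) in favor of the small dedicated rps.hw_lock that guards only the frequency fields. A minimal sketch of the pattern with pthread stand-ins (names are illustrative, not the driver's types):

#include <pthread.h>

struct rps_state {
	pthread_mutex_t hw_lock;	/* guards cur/min/max_delay only */
	int cur_delay;
};

static int read_cur_freq(struct rps_state *rps, int multiplier)
{
	int val;

	/* Short, uncontended hold; no -EINTR path to handle. */
	pthread_mutex_lock(&rps->hw_lock);
	val = rps->cur_delay * multiplier;
	pthread_mutex_unlock(&rps->hw_lock);
	return val;
}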
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 8134421..3db4a68 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -229,24 +229,26 @@
 );
 
 TRACE_EVENT(i915_gem_ring_dispatch,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
-	    TP_ARGS(ring, seqno),
+	    TP_PROTO(struct intel_ring_buffer *ring, u32 seqno, u32 flags),
+	    TP_ARGS(ring, seqno, flags),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
 			     __field(u32, seqno)
+			     __field(u32, flags)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
 			   __entry->seqno = seqno;
+			   __entry->flags = flags;
 			   i915_trace_irq_get(ring, seqno);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, seqno=%u",
-		      __entry->dev, __entry->ring, __entry->seqno)
+	    TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
+		      __entry->dev, __entry->ring, __entry->seqno, __entry->flags)
 );
 
 TRACE_EVENT(i915_gem_ring_flush,
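The dispatch tracepoint now records the dispatch flags alongside dev/ring/seqno. A toy consumer that formats an event the way the new TP_printk() does; the struct is a stand-in for what the tracepoint captures, not a kernel ABI:

#include <stdio.h>

struct dispatch_event {
	unsigned int dev, ring, seqno, flags;	/* u32 in the kernel */
};

static void print_dispatch(const struct dispatch_event *e)
{
	/* same layout as the updated format string */
	printf("dev=%u, ring=%u, seqno=%u, flags=%x\n",
	       e->dev, e->ring, e->seqno, e->flags);
}

int main(void)
{
	struct dispatch_event e = { 0, 1, 4711, 0x1 };

	print_dispatch(&e);
	return 0;
}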
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 56846ed..55ffba1 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -755,7 +755,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	 /* Set the Panel Power On/Off timings if uninitialized. */
-	if ((I915_READ(PP_ON_DELAYS) == 0) && (I915_READ(PP_OFF_DELAYS) == 0)) {
+	if (!HAS_PCH_SPLIT(dev) &&
+	    I915_READ(PP_ON_DELAYS) == 0 && I915_READ(PP_OFF_DELAYS) == 0) {
 		/* Set T2 to 40ms and T5 to 200ms */
 		I915_WRITE(PP_ON_DELAYS, 0x019007d0);
 
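For reference, the 0x019007d0 magic packs the two delays named in the comment as fields in units of 100 us. A small decoder under that assumption (the exact field widths here are illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t pp_on = 0x019007d0;
	unsigned int t2 = (pp_on >> 16) & 0x1fff; /* 0x0190 = 400 -> 40 ms */
	unsigned int t5 = pp_on & 0x1fff;	  /* 0x07d0 = 2000 -> 200 ms */

	printf("T2 = %u.%u ms, T5 = %u.%u ms\n",
	       t2 / 10, t2 % 10, t5 / 10, t5 % 10);
	return 0;
}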
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 6345878..9293878 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -198,6 +198,11 @@
 	if (mode->clock > max_clock)
 		return MODE_CLOCK_HIGH;
 
+	/* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. */
+	if (HAS_PCH_LPT(dev) &&
+	    (ironlake_get_lanes_required(mode->clock, 270000, 24) > 2))
+		return MODE_CLOCK_HIGH;
+
 	return MODE_OK;
 }
 
@@ -221,14 +226,20 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 adpa;
 
-	adpa = ADPA_HOTPLUG_BITS;
+	if (HAS_PCH_SPLIT(dev))
+		adpa = ADPA_HOTPLUG_BITS;
+	else
+		adpa = 0;
+
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 		adpa |= ADPA_HSYNC_ACTIVE_HIGH;
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
 		adpa |= ADPA_VSYNC_ACTIVE_HIGH;
 
 	/* For CPT allow 3 pipe config, for others just use A or B */
-	if (HAS_PCH_CPT(dev))
+	if (HAS_PCH_LPT(dev))
+		; /* Those bits don't exist here */
+	else if (HAS_PCH_CPT(dev))
 		adpa |= PORT_TRANS_SEL_CPT(intel_crtc->pipe);
 	else if (intel_crtc->pipe == 0)
 		adpa |= ADPA_PIPE_A_SELECT;
@@ -401,12 +412,16 @@
 				struct i2c_adapter *adapter)
 {
 	struct edid *edid;
+	int ret;
 
 	edid = intel_crt_get_edid(connector, adapter);
 	if (!edid)
 		return 0;
 
-	return intel_connector_update_modes(connector, edid);
+	ret = intel_connector_update_modes(connector, edid);
+	kfree(edid);
+
+	return ret;
 }
 
 static bool intel_crt_detect_ddc(struct drm_connector *connector)
@@ -644,10 +659,22 @@
 static void intel_crt_reset(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crt *crt = intel_attached_crt(connector);
 
-	if (HAS_PCH_SPLIT(dev))
+	if (HAS_PCH_SPLIT(dev)) {
+		u32 adpa;
+
+		adpa = I915_READ(PCH_ADPA);
+		adpa &= ~ADPA_CRT_HOTPLUG_MASK;
+		adpa |= ADPA_HOTPLUG_BITS;
+		I915_WRITE(PCH_ADPA, adpa);
+		POSTING_READ(PCH_ADPA);
+
+		DRM_DEBUG_KMS("pch crt adpa set to 0x%x\n", adpa);
 		crt->force_hotplug_required = 1;
+	}
+
 }
 
 /*
@@ -729,7 +756,7 @@
 
 	crt->base.type = INTEL_OUTPUT_ANALOG;
 	crt->base.cloneable = true;
-	if (IS_HASWELL(dev) || IS_I830(dev))
+	if (IS_I830(dev))
 		crt->base.crtc_mask = (1 << 0);
 	else
 		crt->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
@@ -749,7 +776,10 @@
 
 	crt->base.disable = intel_disable_crt;
 	crt->base.enable = intel_enable_crt;
-	crt->base.get_hw_state = intel_crt_get_hw_state;
+	if (IS_HASWELL(dev))
+		crt->base.get_hw_state = intel_ddi_get_hw_state;
+	else
+		crt->base.get_hw_state = intel_crt_get_hw_state;
 	intel_connector->get_hw_state = intel_connector_get_hw_state;
 
 	drm_encoder_helper_add(&crt->base.base, &crt_encoder_funcs);
@@ -766,18 +796,14 @@
 	 * Configure the automatic hotplug detection stuff
 	 */
 	crt->force_hotplug_required = 0;
-	if (HAS_PCH_SPLIT(dev)) {
-		u32 adpa;
-
-		adpa = I915_READ(PCH_ADPA);
-		adpa &= ~ADPA_CRT_HOTPLUG_MASK;
-		adpa |= ADPA_HOTPLUG_BITS;
-		I915_WRITE(PCH_ADPA, adpa);
-		POSTING_READ(PCH_ADPA);
-
-		DRM_DEBUG_KMS("pch crt adpa set to 0x%x\n", adpa);
-		crt->force_hotplug_required = 1;
-	}
 
 	dev_priv->hotplug_supported_mask |= CRT_HOTPLUG_INT_STATUS;
+
+	/*
+	 * TODO: find a proper way to discover whether we need to set the
+	 * polarity reversal bit or not, instead of relying on the BIOS.
+	 */
+	if (HAS_PCH_LPT(dev))
+		dev_priv->fdi_rx_polarity_reversed =
+		     !!(I915_READ(_FDI_RXA_CTL) & FDI_RX_POLARITY_REVERSED_LPT);
 }
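intel_crt_reset() now re-asserts the hotplug configuration with a plain read-modify-write on ADPA. The same idiom in isolation, with made-up register and mask values:

#include <stdio.h>

/* clear the whole field first, then install the new configuration */
static unsigned int rmw(unsigned int val, unsigned int mask, unsigned int bits)
{
	val &= ~mask;
	val |= bits;
	return val;
}

int main(void)
{
	unsigned int adpa = 0xdeadbeef;
	unsigned int hotplug_mask = 0x03ff0000;	/* illustrative, not the real mask */

	adpa = rmw(adpa, hotplug_mask, 0x00110000);
	printf("adpa = 0x%08x\n", adpa);
	return 0;
}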
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index bfe3754..4bad0f7 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -58,6 +58,26 @@
 	0x00FFFFFF, 0x00040006		/* HDMI parameters */
 };
 
+static enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder)
+{
+	struct drm_encoder *encoder = &intel_encoder->base;
+	int type = intel_encoder->type;
+
+	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP ||
+	    type == INTEL_OUTPUT_HDMI || type == INTEL_OUTPUT_UNKNOWN) {
+		struct intel_digital_port *intel_dig_port =
+			enc_to_dig_port(encoder);
+		return intel_dig_port->port;
+
+	} else if (type == INTEL_OUTPUT_ANALOG) {
+		return PORT_E;
+
+	} else {
+		DRM_ERROR("Invalid DDI encoder type %d\n", type);
+		BUG();
+	}
+}
+
 /* On Haswell, DDI port buffers must be programmed with correct values
  * in advance. The buffer values are different for FDI and DP modes,
  * but the HDMI/DVI fields are shared among those. So we program the DDI
@@ -118,6 +138,19 @@
 	DDI_BUF_EMP_800MV_3_5DB_HSW
 };
 
+static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv,
+				    enum port port)
+{
+	uint32_t reg = DDI_BUF_CTL(port);
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		udelay(1);
+		if (I915_READ(reg) & DDI_BUF_IS_IDLE)
+			return;
+	}
+	DRM_ERROR("Timeout waiting for DDI BUF %c idle bit\n", port_name(port));
+}
 
 /* Starting with Haswell, different DDI ports can work in FDI mode for
  * connection to the PCH-located connectors. For this, it is necessary to train
@@ -133,25 +166,36 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	u32 reg, temp, i;
+	u32 temp, i, rx_ctl_val;
 
-	/* Configure CPU PLL, wait for warmup */
-	I915_WRITE(SPLL_CTL,
-			SPLL_PLL_ENABLE |
-			SPLL_PLL_FREQ_1350MHz |
-			SPLL_PLL_SCC);
+	/* Set the FDI_RX_MISC pwrdn lanes and the two workarounds listed in
+	 * the mode set "sequence for CRT port" document:
+	 * - TP1 to TP2 time with the default value
+	 * - FDI delay to 90h
+	 */
+	I915_WRITE(_FDI_RXA_MISC, FDI_RX_PWRDN_LANE1_VAL(2) |
+				  FDI_RX_PWRDN_LANE0_VAL(2) |
+				  FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
 
-	/* Use SPLL to drive the output when in FDI mode */
-	I915_WRITE(PORT_CLK_SEL(PORT_E),
-			PORT_CLK_SEL_SPLL);
-	I915_WRITE(PIPE_CLK_SEL(pipe),
-			PIPE_CLK_SEL_PORT(PORT_E));
+	/* Enable the PCH Receiver FDI PLL */
+	rx_ctl_val = FDI_RX_PLL_ENABLE | FDI_RX_ENHANCE_FRAME_ENABLE |
+		     ((intel_crtc->fdi_lanes - 1) << 19);
+	if (dev_priv->fdi_rx_polarity_reversed)
+		rx_ctl_val |= FDI_RX_POLARITY_REVERSED_LPT;
+	I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
+	POSTING_READ(_FDI_RXA_CTL);
+	udelay(220);
 
-	udelay(20);
+	/* Switch from Rawclk to PCDclk */
+	rx_ctl_val |= FDI_PCDCLK;
+	I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
 
-	/* Start the training iterating through available voltages and emphasis */
-	for (i=0; i < ARRAY_SIZE(hsw_ddi_buf_ctl_values); i++) {
+	/* Configure Port Clock Select */
+	I915_WRITE(PORT_CLK_SEL(PORT_E), intel_crtc->ddi_pll_sel);
+
+	/* Start the training iterating through available voltages and emphasis,
+	 * testing each value twice. */
+	for (i = 0; i < ARRAY_SIZE(hsw_ddi_buf_ctl_values) * 2; i++) {
 		/* Configure DP_TP_CTL with auto-training */
 		I915_WRITE(DP_TP_CTL(PORT_E),
 					DP_TP_CTL_FDI_AUTOTRAIN |
@@ -160,103 +204,75 @@
 					DP_TP_CTL_ENABLE);
 
 		/* Configure and enable DDI_BUF_CTL for DDI E with next voltage */
-		temp = I915_READ(DDI_BUF_CTL(PORT_E));
-		temp = (temp & ~DDI_BUF_EMP_MASK);
 		I915_WRITE(DDI_BUF_CTL(PORT_E),
-				temp |
-				DDI_BUF_CTL_ENABLE |
-				DDI_PORT_WIDTH_X2 |
-				hsw_ddi_buf_ctl_values[i]);
+			   DDI_BUF_CTL_ENABLE |
+			   ((intel_crtc->fdi_lanes - 1) << 1) |
+			   hsw_ddi_buf_ctl_values[i / 2]);
+		POSTING_READ(DDI_BUF_CTL(PORT_E));
 
 		udelay(600);
 
-		/* We need to program FDI_RX_MISC with the default TP1 to TP2
-		 * values before enabling the receiver, and configure the delay
-		 * for the FDI timing generator to 90h. Luckily, all the other
-		 * bits are supposed to be zeroed, so we can write those values
-		 * directly.
-		 */
-		I915_WRITE(FDI_RX_MISC(pipe), FDI_RX_TP1_TO_TP2_48 |
-				FDI_RX_FDI_DELAY_90);
+		/* Program PCH FDI Receiver TU */
+		I915_WRITE(_FDI_RXA_TUSIZE1, TU_SIZE(64));
 
-		/* Enable CPU FDI Receiver with auto-training */
-		reg = FDI_RX_CTL(pipe);
-		I915_WRITE(reg,
-				I915_READ(reg) |
-					FDI_LINK_TRAIN_AUTO |
-					FDI_RX_ENABLE |
-					FDI_LINK_TRAIN_PATTERN_1_CPT |
-					FDI_RX_ENHANCE_FRAME_ENABLE |
-					FDI_PORT_WIDTH_2X_LPT |
-					FDI_RX_PLL_ENABLE);
-		POSTING_READ(reg);
-		udelay(100);
+		/* Enable PCH FDI Receiver with auto-training */
+		rx_ctl_val |= FDI_RX_ENABLE | FDI_LINK_TRAIN_AUTO;
+		I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
+		POSTING_READ(_FDI_RXA_CTL);
+
+		/* Wait for FDI receiver lane calibration */
+		udelay(30);
+
+		/* Unset FDI_RX_MISC pwrdn lanes */
+		temp = I915_READ(_FDI_RXA_MISC);
+		temp &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK);
+		I915_WRITE(_FDI_RXA_MISC, temp);
+		POSTING_READ(_FDI_RXA_MISC);
+
+		/* Wait for FDI auto training time */
+		udelay(5);
 
 		temp = I915_READ(DP_TP_STATUS(PORT_E));
 		if (temp & DP_TP_STATUS_AUTOTRAIN_DONE) {
-			DRM_DEBUG_DRIVER("BUF_CTL training done on %d step\n", i);
+			DRM_DEBUG_KMS("FDI link training done on step %d\n", i);
 
 			/* Enable normal pixel sending for FDI */
 			I915_WRITE(DP_TP_CTL(PORT_E),
-						DP_TP_CTL_FDI_AUTOTRAIN |
-						DP_TP_CTL_LINK_TRAIN_NORMAL |
-						DP_TP_CTL_ENHANCED_FRAME_ENABLE |
-						DP_TP_CTL_ENABLE);
+				   DP_TP_CTL_FDI_AUTOTRAIN |
+				   DP_TP_CTL_LINK_TRAIN_NORMAL |
+				   DP_TP_CTL_ENHANCED_FRAME_ENABLE |
+				   DP_TP_CTL_ENABLE);
 
-			/* Enable PIPE_DDI_FUNC_CTL for the pipe to work in FDI mode */
-			temp = I915_READ(DDI_FUNC_CTL(pipe));
-			temp &= ~PIPE_DDI_PORT_MASK;
-			temp |= PIPE_DDI_SELECT_PORT(PORT_E) |
-					PIPE_DDI_MODE_SELECT_FDI |
-					PIPE_DDI_FUNC_ENABLE |
-					PIPE_DDI_PORT_WIDTH_X2;
-			I915_WRITE(DDI_FUNC_CTL(pipe),
-					temp);
-			break;
-		} else {
-			DRM_ERROR("Error training BUF_CTL %d\n", i);
-
-			/* Disable DP_TP_CTL and FDI_RX_CTL) and retry */
-			I915_WRITE(DP_TP_CTL(PORT_E),
-					I915_READ(DP_TP_CTL(PORT_E)) &
-						~DP_TP_CTL_ENABLE);
-			I915_WRITE(FDI_RX_CTL(pipe),
-					I915_READ(FDI_RX_CTL(pipe)) &
-						~FDI_RX_PLL_ENABLE);
-			continue;
+			return;
 		}
+
+		temp = I915_READ(DDI_BUF_CTL(PORT_E));
+		temp &= ~DDI_BUF_CTL_ENABLE;
+		I915_WRITE(DDI_BUF_CTL(PORT_E), temp);
+		POSTING_READ(DDI_BUF_CTL(PORT_E));
+
+		/* Disable DP_TP_CTL and FDI_RX_CTL and retry */
+		temp = I915_READ(DP_TP_CTL(PORT_E));
+		temp &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
+		temp |= DP_TP_CTL_LINK_TRAIN_PAT1;
+		I915_WRITE(DP_TP_CTL(PORT_E), temp);
+		POSTING_READ(DP_TP_CTL(PORT_E));
+
+		intel_wait_ddi_buf_idle(dev_priv, PORT_E);
+
+		rx_ctl_val &= ~FDI_RX_ENABLE;
+		I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
+		POSTING_READ(_FDI_RXA_CTL);
+
+		/* Reset FDI_RX_MISC pwrdn lanes */
+		temp = I915_READ(_FDI_RXA_MISC);
+		temp &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK);
+		temp |= FDI_RX_PWRDN_LANE1_VAL(2) | FDI_RX_PWRDN_LANE0_VAL(2);
+		I915_WRITE(_FDI_RXA_MISC, temp);
+		POSTING_READ(_FDI_RXA_MISC);
 	}
 
-	DRM_DEBUG_KMS("FDI train done.\n");
-}
-
-/* For DDI connections, it is possible to support different outputs over the
- * same DDI port, such as HDMI or DP or even VGA via FDI. So we don't know by
- * the time the output is detected what exactly is on the other end of it. This
- * function aims at providing support for this detection and proper output
- * configuration.
- */
-void intel_ddi_init(struct drm_device *dev, enum port port)
-{
-	/* For now, we don't do any proper output detection and assume that we
-	 * handle HDMI only */
-
-	switch(port){
-	case PORT_A:
-		/* We don't handle eDP and DP yet */
-		DRM_DEBUG_DRIVER("Found digital output on DDI port A\n");
-		break;
-	/* Assume that the  ports B, C and D are working in HDMI mode for now */
-	case PORT_B:
-	case PORT_C:
-	case PORT_D:
-		intel_hdmi_init(dev, DDI_BUF_CTL(port), port);
-		break;
-	default:
-		DRM_DEBUG_DRIVER("No handlers defined for port %d, skipping DDI initialization\n",
-				port);
-		break;
-	}
+	DRM_ERROR("FDI link training failed!\n");
 }
 
 /* WRPLL clock dividers */
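The rewritten trainer walks the whole drive-strength table and tests each value twice, rather than retrying a single entry, and only gives up after the table is exhausted. A control-flow-only sketch of that loop; the hardware pokes are stubbed out:

#include <stdbool.h>
#include <stdio.h>

#define N_BUF_CTL_VALUES 4	/* size of the voltage/emphasis table */

static bool try_autotrain(int setting, int attempt)
{
	/* stand-in for programming DDI_BUF_CTL and polling DP_TP_STATUS */
	return setting == 2 && attempt == 1;	/* pretend step 5 succeeds */
}

int main(void)
{
	int i;

	for (i = 0; i < N_BUF_CTL_VALUES * 2; i++) {
		if (try_autotrain(i / 2, i % 2)) {
			printf("FDI link training done on step %d\n", i);
			return 0;
		}
		/* on failure: disable the buffer, reset the power-down
		 * lanes, and retry with the next voltage/emphasis pair */
	}
	fprintf(stderr, "FDI link training failed!\n");
	return 1;
}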
@@ -645,116 +661,435 @@
 	{298000,	2,	21,	19},
 };
 
-void intel_ddi_mode_set(struct drm_encoder *encoder,
-				struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
+static void intel_ddi_mode_set(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc = encoder->crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	int port = intel_hdmi->ddi_port;
+	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+	int port = intel_ddi_get_encoder_port(intel_encoder);
 	int pipe = intel_crtc->pipe;
-	int p, n2, r2;
-	u32 temp, i;
+	int type = intel_encoder->type;
 
-	/* On Haswell, we need to enable the clocks and prepare DDI function to
-	 * work in HDMI mode for this pipe.
-	 */
-	DRM_DEBUG_KMS("Preparing HDMI DDI mode for Haswell on port %c, pipe %c\n", port_name(port), pipe_name(pipe));
+	DRM_DEBUG_KMS("Preparing DDI mode for Haswell on port %c, pipe %c\n",
+		      port_name(port), pipe_name(pipe));
+
+	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+		intel_dp->DP = DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
+		switch (intel_dp->lane_count) {
+		case 1:
+			intel_dp->DP |= DDI_PORT_WIDTH_X1;
+			break;
+		case 2:
+			intel_dp->DP |= DDI_PORT_WIDTH_X2;
+			break;
+		case 4:
+			intel_dp->DP |= DDI_PORT_WIDTH_X4;
+			break;
+		default:
+			intel_dp->DP |= DDI_PORT_WIDTH_X4;
+			WARN(1, "Unexpected DP lane count %d\n",
+			     intel_dp->lane_count);
+			break;
+		}
+
+		if (intel_dp->has_audio) {
+			DRM_DEBUG_DRIVER("DP audio on pipe %c on DDI\n",
+					 pipe_name(intel_crtc->pipe));
+
+			/* write eld */
+			DRM_DEBUG_DRIVER("DP audio: write eld information\n");
+			intel_write_eld(encoder, adjusted_mode);
+		}
+
+		intel_dp_init_link_config(intel_dp);
+
+	} else if (type == INTEL_OUTPUT_HDMI) {
+		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+
+		if (intel_hdmi->has_audio) {
+			/* Proper support for digital audio needs new logic
+			 * and a new set of registers, so we leave it for future
+			 * patch bombing.
+			 */
+			DRM_DEBUG_DRIVER("HDMI audio on pipe %c on DDI\n",
+					 pipe_name(intel_crtc->pipe));
+
+			/* write eld */
+			DRM_DEBUG_DRIVER("HDMI audio: write eld information\n");
+			intel_write_eld(encoder, adjusted_mode);
+		}
+
+		intel_hdmi->set_infoframes(encoder, adjusted_mode);
+	}
+}
+
+static struct intel_encoder *
+intel_ddi_get_crtc_encoder(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_encoder *intel_encoder, *ret = NULL;
+	int num_encoders = 0;
+
+	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
+		ret = intel_encoder;
+		num_encoders++;
+	}
+
+	if (num_encoders != 1)
+		WARN(1, "%d encoders on crtc for pipe %d\n", num_encoders,
+		     intel_crtc->pipe);
+
+	BUG_ON(ret == NULL);
+	return ret;
+}
+
+void intel_ddi_put_crtc_pll(struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct intel_ddi_plls *plls = &dev_priv->ddi_plls;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	uint32_t val;
+
+	switch (intel_crtc->ddi_pll_sel) {
+	case PORT_CLK_SEL_SPLL:
+		plls->spll_refcount--;
+		if (plls->spll_refcount == 0) {
+			DRM_DEBUG_KMS("Disabling SPLL\n");
+			val = I915_READ(SPLL_CTL);
+			WARN_ON(!(val & SPLL_PLL_ENABLE));
+			I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE);
+			POSTING_READ(SPLL_CTL);
+		}
+		break;
+	case PORT_CLK_SEL_WRPLL1:
+		plls->wrpll1_refcount--;
+		if (plls->wrpll1_refcount == 0) {
+			DRM_DEBUG_KMS("Disabling WRPLL 1\n");
+			val = I915_READ(WRPLL_CTL1);
+			WARN_ON(!(val & WRPLL_PLL_ENABLE));
+			I915_WRITE(WRPLL_CTL1, val & ~WRPLL_PLL_ENABLE);
+			POSTING_READ(WRPLL_CTL1);
+		}
+		break;
+	case PORT_CLK_SEL_WRPLL2:
+		plls->wrpll2_refcount--;
+		if (plls->wrpll2_refcount == 0) {
+			DRM_DEBUG_KMS("Disabling WRPLL 2\n");
+			val = I915_READ(WRPLL_CTL2);
+			WARN_ON(!(val & WRPLL_PLL_ENABLE));
+			I915_WRITE(WRPLL_CTL2, val & ~WRPLL_PLL_ENABLE);
+			POSTING_READ(WRPLL_CTL2);
+		}
+		break;
+	}
+
+	WARN(plls->spll_refcount < 0, "Invalid SPLL refcount\n");
+	WARN(plls->wrpll1_refcount < 0, "Invalid WRPLL1 refcount\n");
+	WARN(plls->wrpll2_refcount < 0, "Invalid WRPLL2 refcount\n");
+
+	intel_crtc->ddi_pll_sel = PORT_CLK_SEL_NONE;
+}
+
+static void intel_ddi_calculate_wrpll(int clock, int *p, int *n2, int *r2)
+{
+	u32 i;
 
 	for (i = 0; i < ARRAY_SIZE(wrpll_tmds_clock_table); i++)
-		if (crtc->mode.clock <= wrpll_tmds_clock_table[i].clock)
+		if (clock <= wrpll_tmds_clock_table[i].clock)
 			break;
 
 	if (i == ARRAY_SIZE(wrpll_tmds_clock_table))
 		i--;
 
-	p = wrpll_tmds_clock_table[i].p;
-	n2 = wrpll_tmds_clock_table[i].n2;
-	r2 = wrpll_tmds_clock_table[i].r2;
+	*p = wrpll_tmds_clock_table[i].p;
+	*n2 = wrpll_tmds_clock_table[i].n2;
+	*r2 = wrpll_tmds_clock_table[i].r2;
 
-	if (wrpll_tmds_clock_table[i].clock != crtc->mode.clock)
-		DRM_INFO("WR PLL: using settings for %dKHz on %dKHz mode\n",
-			 wrpll_tmds_clock_table[i].clock, crtc->mode.clock);
+	if (wrpll_tmds_clock_table[i].clock != clock)
+		DRM_INFO("WRPLL: using settings for %dKHz on %dKHz mode\n",
+			 wrpll_tmds_clock_table[i].clock, clock);
 
-	DRM_DEBUG_KMS("WR PLL: %dKHz refresh rate with p=%d, n2=%d r2=%d\n",
-		      crtc->mode.clock, p, n2, r2);
+	DRM_DEBUG_KMS("WRPLL: %dKHz refresh rate with p=%d, n2=%d r2=%d\n",
+		      clock, *p, *n2, *r2);
+}
 
-	/* Enable LCPLL if disabled */
-	temp = I915_READ(LCPLL_CTL);
-	if (temp & LCPLL_PLL_DISABLE)
-		I915_WRITE(LCPLL_CTL,
-				temp & ~LCPLL_PLL_DISABLE);
+bool intel_ddi_pll_mode_set(struct drm_crtc *crtc, int clock)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
+	struct drm_encoder *encoder = &intel_encoder->base;
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct intel_ddi_plls *plls = &dev_priv->ddi_plls;
+	int type = intel_encoder->type;
+	enum pipe pipe = intel_crtc->pipe;
+	uint32_t reg, val;
 
-	/* Configure WR PLL 1, program the correct divider values for
-	 * the desired frequency and wait for warmup */
-	I915_WRITE(WRPLL_CTL1,
-			WRPLL_PLL_ENABLE |
-			WRPLL_PLL_SELECT_LCPLL_2700 |
-			WRPLL_DIVIDER_REFERENCE(r2) |
-			WRPLL_DIVIDER_FEEDBACK(n2) |
-			WRPLL_DIVIDER_POST(p));
+	/* TODO: reuse PLLs when possible (compare values) */
 
-	udelay(20);
+	intel_ddi_put_crtc_pll(crtc);
 
-	/* Use WRPLL1 clock to drive the output to the port, and tell the pipe to use
-	 * this port for connection.
-	 */
-	I915_WRITE(PORT_CLK_SEL(port),
-			PORT_CLK_SEL_WRPLL1);
-	I915_WRITE(PIPE_CLK_SEL(pipe),
-			PIPE_CLK_SEL_PORT(port));
+	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
-	udelay(20);
+		switch (intel_dp->link_bw) {
+		case DP_LINK_BW_1_62:
+			intel_crtc->ddi_pll_sel = PORT_CLK_SEL_LCPLL_810;
+			break;
+		case DP_LINK_BW_2_7:
+			intel_crtc->ddi_pll_sel = PORT_CLK_SEL_LCPLL_1350;
+			break;
+		case DP_LINK_BW_5_4:
+			intel_crtc->ddi_pll_sel = PORT_CLK_SEL_LCPLL_2700;
+			break;
+		default:
+			DRM_ERROR("Link bandwidth %d unsupported\n",
+				  intel_dp->link_bw);
+			return false;
+		}
 
-	if (intel_hdmi->has_audio) {
-		/* Proper support for digital audio needs a new logic and a new set
-		 * of registers, so we leave it for future patch bombing.
-		 */
-		DRM_DEBUG_DRIVER("HDMI audio on pipe %c on DDI\n",
-				 pipe_name(intel_crtc->pipe));
+		/* We don't need to turn any PLL on because we'll use LCPLL. */
+		return true;
 
-		/* write eld */
-		DRM_DEBUG_DRIVER("HDMI audio: write eld information\n");
-		intel_write_eld(encoder, adjusted_mode);
+	} else if (type == INTEL_OUTPUT_HDMI) {
+		int p, n2, r2;
+
+		if (plls->wrpll1_refcount == 0) {
+			DRM_DEBUG_KMS("Using WRPLL 1 on pipe %c\n",
+				      pipe_name(pipe));
+			plls->wrpll1_refcount++;
+			reg = WRPLL_CTL1;
+			intel_crtc->ddi_pll_sel = PORT_CLK_SEL_WRPLL1;
+		} else if (plls->wrpll2_refcount == 0) {
+			DRM_DEBUG_KMS("Using WRPLL 2 on pipe %c\n",
+				      pipe_name(pipe));
+			plls->wrpll2_refcount++;
+			reg = WRPLL_CTL2;
+			intel_crtc->ddi_pll_sel = PORT_CLK_SEL_WRPLL2;
+		} else {
+			DRM_ERROR("No WRPLLs available!\n");
+			return false;
+		}
+
+		WARN(I915_READ(reg) & WRPLL_PLL_ENABLE,
+		     "WRPLL already enabled\n");
+
+		intel_ddi_calculate_wrpll(clock, &p, &n2, &r2);
+
+		val = WRPLL_PLL_ENABLE | WRPLL_PLL_SELECT_LCPLL_2700 |
+		      WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) |
+		      WRPLL_DIVIDER_POST(p);
+
+	} else if (type == INTEL_OUTPUT_ANALOG) {
+		if (plls->spll_refcount == 0) {
+			DRM_DEBUG_KMS("Using SPLL on pipe %c\n",
+				      pipe_name(pipe));
+			plls->spll_refcount++;
+			reg = SPLL_CTL;
+			intel_crtc->ddi_pll_sel = PORT_CLK_SEL_SPLL;
+		} else {
+			DRM_ERROR("SPLL already in use\n");
+			return false;
+		}
+
+		WARN(I915_READ(reg) & SPLL_PLL_ENABLE,
+		     "SPLL already enabled\n");
+
+		val = SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC;
+
+	} else {
+		WARN(1, "Invalid DDI encoder type %d\n", type);
+		return false;
 	}
 
-	/* Enable PIPE_DDI_FUNC_CTL for the pipe to work in HDMI mode */
-	temp = PIPE_DDI_FUNC_ENABLE | PIPE_DDI_SELECT_PORT(port);
+	I915_WRITE(reg, val);
+	udelay(20);
+
+	return true;
+}
+
+void intel_ddi_set_pipe_settings(struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	int type = intel_encoder->type;
+	uint32_t temp;
+
+	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
+
+		temp = TRANS_MSA_SYNC_CLK;
+		switch (intel_crtc->bpp) {
+		case 18:
+			temp |= TRANS_MSA_6_BPC;
+			break;
+		case 24:
+			temp |= TRANS_MSA_8_BPC;
+			break;
+		case 30:
+			temp |= TRANS_MSA_10_BPC;
+			break;
+		case 36:
+			temp |= TRANS_MSA_12_BPC;
+			break;
+		default:
+			temp |= TRANS_MSA_8_BPC;
+			WARN(1, "%d bpp unsupported by DDI function\n",
+			     intel_crtc->bpp);
+		}
+		I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp);
+	}
+}
+
+void intel_ddi_enable_pipe_func(struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
+	struct drm_encoder *encoder = &intel_encoder->base;
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	enum pipe pipe = intel_crtc->pipe;
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum port port = intel_ddi_get_encoder_port(intel_encoder);
+	int type = intel_encoder->type;
+	uint32_t temp;
+
+	/* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */
+	temp = TRANS_DDI_FUNC_ENABLE;
+	temp |= TRANS_DDI_SELECT_PORT(port);
 
 	switch (intel_crtc->bpp) {
 	case 18:
-		temp |= PIPE_DDI_BPC_6;
+		temp |= TRANS_DDI_BPC_6;
 		break;
 	case 24:
-		temp |= PIPE_DDI_BPC_8;
+		temp |= TRANS_DDI_BPC_8;
 		break;
 	case 30:
-		temp |= PIPE_DDI_BPC_10;
+		temp |= TRANS_DDI_BPC_10;
 		break;
 	case 36:
-		temp |= PIPE_DDI_BPC_12;
+		temp |= TRANS_DDI_BPC_12;
 		break;
 	default:
-		WARN(1, "%d bpp unsupported by pipe DDI function\n",
+		WARN(1, "%d bpp unsupported by transcoder DDI function\n",
 		     intel_crtc->bpp);
 	}
 
-	if (intel_hdmi->has_hdmi_sink)
-		temp |= PIPE_DDI_MODE_SELECT_HDMI;
+	if (crtc->mode.flags & DRM_MODE_FLAG_PVSYNC)
+		temp |= TRANS_DDI_PVSYNC;
+	if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC)
+		temp |= TRANS_DDI_PHSYNC;
+
+	if (cpu_transcoder == TRANSCODER_EDP) {
+		switch (pipe) {
+		case PIPE_A:
+			temp |= TRANS_DDI_EDP_INPUT_A_ONOFF;
+			break;
+		case PIPE_B:
+			temp |= TRANS_DDI_EDP_INPUT_B_ONOFF;
+			break;
+		case PIPE_C:
+			temp |= TRANS_DDI_EDP_INPUT_C_ONOFF;
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+
+	if (type == INTEL_OUTPUT_HDMI) {
+		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+
+		if (intel_hdmi->has_hdmi_sink)
+			temp |= TRANS_DDI_MODE_SELECT_HDMI;
+		else
+			temp |= TRANS_DDI_MODE_SELECT_DVI;
+
+	} else if (type == INTEL_OUTPUT_ANALOG) {
+		temp |= TRANS_DDI_MODE_SELECT_FDI;
+		temp |= (intel_crtc->fdi_lanes - 1) << 1;
+
+	} else if (type == INTEL_OUTPUT_DISPLAYPORT ||
+		   type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+		temp |= TRANS_DDI_MODE_SELECT_DP_SST;
+
+		switch (intel_dp->lane_count) {
+		case 1:
+			temp |= TRANS_DDI_PORT_WIDTH_X1;
+			break;
+		case 2:
+			temp |= TRANS_DDI_PORT_WIDTH_X2;
+			break;
+		case 4:
+			temp |= TRANS_DDI_PORT_WIDTH_X4;
+			break;
+		default:
+			temp |= TRANS_DDI_PORT_WIDTH_X4;
+			WARN(1, "Unsupported lane count %d\n",
+			     intel_dp->lane_count);
+		}
+
+	} else {
+		WARN(1, "Invalid encoder type %d for pipe %d\n",
+		     intel_encoder->type, pipe);
+	}
+
+	I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp);
+}
+
+void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
+				       enum transcoder cpu_transcoder)
+{
+	uint32_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
+	uint32_t val = I915_READ(reg);
+
+	val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK);
+	val |= TRANS_DDI_PORT_NONE;
+	I915_WRITE(reg, val);
+}
+
+bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
+{
+	struct drm_device *dev = intel_connector->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_encoder *intel_encoder = intel_connector->encoder;
+	int type = intel_connector->base.connector_type;
+	enum port port = intel_ddi_get_encoder_port(intel_encoder);
+	enum pipe pipe = 0;
+	enum transcoder cpu_transcoder;
+	uint32_t tmp;
+
+	if (!intel_encoder->get_hw_state(intel_encoder, &pipe))
+		return false;
+
+	if (port == PORT_A)
+		cpu_transcoder = TRANSCODER_EDP;
 	else
-		temp |= PIPE_DDI_MODE_SELECT_DVI;
+		cpu_transcoder = pipe;
 
-	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
-		temp |= PIPE_DDI_PVSYNC;
-	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
-		temp |= PIPE_DDI_PHSYNC;
+	tmp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
 
-	I915_WRITE(DDI_FUNC_CTL(pipe), temp);
+	switch (tmp & TRANS_DDI_MODE_SELECT_MASK) {
+	case TRANS_DDI_MODE_SELECT_HDMI:
+	case TRANS_DDI_MODE_SELECT_DVI:
+		return (type == DRM_MODE_CONNECTOR_HDMIA);
 
-	intel_hdmi->set_infoframes(encoder, adjusted_mode);
+	case TRANS_DDI_MODE_SELECT_DP_SST:
+		if (type == DRM_MODE_CONNECTOR_eDP)
+			return true;
+	case TRANS_DDI_MODE_SELECT_DP_MST:
+		return (type == DRM_MODE_CONNECTOR_DisplayPort);
+
+	case TRANS_DDI_MODE_SELECT_FDI:
+		return (type == DRM_MODE_CONNECTOR_VGA);
+
+	default:
+		return false;
+	}
 }
 
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
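The PLL bookkeeping introduced above only touches the hardware enable bit on the 0-to-1 and 1-to-0 refcount transitions, with the WARNs guarding against underflow; actually sharing a running PLL between pipes is still a TODO in this patch. A compact model of the scheme, with illustrative names:

#include <stdio.h>

struct ddi_pll {
	const char *name;
	int refcount;
	int enabled;
};

static void pll_get(struct ddi_pll *pll)
{
	if (pll->refcount++ == 0) {
		pll->enabled = 1;	/* first user powers the PLL on */
		printf("Enabling %s\n", pll->name);
	}
}

static void pll_put(struct ddi_pll *pll)
{
	if (--pll->refcount == 0) {
		pll->enabled = 0;	/* last user powers it off */
		printf("Disabling %s\n", pll->name);
	}
	if (pll->refcount < 0)
		fprintf(stderr, "Invalid %s refcount\n", pll->name);
}

int main(void)
{
	struct ddi_pll wrpll1 = { "WRPLL 1", 0, 0 };

	pll_get(&wrpll1);
	pll_get(&wrpll1);	/* second user: no hardware access */
	pll_put(&wrpll1);
	pll_put(&wrpll1);	/* last put actually disables */
	return 0;
}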
@@ -762,58 +1097,418 @@
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	enum port port = intel_ddi_get_encoder_port(encoder);
 	u32 tmp;
 	int i;
 
-	tmp = I915_READ(DDI_BUF_CTL(intel_hdmi->ddi_port));
+	tmp = I915_READ(DDI_BUF_CTL(port));
 
 	if (!(tmp & DDI_BUF_CTL_ENABLE))
 		return false;
 
-	for_each_pipe(i) {
-		tmp = I915_READ(DDI_FUNC_CTL(i));
+	if (port == PORT_A) {
+		tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
 
-		if ((tmp & PIPE_DDI_PORT_MASK)
-		    == PIPE_DDI_SELECT_PORT(intel_hdmi->ddi_port)) {
-			*pipe = i;
-			return true;
+		switch (tmp & TRANS_DDI_EDP_INPUT_MASK) {
+		case TRANS_DDI_EDP_INPUT_A_ON:
+		case TRANS_DDI_EDP_INPUT_A_ONOFF:
+			*pipe = PIPE_A;
+			break;
+		case TRANS_DDI_EDP_INPUT_B_ONOFF:
+			*pipe = PIPE_B;
+			break;
+		case TRANS_DDI_EDP_INPUT_C_ONOFF:
+			*pipe = PIPE_C;
+			break;
+		}
+
+		return true;
+	} else {
+		for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
+			tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+
+			if ((tmp & TRANS_DDI_PORT_MASK)
+			    == TRANS_DDI_SELECT_PORT(port)) {
+				*pipe = i;
+				return true;
+			}
 		}
 	}
 
-	DRM_DEBUG_KMS("No pipe for ddi port %i found\n", intel_hdmi->ddi_port);
+	DRM_DEBUG_KMS("No pipe for ddi port %i found\n", port);
 
 	return true;
 }
 
-void intel_enable_ddi(struct intel_encoder *encoder)
+static uint32_t intel_ddi_get_crtc_pll(struct drm_i915_private *dev_priv,
+				       enum pipe pipe)
 {
-	struct drm_device *dev = encoder->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
-	int port = intel_hdmi->ddi_port;
-	u32 temp;
+	uint32_t temp, ret;
+	enum port port;
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
+	int i;
 
-	temp = I915_READ(DDI_BUF_CTL(port));
-	temp |= DDI_BUF_CTL_ENABLE;
+	if (cpu_transcoder == TRANSCODER_EDP) {
+		port = PORT_A;
+	} else {
+		temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
+		temp &= TRANS_DDI_PORT_MASK;
 
-	/* Enable DDI_BUF_CTL. In HDMI/DVI mode, the port width,
-	 * and swing/emphasis values are ignored so nothing special needs
-	 * to be done besides enabling the port.
-	 */
-	I915_WRITE(DDI_BUF_CTL(port), temp);
+		for (i = PORT_B; i <= PORT_E; i++)
+			if (temp == TRANS_DDI_SELECT_PORT(i))
+				port = i;
+	}
+
+	ret = I915_READ(PORT_CLK_SEL(port));
+
+	DRM_DEBUG_KMS("Pipe %c connected to port %c using clock 0x%08x\n",
+		      pipe_name(pipe), port_name(port), ret);
+
+	return ret;
 }
 
-void intel_disable_ddi(struct intel_encoder *encoder)
+void intel_ddi_setup_hw_pll_state(struct drm_device *dev)
 {
-	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
-	int port = intel_hdmi->ddi_port;
-	u32 temp;
+	enum pipe pipe;
+	struct intel_crtc *intel_crtc;
 
-	temp = I915_READ(DDI_BUF_CTL(port));
-	temp &= ~DDI_BUF_CTL_ENABLE;
+	for_each_pipe(pipe) {
+		intel_crtc =
+			to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
 
-	I915_WRITE(DDI_BUF_CTL(port), temp);
+		if (!intel_crtc->active)
+			continue;
+
+		intel_crtc->ddi_pll_sel = intel_ddi_get_crtc_pll(dev_priv,
+								 pipe);
+
+		switch (intel_crtc->ddi_pll_sel) {
+		case PORT_CLK_SEL_SPLL:
+			dev_priv->ddi_plls.spll_refcount++;
+			break;
+		case PORT_CLK_SEL_WRPLL1:
+			dev_priv->ddi_plls.wrpll1_refcount++;
+			break;
+		case PORT_CLK_SEL_WRPLL2:
+			dev_priv->ddi_plls.wrpll2_refcount++;
+			break;
+		}
+	}
+}
+
+void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc)
+{
+	struct drm_crtc *crtc = &intel_crtc->base;
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
+	enum port port = intel_ddi_get_encoder_port(intel_encoder);
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+
+	if (cpu_transcoder != TRANSCODER_EDP)
+		I915_WRITE(TRANS_CLK_SEL(cpu_transcoder),
+			   TRANS_CLK_SEL_PORT(port));
+}
+
+void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
+{
+	struct drm_i915_private *dev_priv = intel_crtc->base.dev->dev_private;
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+
+	if (cpu_transcoder != TRANSCODER_EDP)
+		I915_WRITE(TRANS_CLK_SEL(cpu_transcoder),
+			   TRANS_CLK_SEL_DISABLED);
+}
+
+static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
+{
+	struct drm_encoder *encoder = &intel_encoder->base;
+	struct drm_crtc *crtc = encoder->crtc;
+	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	enum port port = intel_ddi_get_encoder_port(intel_encoder);
+	int type = intel_encoder->type;
+
+	if (type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+		ironlake_edp_panel_vdd_on(intel_dp);
+		ironlake_edp_panel_on(intel_dp);
+		ironlake_edp_panel_vdd_off(intel_dp, true);
+	}
+
+	WARN_ON(intel_crtc->ddi_pll_sel == PORT_CLK_SEL_NONE);
+	I915_WRITE(PORT_CLK_SEL(port), intel_crtc->ddi_pll_sel);
+
+	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+		intel_dp_start_link_train(intel_dp);
+		intel_dp_complete_link_train(intel_dp);
+	}
+}
+
+static void intel_ddi_post_disable(struct intel_encoder *intel_encoder)
+{
+	struct drm_encoder *encoder = &intel_encoder->base;
+	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	enum port port = intel_ddi_get_encoder_port(intel_encoder);
+	int type = intel_encoder->type;
+	uint32_t val;
+	bool wait = false;
+
+	val = I915_READ(DDI_BUF_CTL(port));
+	if (val & DDI_BUF_CTL_ENABLE) {
+		val &= ~DDI_BUF_CTL_ENABLE;
+		I915_WRITE(DDI_BUF_CTL(port), val);
+		wait = true;
+	}
+
+	val = I915_READ(DP_TP_CTL(port));
+	val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
+	val |= DP_TP_CTL_LINK_TRAIN_PAT1;
+	I915_WRITE(DP_TP_CTL(port), val);
+
+	if (wait)
+		intel_wait_ddi_buf_idle(dev_priv, port);
+
+	if (type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+		ironlake_edp_panel_vdd_on(intel_dp);
+		ironlake_edp_panel_off(intel_dp);
+	}
+
+	I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE);
+}
+
+static void intel_enable_ddi(struct intel_encoder *intel_encoder)
+{
+	struct drm_encoder *encoder = &intel_encoder->base;
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum port port = intel_ddi_get_encoder_port(intel_encoder);
+	int type = intel_encoder->type;
+
+	if (type == INTEL_OUTPUT_HDMI) {
+		/* In HDMI/DVI mode, the port width, and swing/emphasis values
+		 * are ignored so nothing special needs to be done besides
+		 * enabling the port.
+		 */
+		I915_WRITE(DDI_BUF_CTL(port), DDI_BUF_CTL_ENABLE);
+	} else if (type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+		ironlake_edp_backlight_on(intel_dp);
+	}
+}
+
+static void intel_disable_ddi(struct intel_encoder *intel_encoder)
+{
+	struct drm_encoder *encoder = &intel_encoder->base;
+	int type = intel_encoder->type;
+
+	if (type == INTEL_OUTPUT_EDP) {
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+		ironlake_edp_backlight_off(intel_dp);
+	}
+}
+
+int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv)
+{
+	if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT)
+		return 450;
+	else if ((I915_READ(LCPLL_CTL) & LCPLL_CLK_FREQ_MASK) ==
+		 LCPLL_CLK_FREQ_450)
+		return 450;
+	else if (IS_ULT(dev_priv->dev))
+		return 338;
+	else
+		return 540;
+}
+
+void intel_ddi_pll_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t val = I915_READ(LCPLL_CTL);
+
+	/* The LCPLL register should be turned on by the BIOS. For now let's
+	 * just check its state and print errors in case something is wrong.
+	 * Don't even try to turn it on.
+	 */
+
+	DRM_DEBUG_KMS("CDCLK running at %dMHz\n",
+		      intel_ddi_get_cdclk_freq(dev_priv));
+
+	if (val & LCPLL_CD_SOURCE_FCLK)
+		DRM_ERROR("CDCLK source is not LCPLL\n");
+
+	if (val & LCPLL_PLL_DISABLE)
+		DRM_ERROR("LCPLL is disabled\n");
+}
+
+void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder)
+{
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+	struct intel_dp *intel_dp = &intel_dig_port->dp;
+	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	enum port port = intel_dig_port->port;
+	bool wait = false;
+	uint32_t val;
+
+	if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) {
+		val = I915_READ(DDI_BUF_CTL(port));
+		if (val & DDI_BUF_CTL_ENABLE) {
+			val &= ~DDI_BUF_CTL_ENABLE;
+			I915_WRITE(DDI_BUF_CTL(port), val);
+			wait = true;
+		}
+
+		val = I915_READ(DP_TP_CTL(port));
+		val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK);
+		val |= DP_TP_CTL_LINK_TRAIN_PAT1;
+		I915_WRITE(DP_TP_CTL(port), val);
+		POSTING_READ(DP_TP_CTL(port));
+
+		if (wait)
+			intel_wait_ddi_buf_idle(dev_priv, port);
+	}
+
+	val = DP_TP_CTL_ENABLE | DP_TP_CTL_MODE_SST |
+	      DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE;
+	if (intel_dp->link_configuration[1] & DP_LANE_COUNT_ENHANCED_FRAME_EN)
+		val |= DP_TP_CTL_ENHANCED_FRAME_ENABLE;
+	I915_WRITE(DP_TP_CTL(port), val);
+	POSTING_READ(DP_TP_CTL(port));
+
+	intel_dp->DP |= DDI_BUF_CTL_ENABLE;
+	I915_WRITE(DDI_BUF_CTL(port), intel_dp->DP);
+	POSTING_READ(DDI_BUF_CTL(port));
+
+	udelay(600);
+}
+
+void intel_ddi_fdi_disable(struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
+	uint32_t val;
+
+	intel_ddi_post_disable(intel_encoder);
+
+	val = I915_READ(_FDI_RXA_CTL);
+	val &= ~FDI_RX_ENABLE;
+	I915_WRITE(_FDI_RXA_CTL, val);
+
+	val = I915_READ(_FDI_RXA_MISC);
+	val &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK);
+	val |= FDI_RX_PWRDN_LANE1_VAL(2) | FDI_RX_PWRDN_LANE0_VAL(2);
+	I915_WRITE(_FDI_RXA_MISC, val);
+
+	val = I915_READ(_FDI_RXA_CTL);
+	val &= ~FDI_PCDCLK;
+	I915_WRITE(_FDI_RXA_CTL, val);
+
+	val = I915_READ(_FDI_RXA_CTL);
+	val &= ~FDI_RX_PLL_ENABLE;
+	I915_WRITE(_FDI_RXA_CTL, val);
+}
+
+static void intel_ddi_hot_plug(struct intel_encoder *intel_encoder)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	int type = intel_encoder->type;
+
+	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP)
+		intel_dp_check_link_status(intel_dp);
+}
+
+static void intel_ddi_destroy(struct drm_encoder *encoder)
+{
+	/* HDMI has nothing special to destroy, so we can go with this. */
+	intel_dp_encoder_destroy(encoder);
+}
+
+static bool intel_ddi_mode_fixup(struct drm_encoder *encoder,
+				 const struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode)
+{
+	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+	int type = intel_encoder->type;
+
+	WARN(type == INTEL_OUTPUT_UNKNOWN, "mode_fixup() on unknown output!\n");
+
+	if (type == INTEL_OUTPUT_HDMI)
+		return intel_hdmi_mode_fixup(encoder, mode, adjusted_mode);
+	else
+		return intel_dp_mode_fixup(encoder, mode, adjusted_mode);
+}
+
+static const struct drm_encoder_funcs intel_ddi_funcs = {
+	.destroy = intel_ddi_destroy,
+};
+
+static const struct drm_encoder_helper_funcs intel_ddi_helper_funcs = {
+	.mode_fixup = intel_ddi_mode_fixup,
+	.mode_set = intel_ddi_mode_set,
+	.disable = intel_encoder_noop,
+};
+
+void intel_ddi_init(struct drm_device *dev, enum port port)
+{
+	struct intel_digital_port *intel_dig_port;
+	struct intel_encoder *intel_encoder;
+	struct drm_encoder *encoder;
+	struct intel_connector *hdmi_connector = NULL;
+	struct intel_connector *dp_connector = NULL;
+
+	intel_dig_port = kzalloc(sizeof(struct intel_digital_port), GFP_KERNEL);
+	if (!intel_dig_port)
+		return;
+
+	dp_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
+	if (!dp_connector) {
+		kfree(intel_dig_port);
+		return;
+	}
+
+	if (port != PORT_A) {
+		hdmi_connector = kzalloc(sizeof(struct intel_connector),
+					 GFP_KERNEL);
+		if (!hdmi_connector) {
+			kfree(dp_connector);
+			kfree(intel_dig_port);
+			return;
+		}
+	}
+
+	intel_encoder = &intel_dig_port->base;
+	encoder = &intel_encoder->base;
+
+	drm_encoder_init(dev, encoder, &intel_ddi_funcs,
+			 DRM_MODE_ENCODER_TMDS);
+	drm_encoder_helper_add(encoder, &intel_ddi_helper_funcs);
+
+	intel_encoder->enable = intel_enable_ddi;
+	intel_encoder->pre_enable = intel_ddi_pre_enable;
+	intel_encoder->disable = intel_disable_ddi;
+	intel_encoder->post_disable = intel_ddi_post_disable;
+	intel_encoder->get_hw_state = intel_ddi_get_hw_state;
+
+	intel_dig_port->port = port;
+	if (hdmi_connector)
+		intel_dig_port->hdmi.sdvox_reg = DDI_BUF_CTL(port);
+	else
+		intel_dig_port->hdmi.sdvox_reg = 0;
+	intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
+
+	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
+	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	intel_encoder->cloneable = false;
+	intel_encoder->hot_plug = intel_ddi_hot_plug;
+
+	if (hdmi_connector)
+		intel_hdmi_init_connector(intel_dig_port, hdmi_connector);
+	intel_dp_init_connector(intel_dig_port, dp_connector);
 }
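intel_ddi_init() above allocates up to three objects and frees the earlier ones when a later allocation fails. The same staged-unwind shape in miniature (payload sizes are arbitrary):

#include <stdlib.h>

struct ddi_port {
	void *dig_port;
	void *dp_connector;
	void *hdmi_connector;	/* optional: skipped for port A */
};

static struct ddi_port *ddi_port_create(int want_hdmi)
{
	struct ddi_port *p = calloc(1, sizeof(*p));

	if (!p)
		return NULL;
	p->dig_port = calloc(1, 64);
	if (!p->dig_port)
		goto err_port;
	p->dp_connector = calloc(1, 64);
	if (!p->dp_connector)
		goto err_dig;
	if (want_hdmi) {
		p->hdmi_connector = calloc(1, 64);
		if (!p->hdmi_connector)
			goto err_dp;
	}
	return p;

err_dp:	/* unwind in reverse allocation order */
	free(p->dp_connector);
err_dig:
	free(p->dig_port);
err_port:
	free(p);
	return NULL;
}

int main(void)
{
	struct ddi_port *p = ddi_port_create(1);

	if (p) {
		free(p->hdmi_connector);
		free(p->dp_connector);
		free(p->dig_port);
		free(p);
	}
	return 0;
}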
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b426d44..5d127e0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -41,8 +41,6 @@
 #include <drm/drm_crtc_helper.h>
 #include <linux/dma_remapping.h>
 
-#define HAS_eDP (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))
-
 bool intel_pipe_has_type(struct drm_crtc *crtc, int type);
 static void intel_increase_pllclock(struct drm_crtc *crtc);
 static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
@@ -80,6 +78,16 @@
 /* FDI */
 #define IRONLAKE_FDI_FREQ		2700000 /* in kHz for mode->clock */
 
+int
+intel_pch_rawclk(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(!HAS_PCH_SPLIT(dev));
+
+	return I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK;
+}
+
 static bool
 intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
 		    int target, int refclk, intel_clock_t *match_clock,
@@ -380,7 +388,7 @@
 
 static const intel_limit_t intel_limits_vlv_hdmi = {
 	.dot = { .min = 20000, .max = 165000 },
-	.vco = { .min = 5994000, .max = 4000000 },
+	.vco = { .min = 4000000, .max = 5994000 },
 	.n = { .min = 1, .max = 7 },
 	.m = { .min = 60, .max = 300 }, /* guess */
 	.m1 = { .min = 2, .max = 3 },
@@ -393,10 +401,10 @@
 };
 
 static const intel_limit_t intel_limits_vlv_dp = {
-	.dot = { .min = 162000, .max = 270000 },
-	.vco = { .min = 5994000, .max = 4000000 },
+	.dot = { .min = 25000, .max = 270000 },
+	.vco = { .min = 4000000, .max = 6000000 },
 	.n = { .min = 1, .max = 7 },
-	.m = { .min = 60, .max = 300 }, /* guess */
+	.m = { .min = 22, .max = 450 },
 	.m1 = { .min = 2, .max = 3 },
 	.m2 = { .min = 11, .max = 156 },
 	.p = { .min = 10, .max = 30 },
@@ -531,7 +539,7 @@
 				limit = &intel_limits_ironlake_single_lvds;
 		}
 	} else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) ||
-			HAS_eDP)
+		   intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))
 		limit = &intel_limits_ironlake_display_port;
 	else
 		limit = &intel_limits_ironlake_dac;
@@ -927,6 +935,15 @@
 	return true;
 }
 
+enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv,
+					     enum pipe pipe)
+{
+	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	return intel_crtc->cpu_transcoder;
+}
+
 static void ironlake_wait_for_vblank(struct drm_device *dev, int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
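Most register lookups in this file now go through this pipe-to-transcoder indirection, because on Haswell a pipe driving eDP uses the dedicated eDP transcoder instead of its own. The mapping reduces to one line; enum values here are illustrative:

#include <stdio.h>

enum pipe { PIPE_A, PIPE_B, PIPE_C };
enum transcoder { TRANSCODER_A, TRANSCODER_B, TRANSCODER_C, TRANSCODER_EDP };

/* pipes map 1:1 onto transcoders, except when driving eDP */
static enum transcoder pipe_to_cpu_transcoder(enum pipe pipe, int drives_edp)
{
	return drives_edp ? TRANSCODER_EDP : (enum transcoder)pipe;
}

int main(void)
{
	printf("pipe B -> transcoder %d\n",
	       pipe_to_cpu_transcoder(PIPE_B, 0));
	printf("pipe B (eDP) -> transcoder %d\n",
	       pipe_to_cpu_transcoder(PIPE_B, 1));
	return 0;
}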
@@ -999,9 +1016,11 @@
 void intel_wait_for_pipe_off(struct drm_device *dev, int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
 
 	if (INTEL_INFO(dev)->gen >= 4) {
-		int reg = PIPECONF(pipe);
+		int reg = PIPECONF(cpu_transcoder);
 
 		/* Wait for the Pipe State to go off */
 		if (wait_for((I915_READ(reg) & I965_PIPECONF_ACTIVE) == 0,
@@ -1103,12 +1122,14 @@
 	int reg;
 	u32 val;
 	bool cur_state;
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
 
 	if (IS_HASWELL(dev_priv->dev)) {
 		/* On Haswell, DDI is used instead of FDI_TX_CTL */
-		reg = DDI_FUNC_CTL(pipe);
+		reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
 		val = I915_READ(reg);
-		cur_state = !!(val & PIPE_DDI_FUNC_ENABLE);
+		cur_state = !!(val & TRANS_DDI_FUNC_ENABLE);
 	} else {
 		reg = FDI_TX_CTL(pipe);
 		val = I915_READ(reg);
@@ -1128,14 +1149,9 @@
 	u32 val;
 	bool cur_state;
 
-	if (IS_HASWELL(dev_priv->dev) && pipe > 0) {
-			DRM_ERROR("Attempting to enable FDI_RX on Haswell pipe > 0\n");
-			return;
-	} else {
-		reg = FDI_RX_CTL(pipe);
-		val = I915_READ(reg);
-		cur_state = !!(val & FDI_RX_ENABLE);
-	}
+	reg = FDI_RX_CTL(pipe);
+	val = I915_READ(reg);
+	cur_state = !!(val & FDI_RX_ENABLE);
 	WARN(cur_state != state,
 	     "FDI RX state assertion failure (expected %s, current %s)\n",
 	     state_string(state), state_string(cur_state));
@@ -1168,10 +1184,6 @@
 	int reg;
 	u32 val;
 
-	if (IS_HASWELL(dev_priv->dev) && pipe > 0) {
-		DRM_ERROR("Attempting to enable FDI on Haswell with pipe > 0\n");
-		return;
-	}
 	reg = FDI_RX_CTL(pipe);
 	val = I915_READ(reg);
 	WARN(!(val & FDI_RX_PLL_ENABLE), "FDI RX PLL assertion failure, should be active but is disabled\n");
@@ -1212,12 +1224,14 @@
 	int reg;
 	u32 val;
 	bool cur_state;
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
 
 	/* if we need the pipe A quirk it must be always on */
 	if (pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE)
 		state = true;
 
-	reg = PIPECONF(pipe);
+	reg = PIPECONF(cpu_transcoder);
 	val = I915_READ(reg);
 	cur_state = !!(val & PIPECONF_ENABLE);
 	WARN(cur_state != state,
@@ -1492,24 +1506,26 @@
 
 /* SBI access */
 static void
-intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value)
+intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
+		enum intel_sbi_destination destination)
 {
 	unsigned long flags;
+	u32 tmp;
 
 	spin_lock_irqsave(&dev_priv->dpio_lock, flags);
-	if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0,
-				100)) {
+	if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100)) {
 		DRM_ERROR("timeout waiting for SBI to become ready\n");
 		goto out_unlock;
 	}
 
-	I915_WRITE(SBI_ADDR,
-			(reg << 16));
-	I915_WRITE(SBI_DATA,
-			value);
-	I915_WRITE(SBI_CTL_STAT,
-			SBI_BUSY |
-			SBI_CTL_OP_CRWR);
+	I915_WRITE(SBI_ADDR, (reg << 16));
+	I915_WRITE(SBI_DATA, value);
+
+	if (destination == SBI_ICLK)
+		tmp = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRWR;
+	else
+		tmp = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IOWR;
+	I915_WRITE(SBI_CTL_STAT, SBI_BUSY | tmp);
 
 	if (wait_for((I915_READ(SBI_CTL_STAT) & (SBI_BUSY | SBI_RESPONSE_FAIL)) == 0,
 				100)) {
@@ -1522,23 +1538,25 @@
 }
 
 static u32
-intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg)
+intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
+	       enum intel_sbi_destination destination)
 {
 	unsigned long flags;
 	u32 value = 0;
 
 	spin_lock_irqsave(&dev_priv->dpio_lock, flags);
-	if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0,
-				100)) {
+	if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100)) {
 		DRM_ERROR("timeout waiting for SBI to become ready\n");
 		goto out_unlock;
 	}
 
-	I915_WRITE(SBI_ADDR,
-			(reg << 16));
-	I915_WRITE(SBI_CTL_STAT,
-			SBI_BUSY |
-			SBI_CTL_OP_CRRD);
+	I915_WRITE(SBI_ADDR, (reg << 16));
+
+	if (destination == SBI_ICLK)
+		value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD;
+	else
+		value = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD;
+	I915_WRITE(SBI_CTL_STAT, value | SBI_BUSY);
 
 	if (wait_for((I915_READ(SBI_CTL_STAT) & (SBI_BUSY | SBI_RESPONSE_FAIL)) == 0,
 				100)) {
@@ -1554,14 +1572,14 @@
 }
 
 /**
- * intel_enable_pch_pll - enable PCH PLL
+ * ironlake_enable_pch_pll - enable PCH PLL
  * @dev_priv: i915 private structure
  * @pipe: pipe PLL to enable
  *
  * The PCH PLL needs to be enabled before the PCH transcoder, since it
  * drives the transcoder clock.
  */
-static void intel_enable_pch_pll(struct intel_crtc *intel_crtc)
+static void ironlake_enable_pch_pll(struct intel_crtc *intel_crtc)
 {
 	struct drm_i915_private *dev_priv = intel_crtc->base.dev->dev_private;
 	struct intel_pch_pll *pll;
@@ -1645,12 +1663,12 @@
 	pll->on = false;
 }
 
-static void intel_enable_transcoder(struct drm_i915_private *dev_priv,
-				    enum pipe pipe)
+static void ironlake_enable_pch_transcoder(struct drm_i915_private *dev_priv,
+					   enum pipe pipe)
 {
-	int reg;
-	u32 val, pipeconf_val;
+	struct drm_device *dev = dev_priv->dev;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+	uint32_t reg, val, pipeconf_val;
 
 	/* PCH only available on ILK+ */
 	BUG_ON(dev_priv->info->gen < 5);
@@ -1664,10 +1682,15 @@
 	assert_fdi_tx_enabled(dev_priv, pipe);
 	assert_fdi_rx_enabled(dev_priv, pipe);
 
-	if (IS_HASWELL(dev_priv->dev) && pipe > 0) {
-		DRM_ERROR("Attempting to enable transcoder on Haswell with pipe > 0\n");
-		return;
+	if (HAS_PCH_CPT(dev)) {
+		/* Workaround: Set the timing override bit before enabling the
+		 * pch transcoder. */
+		reg = TRANS_CHICKEN2(pipe);
+		val = I915_READ(reg);
+		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+		I915_WRITE(reg, val);
 	}
+
 	reg = TRANSCONF(pipe);
 	val = I915_READ(reg);
 	pipeconf_val = I915_READ(PIPECONF(pipe));
@@ -1696,11 +1719,42 @@
 		DRM_ERROR("failed to enable transcoder %d\n", pipe);
 }
 
-static void intel_disable_transcoder(struct drm_i915_private *dev_priv,
-				     enum pipe pipe)
+static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv,
+				      enum transcoder cpu_transcoder)
 {
-	int reg;
-	u32 val;
+	u32 val, pipeconf_val;
+
+	/* PCH only available on ILK+ */
+	BUG_ON(dev_priv->info->gen < 5);
+
+	/* FDI must be feeding us bits for PCH ports */
+	assert_fdi_tx_enabled(dev_priv, cpu_transcoder);
+	assert_fdi_rx_enabled(dev_priv, TRANSCODER_A);
+
+	/* Workaround: set timing override bit. */
+	val = I915_READ(_TRANSA_CHICKEN2);
+	val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+	I915_WRITE(_TRANSA_CHICKEN2, val);
+
+	val = TRANS_ENABLE;
+	pipeconf_val = I915_READ(PIPECONF(cpu_transcoder));
+
+	if ((pipeconf_val & PIPECONF_INTERLACE_MASK_HSW) ==
+	    PIPECONF_INTERLACED_ILK)
+		val |= TRANS_INTERLACED;
+	else
+		val |= TRANS_PROGRESSIVE;
+
+	I915_WRITE(TRANSCONF(TRANSCODER_A), val);
+	if (wait_for(I915_READ(_TRANSACONF) & TRANS_STATE_ENABLE, 100))
+		DRM_ERROR("Failed to enable PCH transcoder\n");
+}
+
+static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv,
+					    enum pipe pipe)
+{
+	struct drm_device *dev = dev_priv->dev;
+	uint32_t reg, val;
 
 	/* FDI relies on the transcoder */
 	assert_fdi_tx_disabled(dev_priv, pipe);
@@ -1716,6 +1770,31 @@
 	/* wait for PCH transcoder off, transcoder state */
 	if (wait_for((I915_READ(reg) & TRANS_STATE_ENABLE) == 0, 50))
 		DRM_ERROR("failed to disable transcoder %d\n", pipe);
+
+	if (!HAS_PCH_IBX(dev)) {
+		/* Workaround: Clear the timing override chicken bit again. */
+		reg = TRANS_CHICKEN2(pipe);
+		val = I915_READ(reg);
+		val &= ~TRANS_CHICKEN2_TIMING_OVERRIDE;
+		I915_WRITE(reg, val);
+	}
+}
+
+static void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	val = I915_READ(_TRANSACONF);
+	val &= ~TRANS_ENABLE;
+	I915_WRITE(_TRANSACONF, val);
+	/* wait for PCH transcoder off, transcoder state */
+	if (wait_for((I915_READ(_TRANSACONF) & TRANS_STATE_ENABLE) == 0, 50))
+		DRM_ERROR("Failed to disable PCH transcoder\n");
+
+	/* Workaround: clear timing override bit. */
+	val = I915_READ(_TRANSA_CHICKEN2);
+	val &= ~TRANS_CHICKEN2_TIMING_OVERRIDE;
+	I915_WRITE(_TRANSA_CHICKEN2, val);
 }
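Both PCH transcoder paths now bracket the hardware sequence with the timing-override chicken bit: set it before enabling the transcoder, clear it only after the transcoder is confirmed off. A faked-register model of that ordering; the bit positions are illustrative:

#include <stdio.h>

#define TIMING_OVERRIDE	(1u << 31)
#define TRANS_ENABLE	(1u << 30)

static unsigned int chicken2, transconf;	/* stand-ins for the registers */

static void pch_transcoder_enable(void)
{
	chicken2 |= TIMING_OVERRIDE;	/* workaround goes in first */
	transconf |= TRANS_ENABLE;
}

static void pch_transcoder_disable(void)
{
	transconf &= ~TRANS_ENABLE;
	/* in the driver: wait for TRANS_STATE_ENABLE to clear here */
	chicken2 &= ~TIMING_OVERRIDE;	/* only then drop the workaround */
}

int main(void)
{
	pch_transcoder_enable();
	pch_transcoder_disable();
	printf("chicken2=0x%08x transconf=0x%08x\n", chicken2, transconf);
	return 0;
}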
 
 /**
@@ -1735,9 +1814,17 @@
 static void intel_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe,
 			      bool pch_port)
 {
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
+	enum transcoder pch_transcoder;
 	int reg;
 	u32 val;
 
+	if (IS_HASWELL(dev_priv->dev))
+		pch_transcoder = TRANSCODER_A;
+	else
+		pch_transcoder = pipe;
+
 	/*
 	 * A pipe without a PLL won't actually be able to drive bits from
 	 * a plane.  On ILK+ the pipe PLLs are integrated, so we don't
@@ -1748,13 +1835,13 @@
 	else {
 		if (pch_port) {
 			/* if driving the PCH, we need FDI enabled */
-			assert_fdi_rx_pll_enabled(dev_priv, pipe);
-			assert_fdi_tx_pll_enabled(dev_priv, pipe);
+			assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder);
+			assert_fdi_tx_pll_enabled(dev_priv, cpu_transcoder);
 		}
 		/* FIXME: assert CPU port conditions for SNB+ */
 	}
 
-	reg = PIPECONF(pipe);
+	reg = PIPECONF(cpu_transcoder);
 	val = I915_READ(reg);
 	if (val & PIPECONF_ENABLE)
 		return;
@@ -1778,6 +1865,8 @@
 static void intel_disable_pipe(struct drm_i915_private *dev_priv,
 			       enum pipe pipe)
 {
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
 	int reg;
 	u32 val;
 
@@ -1791,7 +1880,7 @@
 	if (pipe == PIPE_A && (dev_priv->quirks & QUIRK_PIPEA_FORCE))
 		return;
 
-	reg = PIPECONF(pipe);
+	reg = PIPECONF(cpu_transcoder);
 	val = I915_READ(reg);
 	if ((val & PIPECONF_ENABLE) == 0)
 		return;
@@ -1807,8 +1896,10 @@
 void intel_flush_display_plane(struct drm_i915_private *dev_priv,
 				      enum plane plane)
 {
-	I915_WRITE(DSPADDR(plane), I915_READ(DSPADDR(plane)));
-	I915_WRITE(DSPSURF(plane), I915_READ(DSPSURF(plane)));
+	if (dev_priv->info->gen >= 4)
+		I915_WRITE(DSPSURF(plane), I915_READ(DSPSURF(plane)));
+	else
+		I915_WRITE(DSPADDR(plane), I915_READ(DSPADDR(plane)));
 }
 
 /**
@@ -1926,9 +2017,9 @@
 
 /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel
  * is assumed to be a power-of-two. */
-static unsigned long gen4_compute_dspaddr_offset_xtiled(int *x, int *y,
-							unsigned int bpp,
-							unsigned int pitch)
+unsigned long intel_gen4_compute_offset_xtiled(int *x, int *y,
+					       unsigned int bpp,
+					       unsigned int pitch)
 {
 	int tile_rows, tiles;
 
@@ -1969,24 +2060,38 @@
 	dspcntr = I915_READ(reg);
 	/* Mask out pixel format bits in case we change it */
 	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
-	switch (fb->bits_per_pixel) {
-	case 8:
+	switch (fb->pixel_format) {
+	case DRM_FORMAT_C8:
 		dspcntr |= DISPPLANE_8BPP;
 		break;
-	case 16:
-		if (fb->depth == 15)
-			dspcntr |= DISPPLANE_15_16BPP;
-		else
-			dspcntr |= DISPPLANE_16BPP;
+	case DRM_FORMAT_XRGB1555:
+	case DRM_FORMAT_ARGB1555:
+		dspcntr |= DISPPLANE_BGRX555;
 		break;
-	case 24:
-	case 32:
-		dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
+	case DRM_FORMAT_RGB565:
+		dspcntr |= DISPPLANE_BGRX565;
+		break;
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_ARGB8888:
+		dspcntr |= DISPPLANE_BGRX888;
+		break;
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ABGR8888:
+		dspcntr |= DISPPLANE_RGBX888;
+		break;
+	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_ARGB2101010:
+		dspcntr |= DISPPLANE_BGRX101010;
+		break;
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ABGR2101010:
+		dspcntr |= DISPPLANE_RGBX101010;
 		break;
 	default:
-		DRM_ERROR("Unknown color depth %d\n", fb->bits_per_pixel);
+		DRM_ERROR("Unknown pixel format 0x%08x\n", fb->pixel_format);
 		return -EINVAL;
 	}
+
 	if (INTEL_INFO(dev)->gen >= 4) {
 		if (obj->tiling_mode != I915_TILING_NONE)
 			dspcntr |= DISPPLANE_TILED;
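The plane setup above now switches on DRM fourcc codes rather than bits_per_pixel, which lets it distinguish, say, BGRX from RGBX at the same depth. A tiny lookup in the same spirit; the fourcc macro follows the DRM convention, but the hardware values are placeholders:

#include <stdint.h>
#include <stdio.h>

#define fourcc(a, b, c, d) \
	((uint32_t)(a) | ((uint32_t)(b) << 8) | \
	 ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))

#define FMT_C8		fourcc('C', '8', ' ', ' ')
#define FMT_RGB565	fourcc('R', 'G', '1', '6')
#define FMT_XRGB8888	fourcc('X', 'R', '2', '4')

static int fourcc_to_hw(uint32_t fmt, unsigned int *out)
{
	switch (fmt) {
	case FMT_C8:		*out = 0x1; return 0;
	case FMT_RGB565:	*out = 0x2; return 0;
	case FMT_XRGB8888:	*out = 0x3; return 0;
	default:
		fprintf(stderr, "Unknown pixel format 0x%08x\n",
			(unsigned int)fmt);
		return -1;
	}
}

int main(void)
{
	unsigned int hw;

	if (fourcc_to_hw(FMT_XRGB8888, &hw) == 0)
		printf("hw bits 0x%x\n", hw);
	return 0;
}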
@@ -2000,9 +2105,9 @@
 
 	if (INTEL_INFO(dev)->gen >= 4) {
 		intel_crtc->dspaddr_offset =
-			gen4_compute_dspaddr_offset_xtiled(&x, &y,
-							   fb->bits_per_pixel / 8,
-							   fb->pitches[0]);
+			intel_gen4_compute_offset_xtiled(&x, &y,
+							 fb->bits_per_pixel / 8,
+							 fb->pitches[0]);
 		linear_offset -= intel_crtc->dspaddr_offset;
 	} else {
 		intel_crtc->dspaddr_offset = linear_offset;
@@ -2053,27 +2158,31 @@
 	dspcntr = I915_READ(reg);
 	/* Mask out pixel format bits in case we change it */
 	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
-	switch (fb->bits_per_pixel) {
-	case 8:
+	switch (fb->pixel_format) {
+	case DRM_FORMAT_C8:
 		dspcntr |= DISPPLANE_8BPP;
 		break;
-	case 16:
-		if (fb->depth != 16)
-			return -EINVAL;
-
-		dspcntr |= DISPPLANE_16BPP;
+	case DRM_FORMAT_RGB565:
+		dspcntr |= DISPPLANE_BGRX565;
 		break;
-	case 24:
-	case 32:
-		if (fb->depth == 24)
-			dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
-		else if (fb->depth == 30)
-			dspcntr |= DISPPLANE_32BPP_30BIT_NO_ALPHA;
-		else
-			return -EINVAL;
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_ARGB8888:
+		dspcntr |= DISPPLANE_BGRX888;
+		break;
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ABGR8888:
+		dspcntr |= DISPPLANE_RGBX888;
+		break;
+	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_ARGB2101010:
+		dspcntr |= DISPPLANE_BGRX101010;
+		break;
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ABGR2101010:
+		dspcntr |= DISPPLANE_RGBX101010;
 		break;
 	default:
-		DRM_ERROR("Unknown color depth %d\n", fb->bits_per_pixel);
+		DRM_ERROR("Unknown pixel format 0x%08x\n", fb->pixel_format);
 		return -EINVAL;
 	}
 
@@ -2089,9 +2198,9 @@
 
 	linear_offset = y * fb->pitches[0] + x * (fb->bits_per_pixel / 8);
 	intel_crtc->dspaddr_offset =
-		gen4_compute_dspaddr_offset_xtiled(&x, &y,
-						   fb->bits_per_pixel / 8,
-						   fb->pitches[0]);
+		intel_gen4_compute_offset_xtiled(&x, &y,
+						 fb->bits_per_pixel / 8,
+						 fb->pitches[0]);
 	linear_offset -= intel_crtc->dspaddr_offset;
 
 	DRM_DEBUG_KMS("Writing base %08X %08lX %d %d %d\n",
@@ -2099,8 +2208,12 @@
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	I915_MODIFY_DISPBASE(DSPSURF(plane),
 			     obj->gtt_offset + intel_crtc->dspaddr_offset);
-	I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
-	I915_WRITE(DSPLINOFF(plane), linear_offset);
+	if (IS_HASWELL(dev)) {
+		I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
+	} else {
+		I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
+		I915_WRITE(DSPLINOFF(plane), linear_offset);
+	}
 	POSTING_READ(reg);
 
 	return 0;
@@ -2148,13 +2261,39 @@
 	return ret;
 }
 
+static void intel_crtc_update_sarea_pos(struct drm_crtc *crtc, int x, int y)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_master_private *master_priv;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	if (!dev->primary->master)
+		return;
+
+	master_priv = dev->primary->master->driver_priv;
+	if (!master_priv->sarea_priv)
+		return;
+
+	switch (intel_crtc->pipe) {
+	case 0:
+		master_priv->sarea_priv->pipeA_x = x;
+		master_priv->sarea_priv->pipeA_y = y;
+		break;
+	case 1:
+		master_priv->sarea_priv->pipeB_x = x;
+		master_priv->sarea_priv->pipeB_y = y;
+		break;
+	default:
+		break;
+	}
+}
+
 static int
 intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 		    struct drm_framebuffer *fb)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_master_private *master_priv;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_framebuffer *old_fb;
 	int ret;
@@ -2206,20 +2345,7 @@
 	intel_update_fbc(dev);
 	mutex_unlock(&dev->struct_mutex);
 
-	if (!dev->primary->master)
-		return 0;
-
-	master_priv = dev->primary->master->driver_priv;
-	if (!master_priv->sarea_priv)
-		return 0;
-
-	if (intel_crtc->pipe) {
-		master_priv->sarea_priv->pipeB_x = x;
-		master_priv->sarea_priv->pipeB_y = y;
-	} else {
-		master_priv->sarea_priv->pipeA_x = x;
-		master_priv->sarea_priv->pipeA_y = y;
-	}
+	intel_crtc_update_sarea_pos(crtc, x, y);
 
 	return 0;
 }
@@ -2302,16 +2428,27 @@
 			   FDI_FE_ERRC_ENABLE);
 }
 
-static void cpt_phase_pointer_enable(struct drm_device *dev, int pipe)
+static void ivb_modeset_global_resources(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 flags = I915_READ(SOUTH_CHICKEN1);
+	struct intel_crtc *pipe_B_crtc =
+		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_B]);
+	struct intel_crtc *pipe_C_crtc =
+		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_C]);
+	uint32_t temp;
 
-	flags |= FDI_PHASE_SYNC_OVR(pipe);
-	I915_WRITE(SOUTH_CHICKEN1, flags); /* once to unlock... */
-	flags |= FDI_PHASE_SYNC_EN(pipe);
-	I915_WRITE(SOUTH_CHICKEN1, flags); /* then again to enable */
-	POSTING_READ(SOUTH_CHICKEN1);
+	/* When everything is off, disable fdi C so that we can enable fdi B
+	 * with all lanes. XXX: This misses the case where a pipe is not using
+	 * any pch resources and so doesn't need any fdi lanes. */
+	if (!pipe_B_crtc->base.enabled && !pipe_C_crtc->base.enabled) {
+		WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE);
+		WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE);
+
+		temp = I915_READ(SOUTH_CHICKEN1);
+		temp &= ~FDI_BC_BIFURCATION_SELECT;
+		DRM_DEBUG_KMS("disabling fdi C rx\n");
+		I915_WRITE(SOUTH_CHICKEN1, temp);
+	}
 }
 
 /* The FDI link training functions for ILK/Ibexpeak. */
@@ -2357,11 +2494,9 @@
 	udelay(150);
 
 	/* Ironlake workaround, enable clock pointer after FDI enable*/
-	if (HAS_PCH_IBX(dev)) {
-		I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR);
-		I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR |
-			   FDI_RX_PHASE_SYNC_POINTER_EN);
-	}
+	I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR);
+	I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR |
+		   FDI_RX_PHASE_SYNC_POINTER_EN);
 
 	reg = FDI_RX_IIR(pipe);
 	for (tries = 0; tries < 5; tries++) {
@@ -2450,6 +2585,9 @@
 	temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
 	I915_WRITE(reg, temp | FDI_TX_ENABLE);
 
+	I915_WRITE(FDI_RX_MISC(pipe),
+		   FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
+
 	reg = FDI_RX_CTL(pipe);
 	temp = I915_READ(reg);
 	if (HAS_PCH_CPT(dev)) {
@@ -2464,9 +2602,6 @@
 	POSTING_READ(reg);
 	udelay(150);
 
-	if (HAS_PCH_CPT(dev))
-		cpt_phase_pointer_enable(dev, pipe);
-
 	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
@@ -2570,6 +2705,9 @@
 	POSTING_READ(reg);
 	udelay(150);
 
+	DRM_DEBUG_KMS("FDI_RX_IIR before link train 0x%x\n",
+		      I915_READ(FDI_RX_IIR(pipe)));
+
 	/* enable CPU FDI TX and PCH FDI RX */
 	reg = FDI_TX_CTL(pipe);
 	temp = I915_READ(reg);
@@ -2582,6 +2720,9 @@
 	temp |= FDI_COMPOSITE_SYNC;
 	I915_WRITE(reg, temp | FDI_TX_ENABLE);
 
+	I915_WRITE(FDI_RX_MISC(pipe),
+		   FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
+
 	reg = FDI_RX_CTL(pipe);
 	temp = I915_READ(reg);
 	temp &= ~FDI_LINK_TRAIN_AUTO;
@@ -2593,9 +2734,6 @@
 	POSTING_READ(reg);
 	udelay(150);
 
-	if (HAS_PCH_CPT(dev))
-		cpt_phase_pointer_enable(dev, pipe);
-
 	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
@@ -2613,7 +2751,7 @@
 		if (temp & FDI_RX_BIT_LOCK ||
 		    (I915_READ(reg) & FDI_RX_BIT_LOCK)) {
 			I915_WRITE(reg, temp | FDI_RX_BIT_LOCK);
-			DRM_DEBUG_KMS("FDI train 1 done.\n");
+			DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", i);
 			break;
 		}
 	}
@@ -2654,7 +2792,7 @@
 
 		if (temp & FDI_RX_SYMBOL_LOCK) {
 			I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK);
-			DRM_DEBUG_KMS("FDI train 2 done.\n");
+			DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", i);
 			break;
 		}
 	}
@@ -2671,9 +2809,6 @@
 	int pipe = intel_crtc->pipe;
 	u32 reg, temp;
 
-	/* Write the TU size bits so error detection works */
-	I915_WRITE(FDI_RX_TUSIZE1(pipe),
-		   I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK);
 
 	/* enable PCH FDI RX PLL, wait warmup plus DMI latency */
 	reg = FDI_RX_CTL(pipe);
@@ -2737,17 +2872,6 @@
 	udelay(100);
 }
 
-static void cpt_phase_pointer_disable(struct drm_device *dev, int pipe)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 flags = I915_READ(SOUTH_CHICKEN1);
-
-	flags &= ~(FDI_PHASE_SYNC_EN(pipe));
-	I915_WRITE(SOUTH_CHICKEN1, flags); /* once to disable... */
-	flags &= ~(FDI_PHASE_SYNC_OVR(pipe));
-	I915_WRITE(SOUTH_CHICKEN1, flags); /* then again to lock */
-	POSTING_READ(SOUTH_CHICKEN1);
-}
 static void ironlake_fdi_disable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -2774,11 +2898,6 @@
 	/* Ironlake workaround, disable clock pointer after downing FDI */
 	if (HAS_PCH_IBX(dev)) {
 		I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR);
-		I915_WRITE(FDI_RX_CHICKEN(pipe),
-			   I915_READ(FDI_RX_CHICKEN(pipe) &
-				     ~FDI_RX_PHASE_SYNC_POINTER_EN));
-	} else if (HAS_PCH_CPT(dev)) {
-		cpt_phase_pointer_disable(dev, pipe);
 	}
 
 	/* still set train pattern 1 */
@@ -2839,7 +2958,7 @@
 	mutex_unlock(&dev->struct_mutex);
 }
 
-static bool intel_crtc_driving_pch(struct drm_crtc *crtc)
+static bool ironlake_crtc_driving_pch(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct intel_encoder *intel_encoder;
@@ -2849,23 +2968,6 @@
 	 * must be driven by its own crtc; no sharing is possible.
 	 */
 	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
-
-		/* On Haswell, LPT PCH handles the VGA connection via FDI, and Haswell
-		 * CPU handles all others */
-		if (IS_HASWELL(dev)) {
-			/* It is still unclear how this will work on PPT, so throw up a warning */
-			WARN_ON(!HAS_PCH_LPT(dev));
-
-			if (intel_encoder->type == INTEL_OUTPUT_ANALOG) {
-				DRM_DEBUG_KMS("Haswell detected DAC encoder, assuming is PCH\n");
-				return true;
-			} else {
-				DRM_DEBUG_KMS("Haswell detected encoder %d, assuming is CPU\n",
-					      intel_encoder->type);
-				return false;
-			}
-		}
-
 		switch (intel_encoder->type) {
 		case INTEL_OUTPUT_EDP:
 			if (!intel_encoder_is_pch_edp(&intel_encoder->base))
@@ -2877,6 +2979,11 @@
 	return true;
 }
 
+static bool haswell_crtc_driving_pch(struct drm_crtc *crtc)
+{
+	return intel_pipe_has_type(crtc, INTEL_OUTPUT_ANALOG);
+}
+
 /* Program iCLKIP clock to the desired frequency */
 static void lpt_program_iclkip(struct drm_crtc *crtc)
 {
@@ -2892,8 +2999,9 @@
 
 	/* Disable SSCCTL */
 	intel_sbi_write(dev_priv, SBI_SSCCTL6,
-				intel_sbi_read(dev_priv, SBI_SSCCTL6) |
-					SBI_SSCCTL_DISABLE);
+			intel_sbi_read(dev_priv, SBI_SSCCTL6, SBI_ICLK) |
+				SBI_SSCCTL_DISABLE,
+			SBI_ICLK);
 
 	/* 20MHz is a corner case which is out of range for the 7-bit divisor */
 	if (crtc->mode.clock == 20000) {
@@ -2934,33 +3042,25 @@
 			phaseinc);
 
 	/* Program SSCDIVINTPHASE6 */
-	temp = intel_sbi_read(dev_priv, SBI_SSCDIVINTPHASE6);
+	temp = intel_sbi_read(dev_priv, SBI_SSCDIVINTPHASE6, SBI_ICLK);
 	temp &= ~SBI_SSCDIVINTPHASE_DIVSEL_MASK;
 	temp |= SBI_SSCDIVINTPHASE_DIVSEL(divsel);
 	temp &= ~SBI_SSCDIVINTPHASE_INCVAL_MASK;
 	temp |= SBI_SSCDIVINTPHASE_INCVAL(phaseinc);
 	temp |= SBI_SSCDIVINTPHASE_DIR(phasedir);
 	temp |= SBI_SSCDIVINTPHASE_PROPAGATE;
-
-	intel_sbi_write(dev_priv,
-			SBI_SSCDIVINTPHASE6,
-			temp);
+	intel_sbi_write(dev_priv, SBI_SSCDIVINTPHASE6, temp, SBI_ICLK);
 
 	/* Program SSCAUXDIV */
-	temp = intel_sbi_read(dev_priv, SBI_SSCAUXDIV6);
+	temp = intel_sbi_read(dev_priv, SBI_SSCAUXDIV6, SBI_ICLK);
 	temp &= ~SBI_SSCAUXDIV_FINALDIV2SEL(1);
 	temp |= SBI_SSCAUXDIV_FINALDIV2SEL(auxdiv);
-	intel_sbi_write(dev_priv,
-			SBI_SSCAUXDIV6,
-			temp);
-
+	intel_sbi_write(dev_priv, SBI_SSCAUXDIV6, temp, SBI_ICLK);
 
 	/* Enable modulator and associated divider */
-	temp = intel_sbi_read(dev_priv, SBI_SSCCTL6);
+	temp = intel_sbi_read(dev_priv, SBI_SSCCTL6, SBI_ICLK);
 	temp &= ~SBI_SSCCTL_DISABLE;
-	intel_sbi_write(dev_priv,
-			SBI_SSCCTL6,
-			temp);
+	intel_sbi_write(dev_priv, SBI_SSCCTL6, temp, SBI_ICLK);
 
 	/* Wait for initialization time */
 	udelay(24);
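The intel_sbi_read()/intel_sbi_write() calls above now carry a destination selector; the assumed signatures, with SBI_ICLK addressing the integrated clock controller and SBI_MPHY the FDI mPHY:

	u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
			   enum intel_sbi_destination destination);
	void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg,
			     u32 value, enum intel_sbi_destination destination);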
@@ -2986,15 +3086,24 @@
 
 	assert_transcoder_disabled(dev_priv, pipe);
 
+	/* Write the TU size bits before fdi link training, so that error
+	 * detection works. */
+	I915_WRITE(FDI_RX_TUSIZE1(pipe),
+		   I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK);
+
 	/* For PCH output, training FDI link */
 	dev_priv->display.fdi_link_train(crtc);
 
-	intel_enable_pch_pll(intel_crtc);
+	/* XXX: pch pll's can be enabled any time before we enable the PCH
+	 * transcoder, and we actually should do this to not upset any PCH
+	 * transcoder that already uses the clock when we share it.
+	 *
+	 * Note that enable_pch_pll tries to do the right thing, but get_pch_pll
+	 * unconditionally resets the pll - we need that to have the right LVDS
+	 * enable sequence. */
+	ironlake_enable_pch_pll(intel_crtc);
 
-	if (HAS_PCH_LPT(dev)) {
-		DRM_DEBUG_KMS("LPT detected: programming iCLKIP\n");
-		lpt_program_iclkip(crtc);
-	} else if (HAS_PCH_CPT(dev)) {
+	if (HAS_PCH_CPT(dev)) {
 		u32 sel;
 
 		temp = I915_READ(PCH_DPLL_SEL);
@@ -3031,8 +3140,7 @@
 	I915_WRITE(TRANS_VSYNC(pipe),  I915_READ(VSYNC(pipe)));
 	I915_WRITE(TRANS_VSYNCSHIFT(pipe),  I915_READ(VSYNCSHIFT(pipe)));
 
-	if (!IS_HASWELL(dev))
-		intel_fdi_normal_train(crtc);
+	intel_fdi_normal_train(crtc);
 
 	/* For PCH DP, enable TRANS_DP_CTL */
 	if (HAS_PCH_CPT(dev) &&
@@ -3064,15 +3172,37 @@
 			temp |= TRANS_DP_PORT_SEL_D;
 			break;
 		default:
-			DRM_DEBUG_KMS("Wrong PCH DP port return. Guess port B\n");
-			temp |= TRANS_DP_PORT_SEL_B;
-			break;
+			BUG();
 		}
 
 		I915_WRITE(reg, temp);
 	}
 
-	intel_enable_transcoder(dev_priv, pipe);
+	ironlake_enable_pch_transcoder(dev_priv, pipe);
+}
+
+static void lpt_pch_enable(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+
+	assert_transcoder_disabled(dev_priv, TRANSCODER_A);
+
+	lpt_program_iclkip(crtc);
+
+	/* Set transcoder timing. */
+	I915_WRITE(_TRANS_HTOTAL_A, I915_READ(HTOTAL(cpu_transcoder)));
+	I915_WRITE(_TRANS_HBLANK_A, I915_READ(HBLANK(cpu_transcoder)));
+	I915_WRITE(_TRANS_HSYNC_A,  I915_READ(HSYNC(cpu_transcoder)));
+
+	I915_WRITE(_TRANS_VTOTAL_A, I915_READ(VTOTAL(cpu_transcoder)));
+	I915_WRITE(_TRANS_VBLANK_A, I915_READ(VBLANK(cpu_transcoder)));
+	I915_WRITE(_TRANS_VSYNC_A,  I915_READ(VSYNC(cpu_transcoder)));
+	I915_WRITE(_TRANS_VSYNCSHIFT_A, I915_READ(VSYNCSHIFT(cpu_transcoder)));
+
+	lpt_enable_pch_transcoder(dev_priv, cpu_transcoder);
 }
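Note that the LPT PCH exposes a single transcoder, which is why the timings above are copied into the transcoder A registers no matter which CPU transcoder feeds the port.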
 
 static void intel_put_pch_pll(struct intel_crtc *intel_crtc)
@@ -3165,16 +3295,12 @@
 void intel_cpt_verify_modeset(struct drm_device *dev, int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int dslreg = PIPEDSL(pipe), tc2reg = TRANS_CHICKEN2(pipe);
+	int dslreg = PIPEDSL(pipe);
 	u32 temp;
 
 	temp = I915_READ(dslreg);
 	udelay(500);
 	if (wait_for(I915_READ(dslreg) != temp, 5)) {
-		/* Without this, mode sets may fail silently on FDI */
-		I915_WRITE(tc2reg, TRANS_AUTOTRAIN_GEN_STALL_DIS);
-		udelay(250);
-		I915_WRITE(tc2reg, 0);
 		if (wait_for(I915_READ(dslreg) != temp, 5))
 			DRM_ERROR("mode set failed: pipe %d stuck\n", pipe);
 	}
@@ -3205,9 +3331,12 @@
 			I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN);
 	}
 
-	is_pch_port = intel_crtc_driving_pch(crtc);
+	is_pch_port = ironlake_crtc_driving_pch(crtc);
 
 	if (is_pch_port) {
+		/* Note: FDI PLL enabling _must_ be done before we enable the
+		 * cpu pipes, hence this is separate from all the other fdi/pch
+		 * enabling. */
 		ironlake_fdi_pll_enable(intel_crtc);
 	} else {
 		assert_fdi_tx_disabled(dev_priv, pipe);
@@ -3220,12 +3349,17 @@
 
 	/* Enable panel fitting for LVDS */
 	if (dev_priv->pch_pf_size &&
-	    (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) || HAS_eDP)) {
+	    (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) ||
+	     intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))) {
 		/* Force use of hard-coded filter coefficients
 		 * as some pre-programmed values are broken,
 		 * e.g. x201.
 		 */
-		I915_WRITE(PF_CTL(pipe), PF_ENABLE | PF_FILTER_MED_3x3);
+		if (IS_IVYBRIDGE(dev))
+			I915_WRITE(PF_CTL(pipe), PF_ENABLE | PF_FILTER_MED_3x3 |
+						 PF_PIPE_SEL_IVB(pipe));
+		else
+			I915_WRITE(PF_CTL(pipe), PF_ENABLE | PF_FILTER_MED_3x3);
 		I915_WRITE(PF_WIN_POS(pipe), dev_priv->pch_pf_pos);
 		I915_WRITE(PF_WIN_SZ(pipe), dev_priv->pch_pf_size);
 	}
@@ -3265,6 +3399,83 @@
 	intel_wait_for_vblank(dev, intel_crtc->pipe);
 }
 
+static void haswell_crtc_enable(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_encoder *encoder;
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+	bool is_pch_port;
+
+	WARN_ON(!crtc->enabled);
+
+	if (intel_crtc->active)
+		return;
+
+	intel_crtc->active = true;
+	intel_update_watermarks(dev);
+
+	is_pch_port = haswell_crtc_driving_pch(crtc);
+
+	if (is_pch_port)
+		dev_priv->display.fdi_link_train(crtc);
+
+	for_each_encoder_on_crtc(dev, crtc, encoder)
+		if (encoder->pre_enable)
+			encoder->pre_enable(encoder);
+
+	intel_ddi_enable_pipe_clock(intel_crtc);
+
+	/* Enable panel fitting for eDP */
+	if (dev_priv->pch_pf_size &&
+	    intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) {
+		/* Force use of hard-coded filter coefficients
+		 * as some pre-programmed values are broken,
+		 * e.g. x201.
+		 */
+		I915_WRITE(PF_CTL(pipe), PF_ENABLE | PF_FILTER_MED_3x3 |
+					 PF_PIPE_SEL_IVB(pipe));
+		I915_WRITE(PF_WIN_POS(pipe), dev_priv->pch_pf_pos);
+		I915_WRITE(PF_WIN_SZ(pipe), dev_priv->pch_pf_size);
+	}
+
+	/*
+	 * On ILK+ LUT must be loaded before the pipe is running but with
+	 * clocks enabled
+	 */
+	intel_crtc_load_lut(crtc);
+
+	intel_ddi_set_pipe_settings(crtc);
+	intel_ddi_enable_pipe_func(crtc);
+
+	intel_enable_pipe(dev_priv, pipe, is_pch_port);
+	intel_enable_plane(dev_priv, plane, pipe);
+
+	if (is_pch_port)
+		lpt_pch_enable(crtc);
+
+	mutex_lock(&dev->struct_mutex);
+	intel_update_fbc(dev);
+	mutex_unlock(&dev->struct_mutex);
+
+	intel_crtc_update_cursor(crtc, true);
+
+	for_each_encoder_on_crtc(dev, crtc, encoder)
+		encoder->enable(encoder);
+
+	/*
+	 * There seems to be a race in PCH platform hw (at least on some
+	 * outputs) where an enabled pipe still completes any pageflip right
+	 * away (as if the pipe is off) instead of waiting for vblank. As soon
+	 * as the first vblank happened, everything works as expected. Hence just
+	 * wait for one vblank before returning to avoid strange things
+	 * happening.
+	 */
+	intel_wait_for_vblank(dev, intel_crtc->pipe);
+}
+
 static void ironlake_crtc_disable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -3303,7 +3514,7 @@
 
 	ironlake_fdi_disable(crtc);
 
-	intel_disable_transcoder(dev_priv, pipe);
+	ironlake_disable_pch_transcoder(dev_priv, pipe);
 
 	if (HAS_PCH_CPT(dev)) {
 		/* disable TRANS_DP_CTL */
@@ -3345,12 +3556,78 @@
 	mutex_unlock(&dev->struct_mutex);
 }
 
+static void haswell_crtc_disable(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_encoder *encoder;
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	bool is_pch_port;
+
+	if (!intel_crtc->active)
+		return;
+
+	is_pch_port = haswell_crtc_driving_pch(crtc);
+
+	for_each_encoder_on_crtc(dev, crtc, encoder)
+		encoder->disable(encoder);
+
+	intel_crtc_wait_for_pending_flips(crtc);
+	drm_vblank_off(dev, pipe);
+	intel_crtc_update_cursor(crtc, false);
+
+	intel_disable_plane(dev_priv, plane, pipe);
+
+	if (dev_priv->cfb_plane == plane)
+		intel_disable_fbc(dev);
+
+	intel_disable_pipe(dev_priv, pipe);
+
+	intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder);
+
+	/* Disable PF */
+	I915_WRITE(PF_CTL(pipe), 0);
+	I915_WRITE(PF_WIN_SZ(pipe), 0);
+
+	intel_ddi_disable_pipe_clock(intel_crtc);
+
+	for_each_encoder_on_crtc(dev, crtc, encoder)
+		if (encoder->post_disable)
+			encoder->post_disable(encoder);
+
+	if (is_pch_port) {
+		lpt_disable_pch_transcoder(dev_priv);
+		intel_ddi_fdi_disable(crtc);
+	}
+
+	intel_crtc->active = false;
+	intel_update_watermarks(dev);
+
+	mutex_lock(&dev->struct_mutex);
+	intel_update_fbc(dev);
+	mutex_unlock(&dev->struct_mutex);
+}
+
 static void ironlake_crtc_off(struct drm_crtc *crtc)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	intel_put_pch_pll(intel_crtc);
 }
 
+static void haswell_crtc_off(struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	/* Stop saying we're using TRANSCODER_EDP because some other CRTC might
+	 * start using it. */
+	intel_crtc->cpu_transcoder = intel_crtc->pipe;
+
+	intel_ddi_put_crtc_pll(crtc);
+}
+
 static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable)
 {
 	if (!enable && intel_crtc->overlay) {
@@ -4061,7 +4338,7 @@
 			   struct drm_display_mode *mode,
 			   struct drm_display_mode *adjusted_mode,
 			   intel_clock_t *clock, intel_clock_t *reduced_clock,
-			   int refclk, int num_connectors)
+			   int num_connectors)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4069,9 +4346,19 @@
 	int pipe = intel_crtc->pipe;
 	u32 dpll, mdiv, pdiv;
 	u32 bestn, bestm1, bestm2, bestp1, bestp2;
-	bool is_hdmi;
+	bool is_sdvo;
+	u32 temp;
 
-	is_hdmi = intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI);
+	is_sdvo = intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO) ||
+		intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI);
+
+	dpll = DPLL_VGA_MODE_DIS;
+	dpll |= DPLL_EXT_BUFFER_ENABLE_VLV;
+	dpll |= DPLL_REFA_CLK_ENABLE_VLV;
+	dpll |= DPLL_INTEGRATED_CLOCK_VLV;
+
+	I915_WRITE(DPLL(pipe), dpll);
+	POSTING_READ(DPLL(pipe));
 
 	bestn = clock->n;
 	bestm1 = clock->m1;
@@ -4079,12 +4366,10 @@
 	bestp1 = clock->p1;
 	bestp2 = clock->p2;
 
-	/* Enable DPIO clock input */
-	dpll = DPLL_EXT_BUFFER_ENABLE_VLV | DPLL_REFA_CLK_ENABLE_VLV |
-		DPLL_VGA_MODE_DIS | DPLL_INTEGRATED_CLOCK_VLV;
-	I915_WRITE(DPLL(pipe), dpll);
-	POSTING_READ(DPLL(pipe));
-
+	/*
+	 * In Valleyview the PLL and lane control registers are programmed
+	 * through the DPIO interface.
+	 */
 	mdiv = ((bestm1 << DPIO_M1DIV_SHIFT) | (bestm2 & DPIO_M2DIV_MASK));
 	mdiv |= ((bestp1 << DPIO_P1_SHIFT) | (bestp2 << DPIO_P2_SHIFT));
 	mdiv |= ((bestn << DPIO_N_SHIFT));
@@ -4095,12 +4380,13 @@
 
 	intel_dpio_write(dev_priv, DPIO_CORE_CLK(pipe), 0x01000000);
 
-	pdiv = DPIO_REFSEL_OVERRIDE | (5 << DPIO_PLL_MODESEL_SHIFT) |
+	pdiv = (1 << DPIO_REFSEL_OVERRIDE) | (5 << DPIO_PLL_MODESEL_SHIFT) |
 		(3 << DPIO_BIAS_CURRENT_CTL_SHIFT) | (1<<20) |
-		(8 << DPIO_DRIVER_CTL_SHIFT) | (5 << DPIO_CLK_BIAS_CTL_SHIFT);
+		(7 << DPIO_PLL_REFCLK_SEL_SHIFT) | (8 << DPIO_DRIVER_CTL_SHIFT) |
+		(5 << DPIO_CLK_BIAS_CTL_SHIFT);
 	intel_dpio_write(dev_priv, DPIO_REFSFR(pipe), pdiv);
 
-	intel_dpio_write(dev_priv, DPIO_LFP_COEFF(pipe), 0x009f0051);
+	intel_dpio_write(dev_priv, DPIO_LFP_COEFF(pipe), 0x005f003b);
 
 	dpll |= DPLL_VCO_ENABLE;
 	I915_WRITE(DPLL(pipe), dpll);
@@ -4108,19 +4394,44 @@
 	if (wait_for(((I915_READ(DPLL(pipe)) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1))
 		DRM_ERROR("DPLL %d failed to lock\n", pipe);
 
-	if (is_hdmi) {
-		u32 temp = intel_mode_get_pixel_multiplier(adjusted_mode);
+	intel_dpio_write(dev_priv, DPIO_FASTCLK_DISABLE, 0x620);
 
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT))
+		intel_dp_set_m_n(crtc, mode, adjusted_mode);
+
+	I915_WRITE(DPLL(pipe), dpll);
+
+	/* Wait for the clocks to stabilize. */
+	POSTING_READ(DPLL(pipe));
+	udelay(150);
+
+	temp = 0;
+	if (is_sdvo) {
+		temp = intel_mode_get_pixel_multiplier(adjusted_mode);
 		if (temp > 1)
 			temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
 		else
 			temp = 0;
-
-		I915_WRITE(DPLL_MD(pipe), temp);
-		POSTING_READ(DPLL_MD(pipe));
 	}
+	I915_WRITE(DPLL_MD(pipe), temp);
+	POSTING_READ(DPLL_MD(pipe));
 
-	intel_dpio_write(dev_priv, DPIO_FASTCLK_DISABLE, 0x641); /* ??? */
+	/* Now program lane control registers */
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) ||
+	    intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI)) {
+		temp = 0x1000C4;
+		if (pipe == 1)
+			temp |= (1 << 21);
+		intel_dpio_write(dev_priv, DPIO_DATA_CHANNEL1, temp);
+	}
+
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) {
+		temp = 0x1000C4;
+		if (pipe == 1)
+			temp |= (1 << 21);
+		intel_dpio_write(dev_priv, DPIO_DATA_CHANNEL2, temp);
+	}
 }
 
 static void i9xx_update_pll(struct drm_crtc *crtc,
@@ -4136,6 +4447,8 @@
 	u32 dpll;
 	bool is_sdvo;
 
+	i9xx_update_pll_dividers(crtc, clock, reduced_clock);
+
 	is_sdvo = intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO) ||
 		intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI);
 
@@ -4236,7 +4549,7 @@
 
 static void i8xx_update_pll(struct drm_crtc *crtc,
 			    struct drm_display_mode *adjusted_mode,
-			    intel_clock_t *clock,
+			    intel_clock_t *clock, intel_clock_t *reduced_clock,
 			    int num_connectors)
 {
 	struct drm_device *dev = crtc->dev;
@@ -4245,6 +4558,8 @@
 	int pipe = intel_crtc->pipe;
 	u32 dpll;
 
+	i9xx_update_pll_dividers(crtc, clock, reduced_clock);
+
 	dpll = DPLL_VGA_MODE_DIS;
 
 	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
@@ -4294,6 +4609,64 @@
 	I915_WRITE(DPLL(pipe), dpll);
 }
 
+static void intel_set_pipe_timings(struct intel_crtc *intel_crtc,
+				   struct drm_display_mode *mode,
+				   struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = intel_crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum pipe pipe = intel_crtc->pipe;
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	uint32_t vsyncshift;
+
+	if (!IS_GEN2(dev) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) {
+		/* the chip adds 2 halflines automatically */
+		adjusted_mode->crtc_vtotal -= 1;
+		adjusted_mode->crtc_vblank_end -= 1;
+		vsyncshift = adjusted_mode->crtc_hsync_start
+			     - adjusted_mode->crtc_htotal / 2;
+	} else {
+		vsyncshift = 0;
+	}
+
+	if (INTEL_INFO(dev)->gen > 3)
+		I915_WRITE(VSYNCSHIFT(cpu_transcoder), vsyncshift);
+
+	I915_WRITE(HTOTAL(cpu_transcoder),
+		   (adjusted_mode->crtc_hdisplay - 1) |
+		   ((adjusted_mode->crtc_htotal - 1) << 16));
+	I915_WRITE(HBLANK(cpu_transcoder),
+		   (adjusted_mode->crtc_hblank_start - 1) |
+		   ((adjusted_mode->crtc_hblank_end - 1) << 16));
+	I915_WRITE(HSYNC(cpu_transcoder),
+		   (adjusted_mode->crtc_hsync_start - 1) |
+		   ((adjusted_mode->crtc_hsync_end - 1) << 16));
+
+	I915_WRITE(VTOTAL(cpu_transcoder),
+		   (adjusted_mode->crtc_vdisplay - 1) |
+		   ((adjusted_mode->crtc_vtotal - 1) << 16));
+	I915_WRITE(VBLANK(cpu_transcoder),
+		   (adjusted_mode->crtc_vblank_start - 1) |
+		   ((adjusted_mode->crtc_vblank_end - 1) << 16));
+	I915_WRITE(VSYNC(cpu_transcoder),
+		   (adjusted_mode->crtc_vsync_start - 1) |
+		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
+
+	/* Workaround: when the EDP input selection is B, the VTOTAL_B must be
+	 * programmed with the VTOTAL_EDP value. Same for VTOTAL_C. This is
+	 * documented on the DDI_FUNC_CTL register description, EDP Input Select
+	 * bits. */
+	if (IS_HASWELL(dev) && cpu_transcoder == TRANSCODER_EDP &&
+	    (pipe == PIPE_B || pipe == PIPE_C))
+		I915_WRITE(VTOTAL(pipe), I915_READ(VTOTAL(cpu_transcoder)));
+
+	/* pipesrc controls the size that is scaled from, which should
+	 * always be the user's requested size.
+	 */
+	I915_WRITE(PIPESRC(pipe),
+		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
+}
+
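As a worked example of the packing above (mode values assumed): a 1920x1080 mode with crtc_htotal == 2200 writes HTOTAL as (1920 - 1) | ((2200 - 1) << 16), i.e. active size minus one in the low word and total size minus one in the high word.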
 static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 			      struct drm_display_mode *mode,
 			      struct drm_display_mode *adjusted_mode,
@@ -4307,7 +4680,7 @@
 	int plane = intel_crtc->plane;
 	int refclk, num_connectors = 0;
 	intel_clock_t clock, reduced_clock;
-	u32 dspcntr, pipeconf, vsyncshift;
+	u32 dspcntr, pipeconf;
 	bool ok, has_reduced_clock = false, is_sdvo = false;
 	bool is_lvds = false, is_tv = false, is_dp = false;
 	struct intel_encoder *encoder;
@@ -4371,14 +4744,14 @@
 	if (is_sdvo && is_tv)
 		i9xx_adjust_sdvo_tv_clock(adjusted_mode, &clock);
 
-	i9xx_update_pll_dividers(crtc, &clock, has_reduced_clock ?
-				 &reduced_clock : NULL);
-
 	if (IS_GEN2(dev))
-		i8xx_update_pll(crtc, adjusted_mode, &clock, num_connectors);
+		i8xx_update_pll(crtc, adjusted_mode, &clock,
+				has_reduced_clock ? &reduced_clock : NULL,
+				num_connectors);
 	else if (IS_VALLEYVIEW(dev))
-		vlv_update_pll(crtc, mode,adjusted_mode, &clock, NULL,
-			       refclk, num_connectors);
+		vlv_update_pll(crtc, mode, adjusted_mode, &clock,
+				has_reduced_clock ? &reduced_clock : NULL,
+				num_connectors);
 	else
 		i9xx_update_pll(crtc, mode, adjusted_mode, &clock,
 				has_reduced_clock ? &reduced_clock : NULL,
@@ -4419,6 +4792,14 @@
 		}
 	}
 
+	if (IS_VALLEYVIEW(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) {
+		if (adjusted_mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) {
+			pipeconf |= PIPECONF_BPP_6 |
+					PIPECONF_ENABLE |
+					I965_PIPECONF_ACTIVE;
+		}
+	}
+
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
@@ -4434,40 +4815,12 @@
 
 	pipeconf &= ~PIPECONF_INTERLACE_MASK;
 	if (!IS_GEN2(dev) &&
-	    adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) {
+	    adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
 		pipeconf |= PIPECONF_INTERLACE_W_FIELD_INDICATION;
-		/* the chip adds 2 halflines automatically */
-		adjusted_mode->crtc_vtotal -= 1;
-		adjusted_mode->crtc_vblank_end -= 1;
-		vsyncshift = adjusted_mode->crtc_hsync_start
-			     - adjusted_mode->crtc_htotal/2;
-	} else {
+	else
 		pipeconf |= PIPECONF_PROGRESSIVE;
-		vsyncshift = 0;
-	}
 
-	if (!IS_GEN3(dev))
-		I915_WRITE(VSYNCSHIFT(pipe), vsyncshift);
-
-	I915_WRITE(HTOTAL(pipe),
-		   (adjusted_mode->crtc_hdisplay - 1) |
-		   ((adjusted_mode->crtc_htotal - 1) << 16));
-	I915_WRITE(HBLANK(pipe),
-		   (adjusted_mode->crtc_hblank_start - 1) |
-		   ((adjusted_mode->crtc_hblank_end - 1) << 16));
-	I915_WRITE(HSYNC(pipe),
-		   (adjusted_mode->crtc_hsync_start - 1) |
-		   ((adjusted_mode->crtc_hsync_end - 1) << 16));
-
-	I915_WRITE(VTOTAL(pipe),
-		   (adjusted_mode->crtc_vdisplay - 1) |
-		   ((adjusted_mode->crtc_vtotal - 1) << 16));
-	I915_WRITE(VBLANK(pipe),
-		   (adjusted_mode->crtc_vblank_start - 1) |
-		   ((adjusted_mode->crtc_vblank_end - 1) << 16));
-	I915_WRITE(VSYNC(pipe),
-		   (adjusted_mode->crtc_vsync_start - 1) |
-		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
+	intel_set_pipe_timings(intel_crtc, mode, adjusted_mode);
 
 	/* pipesrc and dspsize control the size that is scaled from,
 	 * which should always be the user's requested size.
@@ -4476,8 +4829,6 @@
 		   ((mode->vdisplay - 1) << 16) |
 		   (mode->hdisplay - 1));
 	I915_WRITE(DSPPOS(plane), 0);
-	I915_WRITE(PIPESRC(pipe),
-		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
 
 	I915_WRITE(PIPECONF(pipe), pipeconf);
 	POSTING_READ(PIPECONF(pipe));
@@ -4495,10 +4846,7 @@
 	return ret;
 }
 
-/*
- * Initialize reference clocks when the driver loads
- */
-void ironlake_init_pch_refclk(struct drm_device *dev)
+static void ironlake_init_pch_refclk(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_mode_config *mode_config = &dev->mode_config;
@@ -4612,6 +4960,182 @@
 	}
 }
 
+/* Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O. */
+static void lpt_init_pch_refclk(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct intel_encoder *encoder;
+	bool has_vga = false;
+	bool is_sdv = false;
+	u32 tmp;
+
+	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+		switch (encoder->type) {
+		case INTEL_OUTPUT_ANALOG:
+			has_vga = true;
+			break;
+		}
+	}
+
+	if (!has_vga)
+		return;
+
+	/* XXX: Rip out SDV support once Haswell ships for real. */
+	if (IS_HASWELL(dev) && (dev->pci_device & 0xFF00) == 0x0C00)
+		is_sdv = true;
+
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	tmp &= ~SBI_SSCCTL_DISABLE;
+	tmp |= SBI_SSCCTL_PATHALT;
+	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+	udelay(24);
+
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	tmp &= ~SBI_SSCCTL_PATHALT;
+	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+	if (!is_sdv) {
+		tmp = I915_READ(SOUTH_CHICKEN2);
+		tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
+		I915_WRITE(SOUTH_CHICKEN2, tmp);
+
+		if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
+				       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
+			DRM_ERROR("FDI mPHY reset assert timeout\n");
+
+		tmp = I915_READ(SOUTH_CHICKEN2);
+		tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
+		I915_WRITE(SOUTH_CHICKEN2, tmp);
+
+		if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
+				        FDI_MPHY_IOSFSB_RESET_STATUS) == 0,
+				       100))
+			DRM_ERROR("FDI mPHY reset de-assert timeout\n");
+	}
+
+	tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY);
+	tmp &= ~(0xFF << 24);
+	tmp |= (0x12 << 24);
+	intel_sbi_write(dev_priv, 0x8008, tmp, SBI_MPHY);
+
+	if (!is_sdv) {
+		tmp = intel_sbi_read(dev_priv, 0x808C, SBI_MPHY);
+		tmp &= ~(0x3 << 6);
+		tmp |= (1 << 6) | (1 << 0);
+		intel_sbi_write(dev_priv, 0x808C, tmp, SBI_MPHY);
+	}
+
+	if (is_sdv) {
+		tmp = intel_sbi_read(dev_priv, 0x800C, SBI_MPHY);
+		tmp |= 0x7FFF;
+		intel_sbi_write(dev_priv, 0x800C, tmp, SBI_MPHY);
+	}
+
+	tmp = intel_sbi_read(dev_priv, 0x2008, SBI_MPHY);
+	tmp |= (1 << 11);
+	intel_sbi_write(dev_priv, 0x2008, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x2108, SBI_MPHY);
+	tmp |= (1 << 11);
+	intel_sbi_write(dev_priv, 0x2108, tmp, SBI_MPHY);
+
+	if (is_sdv) {
+		tmp = intel_sbi_read(dev_priv, 0x2038, SBI_MPHY);
+		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
+		intel_sbi_write(dev_priv, 0x2038, tmp, SBI_MPHY);
+
+		tmp = intel_sbi_read(dev_priv, 0x2138, SBI_MPHY);
+		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
+		intel_sbi_write(dev_priv, 0x2138, tmp, SBI_MPHY);
+
+		tmp = intel_sbi_read(dev_priv, 0x203C, SBI_MPHY);
+		tmp |= (0x3F << 8);
+		intel_sbi_write(dev_priv, 0x203C, tmp, SBI_MPHY);
+
+		tmp = intel_sbi_read(dev_priv, 0x213C, SBI_MPHY);
+		tmp |= (0x3F << 8);
+		intel_sbi_write(dev_priv, 0x213C, tmp, SBI_MPHY);
+	}
+
+	tmp = intel_sbi_read(dev_priv, 0x206C, SBI_MPHY);
+	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
+	intel_sbi_write(dev_priv, 0x206C, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x216C, SBI_MPHY);
+	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
+	intel_sbi_write(dev_priv, 0x216C, tmp, SBI_MPHY);
+
+	if (!is_sdv) {
+		tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
+		tmp &= ~(7 << 13);
+		tmp |= (5 << 13);
+		intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
+
+		tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
+		tmp &= ~(7 << 13);
+		tmp |= (5 << 13);
+		intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
+	}
+
+	tmp = intel_sbi_read(dev_priv, 0x208C, SBI_MPHY);
+	tmp &= ~0xFF;
+	tmp |= 0x1C;
+	intel_sbi_write(dev_priv, 0x208C, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x218C, SBI_MPHY);
+	tmp &= ~0xFF;
+	tmp |= 0x1C;
+	intel_sbi_write(dev_priv, 0x218C, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x2098, SBI_MPHY);
+	tmp &= ~(0xFF << 16);
+	tmp |= (0x1C << 16);
+	intel_sbi_write(dev_priv, 0x2098, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x2198, SBI_MPHY);
+	tmp &= ~(0xFF << 16);
+	tmp |= (0x1C << 16);
+	intel_sbi_write(dev_priv, 0x2198, tmp, SBI_MPHY);
+
+	if (!is_sdv) {
+		tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
+		tmp |= (1 << 27);
+		intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
+
+		tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
+		tmp |= (1 << 27);
+		intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
+
+		tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
+		tmp &= ~(0xF << 28);
+		tmp |= (4 << 28);
+		intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
+
+		tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
+		tmp &= ~(0xF << 28);
+		tmp |= (4 << 28);
+		intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
+	}
+
+	/* ULT uses SBI_GEN0, but ULT doesn't have VGA, so we don't care. */
+	tmp = intel_sbi_read(dev_priv, SBI_DBUFF0, SBI_ICLK);
+	tmp |= SBI_DBUFF0_ENABLE;
+	intel_sbi_write(dev_priv, SBI_DBUFF0, tmp, SBI_ICLK);
+}
+
+/*
+ * Initialize reference clocks when the driver loads
+ */
+void intel_init_pch_refclk(struct drm_device *dev)
+{
+	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
+		ironlake_init_pch_refclk(dev);
+	else if (HAS_PCH_LPT(dev))
+		lpt_init_pch_refclk(dev);
+}
+
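With ironlake_init_pch_refclk() now static, external callers are expected to go through the dispatcher instead, e.g. at driver load (call site assumed):

	intel_init_pch_refclk(dev);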
 static int ironlake_get_refclk(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -4668,8 +5192,8 @@
 		val |= PIPE_12BPC;
 		break;
 	default:
-		val |= PIPE_8BPC;
-		break;
+		/* Case prevented by intel_choose_pipe_bpp_dither. */
+		BUG();
 	}
 
 	val &= ~(PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_MASK);
@@ -4686,6 +5210,31 @@
 	POSTING_READ(PIPECONF(pipe));
 }
 
+static void haswell_set_pipeconf(struct drm_crtc *crtc,
+				 struct drm_display_mode *adjusted_mode,
+				 bool dither)
+{
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	uint32_t val;
+
+	val = I915_READ(PIPECONF(cpu_transcoder));
+
+	val &= ~(PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_MASK);
+	if (dither)
+		val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP);
+
+	val &= ~PIPECONF_INTERLACE_MASK_HSW;
+	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
+		val |= PIPECONF_INTERLACED_ILK;
+	else
+		val |= PIPECONF_PROGRESSIVE;
+
+	I915_WRITE(PIPECONF(cpu_transcoder), val);
+	POSTING_READ(PIPECONF(cpu_transcoder));
+}
+
 static bool ironlake_compute_clocks(struct drm_crtc *crtc,
 				    struct drm_display_mode *adjusted_mode,
 				    intel_clock_t *clock,
@@ -4749,74 +5298,126 @@
 	return true;
 }
 
-static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
-				  struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode,
-				  int x, int y,
-				  struct drm_framebuffer *fb)
+static void cpt_enable_fdi_bc_bifurcation(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t temp;
+
+	temp = I915_READ(SOUTH_CHICKEN1);
+	if (temp & FDI_BC_BIFURCATION_SELECT)
+		return;
+
+	WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE);
+	WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE);
+
+	temp |= FDI_BC_BIFURCATION_SELECT;
+	DRM_DEBUG_KMS("enabling fdi C rx\n");
+	I915_WRITE(SOUTH_CHICKEN1, temp);
+	POSTING_READ(SOUTH_CHICKEN1);
+}
+
+static bool ironlake_check_fdi_lanes(struct intel_crtc *intel_crtc)
+{
+	struct drm_device *dev = intel_crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *pipe_B_crtc =
+		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_B]);
+
+	DRM_DEBUG_KMS("checking fdi config on pipe %i, lanes %i\n",
+		      intel_crtc->pipe, intel_crtc->fdi_lanes);
+	if (intel_crtc->fdi_lanes > 4) {
+		DRM_DEBUG_KMS("invalid fdi lane config on pipe %i: %i lanes\n",
+			      intel_crtc->pipe, intel_crtc->fdi_lanes);
+		/* Clamp lanes to avoid programming the hw with bogus values. */
+		intel_crtc->fdi_lanes = 4;
+
+		return false;
+	}
+
+	if (dev_priv->num_pipe == 2)
+		return true;
+
+	switch (intel_crtc->pipe) {
+	case PIPE_A:
+		return true;
+	case PIPE_B:
+		if (dev_priv->pipe_to_crtc_mapping[PIPE_C]->enabled &&
+		    intel_crtc->fdi_lanes > 2) {
+			DRM_DEBUG_KMS("invalid shared fdi lane config on pipe %i: %i lanes\n",
+				      intel_crtc->pipe, intel_crtc->fdi_lanes);
+			/* Clamp lanes to avoid programming the hw with bogus values. */
+			intel_crtc->fdi_lanes = 2;
+
+			return false;
+		}
+
+		if (intel_crtc->fdi_lanes > 2)
+			WARN_ON(I915_READ(SOUTH_CHICKEN1) & FDI_BC_BIFURCATION_SELECT);
+		else
+			cpt_enable_fdi_bc_bifurcation(dev);
+
+		return true;
+	case PIPE_C:
+		if (!pipe_B_crtc->base.enabled || pipe_B_crtc->fdi_lanes <= 2) {
+			if (intel_crtc->fdi_lanes > 2) {
+				DRM_DEBUG_KMS("invalid shared fdi lane config on pipe %i: %i lanes\n",
+					      intel_crtc->pipe, intel_crtc->fdi_lanes);
+				/* Clamp lanes to avoid programming the hw with bogus values. */
+				intel_crtc->fdi_lanes = 2;
+
+				return false;
+			}
+		} else {
+			DRM_DEBUG_KMS("fdi link B uses too many lanes to enable link C\n");
+			return false;
+		}
+
+		cpt_enable_fdi_bc_bifurcation(dev);
+
+		return true;
+	default:
+		BUG();
+	}
+}
+
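In short, the constraint enforced above (derived from the shared 4-lane FDI B/C budget): pipe A always gets its own lanes, pipe B may use up to 4 lanes only while pipe C is off, and once both B and C are active each is clamped to 2 lanes with FDI B/C bifurcation selected.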
+int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp)
+{
+	/*
+	 * Account for spread spectrum to avoid
+	 * oversubscribing the link. Max center spread
+	 * is 2.5%; use 5% for safety's sake.
+	 */
+	u32 bps = target_clock * bpp * 21 / 20;
+	return bps / (link_bw * 8) + 1;
+}
+
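For illustration (numbers assumed): a 148500 kHz target clock at 24 bpp on a 270000 link_bw gives bps = 148500 * 24 * 21 / 20 = 3742200, so 3742200 / (270000 * 8) + 1 = 2 lanes.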
+static void ironlake_set_m_n(struct drm_crtc *crtc,
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
-	int num_connectors = 0;
-	intel_clock_t clock, reduced_clock;
-	u32 dpll, fp = 0, fp2 = 0;
-	bool ok, has_reduced_clock = false, is_sdvo = false;
-	bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false;
-	struct intel_encoder *encoder, *edp_encoder = NULL;
-	int ret;
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	struct intel_encoder *intel_encoder, *edp_encoder = NULL;
 	struct fdi_m_n m_n = {0};
-	u32 temp;
-	int target_clock, pixel_multiplier, lane, link_bw, factor;
-	unsigned int pipe_bpp;
-	bool dither;
-	bool is_cpu_edp = false, is_pch_edp = false;
+	int target_clock, pixel_multiplier, lane, link_bw;
+	bool is_dp = false, is_cpu_edp = false;
 
-	for_each_encoder_on_crtc(dev, crtc, encoder) {
-		switch (encoder->type) {
-		case INTEL_OUTPUT_LVDS:
-			is_lvds = true;
-			break;
-		case INTEL_OUTPUT_SDVO:
-		case INTEL_OUTPUT_HDMI:
-			is_sdvo = true;
-			if (encoder->needs_tv_clock)
-				is_tv = true;
-			break;
-		case INTEL_OUTPUT_TVOUT:
-			is_tv = true;
-			break;
-		case INTEL_OUTPUT_ANALOG:
-			is_crt = true;
-			break;
+	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
+		switch (intel_encoder->type) {
 		case INTEL_OUTPUT_DISPLAYPORT:
 			is_dp = true;
 			break;
 		case INTEL_OUTPUT_EDP:
 			is_dp = true;
-			if (intel_encoder_is_pch_edp(&encoder->base))
-				is_pch_edp = true;
-			else
+			if (!intel_encoder_is_pch_edp(&intel_encoder->base))
 				is_cpu_edp = true;
-			edp_encoder = encoder;
+			edp_encoder = intel_encoder;
 			break;
 		}
-
-		num_connectors++;
 	}
 
-	ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock,
-				     &has_reduced_clock, &reduced_clock);
-	if (!ok) {
-		DRM_ERROR("Couldn't find PLL settings for mode!\n");
-		return -EINVAL;
-	}
-
-	/* Ensure that the cursor is valid for the new mode before changing... */
-	intel_crtc_update_cursor(crtc, true);
-
 	/* FDI link */
 	pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
 	lane = 0;
@@ -4843,29 +5444,9 @@
 	else
 		target_clock = adjusted_mode->clock;
 
-	/* determine panel color depth */
-	dither = intel_choose_pipe_bpp_dither(crtc, fb, &pipe_bpp,
-					      adjusted_mode);
-	if (is_lvds && dev_priv->lvds_dither)
-		dither = true;
-
-	if (pipe_bpp != 18 && pipe_bpp != 24 && pipe_bpp != 30 &&
-	    pipe_bpp != 36) {
-		WARN(1, "intel_choose_pipe_bpp returned invalid value %d\n",
-		     pipe_bpp);
-		pipe_bpp = 24;
-	}
-	intel_crtc->bpp = pipe_bpp;
-
-	if (!lane) {
-		/*
-		 * Account for spread spectrum to avoid
-		 * oversubscribing the link. Max center spread
-		 * is 2.5%; use 5% for safety's sake.
-		 */
-		u32 bps = target_clock * intel_crtc->bpp * 21 / 20;
-		lane = bps / (link_bw * 8) + 1;
-	}
+	if (!lane)
+		lane = ironlake_get_lanes_required(target_clock, link_bw,
+						   intel_crtc->bpp);
 
 	intel_crtc->fdi_lanes = lane;
 
@@ -4874,10 +5455,51 @@
 	ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw,
 			     &m_n);
 
-	fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
-	if (has_reduced_clock)
-		fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
-			reduced_clock.m2;
+	I915_WRITE(PIPE_DATA_M1(cpu_transcoder), TU_SIZE(m_n.tu) | m_n.gmch_m);
+	I915_WRITE(PIPE_DATA_N1(cpu_transcoder), m_n.gmch_n);
+	I915_WRITE(PIPE_LINK_M1(cpu_transcoder), m_n.link_m);
+	I915_WRITE(PIPE_LINK_N1(cpu_transcoder), m_n.link_n);
+}
+
+static uint32_t ironlake_compute_dpll(struct intel_crtc *intel_crtc,
+				      struct drm_display_mode *adjusted_mode,
+				      intel_clock_t *clock, u32 fp)
+{
+	struct drm_crtc *crtc = &intel_crtc->base;
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_encoder *intel_encoder;
+	uint32_t dpll;
+	int factor, pixel_multiplier, num_connectors = 0;
+	bool is_lvds = false, is_sdvo = false, is_tv = false;
+	bool is_dp = false, is_cpu_edp = false;
+
+	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
+		switch (intel_encoder->type) {
+		case INTEL_OUTPUT_LVDS:
+			is_lvds = true;
+			break;
+		case INTEL_OUTPUT_SDVO:
+		case INTEL_OUTPUT_HDMI:
+			is_sdvo = true;
+			if (intel_encoder->needs_tv_clock)
+				is_tv = true;
+			break;
+		case INTEL_OUTPUT_TVOUT:
+			is_tv = true;
+			break;
+		case INTEL_OUTPUT_DISPLAYPORT:
+			is_dp = true;
+			break;
+		case INTEL_OUTPUT_EDP:
+			is_dp = true;
+			if (!intel_encoder_is_pch_edp(&intel_encoder->base))
+				is_cpu_edp = true;
+			break;
+		}
+
+		num_connectors++;
+	}
 
 	/* Enable autotuning of the PLL clock (if permissible) */
 	factor = 21;
@@ -4889,7 +5511,7 @@
 	} else if (is_sdvo && is_tv)
 		factor = 20;
 
-	if (clock.m < factor * clock.n)
+	if (clock->m < factor * clock->n)
 		fp |= FP_CB_TUNE;
 
 	dpll = 0;
@@ -4899,7 +5521,7 @@
 	else
 		dpll |= DPLLB_MODE_DAC_SERIAL;
 	if (is_sdvo) {
-		int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
+		pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
 		if (pixel_multiplier > 1) {
 			dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 		}
@@ -4909,11 +5531,11 @@
 		dpll |= DPLL_DVO_HIGH_SPEED;
 
 	/* compute bitmask from p1 value */
-	dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+	dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
 	/* also FPA1 */
-	dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
+	dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 
-	switch (clock.p2) {
+	switch (clock->p2) {
 	case 5:
 		dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5;
 		break;
@@ -4939,15 +5561,79 @@
 	else
 		dpll |= PLL_REF_INPUT_DREFCLK;
 
+	return dpll;
+}
+
+static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode,
+				  int x, int y,
+				  struct drm_framebuffer *fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+	int num_connectors = 0;
+	intel_clock_t clock, reduced_clock;
+	u32 dpll, fp = 0, fp2 = 0;
+	bool ok, has_reduced_clock = false;
+	bool is_lvds = false, is_dp = false, is_cpu_edp = false;
+	struct intel_encoder *encoder;
+	u32 temp;
+	int ret;
+	bool dither, fdi_config_ok;
+
+	for_each_encoder_on_crtc(dev, crtc, encoder) {
+		switch (encoder->type) {
+		case INTEL_OUTPUT_LVDS:
+			is_lvds = true;
+			break;
+		case INTEL_OUTPUT_DISPLAYPORT:
+			is_dp = true;
+			break;
+		case INTEL_OUTPUT_EDP:
+			is_dp = true;
+			if (!intel_encoder_is_pch_edp(&encoder->base))
+				is_cpu_edp = true;
+			break;
+		}
+
+		num_connectors++;
+	}
+
+	WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)),
+	     "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev));
+
+	ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock,
+				     &has_reduced_clock, &reduced_clock);
+	if (!ok) {
+		DRM_ERROR("Couldn't find PLL settings for mode!\n");
+		return -EINVAL;
+	}
+
+	/* Ensure that the cursor is valid for the new mode before changing... */
+	intel_crtc_update_cursor(crtc, true);
+
+	/* determine panel color depth */
+	dither = intel_choose_pipe_bpp_dither(crtc, fb, &intel_crtc->bpp,
+					      adjusted_mode);
+	if (is_lvds && dev_priv->lvds_dither)
+		dither = true;
+
+	fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
+	if (has_reduced_clock)
+		fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
+			reduced_clock.m2;
+
+	dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock, fp);
+
 	DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe);
 	drm_mode_debug_printmodeline(mode);
 
-	/* CPU eDP is the only output that doesn't need a PCH PLL of its own on
-	 * pre-Haswell/LPT generation */
-	if (HAS_PCH_LPT(dev)) {
-		DRM_DEBUG_KMS("LPT detected: no PLL for pipe %d necessary\n",
-				pipe);
-	} else if (!is_cpu_edp) {
+	/* CPU eDP is the only output that doesn't need a PCH PLL of its own. */
+	if (!is_cpu_edp) {
 		struct intel_pch_pll *pll;
 
 		pll = intel_get_pch_pll(intel_crtc, dpll, fp);
@@ -5033,47 +5719,13 @@
 		}
 	}
 
-	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) {
-		/* the chip adds 2 halflines automatically */
-		adjusted_mode->crtc_vtotal -= 1;
-		adjusted_mode->crtc_vblank_end -= 1;
-		I915_WRITE(VSYNCSHIFT(pipe),
-			   adjusted_mode->crtc_hsync_start
-			   - adjusted_mode->crtc_htotal/2);
-	} else {
-		I915_WRITE(VSYNCSHIFT(pipe), 0);
-	}
+	intel_set_pipe_timings(intel_crtc, mode, adjusted_mode);
 
-	I915_WRITE(HTOTAL(pipe),
-		   (adjusted_mode->crtc_hdisplay - 1) |
-		   ((adjusted_mode->crtc_htotal - 1) << 16));
-	I915_WRITE(HBLANK(pipe),
-		   (adjusted_mode->crtc_hblank_start - 1) |
-		   ((adjusted_mode->crtc_hblank_end - 1) << 16));
-	I915_WRITE(HSYNC(pipe),
-		   (adjusted_mode->crtc_hsync_start - 1) |
-		   ((adjusted_mode->crtc_hsync_end - 1) << 16));
+	/* Note, this also computes intel_crtc->fdi_lanes which is used below in
+	 * ironlake_check_fdi_lanes. */
+	ironlake_set_m_n(crtc, mode, adjusted_mode);
 
-	I915_WRITE(VTOTAL(pipe),
-		   (adjusted_mode->crtc_vdisplay - 1) |
-		   ((adjusted_mode->crtc_vtotal - 1) << 16));
-	I915_WRITE(VBLANK(pipe),
-		   (adjusted_mode->crtc_vblank_start - 1) |
-		   ((adjusted_mode->crtc_vblank_end - 1) << 16));
-	I915_WRITE(VSYNC(pipe),
-		   (adjusted_mode->crtc_vsync_start - 1) |
-		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
-
-	/* pipesrc controls the size that is scaled from, which should
-	 * always be the user's requested size.
-	 */
-	I915_WRITE(PIPESRC(pipe),
-		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
-
-	I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
-	I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
-	I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
-	I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
+	fdi_config_ok = ironlake_check_fdi_lanes(intel_crtc);
 
 	if (is_cpu_edp)
 		ironlake_set_pll_edp(crtc, adjusted_mode->clock);
@@ -5092,6 +5744,217 @@
 
 	intel_update_linetime_watermarks(dev, pipe, adjusted_mode);
 
+	return fdi_config_ok ? ret : -EINVAL;
+}
+
+static int haswell_crtc_mode_set(struct drm_crtc *crtc,
+				 struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode,
+				 int x, int y,
+				 struct drm_framebuffer *fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+	int num_connectors = 0;
+	intel_clock_t clock, reduced_clock;
+	u32 dpll = 0, fp = 0, fp2 = 0;
+	bool ok, has_reduced_clock = false;
+	bool is_lvds = false, is_dp = false, is_cpu_edp = false;
+	struct intel_encoder *encoder;
+	u32 temp;
+	int ret;
+	bool dither;
+
+	for_each_encoder_on_crtc(dev, crtc, encoder) {
+		switch (encoder->type) {
+		case INTEL_OUTPUT_LVDS:
+			is_lvds = true;
+			break;
+		case INTEL_OUTPUT_DISPLAYPORT:
+			is_dp = true;
+			break;
+		case INTEL_OUTPUT_EDP:
+			is_dp = true;
+			if (!intel_encoder_is_pch_edp(&encoder->base))
+				is_cpu_edp = true;
+			break;
+		}
+
+		num_connectors++;
+	}
+
+	if (is_cpu_edp)
+		intel_crtc->cpu_transcoder = TRANSCODER_EDP;
+	else
+		intel_crtc->cpu_transcoder = pipe;
+
+	/* We are not sure yet this won't happen. */
+	WARN(!HAS_PCH_LPT(dev), "Unexpected PCH type %d\n",
+	     INTEL_PCH_TYPE(dev));
+
+	WARN(num_connectors != 1, "%d connectors attached to pipe %c\n",
+	     num_connectors, pipe_name(pipe));
+
+	WARN_ON(I915_READ(PIPECONF(intel_crtc->cpu_transcoder)) &
+		(PIPECONF_ENABLE | I965_PIPECONF_ACTIVE));
+
+	WARN_ON(I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE);
+
+	if (!intel_ddi_pll_mode_set(crtc, adjusted_mode->clock))
+		return -EINVAL;
+
+	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) {
+		ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock,
+					     &has_reduced_clock,
+					     &reduced_clock);
+		if (!ok) {
+			DRM_ERROR("Couldn't find PLL settings for mode!\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Ensure that the cursor is valid for the new mode before changing... */
+	intel_crtc_update_cursor(crtc, true);
+
+	/* determine panel color depth */
+	dither = intel_choose_pipe_bpp_dither(crtc, fb, &intel_crtc->bpp,
+					      adjusted_mode);
+	if (is_lvds && dev_priv->lvds_dither)
+		dither = true;
+
+	DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe);
+	drm_mode_debug_printmodeline(mode);
+
+	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) {
+		fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
+		if (has_reduced_clock)
+			fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
+			      reduced_clock.m2;
+
+		dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock,
+					     fp);
+
+		/* CPU eDP is the only output that doesn't need a PCH PLL of its
+		 * own on pre-Haswell/LPT generation */
+		if (!is_cpu_edp) {
+			struct intel_pch_pll *pll;
+
+			pll = intel_get_pch_pll(intel_crtc, dpll, fp);
+			if (pll == NULL) {
+				DRM_DEBUG_DRIVER("failed to find PLL for pipe %d\n",
+						 pipe);
+				return -EINVAL;
+			}
+		} else
+			intel_put_pch_pll(intel_crtc);
+
+		/* The LVDS pin pair needs to be on before the DPLLs are
+		 * enabled.  This is an exception to the general rule that
+		 * mode_set doesn't turn things on.
+		 */
+		if (is_lvds) {
+			temp = I915_READ(PCH_LVDS);
+			temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
+			if (HAS_PCH_CPT(dev)) {
+				temp &= ~PORT_TRANS_SEL_MASK;
+				temp |= PORT_TRANS_SEL_CPT(pipe);
+			} else {
+				if (pipe == 1)
+					temp |= LVDS_PIPEB_SELECT;
+				else
+					temp &= ~LVDS_PIPEB_SELECT;
+			}
+
+			/* set the corresponding LVDS_BORDER bit */
+			temp |= dev_priv->lvds_border_bits;
+			/* Set the B0-B3 data pairs corresponding to whether
+			 * we're going to set the DPLLs for dual-channel mode or
+			 * not.
+			 */
+			if (clock.p2 == 7)
+				temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP;
+			else
+				temp &= ~(LVDS_B0B3_POWER_UP |
+					  LVDS_CLKB_POWER_UP);
+
+			/* It would be nice to set 24 vs 18-bit mode
+			 * (LVDS_A3_POWER_UP) appropriately here, but we need to
+			 * look more thoroughly into how panels behave in the
+			 * two modes.
+			 */
+			temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY);
+			if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
+				temp |= LVDS_HSYNC_POLARITY;
+			if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
+				temp |= LVDS_VSYNC_POLARITY;
+			I915_WRITE(PCH_LVDS, temp);
+		}
+	}
+
+	if (is_dp && !is_cpu_edp) {
+		intel_dp_set_m_n(crtc, mode, adjusted_mode);
+	} else {
+		if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) {
+			/* For non-DP output, clear any trans DP clock recovery
+			 * setting. */
+			I915_WRITE(TRANSDATA_M1(pipe), 0);
+			I915_WRITE(TRANSDATA_N1(pipe), 0);
+			I915_WRITE(TRANSDPLINK_M1(pipe), 0);
+			I915_WRITE(TRANSDPLINK_N1(pipe), 0);
+		}
+	}
+
+	intel_crtc->lowfreq_avail = false;
+	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) {
+		if (intel_crtc->pch_pll) {
+			I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll);
+
+			/* Wait for the clocks to stabilize. */
+			POSTING_READ(intel_crtc->pch_pll->pll_reg);
+			udelay(150);
+
+			/* The pixel multiplier can only be updated once the
+			 * DPLL is enabled and the clocks are stable.
+			 *
+			 * So write it again.
+			 */
+			I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll);
+		}
+
+		if (intel_crtc->pch_pll) {
+			if (is_lvds && has_reduced_clock && i915_powersave) {
+				I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp2);
+				intel_crtc->lowfreq_avail = true;
+			} else {
+				I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp);
+			}
+		}
+	}
+
+	intel_set_pipe_timings(intel_crtc, mode, adjusted_mode);
+
+	if (!is_dp || is_cpu_edp)
+		ironlake_set_m_n(crtc, mode, adjusted_mode);
+
+	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
+		if (is_cpu_edp)
+			ironlake_set_pll_edp(crtc, adjusted_mode->clock);
+
+	haswell_set_pipeconf(crtc, adjusted_mode, dither);
+
+	/* Set up the display plane register */
+	I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE);
+	POSTING_READ(DSPCNTR(plane));
+
+	ret = intel_pipe_set_base(crtc, x, y, fb);
+
+	intel_update_watermarks(dev);
+
+	intel_update_linetime_watermarks(dev, pipe, adjusted_mode);
+
 	return ret;
 }
 
@@ -5103,6 +5966,8 @@
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
 	int ret;
@@ -5113,7 +5978,19 @@
 					      x, y, fb);
 	drm_vblank_post_modeset(dev, pipe);
 
-	return ret;
+	if (ret != 0)
+		return ret;
+
+	for_each_encoder_on_crtc(dev, crtc, encoder) {
+		DRM_DEBUG_KMS("[ENCODER:%d:%s] set [MODE:%d:%s]\n",
+			encoder->base.base.id,
+			drm_get_encoder_name(&encoder->base),
+			mode->base.id, mode->name);
+		encoder_funcs = encoder->base.helper_private;
+		encoder_funcs->mode_set(&encoder->base, mode, adjusted_mode);
+	}
+
+	return 0;
 }
 
 static bool intel_eld_uptodate(struct drm_connector *connector,
@@ -5749,7 +6626,7 @@
 				  int depth, int bpp)
 {
 	struct drm_i915_gem_object *obj;
-	struct drm_mode_fb_cmd2 mode_cmd;
+	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
 
 	obj = i915_gem_alloc_object(dev,
 				    intel_framebuffer_size_for_mode(mode, bpp));
@@ -5879,24 +6756,19 @@
 		DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
 	if (IS_ERR(fb)) {
 		DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n");
-		goto fail;
+		return false;
 	}
 
 	if (!intel_set_mode(crtc, mode, 0, 0, fb)) {
 		DRM_DEBUG_KMS("failed to set mode on load-detect pipe\n");
 		if (old->release_fb)
 			old->release_fb->funcs->destroy(old->release_fb);
-		goto fail;
+		return false;
 	}
 
 	/* let the connector get through one full cycle before testing */
 	intel_wait_for_vblank(dev, intel_crtc->pipe);
-
 	return true;
-fail:
-	connector->encoder = NULL;
-	encoder->crtc = NULL;
-	return false;
 }
 
 void intel_release_load_detect_pipe(struct drm_connector *connector,
@@ -6021,12 +6893,12 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
 	struct drm_display_mode *mode;
-	int htot = I915_READ(HTOTAL(pipe));
-	int hsync = I915_READ(HSYNC(pipe));
-	int vtot = I915_READ(VTOTAL(pipe));
-	int vsync = I915_READ(VSYNC(pipe));
+	int htot = I915_READ(HTOTAL(cpu_transcoder));
+	int hsync = I915_READ(HSYNC(cpu_transcoder));
+	int vtot = I915_READ(VTOTAL(cpu_transcoder));
+	int vsync = I915_READ(VSYNC(cpu_transcoder));
 
 	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
 	if (!mode)
@@ -6183,14 +7055,19 @@
 {
 	struct intel_unpin_work *work =
 		container_of(__work, struct intel_unpin_work, work);
+	struct drm_device *dev = work->crtc->dev;
 
-	mutex_lock(&work->dev->struct_mutex);
+	mutex_lock(&dev->struct_mutex);
 	intel_unpin_fb_obj(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
 
-	intel_update_fbc(work->dev);
-	mutex_unlock(&work->dev->struct_mutex);
+	intel_update_fbc(dev);
+	mutex_unlock(&dev->struct_mutex);
+
+	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
+	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
+
 	kfree(work);
 }
 
@@ -6201,8 +7078,6 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_unpin_work *work;
 	struct drm_i915_gem_object *obj;
-	struct drm_pending_vblank_event *e;
-	struct timeval tvbl;
 	unsigned long flags;
 
 	/* Ignore early vblank irqs */
@@ -6211,24 +7086,22 @@
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
-	if (work == NULL || !work->pending) {
+
+	/* Ensure we don't miss a work->pending update ... */
+	smp_rmb();
+
+	if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
 	}
 
+	/* and that the unpin work is consistent wrt ->pending. */
+	smp_rmb();
+
 	intel_crtc->unpin_work = NULL;
 
-	if (work->event) {
-		e = work->event;
-		e->event.sequence = drm_vblank_count_and_time(dev, intel_crtc->pipe, &tvbl);
-
-		e->event.tv_sec = tvbl.tv_sec;
-		e->event.tv_usec = tvbl.tv_usec;
-
-		list_add_tail(&e->base.link,
-			      &e->base.file_priv->event_list);
-		wake_up_interruptible(&e->base.file_priv->event_wait);
-	}
+	if (work->event)
+		drm_send_vblank_event(dev, intel_crtc->pipe, work->event);
 
 	drm_vblank_put(dev, intel_crtc->pipe);
 
@@ -6238,9 +7111,9 @@
 
 	atomic_clear_mask(1 << intel_crtc->plane,
 			  &obj->pending_flip.counter);
-
 	wake_up(&dev_priv->pending_flip_queue);
-	schedule_work(&work->work);
+
+	queue_work(dev_priv->wq, &work->work);
 
 	trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
 }
@@ -6268,16 +7141,25 @@
 		to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]);
 	unsigned long flags;
 
+	/* NB: An MMIO update of the plane base pointer will also
+	 * generate a page-flip completion irq, i.e. every modeset
+	 * is also accompanied by a spurious intel_prepare_page_flip().
+	 */
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (intel_crtc->unpin_work) {
-		if ((++intel_crtc->unpin_work->pending) > 1)
-			DRM_ERROR("Prepared flip multiple times\n");
-	} else {
-		DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n");
-	}
+	if (intel_crtc->unpin_work)
+		atomic_inc_not_zero(&intel_crtc->unpin_work->pending);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
+static inline void intel_mark_page_flip_active(struct intel_crtc *intel_crtc)
+{
+	/* Ensure that the work item is consistent when activating it ... */
+	smp_wmb();
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
+	/* and that it is marked active as soon as the irq could fire. */
+	smp_wmb();
+}
+
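
The smp_wmb()/smp_rmb() pairs in intel_mark_page_flip_active() above and in
do_intel_finish_page_flip() implement a small publish/subscribe protocol
between the CPU queueing a flip and the interrupt that completes it: the
writer fully initializes the work item before raising the pending flag, and
the reader only dereferences the item after observing the flag. A minimal
stand-alone sketch of the same barrier pairing, with made-up names (not part
of the patch):

	#include <linux/atomic.h>
	#include <linux/errno.h>

	struct demo_work {
		int payload;		/* data published by the writer */
		atomic_t pending;	/* 0 = idle, non-zero = published */
	};

	static void demo_publish(struct demo_work *w, int data)
	{
		w->payload = data;
		smp_wmb();	/* payload visible before the flag ... */
		atomic_set(&w->pending, 1);
		smp_wmb();	/* ... and the flag visible before the irq fires */
	}

	static int demo_consume(struct demo_work *w)	/* e.g. irq context */
	{
		if (!atomic_read(&w->pending))
			return -EAGAIN;
		smp_rmb();	/* pairs with the writer's two barriers */
		return w->payload;
	}
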
 static int intel_gen2_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -6311,6 +7193,8 @@
 	intel_ring_emit(ring, fb->pitches[0]);
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -6351,6 +7235,7 @@
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, MI_NOOP);
 
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -6397,6 +7282,8 @@
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -6439,6 +7326,8 @@
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -6493,6 +7382,8 @@
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, (MI_NOOP));
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -6541,7 +7432,7 @@
 		return -ENOMEM;
 
 	work->event = event;
-	work->dev = crtc->dev;
+	work->crtc = crtc;
 	intel_fb = to_intel_framebuffer(crtc->fb);
 	work->old_fb_obj = intel_fb->obj;
 	INIT_WORK(&work->work, intel_unpin_work_fn);
@@ -6566,6 +7457,9 @@
 	intel_fb = to_intel_framebuffer(fb);
 	obj = intel_fb->obj;
 
+	if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
+		flush_workqueue(dev_priv->wq);
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		goto cleanup;
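
The unpin_work_count test above applies back-pressure: every queued flip bumps
the counter (atomic_inc() below) and the unpin worker drops it again, so a
caller that already has two flips in flight synchronously drains dev_priv->wq
(the same workqueue the completions are queued on) before adding a third. A
hedged sketch of the pattern, with hypothetical names:

	#include <linux/atomic.h>
	#include <linux/workqueue.h>

	struct demo_crtc {
		atomic_t pending_count;		/* queued but not yet unpinned */
		struct workqueue_struct *wq;	/* where completions run */
	};

	static void demo_queue_flip(struct demo_crtc *c, struct work_struct *work)
	{
		/* Throttle: never let more than two completions pile up. */
		if (atomic_read(&c->pending_count) >= 2)
			flush_workqueue(c->wq);

		atomic_inc(&c->pending_count);	/* the worker does atomic_dec() */
		queue_work(c->wq, work);
	}
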
@@ -6584,6 +7478,7 @@
 	 * the flip occurs and the object is no longer visible.
 	 */
 	atomic_add(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
+	atomic_inc(&intel_crtc->unpin_work_count);
 
 	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj);
 	if (ret)
@@ -6598,6 +7493,7 @@
 	return 0;
 
 cleanup_pending:
+	atomic_dec(&intel_crtc->unpin_work_count);
 	atomic_sub(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
 	drm_gem_object_unreference(&obj->base);
@@ -6893,7 +7789,7 @@
 				dev->mode_config.dpms_property;
 
 			connector->dpms = DRM_MODE_DPMS_ON;
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 dpms_property,
 							 DRM_MODE_DPMS_ON);
 
@@ -7015,8 +7911,6 @@
 	struct drm_device *dev = crtc->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_display_mode *adjusted_mode, saved_mode, saved_hwmode;
-	struct drm_encoder_helper_funcs *encoder_funcs;
-	struct drm_encoder *encoder;
 	struct intel_crtc *intel_crtc;
 	unsigned disable_pipes, prepare_pipes, modeset_pipes;
 	bool ret = true;
@@ -7061,6 +7955,9 @@
 	 * update the output configuration. */
 	intel_modeset_update_state(dev, prepare_pipes);
 
+	if (dev_priv->display.modeset_global_resources)
+		dev_priv->display.modeset_global_resources(dev);
+
 	/* Set up the DPLL and any encoders state that needs to adjust or depend
 	 * on the DPLL.
 	 */
@@ -7070,18 +7967,6 @@
 					   x, y, fb);
 		if (!ret)
 		    goto done;
-
-		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-
-			if (encoder->crtc != &intel_crtc->base)
-				continue;
-
-			DRM_DEBUG_KMS("[ENCODER:%d:%s] set [MODE:%d:%s]\n",
-				encoder->base.id, drm_get_encoder_name(encoder),
-				mode->base.id, mode->name);
-			encoder_funcs = encoder->helper_private;
-			encoder_funcs->mode_set(encoder, mode, adjusted_mode);
-		}
 	}
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
@@ -7420,6 +8305,12 @@
 	.page_flip = intel_crtc_page_flip,
 };
 
+static void intel_cpu_pll_init(struct drm_device *dev)
+{
+	if (IS_HASWELL(dev))
+		intel_ddi_pll_init(dev);
+}
+
 static void intel_pch_pll_init(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -7459,6 +8350,7 @@
 	/* Swap pipes & planes for FBC on pre-965 */
 	intel_crtc->pipe = pipe;
 	intel_crtc->plane = pipe;
+	intel_crtc->cpu_transcoder = pipe;
 	if (IS_MOBILE(dev) && IS_GEN3(dev)) {
 		DRM_DEBUG_KMS("swapping pipes & planes for FBC\n");
 		intel_crtc->plane = !pipe;
@@ -7551,17 +8443,9 @@
 		I915_WRITE(PFIT_CONTROL, 0);
 	}
 
-	if (HAS_PCH_SPLIT(dev)) {
-		dpd_is_edp = intel_dpd_is_edp(dev);
-
-		if (has_edp_a(dev))
-			intel_dp_init(dev, DP_A, PORT_A);
-
-		if (dpd_is_edp && (I915_READ(PCH_DP_D) & DP_DETECTED))
-			intel_dp_init(dev, PCH_DP_D, PORT_D);
-	}
-
-	intel_crt_init(dev);
+	if (!(IS_HASWELL(dev) &&
+	      (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES)))
+		intel_crt_init(dev);
 
 	if (IS_HASWELL(dev)) {
 		int found;
@@ -7584,6 +8468,10 @@
 			intel_ddi_init(dev, PORT_D);
 	} else if (HAS_PCH_SPLIT(dev)) {
 		int found;
+		dpd_is_edp = intel_dpd_is_edp(dev);
+
+		if (has_edp_a(dev))
+			intel_dp_init(dev, DP_A, PORT_A);
 
 		if (I915_READ(HDMIB) & PORT_DETECTED) {
 			/* PCH SDVOB multiplex with HDMIB */
@@ -7603,11 +8491,15 @@
 		if (I915_READ(PCH_DP_C) & DP_DETECTED)
 			intel_dp_init(dev, PCH_DP_C, PORT_C);
 
-		if (!dpd_is_edp && (I915_READ(PCH_DP_D) & DP_DETECTED))
+		if (I915_READ(PCH_DP_D) & DP_DETECTED)
 			intel_dp_init(dev, PCH_DP_D, PORT_D);
 	} else if (IS_VALLEYVIEW(dev)) {
 		int found;
 
+		/* Check for built-in panel first. Shares lanes with HDMI on SDVOC */
+		if (I915_READ(DP_C) & DP_DETECTED)
+			intel_dp_init(dev, DP_C, PORT_C);
+
 		if (I915_READ(SDVOB) & PORT_DETECTED) {
 			/* SDVOB multiplex with HDMIB */
 			found = intel_sdvo_init(dev, SDVOB, true);
@@ -7620,9 +8512,6 @@
 		if (I915_READ(SDVOC) & PORT_DETECTED)
 			intel_hdmi_init(dev, SDVOC, PORT_C);
 
-		/* Shares lanes with HDMI on SDVOC */
-		if (I915_READ(DP_C) & DP_DETECTED)
-			intel_dp_init(dev, DP_C, PORT_C);
 	} else if (SUPPORTS_DIGITAL_OUTPUTS(dev)) {
 		bool found = false;
 
@@ -7676,8 +8565,9 @@
 			intel_encoder_clones(encoder);
 	}
 
-	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
-		ironlake_init_pch_refclk(dev);
+	intel_init_pch_refclk(dev);
+
+	drm_helper_move_panel_connectors_to_head(dev);
 }
 
 static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
@@ -7718,27 +8608,51 @@
 	if (mode_cmd->pitches[0] & 63)
 		return -EINVAL;
 
+	/* FIXME <= Gen4 stride limits are a bit unclear */
+	if (mode_cmd->pitches[0] > 32768)
+		return -EINVAL;
+
+	if (obj->tiling_mode != I915_TILING_NONE &&
+	    mode_cmd->pitches[0] != obj->stride)
+		return -EINVAL;
+
+	/* Reject formats not supported by any plane early. */
 	switch (mode_cmd->pixel_format) {
-	case DRM_FORMAT_RGB332:
+	case DRM_FORMAT_C8:
 	case DRM_FORMAT_RGB565:
 	case DRM_FORMAT_XRGB8888:
-	case DRM_FORMAT_XBGR8888:
 	case DRM_FORMAT_ARGB8888:
+		break;
+	case DRM_FORMAT_XRGB1555:
+	case DRM_FORMAT_ARGB1555:
+		if (INTEL_INFO(dev)->gen > 3)
+			return -EINVAL;
+		break;
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ABGR8888:
 	case DRM_FORMAT_XRGB2101010:
 	case DRM_FORMAT_ARGB2101010:
-		/* RGB formats are common across chipsets */
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ABGR2101010:
+		if (INTEL_INFO(dev)->gen < 4)
+			return -EINVAL;
 		break;
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_UYVY:
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_VYUY:
+		if (INTEL_INFO(dev)->gen < 6)
+			return -EINVAL;
 		break;
 	default:
-		DRM_DEBUG_KMS("unsupported pixel format %u\n",
-				mode_cmd->pixel_format);
+		DRM_DEBUG_KMS("unsupported pixel format 0x%08x\n", mode_cmd->pixel_format);
 		return -EINVAL;
 	}
 
+	/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
+	if (mode_cmd->offsets[0] != 0)
+		return -EINVAL;
+
 	ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
 	if (ret) {
 		DRM_ERROR("framebuffer init failed %d\n", ret);
@@ -7776,7 +8690,13 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* We always want a DPMS function */
-	if (HAS_PCH_SPLIT(dev)) {
+	if (IS_HASWELL(dev)) {
+		dev_priv->display.crtc_mode_set = haswell_crtc_mode_set;
+		dev_priv->display.crtc_enable = haswell_crtc_enable;
+		dev_priv->display.crtc_disable = haswell_crtc_disable;
+		dev_priv->display.off = haswell_crtc_off;
+		dev_priv->display.update_plane = ironlake_update_plane;
+	} else if (HAS_PCH_SPLIT(dev)) {
 		dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set;
 		dev_priv->display.crtc_enable = ironlake_crtc_enable;
 		dev_priv->display.crtc_disable = ironlake_crtc_disable;
@@ -7827,6 +8747,8 @@
 			/* FIXME: detect B0+ stepping and use auto training */
 			dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
 			dev_priv->display.write_eld = ironlake_write_eld;
+			dev_priv->display.modeset_global_resources =
+				ivb_modeset_global_resources;
 		} else if (IS_HASWELL(dev)) {
 			dev_priv->display.fdi_link_train = hsw_fdi_link_train;
 			dev_priv->display.write_eld = haswell_write_eld;
@@ -8058,6 +8980,7 @@
 			DRM_DEBUG_KMS("plane %d init failed: %d\n", i, ret);
 	}
 
+	intel_cpu_pll_init(dev);
 	intel_pch_pll_init(dev);
 
 	/* Just disable it once at startup */
@@ -8127,7 +9050,7 @@
 	u32 reg;
 
 	/* Clear any frame start delays used for debugging left by the BIOS */
-	reg = PIPECONF(crtc->pipe);
+	reg = PIPECONF(crtc->cpu_transcoder);
 	I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
 
 	/* We need to sanitize the plane -> pipe mapping first because this will
@@ -8246,7 +9169,8 @@
 
 /* Scan out the current hw modeset state, sanitizes it and maps it into the drm
  * and i915 state tracking structures. */
-void intel_modeset_setup_hw_state(struct drm_device *dev)
+void intel_modeset_setup_hw_state(struct drm_device *dev,
+				  bool force_restore)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe;
@@ -8255,10 +9179,35 @@
 	struct intel_encoder *encoder;
 	struct intel_connector *connector;
 
+	if (IS_HASWELL(dev)) {
+		tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
+
+		if (tmp & TRANS_DDI_FUNC_ENABLE) {
+			switch (tmp & TRANS_DDI_EDP_INPUT_MASK) {
+			case TRANS_DDI_EDP_INPUT_A_ON:
+			case TRANS_DDI_EDP_INPUT_A_ONOFF:
+				pipe = PIPE_A;
+				break;
+			case TRANS_DDI_EDP_INPUT_B_ONOFF:
+				pipe = PIPE_B;
+				break;
+			case TRANS_DDI_EDP_INPUT_C_ONOFF:
+				pipe = PIPE_C;
+				break;
+			}
+
+			crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
+			crtc->cpu_transcoder = TRANSCODER_EDP;
+
+			DRM_DEBUG_KMS("Pipe %c using transcoder EDP\n",
+				      pipe_name(pipe));
+		}
+	}
+
 	for_each_pipe(pipe) {
 		crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
 
-		tmp = I915_READ(PIPECONF(pipe));
+		tmp = I915_READ(PIPECONF(crtc->cpu_transcoder));
 		if (tmp & PIPECONF_ENABLE)
 			crtc->active = true;
 		else
@@ -8271,6 +9220,9 @@
 			      crtc->active ? "enabled" : "disabled");
 	}
 
+	if (IS_HASWELL(dev))
+		intel_ddi_setup_hw_pll_state(dev);
+
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
 			    base.head) {
 		pipe = 0;
@@ -8317,9 +9269,19 @@
 		intel_sanitize_crtc(crtc);
 	}
 
-	intel_modeset_update_staged_output_state(dev);
+	if (force_restore) {
+		for_each_pipe(pipe) {
+			crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
+			intel_set_mode(&crtc->base, &crtc->base.mode,
+				       crtc->base.x, crtc->base.y, crtc->base.fb);
+		}
+	} else {
+		intel_modeset_update_staged_output_state(dev);
+	}
 
 	intel_modeset_check_state(dev);
+
+	drm_mode_config_reset(dev);
 }
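
With force_restore set, the freshly read-out hardware state is immediately
replayed through intel_set_mode() on every pipe; the boot-time caller in
intel_modeset_gem_init() below passes false and only syncs the staged software
state, leaving the forced path for resume-style callers that must reprogram
the hardware.
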
 
 void intel_modeset_gem_init(struct drm_device *dev)
@@ -8328,7 +9290,7 @@
 
 	intel_setup_overlay(dev);
 
-	intel_modeset_setup_hw_state(dev);
+	intel_modeset_setup_hw_state(dev, false);
 }
 
 void intel_modeset_cleanup(struct drm_device *dev)
@@ -8447,6 +9409,7 @@
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_display_error_state *error;
+	enum transcoder cpu_transcoder;
 	int i;
 
 	error = kmalloc(sizeof(*error), GFP_ATOMIC);
@@ -8454,6 +9417,8 @@
 		return NULL;
 
 	for_each_pipe(i) {
+		cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, i);
+
 		error->cursor[i].control = I915_READ(CURCNTR(i));
 		error->cursor[i].position = I915_READ(CURPOS(i));
 		error->cursor[i].base = I915_READ(CURBASE(i));
@@ -8468,14 +9433,14 @@
 			error->plane[i].tile_offset = I915_READ(DSPTILEOFF(i));
 		}
 
-		error->pipe[i].conf = I915_READ(PIPECONF(i));
+		error->pipe[i].conf = I915_READ(PIPECONF(cpu_transcoder));
 		error->pipe[i].source = I915_READ(PIPESRC(i));
-		error->pipe[i].htotal = I915_READ(HTOTAL(i));
-		error->pipe[i].hblank = I915_READ(HBLANK(i));
-		error->pipe[i].hsync = I915_READ(HSYNC(i));
-		error->pipe[i].vtotal = I915_READ(VTOTAL(i));
-		error->pipe[i].vblank = I915_READ(VBLANK(i));
-		error->pipe[i].vsync = I915_READ(VSYNC(i));
+		error->pipe[i].htotal = I915_READ(HTOTAL(cpu_transcoder));
+		error->pipe[i].hblank = I915_READ(HBLANK(cpu_transcoder));
+		error->pipe[i].hsync = I915_READ(HSYNC(cpu_transcoder));
+		error->pipe[i].vtotal = I915_READ(VTOTAL(cpu_transcoder));
+		error->pipe[i].vblank = I915_READ(VBLANK(cpu_transcoder));
+		error->pipe[i].vsync = I915_READ(VSYNC(cpu_transcoder));
 	}
 
 	return error;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 368ed8e..1b63d55 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -36,8 +36,6 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
-#define DP_RECEIVER_CAP_SIZE	0xf
-#define DP_LINK_STATUS_SIZE	6
 #define DP_LINK_CHECK_TIMEOUT	(10 * 1000)
 
 /**
@@ -49,7 +47,9 @@
  */
 static bool is_edp(struct intel_dp *intel_dp)
 {
-	return intel_dp->base.type == INTEL_OUTPUT_EDP;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+
+	return intel_dig_port->base.type == INTEL_OUTPUT_EDP;
 }
 
 /**
@@ -76,15 +76,16 @@
 	return is_edp(intel_dp) && !is_pch_edp(intel_dp);
 }
 
-static struct intel_dp *enc_to_intel_dp(struct drm_encoder *encoder)
+static struct drm_device *intel_dp_to_dev(struct intel_dp *intel_dp)
 {
-	return container_of(encoder, struct intel_dp, base.base);
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+
+	return intel_dig_port->base.base.dev;
 }
 
 static struct intel_dp *intel_attached_dp(struct drm_connector *connector)
 {
-	return container_of(intel_attached_encoder(connector),
-			    struct intel_dp, base);
+	return enc_to_intel_dp(&intel_attached_encoder(connector)->base);
 }
 
 /**
@@ -106,49 +107,32 @@
 	return is_pch_edp(intel_dp);
 }
 
-static void intel_dp_start_link_train(struct intel_dp *intel_dp);
-static void intel_dp_complete_link_train(struct intel_dp *intel_dp);
 static void intel_dp_link_down(struct intel_dp *intel_dp);
 
 void
 intel_edp_link_config(struct intel_encoder *intel_encoder,
 		       int *lane_num, int *link_bw)
 {
-	struct intel_dp *intel_dp = container_of(intel_encoder, struct intel_dp, base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
 
 	*lane_num = intel_dp->lane_count;
-	if (intel_dp->link_bw == DP_LINK_BW_1_62)
-		*link_bw = 162000;
-	else if (intel_dp->link_bw == DP_LINK_BW_2_7)
-		*link_bw = 270000;
+	*link_bw = drm_dp_bw_code_to_link_rate(intel_dp->link_bw);
 }
 
 int
 intel_edp_target_clock(struct intel_encoder *intel_encoder,
 		       struct drm_display_mode *mode)
 {
-	struct intel_dp *intel_dp = container_of(intel_encoder, struct intel_dp, base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+	struct intel_connector *intel_connector = intel_dp->attached_connector;
 
-	if (intel_dp->panel_fixed_mode)
-		return intel_dp->panel_fixed_mode->clock;
+	if (intel_connector->panel.fixed_mode)
+		return intel_connector->panel.fixed_mode->clock;
 	else
 		return mode->clock;
 }
 
 static int
-intel_dp_max_lane_count(struct intel_dp *intel_dp)
-{
-	int max_lane_count = intel_dp->dpcd[DP_MAX_LANE_COUNT] & 0x1f;
-	switch (max_lane_count) {
-	case 1: case 2: case 4:
-		break;
-	default:
-		max_lane_count = 4;
-	}
-	return max_lane_count;
-}
-
-static int
 intel_dp_max_link_bw(struct intel_dp *intel_dp)
 {
 	int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE];
@@ -208,7 +192,7 @@
 			  bool adjust_mode)
 {
 	int max_link_clock = intel_dp_link_clock(intel_dp_max_link_bw(intel_dp));
-	int max_lanes = intel_dp_max_lane_count(intel_dp);
+	int max_lanes = drm_dp_max_lane_count(intel_dp->dpcd);
 	int max_rate, mode_rate;
 
 	mode_rate = intel_dp_link_required(mode->clock, 24);
@@ -234,12 +218,14 @@
 		    struct drm_display_mode *mode)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
 
-	if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) {
-		if (mode->hdisplay > intel_dp->panel_fixed_mode->hdisplay)
+	if (is_edp(intel_dp) && fixed_mode) {
+		if (mode->hdisplay > fixed_mode->hdisplay)
 			return MODE_PANEL;
 
-		if (mode->vdisplay > intel_dp->panel_fixed_mode->vdisplay)
+		if (mode->vdisplay > fixed_mode->vdisplay)
 			return MODE_PANEL;
 	}
 
@@ -285,6 +271,10 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t clkcfg;
 
+	/* There is no CLKCFG reg in Valleyview. VLV hrawclk is 200 MHz */
+	if (IS_VALLEYVIEW(dev))
+		return 200;
+
 	clkcfg = I915_READ(CLKCFG);
 	switch (clkcfg & CLKCFG_FSB_MASK) {
 	case CLKCFG_FSB_400:
@@ -310,7 +300,7 @@
 
 static bool ironlake_edp_have_panel_power(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	return (I915_READ(PCH_PP_STATUS) & PP_ON) != 0;
@@ -318,7 +308,7 @@
 
 static bool ironlake_edp_have_panel_vdd(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	return (I915_READ(PCH_PP_CONTROL) & EDP_FORCE_VDD) != 0;
@@ -327,7 +317,7 @@
 static void
 intel_dp_check_edp(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (!is_edp(intel_dp))
@@ -346,7 +336,8 @@
 		uint8_t *recv, int recv_size)
 {
 	uint32_t output_reg = intel_dp->output_reg;
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t ch_ctl = output_reg + 0x10;
 	uint32_t ch_data = ch_ctl + 4;
@@ -356,6 +347,29 @@
 	uint32_t aux_clock_divider;
 	int try, precharge;
 
+	if (IS_HASWELL(dev)) {
+		switch (intel_dig_port->port) {
+		case PORT_A:
+			ch_ctl = DPA_AUX_CH_CTL;
+			ch_data = DPA_AUX_CH_DATA1;
+			break;
+		case PORT_B:
+			ch_ctl = PCH_DPB_AUX_CH_CTL;
+			ch_data = PCH_DPB_AUX_CH_DATA1;
+			break;
+		case PORT_C:
+			ch_ctl = PCH_DPC_AUX_CH_CTL;
+			ch_data = PCH_DPC_AUX_CH_DATA1;
+			break;
+		case PORT_D:
+			ch_ctl = PCH_DPD_AUX_CH_CTL;
+			ch_data = PCH_DPD_AUX_CH_DATA1;
+			break;
+		default:
+			BUG();
+		}
+	}
+
 	intel_dp_check_edp(intel_dp);
 	/* The clock divider is based off the hrawclk,
 	 * and would like to run at 2MHz. So, take the
@@ -365,12 +379,16 @@
 	 * clock divider.
 	 */
 	if (is_cpu_edp(intel_dp)) {
-		if (IS_GEN6(dev) || IS_GEN7(dev))
+		if (IS_HASWELL(dev))
+			aux_clock_divider = intel_ddi_get_cdclk_freq(dev_priv) >> 1;
+		else if (IS_VALLEYVIEW(dev))
+			aux_clock_divider = 100;
+		else if (IS_GEN6(dev) || IS_GEN7(dev))
 			aux_clock_divider = 200; /* SNB & IVB eDP input clock at 400 MHz */
 		else
 			aux_clock_divider = 225; /* eDP input clock at 450 MHz */
 	} else if (HAS_PCH_SPLIT(dev))
-		aux_clock_divider = 63; /* IRL input clock fixed at 125Mhz */
+		aux_clock_divider = DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
 	else
 		aux_clock_divider = intel_hrawclk(dev) / 2;
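
Worked example for the divider selection above: the AUX channel targets a
roughly 2 MHz bit clock, so the divider is the source clock in MHz divided by
two. The removed constant 63 is exactly DIV_ROUND_UP(125, 2) for the fixed
125 MHz PCH input clock (125 MHz / 63 is about 1.98 MHz), SNB/IVB eDP divide
their 400 MHz input clock by 200, and the 450 MHz case uses 225.
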
 
@@ -642,9 +660,6 @@
 	return -EREMOTEIO;
 }
 
-static void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp);
-static void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync);
-
 static int
 intel_dp_i2c_init(struct intel_dp *intel_dp,
 		  struct intel_connector *intel_connector, const char *name)
@@ -670,22 +685,25 @@
 	return ret;
 }
 
-static bool
+bool
 intel_dp_mode_fixup(struct drm_encoder *encoder,
 		    const struct drm_display_mode *mode,
 		    struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = encoder->dev;
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_connector *intel_connector = intel_dp->attached_connector;
 	int lane_count, clock;
-	int max_lane_count = intel_dp_max_lane_count(intel_dp);
+	int max_lane_count = drm_dp_max_lane_count(intel_dp->dpcd);
 	int max_clock = intel_dp_max_link_bw(intel_dp) == DP_LINK_BW_2_7 ? 1 : 0;
 	int bpp, mode_rate;
 	static int bws[2] = { DP_LINK_BW_1_62, DP_LINK_BW_2_7 };
 
-	if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) {
-		intel_fixed_panel_mode(intel_dp->panel_fixed_mode, adjusted_mode);
-		intel_pch_panel_fitting(dev, DRM_MODE_SCALE_FULLSCREEN,
+	if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) {
+		intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
+				       adjusted_mode);
+		intel_pch_panel_fitting(dev,
+					intel_connector->panel.fitting_mode,
 					mode, adjusted_mode);
 	}
 
@@ -762,21 +780,23 @@
 		 struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = crtc->dev;
-	struct intel_encoder *encoder;
+	struct intel_encoder *intel_encoder;
+	struct intel_dp *intel_dp;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int lane_count = 4;
 	struct intel_dp_m_n m_n;
 	int pipe = intel_crtc->pipe;
+	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
 
 	/*
 	 * Find the lane count in the intel_encoder private
 	 */
-	for_each_encoder_on_crtc(dev, crtc, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
+		intel_dp = enc_to_intel_dp(&intel_encoder->base);
 
-		if (intel_dp->base.type == INTEL_OUTPUT_DISPLAYPORT ||
-		    intel_dp->base.type == INTEL_OUTPUT_EDP)
+		if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
+		    intel_encoder->type == INTEL_OUTPUT_EDP)
 		{
 			lane_count = intel_dp->lane_count;
 			break;
@@ -791,23 +811,46 @@
 	intel_dp_compute_m_n(intel_crtc->bpp, lane_count,
 			     mode->clock, adjusted_mode->clock, &m_n);
 
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(TRANSDATA_M1(pipe),
-			   ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) |
-			   m_n.gmch_m);
+	if (IS_HASWELL(dev)) {
+		I915_WRITE(PIPE_DATA_M1(cpu_transcoder),
+			   TU_SIZE(m_n.tu) | m_n.gmch_m);
+		I915_WRITE(PIPE_DATA_N1(cpu_transcoder), m_n.gmch_n);
+		I915_WRITE(PIPE_LINK_M1(cpu_transcoder), m_n.link_m);
+		I915_WRITE(PIPE_LINK_N1(cpu_transcoder), m_n.link_n);
+	} else if (HAS_PCH_SPLIT(dev)) {
+		I915_WRITE(TRANSDATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
 		I915_WRITE(TRANSDATA_N1(pipe), m_n.gmch_n);
 		I915_WRITE(TRANSDPLINK_M1(pipe), m_n.link_m);
 		I915_WRITE(TRANSDPLINK_N1(pipe), m_n.link_n);
+	} else if (IS_VALLEYVIEW(dev)) {
+		I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
+		I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
+		I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
+		I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
 	} else {
 		I915_WRITE(PIPE_GMCH_DATA_M(pipe),
-			   ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) |
-			   m_n.gmch_m);
+			   TU_SIZE(m_n.tu) | m_n.gmch_m);
 		I915_WRITE(PIPE_GMCH_DATA_N(pipe), m_n.gmch_n);
 		I915_WRITE(PIPE_DP_LINK_M(pipe), m_n.link_m);
 		I915_WRITE(PIPE_DP_LINK_N(pipe), m_n.link_n);
 	}
 }
 
+void intel_dp_init_link_config(struct intel_dp *intel_dp)
+{
+	memset(intel_dp->link_configuration, 0, DP_LINK_CONFIGURATION_SIZE);
+	intel_dp->link_configuration[0] = intel_dp->link_bw;
+	intel_dp->link_configuration[1] = intel_dp->lane_count;
+	intel_dp->link_configuration[8] = DP_SET_ANSI_8B10B;
+	/*
+	 * Check for DPCD version >= 1.1 and enhanced framing support
+	 */
+	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
+	    (intel_dp->dpcd[DP_MAX_LANE_COUNT] & DP_ENHANCED_FRAME_CAP)) {
+		intel_dp->link_configuration[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+	}
+}
+
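
The nine-byte array filled in here mirrors the sink's link-configuration DPCD
block and is later written wholesale to DP_LINK_BW_SET (0x100) when training
starts: byte 0 holds the bandwidth code, byte 1 the lane count (plus the
enhanced-framing bit when the sink advertises DP_ENHANCED_FRAME_CAP on a DPCD
rev of 1.1 or later), and byte 8 selects ANSI 8b/10b coding. An illustrative
decode under those assumptions (the demo_* helpers are hypothetical, not
driver API):

	#include <linux/types.h>

	/* Bandwidth codes count 0.27 Gb/s units per lane:
	 * 0x06 -> 162000 kHz, 0x0a -> 270000 kHz. */
	static int demo_link_rate_khz(const u8 link_config[9])
	{
		return link_config[0] * 27000;
	}

	static int demo_lane_count(const u8 link_config[9])
	{
		return link_config[1] & 0x1f;	/* mask the enhanced-framing bit */
	}
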
 static void
 intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		  struct drm_display_mode *adjusted_mode)
@@ -815,7 +858,7 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
-	struct drm_crtc *crtc = intel_dp->base.base.crtc;
+	struct drm_crtc *crtc = encoder->crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
 	/*
@@ -860,21 +903,12 @@
 		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
 		intel_write_eld(encoder, adjusted_mode);
 	}
-	memset(intel_dp->link_configuration, 0, DP_LINK_CONFIGURATION_SIZE);
-	intel_dp->link_configuration[0] = intel_dp->link_bw;
-	intel_dp->link_configuration[1] = intel_dp->lane_count;
-	intel_dp->link_configuration[8] = DP_SET_ANSI_8B10B;
-	/*
-	 * Check for DPCD version > 1.1 and enhanced framing support
-	 */
-	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
-	    (intel_dp->dpcd[DP_MAX_LANE_COUNT] & DP_ENHANCED_FRAME_CAP)) {
-		intel_dp->link_configuration[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-	}
+
+	intel_dp_init_link_config(intel_dp);
 
 	/* Split out the IBX/CPU vs CPT settings */
 
-	if (is_cpu_edp(intel_dp) && IS_GEN7(dev)) {
+	if (is_cpu_edp(intel_dp) && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
 		if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 			intel_dp->DP |= DP_SYNC_HS_HIGH;
 		if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
@@ -931,7 +965,7 @@
 				       u32 mask,
 				       u32 value)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	DRM_DEBUG_KMS("mask %08x value %08x status %08x control %08x\n",
@@ -978,9 +1012,9 @@
 	return control;
 }
 
-static void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp)
+void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
@@ -1019,7 +1053,7 @@
 
 static void ironlake_panel_vdd_off_sync(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
@@ -1041,14 +1075,14 @@
 {
 	struct intel_dp *intel_dp = container_of(to_delayed_work(__work),
 						 struct intel_dp, panel_vdd_work);
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
 	mutex_lock(&dev->mode_config.mutex);
 	ironlake_panel_vdd_off_sync(intel_dp);
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
-static void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
+void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
 {
 	if (!is_edp(intel_dp))
 		return;
@@ -1071,9 +1105,9 @@
 	}
 }
 
-static void ironlake_edp_panel_on(struct intel_dp *intel_dp)
+void ironlake_edp_panel_on(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
@@ -1113,9 +1147,9 @@
 	}
 }
 
-static void ironlake_edp_panel_off(struct intel_dp *intel_dp)
+void ironlake_edp_panel_off(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
@@ -1138,10 +1172,12 @@
 	ironlake_wait_panel_off(intel_dp);
 }
 
-static void ironlake_edp_backlight_on(struct intel_dp *intel_dp)
+void ironlake_edp_backlight_on(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe = to_intel_crtc(intel_dig_port->base.base.crtc)->pipe;
 	u32 pp;
 
 	if (!is_edp(intel_dp))
@@ -1159,17 +1195,21 @@
 	pp |= EDP_BLC_ENABLE;
 	I915_WRITE(PCH_PP_CONTROL, pp);
 	POSTING_READ(PCH_PP_CONTROL);
+
+	intel_panel_enable_backlight(dev, pipe);
 }
 
-static void ironlake_edp_backlight_off(struct intel_dp *intel_dp)
+void ironlake_edp_backlight_off(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
 	if (!is_edp(intel_dp))
 		return;
 
+	intel_panel_disable_backlight(dev);
+
 	DRM_DEBUG_KMS("\n");
 	pp = ironlake_get_pp_control(dev_priv);
 	pp &= ~EDP_BLC_ENABLE;
@@ -1180,8 +1220,9 @@
 
 static void ironlake_edp_pll_on(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
-	struct drm_crtc *crtc = intel_dp->base.base.crtc;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
+	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 dpa_ctl;
 
@@ -1205,8 +1246,9 @@
 
 static void ironlake_edp_pll_off(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
-	struct drm_crtc *crtc = intel_dp->base.base.crtc;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
+	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 dpa_ctl;
 
@@ -1228,7 +1270,7 @@
 }
 
 /* If the sink supports it, try to set the power state appropriately */
-static void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
+void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
 {
 	int ret, i;
 
@@ -1298,9 +1340,10 @@
 				return true;
 			}
 		}
-	}
 
-	DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n", intel_dp->output_reg);
+		DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n",
+			      intel_dp->output_reg);
+	}
 
 	return true;
 }
@@ -1396,38 +1439,6 @@
 					      DP_LINK_STATUS_SIZE);
 }
 
-static uint8_t
-intel_dp_link_status(uint8_t link_status[DP_LINK_STATUS_SIZE],
-		     int r)
-{
-	return link_status[r - DP_LANE0_1_STATUS];
-}
-
-static uint8_t
-intel_get_adjust_request_voltage(uint8_t adjust_request[2],
-				 int lane)
-{
-	int	    s = ((lane & 1) ?
-			 DP_ADJUST_VOLTAGE_SWING_LANE1_SHIFT :
-			 DP_ADJUST_VOLTAGE_SWING_LANE0_SHIFT);
-	uint8_t l = adjust_request[lane>>1];
-
-	return ((l >> s) & 3) << DP_TRAIN_VOLTAGE_SWING_SHIFT;
-}
-
-static uint8_t
-intel_get_adjust_request_pre_emphasis(uint8_t adjust_request[2],
-				      int lane)
-{
-	int	    s = ((lane & 1) ?
-			 DP_ADJUST_PRE_EMPHASIS_LANE1_SHIFT :
-			 DP_ADJUST_PRE_EMPHASIS_LANE0_SHIFT);
-	uint8_t l = adjust_request[lane>>1];
-
-	return ((l >> s) & 3) << DP_TRAIN_PRE_EMPHASIS_SHIFT;
-}
-
-
 #if 0
 static char	*voltage_names[] = {
 	"0.4V", "0.6V", "0.8V", "1.2V"
@@ -1448,7 +1459,7 @@
 static uint8_t
 intel_dp_voltage_max(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
 	if (IS_GEN7(dev) && is_cpu_edp(intel_dp))
 		return DP_TRAIN_VOLTAGE_SWING_800;
@@ -1461,9 +1472,21 @@
 static uint8_t
 intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
-	if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) {
+	if (IS_HASWELL(dev)) {
+		switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) {
+		case DP_TRAIN_VOLTAGE_SWING_400:
+			return DP_TRAIN_PRE_EMPHASIS_9_5;
+		case DP_TRAIN_VOLTAGE_SWING_600:
+			return DP_TRAIN_PRE_EMPHASIS_6;
+		case DP_TRAIN_VOLTAGE_SWING_800:
+			return DP_TRAIN_PRE_EMPHASIS_3_5;
+		case DP_TRAIN_VOLTAGE_SWING_1200:
+		default:
+			return DP_TRAIN_PRE_EMPHASIS_0;
+		}
+	} else if (IS_GEN7(dev) && is_cpu_edp(intel_dp) && !IS_VALLEYVIEW(dev)) {
 		switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) {
 		case DP_TRAIN_VOLTAGE_SWING_400:
 			return DP_TRAIN_PRE_EMPHASIS_6;
@@ -1494,13 +1517,12 @@
 	uint8_t v = 0;
 	uint8_t p = 0;
 	int lane;
-	uint8_t	*adjust_request = link_status + (DP_ADJUST_REQUEST_LANE0_1 - DP_LANE0_1_STATUS);
 	uint8_t voltage_max;
 	uint8_t preemph_max;
 
 	for (lane = 0; lane < intel_dp->lane_count; lane++) {
-		uint8_t this_v = intel_get_adjust_request_voltage(adjust_request, lane);
-		uint8_t this_p = intel_get_adjust_request_pre_emphasis(adjust_request, lane);
+		uint8_t this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
+		uint8_t this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
 
 		if (this_v > v)
 			v = this_v;
@@ -1617,52 +1639,38 @@
 	}
 }
 
-static uint8_t
-intel_get_lane_status(uint8_t link_status[DP_LINK_STATUS_SIZE],
-		      int lane)
+/* Gen7.5's (HSW) DP voltage swing and pre-emphasis control */
+static uint32_t
+intel_dp_signal_levels_hsw(uint8_t train_set)
 {
-	int s = (lane & 1) * 4;
-	uint8_t l = link_status[lane>>1];
+	int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
+					 DP_TRAIN_PRE_EMPHASIS_MASK);
+	switch (signal_levels) {
+	case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_0:
+		return DDI_BUF_EMP_400MV_0DB_HSW;
+	case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_3_5:
+		return DDI_BUF_EMP_400MV_3_5DB_HSW;
+	case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_6:
+		return DDI_BUF_EMP_400MV_6DB_HSW;
+	case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_9_5:
+		return DDI_BUF_EMP_400MV_9_5DB_HSW;
 
-	return (l >> s) & 0xf;
-}
+	case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_0:
+		return DDI_BUF_EMP_600MV_0DB_HSW;
+	case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_3_5:
+		return DDI_BUF_EMP_600MV_3_5DB_HSW;
+	case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_6:
+		return DDI_BUF_EMP_600MV_6DB_HSW;
 
-/* Check for clock recovery is done on all channels */
-static bool
-intel_clock_recovery_ok(uint8_t link_status[DP_LINK_STATUS_SIZE], int lane_count)
-{
-	int lane;
-	uint8_t lane_status;
-
-	for (lane = 0; lane < lane_count; lane++) {
-		lane_status = intel_get_lane_status(link_status, lane);
-		if ((lane_status & DP_LANE_CR_DONE) == 0)
-			return false;
+	case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_0:
+		return DDI_BUF_EMP_800MV_0DB_HSW;
+	case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_3_5:
+		return DDI_BUF_EMP_800MV_3_5DB_HSW;
+	default:
+		DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level: "
+			      "0x%x\n", signal_levels);
+		return DDI_BUF_EMP_400MV_0DB_HSW;
 	}
-	return true;
-}
-
-/* Check to see if channel eq is done on all channels */
-#define CHANNEL_EQ_BITS (DP_LANE_CR_DONE|\
-			 DP_LANE_CHANNEL_EQ_DONE|\
-			 DP_LANE_SYMBOL_LOCKED)
-static bool
-intel_channel_eq_ok(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE])
-{
-	uint8_t lane_align;
-	uint8_t lane_status;
-	int lane;
-
-	lane_align = intel_dp_link_status(link_status,
-					  DP_LANE_ALIGN_STATUS_UPDATED);
-	if ((lane_align & DP_INTERLANE_ALIGN_DONE) == 0)
-		return false;
-	for (lane = 0; lane < intel_dp->lane_count; lane++) {
-		lane_status = intel_get_lane_status(link_status, lane);
-		if ((lane_status & CHANNEL_EQ_BITS) != CHANNEL_EQ_BITS)
-			return false;
-	}
-	return true;
 }
 
 static bool
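
Note the coupling visible in the Haswell table above: voltage swing and
pre-emphasis share one transmit-amplitude budget, so the 400 mV entries reach
all the way to 9.5 dB while 800 mV stops at 3.5 dB (matching the Haswell
branch of intel_dp_pre_emphasis_max()), and any combination the hardware
cannot produce falls back to the safest 400 mV / 0 dB setting.
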
@@ -1670,11 +1678,49 @@
 			uint32_t dp_reg_value,
 			uint8_t dp_train_pat)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum port port = intel_dig_port->port;
 	int ret;
+	uint32_t temp;
 
-	if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || !is_cpu_edp(intel_dp))) {
+	if (IS_HASWELL(dev)) {
+		temp = I915_READ(DP_TP_CTL(port));
+
+		if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE)
+			temp |= DP_TP_CTL_SCRAMBLE_DISABLE;
+		else
+			temp &= ~DP_TP_CTL_SCRAMBLE_DISABLE;
+
+		temp &= ~DP_TP_CTL_LINK_TRAIN_MASK;
+		switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
+		case DP_TRAINING_PATTERN_DISABLE:
+			temp |= DP_TP_CTL_LINK_TRAIN_IDLE;
+			I915_WRITE(DP_TP_CTL(port), temp);
+
+			if (wait_for((I915_READ(DP_TP_STATUS(port)) &
+				      DP_TP_STATUS_IDLE_DONE), 1))
+				DRM_ERROR("Timed out waiting for DP idle patterns\n");
+
+			temp &= ~DP_TP_CTL_LINK_TRAIN_MASK;
+			temp |= DP_TP_CTL_LINK_TRAIN_NORMAL;
+
+			break;
+		case DP_TRAINING_PATTERN_1:
+			temp |= DP_TP_CTL_LINK_TRAIN_PAT1;
+			break;
+		case DP_TRAINING_PATTERN_2:
+			temp |= DP_TP_CTL_LINK_TRAIN_PAT2;
+			break;
+		case DP_TRAINING_PATTERN_3:
+			temp |= DP_TP_CTL_LINK_TRAIN_PAT3;
+			break;
+		}
+		I915_WRITE(DP_TP_CTL(port), temp);
+
+	} else if (HAS_PCH_CPT(dev) &&
+		   (IS_GEN7(dev) || !is_cpu_edp(intel_dp))) {
 		dp_reg_value &= ~DP_LINK_TRAIN_MASK_CPT;
 
 		switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
@@ -1734,16 +1780,20 @@
 }
 
 /* Enable corresponding port and start training pattern 1 */
-static void
+void
 intel_dp_start_link_train(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_encoder *encoder = &dp_to_dig_port(intel_dp)->base.base;
+	struct drm_device *dev = encoder->dev;
 	int i;
 	uint8_t voltage;
 	bool clock_recovery = false;
 	int voltage_tries, loop_tries;
 	uint32_t DP = intel_dp->DP;
 
+	if (IS_HASWELL(dev))
+		intel_ddi_prepare_link_retrain(encoder);
+
 	/* Write the link configuration data */
 	intel_dp_aux_native_write(intel_dp, DP_LINK_BW_SET,
 				  intel_dp->link_configuration,
@@ -1761,8 +1811,11 @@
 		uint8_t	    link_status[DP_LINK_STATUS_SIZE];
 		uint32_t    signal_levels;
 
-
-		if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) {
+		if (IS_HASWELL(dev)) {
+			signal_levels = intel_dp_signal_levels_hsw(
+							intel_dp->train_set[0]);
+			DP = (DP & ~DDI_BUF_EMP_MASK) | signal_levels;
+		} else if (IS_GEN7(dev) && is_cpu_edp(intel_dp) && !IS_VALLEYVIEW(dev)) {
 			signal_levels = intel_gen7_edp_signal_levels(intel_dp->train_set[0]);
 			DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB) | signal_levels;
 		} else if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) {
@@ -1770,23 +1823,24 @@
 			DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB) | signal_levels;
 		} else {
 			signal_levels = intel_dp_signal_levels(intel_dp->train_set[0]);
-			DRM_DEBUG_KMS("training pattern 1 signal levels %08x\n", signal_levels);
 			DP = (DP & ~(DP_VOLTAGE_MASK|DP_PRE_EMPHASIS_MASK)) | signal_levels;
 		}
+		DRM_DEBUG_KMS("training pattern 1 signal levels %08x\n",
+			      signal_levels);
 
+		/* Set training pattern 1 */
 		if (!intel_dp_set_link_train(intel_dp, DP,
 					     DP_TRAINING_PATTERN_1 |
 					     DP_LINK_SCRAMBLING_DISABLE))
 			break;
-		/* Set training pattern 1 */
 
-		udelay(100);
+		drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd);
 		if (!intel_dp_get_link_status(intel_dp, link_status)) {
 			DRM_ERROR("failed to get link status\n");
 			break;
 		}
 
-		if (intel_clock_recovery_ok(link_status, intel_dp->lane_count)) {
+		if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) {
 			DRM_DEBUG_KMS("clock recovery OK\n");
 			clock_recovery = true;
 			break;
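
The drm_dp_link_train_clock_recovery_delay() and
drm_dp_link_train_channel_eq_delay() helpers used in this function replace the
old fixed udelay(100)/udelay(400): in this era's DRM helper they honor the
sink's DP_TRAINING_AUX_RD_INTERVAL DPCD field, keeping the spec-minimum
100 us / 400 us waits when it reads zero and waiting interval * 4 ms when the
sink asks for more time.
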
@@ -1825,10 +1879,10 @@
 	intel_dp->DP = DP;
 }
 
-static void
+void
 intel_dp_complete_link_train(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	bool channel_eq = false;
 	int tries, cr_tries;
 	uint32_t DP = intel_dp->DP;
@@ -1848,7 +1902,10 @@
 			break;
 		}
 
-		if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) {
+		if (IS_HASWELL(dev)) {
+			signal_levels = intel_dp_signal_levels_hsw(intel_dp->train_set[0]);
+			DP = (DP & ~DDI_BUF_EMP_MASK) | signal_levels;
+		} else if (IS_GEN7(dev) && is_cpu_edp(intel_dp) && !IS_VALLEYVIEW(dev)) {
 			signal_levels = intel_gen7_edp_signal_levels(intel_dp->train_set[0]);
 			DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB) | signal_levels;
 		} else if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) {
@@ -1865,18 +1922,18 @@
 					     DP_LINK_SCRAMBLING_DISABLE))
 			break;
 
-		udelay(400);
+		drm_dp_link_train_channel_eq_delay(intel_dp->dpcd);
 		if (!intel_dp_get_link_status(intel_dp, link_status))
 			break;
 
 		/* Make sure clock is still ok */
-		if (!intel_clock_recovery_ok(link_status, intel_dp->lane_count)) {
+		if (!drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) {
 			intel_dp_start_link_train(intel_dp);
 			cr_tries++;
 			continue;
 		}
 
-		if (intel_channel_eq_ok(intel_dp, link_status)) {
+		if (drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
 			channel_eq = true;
 			break;
 		}
@@ -1895,16 +1952,38 @@
 		++tries;
 	}
 
+	if (channel_eq)
+		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
+
 	intel_dp_set_link_train(intel_dp, DP, DP_TRAINING_PATTERN_DISABLE);
 }
 
 static void
 intel_dp_link_down(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t DP = intel_dp->DP;
 
+	/*
+	 * DDI code has a strict mode set sequence and we should try to respect
+	 * it, otherwise we might hang the machine in many different ways. So we
+	 * really should be disabling the port only on a complete crtc_disable
+	 * sequence. This function is only called under two conditions in DDI
+	 * code:
+	 * - Link training failed while doing crtc_enable, in which case we
+	 *   really should respect the mode set sequence and wait for a
+	 *   crtc_disable.
+	 * - Someone turned the monitor off and intel_dp_check_link_status
+	 *   called us. We don't need to disable the whole port in this case, so
+	 *   when someone turns the monitor on again,
+	 *   intel_ddi_prepare_link_retrain will take care of redoing the link
+	 *   train.
+	 */
+	if (IS_HASWELL(dev))
+		return;
+
 	if (WARN_ON((I915_READ(intel_dp->output_reg) & DP_PORT_EN) == 0))
 		return;
 
@@ -1923,7 +2002,7 @@
 
 	if (HAS_PCH_IBX(dev) &&
 	    I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) {
-		struct drm_crtc *crtc = intel_dp->base.base.crtc;
+		struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
 
 		/* Hardware workaround: leaving our transcoder select
 		 * set to transcoder B while it's off will prevent the
@@ -2024,7 +2103,7 @@
 intel_dp_handle_test_request(struct intel_dp *intel_dp)
 {
 	/* NAK by default */
-	intel_dp_aux_native_write_1(intel_dp, DP_TEST_RESPONSE, DP_TEST_ACK);
+	intel_dp_aux_native_write_1(intel_dp, DP_TEST_RESPONSE, DP_TEST_NAK);
 }
 
 /*
@@ -2036,16 +2115,17 @@
  *  4. Check link status on receipt of hot-plug interrupt
  */
 
-static void
+void
 intel_dp_check_link_status(struct intel_dp *intel_dp)
 {
+	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
 	u8 sink_irq_vector;
 	u8 link_status[DP_LINK_STATUS_SIZE];
 
-	if (!intel_dp->base.connectors_active)
+	if (!intel_encoder->connectors_active)
 		return;
 
-	if (WARN_ON(!intel_dp->base.base.crtc))
+	if (WARN_ON(!intel_encoder->base.crtc))
 		return;
 
 	/* Try to read receiver status if the link appears to be up */
@@ -2074,9 +2154,9 @@
 			DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n");
 	}
 
-	if (!intel_channel_eq_ok(intel_dp, link_status)) {
+	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
 		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
-			      drm_get_encoder_name(&intel_dp->base.base));
+			      drm_get_encoder_name(&intel_encoder->base));
 		intel_dp_start_link_train(intel_dp);
 		intel_dp_complete_link_train(intel_dp);
 	}
@@ -2125,11 +2205,12 @@
 static enum drm_connector_status
 ironlake_dp_detect(struct intel_dp *intel_dp)
 {
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	enum drm_connector_status status;
 
 	/* Can't disconnect eDP, but you can close the lid... */
 	if (is_edp(intel_dp)) {
-		status = intel_panel_detect(intel_dp->base.base.dev);
+		status = intel_panel_detect(dev);
 		if (status == connector_status_unknown)
 			status = connector_status_connected;
 		return status;
@@ -2141,7 +2222,7 @@
 static enum drm_connector_status
 g4x_dp_detect(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t bit;
 
@@ -2168,44 +2249,45 @@
 static struct edid *
 intel_dp_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
-	struct edid	*edid;
-	int size;
+	struct intel_connector *intel_connector = to_intel_connector(connector);
 
-	if (is_edp(intel_dp)) {
-		if (!intel_dp->edid)
+	/* use cached edid if we have one */
+	if (intel_connector->edid) {
+		struct edid *edid;
+		int size;
+
+		/* invalid edid */
+		if (IS_ERR(intel_connector->edid))
 			return NULL;
 
-		size = (intel_dp->edid->extensions + 1) * EDID_LENGTH;
+		size = (intel_connector->edid->extensions + 1) * EDID_LENGTH;
 		edid = kmalloc(size, GFP_KERNEL);
 		if (!edid)
 			return NULL;
 
-		memcpy(edid, intel_dp->edid, size);
+		memcpy(edid, intel_connector->edid, size);
 		return edid;
 	}
 
-	edid = drm_get_edid(connector, adapter);
-	return edid;
+	return drm_get_edid(connector, adapter);
 }
 
 static int
 intel_dp_get_edid_modes(struct drm_connector *connector, struct i2c_adapter *adapter)
 {
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
-	int	ret;
+	struct intel_connector *intel_connector = to_intel_connector(connector);
 
-	if (is_edp(intel_dp)) {
-		drm_mode_connector_update_edid_property(connector,
-							intel_dp->edid);
-		ret = drm_add_edid_modes(connector, intel_dp->edid);
-		drm_edid_to_eld(connector,
-				intel_dp->edid);
-		return intel_dp->edid_mode_count;
+	/* use cached edid if we have one */
+	if (intel_connector->edid) {
+		/* invalid edid */
+		if (IS_ERR(intel_connector->edid))
+			return 0;
+
+		return intel_connector_update_modes(connector,
+						    intel_connector->edid);
 	}
 
-	ret = intel_ddc_get_modes(connector, adapter);
-	return ret;
+	return intel_ddc_get_modes(connector, adapter);
 }
 
 
@@ -2219,9 +2301,12 @@
 intel_dp_detect(struct drm_connector *connector, bool force)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
-	struct drm_device *dev = intel_dp->base.base.dev;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
+	struct drm_device *dev = connector->dev;
 	enum drm_connector_status status;
 	struct edid *edid = NULL;
+	char dpcd_hex_dump[sizeof(intel_dp->dpcd) * 3];
 
 	intel_dp->has_audio = false;
 
@@ -2230,10 +2315,9 @@
 	else
 		status = g4x_dp_detect(intel_dp);
 
-	DRM_DEBUG_KMS("DPCD: %02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx\n",
-		      intel_dp->dpcd[0], intel_dp->dpcd[1], intel_dp->dpcd[2],
-		      intel_dp->dpcd[3], intel_dp->dpcd[4], intel_dp->dpcd[5],
-		      intel_dp->dpcd[6], intel_dp->dpcd[7]);
+	hex_dump_to_buffer(intel_dp->dpcd, sizeof(intel_dp->dpcd),
+			   32, 1, dpcd_hex_dump, sizeof(dpcd_hex_dump), false);
+	DRM_DEBUG_KMS("DPCD: %s\n", dpcd_hex_dump);
 
 	if (status != connector_status_connected)
 		return status;
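
(The dpcd_hex_dump buffer above is sized at three characters per DPCD byte
because hex_dump_to_buffer() with groupsize 1 emits two hex digits plus a
separating space per byte, with the last byte's third slot holding the
terminating NUL.)
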
@@ -2250,49 +2334,31 @@
 		}
 	}
 
+	if (intel_encoder->type != INTEL_OUTPUT_EDP)
+		intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
 	return connector_status_connected;
 }
 
 static int intel_dp_get_modes(struct drm_connector *connector)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
-	struct drm_device *dev = intel_dp->base.base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct drm_device *dev = connector->dev;
 	int ret;
 
 	/* We should parse the EDID data and find out if it has an audio sink
 	 */
 
 	ret = intel_dp_get_edid_modes(connector, &intel_dp->adapter);
-	if (ret) {
-		if (is_edp(intel_dp) && !intel_dp->panel_fixed_mode) {
-			struct drm_display_mode *newmode;
-			list_for_each_entry(newmode, &connector->probed_modes,
-					    head) {
-				if ((newmode->type & DRM_MODE_TYPE_PREFERRED)) {
-					intel_dp->panel_fixed_mode =
-						drm_mode_duplicate(dev, newmode);
-					break;
-				}
-			}
-		}
+	if (ret)
 		return ret;
-	}
 
-	/* if eDP has no EDID, try to use fixed panel mode from VBT */
-	if (is_edp(intel_dp)) {
-		/* initialize panel mode from VBT if available for eDP */
-		if (intel_dp->panel_fixed_mode == NULL && dev_priv->lfp_lvds_vbt_mode != NULL) {
-			intel_dp->panel_fixed_mode =
-				drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
-			if (intel_dp->panel_fixed_mode) {
-				intel_dp->panel_fixed_mode->type |=
-					DRM_MODE_TYPE_PREFERRED;
-			}
-		}
-		if (intel_dp->panel_fixed_mode) {
-			struct drm_display_mode *mode;
-			mode = drm_mode_duplicate(dev, intel_dp->panel_fixed_mode);
+	/* if eDP has no EDID, fall back to fixed mode */
+	if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) {
+		struct drm_display_mode *mode;
+		mode = drm_mode_duplicate(dev,
+					  intel_connector->panel.fixed_mode);
+		if (mode) {
 			drm_mode_probed_add(connector, mode);
 			return 1;
 		}
@@ -2322,10 +2388,12 @@
 		      uint64_t val)
 {
 	struct drm_i915_private *dev_priv = connector->dev->dev_private;
-	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct intel_encoder *intel_encoder = intel_attached_encoder(connector);
+	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
 	int ret;
 
-	ret = drm_connector_property_set_value(connector, property, val);
+	ret = drm_object_property_set_value(&connector->base, property, val);
 	if (ret)
 		return ret;
 
@@ -2358,11 +2426,27 @@
 		goto done;
 	}
 
+	if (is_edp(intel_dp) &&
+	    property == connector->dev->mode_config.scaling_mode_property) {
+		if (val == DRM_MODE_SCALE_NONE) {
+			DRM_DEBUG_KMS("DRM_MODE_SCALE_NONE not supported on eDP\n");
+			return -EINVAL;
+		}
+
+		if (intel_connector->panel.fitting_mode == val) {
+			/* the eDP scaling property is not changed */
+			return 0;
+		}
+		intel_connector->panel.fitting_mode = val;
+
+		goto done;
+	}
+
 	return -EINVAL;
 
 done:
-	if (intel_dp->base.base.crtc) {
-		struct drm_crtc *crtc = intel_dp->base.base.crtc;
+	if (intel_encoder->base.crtc) {
+		struct drm_crtc *crtc = intel_encoder->base.crtc;
 		intel_set_mode(crtc, &crtc->mode,
 			       crtc->x, crtc->y, crtc->fb);
 	}
@@ -2375,27 +2459,33 @@
 {
 	struct drm_device *dev = connector->dev;
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct intel_connector *intel_connector = to_intel_connector(connector);
 
-	if (is_edp(intel_dp))
+	if (!IS_ERR_OR_NULL(intel_connector->edid))
+		kfree(intel_connector->edid);
+
+	if (is_edp(intel_dp)) {
 		intel_panel_destroy_backlight(dev);
+		intel_panel_fini(&intel_connector->panel);
+	}
 
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
 }
 
-static void intel_dp_encoder_destroy(struct drm_encoder *encoder)
+void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
 	i2c_del_adapter(&intel_dp->adapter);
 	drm_encoder_cleanup(encoder);
 	if (is_edp(intel_dp)) {
-		kfree(intel_dp->edid);
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
 		ironlake_panel_vdd_off_sync(intel_dp);
 	}
-	kfree(intel_dp);
+	kfree(intel_dig_port);
 }
 
 static const struct drm_encoder_helper_funcs intel_dp_helper_funcs = {
@@ -2425,7 +2515,7 @@
 static void
 intel_dp_hot_plug(struct intel_encoder *intel_encoder)
 {
-	struct intel_dp *intel_dp = container_of(intel_encoder, struct intel_dp, base);
+	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
 
 	intel_dp_check_link_status(intel_dp);
 }
@@ -2435,13 +2525,14 @@
 intel_trans_dp_port_sel(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
-	struct intel_encoder *encoder;
+	struct intel_encoder *intel_encoder;
+	struct intel_dp *intel_dp;
 
-	for_each_encoder_on_crtc(dev, crtc, encoder) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
+		intel_dp = enc_to_intel_dp(&intel_encoder->base);
 
-		if (intel_dp->base.type == INTEL_OUTPUT_DISPLAYPORT ||
-		    intel_dp->base.type == INTEL_OUTPUT_EDP)
+		if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
+		    intel_encoder->type == INTEL_OUTPUT_EDP)
 			return intel_dp->output_reg;
 	}
 
@@ -2471,78 +2562,191 @@
 static void
 intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector)
 {
+	struct intel_connector *intel_connector = to_intel_connector(connector);
+
 	intel_attach_force_audio_property(connector);
 	intel_attach_broadcast_rgb_property(connector);
+
+	if (is_edp(intel_dp)) {
+		drm_mode_create_scaling_mode_property(connector->dev);
+		drm_object_attach_property(
+			&connector->base,
+			connector->dev->mode_config.scaling_mode_property,
+			DRM_MODE_SCALE_ASPECT);
+		intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT;
+	}
+}
+
+static void
+intel_dp_init_panel_power_sequencer(struct drm_device *dev,
+				    struct intel_dp *intel_dp)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct edp_power_seq cur, vbt, spec, final;
+	u32 pp_on, pp_off, pp_div, pp;
+
+	/* Workaround: Need to write PP_CONTROL with the unlock key as
+	 * the very first thing. */
+	pp = ironlake_get_pp_control(dev_priv);
+	I915_WRITE(PCH_PP_CONTROL, pp);
+
+	pp_on = I915_READ(PCH_PP_ON_DELAYS);
+	pp_off = I915_READ(PCH_PP_OFF_DELAYS);
+	pp_div = I915_READ(PCH_PP_DIVISOR);
+
+	/* Pull timing values out of registers */
+	cur.t1_t3 = (pp_on & PANEL_POWER_UP_DELAY_MASK) >>
+		PANEL_POWER_UP_DELAY_SHIFT;
+
+	cur.t8 = (pp_on & PANEL_LIGHT_ON_DELAY_MASK) >>
+		PANEL_LIGHT_ON_DELAY_SHIFT;
+
+	cur.t9 = (pp_off & PANEL_LIGHT_OFF_DELAY_MASK) >>
+		PANEL_LIGHT_OFF_DELAY_SHIFT;
+
+	cur.t10 = (pp_off & PANEL_POWER_DOWN_DELAY_MASK) >>
+		PANEL_POWER_DOWN_DELAY_SHIFT;
+
+	cur.t11_t12 = ((pp_div & PANEL_POWER_CYCLE_DELAY_MASK) >>
+		       PANEL_POWER_CYCLE_DELAY_SHIFT) * 1000;
+
+	DRM_DEBUG_KMS("cur t1_t3 %d t8 %d t9 %d t10 %d t11_t12 %d\n",
+		      cur.t1_t3, cur.t8, cur.t9, cur.t10, cur.t11_t12);
+
+	vbt = dev_priv->edp.pps;
+
+	/* Upper limits from eDP 1.3 spec. Note that we use the clunky units of
+	 * our hw here, which are all in 100usec. */
+	spec.t1_t3 = 210 * 10;
+	spec.t8 = 50 * 10; /* no limit for t8, use t7 instead */
+	spec.t9 = 50 * 10; /* no limit for t9, make it symmetric with t8 */
+	spec.t10 = 500 * 10;
+	/* This one is special and actually in units of 100ms, but
+	 * zero-based in the hw (so we need to add 100 ms). The sw vbt
+	 * table multiplies it by 1000 to put it in units of 100usec,
+	 * too. */
+	spec.t11_t12 = (510 + 100) * 10;
+
+	DRM_DEBUG_KMS("vbt t1_t3 %d t8 %d t9 %d t10 %d t11_t12 %d\n",
+		      vbt.t1_t3, vbt.t8, vbt.t9, vbt.t10, vbt.t11_t12);
+
+	/* Use the max of the register settings and vbt. If both are
+	 * unset, fall back to the spec limits. */
+#define assign_final(field)	final.field = (max(cur.field, vbt.field) == 0 ? \
+				       spec.field : \
+				       max(cur.field, vbt.field))
+	assign_final(t1_t3);
+	assign_final(t8);
+	assign_final(t9);
+	assign_final(t10);
+	assign_final(t11_t12);
+#undef assign_final
+
+#define get_delay(field)	(DIV_ROUND_UP(final.field, 10))
+	intel_dp->panel_power_up_delay = get_delay(t1_t3);
+	intel_dp->backlight_on_delay = get_delay(t8);
+	intel_dp->backlight_off_delay = get_delay(t9);
+	intel_dp->panel_power_down_delay = get_delay(t10);
+	intel_dp->panel_power_cycle_delay = get_delay(t11_t12);
+#undef get_delay
+
+	/* And finally store the new values in the power sequencer. */
+	pp_on = (final.t1_t3 << PANEL_POWER_UP_DELAY_SHIFT) |
+		(final.t8 << PANEL_LIGHT_ON_DELAY_SHIFT);
+	pp_off = (final.t9 << PANEL_LIGHT_OFF_DELAY_SHIFT) |
+		 (final.t10 << PANEL_POWER_DOWN_DELAY_SHIFT);
+	/* Compute the divisor for the pp clock; simply match the Bspec
+	 * formula. */
+	pp_div = ((100 * intel_pch_rawclk(dev)) / 2 - 1)
+			<< PP_REFERENCE_DIVIDER_SHIFT;
+	pp_div |= (DIV_ROUND_UP(final.t11_t12, 1000)
+			<< PANEL_POWER_CYCLE_DELAY_SHIFT);
+
+	/* Haswell doesn't have any port selection bits for the panel
+	 * power sequencer any more. */
+	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) {
+		if (is_cpu_edp(intel_dp))
+			pp_on |= PANEL_POWER_PORT_DP_A;
+		else
+			pp_on |= PANEL_POWER_PORT_DP_D;
+	}
+
+	I915_WRITE(PCH_PP_ON_DELAYS, pp_on);
+	I915_WRITE(PCH_PP_OFF_DELAYS, pp_off);
+	I915_WRITE(PCH_PP_DIVISOR, pp_div);
+
+	DRM_DEBUG_KMS("panel power up delay %d, power down delay %d, power cycle delay %d\n",
+		      intel_dp->panel_power_up_delay, intel_dp->panel_power_down_delay,
+		      intel_dp->panel_power_cycle_delay);
+
+	DRM_DEBUG_KMS("backlight on delay %d, off delay %d\n",
+		      intel_dp->backlight_on_delay, intel_dp->backlight_off_delay);
+
+	DRM_DEBUG_KMS("panel power sequencer register settings: PP_ON %#x, PP_OFF %#x, PP_DIV %#x\n",
+		      I915_READ(PCH_PP_ON_DELAYS),
+		      I915_READ(PCH_PP_OFF_DELAYS),
+		      I915_READ(PCH_PP_DIVISOR));
 }
 
 void
-intel_dp_init(struct drm_device *dev, int output_reg, enum port port)
+intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
+			struct intel_connector *intel_connector)
 {
+	struct drm_connector *connector = &intel_connector->base;
+	struct intel_dp *intel_dp = &intel_dig_port->dp;
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
+	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_connector *connector;
-	struct intel_dp *intel_dp;
-	struct intel_encoder *intel_encoder;
-	struct intel_connector *intel_connector;
+	struct drm_display_mode *fixed_mode = NULL;
+	enum port port = intel_dig_port->port;
 	const char *name = NULL;
 	int type;
 
-	intel_dp = kzalloc(sizeof(struct intel_dp), GFP_KERNEL);
-	if (!intel_dp)
-		return;
-
-	intel_dp->output_reg = output_reg;
-	intel_dp->port = port;
 	/* Preserve the current hw state. */
 	intel_dp->DP = I915_READ(intel_dp->output_reg);
+	intel_dp->attached_connector = intel_connector;
 
-	intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
-	if (!intel_connector) {
-		kfree(intel_dp);
-		return;
-	}
-	intel_encoder = &intel_dp->base;
-
-	if (HAS_PCH_SPLIT(dev) && output_reg == PCH_DP_D)
+	if (HAS_PCH_SPLIT(dev) && port == PORT_D)
 		if (intel_dpd_is_edp(dev))
 			intel_dp->is_pch_edp = true;
 
-	if (output_reg == DP_A || is_pch_edp(intel_dp)) {
+	/*
+	 * FIXME: We need to initialize built-in panels before external panels.
+	 * For X0, DP_C is fixed as eDP. Revisit this as part of the VLV eDP cleanup.
+	 */
+	if (IS_VALLEYVIEW(dev) && port == PORT_C) {
+		type = DRM_MODE_CONNECTOR_eDP;
+		intel_encoder->type = INTEL_OUTPUT_EDP;
+	} else if (port == PORT_A || is_pch_edp(intel_dp)) {
 		type = DRM_MODE_CONNECTOR_eDP;
 		intel_encoder->type = INTEL_OUTPUT_EDP;
 	} else {
+		/* The intel_encoder->type value may be INTEL_OUTPUT_UNKNOWN for
+		 * DDI or INTEL_OUTPUT_DISPLAYPORT for the older gens, so don't
+		 * rewrite it.
+		 */
 		type = DRM_MODE_CONNECTOR_DisplayPort;
-		intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
 	}
 
-	connector = &intel_connector->base;
 	drm_connector_init(dev, connector, &intel_dp_connector_funcs, type);
 	drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs);
 
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
-
-	intel_encoder->cloneable = false;
+	connector->interlace_allowed = true;
+	connector->doublescan_allowed = 0;
 
 	INIT_DELAYED_WORK(&intel_dp->panel_vdd_work,
 			  ironlake_panel_vdd_work);
 
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
-
-	connector->interlace_allowed = true;
-	connector->doublescan_allowed = 0;
-
-	drm_encoder_init(dev, &intel_encoder->base, &intel_dp_enc_funcs,
-			 DRM_MODE_ENCODER_TMDS);
-	drm_encoder_helper_add(&intel_encoder->base, &intel_dp_helper_funcs);
-
 	intel_connector_attach_encoder(intel_connector, intel_encoder);
 	drm_sysfs_connector_add(connector);
 
-	intel_encoder->enable = intel_enable_dp;
-	intel_encoder->pre_enable = intel_pre_enable_dp;
-	intel_encoder->disable = intel_disable_dp;
-	intel_encoder->post_disable = intel_post_disable_dp;
-	intel_encoder->get_hw_state = intel_dp_get_hw_state;
-	intel_connector->get_hw_state = intel_connector_get_hw_state;
+	if (IS_HASWELL(dev))
+		intel_connector->get_hw_state = intel_ddi_connector_get_hw_state;
+	else
+		intel_connector->get_hw_state = intel_connector_get_hw_state;
 
 	/* Set up the DDC bus. */
 	switch (port) {
@@ -2566,66 +2770,15 @@
 		break;
 	}
 
-	/* Cache some DPCD data in the eDP case */
-	if (is_edp(intel_dp)) {
-		struct edp_power_seq	cur, vbt;
-		u32 pp_on, pp_off, pp_div;
-
-		pp_on = I915_READ(PCH_PP_ON_DELAYS);
-		pp_off = I915_READ(PCH_PP_OFF_DELAYS);
-		pp_div = I915_READ(PCH_PP_DIVISOR);
-
-		if (!pp_on || !pp_off || !pp_div) {
-			DRM_INFO("bad panel power sequencing delays, disabling panel\n");
-			intel_dp_encoder_destroy(&intel_dp->base.base);
-			intel_dp_destroy(&intel_connector->base);
-			return;
-		}
-
-		/* Pull timing values out of registers */
-		cur.t1_t3 = (pp_on & PANEL_POWER_UP_DELAY_MASK) >>
-			PANEL_POWER_UP_DELAY_SHIFT;
-
-		cur.t8 = (pp_on & PANEL_LIGHT_ON_DELAY_MASK) >>
-			PANEL_LIGHT_ON_DELAY_SHIFT;
-
-		cur.t9 = (pp_off & PANEL_LIGHT_OFF_DELAY_MASK) >>
-			PANEL_LIGHT_OFF_DELAY_SHIFT;
-
-		cur.t10 = (pp_off & PANEL_POWER_DOWN_DELAY_MASK) >>
-			PANEL_POWER_DOWN_DELAY_SHIFT;
-
-		cur.t11_t12 = ((pp_div & PANEL_POWER_CYCLE_DELAY_MASK) >>
-			       PANEL_POWER_CYCLE_DELAY_SHIFT) * 1000;
-
-		DRM_DEBUG_KMS("cur t1_t3 %d t8 %d t9 %d t10 %d t11_t12 %d\n",
-			      cur.t1_t3, cur.t8, cur.t9, cur.t10, cur.t11_t12);
-
-		vbt = dev_priv->edp.pps;
-
-		DRM_DEBUG_KMS("vbt t1_t3 %d t8 %d t9 %d t10 %d t11_t12 %d\n",
-			      vbt.t1_t3, vbt.t8, vbt.t9, vbt.t10, vbt.t11_t12);
-
-#define get_delay(field)	((max(cur.field, vbt.field) + 9) / 10)
-
-		intel_dp->panel_power_up_delay = get_delay(t1_t3);
-		intel_dp->backlight_on_delay = get_delay(t8);
-		intel_dp->backlight_off_delay = get_delay(t9);
-		intel_dp->panel_power_down_delay = get_delay(t10);
-		intel_dp->panel_power_cycle_delay = get_delay(t11_t12);
-
-		DRM_DEBUG_KMS("panel power up delay %d, power down delay %d, power cycle delay %d\n",
-			      intel_dp->panel_power_up_delay, intel_dp->panel_power_down_delay,
-			      intel_dp->panel_power_cycle_delay);
-
-		DRM_DEBUG_KMS("backlight on delay %d, off delay %d\n",
-			      intel_dp->backlight_on_delay, intel_dp->backlight_off_delay);
-	}
+	if (is_edp(intel_dp))
+		intel_dp_init_panel_power_sequencer(dev, intel_dp);
 
 	intel_dp_i2c_init(intel_dp, intel_connector, name);
 
+	/* Cache DPCD and EDID for eDP. */
 	if (is_edp(intel_dp)) {
 		bool ret;
+		struct drm_display_mode *scan;
 		struct edid *edid;
 
 		ironlake_edp_panel_vdd_on(intel_dp);
@@ -2640,29 +2793,47 @@
 		} else {
 			/* if this fails, presume the device is a ghost */
 			DRM_INFO("failed to retrieve link info, disabling eDP\n");
-			intel_dp_encoder_destroy(&intel_dp->base.base);
-			intel_dp_destroy(&intel_connector->base);
+			intel_dp_encoder_destroy(&intel_encoder->base);
+			intel_dp_destroy(connector);
 			return;
 		}
 
 		ironlake_edp_panel_vdd_on(intel_dp);
 		edid = drm_get_edid(connector, &intel_dp->adapter);
 		if (edid) {
-			drm_mode_connector_update_edid_property(connector,
-								edid);
-			intel_dp->edid_mode_count =
-				drm_add_edid_modes(connector, edid);
-			drm_edid_to_eld(connector, edid);
-			intel_dp->edid = edid;
+			if (drm_add_edid_modes(connector, edid)) {
+				drm_mode_connector_update_edid_property(connector, edid);
+				drm_edid_to_eld(connector, edid);
+			} else {
+				kfree(edid);
+				edid = ERR_PTR(-EINVAL);
+			}
+		} else {
+			edid = ERR_PTR(-ENOENT);
 		}
+		intel_connector->edid = edid;
+
+		/* prefer fixed mode from EDID if available */
+		list_for_each_entry(scan, &connector->probed_modes, head) {
+			if ((scan->type & DRM_MODE_TYPE_PREFERRED)) {
+				fixed_mode = drm_mode_duplicate(dev, scan);
+				break;
+			}
+		}
+
+		/* fall back to VBT if available for eDP */
+		if (!fixed_mode && dev_priv->lfp_lvds_vbt_mode) {
+			fixed_mode = drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
+			if (fixed_mode)
+				fixed_mode->type |= DRM_MODE_TYPE_PREFERRED;
+		}
+
 		ironlake_edp_panel_vdd_off(intel_dp, false);
 	}
 
-	intel_encoder->hot_plug = intel_dp_hot_plug;
-
 	if (is_edp(intel_dp)) {
-		dev_priv->int_edp_connector = connector;
-		intel_panel_setup_backlight(dev);
+		intel_panel_init(&intel_connector->panel, fixed_mode);
+		intel_panel_setup_backlight(connector);
 	}
 
 	intel_dp_add_properties(intel_dp, connector);
@@ -2676,3 +2847,45 @@
 		I915_WRITE(PEG_BAND_GAP_DATA, (temp & ~0xf) | 0xd);
 	}
 }
+
+void
+intel_dp_init(struct drm_device *dev, int output_reg, enum port port)
+{
+	struct intel_digital_port *intel_dig_port;
+	struct intel_encoder *intel_encoder;
+	struct drm_encoder *encoder;
+	struct intel_connector *intel_connector;
+
+	intel_dig_port = kzalloc(sizeof(struct intel_digital_port), GFP_KERNEL);
+	if (!intel_dig_port)
+		return;
+
+	intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
+	if (!intel_connector) {
+		kfree(intel_dig_port);
+		return;
+	}
+
+	intel_encoder = &intel_dig_port->base;
+	encoder = &intel_encoder->base;
+
+	drm_encoder_init(dev, &intel_encoder->base, &intel_dp_enc_funcs,
+			 DRM_MODE_ENCODER_TMDS);
+	drm_encoder_helper_add(&intel_encoder->base, &intel_dp_helper_funcs);
+
+	intel_encoder->enable = intel_enable_dp;
+	intel_encoder->pre_enable = intel_pre_enable_dp;
+	intel_encoder->disable = intel_disable_dp;
+	intel_encoder->post_disable = intel_post_disable_dp;
+	intel_encoder->get_hw_state = intel_dp_get_hw_state;
+
+	intel_dig_port->port = port;
+	intel_dig_port->dp.output_reg = output_reg;
+
+	intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
+	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	intel_encoder->cloneable = false;
+	intel_encoder->hot_plug = intel_dp_hot_plug;
+
+	intel_dp_init_connector(intel_dig_port, intel_connector);
+}
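
Note: the assign_final()/get_delay() merge in
intel_dp_init_panel_power_sequencer() above boils down to a small pure
function. A minimal standalone sketch (the helper name is illustrative,
not part of the patch); all values are in the hardware's 100us units:

	static int merge_pp_delay(int cur, int vbt, int spec)
	{
		/* Take the max of the current-register and VBT values;
		 * if both are unset (zero), fall back to the eDP spec
		 * limit. */
		int best = cur > vbt ? cur : vbt;

		if (best == 0)
			best = spec;

		/* DIV_ROUND_UP(best, 10): 100us units -> ms, rounded up */
		return (best + 9) / 10;
	}
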
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index fe71425..8a1bd4a 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -94,6 +94,7 @@
 #define INTEL_OUTPUT_HDMI 6
 #define INTEL_OUTPUT_DISPLAYPORT 7
 #define INTEL_OUTPUT_EDP 8
+#define INTEL_OUTPUT_UNKNOWN 9
 
 #define INTEL_DVO_CHIP_NONE 0
 #define INTEL_DVO_CHIP_LVDS 1
@@ -163,6 +164,11 @@
 	int crtc_mask;
 };
 
+struct intel_panel {
+	struct drm_display_mode *fixed_mode;
+	int fitting_mode;
+};
+
 struct intel_connector {
 	struct drm_connector base;
 	/*
@@ -179,12 +185,19 @@
 	/* Reads out the current hw, returning true if the connector is enabled
 	 * and active (i.e. dpms ON state). */
 	bool (*get_hw_state)(struct intel_connector *);
+
+	/* Panel info for eDP and LVDS */
+	struct intel_panel panel;
+
+	/* Cached EDID for eDP and LVDS. May hold ERR_PTR for invalid EDID. */
+	struct edid *edid;
 };
 
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
 	enum plane plane;
+	enum transcoder cpu_transcoder;
 	u8 lut_r[256], lut_g[256], lut_b[256];
 	/*
 	 * Whether the crtc and the connected output pipeline is active. Implies
@@ -198,6 +211,8 @@
 	struct intel_unpin_work *unpin_work;
 	int fdi_lanes;
 
+	atomic_t unpin_work_count;
+
 	/* Display surface base address adjustment for pageflips. Note that on
 	 * gen4+ this only adjusts up to a tile, offsets within a tile are
 	 * handled in the hw itself (with the TILEOFF register). */
@@ -212,12 +227,14 @@
 
 	/* We can share PLLs across outputs if the timings match */
 	struct intel_pch_pll *pch_pll;
+	uint32_t ddi_pll_sel;
 };
 
 struct intel_plane {
 	struct drm_plane base;
 	enum pipe pipe;
 	struct drm_i915_gem_object *obj;
+	bool can_scale;
 	int max_downscale;
 	u32 lut_r[1024], lut_g[1024], lut_b[1024];
 	void (*update_plane)(struct drm_plane *plane,
@@ -317,10 +334,8 @@
 } __attribute__((packed));
 
 struct intel_hdmi {
-	struct intel_encoder base;
 	u32 sdvox_reg;
 	int ddc_bus;
-	int ddi_port;
 	uint32_t color_range;
 	bool has_hdmi_sink;
 	bool has_audio;
@@ -331,18 +346,15 @@
 			       struct drm_display_mode *adjusted_mode);
 };
 
-#define DP_RECEIVER_CAP_SIZE		0xf
 #define DP_MAX_DOWNSTREAM_PORTS		0x10
 #define DP_LINK_CONFIGURATION_SIZE	9
 
 struct intel_dp {
-	struct intel_encoder base;
 	uint32_t output_reg;
 	uint32_t DP;
 	uint8_t  link_configuration[DP_LINK_CONFIGURATION_SIZE];
 	bool has_audio;
 	enum hdmi_force_audio force_audio;
-	enum port port;
 	uint32_t color_range;
 	uint8_t link_bw;
 	uint8_t lane_count;
@@ -357,11 +369,16 @@
 	int panel_power_cycle_delay;
 	int backlight_on_delay;
 	int backlight_off_delay;
-	struct drm_display_mode *panel_fixed_mode;  /* for eDP */
 	struct delayed_work panel_vdd_work;
 	bool want_panel_vdd;
-	struct edid *edid; /* cached EDID for eDP */
-	int edid_mode_count;
+	struct intel_connector *attached_connector;
+};
+
+struct intel_digital_port {
+	struct intel_encoder base;
+	enum port port;
+	struct intel_dp dp;
+	struct intel_hdmi hdmi;
 };
 
 static inline struct drm_crtc *
@@ -380,11 +397,14 @@
 
 struct intel_unpin_work {
 	struct work_struct work;
-	struct drm_device *dev;
+	struct drm_crtc *crtc;
 	struct drm_i915_gem_object *old_fb_obj;
 	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
-	int pending;
+	atomic_t pending;
+#define INTEL_FLIP_INACTIVE	0
+#define INTEL_FLIP_PENDING	1
+#define INTEL_FLIP_COMPLETE	2
 	bool enable_stall_check;
 };
 
@@ -395,6 +415,8 @@
 	int interval;
 };
 
+int intel_pch_rawclk(struct drm_device *dev);
+
 int intel_connector_update_modes(struct drm_connector *connector,
 				struct edid *edid);
 int intel_ddc_get_modes(struct drm_connector *c, struct i2c_adapter *adapter);
@@ -405,7 +427,12 @@
 extern void intel_crt_init(struct drm_device *dev);
 extern void intel_hdmi_init(struct drm_device *dev,
 			    int sdvox_reg, enum port port);
+extern void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
+				      struct intel_connector *intel_connector);
 extern struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
+extern bool intel_hdmi_mode_fixup(struct drm_encoder *encoder,
+				  const struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode);
 extern void intel_dip_infoframe_csum(struct dip_infoframe *avi_if);
 extern bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg,
 			    bool is_sdvob);
@@ -418,10 +445,27 @@
 extern bool intel_lvds_init(struct drm_device *dev);
 extern void intel_dp_init(struct drm_device *dev, int output_reg,
 			  enum port port);
+extern void intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
+				    struct intel_connector *intel_connector);
 void
 intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
 		 struct drm_display_mode *adjusted_mode);
+extern void intel_dp_init_link_config(struct intel_dp *intel_dp);
+extern void intel_dp_start_link_train(struct intel_dp *intel_dp);
+extern void intel_dp_complete_link_train(struct intel_dp *intel_dp);
+extern void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
+extern void intel_dp_encoder_destroy(struct drm_encoder *encoder);
+extern void intel_dp_check_link_status(struct intel_dp *intel_dp);
+extern bool intel_dp_mode_fixup(struct drm_encoder *encoder,
+				const struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode);
 extern bool intel_dpd_is_edp(struct drm_device *dev);
+extern void ironlake_edp_backlight_on(struct intel_dp *intel_dp);
+extern void ironlake_edp_backlight_off(struct intel_dp *intel_dp);
+extern void ironlake_edp_panel_on(struct intel_dp *intel_dp);
+extern void ironlake_edp_panel_off(struct intel_dp *intel_dp);
+extern void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp);
+extern void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync);
 extern void intel_edp_link_config(struct intel_encoder *, int *, int *);
 extern int intel_edp_target_clock(struct intel_encoder *,
 				  struct drm_display_mode *mode);
@@ -431,6 +475,10 @@
 				      enum plane plane);
 
 /* intel_panel.c */
+extern int intel_panel_init(struct intel_panel *panel,
+			    struct drm_display_mode *fixed_mode);
+extern void intel_panel_fini(struct intel_panel *panel);
+
 extern void intel_fixed_panel_mode(struct drm_display_mode *fixed_mode,
 				   struct drm_display_mode *adjusted_mode);
 extern void intel_pch_panel_fitting(struct drm_device *dev,
@@ -439,7 +487,7 @@
 				    struct drm_display_mode *adjusted_mode);
 extern u32 intel_panel_get_max_backlight(struct drm_device *dev);
 extern void intel_panel_set_backlight(struct drm_device *dev, u32 level);
-extern int intel_panel_setup_backlight(struct drm_device *dev);
+extern int intel_panel_setup_backlight(struct drm_connector *connector);
 extern void intel_panel_enable_backlight(struct drm_device *dev,
 					 enum pipe pipe);
 extern void intel_panel_disable_backlight(struct drm_device *dev);
@@ -473,6 +521,31 @@
 	return to_intel_connector(connector)->encoder;
 }
 
+static inline struct intel_dp *enc_to_intel_dp(struct drm_encoder *encoder)
+{
+	struct intel_digital_port *intel_dig_port =
+		container_of(encoder, struct intel_digital_port, base.base);
+	return &intel_dig_port->dp;
+}
+
+static inline struct intel_digital_port *
+enc_to_dig_port(struct drm_encoder *encoder)
+{
+	return container_of(encoder, struct intel_digital_port, base.base);
+}
+
+static inline struct intel_digital_port *
+dp_to_dig_port(struct intel_dp *intel_dp)
+{
+	return container_of(intel_dp, struct intel_digital_port, dp);
+}
+
+static inline struct intel_digital_port *
+hdmi_to_dig_port(struct intel_hdmi *intel_hdmi)
+{
+	return container_of(intel_hdmi, struct intel_digital_port, hdmi);
+}
+
 extern void intel_connector_attach_encoder(struct intel_connector *connector,
 					   struct intel_encoder *encoder);
 extern struct drm_encoder *intel_best_encoder(struct drm_connector *connector);
@@ -481,8 +554,12 @@
 						    struct drm_crtc *crtc);
 int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+extern enum transcoder
+intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv,
+			     enum pipe pipe);
 extern void intel_wait_for_vblank(struct drm_device *dev, int pipe);
 extern void intel_wait_for_pipe_off(struct drm_device *dev, int pipe);
+extern int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp);
 
 struct intel_load_detect_pipe {
 	struct drm_framebuffer *release_fb;
@@ -550,6 +627,10 @@
 extern void intel_update_linetime_watermarks(struct drm_device *dev, int pipe,
 			 struct drm_display_mode *mode);
 
+extern unsigned long intel_gen4_compute_offset_xtiled(int *x, int *y,
+						      unsigned int bpp,
+						      unsigned int pitch);
+
 extern int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
 				     struct drm_file *file_priv);
 extern int intel_sprite_get_colorkey(struct drm_device *dev, void *data,
@@ -573,12 +654,22 @@
 extern void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv);
 extern void ironlake_teardown_rc6(struct drm_device *dev);
 
-extern void intel_enable_ddi(struct intel_encoder *encoder);
-extern void intel_disable_ddi(struct intel_encoder *encoder);
 extern bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
 				   enum pipe *pipe);
-extern void intel_ddi_mode_set(struct drm_encoder *encoder,
-				struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode);
+extern int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv);
+extern void intel_ddi_pll_init(struct drm_device *dev);
+extern void intel_ddi_enable_pipe_func(struct drm_crtc *crtc);
+extern void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
+					      enum transcoder cpu_transcoder);
+extern void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc);
+extern void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc);
+extern void intel_ddi_setup_hw_pll_state(struct drm_device *dev);
+extern bool intel_ddi_pll_mode_set(struct drm_crtc *crtc, int clock);
+extern void intel_ddi_put_crtc_pll(struct drm_crtc *crtc);
+extern void intel_ddi_set_pipe_settings(struct drm_crtc *crtc);
+extern void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder);
+extern bool
+intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
+extern void intel_ddi_fdi_disable(struct drm_crtc *crtc);
 
 #endif /* __INTEL_DRV_H__ */
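
Note: with struct intel_dp and struct intel_hdmi no longer embedding
their own intel_encoder, the container_of() helpers above are the only
way back from a drm_encoder to the shared intel_digital_port. A
hypothetical caller (the function name and register choice are
illustrative only):

	static u32 example_port_reg(struct drm_encoder *encoder)
	{
		struct intel_digital_port *dig_port = enc_to_dig_port(encoder);

		/* One allocation carries both output flavours; the
		 * encoder type says which member is currently live. */
		if (dig_port->base.type == INTEL_OUTPUT_HDMI)
			return dig_port->hdmi.sdvox_reg;

		return dig_port->dp.output_reg;
	}
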
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 9ba0aae..2ee9821 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -36,10 +36,15 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
+static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi)
+{
+	return hdmi_to_dig_port(intel_hdmi)->base.base.dev;
+}
+
 static void
 assert_hdmi_port_disabled(struct intel_hdmi *intel_hdmi)
 {
-	struct drm_device *dev = intel_hdmi->base.base.dev;
+	struct drm_device *dev = intel_hdmi_to_dev(intel_hdmi);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t enabled_bits;
 
@@ -51,13 +56,14 @@
 
 struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder)
 {
-	return container_of(encoder, struct intel_hdmi, base.base);
+	struct intel_digital_port *intel_dig_port =
+		container_of(encoder, struct intel_digital_port, base.base);
+	return &intel_dig_port->hdmi;
 }
 
 static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector)
 {
-	return container_of(intel_attached_encoder(connector),
-			    struct intel_hdmi, base);
+	return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base);
 }
 
 void intel_dip_infoframe_csum(struct dip_infoframe *frame)
@@ -334,6 +340,8 @@
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
 		avi_if.body.avi.YQ_CN_PR |= DIP_AVI_PR_2;
 
+	avi_if.body.avi.VIC = drm_mode_cea_vic(adjusted_mode);
+
 	intel_set_infoframe(encoder, &avi_if);
 }
 
@@ -754,16 +762,16 @@
 	return MODE_OK;
 }
 
-static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
+bool intel_hdmi_mode_fixup(struct drm_encoder *encoder,
+			   const struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode)
 {
 	return true;
 }
 
 static bool g4x_hdmi_connected(struct intel_hdmi *intel_hdmi)
 {
-	struct drm_device *dev = intel_hdmi->base.base.dev;
+	struct drm_device *dev = intel_hdmi_to_dev(intel_hdmi);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t bit;
 
@@ -786,6 +794,9 @@
 intel_hdmi_detect(struct drm_connector *connector, bool force)
 {
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_digital_port *intel_dig_port =
+		hdmi_to_dig_port(intel_hdmi);
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
 	struct drm_i915_private *dev_priv = connector->dev->dev_private;
 	struct edid *edid;
 	enum drm_connector_status status = connector_status_disconnected;
@@ -814,6 +825,7 @@
 		if (intel_hdmi->force_audio != HDMI_AUDIO_AUTO)
 			intel_hdmi->has_audio =
 				(intel_hdmi->force_audio == HDMI_AUDIO_ON);
+		intel_encoder->type = INTEL_OUTPUT_HDMI;
 	}
 
 	return status;
@@ -859,10 +871,12 @@
 			uint64_t val)
 {
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+	struct intel_digital_port *intel_dig_port =
+		hdmi_to_dig_port(intel_hdmi);
 	struct drm_i915_private *dev_priv = connector->dev->dev_private;
 	int ret;
 
-	ret = drm_connector_property_set_value(connector, property, val);
+	ret = drm_object_property_set_value(&connector->base, property, val);
 	if (ret)
 		return ret;
 
@@ -898,8 +912,8 @@
 	return -EINVAL;
 
 done:
-	if (intel_hdmi->base.base.crtc) {
-		struct drm_crtc *crtc = intel_hdmi->base.base.crtc;
+	if (intel_dig_port->base.base.crtc) {
+		struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
 		intel_set_mode(crtc, &crtc->mode,
 			       crtc->x, crtc->y, crtc->fb);
 	}
@@ -914,12 +928,6 @@
 	kfree(connector);
 }
 
-static const struct drm_encoder_helper_funcs intel_hdmi_helper_funcs_hsw = {
-	.mode_fixup = intel_hdmi_mode_fixup,
-	.mode_set = intel_ddi_mode_set,
-	.disable = intel_encoder_noop,
-};
-
 static const struct drm_encoder_helper_funcs intel_hdmi_helper_funcs = {
 	.mode_fixup = intel_hdmi_mode_fixup,
 	.mode_set = intel_hdmi_mode_set,
@@ -951,43 +959,24 @@
 	intel_attach_broadcast_rgb_property(connector);
 }
 
-void intel_hdmi_init(struct drm_device *dev, int sdvox_reg, enum port port)
+void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
+			       struct intel_connector *intel_connector)
 {
+	struct drm_connector *connector = &intel_connector->base;
+	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
+	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_connector *connector;
-	struct intel_encoder *intel_encoder;
-	struct intel_connector *intel_connector;
-	struct intel_hdmi *intel_hdmi;
+	enum port port = intel_dig_port->port;
 
-	intel_hdmi = kzalloc(sizeof(struct intel_hdmi), GFP_KERNEL);
-	if (!intel_hdmi)
-		return;
-
-	intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
-	if (!intel_connector) {
-		kfree(intel_hdmi);
-		return;
-	}
-
-	intel_encoder = &intel_hdmi->base;
-	drm_encoder_init(dev, &intel_encoder->base, &intel_hdmi_enc_funcs,
-			 DRM_MODE_ENCODER_TMDS);
-
-	connector = &intel_connector->base;
 	drm_connector_init(dev, connector, &intel_hdmi_connector_funcs,
 			   DRM_MODE_CONNECTOR_HDMIA);
 	drm_connector_helper_add(connector, &intel_hdmi_connector_helper_funcs);
 
-	intel_encoder->type = INTEL_OUTPUT_HDMI;
-
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
 	connector->interlace_allowed = 1;
 	connector->doublescan_allowed = 0;
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 
-	intel_encoder->cloneable = false;
-
-	intel_hdmi->ddi_port = port;
 	switch (port) {
 	case PORT_B:
 		intel_hdmi->ddc_bus = GMBUS_PORT_DPB;
@@ -1007,8 +996,6 @@
 		BUG();
 	}
 
-	intel_hdmi->sdvox_reg = sdvox_reg;
-
 	if (!HAS_PCH_SPLIT(dev)) {
 		intel_hdmi->write_infoframe = g4x_write_infoframe;
 		intel_hdmi->set_infoframes = g4x_set_infoframes;
@@ -1026,21 +1013,10 @@
 		intel_hdmi->set_infoframes = cpt_set_infoframes;
 	}
 
-	if (IS_HASWELL(dev)) {
-		intel_encoder->enable = intel_enable_ddi;
-		intel_encoder->disable = intel_disable_ddi;
-		intel_encoder->get_hw_state = intel_ddi_get_hw_state;
-		drm_encoder_helper_add(&intel_encoder->base,
-				       &intel_hdmi_helper_funcs_hsw);
-	} else {
-		intel_encoder->enable = intel_enable_hdmi;
-		intel_encoder->disable = intel_disable_hdmi;
-		intel_encoder->get_hw_state = intel_hdmi_get_hw_state;
-		drm_encoder_helper_add(&intel_encoder->base,
-				       &intel_hdmi_helper_funcs);
-	}
-	intel_connector->get_hw_state = intel_connector_get_hw_state;
-
+	if (IS_HASWELL(dev))
+		intel_connector->get_hw_state = intel_ddi_connector_get_hw_state;
+	else
+		intel_connector->get_hw_state = intel_connector_get_hw_state;
 
 	intel_hdmi_add_properties(intel_hdmi, connector);
 
@@ -1056,3 +1032,42 @@
 		I915_WRITE(PEG_BAND_GAP_DATA, (temp & ~0xf) | 0xd);
 	}
 }
+
+void intel_hdmi_init(struct drm_device *dev, int sdvox_reg, enum port port)
+{
+	struct intel_digital_port *intel_dig_port;
+	struct intel_encoder *intel_encoder;
+	struct drm_encoder *encoder;
+	struct intel_connector *intel_connector;
+
+	intel_dig_port = kzalloc(sizeof(struct intel_digital_port), GFP_KERNEL);
+	if (!intel_dig_port)
+		return;
+
+	intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
+	if (!intel_connector) {
+		kfree(intel_dig_port);
+		return;
+	}
+
+	intel_encoder = &intel_dig_port->base;
+	encoder = &intel_encoder->base;
+
+	drm_encoder_init(dev, &intel_encoder->base, &intel_hdmi_enc_funcs,
+			 DRM_MODE_ENCODER_TMDS);
+	drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs);
+
+	intel_encoder->enable = intel_enable_hdmi;
+	intel_encoder->disable = intel_disable_hdmi;
+	intel_encoder->get_hw_state = intel_hdmi_get_hw_state;
+
+	intel_encoder->type = INTEL_OUTPUT_HDMI;
+	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	intel_encoder->cloneable = false;
+
+	intel_dig_port->port = port;
+	intel_dig_port->hdmi.sdvox_reg = sdvox_reg;
+	intel_dig_port->dp.output_reg = 0;
+
+	intel_hdmi_init_connector(intel_dig_port, intel_connector);
+}
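
Note: on DDI platforms a digital port can start life as
INTEL_OUTPUT_UNKNOWN and only gets a concrete type once detect() sees
what is plugged in, which is why intel_hdmi_detect() above sets
intel_encoder->type on a successful detect and intel_dp_init_connector()
avoids overwriting a non-eDP type. A hypothetical decision helper
sketching that rule (not part of the patch):

	static int resolve_output_type(struct intel_encoder *encoder,
				       bool hdmi_sink_detected)
	{
		/* Only upgrade an unknown DDI port; never flip a port
		 * that has already identified itself (e.g. as eDP). */
		if (encoder->type == INTEL_OUTPUT_UNKNOWN && hdmi_sink_detected)
			encoder->type = INTEL_OUTPUT_HDMI;

		return encoder->type;
	}
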
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index c2c6dbc..3ef5af1 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -432,7 +432,7 @@
 	I915_WRITE(GMBUS0 + reg_offset, 0);
 
 	/* Hardware may not support GMBUS over these pins? Try GPIO bitbanging instead. */
-	bus->force_bit = true;
+	bus->force_bit = 1;
 	ret = i2c_bit_algo.master_xfer(adapter, msgs, num);
 
 out:
@@ -491,7 +491,7 @@
 
 		/* gmbus seems to be broken on i830 */
 		if (IS_I830(dev))
-			bus->force_bit = true;
+			bus->force_bit = 1;
 
 		intel_gpio_setup(bus, port);
 
@@ -532,7 +532,10 @@
 {
 	struct intel_gmbus *bus = to_intel_gmbus(adapter);
 
-	bus->force_bit = force_bit;
+	bus->force_bit += force_bit ? 1 : -1;
+	DRM_DEBUG_KMS("%sabling bit-banging on %s. force bit now %d\n",
+		      force_bit ? "en" : "dis", adapter->name,
+		      bus->force_bit);
 }
 
 void intel_teardown_gmbus(struct drm_device *dev)
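
Note: force_bit is now a counter rather than a flag, so
intel_gmbus_force_bit() is meant to be called in balanced
enable/disable pairs. A hedged sketch of the intended usage (the
wrapper function is a placeholder, not part of this patch):

	static int xfer_bitbanged(struct i2c_adapter *adapter,
				  struct i2c_msg *msgs, int num)
	{
		int ret;

		/* Bump the counter, transfer over GPIO bit-banging,
		 * then drop the counter so GMBUS is used again once
		 * no caller needs bit-banging anymore. */
		intel_gmbus_force_bit(adapter, true);
		ret = i2c_transfer(adapter, msgs, num);
		intel_gmbus_force_bit(adapter, false);

		return ret;
	}
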
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index edba93b..b9a660a 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -40,28 +40,30 @@
 #include <linux/acpi.h>
 
 /* Private structure for the integrated LVDS support */
-struct intel_lvds {
+struct intel_lvds_connector {
+	struct intel_connector base;
+
+	struct notifier_block lid_notifier;
+};
+
+struct intel_lvds_encoder {
 	struct intel_encoder base;
 
-	struct edid *edid;
-
-	int fitting_mode;
 	u32 pfit_control;
 	u32 pfit_pgm_ratios;
 	bool pfit_dirty;
 
-	struct drm_display_mode *fixed_mode;
+	struct intel_lvds_connector *attached_connector;
 };
 
-static struct intel_lvds *to_intel_lvds(struct drm_encoder *encoder)
+static struct intel_lvds_encoder *to_lvds_encoder(struct drm_encoder *encoder)
 {
-	return container_of(encoder, struct intel_lvds, base.base);
+	return container_of(encoder, struct intel_lvds_encoder, base.base);
 }
 
-static struct intel_lvds *intel_attached_lvds(struct drm_connector *connector)
+static struct intel_lvds_connector *to_lvds_connector(struct drm_connector *connector)
 {
-	return container_of(intel_attached_encoder(connector),
-			    struct intel_lvds, base);
+	return container_of(connector, struct intel_lvds_connector, base.base);
 }
 
 static bool intel_lvds_get_hw_state(struct intel_encoder *encoder,
@@ -96,7 +98,7 @@
 static void intel_enable_lvds(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
-	struct intel_lvds *intel_lvds = to_intel_lvds(&encoder->base);
+	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 ctl_reg, lvds_reg, stat_reg;
@@ -113,7 +115,7 @@
 
 	I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN);
 
-	if (intel_lvds->pfit_dirty) {
+	if (lvds_encoder->pfit_dirty) {
 		/*
 		 * Enable automatic panel scaling so that non-native modes
 		 * fill the screen.  The panel fitter should only be
@@ -121,12 +123,12 @@
 		 * register description and PRM.
 		 */
 		DRM_DEBUG_KMS("applying panel-fitter: %x, %x\n",
-			      intel_lvds->pfit_control,
-			      intel_lvds->pfit_pgm_ratios);
+			      lvds_encoder->pfit_control,
+			      lvds_encoder->pfit_pgm_ratios);
 
-		I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios);
-		I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control);
-		intel_lvds->pfit_dirty = false;
+		I915_WRITE(PFIT_PGM_RATIOS, lvds_encoder->pfit_pgm_ratios);
+		I915_WRITE(PFIT_CONTROL, lvds_encoder->pfit_control);
+		lvds_encoder->pfit_dirty = false;
 	}
 
 	I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON);
@@ -140,7 +142,7 @@
 static void intel_disable_lvds(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
-	struct intel_lvds *intel_lvds = to_intel_lvds(&encoder->base);
+	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 ctl_reg, lvds_reg, stat_reg;
 
@@ -160,9 +162,9 @@
 	if (wait_for((I915_READ(stat_reg) & PP_ON) == 0, 1000))
 		DRM_ERROR("timed out waiting for panel to power off\n");
 
-	if (intel_lvds->pfit_control) {
+	if (lvds_encoder->pfit_control) {
 		I915_WRITE(PFIT_CONTROL, 0);
-		intel_lvds->pfit_dirty = true;
+		lvds_encoder->pfit_dirty = true;
 	}
 
 	I915_WRITE(lvds_reg, I915_READ(lvds_reg) & ~LVDS_PORT_EN);
@@ -172,8 +174,8 @@
 static int intel_lvds_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
-	struct intel_lvds *intel_lvds = intel_attached_lvds(connector);
-	struct drm_display_mode *fixed_mode = intel_lvds->fixed_mode;
+	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
 
 	if (mode->hdisplay > fixed_mode->hdisplay)
 		return MODE_PANEL;
@@ -249,8 +251,10 @@
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_lvds *intel_lvds = to_intel_lvds(encoder);
-	struct intel_crtc *intel_crtc = intel_lvds->base.new_crtc;
+	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(encoder);
+	struct intel_connector *intel_connector =
+		&lvds_encoder->attached_connector->base;
+	struct intel_crtc *intel_crtc = lvds_encoder->base.new_crtc;
 	u32 pfit_control = 0, pfit_pgm_ratios = 0, border = 0;
 	int pipe;
 
@@ -260,7 +264,7 @@
 		return false;
 	}
 
-	if (intel_encoder_check_is_cloned(&intel_lvds->base))
+	if (intel_encoder_check_is_cloned(&lvds_encoder->base))
 		return false;
 
 	/*
@@ -269,10 +273,12 @@
 	 * with the panel scaling set up to source from the H/VDisplay
 	 * of the original mode.
 	 */
-	intel_fixed_panel_mode(intel_lvds->fixed_mode, adjusted_mode);
+	intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
+			       adjusted_mode);
 
 	if (HAS_PCH_SPLIT(dev)) {
-		intel_pch_panel_fitting(dev, intel_lvds->fitting_mode,
+		intel_pch_panel_fitting(dev,
+					intel_connector->panel.fitting_mode,
 					mode, adjusted_mode);
 		return true;
 	}
@@ -298,7 +304,7 @@
 
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
 
-	switch (intel_lvds->fitting_mode) {
+	switch (intel_connector->panel.fitting_mode) {
 	case DRM_MODE_SCALE_CENTER:
 		/*
 		 * For centered modes, we have to calculate border widths &
@@ -396,11 +402,11 @@
 	if (INTEL_INFO(dev)->gen < 4 && dev_priv->lvds_dither)
 		pfit_control |= PANEL_8TO6_DITHER_ENABLE;
 
-	if (pfit_control != intel_lvds->pfit_control ||
-	    pfit_pgm_ratios != intel_lvds->pfit_pgm_ratios) {
-		intel_lvds->pfit_control = pfit_control;
-		intel_lvds->pfit_pgm_ratios = pfit_pgm_ratios;
-		intel_lvds->pfit_dirty = true;
+	if (pfit_control != lvds_encoder->pfit_control ||
+	    pfit_pgm_ratios != lvds_encoder->pfit_pgm_ratios) {
+		lvds_encoder->pfit_control = pfit_control;
+		lvds_encoder->pfit_pgm_ratios = pfit_pgm_ratios;
+		lvds_encoder->pfit_dirty = true;
 	}
 	dev_priv->lvds_border_bits = border;
 
@@ -449,14 +455,15 @@
  */
 static int intel_lvds_get_modes(struct drm_connector *connector)
 {
-	struct intel_lvds *intel_lvds = intel_attached_lvds(connector);
+	struct intel_lvds_connector *lvds_connector = to_lvds_connector(connector);
 	struct drm_device *dev = connector->dev;
 	struct drm_display_mode *mode;
 
-	if (intel_lvds->edid)
-		return drm_add_edid_modes(connector, intel_lvds->edid);
+	/* use cached EDID if we have one */
+	if (!IS_ERR_OR_NULL(lvds_connector->base.edid))
+		return drm_add_edid_modes(connector, lvds_connector->base.edid);
 
-	mode = drm_mode_duplicate(dev, intel_lvds->fixed_mode);
+	mode = drm_mode_duplicate(dev, lvds_connector->base.panel.fixed_mode);
 	if (mode == NULL)
 		return 0;
 
@@ -496,10 +503,11 @@
 static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
 			    void *unused)
 {
-	struct drm_i915_private *dev_priv =
-		container_of(nb, struct drm_i915_private, lid_notifier);
-	struct drm_device *dev = dev_priv->dev;
-	struct drm_connector *connector = dev_priv->int_lvds_connector;
+	struct intel_lvds_connector *lvds_connector =
+		container_of(nb, struct intel_lvds_connector, lid_notifier);
+	struct drm_connector *connector = &lvds_connector->base.base;
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (dev->switch_power_state != DRM_SWITCH_POWER_ON)
 		return NOTIFY_OK;
@@ -508,9 +516,7 @@
 	 * check and update the status of the LVDS connector after receiving
 	 * the LID notification event.
 	 */
-	if (connector)
-		connector->status = connector->funcs->detect(connector,
-							     false);
+	connector->status = connector->funcs->detect(connector, false);
 
 	/* Don't force modeset on machines where it causes a GPU lockup */
 	if (dmi_check_system(intel_no_modeset_on_lid))
@@ -526,7 +532,7 @@
 	dev_priv->modeset_on_lid = 0;
 
 	mutex_lock(&dev->mode_config.mutex);
-	intel_modeset_check_state(dev);
+	intel_modeset_setup_hw_state(dev, true);
 	mutex_unlock(&dev->mode_config.mutex);
 
 	return NOTIFY_OK;
@@ -541,13 +547,18 @@
  */
 static void intel_lvds_destroy(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_lvds_connector *lvds_connector =
+		to_lvds_connector(connector);
 
-	intel_panel_destroy_backlight(dev);
+	if (lvds_connector->lid_notifier.notifier_call)
+		acpi_lid_notifier_unregister(&lvds_connector->lid_notifier);
 
-	if (dev_priv->lid_notifier.notifier_call)
-		acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
+	if (!IS_ERR_OR_NULL(lvds_connector->base.edid))
+		kfree(lvds_connector->base.edid);
+
+	intel_panel_destroy_backlight(connector->dev);
+	intel_panel_fini(&lvds_connector->base.panel);
+
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -557,22 +568,24 @@
 				   struct drm_property *property,
 				   uint64_t value)
 {
-	struct intel_lvds *intel_lvds = intel_attached_lvds(connector);
+	struct intel_connector *intel_connector = to_intel_connector(connector);
 	struct drm_device *dev = connector->dev;
 
 	if (property == dev->mode_config.scaling_mode_property) {
-		struct drm_crtc *crtc = intel_lvds->base.base.crtc;
+		struct drm_crtc *crtc;
 
 		if (value == DRM_MODE_SCALE_NONE) {
 			DRM_DEBUG_KMS("no scaling not supported\n");
 			return -EINVAL;
 		}
 
-		if (intel_lvds->fitting_mode == value) {
+		if (intel_connector->panel.fitting_mode == value) {
 			/* the LVDS scaling property is not changed */
 			return 0;
 		}
-		intel_lvds->fitting_mode = value;
+		intel_connector->panel.fitting_mode = value;
+
+		crtc = intel_attached_encoder(connector)->base.crtc;
 		if (crtc && crtc->enabled) {
 			/*
 			 * If the CRTC is enabled, the display will be changed
@@ -912,12 +925,15 @@
 bool intel_lvds_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_lvds *intel_lvds;
+	struct intel_lvds_encoder *lvds_encoder;
 	struct intel_encoder *intel_encoder;
+	struct intel_lvds_connector *lvds_connector;
 	struct intel_connector *intel_connector;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
 	struct drm_display_mode *scan; /* *modes, *bios_mode; */
+	struct drm_display_mode *fixed_mode = NULL;
+	struct edid *edid;
 	struct drm_crtc *crtc;
 	u32 lvds;
 	int pipe;
@@ -945,23 +961,25 @@
 		}
 	}
 
-	intel_lvds = kzalloc(sizeof(struct intel_lvds), GFP_KERNEL);
-	if (!intel_lvds) {
+	lvds_encoder = kzalloc(sizeof(struct intel_lvds_encoder), GFP_KERNEL);
+	if (!lvds_encoder)
+		return false;
+
+	lvds_connector = kzalloc(sizeof(struct intel_lvds_connector), GFP_KERNEL);
+	if (!lvds_connector) {
+		kfree(lvds_encoder);
 		return false;
 	}
 
-	intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
-	if (!intel_connector) {
-		kfree(intel_lvds);
-		return false;
-	}
+	lvds_encoder->attached_connector = lvds_connector;
 
 	if (!HAS_PCH_SPLIT(dev)) {
-		intel_lvds->pfit_control = I915_READ(PFIT_CONTROL);
+		lvds_encoder->pfit_control = I915_READ(PFIT_CONTROL);
 	}
 
-	intel_encoder = &intel_lvds->base;
+	intel_encoder = &lvds_encoder->base;
 	encoder = &intel_encoder->base;
+	intel_connector = &lvds_connector->base;
 	connector = &intel_connector->base;
 	drm_connector_init(dev, &intel_connector->base, &intel_lvds_connector_funcs,
 			   DRM_MODE_CONNECTOR_LVDS);
@@ -993,14 +1011,10 @@
 
 	/* create the scaling mode property */
 	drm_mode_create_scaling_mode_property(dev);
-	/*
-	 * the initial panel fitting mode will be FULL_SCREEN.
-	 */
-
-	drm_connector_attach_property(&intel_connector->base,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.scaling_mode_property,
 				      DRM_MODE_SCALE_ASPECT);
-	intel_lvds->fitting_mode = DRM_MODE_SCALE_ASPECT;
+	intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT;
 	/*
 	 * LVDS discovery:
 	 * 1) check for EDID on DDC
@@ -1015,20 +1029,21 @@
 	 * Attempt to get the fixed panel mode from DDC.  Assume that the
 	 * preferred mode is the right one.
 	 */
-	intel_lvds->edid = drm_get_edid(connector,
-					intel_gmbus_get_adapter(dev_priv,
-								pin));
-	if (intel_lvds->edid) {
-		if (drm_add_edid_modes(connector,
-				       intel_lvds->edid)) {
+	edid = drm_get_edid(connector, intel_gmbus_get_adapter(dev_priv, pin));
+	if (edid) {
+		if (drm_add_edid_modes(connector, edid)) {
 			drm_mode_connector_update_edid_property(connector,
-								intel_lvds->edid);
+								edid);
 		} else {
-			kfree(intel_lvds->edid);
-			intel_lvds->edid = NULL;
+			kfree(edid);
+			edid = ERR_PTR(-EINVAL);
 		}
+	} else {
+		edid = ERR_PTR(-ENOENT);
 	}
-	if (!intel_lvds->edid) {
+	lvds_connector->base.edid = edid;
+
+	if (IS_ERR_OR_NULL(edid)) {
 		/* Didn't get an EDID, so
 		 * set wide sync ranges so we get all modes
 		 * handed to valid_mode for checking
@@ -1041,22 +1056,26 @@
 
 	list_for_each_entry(scan, &connector->probed_modes, head) {
 		if (scan->type & DRM_MODE_TYPE_PREFERRED) {
-			intel_lvds->fixed_mode =
-				drm_mode_duplicate(dev, scan);
-			intel_find_lvds_downclock(dev,
-						  intel_lvds->fixed_mode,
-						  connector);
-			goto out;
+			DRM_DEBUG_KMS("using preferred mode from EDID: ");
+			drm_mode_debug_printmodeline(scan);
+
+			fixed_mode = drm_mode_duplicate(dev, scan);
+			if (fixed_mode) {
+				intel_find_lvds_downclock(dev, fixed_mode,
+							  connector);
+				goto out;
+			}
 		}
 	}
 
 	/* Failed to get EDID, what about VBT? */
 	if (dev_priv->lfp_lvds_vbt_mode) {
-		intel_lvds->fixed_mode =
-			drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
-		if (intel_lvds->fixed_mode) {
-			intel_lvds->fixed_mode->type |=
-				DRM_MODE_TYPE_PREFERRED;
+		DRM_DEBUG_KMS("using mode from VBT: ");
+		drm_mode_debug_printmodeline(dev_priv->lfp_lvds_vbt_mode);
+
+		fixed_mode = drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
+		if (fixed_mode) {
+			fixed_mode->type |= DRM_MODE_TYPE_PREFERRED;
 			goto out;
 		}
 	}
@@ -1076,16 +1095,17 @@
 	crtc = intel_get_crtc_for_pipe(dev, pipe);
 
 	if (crtc && (lvds & LVDS_PORT_EN)) {
-		intel_lvds->fixed_mode = intel_crtc_mode_get(dev, crtc);
-		if (intel_lvds->fixed_mode) {
-			intel_lvds->fixed_mode->type |=
-				DRM_MODE_TYPE_PREFERRED;
+		fixed_mode = intel_crtc_mode_get(dev, crtc);
+		if (fixed_mode) {
+			DRM_DEBUG_KMS("using current (BIOS) mode: ");
+			drm_mode_debug_printmodeline(fixed_mode);
+			fixed_mode->type |= DRM_MODE_TYPE_PREFERRED;
 			goto out;
 		}
 	}
 
 	/* If we still don't have a mode after all that, give up. */
-	if (!intel_lvds->fixed_mode)
+	if (!fixed_mode)
 		goto failed;
 
 out:
@@ -1100,16 +1120,15 @@
 		I915_WRITE(PP_CONTROL,
 			   I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
 	}
-	dev_priv->lid_notifier.notifier_call = intel_lid_notify;
-	if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
+	lvds_connector->lid_notifier.notifier_call = intel_lid_notify;
+	if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) {
 		DRM_DEBUG_KMS("lid notifier registration failed\n");
-		dev_priv->lid_notifier.notifier_call = NULL;
+		lvds_connector->lid_notifier.notifier_call = NULL;
 	}
-	/* keep the LVDS connector */
-	dev_priv->int_lvds_connector = connector;
 	drm_sysfs_connector_add(connector);
 
-	intel_panel_setup_backlight(dev);
+	intel_panel_init(&intel_connector->panel, fixed_mode);
+	intel_panel_setup_backlight(connector);
 
 	return true;
 
@@ -1117,7 +1136,9 @@
 	DRM_DEBUG_KMS("No LVDS modes found, disabling.\n");
 	drm_connector_cleanup(connector);
 	drm_encoder_cleanup(encoder);
-	kfree(intel_lvds);
-	kfree(intel_connector);
+	if (fixed_mode)
+		drm_mode_destroy(dev, fixed_mode);
+	kfree(lvds_encoder);
+	kfree(lvds_connector);
 	return false;
 }
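
Note: LVDS and eDP now share the same cached-EDID convention: a valid
pointer is usable EDID, ERR_PTR(-ENOENT) means none was found, and
ERR_PTR(-EINVAL) means one was read but rejected by
drm_add_edid_modes(). A minimal consumer-side sketch (the function name
is illustrative):

	static int example_get_modes(struct drm_connector *connector,
				     struct edid *cached_edid)
	{
		/* Only a real EDID contributes probed modes; both
		 * ERR_PTR cases fall through to the fixed panel mode. */
		if (!IS_ERR_OR_NULL(cached_edid))
			return drm_add_edid_modes(connector, cached_edid);

		return 0;	/* caller falls back to panel.fixed_mode */
	}
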
diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c
index cabd84b..b00f1c8 100644
--- a/drivers/gpu/drm/i915/intel_modes.c
+++ b/drivers/gpu/drm/i915/intel_modes.c
@@ -45,7 +45,6 @@
 	drm_mode_connector_update_edid_property(connector, edid);
 	ret = drm_add_edid_modes(connector, edid);
 	drm_edid_to_eld(connector, edid);
-	kfree(edid);
 
 	return ret;
 }
@@ -61,12 +60,16 @@
 			struct i2c_adapter *adapter)
 {
 	struct edid *edid;
+	int ret;
 
 	edid = drm_get_edid(connector, adapter);
 	if (!edid)
 		return 0;
 
-	return intel_connector_update_modes(connector, edid);
+	ret = intel_connector_update_modes(connector, edid);
+	kfree(edid);
+
+	return ret;
 }
 
 static const struct drm_prop_enum_list force_audio_names[] = {
@@ -94,7 +97,7 @@
 
 		dev_priv->force_audio_property = prop;
 	}
-	drm_connector_attach_property(connector, prop, 0);
+	drm_object_attach_property(&connector->base, prop, 0);
 }
 
 static const struct drm_prop_enum_list broadcast_rgb_names[] = {
@@ -121,5 +124,5 @@
 		dev_priv->broadcast_rgb_property = prop;
 	}
 
-	drm_connector_attach_property(connector, prop, 0);
+	drm_object_attach_property(&connector->base, prop, 0);
 }
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 5530413..7741c22 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -154,6 +154,8 @@
 	struct opregion_asle __iomem *asle = dev_priv->opregion.asle;
 	u32 max;
 
+	DRM_DEBUG_DRIVER("bclp = 0x%08x\n", bclp);
+
 	if (!(bclp & ASLE_BCLP_VALID))
 		return ASLE_BACKLIGHT_FAILED;
 
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index e2aacd3..bee8cb6 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -130,32 +130,34 @@
 	return 0;
 }
 
-static u32 i915_read_blc_pwm_ctl(struct drm_i915_private *dev_priv)
+static u32 i915_read_blc_pwm_ctl(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 val;
 
 	/* Restore the CTL value if it lost, e.g. GPU reset */
 
 	if (HAS_PCH_SPLIT(dev_priv->dev)) {
 		val = I915_READ(BLC_PWM_PCH_CTL2);
-		if (dev_priv->saveBLC_PWM_CTL2 == 0) {
-			dev_priv->saveBLC_PWM_CTL2 = val;
+		if (dev_priv->regfile.saveBLC_PWM_CTL2 == 0) {
+			dev_priv->regfile.saveBLC_PWM_CTL2 = val;
 		} else if (val == 0) {
-			I915_WRITE(BLC_PWM_PCH_CTL2,
-				   dev_priv->saveBLC_PWM_CTL2);
-			val = dev_priv->saveBLC_PWM_CTL2;
+			val = dev_priv->regfile.saveBLC_PWM_CTL2;
+			I915_WRITE(BLC_PWM_PCH_CTL2, val);
 		}
 	} else {
 		val = I915_READ(BLC_PWM_CTL);
-		if (dev_priv->saveBLC_PWM_CTL == 0) {
-			dev_priv->saveBLC_PWM_CTL = val;
-			dev_priv->saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_CTL2);
+		if (dev_priv->regfile.saveBLC_PWM_CTL == 0) {
+			dev_priv->regfile.saveBLC_PWM_CTL = val;
+			if (INTEL_INFO(dev)->gen >= 4)
+				dev_priv->regfile.saveBLC_PWM_CTL2 =
+					I915_READ(BLC_PWM_CTL2);
 		} else if (val == 0) {
-			I915_WRITE(BLC_PWM_CTL,
-				   dev_priv->saveBLC_PWM_CTL);
-			I915_WRITE(BLC_PWM_CTL2,
-				   dev_priv->saveBLC_PWM_CTL2);
-			val = dev_priv->saveBLC_PWM_CTL;
+			val = dev_priv->regfile.saveBLC_PWM_CTL;
+			I915_WRITE(BLC_PWM_CTL, val);
+			if (INTEL_INFO(dev)->gen >= 4)
+				I915_WRITE(BLC_PWM_CTL2,
+					   dev_priv->regfile.saveBLC_PWM_CTL2);
 		}
 	}
 
@@ -164,10 +166,9 @@
 
 static u32 _intel_panel_get_max_backlight(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 max;
 
-	max = i915_read_blc_pwm_ctl(dev_priv);
+	max = i915_read_blc_pwm_ctl(dev);
 
 	if (HAS_PCH_SPLIT(dev)) {
 		max >>= 16;
@@ -275,7 +276,7 @@
 	}
 
 	tmp = I915_READ(BLC_PWM_CTL);
-	if (INTEL_INFO(dev)->gen < 4) 
+	if (INTEL_INFO(dev)->gen < 4)
 		level <<= 1;
 	tmp &= ~BACKLIGHT_DUTY_CYCLE_MASK;
 	I915_WRITE(BLC_PWM_CTL, tmp | level);
@@ -374,26 +375,23 @@
 enum drm_connector_status
 intel_panel_detect(struct drm_device *dev)
 {
-#if 0
 	struct drm_i915_private *dev_priv = dev->dev_private;
-#endif
 
-	if (i915_panel_ignore_lid)
-		return i915_panel_ignore_lid > 0 ?
-			connector_status_connected :
-			connector_status_disconnected;
-
-	/* opregion lid state on HP 2540p is wrong at boot up,
-	 * appears to be either the BIOS or Linux ACPI fault */
-#if 0
 	/* Assume that the BIOS does not lie through the OpRegion... */
-	if (dev_priv->opregion.lid_state)
+	if (!i915_panel_ignore_lid && dev_priv->opregion.lid_state) {
 		return ioread32(dev_priv->opregion.lid_state) & 0x1 ?
 			connector_status_connected :
 			connector_status_disconnected;
-#endif
+	}
 
-	return connector_status_unknown;
+	switch (i915_panel_ignore_lid) {
+	case -2:
+		return connector_status_connected;
+	case -1:
+		return connector_status_disconnected;
+	default:
+		return connector_status_unknown;
+	}
 }
 
 #ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
@@ -416,21 +414,14 @@
 	.get_brightness = intel_panel_get_brightness,
 };
 
-int intel_panel_setup_backlight(struct drm_device *dev)
+int intel_panel_setup_backlight(struct drm_connector *connector)
 {
+	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct backlight_properties props;
-	struct drm_connector *connector;
 
 	intel_panel_init_backlight(dev);
 
-	if (dev_priv->int_lvds_connector)
-		connector = dev_priv->int_lvds_connector;
-	else if (dev_priv->int_edp_connector)
-		connector = dev_priv->int_edp_connector;
-	else
-		return -ENODEV;
-
 	memset(&props, 0, sizeof(props));
 	props.type = BACKLIGHT_RAW;
 	props.max_brightness = _intel_panel_get_max_backlight(dev);
@@ -460,9 +451,9 @@
 		backlight_device_unregister(dev_priv->backlight);
 }
 #else
-int intel_panel_setup_backlight(struct drm_device *dev)
+int intel_panel_setup_backlight(struct drm_connector *connector)
 {
-	intel_panel_init_backlight(dev);
+	intel_panel_init_backlight(connector->dev);
 	return 0;
 }
 
@@ -471,3 +462,20 @@
 	return;
 }
 #endif
+
+int intel_panel_init(struct intel_panel *panel,
+		     struct drm_display_mode *fixed_mode)
+{
+	panel->fixed_mode = fixed_mode;
+
+	return 0;
+}
+
+void intel_panel_fini(struct intel_panel *panel)
+{
+	struct intel_connector *intel_connector =
+		container_of(panel, struct intel_connector, panel);
+
+	if (panel->fixed_mode)
+		drm_mode_destroy(intel_connector->base.dev, panel->fixed_mode);
+}
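
Note: intel_panel_init() takes ownership of fixed_mode and
intel_panel_fini() releases it via drm_mode_destroy(), so only error
paths that run before the handoff (such as the LVDS failure path
earlier in this patch) may free the mode themselves. An illustrative
sketch of that invariant (names are hypothetical):

	static bool example_panel_setup(struct drm_device *dev,
					struct intel_panel *panel,
					struct drm_display_mode *fixed_mode)
	{
		if (!fixed_mode)
			return false;	/* nothing to hand over */

		if (intel_panel_init(panel, fixed_mode)) {
			/* init failed: the mode is still ours to free */
			drm_mode_destroy(dev, fixed_mode);
			return false;
		}

		/* the panel owns fixed_mode until intel_panel_fini() */
		return true;
	}
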
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 442968f..496caa7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1325,10 +1325,11 @@
 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
 		   planea_wm);
 	I915_WRITE(DSPFW2,
-		   (I915_READ(DSPFW2) & DSPFW_CURSORA_MASK) |
+		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
 	I915_WRITE(DSPFW3,
-		   (I915_READ(DSPFW3) | (cursor_sr << DSPFW_CURSOR_SR_SHIFT)));
+		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
+		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
 }
 
 static void g4x_update_wm(struct drm_device *dev)
@@ -1374,11 +1375,11 @@
 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
 		   planea_wm);
 	I915_WRITE(DSPFW2,
-		   (I915_READ(DSPFW2) & DSPFW_CURSORA_MASK) |
+		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
 	/* HPLL off in SR has some issues on G4x... disable it */
 	I915_WRITE(DSPFW3,
-		   (I915_READ(DSPFW3) & ~DSPFW_HPLL_SR_EN) |
+		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
 		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
 }
 
@@ -1468,9 +1469,12 @@
 	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
 	crtc = intel_get_crtc_for_plane(dev, 0);
 	if (crtc->enabled && crtc->fb) {
+		int cpp = crtc->fb->bits_per_pixel / 8;
+		if (IS_GEN2(dev))
+			cpp = 4;
+
 		planea_wm = intel_calculate_wm(crtc->mode.clock,
-					       wm_info, fifo_size,
-					       crtc->fb->bits_per_pixel / 8,
+					       wm_info, fifo_size, cpp,
 					       latency_ns);
 		enabled = crtc;
 	} else
@@ -1479,9 +1483,12 @@
 	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
 	crtc = intel_get_crtc_for_plane(dev, 1);
 	if (crtc->enabled && crtc->fb) {
+		int cpp = crtc->fb->bits_per_pixel / 8;
+		if (IS_GEN2(dev))
+			cpp = 4;
+
 		planeb_wm = intel_calculate_wm(crtc->mode.clock,
-					       wm_info, fifo_size,
-					       crtc->fb->bits_per_pixel / 8,
+					       wm_info, fifo_size, cpp,
 					       latency_ns);
 		if (enabled == NULL)
 			enabled = crtc;
@@ -1571,8 +1578,7 @@
 
 	planea_wm = intel_calculate_wm(crtc->mode.clock, &i830_wm_info,
 				       dev_priv->display.get_fifo_size(dev, 0),
-				       crtc->fb->bits_per_pixel / 8,
-				       latency_ns);
+				       4, latency_ns);
 	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
 	fwater_lo |= (3<<8) | planea_wm;
 
@@ -2323,7 +2329,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 limits = gen6_rps_limits(dev_priv, &val);
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 	WARN_ON(val > dev_priv->rps.max_delay);
 	WARN_ON(val < dev_priv->rps.min_delay);
 
@@ -2398,12 +2404,12 @@
 	struct intel_ring_buffer *ring;
 	u32 rp_state_cap;
 	u32 gt_perf_status;
-	u32 pcu_mbox, rc6_mask = 0;
+	u32 rc6vids, pcu_mbox, rc6_mask = 0;
 	u32 gtfifodbg;
 	int rc6_mode;
-	int i;
+	int i, ret;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
 	/* Here begins a magic sequence of register writes to enable
 	 * auto-downclocking.
@@ -2497,30 +2503,16 @@
 		   GEN6_RP_UP_BUSY_AVG |
 		   (IS_HASWELL(dev) ? GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT));
 
-	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-		     500))
-		DRM_ERROR("timeout waiting for pcode mailbox to become idle\n");
-
-	I915_WRITE(GEN6_PCODE_DATA, 0);
-	I915_WRITE(GEN6_PCODE_MAILBOX,
-		   GEN6_PCODE_READY |
-		   GEN6_PCODE_WRITE_MIN_FREQ_TABLE);
-	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-		     500))
-		DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
-
-	/* Check for overclock support */
-	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-		     500))
-		DRM_ERROR("timeout waiting for pcode mailbox to become idle\n");
-	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_READ_OC_PARAMS);
-	pcu_mbox = I915_READ(GEN6_PCODE_DATA);
-	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-		     500))
-		DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
-	if (pcu_mbox & (1<<31)) { /* OC supported */
-		dev_priv->rps.max_delay = pcu_mbox & 0xff;
-		DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
+	ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
+	if (!ret) {
+		pcu_mbox = 0;
+		ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
+		if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
+			dev_priv->rps.max_delay = pcu_mbox & 0xff;
+			DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", (pcu_mbox & 0xff) * 50);
+		}
+	} else {
+		DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
 	}
 
 	gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
@@ -2534,6 +2526,20 @@
 	/* enable all PM interrupts */
 	I915_WRITE(GEN6_PMINTRMSK, 0);
 
+	rc6vids = 0;
+	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
+	if (IS_GEN6(dev) && ret) {
+		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+	} else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
+		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+		rc6vids &= 0xffff00;
+		rc6vids |= GEN6_ENCODE_RC6_VID(450);
+		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
+		if (ret)
+			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+	}
+
 	gen6_gt_force_wake_put(dev_priv);
 }
 
@@ -2541,10 +2547,11 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int min_freq = 15;
-	int gpu_freq, ia_freq, max_ia_freq;
+	int gpu_freq;
+	unsigned int ia_freq, max_ia_freq;
 	int scaling_factor = 180;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
 	max_ia_freq = cpufreq_quick_get_max(0);
 	/*
@@ -2575,17 +2582,11 @@
 		else
 			ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
 		ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+		ia_freq <<= GEN6_PCODE_FREQ_IA_RATIO_SHIFT;
 
-		I915_WRITE(GEN6_PCODE_DATA,
-			   (ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT) |
-			   gpu_freq);
-		I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY |
-			   GEN6_PCODE_WRITE_MIN_FREQ_TABLE);
-		if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) &
-			      GEN6_PCODE_READY) == 0, 10)) {
-			DRM_ERROR("pcode write of freq table timed out\n");
-			continue;
-		}
+		sandybridge_pcode_write(dev_priv,
+					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+					ia_freq | gpu_freq);
 	}
 }
 
@@ -2593,16 +2594,16 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->renderctx) {
-		i915_gem_object_unpin(dev_priv->renderctx);
-		drm_gem_object_unreference(&dev_priv->renderctx->base);
-		dev_priv->renderctx = NULL;
+	if (dev_priv->ips.renderctx) {
+		i915_gem_object_unpin(dev_priv->ips.renderctx);
+		drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
+		dev_priv->ips.renderctx = NULL;
 	}
 
-	if (dev_priv->pwrctx) {
-		i915_gem_object_unpin(dev_priv->pwrctx);
-		drm_gem_object_unreference(&dev_priv->pwrctx->base);
-		dev_priv->pwrctx = NULL;
+	if (dev_priv->ips.pwrctx) {
+		i915_gem_object_unpin(dev_priv->ips.pwrctx);
+		drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
+		dev_priv->ips.pwrctx = NULL;
 	}
 }
 
@@ -2628,14 +2629,14 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->renderctx == NULL)
-		dev_priv->renderctx = intel_alloc_context_page(dev);
-	if (!dev_priv->renderctx)
+	if (dev_priv->ips.renderctx == NULL)
+		dev_priv->ips.renderctx = intel_alloc_context_page(dev);
+	if (!dev_priv->ips.renderctx)
 		return -ENOMEM;
 
-	if (dev_priv->pwrctx == NULL)
-		dev_priv->pwrctx = intel_alloc_context_page(dev);
-	if (!dev_priv->pwrctx) {
+	if (dev_priv->ips.pwrctx == NULL)
+		dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
+	if (!dev_priv->ips.pwrctx) {
 		ironlake_teardown_rc6(dev);
 		return -ENOMEM;
 	}
@@ -2647,6 +2648,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	bool was_interruptible;
 	int ret;
 
 	/* rc6 disabled by default due to repeated reports of hanging during
@@ -2661,6 +2663,9 @@
 	if (ret)
 		return;
 
+	was_interruptible = dev_priv->mm.interruptible;
+	dev_priv->mm.interruptible = false;
+
 	/*
 	 * GPU can automatically power down the render unit if given a page
 	 * to save state.
@@ -2668,12 +2673,13 @@
 	ret = intel_ring_begin(ring, 6);
 	if (ret) {
 		ironlake_teardown_rc6(dev);
+		dev_priv->mm.interruptible = was_interruptible;
 		return;
 	}
 
 	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
 	intel_ring_emit(ring, MI_SET_CONTEXT);
-	intel_ring_emit(ring, dev_priv->renderctx->gtt_offset |
+	intel_ring_emit(ring, dev_priv->ips.renderctx->gtt_offset |
 			MI_MM_SPACE_GTT |
 			MI_SAVE_EXT_STATE_EN |
 			MI_RESTORE_EXT_STATE_EN |
@@ -2688,14 +2694,15 @@
 	 * does an implicit flush, combined with MI_FLUSH above, it should be
 	 * safe to assume that renderctx is valid
 	 */
-	ret = intel_wait_ring_idle(ring);
+	ret = intel_ring_idle(ring);
+	dev_priv->mm.interruptible = was_interruptible;
 	if (ret) {
 		DRM_ERROR("failed to enable ironlake power power savings\n");
 		ironlake_teardown_rc6(dev);
 		return;
 	}
 
-	I915_WRITE(PWRCTXA, dev_priv->pwrctx->gtt_offset | PWRCTX_EN);
+	I915_WRITE(PWRCTXA, dev_priv->ips.pwrctx->gtt_offset | PWRCTX_EN);
 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
 }
 
@@ -3304,37 +3311,72 @@
 
 void intel_disable_gt_powersave(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	if (IS_IRONLAKE_M(dev)) {
 		ironlake_disable_drps(dev);
 		ironlake_disable_rc6(dev);
 	} else if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) {
+		cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
+		mutex_lock(&dev_priv->rps.hw_lock);
 		gen6_disable_rps(dev);
+		mutex_unlock(&dev_priv->rps.hw_lock);
 	}
 }
 
+static void intel_gen6_powersave_work(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, struct drm_i915_private,
+			     rps.delayed_resume_work.work);
+	struct drm_device *dev = dev_priv->dev;
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+	gen6_enable_rps(dev);
+	gen6_update_ring_freq(dev);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
 void intel_enable_gt_powersave(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	if (IS_IRONLAKE_M(dev)) {
 		ironlake_enable_drps(dev);
 		ironlake_enable_rc6(dev);
 		intel_init_emon(dev);
 	} else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
-		gen6_enable_rps(dev);
-		gen6_update_ring_freq(dev);
+		/*
+		 * PCU communication is slow and this doesn't need to be
+		 * done at any specific time, so do this out of our fast path
+		 * to make resume and init faster.
+		 */
+		schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
+				      round_jiffies_up_relative(HZ));
 	}
 }
 
+static void ibx_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+}
+
 static void ironlake_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
+	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
 	/* Required for FBC */
-	dspclk_gate |= DPFCUNIT_CLOCK_GATE_DISABLE |
-		DPFCRUNIT_CLOCK_GATE_DISABLE |
-		DPFDUNIT_CLOCK_GATE_DISABLE;
-	/* Required for CxSR */
-	dspclk_gate |= DPARBUNIT_CLOCK_GATE_DISABLE;
+	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
+		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
+		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
 
 	I915_WRITE(PCH_3DCGDIS0,
 		   MARIUNIT_CLOCK_GATE_DISABLE |
@@ -3342,8 +3384,6 @@
 	I915_WRITE(PCH_3DCGDIS1,
 		   VFMUNIT_CLOCK_GATE_DISABLE);
 
-	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
-
 	/*
 	 * According to the spec the following bits should be set in
 	 * order to enable memory self-refresh
@@ -3354,9 +3394,7 @@
 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
 		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
 		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
-	I915_WRITE(ILK_DSPCLK_GATE,
-		   (I915_READ(ILK_DSPCLK_GATE) |
-		    ILK_DPARB_CLK_GATE));
+	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
 	I915_WRITE(DISP_ARB_CTL,
 		   (I915_READ(DISP_ARB_CTL) |
 		    DISP_FBC_WM_DIS));
@@ -3378,28 +3416,56 @@
 		I915_WRITE(ILK_DISPLAY_CHICKEN2,
 			   I915_READ(ILK_DISPLAY_CHICKEN2) |
 			   ILK_DPARB_GATE);
-		I915_WRITE(ILK_DSPCLK_GATE,
-			   I915_READ(ILK_DSPCLK_GATE) |
-			   ILK_DPFC_DIS1 |
-			   ILK_DPFC_DIS2 |
-			   ILK_CLK_FBC);
 	}
 
+	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
+
 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
 		   ILK_ELPIN_409_SELECT);
 	I915_WRITE(_3D_CHICKEN2,
 		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
 		   _3D_CHICKEN2_WM_READ_PIPELINED);
+
+	/* WaDisableRenderCachePipelinedFlush */
+	I915_WRITE(CACHE_MODE_0,
+		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
+
+	ibx_init_clock_gating(dev);
+}
+
+static void cpt_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe;
+
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
+		   DPLS_EDP_PPS_FIX_DIS);
+	/* The below fixes the weird display corruption, a few pixels shifted
+	 * downward, on (only) LVDS of some HP laptops with IVY.
+	 */
+	for_each_pipe(pipe)
+		I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_CHICKEN2_TIMING_OVERRIDE);
+	/* WADP0ClockGatingDisable */
+	for_each_pipe(pipe) {
+		I915_WRITE(TRANS_CHICKEN1(pipe),
+			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+	}
 }
 
 static void gen6_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
-	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
+	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
+	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
 
 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
@@ -3454,11 +3520,12 @@
 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
 		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
-	I915_WRITE(ILK_DSPCLK_GATE,
-		   I915_READ(ILK_DSPCLK_GATE) |
-		   ILK_DPARB_CLK_GATE  |
-		   ILK_DPFD_CLK_GATE);
+	I915_WRITE(ILK_DSPCLK_GATE_D,
+		   I915_READ(ILK_DSPCLK_GATE_D) |
+		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
+		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
 
+	/* WaMbcDriverBootEnable */
 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
@@ -3473,6 +3540,8 @@
 	 * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
 	I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
 	I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
+
+	cpt_init_clock_gating(dev);
 }
 
 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
@@ -3487,13 +3556,24 @@
 	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
 }
 
+static void lpt_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * TODO: this bit should only be enabled when really needed, then
+	 * disabled when not needed anymore in order to save power.
+	 */
+	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
+		I915_WRITE(SOUTH_DSPCLK_GATE_D,
+			   I915_READ(SOUTH_DSPCLK_GATE_D) |
+			   PCH_LP_PARTITION_LEVEL_DISABLE);
+}
+
 static void haswell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
-	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
-
-	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
 
 	I915_WRITE(WM3_LP_ILK, 0);
 	I915_WRITE(WM2_LP_ILK, 0);
@@ -3504,12 +3584,6 @@
 	 */
 	I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
-	I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
-
-	I915_WRITE(IVB_CHICKEN3,
-		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
-		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
-
 	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -3538,6 +3612,10 @@
 	I915_WRITE(CACHE_MODE_1,
 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
+	/* WaMbcDriverBootEnable */
+	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
+		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
+
 	/* XXX: This is a workaround for early silicon revisions and should be
 	 * removed later.
 	 */
@@ -3547,27 +3625,38 @@
 			WM_DBG_DISALLOW_SPRITE |
 			WM_DBG_DISALLOW_MAXFIFO);
 
+	lpt_init_clock_gating(dev);
 }
 
 static void ivybridge_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
-	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
 	uint32_t snpcr;
 
-	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
-
 	I915_WRITE(WM3_LP_ILK, 0);
 	I915_WRITE(WM2_LP_ILK, 0);
 	I915_WRITE(WM1_LP_ILK, 0);
 
-	I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
+	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
 
+	/* WaDisableEarlyCull */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
+
+	/* WaDisableBackToBackFlipFix */
 	I915_WRITE(IVB_CHICKEN3,
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+	/* WaDisablePSDDualDispatchEnable */
+	if (IS_IVB_GT1(dev))
+		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+	else
+		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
+			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
 	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -3576,7 +3665,18 @@
 	I915_WRITE(GEN7_L3CNTLREG1,
 			GEN7_WA_FOR_GEN7_L3_CONTROL);
 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
-			GEN7_WA_L3_CHICKEN_MODE);
+		   GEN7_WA_L3_CHICKEN_MODE);
+	if (IS_IVB_GT1(dev))
+		I915_WRITE(GEN7_ROW_CHICKEN2,
+			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+	else
+		I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
+			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+	/* WaForceL3Serialization */
+	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
 
 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
 	 * gating disable must be set.  Failure to set it results in
@@ -3607,6 +3707,7 @@
 		intel_flush_display_plane(dev_priv, pipe);
 	}
 
+	/* WaMbcDriverBootEnable */
 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
@@ -3620,39 +3721,59 @@
 	snpcr &= ~GEN6_MBC_SNPCR_MASK;
 	snpcr |= GEN6_MBC_SNPCR_MED;
 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
+
+	cpt_init_clock_gating(dev);
 }
 
 static void valleyview_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
-	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
-
-	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
 
 	I915_WRITE(WM3_LP_ILK, 0);
 	I915_WRITE(WM2_LP_ILK, 0);
 	I915_WRITE(WM1_LP_ILK, 0);
 
-	I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
+	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
 
+	/* WaDisableEarlyCull */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
+
+	/* WaDisableBackToBackFlipFix */
 	I915_WRITE(IVB_CHICKEN3,
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+		   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
 	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
 
 	/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
-	I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+	I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
 
+	/* WaForceL3Serialization */
+	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+	/* WaDisableDopClockGating */
+	I915_WRITE(GEN7_ROW_CHICKEN2,
+		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
 	/* This is required by WaCatErrorRejectionIssue */
 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
 		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
 		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
+	/* WaMbcDriverBootEnable */
 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
@@ -3704,6 +3825,13 @@
 		   PIPEA_HLINE_INT_EN | PIPEA_VBLANK_INT_EN |
 		   SPRITEB_FLIPDONE_INT_EN | SPRITEA_FLIPDONE_INT_EN |
 		   PLANEA_FLIPDONE_INT_EN);
+
+	/*
+	 * WaDisableVLVClockGating_VBIIssue
+	 * Disable clock gating on the GCFG unit to prevent a delay
+	 * in the reporting of vblank events.
+	 */
+	I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -3722,6 +3850,10 @@
 	if (IS_GM45(dev))
 		dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
 	I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
+
+	/* WaDisableRenderCachePipelinedFlush */
+	I915_WRITE(CACHE_MODE_0,
+		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 }
 
 static void crestline_init_clock_gating(struct drm_device *dev)
@@ -3777,44 +3909,11 @@
 	I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
 }
 
-static void ibx_init_clock_gating(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	/*
-	 * On Ibex Peak and Cougar Point, we need to disable clock
-	 * gating for the panel power sequencer or it will fail to
-	 * start up when no ports are active.
-	 */
-	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
-}
-
-static void cpt_init_clock_gating(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int pipe;
-
-	/*
-	 * On Ibex Peak and Cougar Point, we need to disable clock
-	 * gating for the panel power sequencer or it will fail to
-	 * start up when no ports are active.
-	 */
-	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
-	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
-		   DPLS_EDP_PPS_FIX_DIS);
-	/* Without this, mode sets may fail silently on FDI */
-	for_each_pipe(pipe)
-		I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_AUTOTRAIN_GEN_STALL_DIS);
-}
-
 void intel_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	dev_priv->display.init_clock_gating(dev);
-
-	if (dev_priv->display.init_pch_clock_gating)
-		dev_priv->display.init_pch_clock_gating(dev);
 }
 
 /* Starting with Haswell, we have different power wells for
@@ -3840,7 +3939,7 @@
 
 		if ((well & HSW_PWR_WELL_STATE) == 0) {
 			I915_WRITE(power_wells[i], well & HSW_PWR_WELL_ENABLE);
-			if (wait_for(I915_READ(power_wells[i] & HSW_PWR_WELL_STATE), 20))
+			if (wait_for((I915_READ(power_wells[i]) & HSW_PWR_WELL_STATE), 20))
 				DRM_ERROR("Error enabling power well %lx\n", power_wells[i]);
 		}
 	}
@@ -3878,11 +3977,6 @@
 
 	/* For FIFO watermark updates */
 	if (HAS_PCH_SPLIT(dev)) {
-		if (HAS_PCH_IBX(dev))
-			dev_priv->display.init_pch_clock_gating = ibx_init_clock_gating;
-		else if (HAS_PCH_CPT(dev))
-			dev_priv->display.init_pch_clock_gating = cpt_init_clock_gating;
-
 		if (IS_GEN5(dev)) {
 			if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK)
 				dev_priv->display.update_wm = ironlake_update_wm;
@@ -3993,6 +4087,12 @@
 		DRM_ERROR("GT thread status wait timed out\n");
 }
 
+static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE_NOTRACE(FORCEWAKE, 0);
+	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
+}
+
 static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
 	u32 forcewake_ack;
@@ -4006,7 +4106,7 @@
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-	I915_WRITE_NOTRACE(FORCEWAKE, 1);
+	I915_WRITE_NOTRACE(FORCEWAKE, FORCEWAKE_KERNEL);
 	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 
 	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
@@ -4016,6 +4116,12 @@
 	__gen6_gt_wait_for_thread_c0(dev_priv);
 }
 
+static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff));
+	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
+}
+
 static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
 {
 	u32 forcewake_ack;
@@ -4029,7 +4135,7 @@
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
+	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 
 	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
@@ -4073,7 +4179,7 @@
 
 static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
 {
-	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(1));
+	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
 	/* gen6_gt_check_fifodbg doubles as the POSTING_READ */
 	gen6_gt_check_fifodbg(dev_priv);
 }
@@ -4111,13 +4217,18 @@
 	return ret;
 }
 
+static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff));
+}
+
 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 {
 	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1) == 0,
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(1));
+	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 
 	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1),
 			    FORCEWAKE_ACK_TIMEOUT_MS))
@@ -4128,49 +4239,89 @@
 
 static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 {
-	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(1));
+	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
 	/* The below doubles as a POSTING_READ */
 	gen6_gt_check_fifodbg(dev_priv);
 }
 
+void intel_gt_reset(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
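+	/*
+	 * Called from intel_gt_init() to clear out any forcewake state the
+	 * BIOS or a previous kernel may have left behind, so the get/put
+	 * hooks installed below start from known-idle hardware.
+	 */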
+	if (IS_VALLEYVIEW(dev)) {
+		vlv_force_wake_reset(dev_priv);
+	} else if (INTEL_INFO(dev)->gen >= 6) {
+		__gen6_gt_force_wake_reset(dev_priv);
+		if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
+			__gen6_gt_force_wake_mt_reset(dev_priv);
+	}
+}
+
 void intel_gt_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	spin_lock_init(&dev_priv->gt_lock);
 
+	intel_gt_reset(dev);
+
 	if (IS_VALLEYVIEW(dev)) {
 		dev_priv->gt.force_wake_get = vlv_force_wake_get;
 		dev_priv->gt.force_wake_put = vlv_force_wake_put;
-	} else if (INTEL_INFO(dev)->gen >= 6) {
+	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
+		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get;
+		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put;
+	} else if (IS_GEN6(dev)) {
 		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
 		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
-
-		/* IVB configs may use multi-threaded forcewake */
-		if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
-			u32 ecobus;
-
-			/* A small trick here - if the bios hasn't configured
-			 * MT forcewake, and if the device is in RC6, then
-			 * force_wake_mt_get will not wake the device and the
-			 * ECOBUS read will return zero. Which will be
-			 * (correctly) interpreted by the test below as MT
-			 * forcewake being disabled.
-			 */
-			mutex_lock(&dev->struct_mutex);
-			__gen6_gt_force_wake_mt_get(dev_priv);
-			ecobus = I915_READ_NOTRACE(ECOBUS);
-			__gen6_gt_force_wake_mt_put(dev_priv);
-			mutex_unlock(&dev->struct_mutex);
-
-			if (ecobus & FORCEWAKE_MT_ENABLE) {
-				DRM_DEBUG_KMS("Using MT version of forcewake\n");
-				dev_priv->gt.force_wake_get =
-					__gen6_gt_force_wake_mt_get;
-				dev_priv->gt.force_wake_put =
-					__gen6_gt_force_wake_mt_put;
-			}
-		}
 	}
+	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
+			  intel_gen6_powersave_work);
 }
 
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
+		return -EAGAIN;
+	}
+
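+	/* Reads pass their request parameter in through DATA as well, which
+	 * is why *val is written out before the mailbox is kicked. */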
+	I915_WRITE(GEN6_PCODE_DATA, *val);
+	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+
+	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
+		     500)) {
+		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
+		return -ETIMEDOUT;
+	}
+
+	*val = I915_READ(GEN6_PCODE_DATA);
+	I915_WRITE(GEN6_PCODE_DATA, 0);
+
+	return 0;
+}
+
+int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
+		return -EAGAIN;
+	}
+
+	I915_WRITE(GEN6_PCODE_DATA, val);
+	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+
+	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
+		     500)) {
+		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
+		return -ETIMEDOUT;
+	}
+
+	I915_WRITE(GEN6_PCODE_DATA, 0);
+
+	return 0;
+}
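
With the mailbox handshake captured in sandybridge_pcode_read()/write(), callers shrink to a lock-take-and-call pattern. A minimal sketch (hypothetical caller; the helpers, GEN6_READ_OC_PARAMS and rps.hw_lock all appear in the patch above):

	u32 val = 0;
	int ret;

	mutex_lock(&dev_priv->rps.hw_lock);
	ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &val);
	mutex_unlock(&dev_priv->rps.hw_lock);
	if (ret)
		DRM_DEBUG_DRIVER("pcode read failed: %d\n", ret);
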
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ecbc5c5..2346b92 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -45,7 +45,7 @@
 
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
-	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
+	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
 	if (space < 0)
 		space += ring->size;
 	return space;
@@ -245,7 +245,7 @@
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
-		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
 	ret = intel_ring_begin(ring, 4);
@@ -555,15 +555,11 @@
 
 static void
 update_mboxes(struct intel_ring_buffer *ring,
-	    u32 seqno,
-	    u32 mmio_offset)
+	      u32 mmio_offset)
 {
-	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
-			      MI_SEMAPHORE_GLOBAL_GTT |
-			      MI_SEMAPHORE_REGISTER |
-			      MI_SEMAPHORE_UPDATE);
-	intel_ring_emit(ring, seqno);
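+	/* Write the outstanding lazy-request seqno into the other ring's
+	 * mbox register via LRI, replacing the dedicated semaphore-update
+	 * command. */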
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, mmio_offset);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 }
 
 /**
@@ -576,8 +572,7 @@
  * This acts like a signal in the canonical semaphore.
  */
 static int
-gen6_add_request(struct intel_ring_buffer *ring,
-		 u32 *seqno)
+gen6_add_request(struct intel_ring_buffer *ring)
 {
 	u32 mbox1_reg;
 	u32 mbox2_reg;
@@ -590,13 +585,11 @@
 	mbox1_reg = ring->signal_mbox[0];
 	mbox2_reg = ring->signal_mbox[1];
 
-	*seqno = i915_gem_next_request_seqno(ring);
-
-	update_mboxes(ring, *seqno, mbox1_reg);
-	update_mboxes(ring, *seqno, mbox2_reg);
+	update_mboxes(ring, mbox1_reg);
+	update_mboxes(ring, mbox2_reg);
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, *seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
@@ -653,10 +646,8 @@
 } while (0)
 
 static int
-pc_render_add_request(struct intel_ring_buffer *ring,
-		      u32 *result)
+pc_render_add_request(struct intel_ring_buffer *ring)
 {
-	u32 seqno = i915_gem_next_request_seqno(ring);
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
@@ -677,7 +668,7 @@
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 128; /* write to separate cachelines */
@@ -696,11 +687,10 @@
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
@@ -888,25 +878,20 @@
 }
 
 static int
-i9xx_add_request(struct intel_ring_buffer *ring,
-		 u32 *result)
+i9xx_add_request(struct intel_ring_buffer *ring)
 {
-	u32 seqno;
 	int ret;
 
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
-	seqno = i915_gem_next_request_seqno(ring);
-
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
@@ -964,7 +949,9 @@
 }
 
 static int
-i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
+i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			 u32 offset, u32 length,
+			 unsigned flags)
 {
 	int ret;
 
@@ -975,7 +962,7 @@
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
 			MI_BATCH_GTT |
-			MI_BATCH_NON_SECURE_I965);
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
 
@@ -984,7 +971,8 @@
 
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
-				u32 offset, u32 len)
+				u32 offset, u32 len,
+				unsigned flags)
 {
 	int ret;
 
@@ -993,7 +981,7 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_BATCH_BUFFER);
-	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
+	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
 	intel_ring_emit(ring, offset + len - 8);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
@@ -1003,7 +991,8 @@
 
 static int
 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
-				u32 offset, u32 len)
+			 u32 offset, u32 len,
+			 unsigned flags)
 {
 	int ret;
 
@@ -1012,7 +1001,7 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
-	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
+	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
 	intel_ring_advance(ring);
 
 	return 0;
@@ -1075,6 +1064,29 @@
 	return ret;
 }
 
+static int init_phys_hws_pga(struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	u32 addr;
+
+	if (!dev_priv->status_page_dmah) {
+		dev_priv->status_page_dmah =
+			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
+		if (!dev_priv->status_page_dmah)
+			return -ENOMEM;
+	}
+
+	addr = dev_priv->status_page_dmah->busaddr;
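+	/* Gen4+ can keep the HWS above 4GiB: bits 35:32 of the bus address
+	 * are programmed into bits 7:4 of HWS_PGA. */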
+	if (INTEL_INFO(ring->dev)->gen >= 4)
+		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+	I915_WRITE(HWS_PGA, addr);
+
+	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
+	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+
+	return 0;
+}
+
 static int intel_init_ring_buffer(struct drm_device *dev,
 				  struct intel_ring_buffer *ring)
 {
@@ -1086,6 +1098,7 @@
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ring->size = 32 * PAGE_SIZE;
+	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
 
 	init_waitqueue_head(&ring->irq_queue);
 
@@ -1093,6 +1106,11 @@
 		ret = init_status_page(ring);
 		if (ret)
 			return ret;
+	} else {
+		BUG_ON(ring->id != RCS);
+		ret = init_phys_hws_pga(ring);
+		if (ret)
+			return ret;
 	}
 
 	obj = i915_gem_alloc_object(dev, ring->size);
@@ -1157,7 +1175,7 @@
 
 	/* Disable the ring buffer. The ring must be idle at this point */
 	dev_priv = ring->dev->dev_private;
-	ret = intel_wait_ring_idle(ring);
+	ret = intel_ring_idle(ring);
 	if (ret)
 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
 			  ring->name, ret);
@@ -1176,28 +1194,6 @@
 	cleanup_status_page(ring);
 }
 
-static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
-{
-	uint32_t __iomem *virt;
-	int rem = ring->size - ring->tail;
-
-	if (ring->space < rem) {
-		int ret = intel_wait_ring_buffer(ring, rem);
-		if (ret)
-			return ret;
-	}
-
-	virt = ring->virtual_start + ring->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ring->tail = 0;
-	ring->space = ring_space(ring);
-
-	return 0;
-}
-
 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
@@ -1231,7 +1227,7 @@
 		if (request->tail == -1)
 			continue;
 
-		space = request->tail - (ring->tail + 8);
+		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
 		if (space < 0)
 			space += ring->size;
 		if (space >= n) {
@@ -1266,7 +1262,7 @@
 	return 0;
 }
 
-int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
+static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1309,6 +1305,60 @@
 	return -EBUSY;
 }
 
+static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
+{
+	uint32_t __iomem *virt;
+	int rem = ring->size - ring->tail;
+
+	if (ring->space < rem) {
+		int ret = ring_wait_for_space(ring, rem);
+		if (ret)
+			return ret;
+	}
+
+	virt = ring->virtual_start + ring->tail;
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
+
+	ring->tail = 0;
+	ring->space = ring_space(ring);
+
+	return 0;
+}
+
+int intel_ring_idle(struct intel_ring_buffer *ring)
+{
+	u32 seqno;
+	int ret;
+
+	/* We need to add any requests required to flush the objects and ring */
+	if (ring->outstanding_lazy_request) {
+		ret = i915_add_request(ring, NULL, NULL);
+		if (ret)
+			return ret;
+	}
+
+	/* Wait upon the last request to be completed */
+	if (list_empty(&ring->request_list))
+		return 0;
+
+	seqno = list_entry(ring->request_list.prev,
+			   struct drm_i915_gem_request,
+			   list)->seqno;
+
+	return i915_wait_seqno(ring, seqno);
+}
+
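+/* Reserve a seqno for the request under construction (the "olr"), so
+ * that nothing emitted after intel_ring_begin() needs to allocate one
+ * mid-stream. */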
+static int
+intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
+{
+	if (ring->outstanding_lazy_request)
+		return 0;
+
+	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
+}
+
 int intel_ring_begin(struct intel_ring_buffer *ring,
 		     int num_dwords)
 {
@@ -1320,6 +1370,11 @@
 	if (ret)
 		return ret;
 
+	/* Preallocate the olr before touching the ring */
+	ret = intel_ring_alloc_seqno(ring);
+	if (ret)
+		return ret;
+
 	if (unlikely(ring->tail + n > ring->effective_size)) {
 		ret = intel_wrap_ring_buffer(ring);
 		if (unlikely(ret))
@@ -1327,7 +1382,7 @@
 	}
 
 	if (unlikely(ring->space < n)) {
-		ret = intel_wait_ring_buffer(ring, n);
+		ret = ring_wait_for_space(ring, n);
 		if (unlikely(ret))
 			return ret;
 	}
@@ -1391,10 +1446,17 @@
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.5 - video engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1402,8 +1464,9 @@
 }
 
 static int
-gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			      u32 offset, u32 len)
+hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			      u32 offset, u32 len,
+			      unsigned flags)
 {
 	int ret;
 
@@ -1411,7 +1474,30 @@
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+	intel_ring_emit(ring,
+			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
+	/* bit0-7 is the length on GEN6+ */
+	intel_ring_emit(ring, offset);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			      u32 offset, u32 len,
+			      unsigned flags)
+{
+	int ret;
+
+	ret = intel_ring_begin(ring, 2);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring,
+			MI_BATCH_BUFFER_START |
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
 	/* bit0-7 is the length on GEN6+ */
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
@@ -1432,10 +1518,17 @@
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB;
+		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+			MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1490,7 +1583,9 @@
 		ring->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 	ring->write_tail = ring_write_tail;
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (IS_HASWELL(dev))
+		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
+	else if (INTEL_INFO(dev)->gen >= 6)
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	else if (INTEL_INFO(dev)->gen >= 4)
 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
@@ -1501,12 +1596,6 @@
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
-
-	if (!I915_NEED_GFX_HWS(dev)) {
-		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
-	}
-
 	return intel_init_ring_buffer(dev, ring);
 }
 
@@ -1514,6 +1603,7 @@
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	int ret;
 
 	ring->name = "render ring";
 	ring->id = RCS;
@@ -1551,16 +1641,13 @@
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
-	if (!I915_NEED_GFX_HWS(dev))
-		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ring->size = size;
 	ring->effective_size = ring->size;
-	if (IS_I830(ring->dev))
+	if (IS_I830(ring->dev) || IS_845G(ring->dev))
 		ring->effective_size -= 128;
 
 	ring->virtual_start = ioremap_wc(start, size);
@@ -1570,6 +1657,12 @@
 		return -ENOMEM;
 	}
 
+	if (!I915_NEED_GFX_HWS(dev)) {
+		ret = init_phys_hws_pga(ring);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -1618,7 +1711,6 @@
 	}
 	ring->init = init_ring_common;
 
-
 	return intel_init_ring_buffer(dev, ring);
 }
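
With I915_RING_FREE_SPACE the old magic "+ 8" becomes a named 64-byte reserve, sized to the Bspec rule quoted in intel_ringbuffer.h. A worked example with illustrative numbers:

	/* size = 4096, head = 0, tail = 4032 */
	space = (head & HEAD_ADDR) - (tail + I915_RING_FREE_SPACE);
	if (space < 0)		/* 0 - 4096 = -4096, so wrap */
		space += size;	/* space = 0: tail halts a cacheline short of head */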
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2ea7a31..526182e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1,6 +1,17 @@
 #ifndef _INTEL_RINGBUFFER_H_
 #define _INTEL_RINGBUFFER_H_
 
+/*
+ * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
+ * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
+ * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
+ *
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
+ * cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ */
+#define I915_RING_FREE_SPACE 64
+
 struct  intel_hw_status_page {
 	u32		*page_addr;
 	unsigned int	gfx_addr;
@@ -70,8 +81,7 @@
 	int __must_check (*flush)(struct intel_ring_buffer *ring,
 				  u32	invalidate_domains,
 				  u32	flush_domains);
-	int		(*add_request)(struct intel_ring_buffer *ring,
-				       u32 *seqno);
+	int		(*add_request)(struct intel_ring_buffer *ring);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
@@ -81,7 +91,9 @@
 	u32		(*get_seqno)(struct intel_ring_buffer *ring,
 				     bool lazy_coherency);
 	int		(*dispatch_execbuffer)(struct intel_ring_buffer *ring,
-					       u32 offset, u32 length);
+					       u32 offset, u32 length,
+					       unsigned flags);
+#define I915_DISPATCH_SECURE 0x1
 	void		(*cleanup)(struct intel_ring_buffer *ring);
 	int		(*sync_to)(struct intel_ring_buffer *ring,
 				   struct intel_ring_buffer *to,
@@ -181,27 +193,21 @@
  * The area from dword 0x20 to 0x3ff is available for driver usage.
  */
 #define I915_GEM_HWS_INDEX		0x20
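+/* Scratch dword in the HWS page, used as the mandatory post-sync write
+ * target for the MI_FLUSH_DW TLB invalidates in intel_ringbuffer.c. */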
+#define I915_GEM_HWS_SCRATCH_INDEX	0x30
+#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
 
-int __must_check intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n);
-static inline int intel_wait_ring_idle(struct intel_ring_buffer *ring)
-{
-	return intel_wait_ring_buffer(ring, ring->size - 8);
-}
-
 int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n);
-
 static inline void intel_ring_emit(struct intel_ring_buffer *ring,
 				   u32 data)
 {
 	iowrite32(data, ring->virtual_start + ring->tail);
 	ring->tail += 4;
 }
-
 void intel_ring_advance(struct intel_ring_buffer *ring);
+int __must_check intel_ring_idle(struct intel_ring_buffer *ring);
 
-u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
 int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
 int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
 
@@ -217,6 +223,12 @@
 	return ring->tail;
 }
 
+static inline u32 intel_ring_get_seqno(struct intel_ring_buffer *ring)
+{
+	BUG_ON(ring->outstanding_lazy_request == 0);
+	return ring->outstanding_lazy_request;
+}
+
 static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno)
 {
 	if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
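
dispatch_execbuffer() now threads a flags word through every ring, with I915_DISPATCH_SECURE as its first user. A hypothetical call site (is_secure, exec_start and exec_len are illustrative names):

	ret = ring->dispatch_execbuffer(ring, exec_start, exec_len,
					is_secure ? I915_DISPATCH_SECURE : 0);
	if (ret)
		return ret;
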
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index a6ac0b4..c275bf0 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -509,7 +509,7 @@
 static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo,
 				     void *response, int response_len)
 {
-	u8 retry = 5;
+	u8 retry = 15; /* 5 quick checks, followed by 10 long checks */
 	u8 status;
 	int i;
 
@@ -522,14 +522,27 @@
 	 * command to be complete.
 	 *
 	 * Check up to 15 times in case the hardware failed to read the docs.
+	 *
+	 * Also beware that the first response by many devices is to
+	 * reply PENDING and stall for time. TVs are notorious for
+	 * requiring longer than specified to complete their replies.
+	 * Originally (in the DDX long ago), the delay was only ever 15ms
+	 * with an additional delay of 30ms applied for TVs added later after
+	 * many experiments. To accommodate both sets of delays, we do a
+	 * sequence of slow checks if the device is falling behind and fails
+	 * to reply within 5*15µs.
 	 */
 	if (!intel_sdvo_read_byte(intel_sdvo,
 				  SDVO_I2C_CMD_STATUS,
 				  &status))
 		goto log_fail;
 
-	while (status == SDVO_CMD_STATUS_PENDING && retry--) {
-		udelay(15);
+	while (status == SDVO_CMD_STATUS_PENDING && --retry) {
+		if (retry < 10)
+			msleep(15);
+		else
+			udelay(15);
+
 		if (!intel_sdvo_read_byte(intel_sdvo,
 					  SDVO_I2C_CMD_STATUS,
 					  &status))
@@ -1228,6 +1241,30 @@
 
 	temp = I915_READ(intel_sdvo->sdvo_reg);
 	if ((temp & SDVO_ENABLE) != 0) {
+		/* HW workaround for IBX, we need to move the port to
+		 * transcoder A before disabling it. */
+		if (HAS_PCH_IBX(encoder->base.dev)) {
+			struct drm_crtc *crtc = encoder->base.crtc;
+			int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
+
+			if (temp & SDVO_PIPE_B_SELECT) {
+				temp &= ~SDVO_PIPE_B_SELECT;
+				I915_WRITE(intel_sdvo->sdvo_reg, temp);
+				POSTING_READ(intel_sdvo->sdvo_reg);
+
+				/* Again we need to write this twice. */
+				I915_WRITE(intel_sdvo->sdvo_reg, temp);
+				POSTING_READ(intel_sdvo->sdvo_reg);
+
+				/* Transcoder selection bits only update
+				 * effectively on vblank. */
+				if (crtc)
+					intel_wait_for_vblank(encoder->base.dev, pipe);
+				else
+					msleep(50);
+			}
+		}
+
 		intel_sdvo_write_sdvox(intel_sdvo, temp & ~SDVO_ENABLE);
 	}
 }
@@ -1244,8 +1281,20 @@
 	u8 status;
 
 	temp = I915_READ(intel_sdvo->sdvo_reg);
-	if ((temp & SDVO_ENABLE) == 0)
+	if ((temp & SDVO_ENABLE) == 0) {
+		/* HW workaround for IBX, we need to move the port
+		 * to transcoder A before disabling it. */
+		if (HAS_PCH_IBX(dev)) {
+			struct drm_crtc *crtc = encoder->base.crtc;
+			int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
+
+			/* Restore the transcoder select bit. */
+			if (pipe == PIPE_B)
+				temp |= SDVO_PIPE_B_SELECT;
+		}
+
 		intel_sdvo_write_sdvox(intel_sdvo, temp | SDVO_ENABLE);
+	}
 	for (i = 0; i < 2; i++)
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
 
@@ -1499,15 +1548,9 @@
 	struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector);
 	enum drm_connector_status ret;
 
-	if (!intel_sdvo_write_cmd(intel_sdvo,
-				  SDVO_CMD_GET_ATTACHED_DISPLAYS, NULL, 0))
-		return connector_status_unknown;
-
-	/* add 30ms delay when the output type might be TV */
-	if (intel_sdvo->caps.output_flags & SDVO_TV_MASK)
-		msleep(30);
-
-	if (!intel_sdvo_read_response(intel_sdvo, &response, 2))
+	if (!intel_sdvo_get_value(intel_sdvo,
+				  SDVO_CMD_GET_ATTACHED_DISPLAYS,
+				  &response, 2))
 		return connector_status_unknown;
 
 	DRM_DEBUG_KMS("SDVO response %d %d [%x]\n",
@@ -1796,7 +1839,7 @@
 	intel_sdvo_destroy_enhance_property(connector);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
-	kfree(connector);
+	kfree(intel_sdvo_connector);
 }
 
 static bool intel_sdvo_detect_hdmi_audio(struct drm_connector *connector)
@@ -1828,7 +1871,7 @@
 	uint8_t cmd;
 	int ret;
 
-	ret = drm_connector_property_set_value(connector, property, val);
+	ret = drm_object_property_set_value(&connector->base, property, val);
 	if (ret)
 		return ret;
 
@@ -1883,7 +1926,7 @@
 	} else if (IS_TV_OR_LVDS(intel_sdvo_connector)) {
 		temp_value = val;
 		if (intel_sdvo_connector->left == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 intel_sdvo_connector->right, val);
 			if (intel_sdvo_connector->left_margin == temp_value)
 				return 0;
@@ -1895,7 +1938,7 @@
 			cmd = SDVO_CMD_SET_OVERSCAN_H;
 			goto set_value;
 		} else if (intel_sdvo_connector->right == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 intel_sdvo_connector->left, val);
 			if (intel_sdvo_connector->right_margin == temp_value)
 				return 0;
@@ -1907,7 +1950,7 @@
 			cmd = SDVO_CMD_SET_OVERSCAN_H;
 			goto set_value;
 		} else if (intel_sdvo_connector->top == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 intel_sdvo_connector->bottom, val);
 			if (intel_sdvo_connector->top_margin == temp_value)
 				return 0;
@@ -1919,7 +1962,7 @@
 			cmd = SDVO_CMD_SET_OVERSCAN_V;
 			goto set_value;
 		} else if (intel_sdvo_connector->bottom == property) {
-			drm_connector_property_set_value(connector,
+			drm_object_property_set_value(&connector->base,
 							 intel_sdvo_connector->top, val);
 			if (intel_sdvo_connector->bottom_margin == temp_value)
 				return 0;
@@ -2072,17 +2115,24 @@
 	else
 		mapping = &dev_priv->sdvo_mappings[1];
 
-	pin = GMBUS_PORT_DPB;
-	if (mapping->initialized)
+	if (mapping->initialized && intel_gmbus_is_port_valid(mapping->i2c_pin))
 		pin = mapping->i2c_pin;
+	else
+		pin = GMBUS_PORT_DPB;
 
-	if (intel_gmbus_is_port_valid(pin)) {
-		sdvo->i2c = intel_gmbus_get_adapter(dev_priv, pin);
-		intel_gmbus_set_speed(sdvo->i2c, GMBUS_RATE_1MHZ);
-		intel_gmbus_force_bit(sdvo->i2c, true);
-	} else {
-		sdvo->i2c = intel_gmbus_get_adapter(dev_priv, GMBUS_PORT_DPB);
-	}
+	sdvo->i2c = intel_gmbus_get_adapter(dev_priv, pin);
+
+	/* With gmbus we should be able to drive sdvo i2c at 2MHz, but somehow
+	 * our code totally fails once we start using gmbus. Hence fall back to
+	 * bit banging for now. */
+	intel_gmbus_force_bit(sdvo->i2c, true);
+}
+
+/* undo any changes intel_sdvo_select_i2c_bus() did to sdvo->i2c */
+static void
+intel_sdvo_unselect_i2c_bus(struct intel_sdvo *sdvo)
+{
+	intel_gmbus_force_bit(sdvo->i2c, false);
 }
 
 static bool
@@ -2427,7 +2477,7 @@
 				i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
 
 	intel_sdvo->tv_format_index = intel_sdvo_connector->tv_format_supported[0];
-	drm_connector_attach_property(&intel_sdvo_connector->base.base,
+	drm_object_attach_property(&intel_sdvo_connector->base.base.base,
 				      intel_sdvo_connector->tv_format, 0);
 	return true;
 
@@ -2443,7 +2493,7 @@
 		intel_sdvo_connector->name = \
 			drm_property_create_range(dev, 0, #name, 0, data_value[0]); \
 		if (!intel_sdvo_connector->name) return false; \
-		drm_connector_attach_property(connector, \
+		drm_object_attach_property(&connector->base, \
 					      intel_sdvo_connector->name, \
 					      intel_sdvo_connector->cur_##name); \
 		DRM_DEBUG_KMS(#name ": max %d, default %d, current %d\n", \
@@ -2480,7 +2530,7 @@
 		if (!intel_sdvo_connector->left)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      intel_sdvo_connector->left,
 					      intel_sdvo_connector->left_margin);
 
@@ -2489,7 +2539,7 @@
 		if (!intel_sdvo_connector->right)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      intel_sdvo_connector->right,
 					      intel_sdvo_connector->right_margin);
 		DRM_DEBUG_KMS("h_overscan: max %d, "
@@ -2517,7 +2567,7 @@
 		if (!intel_sdvo_connector->top)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      intel_sdvo_connector->top,
 					      intel_sdvo_connector->top_margin);
 
@@ -2527,7 +2577,7 @@
 		if (!intel_sdvo_connector->bottom)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      intel_sdvo_connector->bottom,
 					      intel_sdvo_connector->bottom_margin);
 		DRM_DEBUG_KMS("v_overscan: max %d, "
@@ -2559,7 +2609,7 @@
 		if (!intel_sdvo_connector->dot_crawl)
 			return false;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      intel_sdvo_connector->dot_crawl,
 					      intel_sdvo_connector->cur_dot_crawl);
 		DRM_DEBUG_KMS("dot crawl: current %d\n", response);
@@ -2663,10 +2713,8 @@
 	intel_sdvo->is_sdvob = is_sdvob;
 	intel_sdvo->slave_addr = intel_sdvo_get_slave_addr(dev, intel_sdvo) >> 1;
 	intel_sdvo_select_i2c_bus(dev_priv, intel_sdvo, sdvo_reg);
-	if (!intel_sdvo_init_ddc_proxy(intel_sdvo, dev)) {
-		kfree(intel_sdvo);
-		return false;
-	}
+	if (!intel_sdvo_init_ddc_proxy(intel_sdvo, dev))
+		goto err_i2c_bus;
 
 	/* encoder type will be decided later */
 	intel_encoder = &intel_sdvo->base;
@@ -2765,6 +2813,8 @@
 err:
 	drm_encoder_cleanup(&intel_encoder->base);
 	i2c_del_adapter(&intel_sdvo->ddc);
+err_i2c_bus:
+	intel_sdvo_unselect_i2c_bus(intel_sdvo);
 	kfree(intel_sdvo);
 
 	return false;
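
Rough worst-case arithmetic for the extended SDVO retry loop above (illustrative, using the patch's constants):

	/* quick phase: ~5 polls x udelay(15)   ~=  75 µs
	 * slow phase:  up to ~10 x msleep(15)  ~= 150 ms
	 * versus the old limit of 5 x udelay(15) = 75 µs total */
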
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 82f5e5c7..827dcd4 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -48,7 +48,8 @@
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	int pipe = intel_plane->pipe;
 	u32 sprctl, sprscale = 0;
-	int pixel_size;
+	unsigned long sprsurf_offset, linear_offset;
+	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
 
 	sprctl = I915_READ(SPRCTL(pipe));
 
@@ -61,33 +62,24 @@
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_XBGR8888:
 		sprctl |= SPRITE_FORMAT_RGBX888 | SPRITE_RGB_ORDER_RGBX;
-		pixel_size = 4;
 		break;
 	case DRM_FORMAT_XRGB8888:
 		sprctl |= SPRITE_FORMAT_RGBX888;
-		pixel_size = 4;
 		break;
 	case DRM_FORMAT_YUYV:
 		sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_YUYV;
-		pixel_size = 2;
 		break;
 	case DRM_FORMAT_YVYU:
 		sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_YVYU;
-		pixel_size = 2;
 		break;
 	case DRM_FORMAT_UYVY:
 		sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_UYVY;
-		pixel_size = 2;
 		break;
 	case DRM_FORMAT_VYUY:
 		sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_VYUY;
-		pixel_size = 2;
 		break;
 	default:
-		DRM_DEBUG_DRIVER("bad pixel format, assuming RGBX888\n");
-		sprctl |= SPRITE_FORMAT_RGBX888;
-		pixel_size = 4;
-		break;
+		BUG();
 	}
 
 	if (obj->tiling_mode != I915_TILING_NONE)
@@ -127,18 +119,28 @@
 
 	I915_WRITE(SPRSTRIDE(pipe), fb->pitches[0]);
 	I915_WRITE(SPRPOS(pipe), (crtc_y << 16) | crtc_x);
-	if (obj->tiling_mode != I915_TILING_NONE) {
-		I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x);
-	} else {
-		unsigned long offset;
 
-		offset = y * fb->pitches[0] + x * (fb->bits_per_pixel / 8);
-		I915_WRITE(SPRLINOFF(pipe), offset);
-	}
+	linear_offset = y * fb->pitches[0] + x * (fb->bits_per_pixel / 8);
+	sprsurf_offset =
+		intel_gen4_compute_offset_xtiled(&x, &y,
+						 fb->bits_per_pixel / 8,
+						 fb->pitches[0]);
+	linear_offset -= sprsurf_offset;
+
+	/* HSW consolidates SPRTILEOFF and SPRLINOFF into a single SPROFFSET
+	 * register */
+	if (IS_HASWELL(dev))
+		I915_WRITE(SPROFFSET(pipe), (y << 16) | x);
+	else if (obj->tiling_mode != I915_TILING_NONE)
+		I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x);
+	else
+		I915_WRITE(SPRLINOFF(pipe), linear_offset);
+
 	I915_WRITE(SPRSIZE(pipe), (crtc_h << 16) | crtc_w);
-	I915_WRITE(SPRSCALE(pipe), sprscale);
+	if (intel_plane->can_scale)
+		I915_WRITE(SPRSCALE(pipe), sprscale);
 	I915_WRITE(SPRCTL(pipe), sprctl);
-	I915_MODIFY_DISPBASE(SPRSURF(pipe), obj->gtt_offset);
+	I915_MODIFY_DISPBASE(SPRSURF(pipe), obj->gtt_offset + sprsurf_offset);
 	POSTING_READ(SPRSURF(pipe));
 }
 
@@ -152,7 +154,8 @@
 
 	I915_WRITE(SPRCTL(pipe), I915_READ(SPRCTL(pipe)) & ~SPRITE_ENABLE);
 	/* Can't leave the scaler enabled... */
-	I915_WRITE(SPRSCALE(pipe), 0);
+	if (intel_plane->can_scale)
+		I915_WRITE(SPRSCALE(pipe), 0);
 	/* Activate double buffered register update */
 	I915_MODIFY_DISPBASE(SPRSURF(pipe), 0);
 	POSTING_READ(SPRSURF(pipe));
@@ -225,8 +228,10 @@
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
-	int pipe = intel_plane->pipe, pixel_size;
+	int pipe = intel_plane->pipe;
+	unsigned long dvssurf_offset, linear_offset;
 	u32 dvscntr, dvsscale;
+	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
 
 	dvscntr = I915_READ(DVSCNTR(pipe));
 
@@ -239,33 +244,24 @@
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_XBGR8888:
 		dvscntr |= DVS_FORMAT_RGBX888 | DVS_RGB_ORDER_XBGR;
-		pixel_size = 4;
 		break;
 	case DRM_FORMAT_XRGB8888:
 		dvscntr |= DVS_FORMAT_RGBX888;
-		pixel_size = 4;
 		break;
 	case DRM_FORMAT_YUYV:
 		dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_YUYV;
-		pixel_size = 2;
 		break;
 	case DRM_FORMAT_YVYU:
 		dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_YVYU;
-		pixel_size = 2;
 		break;
 	case DRM_FORMAT_UYVY:
 		dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_UYVY;
-		pixel_size = 2;
 		break;
 	case DRM_FORMAT_VYUY:
 		dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_VYUY;
-		pixel_size = 2;
 		break;
 	default:
-		DRM_DEBUG_DRIVER("bad pixel format, assuming RGBX888\n");
-		dvscntr |= DVS_FORMAT_RGBX888;
-		pixel_size = 4;
-		break;
+		BUG();
 	}
 
 	if (obj->tiling_mode != I915_TILING_NONE)
@@ -289,18 +285,23 @@
 
 	I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]);
 	I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x);
-	if (obj->tiling_mode != I915_TILING_NONE) {
-		I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x);
-	} else {
-		unsigned long offset;
 
-		offset = y * fb->pitches[0] + x * (fb->bits_per_pixel / 8);
-		I915_WRITE(DVSLINOFF(pipe), offset);
-	}
+	linear_offset = y * fb->pitches[0] + x * (fb->bits_per_pixel / 8);
+	dvssurf_offset =
+		intel_gen4_compute_offset_xtiled(&x, &y,
+						 fb->bits_per_pixel / 8,
+						 fb->pitches[0]);
+	linear_offset -= dvssurf_offset;
+
+	if (obj->tiling_mode != I915_TILING_NONE)
+		I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x);
+	else
+		I915_WRITE(DVSLINOFF(pipe), linear_offset);
+
 	I915_WRITE(DVSSIZE(pipe), (crtc_h << 16) | crtc_w);
 	I915_WRITE(DVSSCALE(pipe), dvsscale);
 	I915_WRITE(DVSCNTR(pipe), dvscntr);
-	I915_MODIFY_DISPBASE(DVSSURF(pipe), obj->gtt_offset);
+	I915_MODIFY_DISPBASE(DVSSURF(pipe), obj->gtt_offset + dvssurf_offset);
 	POSTING_READ(DVSSURF(pipe));
 }
 
@@ -422,6 +423,8 @@
 	struct intel_framebuffer *intel_fb;
 	struct drm_i915_gem_object *obj, *old_obj;
 	int pipe = intel_plane->pipe;
+	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
+								      pipe);
 	int ret = 0;
 	int x = src_x >> 16, y = src_y >> 16;
 	int primary_w = crtc->mode.hdisplay, primary_h = crtc->mode.vdisplay;
@@ -436,7 +439,7 @@
 	src_h = src_h >> 16;
 
 	/* Pipe must be running... */
-	if (!(I915_READ(PIPECONF(pipe)) & PIPECONF_ENABLE))
+	if (!(I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_ENABLE))
 		return -EINVAL;
 
 	if (crtc_x >= primary_w || crtc_y >= primary_h)
@@ -446,6 +449,15 @@
 	if (intel_plane->pipe != intel_crtc->pipe)
 		return -EINVAL;
 
+	/* Sprite planes can be linear or x-tiled surfaces */
+	switch (obj->tiling_mode) {
+		case I915_TILING_NONE:
+		case I915_TILING_X:
+			break;
+		default:
+			return -EINVAL;
+	}
+
 	/*
 	 * Clamp the width & height into the visible area.  Note we don't
 	 * try to scale the source if part of the visible region is offscreen.
@@ -473,6 +485,12 @@
 		goto out;
 
 	/*
+	 * We may not have a scaler, e.g. HSW no longer has one
+	 */
+	if (!intel_plane->can_scale && (crtc_w != src_w || crtc_h != src_h))
+		return -EINVAL;
+
+	/*
 	 * We can take a larger source and scale it down, but
 	 * only so much...  16x is the max on SNB.
 	 */
@@ -665,6 +683,7 @@
 	switch (INTEL_INFO(dev)->gen) {
 	case 5:
 	case 6:
+		intel_plane->can_scale = true;
 		intel_plane->max_downscale = 16;
 		intel_plane->update_plane = ilk_update_plane;
 		intel_plane->disable_plane = ilk_disable_plane;
@@ -681,6 +700,10 @@
 		break;
 
 	case 7:
+		if (IS_HASWELL(dev) || IS_VALLEYVIEW(dev))
+			intel_plane->can_scale = false;
+		else
+			intel_plane->can_scale = true;
 		intel_plane->max_downscale = 2;
 		intel_plane->update_plane = ivb_update_plane;
 		intel_plane->disable_plane = ivb_disable_plane;
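
[Editor's note: the sprite hunks above stop programming the raw framebuffer offset and instead split it into a tile-aligned surface base (added to obj->gtt_offset for SPRSURF/DVSSURF) plus a residual, expressed either as adjusted x/y for tiled surfaces or as the remaining linear_offset for linear ones. A sketch of how such a helper can reduce (x, y) to a page-aligned base, assuming gen4-style X-tiles of 512 bytes by 8 rows; illustrative arithmetic, not necessarily the exact i915 implementation:]

/*
 * Reduce (x, y) on an X-tiled surface (tiles of 512 bytes x 8 rows,
 * i.e. 4096 bytes each) to a tile-aligned byte offset, leaving the
 * residual coordinates behind in *x and *y.
 */
static unsigned long compute_offset_xtiled(int *x, int *y,
					   unsigned int cpp,
					   unsigned int pitch)
{
	int tile_rows, tiles;

	tile_rows = *y / 8;		/* whole tile rows above the pixel */
	*y %= 8;			/* scanline within the tile row */

	tiles = *x * cpp / 512;		/* whole tiles left of the pixel */
	*x = (*x * cpp % 512) / cpp;	/* pixel column within the tile */

	return tile_rows * pitch * 8 + tiles * 4096;
}

[Subtracting that base from the plain y * pitch + x * cpp byte offset leaves exactly the remainder the LINOFF register still needs on linear surfaces, which is what the linear_offset -= sprsurf_offset lines compute.]
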
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 62bb048..ea93520 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1088,13 +1088,11 @@
 		int dspcntr_reg = DSPCNTR(intel_crtc->plane);
 		int pipeconf = I915_READ(pipeconf_reg);
 		int dspcntr = I915_READ(dspcntr_reg);
-		int dspbase_reg = DSPADDR(intel_crtc->plane);
 		int xpos = 0x0, ypos = 0x0;
 		unsigned int xsize, ysize;
 		/* Pipe must be off here */
 		I915_WRITE(dspcntr_reg, dspcntr & ~DISPLAY_PLANE_ENABLE);
-		/* Flush the plane changes */
-		I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+		intel_flush_display_plane(dev_priv, intel_crtc->plane);
 
 		/* Wait for vblank for the disable to take effect */
 		if (IS_GEN2(dev))
@@ -1123,8 +1121,7 @@
 
 		I915_WRITE(pipeconf_reg, pipeconf);
 		I915_WRITE(dspcntr_reg, dspcntr);
-		/* Flush the plane changes */
-		I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+		intel_flush_display_plane(dev_priv, intel_crtc->plane);
 	}
 
 	j = 0;
@@ -1292,7 +1289,7 @@
 	}
 
 	intel_tv->tv_format = tv_mode->name;
-	drm_connector_property_set_value(connector,
+	drm_object_property_set_value(&connector->base,
 		connector->dev->mode_config.tv_mode_property, i);
 }
 
@@ -1446,7 +1443,7 @@
 	int ret = 0;
 	bool changed = false;
 
-	ret = drm_connector_property_set_value(connector, property, val);
+	ret = drm_object_property_set_value(&connector->base, property, val);
 	if (ret < 0)
 		goto out;
 
@@ -1658,18 +1655,18 @@
 				      ARRAY_SIZE(tv_modes),
 				      tv_format_names);
 
-	drm_connector_attach_property(connector, dev->mode_config.tv_mode_property,
+	drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property,
 				   initial_mode);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_left_margin_property,
 				   intel_tv->margin[TV_MARGIN_LEFT]);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_top_margin_property,
 				   intel_tv->margin[TV_MARGIN_TOP]);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_right_margin_property,
 				   intel_tv->margin[TV_MARGIN_RIGHT]);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_bottom_margin_property,
 				   intel_tv->margin[TV_MARGIN_BOTTOM]);
 	drm_sysfs_connector_add(connector);
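
[Editor's note: the property churn in intel_sdvo.c and intel_tv.c above is a mechanical API migration: properties now hang off the generic drm_mode_object embedded in every modeset object rather than off connectors specifically, so the helpers were renamed and take the embedded base. Assuming the 3.8-era drm core, the conversion pattern is:]

	/* old, connector-only helpers */
	drm_connector_attach_property(connector, prop, init_val);
	drm_connector_property_set_value(connector, prop, val);

	/* new, generic helpers taking the embedded drm_mode_object */
	drm_object_attach_property(&connector->base, prop, init_val);
	drm_object_property_set_value(&connector->base, prop, val);

[That also explains the triple base.base.base in the sdvo TV-format hunk: intel_sdvo_connector wraps intel_connector, which wraps drm_connector, which embeds the drm_mode_object the new helper wants.]
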
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index d6a1aae..70dd3c5 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -133,6 +133,8 @@
 {
 	void __iomem *mem;
 	struct apertures_struct *aper = alloc_apertures(1);
+	if (!aper)
+		return -ENOMEM;
 
 	/* BAR 0 is VRAM */
 	mdev->mc.vram_base = pci_resource_start(mdev->dev->pdev, 0);
@@ -140,9 +142,9 @@
 
 	aper->ranges[0].base = mdev->mc.vram_base;
 	aper->ranges[0].size = mdev->mc.vram_window;
-	aper->count = 1;
 
 	remove_conflicting_framebuffers(aper, "mgafb", true);
+	kfree(aper);
 
 	if (!request_mem_region(mdev->mc.vram_base, mdev->mc.vram_window,
 				"mgadrmfb_vram")) {
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 1504699..8fc9d92 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -186,11 +186,11 @@
 
 static int mgag200_bo_move(struct ttm_buffer_object *bo,
 		       bool evict, bool interruptible,
-		       bool no_wait_reserve, bool no_wait_gpu,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	return r;
 }
 
@@ -355,7 +355,7 @@
 
 	ret = ttm_bo_init(&mdev->ttm.bdev, &mgabo->bo, size,
 			  ttm_bo_type_device, &mgabo->placement,
-			  align >> PAGE_SHIFT, 0, false, NULL, acc_size,
+			  align >> PAGE_SHIFT, false, NULL, acc_size,
 			  NULL, mgag200_bo_ttm_destroy);
 	if (ret)
 		return ret;
@@ -382,7 +382,7 @@
 	mgag200_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -405,7 +405,7 @@
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
 
@@ -430,7 +430,7 @@
 	for (i = 0; i < bo->placement.num_placement ; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false, false);
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
 		DRM_ERROR("pushing to VRAM failed\n");
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index a990df4..ab25752 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -11,6 +11,7 @@
 nouveau-y += core/core/engctx.o
 nouveau-y += core/core/engine.o
 nouveau-y += core/core/enum.o
+nouveau-y += core/core/falcon.o
 nouveau-y += core/core/gpuobj.o
 nouveau-y += core/core/handle.o
 nouveau-y += core/core/mm.o
@@ -29,6 +30,7 @@
 nouveau-y += core/subdev/bios/bit.o
 nouveau-y += core/subdev/bios/conn.o
 nouveau-y += core/subdev/bios/dcb.o
+nouveau-y += core/subdev/bios/disp.o
 nouveau-y += core/subdev/bios/dp.o
 nouveau-y += core/subdev/bios/extdev.o
 nouveau-y += core/subdev/bios/gpio.o
@@ -64,9 +66,19 @@
 nouveau-y += core/subdev/fb/base.o
 nouveau-y += core/subdev/fb/nv04.o
 nouveau-y += core/subdev/fb/nv10.o
+nouveau-y += core/subdev/fb/nv1a.o
 nouveau-y += core/subdev/fb/nv20.o
+nouveau-y += core/subdev/fb/nv25.o
 nouveau-y += core/subdev/fb/nv30.o
+nouveau-y += core/subdev/fb/nv35.o
+nouveau-y += core/subdev/fb/nv36.o
 nouveau-y += core/subdev/fb/nv40.o
+nouveau-y += core/subdev/fb/nv41.o
+nouveau-y += core/subdev/fb/nv44.o
+nouveau-y += core/subdev/fb/nv46.o
+nouveau-y += core/subdev/fb/nv47.o
+nouveau-y += core/subdev/fb/nv49.o
+nouveau-y += core/subdev/fb/nv4e.o
 nouveau-y += core/subdev/fb/nv50.o
 nouveau-y += core/subdev/fb/nvc0.o
 nouveau-y += core/subdev/gpio/base.o
@@ -111,7 +123,10 @@
 nouveau-y += core/engine/dmaobj/nv04.o
 nouveau-y += core/engine/dmaobj/nv50.o
 nouveau-y += core/engine/dmaobj/nvc0.o
+nouveau-y += core/engine/dmaobj/nvd0.o
 nouveau-y += core/engine/bsp/nv84.o
+nouveau-y += core/engine/bsp/nvc0.o
+nouveau-y += core/engine/bsp/nve0.o
 nouveau-y += core/engine/copy/nva3.o
 nouveau-y += core/engine/copy/nvc0.o
 nouveau-y += core/engine/copy/nve0.o
@@ -119,7 +134,21 @@
 nouveau-y += core/engine/crypt/nv98.o
 nouveau-y += core/engine/disp/nv04.o
 nouveau-y += core/engine/disp/nv50.o
+nouveau-y += core/engine/disp/nv84.o
+nouveau-y += core/engine/disp/nv94.o
+nouveau-y += core/engine/disp/nva0.o
+nouveau-y += core/engine/disp/nva3.o
 nouveau-y += core/engine/disp/nvd0.o
+nouveau-y += core/engine/disp/nve0.o
+nouveau-y += core/engine/disp/dacnv50.o
+nouveau-y += core/engine/disp/hdanva3.o
+nouveau-y += core/engine/disp/hdanvd0.o
+nouveau-y += core/engine/disp/hdminv84.o
+nouveau-y += core/engine/disp/hdminva3.o
+nouveau-y += core/engine/disp/hdminvd0.o
+nouveau-y += core/engine/disp/sornv50.o
+nouveau-y += core/engine/disp/sornv94.o
+nouveau-y += core/engine/disp/sornvd0.o
 nouveau-y += core/engine/disp/vga.o
 nouveau-y += core/engine/fifo/base.o
 nouveau-y += core/engine/fifo/nv04.o
@@ -151,11 +180,14 @@
 nouveau-y += core/engine/mpeg/nv50.o
 nouveau-y += core/engine/mpeg/nv84.o
 nouveau-y += core/engine/ppp/nv98.o
+nouveau-y += core/engine/ppp/nvc0.o
 nouveau-y += core/engine/software/nv04.o
 nouveau-y += core/engine/software/nv10.o
 nouveau-y += core/engine/software/nv50.o
 nouveau-y += core/engine/software/nvc0.o
 nouveau-y += core/engine/vp/nv84.o
+nouveau-y += core/engine/vp/nvc0.o
+nouveau-y += core/engine/vp/nve0.o
 
 # drm/core
 nouveau-y += nouveau_drm.o nouveau_chan.o nouveau_dma.o nouveau_fence.o
@@ -166,7 +198,7 @@
 
 # drm/kms
 nouveau-y += nouveau_bios.o nouveau_fbcon.o nouveau_display.o
-nouveau-y += nouveau_connector.o nouveau_hdmi.o nouveau_dp.o
+nouveau-y += nouveau_connector.o nouveau_dp.o
 nouveau-y += nv04_fbcon.o nv50_fbcon.o nvc0_fbcon.o
 
 # drm/kms/nv04:nv50
@@ -175,9 +207,7 @@
 nouveau-y += nv04_crtc.o nv04_display.o nv04_cursor.o
 
 # drm/kms/nv50-
-nouveau-y += nv50_display.o nvd0_display.o
-nouveau-y += nv50_crtc.o nv50_dac.o nv50_sor.o nv50_cursor.o
-nouveau-y += nv50_evo.o
+nouveau-y += nv50_display.o
 
 # drm/pm
 nouveau-y += nouveau_pm.o nouveau_volt.o nouveau_perf.o
diff --git a/drivers/gpu/drm/nouveau/core/core/engctx.c b/drivers/gpu/drm/nouveau/core/core/engctx.c
index e41b10d..84c71fa 100644
--- a/drivers/gpu/drm/nouveau/core/core/engctx.c
+++ b/drivers/gpu/drm/nouveau/core/core/engctx.c
@@ -189,6 +189,21 @@
 	return nouveau_gpuobj_fini(&engctx->base, suspend);
 }
 
+int
+_nouveau_engctx_ctor(struct nouveau_object *parent,
+		     struct nouveau_object *engine,
+		     struct nouveau_oclass *oclass, void *data, u32 size,
+		     struct nouveau_object **pobject)
+{
+	struct nouveau_engctx *engctx;
+	int ret;
+
+	ret = nouveau_engctx_create(parent, engine, oclass, NULL, 256, 256,
+				    NVOBJ_FLAG_ZERO_ALLOC, &engctx);
+	*pobject = nv_object(engctx);
+	return ret;
+}
+
 void
 _nouveau_engctx_dtor(struct nouveau_object *object)
 {
diff --git a/drivers/gpu/drm/nouveau/core/core/falcon.c b/drivers/gpu/drm/nouveau/core/core/falcon.c
new file mode 100644
index 0000000..6b0843c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/core/falcon.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <core/falcon.h>
+
+#include <subdev/timer.h>
+
+u32
+_nouveau_falcon_rd32(struct nouveau_object *object, u64 addr)
+{
+	struct nouveau_falcon *falcon = (void *)object;
+	return nv_rd32(falcon, falcon->addr + addr);
+}
+
+void
+_nouveau_falcon_wr32(struct nouveau_object *object, u64 addr, u32 data)
+{
+	struct nouveau_falcon *falcon = (void *)object;
+	nv_wr32(falcon, falcon->addr + addr, data);
+}
+
+int
+_nouveau_falcon_init(struct nouveau_object *object)
+{
+	struct nouveau_device *device = nv_device(object);
+	struct nouveau_falcon *falcon = (void *)object;
+	const struct firmware *fw;
+	char name[32] = "internal";
+	int ret, i;
+	u32 caps;
+
+	/* enable engine, and determine its capabilities */
+	ret = nouveau_engine_init(&falcon->base);
+	if (ret)
+		return ret;
+
+	if (device->chipset <  0xa3 ||
+	    device->chipset == 0xaa || device->chipset == 0xac) {
+		falcon->version = 0;
+		falcon->secret  = (falcon->addr == 0x087000) ? 1 : 0;
+	} else {
+		caps = nv_ro32(falcon, 0x12c);
+		falcon->version = (caps & 0x0000000f);
+		falcon->secret  = (caps & 0x00000030) >> 4;
+	}
+
+	caps = nv_ro32(falcon, 0x108);
+	falcon->code.limit = (caps & 0x000001ff) << 8;
+	falcon->data.limit = (caps & 0x0003fe00) >> 1;
+
+	nv_debug(falcon, "falcon version: %d\n", falcon->version);
+	nv_debug(falcon, "secret level: %d\n", falcon->secret);
+	nv_debug(falcon, "code limit: %d\n", falcon->code.limit);
+	nv_debug(falcon, "data limit: %d\n", falcon->data.limit);
+
+	/* wait for 'uc halted' to be signalled before continuing */
+	if (falcon->secret) {
+		nv_wait(falcon, 0x008, 0x00000010, 0x00000010);
+		nv_wo32(falcon, 0x004, 0x00000010);
+	}
+
+	/* disable all interrupts */
+	nv_wo32(falcon, 0x014, 0xffffffff);
+
+	/* no default ucode provided by the engine implementation, so try to
+	 * locate a "self-bootstrapping" firmware image for the engine
+	 */
+	if (!falcon->code.data) {
+		snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03x",
+			 device->chipset, falcon->addr >> 12);
+
+		ret = request_firmware(&fw, name, &device->pdev->dev);
+		if (ret == 0) {
+			falcon->code.data = kmemdup(fw->data, fw->size, GFP_KERNEL);
+			falcon->code.size = fw->size;
+			falcon->data.data = NULL;
+			falcon->data.size = 0;
+			release_firmware(fw);
+		}
+
+		falcon->external = true;
+	}
+
+	/* next step is to try to load "static code/data segment" firmware
+	 * images for the engine
+	 */
+	if (!falcon->code.data) {
+		snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03xd",
+			 device->chipset, falcon->addr >> 12);
+
+		ret = request_firmware(&fw, name, &device->pdev->dev);
+		if (ret) {
+			nv_error(falcon, "unable to load firmware data\n");
+			return ret;
+		}
+
+		falcon->data.data = kmemdup(fw->data, fw->size, GFP_KERNEL);
+		falcon->data.size = fw->size;
+		release_firmware(fw);
+		if (!falcon->data.data)
+			return -ENOMEM;
+
+		snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03xc",
+			 device->chipset, falcon->addr >> 12);
+
+		ret = request_firmware(&fw, name, &device->pdev->dev);
+		if (ret) {
+			nv_error(falcon, "unable to load firmware code\n");
+			return ret;
+		}
+
+		falcon->code.data = kmemdup(fw->data, fw->size, GFP_KERNEL);
+		falcon->code.size = fw->size;
+		release_firmware(fw);
+		if (!falcon->code.data)
+			return -ENOMEM;
+	}
+
+	nv_debug(falcon, "firmware: %s (%s)\n", name, falcon->data.data ?
+		 "static code/data segments" : "self-bootstrapping");
+
+	/* ensure any "self-bootstrapping" firmware image is in vram */
+	if (!falcon->data.data && !falcon->core) {
+		ret = nouveau_gpuobj_new(object->parent, NULL,
+					 falcon->code.size, 256, 0,
+					&falcon->core);
+		if (ret) {
+			nv_error(falcon, "core allocation failed, %d\n", ret);
+			return ret;
+		}
+
+		for (i = 0; i < falcon->code.size; i += 4)
+			nv_wo32(falcon->core, i, falcon->code.data[i / 4]);
+	}
+
+	/* upload firmware bootloader (or the full code segments) */
+	if (falcon->core) {
+		if (device->card_type < NV_C0)
+			nv_wo32(falcon, 0x618, 0x04000000);
+		else
+			nv_wo32(falcon, 0x618, 0x00000114);
+		nv_wo32(falcon, 0x11c, 0);
+		nv_wo32(falcon, 0x110, falcon->core->addr >> 8);
+		nv_wo32(falcon, 0x114, 0);
+		nv_wo32(falcon, 0x118, 0x00006610);
+	} else {
+		if (falcon->code.size > falcon->code.limit ||
+		    falcon->data.size > falcon->data.limit) {
+			nv_error(falcon, "ucode exceeds falcon limit(s)\n");
+			return -EINVAL;
+		}
+
+		if (falcon->version < 3) {
+			nv_wo32(falcon, 0xff8, 0x00100000);
+			for (i = 0; i < falcon->code.size / 4; i++)
+				nv_wo32(falcon, 0xff4, falcon->code.data[i]);
+		} else {
+			nv_wo32(falcon, 0x180, 0x01000000);
+			for (i = 0; i < falcon->code.size / 4; i++) {
+				if ((i & 0x3f) == 0)
+					nv_wo32(falcon, 0x188, i >> 6);
+				nv_wo32(falcon, 0x184, falcon->code.data[i]);
+			}
+		}
+	}
+
+	/* upload data segment (if necessary), zeroing the remainder */
+	if (falcon->version < 3) {
+		nv_wo32(falcon, 0xff8, 0x00000000);
+		for (i = 0; !falcon->core && i < falcon->data.size / 4; i++)
+			nv_wo32(falcon, 0xff4, falcon->data.data[i]);
+		for (; i < falcon->data.limit; i += 4)
+			nv_wo32(falcon, 0xff4, 0x00000000);
+	} else {
+		nv_wo32(falcon, 0x1c0, 0x01000000);
+		for (i = 0; !falcon->core && i < falcon->data.size / 4; i++)
+			nv_wo32(falcon, 0x1c4, falcon->data.data[i]);
+		for (; i < falcon->data.limit / 4; i++)
+			nv_wo32(falcon, 0x1c4, 0x00000000);
+	}
+
+	/* start it running */
+	nv_wo32(falcon, 0x10c, 0x00000001); /* BLOCK_ON_FIFO */
+	nv_wo32(falcon, 0x104, 0x00000000); /* ENTRY */
+	nv_wo32(falcon, 0x100, 0x00000002); /* TRIGGER */
+	nv_wo32(falcon, 0x048, 0x00000003); /* FIFO | CHSW */
+	return 0;
+}
+
+int
+_nouveau_falcon_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nouveau_falcon *falcon = (void *)object;
+
+	if (!suspend) {
+		nouveau_gpuobj_ref(NULL, &falcon->core);
+		if (falcon->external) {
+			kfree(falcon->data.data);
+			kfree(falcon->code.data);
+			falcon->code.data = NULL;
+		}
+	}
+
+	nv_mo32(falcon, 0x048, 0x00000003, 0x00000000);
+	nv_wo32(falcon, 0x014, 0xffffffff);
+
+	return nouveau_engine_fini(&falcon->base, suspend);
+}
+
+int
+nouveau_falcon_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass, u32 addr, bool enable,
+		       const char *iname, const char *fname,
+		       int length, void **pobject)
+{
+	struct nouveau_falcon *falcon;
+	int ret;
+
+	ret = nouveau_engine_create_(parent, engine, oclass, enable, iname,
+				     fname, length, pobject);
+	falcon = *pobject;
+	if (ret)
+		return ret;
+
+	falcon->addr = addr;
+	return 0;
+}
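
[Editor's note: a consequence of the probe order in _nouveau_falcon_init() above: each falcon first requests one self-bootstrapping image named after the chipset and the unit's MMIO base shifted down 12 bits, and only falls back to split code/data images (suffixes c and d) if that is absent. A standalone illustration of the names generated for a hypothetical nvc0 board with the BSP falcon at 0x084000:]

#include <stdio.h>

int main(void)
{
	char name[32];
	unsigned int chipset = 0xc0;	/* hypothetical: an nvc0 board */
	unsigned int addr = 0x084000;	/* BSP falcon MMIO base */

	/* first choice: one self-bootstrapping code+data image */
	snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03x",
		 chipset, addr >> 12);
	printf("%s\n", name);		/* nouveau/nvc0_fuc084 */

	/* fallback: static data segment, then static code segment */
	snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03xd",
		 chipset, addr >> 12);
	printf("%s\n", name);		/* nouveau/nvc0_fuc084d */

	snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03xc",
		 chipset, addr >> 12);
	printf("%s\n", name);		/* nouveau/nvc0_fuc084c */
	return 0;
}
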
diff --git a/drivers/gpu/drm/nouveau/core/core/gpuobj.c b/drivers/gpu/drm/nouveau/core/core/gpuobj.c
index 70586fd..560b221 100644
--- a/drivers/gpu/drm/nouveau/core/core/gpuobj.c
+++ b/drivers/gpu/drm/nouveau/core/core/gpuobj.c
@@ -183,7 +183,7 @@
 }
 
 u32
-_nouveau_gpuobj_rd32(struct nouveau_object *object, u32 addr)
+_nouveau_gpuobj_rd32(struct nouveau_object *object, u64 addr)
 {
 	struct nouveau_gpuobj *gpuobj = nv_gpuobj(object);
 	struct nouveau_ofuncs *pfuncs = nv_ofuncs(gpuobj->parent);
@@ -193,7 +193,7 @@
 }
 
 void
-_nouveau_gpuobj_wr32(struct nouveau_object *object, u32 addr, u32 data)
+_nouveau_gpuobj_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	struct nouveau_gpuobj *gpuobj = nv_gpuobj(object);
 	struct nouveau_ofuncs *pfuncs = nv_ofuncs(gpuobj->parent);
diff --git a/drivers/gpu/drm/nouveau/core/core/mm.c b/drivers/gpu/drm/nouveau/core/core/mm.c
index a6d3cd6..0261a11 100644
--- a/drivers/gpu/drm/nouveau/core/core/mm.c
+++ b/drivers/gpu/drm/nouveau/core/core/mm.c
@@ -234,15 +234,18 @@
 int
 nouveau_mm_fini(struct nouveau_mm *mm)
 {
-	struct nouveau_mm_node *node, *heap =
-		list_first_entry(&mm->nodes, struct nouveau_mm_node, nl_entry);
-	int nodes = 0;
+	if (nouveau_mm_initialised(mm)) {
+		struct nouveau_mm_node *node, *heap =
+			list_first_entry(&mm->nodes, typeof(*heap), nl_entry);
+		int nodes = 0;
 
-	list_for_each_entry(node, &mm->nodes, nl_entry) {
-		if (WARN_ON(nodes++ == mm->heap_nodes))
-			return -EBUSY;
+		list_for_each_entry(node, &mm->nodes, nl_entry) {
+			if (WARN_ON(nodes++ == mm->heap_nodes))
+				return -EBUSY;
+		}
+
+		kfree(heap);
 	}
 
-	kfree(heap);
 	return 0;
 }
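
[Editor's note: the nouveau_mm_fini() rework above makes teardown a no-op on a heap that was never initialised, instead of walking an empty node list, while keeping the leak check that returns -EBUSY if anything beyond the original heap nodes is still allocated. The guard-then-verify shape in isolation, with hypothetical names:]

#include <errno.h>

struct pool {
	int initialised;	/* set by a matching pool_init() */
	int live_objects;	/* allocations not yet returned */
};

static int pool_fini(struct pool *p)
{
	if (!p->initialised)
		return 0;		/* init never ran: nothing to free */

	if (p->live_objects)
		return -EBUSY;		/* leak: refuse to tear down */

	p->initialised = 0;
	return 0;
}
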
diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c
index 66f7dfd..1d9f614 100644
--- a/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c
@@ -22,18 +22,13 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/os.h>
-#include <core/class.h>
 #include <core/engctx.h>
+#include <core/class.h>
 
 #include <engine/bsp.h>
 
 struct nv84_bsp_priv {
-	struct nouveau_bsp base;
-};
-
-struct nv84_bsp_chan {
-	struct nouveau_bsp_chan base;
+	struct nouveau_engine base;
 };
 
 /*******************************************************************************
@@ -49,61 +44,16 @@
  * BSP context
  ******************************************************************************/
 
-static int
-nv84_bsp_context_ctor(struct nouveau_object *parent,
-		      struct nouveau_object *engine,
-		      struct nouveau_oclass *oclass, void *data, u32 size,
-		      struct nouveau_object **pobject)
-{
-	struct nv84_bsp_chan *priv;
-	int ret;
-
-	ret = nouveau_bsp_context_create(parent, engine, oclass, NULL,
-					 0, 0, 0, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static void
-nv84_bsp_context_dtor(struct nouveau_object *object)
-{
-	struct nv84_bsp_chan *priv = (void *)object;
-	nouveau_bsp_context_destroy(&priv->base);
-}
-
-static int
-nv84_bsp_context_init(struct nouveau_object *object)
-{
-	struct nv84_bsp_chan *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_bsp_context_init(&priv->base);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int
-nv84_bsp_context_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv84_bsp_chan *priv = (void *)object;
-	return nouveau_bsp_context_fini(&priv->base, suspend);
-}
-
 static struct nouveau_oclass
 nv84_bsp_cclass = {
 	.handle = NV_ENGCTX(BSP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv84_bsp_context_ctor,
-		.dtor = nv84_bsp_context_dtor,
-		.init = nv84_bsp_context_init,
-		.fini = nv84_bsp_context_fini,
-		.rd32 = _nouveau_bsp_context_rd32,
-		.wr32 = _nouveau_bsp_context_wr32,
+		.ctor = _nouveau_engctx_ctor,
+		.dtor = _nouveau_engctx_dtor,
+		.init = _nouveau_engctx_init,
+		.fini = _nouveau_engctx_fini,
+		.rd32 = _nouveau_engctx_rd32,
+		.wr32 = _nouveau_engctx_wr32,
 	},
 };
 
@@ -111,11 +61,6 @@
  * BSP engine/subdev functions
  ******************************************************************************/
 
-static void
-nv84_bsp_intr(struct nouveau_subdev *subdev)
-{
-}
-
 static int
 nv84_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	      struct nouveau_oclass *oclass, void *data, u32 size,
@@ -124,52 +69,25 @@
 	struct nv84_bsp_priv *priv;
 	int ret;
 
-	ret = nouveau_bsp_create(parent, engine, oclass, &priv);
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PBSP", "bsp", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	nv_subdev(priv)->unit = 0x04008000;
-	nv_subdev(priv)->intr = nv84_bsp_intr;
 	nv_engine(priv)->cclass = &nv84_bsp_cclass;
 	nv_engine(priv)->sclass = nv84_bsp_sclass;
 	return 0;
 }
 
-static void
-nv84_bsp_dtor(struct nouveau_object *object)
-{
-	struct nv84_bsp_priv *priv = (void *)object;
-	nouveau_bsp_destroy(&priv->base);
-}
-
-static int
-nv84_bsp_init(struct nouveau_object *object)
-{
-	struct nv84_bsp_priv *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_bsp_init(&priv->base);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int
-nv84_bsp_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv84_bsp_priv *priv = (void *)object;
-	return nouveau_bsp_fini(&priv->base, suspend);
-}
-
 struct nouveau_oclass
 nv84_bsp_oclass = {
 	.handle = NV_ENGINE(BSP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv84_bsp_ctor,
-		.dtor = nv84_bsp_dtor,
-		.init = nv84_bsp_init,
-		.fini = nv84_bsp_fini,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
new file mode 100644
index 0000000..0a5aa6b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Maarten Lankhorst
+ */
+
+#include <core/falcon.h>
+
+#include <engine/bsp.h>
+
+struct nvc0_bsp_priv {
+	struct nouveau_falcon base;
+};
+
+/*******************************************************************************
+ * BSP object classes
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nvc0_bsp_sclass[] = {
+	{ 0x90b1, &nouveau_object_ofuncs },
+	{},
+};
+
+/*******************************************************************************
+ * PBSP context
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nvc0_bsp_cclass = {
+	.handle = NV_ENGCTX(BSP, 0xc0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_falcon_context_ctor,
+		.dtor = _nouveau_falcon_context_dtor,
+		.init = _nouveau_falcon_context_init,
+		.fini = _nouveau_falcon_context_fini,
+		.rd32 = _nouveau_falcon_context_rd32,
+		.wr32 = _nouveau_falcon_context_wr32,
+	},
+};
+
+/*******************************************************************************
+ * PBSP engine/subdev functions
+ ******************************************************************************/
+
+static int
+nvc0_bsp_init(struct nouveau_object *object)
+{
+	struct nvc0_bsp_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_falcon_init(&priv->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x084010, 0x0000fff2);
+	nv_wr32(priv, 0x08401c, 0x0000fff2);
+	return 0;
+}
+
+static int
+nvc0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	      struct nouveau_oclass *oclass, void *data, u32 size,
+	      struct nouveau_object **pobject)
+{
+	struct nvc0_bsp_priv *priv;
+	int ret;
+
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x084000, true,
+				    "PBSP", "bsp", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x00008000;
+	nv_engine(priv)->cclass = &nvc0_bsp_cclass;
+	nv_engine(priv)->sclass = nvc0_bsp_sclass;
+	return 0;
+}
+
+struct nouveau_oclass
+nvc0_bsp_oclass = {
+	.handle = NV_ENGINE(BSP, 0xc0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvc0_bsp_ctor,
+		.dtor = _nouveau_falcon_dtor,
+		.init = nvc0_bsp_init,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
new file mode 100644
index 0000000..d4f23bb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/falcon.h>
+
+#include <engine/bsp.h>
+
+struct nve0_bsp_priv {
+	struct nouveau_falcon base;
+};
+
+/*******************************************************************************
+ * BSP object classes
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nve0_bsp_sclass[] = {
+	{ 0x95b1, &nouveau_object_ofuncs },
+	{},
+};
+
+/*******************************************************************************
+ * PBSP context
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nve0_bsp_cclass = {
+	.handle = NV_ENGCTX(BSP, 0xe0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_falcon_context_ctor,
+		.dtor = _nouveau_falcon_context_dtor,
+		.init = _nouveau_falcon_context_init,
+		.fini = _nouveau_falcon_context_fini,
+		.rd32 = _nouveau_falcon_context_rd32,
+		.wr32 = _nouveau_falcon_context_wr32,
+	},
+};
+
+/*******************************************************************************
+ * PBSP engine/subdev functions
+ ******************************************************************************/
+
+static int
+nve0_bsp_init(struct nouveau_object *object)
+{
+	struct nve0_bsp_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_falcon_init(&priv->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x084010, 0x0000fff2);
+	nv_wr32(priv, 0x08401c, 0x0000fff2);
+	return 0;
+}
+
+static int
+nve0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	      struct nouveau_oclass *oclass, void *data, u32 size,
+	      struct nouveau_object **pobject)
+{
+	struct nve0_bsp_priv *priv;
+	int ret;
+
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x084000, true,
+				    "PBSP", "bsp", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x00008000;
+	nv_engine(priv)->cclass = &nve0_bsp_cclass;
+	nv_engine(priv)->sclass = nve0_bsp_sclass;
+	return 0;
+}
+
+struct nouveau_oclass
+nve0_bsp_oclass = {
+	.handle = NV_ENGINE(BSP, 0xe0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nve0_bsp_ctor,
+		.dtor = _nouveau_falcon_dtor,
+		.init = nve0_bsp_init,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/copy/nva3.c b/drivers/gpu/drm/nouveau/core/engine/copy/nva3.c
index 4df6da0..283248c 100644
--- a/drivers/gpu/drm/nouveau/core/engine/copy/nva3.c
+++ b/drivers/gpu/drm/nouveau/core/engine/copy/nva3.c
@@ -22,10 +22,9 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/os.h>
-#include <core/enum.h>
+#include <core/falcon.h>
 #include <core/class.h>
-#include <core/engctx.h>
+#include <core/enum.h>
 
 #include <subdev/fb.h>
 #include <subdev/vm.h>
@@ -36,11 +35,7 @@
 #include "fuc/nva3.fuc.h"
 
 struct nva3_copy_priv {
-	struct nouveau_copy base;
-};
-
-struct nva3_copy_chan {
-	struct nouveau_copy_chan base;
+	struct nouveau_falcon base;
 };
 
 /*******************************************************************************
@@ -57,34 +52,16 @@
  * PCOPY context
  ******************************************************************************/
 
-static int
-nva3_copy_context_ctor(struct nouveau_object *parent,
-		       struct nouveau_object *engine,
-		       struct nouveau_oclass *oclass, void *data, u32 size,
-		       struct nouveau_object **pobject)
-{
-	struct nva3_copy_chan *priv;
-	int ret;
-
-	ret = nouveau_copy_context_create(parent, engine, oclass, NULL, 256, 0,
-					  NVOBJ_FLAG_ZERO_ALLOC, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static struct nouveau_oclass
 nva3_copy_cclass = {
 	.handle = NV_ENGCTX(COPY0, 0xa3),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nva3_copy_context_ctor,
-		.dtor = _nouveau_copy_context_dtor,
-		.init = _nouveau_copy_context_init,
-		.fini = _nouveau_copy_context_fini,
-		.rd32 = _nouveau_copy_context_rd32,
-		.wr32 = _nouveau_copy_context_wr32,
+		.ctor = _nouveau_falcon_context_ctor,
+		.dtor = _nouveau_falcon_context_dtor,
+		.init = _nouveau_falcon_context_init,
+		.fini = _nouveau_falcon_context_fini,
+		.rd32 = _nouveau_falcon_context_rd32,
+		.wr32 = _nouveau_falcon_context_wr32,
 
 	},
 };
@@ -100,41 +77,40 @@
 	{}
 };
 
-static void
+void
 nva3_copy_intr(struct nouveau_subdev *subdev)
 {
 	struct nouveau_fifo *pfifo = nouveau_fifo(subdev);
 	struct nouveau_engine *engine = nv_engine(subdev);
+	struct nouveau_falcon *falcon = (void *)subdev;
 	struct nouveau_object *engctx;
-	struct nva3_copy_priv *priv = (void *)subdev;
-	u32 dispatch = nv_rd32(priv, 0x10401c);
-	u32 stat = nv_rd32(priv, 0x104008) & dispatch & ~(dispatch >> 16);
-	u64 inst = nv_rd32(priv, 0x104050) & 0x3fffffff;
-	u32 ssta = nv_rd32(priv, 0x104040) & 0x0000ffff;
-	u32 addr = nv_rd32(priv, 0x104040) >> 16;
+	u32 dispatch = nv_ro32(falcon, 0x01c);
+	u32 stat = nv_ro32(falcon, 0x008) & dispatch & ~(dispatch >> 16);
+	u64 inst = nv_ro32(falcon, 0x050) & 0x3fffffff;
+	u32 ssta = nv_ro32(falcon, 0x040) & 0x0000ffff;
+	u32 addr = nv_ro32(falcon, 0x040) >> 16;
 	u32 mthd = (addr & 0x07ff) << 2;
 	u32 subc = (addr & 0x3800) >> 11;
-	u32 data = nv_rd32(priv, 0x104044);
+	u32 data = nv_ro32(falcon, 0x044);
 	int chid;
 
 	engctx = nouveau_engctx_get(engine, inst);
 	chid   = pfifo->chid(pfifo, engctx);
 
 	if (stat & 0x00000040) {
-		nv_error(priv, "DISPATCH_ERROR [");
+		nv_error(falcon, "DISPATCH_ERROR [");
 		nouveau_enum_print(nva3_copy_isr_error_name, ssta);
 		printk("] ch %d [0x%010llx] subc %d mthd 0x%04x data 0x%08x\n",
 		       chid, inst << 12, subc, mthd, data);
-		nv_wr32(priv, 0x104004, 0x00000040);
+		nv_wo32(falcon, 0x004, 0x00000040);
 		stat &= ~0x00000040;
 	}
 
 	if (stat) {
-		nv_error(priv, "unhandled intr 0x%08x\n", stat);
-		nv_wr32(priv, 0x104004, stat);
+		nv_error(falcon, "unhandled intr 0x%08x\n", stat);
+		nv_wo32(falcon, 0x004, stat);
 	}
 
-	nv50_fb_trap(nouveau_fb(priv), 1);
 	nouveau_engctx_put(engctx);
 }
 
@@ -154,7 +130,8 @@
 	struct nva3_copy_priv *priv;
 	int ret;
 
-	ret = nouveau_copy_create(parent, engine, oclass, enable, 0, &priv);
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x104000, enable,
+				    "PCE0", "copy0", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
@@ -164,59 +141,22 @@
 	nv_engine(priv)->cclass = &nva3_copy_cclass;
 	nv_engine(priv)->sclass = nva3_copy_sclass;
 	nv_engine(priv)->tlb_flush = nva3_copy_tlb_flush;
+	nv_falcon(priv)->code.data = nva3_pcopy_code;
+	nv_falcon(priv)->code.size = sizeof(nva3_pcopy_code);
+	nv_falcon(priv)->data.data = nva3_pcopy_data;
+	nv_falcon(priv)->data.size = sizeof(nva3_pcopy_data);
 	return 0;
 }
 
-static int
-nva3_copy_init(struct nouveau_object *object)
-{
-	struct nva3_copy_priv *priv = (void *)object;
-	int ret, i;
-
-	ret = nouveau_copy_init(&priv->base);
-	if (ret)
-		return ret;
-
-	/* disable all interrupts */
-	nv_wr32(priv, 0x104014, 0xffffffff);
-
-	/* upload ucode */
-	nv_wr32(priv, 0x1041c0, 0x01000000);
-	for (i = 0; i < sizeof(nva3_pcopy_data) / 4; i++)
-		nv_wr32(priv, 0x1041c4, nva3_pcopy_data[i]);
-
-	nv_wr32(priv, 0x104180, 0x01000000);
-	for (i = 0; i < sizeof(nva3_pcopy_code) / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nv_wr32(priv, 0x104188, i >> 6);
-		nv_wr32(priv, 0x104184, nva3_pcopy_code[i]);
-	}
-
-	/* start it running */
-	nv_wr32(priv, 0x10410c, 0x00000000);
-	nv_wr32(priv, 0x104104, 0x00000000); /* ENTRY */
-	nv_wr32(priv, 0x104100, 0x00000002); /* TRIGGER */
-	return 0;
-}
-
-static int
-nva3_copy_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nva3_copy_priv *priv = (void *)object;
-
-	nv_mask(priv, 0x104048, 0x00000003, 0x00000000);
-	nv_wr32(priv, 0x104014, 0xffffffff);
-
-	return nouveau_copy_fini(&priv->base, suspend);
-}
-
 struct nouveau_oclass
 nva3_copy_oclass = {
 	.handle = NV_ENGINE(COPY0, 0xa3),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nva3_copy_ctor,
-		.dtor = _nouveau_copy_dtor,
-		.init = nva3_copy_init,
-		.fini = nva3_copy_fini,
+		.dtor = _nouveau_falcon_dtor,
+		.init = _nouveau_falcon_init,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
 	},
 };
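
[Editor's note: the reworked nva3_copy_intr() above decodes the faulting method from the falcon's 0x040 register: the low half carries the dispatch error status, the high half a packed method address in which bits 0..10 index the method word (so shifting left by 2 gives the byte offset) and bits 11..13 give the subchannel. A standalone decode of a hypothetical sample value:]

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t reg040 = 0x18410002;		/* hypothetical sample */
	uint32_t ssta = reg040 & 0x0000ffff;	/* 0x0002: INVALID_ENUM */
	uint32_t addr = reg040 >> 16;		/* packed method address */
	uint32_t mthd = (addr & 0x07ff) << 2;	/* word index -> byte offset */
	uint32_t subc = (addr & 0x3800) >> 11;	/* subchannel 0..7 */

	/* prints: ssta 0x0002 subc 3 mthd 0x0104 */
	printf("ssta 0x%04x subc %u mthd 0x%04x\n",
	       (unsigned)ssta, (unsigned)subc, (unsigned)mthd);
	return 0;
}
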
diff --git a/drivers/gpu/drm/nouveau/core/engine/copy/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/copy/nvc0.c
index 06d4a87..b3ed273 100644
--- a/drivers/gpu/drm/nouveau/core/engine/copy/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/copy/nvc0.c
@@ -22,10 +22,9 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/os.h>
-#include <core/enum.h>
+#include <core/falcon.h>
 #include <core/class.h>
-#include <core/engctx.h>
+#include <core/enum.h>
 
 #include <engine/fifo.h>
 #include <engine/copy.h>
@@ -33,11 +32,7 @@
 #include "fuc/nvc0.fuc.h"
 
 struct nvc0_copy_priv {
-	struct nouveau_copy base;
-};
-
-struct nvc0_copy_chan {
-	struct nouveau_copy_chan base;
+	struct nouveau_falcon base;
 };
 
 /*******************************************************************************
@@ -60,32 +55,14 @@
  * PCOPY context
  ******************************************************************************/
 
-static int
-nvc0_copy_context_ctor(struct nouveau_object *parent,
-		       struct nouveau_object *engine,
-		       struct nouveau_oclass *oclass, void *data, u32 size,
-		       struct nouveau_object **pobject)
-{
-	struct nvc0_copy_chan *priv;
-	int ret;
-
-	ret = nouveau_copy_context_create(parent, engine, oclass, NULL, 256,
-					  256, NVOBJ_FLAG_ZERO_ALLOC, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static struct nouveau_ofuncs
 nvc0_copy_context_ofuncs = {
-	.ctor = nvc0_copy_context_ctor,
-	.dtor = _nouveau_copy_context_dtor,
-	.init = _nouveau_copy_context_init,
-	.fini = _nouveau_copy_context_fini,
-	.rd32 = _nouveau_copy_context_rd32,
-	.wr32 = _nouveau_copy_context_wr32,
+	.ctor = _nouveau_falcon_context_ctor,
+	.dtor = _nouveau_falcon_context_dtor,
+	.init = _nouveau_falcon_context_init,
+	.fini = _nouveau_falcon_context_fini,
+	.rd32 = _nouveau_falcon_context_rd32,
+	.wr32 = _nouveau_falcon_context_wr32,
 };
 
 static struct nouveau_oclass
@@ -104,50 +81,18 @@
  * PCOPY engine/subdev functions
  ******************************************************************************/
 
-static const struct nouveau_enum nvc0_copy_isr_error_name[] = {
-	{ 0x0001, "ILLEGAL_MTHD" },
-	{ 0x0002, "INVALID_ENUM" },
-	{ 0x0003, "INVALID_BITFIELD" },
-	{}
-};
-
-static void
-nvc0_copy_intr(struct nouveau_subdev *subdev)
+static int
+nvc0_copy_init(struct nouveau_object *object)
 {
-	struct nouveau_fifo *pfifo = nouveau_fifo(subdev);
-	struct nouveau_engine *engine = nv_engine(subdev);
-	struct nouveau_object *engctx;
-	int idx = nv_engidx(nv_object(subdev)) - NVDEV_ENGINE_COPY0;
-	struct nvc0_copy_priv *priv = (void *)subdev;
-	u32 disp = nv_rd32(priv, 0x10401c + (idx * 0x1000));
-	u32 intr = nv_rd32(priv, 0x104008 + (idx * 0x1000));
-	u32 stat = intr & disp & ~(disp >> 16);
-	u64 inst = nv_rd32(priv, 0x104050 + (idx * 0x1000)) & 0x0fffffff;
-	u32 ssta = nv_rd32(priv, 0x104040 + (idx * 0x1000)) & 0x0000ffff;
-	u32 addr = nv_rd32(priv, 0x104040 + (idx * 0x1000)) >> 16;
-	u32 mthd = (addr & 0x07ff) << 2;
-	u32 subc = (addr & 0x3800) >> 11;
-	u32 data = nv_rd32(priv, 0x104044 + (idx * 0x1000));
-	int chid;
+	struct nvc0_copy_priv *priv = (void *)object;
+	int ret;
 
-	engctx = nouveau_engctx_get(engine, inst);
-	chid   = pfifo->chid(pfifo, engctx);
+	ret = nouveau_falcon_init(&priv->base);
+	if (ret)
+		return ret;
 
-	if (stat & 0x00000040) {
-		nv_error(priv, "DISPATCH_ERROR [");
-		nouveau_enum_print(nvc0_copy_isr_error_name, ssta);
-		printk("] ch %d [0x%010llx] subc %d mthd 0x%04x data 0x%08x\n",
-		       chid, (u64)inst << 12, subc, mthd, data);
-		nv_wr32(priv, 0x104004 + (idx * 0x1000), 0x00000040);
-		stat &= ~0x00000040;
-	}
-
-	if (stat) {
-		nv_error(priv, "unhandled intr 0x%08x\n", stat);
-		nv_wr32(priv, 0x104004 + (idx * 0x1000), stat);
-	}
-
-	nouveau_engctx_put(engctx);
+	nv_wo32(priv, 0x084, nv_engidx(object) - NVDEV_ENGINE_COPY0);
+	return 0;
 }
 
 static int
@@ -161,15 +106,20 @@
 	if (nv_rd32(parent, 0x022500) & 0x00000100)
 		return -ENODEV;
 
-	ret = nouveau_copy_create(parent, engine, oclass, true, 0, &priv);
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x104000, true,
+				    "PCE0", "copy0", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00000040;
-	nv_subdev(priv)->intr = nvc0_copy_intr;
+	nv_subdev(priv)->intr = nva3_copy_intr;
 	nv_engine(priv)->cclass = &nvc0_copy0_cclass;
 	nv_engine(priv)->sclass = nvc0_copy0_sclass;
+	nv_falcon(priv)->code.data = nvc0_pcopy_code;
+	nv_falcon(priv)->code.size = sizeof(nvc0_pcopy_code);
+	nv_falcon(priv)->data.data = nvc0_pcopy_data;
+	nv_falcon(priv)->data.size = sizeof(nvc0_pcopy_data);
 	return 0;
 }
 
@@ -184,72 +134,33 @@
 	if (nv_rd32(parent, 0x022500) & 0x00000200)
 		return -ENODEV;
 
-	ret = nouveau_copy_create(parent, engine, oclass, true, 1, &priv);
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x105000, true,
+				    "PCE1", "copy1", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00000080;
-	nv_subdev(priv)->intr = nvc0_copy_intr;
+	nv_subdev(priv)->intr = nva3_copy_intr;
 	nv_engine(priv)->cclass = &nvc0_copy1_cclass;
 	nv_engine(priv)->sclass = nvc0_copy1_sclass;
+	nv_falcon(priv)->code.data = nvc0_pcopy_code;
+	nv_falcon(priv)->code.size = sizeof(nvc0_pcopy_code);
+	nv_falcon(priv)->data.data = nvc0_pcopy_data;
+	nv_falcon(priv)->data.size = sizeof(nvc0_pcopy_data);
 	return 0;
 }
 
-static int
-nvc0_copy_init(struct nouveau_object *object)
-{
-	int idx = nv_engidx(object) - NVDEV_ENGINE_COPY0;
-	struct nvc0_copy_priv *priv = (void *)object;
-	int ret, i;
-
-	ret = nouveau_copy_init(&priv->base);
-	if (ret)
-		return ret;
-
-	/* disable all interrupts */
-	nv_wr32(priv, 0x104014 + (idx * 0x1000), 0xffffffff);
-
-	/* upload ucode */
-	nv_wr32(priv, 0x1041c0 + (idx * 0x1000), 0x01000000);
-	for (i = 0; i < sizeof(nvc0_pcopy_data) / 4; i++)
-		nv_wr32(priv, 0x1041c4 + (idx * 0x1000), nvc0_pcopy_data[i]);
-
-	nv_wr32(priv, 0x104180 + (idx * 0x1000), 0x01000000);
-	for (i = 0; i < sizeof(nvc0_pcopy_code) / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nv_wr32(priv, 0x104188 + (idx * 0x1000), i >> 6);
-		nv_wr32(priv, 0x104184 + (idx * 0x1000), nvc0_pcopy_code[i]);
-	}
-
-	/* start it running */
-	nv_wr32(priv, 0x104084 + (idx * 0x1000), idx);
-	nv_wr32(priv, 0x10410c + (idx * 0x1000), 0x00000000);
-	nv_wr32(priv, 0x104104 + (idx * 0x1000), 0x00000000); /* ENTRY */
-	nv_wr32(priv, 0x104100 + (idx * 0x1000), 0x00000002); /* TRIGGER */
-	return 0;
-}
-
-static int
-nvc0_copy_fini(struct nouveau_object *object, bool suspend)
-{
-	int idx = nv_engidx(object) - NVDEV_ENGINE_COPY0;
-	struct nvc0_copy_priv *priv = (void *)object;
-
-	nv_mask(priv, 0x104048 + (idx * 0x1000), 0x00000003, 0x00000000);
-	nv_wr32(priv, 0x104014 + (idx * 0x1000), 0xffffffff);
-
-	return nouveau_copy_fini(&priv->base, suspend);
-}
-
 struct nouveau_oclass
 nvc0_copy0_oclass = {
 	.handle = NV_ENGINE(COPY0, 0xc0),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nvc0_copy0_ctor,
-		.dtor = _nouveau_copy_dtor,
+		.dtor = _nouveau_falcon_dtor,
 		.init = nvc0_copy_init,
-		.fini = nvc0_copy_fini,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
 	},
 };
 
@@ -258,8 +169,10 @@
 	.handle = NV_ENGINE(COPY1, 0xc0),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nvc0_copy1_ctor,
-		.dtor = _nouveau_copy_dtor,
+		.dtor = _nouveau_falcon_dtor,
 		.init = nvc0_copy_init,
-		.fini = nvc0_copy_fini,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
 	},
 };
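
[Editor's note: also visible in the PCOPY conversion above: register access now goes through the falcon accessors, which add the unit base (the falcon->addr passed to nouveau_falcon_create()) to every offset, so the nv_wo32(priv, 0x084, ...) in nvc0_copy_init() lands on 0x104084 for PCE0 and 0x105084 for PCE1, the same registers the deleted idx * 0x1000 arithmetic used to address. A minimal model of that translation:]

#include <stdio.h>
#include <stdint.h>

struct falcon { uint32_t addr; };	/* unit MMIO base */

/* models _nouveau_falcon_wr32(): offsets are relative to the unit base */
static void falcon_wr32(struct falcon *f, uint32_t off, uint32_t data)
{
	printf("mmio 0x%06x <- 0x%08x\n",
	       (unsigned)(f->addr + off), (unsigned)data);
}

int main(void)
{
	struct falcon pce0 = { .addr = 0x104000 };
	struct falcon pce1 = { .addr = 0x105000 };

	falcon_wr32(&pce0, 0x084, 0);	/* mmio 0x104084 <- engine index 0 */
	falcon_wr32(&pce1, 0x084, 1);	/* mmio 0x105084 <- engine index 1 */
	return 0;
}
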
diff --git a/drivers/gpu/drm/nouveau/core/engine/copy/nve0.c b/drivers/gpu/drm/nouveau/core/engine/copy/nve0.c
index 2017c15..dbbe9e8 100644
--- a/drivers/gpu/drm/nouveau/core/engine/copy/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/copy/nve0.c
@@ -30,11 +30,7 @@
 #include <engine/copy.h>
 
 struct nve0_copy_priv {
-	struct nouveau_copy base;
-};
-
-struct nve0_copy_chan {
-	struct nouveau_copy_chan base;
+	struct nouveau_engine base;
 };
 
 /*******************************************************************************
@@ -51,32 +47,14 @@
  * PCOPY context
  ******************************************************************************/
 
-static int
-nve0_copy_context_ctor(struct nouveau_object *parent,
-		       struct nouveau_object *engine,
-		       struct nouveau_oclass *oclass, void *data, u32 size,
-		       struct nouveau_object **pobject)
-{
-	struct nve0_copy_chan *priv;
-	int ret;
-
-	ret = nouveau_copy_context_create(parent, engine, oclass, NULL, 256,
-					  256, NVOBJ_FLAG_ZERO_ALLOC, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static struct nouveau_ofuncs
 nve0_copy_context_ofuncs = {
-	.ctor = nve0_copy_context_ctor,
-	.dtor = _nouveau_copy_context_dtor,
-	.init = _nouveau_copy_context_init,
-	.fini = _nouveau_copy_context_fini,
-	.rd32 = _nouveau_copy_context_rd32,
-	.wr32 = _nouveau_copy_context_wr32,
+	.ctor = _nouveau_engctx_ctor,
+	.dtor = _nouveau_engctx_dtor,
+	.init = _nouveau_engctx_init,
+	.fini = _nouveau_engctx_fini,
+	.rd32 = _nouveau_engctx_rd32,
+	.wr32 = _nouveau_engctx_wr32,
 };
 
 static struct nouveau_oclass
@@ -100,7 +78,8 @@
 	if (nv_rd32(parent, 0x022500) & 0x00000100)
 		return -ENODEV;
 
-	ret = nouveau_copy_create(parent, engine, oclass, true, 0, &priv);
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PCE0", "copy0", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
@@ -122,7 +101,8 @@
 	if (nv_rd32(parent, 0x022500) & 0x00000200)
 		return -ENODEV;
 
-	ret = nouveau_copy_create(parent, engine, oclass, true, 1, &priv);
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PCE1", "copy1", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
@@ -138,9 +118,9 @@
 	.handle = NV_ENGINE(COPY0, 0xe0),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nve0_copy0_ctor,
-		.dtor = _nouveau_copy_dtor,
-		.init = _nouveau_copy_init,
-		.fini = _nouveau_copy_fini,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
 	},
 };
 
@@ -149,8 +129,8 @@
 	.handle = NV_ENGINE(COPY1, 0xe0),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nve0_copy1_ctor,
-		.dtor = _nouveau_copy_dtor,
-		.init = _nouveau_copy_init,
-		.fini = _nouveau_copy_fini,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/core/engine/crypt/nv84.c b/drivers/gpu/drm/nouveau/core/engine/crypt/nv84.c
index 1d85e5b..b974905 100644
--- a/drivers/gpu/drm/nouveau/core/engine/crypt/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/engine/crypt/nv84.c
@@ -34,11 +34,7 @@
 #include <engine/crypt.h>
 
 struct nv84_crypt_priv {
-	struct nouveau_crypt base;
-};
-
-struct nv84_crypt_chan {
-	struct nouveau_crypt_chan base;
+	struct nouveau_engine base;
 };
 
 /*******************************************************************************
@@ -87,34 +83,16 @@
  * PCRYPT context
  ******************************************************************************/
 
-static int
-nv84_crypt_context_ctor(struct nouveau_object *parent,
-			struct nouveau_object *engine,
-			struct nouveau_oclass *oclass, void *data, u32 size,
-			struct nouveau_object **pobject)
-{
-	struct nv84_crypt_chan *priv;
-	int ret;
-
-	ret = nouveau_crypt_context_create(parent, engine, oclass, NULL, 256,
-					   0, NVOBJ_FLAG_ZERO_ALLOC, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static struct nouveau_oclass
 nv84_crypt_cclass = {
 	.handle = NV_ENGCTX(CRYPT, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv84_crypt_context_ctor,
-		.dtor = _nouveau_crypt_context_dtor,
-		.init = _nouveau_crypt_context_init,
-		.fini = _nouveau_crypt_context_fini,
-		.rd32 = _nouveau_crypt_context_rd32,
-		.wr32 = _nouveau_crypt_context_wr32,
+		.ctor = _nouveau_engctx_ctor,
+		.dtor = _nouveau_engctx_dtor,
+		.init = _nouveau_engctx_init,
+		.fini = _nouveau_engctx_fini,
+		.rd32 = _nouveau_engctx_rd32,
+		.wr32 = _nouveau_engctx_wr32,
 	},
 };
 
@@ -157,7 +135,6 @@
 	nv_wr32(priv, 0x102130, stat);
 	nv_wr32(priv, 0x10200c, 0x10);
 
-	nv50_fb_trap(nouveau_fb(priv), 1);
 	nouveau_engctx_put(engctx);
 }
 
@@ -176,7 +153,8 @@
 	struct nv84_crypt_priv *priv;
 	int ret;
 
-	ret = nouveau_crypt_create(parent, engine, oclass, &priv);
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PCRYPT", "crypt", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
@@ -195,7 +173,7 @@
 	struct nv84_crypt_priv *priv = (void *)object;
 	int ret;
 
-	ret = nouveau_crypt_init(&priv->base);
+	ret = nouveau_engine_init(&priv->base);
 	if (ret)
 		return ret;
 
@@ -210,8 +188,8 @@
 	.handle = NV_ENGINE(CRYPT, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv84_crypt_ctor,
-		.dtor = _nouveau_crypt_dtor,
+		.dtor = _nouveau_engine_dtor,
 		.init = nv84_crypt_init,
-		.fini = _nouveau_crypt_fini,
+		.fini = _nouveau_engine_fini,
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/core/engine/crypt/nv98.c b/drivers/gpu/drm/nouveau/core/engine/crypt/nv98.c
index 9e3876c..21986f3 100644
--- a/drivers/gpu/drm/nouveau/core/engine/crypt/nv98.c
+++ b/drivers/gpu/drm/nouveau/core/engine/crypt/nv98.c
@@ -26,6 +26,7 @@
 #include <core/enum.h>
 #include <core/class.h>
 #include <core/engctx.h>
+#include <core/falcon.h>
 
 #include <subdev/timer.h>
 #include <subdev/fb.h>
@@ -36,11 +37,7 @@
 #include "fuc/nv98.fuc.h"
 
 struct nv98_crypt_priv {
-	struct nouveau_crypt base;
-};
-
-struct nv98_crypt_chan {
-	struct nouveau_crypt_chan base;
+	struct nouveau_falcon base;
 };
 
 /*******************************************************************************
@@ -57,34 +54,16 @@
  * PCRYPT context
  ******************************************************************************/
 
-static int
-nv98_crypt_context_ctor(struct nouveau_object *parent,
-			struct nouveau_object *engine,
-			struct nouveau_oclass *oclass, void *data, u32 size,
-			struct nouveau_object **pobject)
-{
-	struct nv98_crypt_chan *priv;
-	int ret;
-
-	ret = nouveau_crypt_context_create(parent, engine, oclass, NULL, 256,
-					   256, NVOBJ_FLAG_ZERO_ALLOC, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static struct nouveau_oclass
 nv98_crypt_cclass = {
 	.handle = NV_ENGCTX(CRYPT, 0x98),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv98_crypt_context_ctor,
-		.dtor = _nouveau_crypt_context_dtor,
-		.init = _nouveau_crypt_context_init,
-		.fini = _nouveau_crypt_context_fini,
-		.rd32 = _nouveau_crypt_context_rd32,
-		.wr32 = _nouveau_crypt_context_wr32,
+		.ctor = _nouveau_falcon_context_ctor,
+		.dtor = _nouveau_falcon_context_dtor,
+		.init = _nouveau_falcon_context_init,
+		.fini = _nouveau_falcon_context_fini,
+		.rd32 = _nouveau_falcon_context_rd32,
+		.wr32 = _nouveau_falcon_context_wr32,
 	},
 };
 
@@ -134,7 +113,6 @@
 		nv_wr32(priv, 0x087004, stat);
 	}
 
-	nv50_fb_trap(nouveau_fb(priv), 1);
 	nouveau_engctx_put(engctx);
 }
 
@@ -153,7 +131,8 @@
 	struct nv98_crypt_priv *priv;
 	int ret;
 
-	ret = nouveau_crypt_create(parent, engine, oclass, &priv);
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x087000, true,
+				    "PCRYPT", "crypt", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
@@ -163,36 +142,10 @@
 	nv_engine(priv)->cclass = &nv98_crypt_cclass;
 	nv_engine(priv)->sclass = nv98_crypt_sclass;
 	nv_engine(priv)->tlb_flush = nv98_crypt_tlb_flush;
-	return 0;
-}
-
-static int
-nv98_crypt_init(struct nouveau_object *object)
-{
-	struct nv98_crypt_priv *priv = (void *)object;
-	int ret, i;
-
-	ret = nouveau_crypt_init(&priv->base);
-	if (ret)
-		return ret;
-
-	/* wait for exit interrupt to signal */
-	nv_wait(priv, 0x087008, 0x00000010, 0x00000010);
-	nv_wr32(priv, 0x087004, 0x00000010);
-
-	/* upload microcode code and data segments */
-	nv_wr32(priv, 0x087ff8, 0x00100000);
-	for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_code); i++)
-		nv_wr32(priv, 0x087ff4, nv98_pcrypt_code[i]);
-
-	nv_wr32(priv, 0x087ff8, 0x00000000);
-	for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_data); i++)
-		nv_wr32(priv, 0x087ff4, nv98_pcrypt_data[i]);
-
-	/* start it running */
-	nv_wr32(priv, 0x08710c, 0x00000000);
-	nv_wr32(priv, 0x087104, 0x00000000); /* ENTRY */
-	nv_wr32(priv, 0x087100, 0x00000002); /* TRIGGER */
+	nv_falcon(priv)->code.data = nv98_pcrypt_code;
+	nv_falcon(priv)->code.size = sizeof(nv98_pcrypt_code);
+	nv_falcon(priv)->data.data = nv98_pcrypt_data;
+	nv_falcon(priv)->data.size = sizeof(nv98_pcrypt_data);
 	return 0;
 }
 
@@ -201,8 +154,10 @@
 	.handle = NV_ENGINE(CRYPT, 0x98),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv98_crypt_ctor,
-		.dtor = _nouveau_crypt_dtor,
-		.init = nv98_crypt_init,
-		.fini = _nouveau_crypt_fini,
+		.dtor = _nouveau_falcon_dtor,
+		.init = _nouveau_falcon_init,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
 	},
 };
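
nv98 instead moves onto the shared falcon base class: rather than hand-uploading the microcode in the now-removed nv98_crypt_init(), the ctor points nv_falcon(priv)->code/data at the ucode arrays and lets the generic falcon init perform the upload. A standalone model of what that upload amounts to, assuming the addr/data register-pair behaviour visible in the deleted init code (0x087ff8 selects a segment, 0x087ff4 streams words into it):

#include <stddef.h>
#include <stdio.h>

/* stub standing in for nv_wr32(); prints instead of touching hardware */
static void wr32(unsigned int reg, unsigned int val)
{
	printf("wr32(0x%06x, 0x%08x)\n", reg, val);
}

static void falcon_load(unsigned int base,
			const unsigned int *code, size_t code_len,
			const unsigned int *data, size_t data_len)
{
	size_t i;

	wr32(base + 0xff8, 0x00100000);          /* select the code segment */
	for (i = 0; i < code_len; i++)
		wr32(base + 0xff4, code[i]);     /* stream words into the port */

	wr32(base + 0xff8, 0x00000000);          /* select the data segment */
	for (i = 0; i < data_len; i++)
		wr32(base + 0xff4, data[i]);
}

int main(void)
{
	const unsigned int code[] = { 0xdeadbeef };  /* placeholder ucode */
	const unsigned int data[] = { 0x00000001 };

	falcon_load(0x087000, code, 1, data, 1);     /* 0x087000: PCRYPT unit */
	return 0;
}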
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/dacnv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/dacnv50.c
new file mode 100644
index 0000000..d0817d9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/dacnv50.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/timer.h>
+
+#include "nv50.h"
+
+int
+nv50_dac_power(struct nv50_disp_priv *priv, int or, u32 data)
+{
+	const u32 stat = (data & NV50_DISP_DAC_PWR_HSYNC) |
+		         (data & NV50_DISP_DAC_PWR_VSYNC) |
+		         (data & NV50_DISP_DAC_PWR_DATA) |
+		         (data & NV50_DISP_DAC_PWR_STATE);
+	const u32 doff = (or * 0x800);
+	nv_wait(priv, 0x61a004 + doff, 0x80000000, 0x00000000);
+	nv_mask(priv, 0x61a004 + doff, 0xc000007f, 0x80000000 | stat);
+	nv_wait(priv, 0x61a004 + doff, 0x80000000, 0x00000000);
+	return 0;
+}
+
+int
+nv50_dac_sense(struct nv50_disp_priv *priv, int or, u32 loadval)
+{
+	const u32 doff = (or * 0x800);
+	int load = -EINVAL;
+	nv_wr32(priv, 0x61a00c + doff, 0x00100000 | loadval);
+	udelay(9500);
+	nv_wr32(priv, 0x61a00c + doff, 0x80000000);
+	load = (nv_rd32(priv, 0x61a00c + doff) & 0x38000000) >> 27;
+	nv_wr32(priv, 0x61a00c + doff, 0x00000000);
+	return load;
+}
+
+int
+nv50_dac_mthd(struct nouveau_object *object, u32 mthd, void *args, u32 size)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	const u8 or = (mthd & NV50_DISP_DAC_MTHD_OR);
+	u32 *data = args;
+	int ret;
+
+	if (size < sizeof(u32))
+		return -EINVAL;
+
+	switch (mthd & ~0x3f) {
+	case NV50_DISP_DAC_PWR:
+		ret = priv->dac.power(priv, or, data[0]);
+		break;
+	case NV50_DISP_DAC_LOAD:
+		ret = priv->dac.sense(priv, or, data[0]);
+		if (ret >= 0) {
+			data[0] = ret;
+			ret = 0;
+		}
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+	return ret;
+}
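
nv50_dac_mthd() gives NV50_DISP_DAC_LOAD an in/out calling convention: the caller passes the load value in data[0] and, on success, reads the 3-bit sense result back from the same slot. A userspace sketch of that contract (the dac_sense stub is illustrative, not the hardware's behaviour):

#include <stdio.h>

/* illustrative stand-in for priv->dac.sense(); real hw returns a 3-bit
 * per-channel load status */
static int dac_sense(int or, unsigned int loadval)
{
	(void)or;
	return loadval ? 0x7 : 0x0;
}

/* the NV50_DISP_DAC_LOAD contract: argument in data[0], result out data[0] */
static int dac_load_mthd(int or, unsigned int *data)
{
	int ret = dac_sense(or, data[0]);
	if (ret >= 0) {
		data[0] = ret;
		ret = 0;
	}
	return ret;
}

int main(void)
{
	unsigned int arg = 0x00340000;     /* illustrative load value */

	if (dac_load_mthd(0, &arg) == 0)
		printf("load status: 0x%x\n", arg);
	return 0;
}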
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
new file mode 100644
index 0000000..373dbcc5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include "nv50.h"
+
+int
+nva3_hda_eld(struct nv50_disp_priv *priv, int or, u8 *data, u32 size)
+{
+	const u32 soff = (or * 0x800);
+	int i;
+
+	if (data && data[0]) {
+		for (i = 0; i < size; i++)
+			nv_wr32(priv, 0x61c440 + soff, (i << 8) | data[i]);
+		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000003);
+	} else
+	if (data) {
+		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000001);
+	} else {
+		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000000);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
new file mode 100644
index 0000000..dc57e24
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/bios/dp.h>
+#include <subdev/bios/init.h>
+
+#include "nv50.h"
+
+int
+nvd0_hda_eld(struct nv50_disp_priv *priv, int or, u8 *data, u32 size)
+{
+	const u32 soff = (or * 0x030);
+	int i;
+
+	if (data && data[0]) {
+		for (i = 0; i < size; i++)
+			nv_wr32(priv, 0x10ec00 + soff, (i << 8) | data[i]);
+		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000003);
+	} else
+	if (data) {
+		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000001);
+	} else {
+		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000000);
+	}
+
+	return 0;
+}
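
nva3_hda_eld() above and the nvd0 variant that follows differ only in register base and stride; both stream the ELD one byte per write as (index << 8) | byte and then pick one of three control states. A userspace model of the sequence (offsets taken from the nva3 variant for OR 0; the driver masks the control register rather than writing it outright, simplified here):

#include <stdio.h>

/* stub standing in for the driver's control-register update */
static void ctrl(unsigned int base, unsigned int state)
{
	printf("eld ctrl 0x%06x <- 0x%08x\n", base + 0x448, state);
}

static void hda_eld(unsigned int base, const unsigned char *eld,
		    unsigned int size)
{
	unsigned int i;

	if (eld && eld[0]) {
		for (i = 0; i < size; i++)   /* one byte per write: index|data */
			printf("eld data 0x%06x <- 0x%08x\n",
			       base + 0x440, (i << 8) | eld[i]);
		ctrl(base, 0x80000003);      /* ELD valid, audio enabled */
	} else if (eld) {
		ctrl(base, 0x80000001);      /* keep powered, no ELD yet */
	} else {
		ctrl(base, 0x80000000);      /* audio disabled */
	}
}

int main(void)
{
	const unsigned char eld[4] = { 0x10, 0x00, 0x08, 0x00 }; /* fake header */
	hda_eld(0x61c000, eld, sizeof(eld));
	return 0;
}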
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdminv84.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdminv84.c
new file mode 100644
index 0000000..0d36bdc
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdminv84.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include "nv50.h"
+
+int
+nv84_hdmi_ctrl(struct nv50_disp_priv *priv, int head, int or, u32 data)
+{
+	const u32 hoff = (head * 0x800);
+
+	if (!(data & NV84_DISP_SOR_HDMI_PWR_STATE_ON)) {
+		nv_mask(priv, 0x6165a4 + hoff, 0x40000000, 0x00000000);
+		nv_mask(priv, 0x616520 + hoff, 0x00000001, 0x00000000);
+		nv_mask(priv, 0x616500 + hoff, 0x00000001, 0x00000000);
+		return 0;
+	}
+
+	/* AVI InfoFrame */
+	nv_mask(priv, 0x616520 + hoff, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x616528 + hoff, 0x000d0282);
+	nv_wr32(priv, 0x61652c + hoff, 0x0000006f);
+	nv_wr32(priv, 0x616530 + hoff, 0x00000000);
+	nv_wr32(priv, 0x616534 + hoff, 0x00000000);
+	nv_wr32(priv, 0x616538 + hoff, 0x00000000);
+	nv_mask(priv, 0x616520 + hoff, 0x00000001, 0x00000001);
+
+	/* Audio InfoFrame */
+	nv_mask(priv, 0x616500 + hoff, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x616508 + hoff, 0x000a0184);
+	nv_wr32(priv, 0x61650c + hoff, 0x00000071);
+	nv_wr32(priv, 0x616510 + hoff, 0x00000000);
+	nv_mask(priv, 0x616500 + hoff, 0x00000001, 0x00000001);
+
+	/* ??? */
+	nv_mask(priv, 0x61733c, 0x00100000, 0x00100000); /* RESETF */
+	nv_mask(priv, 0x61733c, 0x10000000, 0x10000000); /* LOOKUP_EN */
+	nv_mask(priv, 0x61733c, 0x00100000, 0x00000000); /* !RESETF */
+
+	/* HDMI_CTRL */
+	nv_mask(priv, 0x6165a4 + hoff, 0x5f1f007f, data | 0x1f000000 /* ??? */);
+	return 0;
+}
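
The header words written above appear to pack the standard CEA-861 InfoFrame header as length << 16 | version << 8 | type: 0x000d0282 matches an AVI InfoFrame (type 0x82, version 2, length 13) and 0x000a0184 an Audio InfoFrame (type 0x84, version 1, length 10). A sketch of the packing under that assumption:

#include <stdio.h>

/* pack a CEA-861 InfoFrame header word: length | version | type */
static unsigned int infoframe_hdr(unsigned int type, unsigned int version,
				  unsigned int length)
{
	return length << 16 | version << 8 | type;
}

int main(void)
{
	printf("AVI:   0x%08x\n", infoframe_hdr(0x82, 2, 13)); /* 0x000d0282 */
	printf("Audio: 0x%08x\n", infoframe_hdr(0x84, 1, 10)); /* 0x000a0184 */
	return 0;
}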
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c
new file mode 100644
index 0000000..f065fc2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include "nv50.h"
+
+int
+nva3_hdmi_ctrl(struct nv50_disp_priv *priv, int head, int or, u32 data)
+{
+	const u32 soff = (or * 0x800);
+
+	if (!(data & NV84_DISP_SOR_HDMI_PWR_STATE_ON)) {
+		nv_mask(priv, 0x61c5a4 + soff, 0x40000000, 0x00000000);
+		nv_mask(priv, 0x61c520 + soff, 0x00000001, 0x00000000);
+		nv_mask(priv, 0x61c500 + soff, 0x00000001, 0x00000000);
+		return 0;
+	}
+
+	/* AVI InfoFrame */
+	nv_mask(priv, 0x61c520 + soff, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x61c528 + soff, 0x000d0282);
+	nv_wr32(priv, 0x61c52c + soff, 0x0000006f);
+	nv_wr32(priv, 0x61c530 + soff, 0x00000000);
+	nv_wr32(priv, 0x61c534 + soff, 0x00000000);
+	nv_wr32(priv, 0x61c538 + soff, 0x00000000);
+	nv_mask(priv, 0x61c520 + soff, 0x00000001, 0x00000001);
+
+	/* Audio InfoFrame */
+	nv_mask(priv, 0x61c500 + soff, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x61c508 + soff, 0x000a0184);
+	nv_wr32(priv, 0x61c50c + soff, 0x00000071);
+	nv_wr32(priv, 0x61c510 + soff, 0x00000000);
+	nv_mask(priv, 0x61c500 + soff, 0x00000001, 0x00000001);
+
+	/* ??? */
+	nv_mask(priv, 0x61733c, 0x00100000, 0x00100000); /* RESETF */
+	nv_mask(priv, 0x61733c, 0x10000000, 0x10000000); /* LOOKUP_EN */
+	nv_mask(priv, 0x61733c, 0x00100000, 0x00000000); /* !RESETF */
+
+	/* HDMI_CTRL */
+	nv_mask(priv, 0x61c5a4 + soff, 0x5f1f007f, data | 0x1f000000 /* ??? */);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdminvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdminvd0.c
new file mode 100644
index 0000000..5151bb2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdminvd0.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include "nv50.h"
+
+int
+nvd0_hdmi_ctrl(struct nv50_disp_priv *priv, int head, int or, u32 data)
+{
+	const u32 hoff = (head * 0x800);
+
+	if (!(data & NV84_DISP_SOR_HDMI_PWR_STATE_ON)) {
+		nv_mask(priv, 0x616798 + hoff, 0x40000000, 0x00000000);
+		nv_mask(priv, 0x6167a4 + hoff, 0x00000001, 0x00000000);
+		nv_mask(priv, 0x616714 + hoff, 0x00000001, 0x00000000);
+		return 0;
+	}
+
+	/* AVI InfoFrame */
+	nv_mask(priv, 0x616714 + hoff, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x61671c + hoff, 0x000d0282);
+	nv_wr32(priv, 0x616720 + hoff, 0x0000006f);
+	nv_wr32(priv, 0x616724 + hoff, 0x00000000);
+	nv_wr32(priv, 0x616728 + hoff, 0x00000000);
+	nv_wr32(priv, 0x61672c + hoff, 0x00000000);
+	nv_mask(priv, 0x616714 + hoff, 0x00000001, 0x00000001);
+
+	/* ??? InfoFrame? */
+	nv_mask(priv, 0x6167a4 + hoff, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x6167ac + hoff, 0x00000010);
+	nv_mask(priv, 0x6167a4 + hoff, 0x00000001, 0x00000001);
+
+	/* HDMI_CTRL */
+	nv_mask(priv, 0x616798 + hoff, 0x401f007f, data);
+
+	/* NFI, audio doesn't work without it though.. */
+	nv_mask(priv, 0x616548 + hoff, 0x00000070, 0x00000000);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
index 15b182c..0f09af1 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
@@ -22,20 +22,740 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/bar.h>
+#include <core/object.h>
+#include <core/parent.h>
+#include <core/handle.h>
+#include <core/class.h>
 
 #include <engine/software.h>
 #include <engine/disp.h>
 
-struct nv50_disp_priv {
-	struct nouveau_disp base;
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/bios/disp.h>
+#include <subdev/bios/init.h>
+#include <subdev/bios/pll.h>
+#include <subdev/timer.h>
+#include <subdev/fb.h>
+#include <subdev/bar.h>
+#include <subdev/clock.h>
+
+#include "nv50.h"
+
+/*******************************************************************************
+ * EVO channel base class
+ ******************************************************************************/
+
+int
+nv50_disp_chan_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass, int chid,
+		       int length, void **pobject)
+{
+	struct nv50_disp_base *base = (void *)parent;
+	struct nv50_disp_chan *chan;
+	int ret;
+
+	if (base->chan & (1 << chid))
+		return -EBUSY;
+	base->chan |= (1 << chid);
+
+	ret = nouveau_namedb_create_(parent, engine, oclass, 0, NULL,
+				     (1ULL << NVDEV_ENGINE_DMAOBJ),
+				     length, pobject);
+	chan = *pobject;
+	if (ret)
+		return ret;
+
+	chan->chid = chid;
+	return 0;
+}
+
+void
+nv50_disp_chan_destroy(struct nv50_disp_chan *chan)
+{
+	struct nv50_disp_base *base = (void *)nv_object(chan)->parent;
+	base->chan &= ~(1 << chan->chid);
+	nouveau_namedb_destroy(&chan->base);
+}
+
+u32
+nv50_disp_chan_rd32(struct nouveau_object *object, u64 addr)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_chan *chan = (void *)object;
+	return nv_rd32(priv, 0x640000 + (chan->chid * 0x1000) + addr);
+}
+
+void
+nv50_disp_chan_wr32(struct nouveau_object *object, u64 addr, u32 data)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_chan *chan = (void *)object;
+	nv_wr32(priv, 0x640000 + (chan->chid * 0x1000) + addr, data);
+}
+
+/*******************************************************************************
+ * EVO DMA channel base class
+ ******************************************************************************/
+
+static int
+nv50_disp_dmac_object_attach(struct nouveau_object *parent,
+			     struct nouveau_object *object, u32 name)
+{
+	struct nv50_disp_base *base = (void *)parent->parent;
+	struct nv50_disp_chan *chan = (void *)parent;
+	u32 addr = nv_gpuobj(object)->node->offset;
+	u32 chid = chan->chid;
+	u32 data = (chid << 28) | (addr << 10) | chid;
+	return nouveau_ramht_insert(base->ramht, chid, name, data);
+}
+
+static void
+nv50_disp_dmac_object_detach(struct nouveau_object *parent, int cookie)
+{
+	struct nv50_disp_base *base = (void *)parent->parent;
+	nouveau_ramht_remove(base->ramht, cookie);
+}
+
+int
+nv50_disp_dmac_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass, u32 pushbuf, int chid,
+		       int length, void **pobject)
+{
+	struct nv50_disp_dmac *dmac;
+	int ret;
+
+	ret = nv50_disp_chan_create_(parent, engine, oclass, chid,
+				     length, pobject);
+	dmac = *pobject;
+	if (ret)
+		return ret;
+
+	dmac->pushdma = (void *)nouveau_handle_ref(parent, pushbuf);
+	if (!dmac->pushdma)
+		return -ENOENT;
+
+	switch (nv_mclass(dmac->pushdma)) {
+	case 0x0002:
+	case 0x003d:
+		if (dmac->pushdma->limit - dmac->pushdma->start != 0xfff)
+			return -EINVAL;
+
+		switch (dmac->pushdma->target) {
+		case NV_MEM_TARGET_VRAM:
+			dmac->push = 0x00000000 | dmac->pushdma->start >> 8;
+			break;
+		case NV_MEM_TARGET_PCI_NOSNOOP:
+			dmac->push = 0x00000003 | dmac->pushdma->start >> 8;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void
+nv50_disp_dmac_dtor(struct nouveau_object *object)
+{
+	struct nv50_disp_dmac *dmac = (void *)object;
+	nouveau_object_ref(NULL, (struct nouveau_object **)&dmac->pushdma);
+	nv50_disp_chan_destroy(&dmac->base);
+}
+
+static int
+nv50_disp_dmac_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *dmac = (void *)object;
+	int chid = dmac->base.chid;
+	int ret;
+
+	ret = nv50_disp_chan_init(&dmac->base);
+	if (ret)
+		return ret;
+
+	/* enable error reporting */
+	nv_mask(priv, 0x610028, 0x00010001 << chid, 0x00010001 << chid);
+
+	/* initialise channel for dma command submission */
+	nv_wr32(priv, 0x610204 + (chid * 0x0010), dmac->push);
+	nv_wr32(priv, 0x610208 + (chid * 0x0010), 0x00010000);
+	nv_wr32(priv, 0x61020c + (chid * 0x0010), chid);
+	nv_mask(priv, 0x610200 + (chid * 0x0010), 0x00000010, 0x00000010);
+	nv_wr32(priv, 0x640000 + (chid * 0x1000), 0x00000000);
+	nv_wr32(priv, 0x610200 + (chid * 0x0010), 0x00000013);
+
+	/* wait for it to go inactive */
+	if (!nv_wait(priv, 0x610200 + (chid * 0x10), 0x80000000, 0x00000000)) {
+		nv_error(dmac, "init timeout, 0x%08x\n",
+			 nv_rd32(priv, 0x610200 + (chid * 0x10)));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nv50_disp_dmac_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *dmac = (void *)object;
+	int chid = dmac->base.chid;
+
+	/* deactivate channel */
+	nv_mask(priv, 0x610200 + (chid * 0x0010), 0x00001010, 0x00001000);
+	nv_mask(priv, 0x610200 + (chid * 0x0010), 0x00000003, 0x00000000);
+	if (!nv_wait(priv, 0x610200 + (chid * 0x10), 0x001e0000, 0x00000000)) {
+		nv_error(dmac, "fini timeout, 0x%08x\n",
+			 nv_rd32(priv, 0x610200 + (chid * 0x10)));
+		if (suspend)
+			return -EBUSY;
+	}
+
+	/* disable error reporting */
+	nv_mask(priv, 0x610028, 0x00010001 << chid, 0x00000000 << chid);
+
+	return nv50_disp_chan_fini(&dmac->base, suspend);
+}
+
+/*******************************************************************************
+ * EVO master channel object
+ ******************************************************************************/
+
+static int
+nv50_disp_mast_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_mast_class *args = data;
+	struct nv50_disp_dmac *mast;
+	int ret;
+
+	if (size < sizeof(*args))
+		return -EINVAL;
+
+	ret = nv50_disp_dmac_create_(parent, engine, oclass, args->pushbuf,
+				     0, sizeof(*mast), (void **)&mast);
+	*pobject = nv_object(mast);
+	if (ret)
+		return ret;
+
+	nv_parent(mast)->object_attach = nv50_disp_dmac_object_attach;
+	nv_parent(mast)->object_detach = nv50_disp_dmac_object_detach;
+	return 0;
+}
+
+static int
+nv50_disp_mast_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *mast = (void *)object;
+	int ret;
+
+	ret = nv50_disp_chan_init(&mast->base);
+	if (ret)
+		return ret;
+
+	/* enable error reporting */
+	nv_mask(priv, 0x610028, 0x00010001, 0x00010001);
+
+	/* attempt to unstick channel from some unknown state */
+	if ((nv_rd32(priv, 0x610200) & 0x009f0000) == 0x00020000)
+		nv_mask(priv, 0x610200, 0x00800000, 0x00800000);
+	if ((nv_rd32(priv, 0x610200) & 0x003f0000) == 0x00030000)
+		nv_mask(priv, 0x610200, 0x00600000, 0x00600000);
+
+	/* initialise channel for dma command submission */
+	nv_wr32(priv, 0x610204, mast->push);
+	nv_wr32(priv, 0x610208, 0x00010000);
+	nv_wr32(priv, 0x61020c, 0x00000000);
+	nv_mask(priv, 0x610200, 0x00000010, 0x00000010);
+	nv_wr32(priv, 0x640000, 0x00000000);
+	nv_wr32(priv, 0x610200, 0x01000013);
+
+	/* wait for it to go inactive */
+	if (!nv_wait(priv, 0x610200, 0x80000000, 0x00000000)) {
+		nv_error(mast, "init: 0x%08x\n", nv_rd32(priv, 0x610200));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nv50_disp_mast_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *mast = (void *)object;
+
+	/* deactivate channel */
+	nv_mask(priv, 0x610200, 0x00000010, 0x00000000);
+	nv_mask(priv, 0x610200, 0x00000003, 0x00000000);
+	if (!nv_wait(priv, 0x610200, 0x001e0000, 0x00000000)) {
+		nv_error(mast, "fini: 0x%08x\n", nv_rd32(priv, 0x610200));
+		if (suspend)
+			return -EBUSY;
+	}
+
+	/* disable error reporting */
+	nv_mask(priv, 0x610028, 0x00010001, 0x00000000);
+
+	return nv50_disp_chan_fini(&mast->base, suspend);
+}
+
+struct nouveau_ofuncs
+nv50_disp_mast_ofuncs = {
+	.ctor = nv50_disp_mast_ctor,
+	.dtor = nv50_disp_dmac_dtor,
+	.init = nv50_disp_mast_init,
+	.fini = nv50_disp_mast_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO sync channel objects
+ ******************************************************************************/
+
+static int
+nv50_disp_sync_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_sync_class *args = data;
+	struct nv50_disp_dmac *dmac;
+	int ret;
+
+	if (size < sizeof(*args) || args->head > 1)
+		return -EINVAL;
+
+	ret = nv50_disp_dmac_create_(parent, engine, oclass, args->pushbuf,
+				     1 + args->head, sizeof(*dmac),
+				     (void **)&dmac);
+	*pobject = nv_object(dmac);
+	if (ret)
+		return ret;
+
+	nv_parent(dmac)->object_attach = nv50_disp_dmac_object_attach;
+	nv_parent(dmac)->object_detach = nv50_disp_dmac_object_detach;
+	return 0;
+}
+
+struct nouveau_ofuncs
+nv50_disp_sync_ofuncs = {
+	.ctor = nv50_disp_sync_ctor,
+	.dtor = nv50_disp_dmac_dtor,
+	.init = nv50_disp_dmac_init,
+	.fini = nv50_disp_dmac_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO overlay channel objects
+ ******************************************************************************/
+
+static int
+nv50_disp_ovly_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_ovly_class *args = data;
+	struct nv50_disp_dmac *dmac;
+	int ret;
+
+	if (size < sizeof(*args) || args->head > 1)
+		return -EINVAL;
+
+	ret = nv50_disp_dmac_create_(parent, engine, oclass, args->pushbuf,
+				     3 + args->head, sizeof(*dmac),
+				     (void **)&dmac);
+	*pobject = nv_object(dmac);
+	if (ret)
+		return ret;
+
+	nv_parent(dmac)->object_attach = nv50_disp_dmac_object_attach;
+	nv_parent(dmac)->object_detach = nv50_disp_dmac_object_detach;
+	return 0;
+}
+
+struct nouveau_ofuncs
+nv50_disp_ovly_ofuncs = {
+	.ctor = nv50_disp_ovly_ctor,
+	.dtor = nv50_disp_dmac_dtor,
+	.init = nv50_disp_dmac_init,
+	.fini = nv50_disp_dmac_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO PIO channel base class
+ ******************************************************************************/
+
+static int
+nv50_disp_pioc_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass, int chid,
+		       int length, void **pobject)
+{
+	return nv50_disp_chan_create_(parent, engine, oclass, chid,
+				      length, pobject);
+}
+
+static void
+nv50_disp_pioc_dtor(struct nouveau_object *object)
+{
+	struct nv50_disp_pioc *pioc = (void *)object;
+	nv50_disp_chan_destroy(&pioc->base);
+}
+
+static int
+nv50_disp_pioc_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_pioc *pioc = (void *)object;
+	int chid = pioc->base.chid;
+	int ret;
+
+	ret = nv50_disp_chan_init(&pioc->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x610200 + (chid * 0x10), 0x00002000);
+	if (!nv_wait(priv, 0x610200 + (chid * 0x10), 0x00000000, 0x00000000)) {
+		nv_error(pioc, "timeout0: 0x%08x\n",
+			 nv_rd32(priv, 0x610200 + (chid * 0x10)));
+		return -EBUSY;
+	}
+
+	nv_wr32(priv, 0x610200 + (chid * 0x10), 0x00000001);
+	if (!nv_wait(priv, 0x610200 + (chid * 0x10), 0x00030000, 0x00010000)) {
+		nv_error(pioc, "timeout1: 0x%08x\n",
+			 nv_rd32(priv, 0x610200 + (chid * 0x10)));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nv50_disp_pioc_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_pioc *pioc = (void *)object;
+	int chid = pioc->base.chid;
+
+	nv_mask(priv, 0x610200 + (chid * 0x10), 0x00000001, 0x00000000);
+	if (!nv_wait(priv, 0x610200 + (chid * 0x10), 0x00030000, 0x00000000)) {
+		nv_error(pioc, "timeout: 0x%08x\n",
+			 nv_rd32(priv, 0x610200 + (chid * 0x10)));
+		if (suspend)
+			return -EBUSY;
+	}
+
+	return nv50_disp_chan_fini(&pioc->base, suspend);
+}
+
+/*******************************************************************************
+ * EVO immediate overlay channel objects
+ ******************************************************************************/
+
+static int
+nv50_disp_oimm_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_oimm_class *args = data;
+	struct nv50_disp_pioc *pioc;
+	int ret;
+
+	if (size < sizeof(*args) || args->head > 1)
+		return -EINVAL;
+
+	ret = nv50_disp_pioc_create_(parent, engine, oclass, 5 + args->head,
+				     sizeof(*pioc), (void **)&pioc);
+	*pobject = nv_object(pioc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+struct nouveau_ofuncs
+nv50_disp_oimm_ofuncs = {
+	.ctor = nv50_disp_oimm_ctor,
+	.dtor = nv50_disp_pioc_dtor,
+	.init = nv50_disp_pioc_init,
+	.fini = nv50_disp_pioc_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO cursor channel objects
+ ******************************************************************************/
+
+static int
+nv50_disp_curs_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_curs_class *args = data;
+	struct nv50_disp_pioc *pioc;
+	int ret;
+
+	if (size < sizeof(*args) || args->head > 1)
+		return -EINVAL;
+
+	ret = nv50_disp_pioc_create_(parent, engine, oclass, 7 + args->head,
+				     sizeof(*pioc), (void **)&pioc);
+	*pobject = nv_object(pioc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+struct nouveau_ofuncs
+nv50_disp_curs_ofuncs = {
+	.ctor = nv50_disp_curs_ctor,
+	.dtor = nv50_disp_pioc_dtor,
+	.init = nv50_disp_pioc_init,
+	.fini = nv50_disp_pioc_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * Base display object
+ ******************************************************************************/
+
+static int
+nv50_disp_base_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv = (void *)engine;
+	struct nv50_disp_base *base;
+	int ret;
+
+	ret = nouveau_parent_create(parent, engine, oclass, 0,
+				    priv->sclass, 0, &base);
+	*pobject = nv_object(base);
+	if (ret)
+		return ret;
+
+	return nouveau_ramht_new(parent, parent, 0x1000, 0, &base->ramht);
+}
+
+static void
+nv50_disp_base_dtor(struct nouveau_object *object)
+{
+	struct nv50_disp_base *base = (void *)object;
+	nouveau_ramht_ref(NULL, &base->ramht);
+	nouveau_parent_destroy(&base->base);
+}
+
+static int
+nv50_disp_base_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_base *base = (void *)object;
+	int ret, i;
+	u32 tmp;
+
+	ret = nouveau_parent_init(&base->base);
+	if (ret)
+		return ret;
+
+	/* The code segments below, which copy values from one register to
+	 * another, appear to inform EVO of the display capabilities or
+	 * something similar.  NFI what the 0x614004 caps are for..
+	 */
+	tmp = nv_rd32(priv, 0x614004);
+	nv_wr32(priv, 0x610184, tmp);
+
+	/* ... CRTC caps */
+	for (i = 0; i < priv->head.nr; i++) {
+		tmp = nv_rd32(priv, 0x616100 + (i * 0x800));
+		nv_wr32(priv, 0x610190 + (i * 0x10), tmp);
+		tmp = nv_rd32(priv, 0x616104 + (i * 0x800));
+		nv_wr32(priv, 0x610194 + (i * 0x10), tmp);
+		tmp = nv_rd32(priv, 0x616108 + (i * 0x800));
+		nv_wr32(priv, 0x610198 + (i * 0x10), tmp);
+		tmp = nv_rd32(priv, 0x61610c + (i * 0x800));
+		nv_wr32(priv, 0x61019c + (i * 0x10), tmp);
+	}
+
+	/* ... DAC caps */
+	for (i = 0; i < priv->dac.nr; i++) {
+		tmp = nv_rd32(priv, 0x61a000 + (i * 0x800));
+		nv_wr32(priv, 0x6101d0 + (i * 0x04), tmp);
+	}
+
+	/* ... SOR caps */
+	for (i = 0; i < priv->sor.nr; i++) {
+		tmp = nv_rd32(priv, 0x61c000 + (i * 0x800));
+		nv_wr32(priv, 0x6101e0 + (i * 0x04), tmp);
+	}
+
+	/* ... EXT caps */
+	for (i = 0; i < 3; i++) {
+		tmp = nv_rd32(priv, 0x61e000 + (i * 0x800));
+		nv_wr32(priv, 0x6101f0 + (i * 0x04), tmp);
+	}
+
+	/* steal display away from vbios, or something like that */
+	if (nv_rd32(priv, 0x610024) & 0x00000100) {
+		nv_wr32(priv, 0x610024, 0x00000100);
+		nv_mask(priv, 0x6194e8, 0x00000001, 0x00000000);
+		if (!nv_wait(priv, 0x6194e8, 0x00000002, 0x00000000)) {
+			nv_error(priv, "timeout acquiring display\n");
+			return -EBUSY;
+		}
+	}
+
+	/* point at display engine memory area (hash table, objects) */
+	nv_wr32(priv, 0x610010, (nv_gpuobj(base->ramht)->addr >> 8) | 9);
+
+	/* enable supervisor interrupts, disable everything else */
+	nv_wr32(priv, 0x61002c, 0x00000370);
+	nv_wr32(priv, 0x610028, 0x00000000);
+	return 0;
+}
+
+static int
+nv50_disp_base_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_base *base = (void *)object;
+
+	/* disable all interrupts */
+	nv_wr32(priv, 0x610024, 0x00000000);
+	nv_wr32(priv, 0x610020, 0x00000000);
+
+	return nouveau_parent_fini(&base->base, suspend);
+}
+
+struct nouveau_ofuncs
+nv50_disp_base_ofuncs = {
+	.ctor = nv50_disp_base_ctor,
+	.dtor = nv50_disp_base_dtor,
+	.init = nv50_disp_base_init,
+	.fini = nv50_disp_base_fini,
+};
+
+static struct nouveau_omthds
+nv50_disp_base_omthds[] = {
+	{ SOR_MTHD(NV50_DISP_SOR_PWR)         , nv50_sor_mthd },
+	{ SOR_MTHD(NV50_DISP_SOR_LVDS_SCRIPT) , nv50_sor_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_PWR)         , nv50_dac_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_LOAD)        , nv50_dac_mthd },
+	{},
+};
+
+static struct nouveau_oclass
+nv50_disp_base_oclass[] = {
+	{ NV50_DISP_CLASS, &nv50_disp_base_ofuncs, nv50_disp_base_omthds },
+	{}
 };
 
 static struct nouveau_oclass
 nv50_disp_sclass[] = {
-	{},
+	{ NV50_DISP_MAST_CLASS, &nv50_disp_mast_ofuncs },
+	{ NV50_DISP_SYNC_CLASS, &nv50_disp_sync_ofuncs },
+	{ NV50_DISP_OVLY_CLASS, &nv50_disp_ovly_ofuncs },
+	{ NV50_DISP_OIMM_CLASS, &nv50_disp_oimm_ofuncs },
+	{ NV50_DISP_CURS_CLASS, &nv50_disp_curs_ofuncs },
+	{}
 };
 
+/*******************************************************************************
+ * Display context: tracks instmem allocation and prevents more than one
+ * client from using the display hardware at any time.
+ ******************************************************************************/
+
+static int
+nv50_disp_data_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv = (void *)engine;
+	struct nouveau_engctx *ectx;
+	int ret = -EBUSY;
+
+	/* no context needed for channel objects... */
+	if (nv_mclass(parent) != NV_DEVICE_CLASS) {
+		atomic_inc(&parent->refcount);
+		*pobject = parent;
+		return 0;
+	}
+
+	/* allocate display hardware to client */
+	mutex_lock(&nv_subdev(priv)->mutex);
+	if (list_empty(&nv_engine(priv)->contexts)) {
+		ret = nouveau_engctx_create(parent, engine, oclass, NULL,
+					    0x10000, 0x10000,
+					    NVOBJ_FLAG_HEAP, &ectx);
+		*pobject = nv_object(ectx);
+	}
+	mutex_unlock(&nv_subdev(priv)->mutex);
+	return ret;
+}
+
+struct nouveau_oclass
+nv50_disp_cclass = {
+	.handle = NV_ENGCTX(DISP, 0x50),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv50_disp_data_ctor,
+		.dtor = _nouveau_engctx_dtor,
+		.init = _nouveau_engctx_init,
+		.fini = _nouveau_engctx_fini,
+		.rd32 = _nouveau_engctx_rd32,
+		.wr32 = _nouveau_engctx_wr32,
+	},
+};
+
+/*******************************************************************************
+ * Display engine implementation
+ ******************************************************************************/
+
+static void
+nv50_disp_intr_error(struct nv50_disp_priv *priv)
+{
+	u32 channels = (nv_rd32(priv, 0x610020) & 0x001f0000) >> 16;
+	u32 addr, data;
+	int chid;
+
+	for (chid = 0; chid < 5; chid++) {
+		if (!(channels & (1 << chid)))
+			continue;
+
+		nv_wr32(priv, 0x610020, 0x00010000 << chid);
+		addr = nv_rd32(priv, 0x610080 + (chid * 0x08));
+		data = nv_rd32(priv, 0x610084 + (chid * 0x08));
+		nv_wr32(priv, 0x610080 + (chid * 0x08), 0x90000000);
+
+		nv_error(priv, "chid %d mthd 0x%04x data 0x%08x 0x%08x\n",
+			 chid, addr & 0xffc, data, addr);
+	}
+}
+
 static void
 nv50_disp_intr_vblank(struct nv50_disp_priv *priv, int crtc)
 {
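
nv50_disp_chan_create_()/nv50_disp_chan_destroy() in the hunk above account for EVO channel slots with a single bitmask in nv50_disp_base, returning -EBUSY when a chid is already owned. Reduced to its essentials (standalone model, not kernel code):

#include <errno.h>
#include <stdio.h>

static unsigned int chan_mask;        /* stands in for nv50_disp_base.chan */

static int chan_create(int chid)
{
	if (chan_mask & (1u << chid))
		return -EBUSY;        /* slot already owned by another object */
	chan_mask |= 1u << chid;
	return 0;
}

static void chan_destroy(int chid)
{
	chan_mask &= ~(1u << chid);
}

int main(void)
{
	printf("%d\n", chan_create(0));   /* 0: master channel acquired */
	printf("%d\n", chan_create(0));   /* -EBUSY: second client refused */
	chan_destroy(0);
	printf("%d\n", chan_create(0));   /* 0: slot free again */
	return 0;
}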
@@ -80,30 +800,422 @@
 		disp->vblank.notify(disp->vblank.data, crtc);
 }
 
+static u16
+exec_lookup(struct nv50_disp_priv *priv, int head, int outp, u32 ctrl,
+	    struct dcb_output *dcb, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+	    struct nvbios_outp *info)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	u16 mask, type, data;
+
+	if (outp < 4) {
+		type = DCB_OUTPUT_ANALOG;
+		mask = 0;
+	} else {
+		outp -= 4;
+		switch (ctrl & 0x00000f00) {
+		case 0x00000000: type = DCB_OUTPUT_LVDS; mask = 1; break;
+		case 0x00000100: type = DCB_OUTPUT_TMDS; mask = 1; break;
+		case 0x00000200: type = DCB_OUTPUT_TMDS; mask = 2; break;
+		case 0x00000500: type = DCB_OUTPUT_TMDS; mask = 3; break;
+		case 0x00000800: type = DCB_OUTPUT_DP; mask = 1; break;
+		case 0x00000900: type = DCB_OUTPUT_DP; mask = 2; break;
+		default:
+			nv_error(priv, "unknown SOR mc 0x%08x\n", ctrl);
+			return 0x0000;
+		}
+	}
+
+	mask  = 0x00c0 & (mask << 6);
+	mask |= 0x0001 << outp;
+	mask |= 0x0100 << head;
+
+	data = dcb_outp_match(bios, type, mask, ver, hdr, dcb);
+	if (!data)
+		return 0x0000;
+
+	return nvbios_outp_match(bios, type, mask, ver, hdr, cnt, len, info);
+}
+
+static bool
+exec_script(struct nv50_disp_priv *priv, int head, int id)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_outp info;
+	struct dcb_output dcb;
+	u8  ver, hdr, cnt, len;
+	u16 data;
+	u32 ctrl = 0x00000000;
+	int i;
+
+	for (i = 0; !(ctrl & (1 << head)) && i < 3; i++)
+		ctrl = nv_rd32(priv, 0x610b5c + (i * 8));
+
+	if (nv_device(priv)->chipset  < 0x90 ||
+	    nv_device(priv)->chipset == 0x92 ||
+	    nv_device(priv)->chipset == 0xa0) {
+		for (i = 0; !(ctrl & (1 << head)) && i < 2; i++)
+			ctrl = nv_rd32(priv, 0x610b74 + (i * 8));
+		i += 3;
+	} else {
+		for (i = 0; !(ctrl & (1 << head)) && i < 4; i++)
+			ctrl = nv_rd32(priv, 0x610798 + (i * 8));
+		i += 3;
+	}
+
+	if (!(ctrl & (1 << head)))
+		return false;
+
+	data = exec_lookup(priv, head, i, ctrl, &dcb, &ver, &hdr, &cnt, &len, &info);
+	if (data) {
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = bios,
+			.offset = info.script[id],
+			.outp = &dcb,
+			.crtc = head,
+			.execute = 1,
+		};
+
+		return nvbios_exec(&init) == 0;
+	}
+
+	return false;
+}
+
+static u32
+exec_clkcmp(struct nv50_disp_priv *priv, int head, int id, u32 pclk,
+	    struct dcb_output *outp)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_outp info1;
+	struct nvbios_ocfg info2;
+	u8  ver, hdr, cnt, len;
+	u16 data, conf;
+	u32 ctrl = 0x00000000;
+	int i;
+
+	for (i = 0; !(ctrl & (1 << head)) && i < 3; i++)
+		ctrl = nv_rd32(priv, 0x610b58 + (i * 8));
+
+	if (nv_device(priv)->chipset  < 0x90 ||
+	    nv_device(priv)->chipset == 0x92 ||
+	    nv_device(priv)->chipset == 0xa0) {
+		for (i = 0; !(ctrl & (1 << head)) && i < 2; i++)
+			ctrl = nv_rd32(priv, 0x610b70 + (i * 8));
+		i += 3;
+	} else {
+		for (i = 0; !(ctrl & (1 << head)) && i < 4; i++)
+			ctrl = nv_rd32(priv, 0x610794 + (i * 8));
+		i += 3;
+	}
+
+	if (!(ctrl & (1 << head)))
+		return 0x0000;
+
+	data = exec_lookup(priv, head, i, ctrl, outp, &ver, &hdr, &cnt, &len, &info1);
+	if (!data)
+		return 0x0000;
+
+	switch (outp->type) {
+	case DCB_OUTPUT_TMDS:
+		conf = (ctrl & 0x00000f00) >> 8;
+		if (pclk >= 165000)
+			conf |= 0x0100;
+		break;
+	case DCB_OUTPUT_LVDS:
+		conf = priv->sor.lvdsconf;
+		break;
+	case DCB_OUTPUT_DP:
+		conf = (ctrl & 0x00000f00) >> 8;
+		break;
+	case DCB_OUTPUT_ANALOG:
+	default:
+		conf = 0x00ff;
+		break;
+	}
+
+	data = nvbios_ocfg_match(bios, data, conf, &ver, &hdr, &cnt, &len, &info2);
+	if (data) {
+		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
+		if (data) {
+			struct nvbios_init init = {
+				.subdev = nv_subdev(priv),
+				.bios = bios,
+				.offset = data,
+				.outp = outp,
+				.crtc = head,
+				.execute = 1,
+			};
+
+			if (nvbios_exec(&init))
+				return 0x0000;
+			return conf;
+		}
+	}
+
+	return 0x0000;
+}
+
 static void
+nv50_disp_intr_unk10(struct nv50_disp_priv *priv, u32 super)
+{
+	int head = ffs((super & 0x00000060) >> 5) - 1;
+	if (head >= 0) {
+		head = ffs((super & 0x00000180) >> 7) - 1;
+		if (head >= 0)
+			exec_script(priv, head, 1);
+	}
+
+	nv_wr32(priv, 0x610024, 0x00000010);
+	nv_wr32(priv, 0x610030, 0x80000000);
+}
+
+static void
+nv50_disp_intr_unk20_dp(struct nv50_disp_priv *priv,
+		        struct dcb_output *outp, u32 pclk)
+{
+	const int link = !(outp->sorconf.link & 1);
+	const int   or = ffs(outp->or) - 1;
+	const u32 soff = (  or * 0x800);
+	const u32 loff = (link * 0x080) + soff;
+	const u32 ctrl = nv_rd32(priv, 0x610794 + (or * 8));
+	const u32 symbol = 100000;
+	u32 dpctrl = nv_rd32(priv, 0x61c10c + loff) & 0x000f0000;
+	u32 clksor = nv_rd32(priv, 0x614300 + soff);
+	int bestTU = 0, bestVTUi = 0, bestVTUf = 0, bestVTUa = 0;
+	int TU, VTUi, VTUf, VTUa;
+	u64 link_data_rate, link_ratio, unk;
+	u32 best_diff = 64 * symbol;
+	u32 link_nr, link_bw, bits, r;
+
+	/* calculate packed data rate for each lane */
+	if      (dpctrl > 0x00030000) link_nr = 4;
+	else if (dpctrl > 0x00010000) link_nr = 2;
+	else			      link_nr = 1;
+
+	if (clksor & 0x000c0000)
+		link_bw = 270000;
+	else
+		link_bw = 162000;
+
+	if      ((ctrl & 0xf0000) == 0x60000) bits = 30;
+	else if ((ctrl & 0xf0000) == 0x50000) bits = 24;
+	else                                  bits = 18;
+
+	link_data_rate = (pclk * bits / 8) / link_nr;
+
+	/* calculate ratio of packed data rate to link symbol rate */
+	link_ratio = link_data_rate * symbol;
+	r = do_div(link_ratio, link_bw);
+
+	for (TU = 64; TU >= 32; TU--) {
+		/* calculate average number of valid symbols in each TU */
+		u32 tu_valid = link_ratio * TU;
+		u32 calc, diff;
+
+		/* find a hw representation for the fraction.. */
+		VTUi = tu_valid / symbol;
+		calc = VTUi * symbol;
+		diff = tu_valid - calc;
+		if (diff) {
+			if (diff >= (symbol / 2)) {
+				VTUf = symbol / (symbol - diff);
+				if (symbol - (VTUf * diff))
+					VTUf++;
+
+				if (VTUf <= 15) {
+					VTUa  = 1;
+					calc += symbol - (symbol / VTUf);
+				} else {
+					VTUa  = 0;
+					VTUf  = 1;
+					calc += symbol;
+				}
+			} else {
+				VTUa  = 0;
+				VTUf  = min((int)(symbol / diff), 15);
+				calc += symbol / VTUf;
+			}
+
+			diff = calc - tu_valid;
+		} else {
+			/* no remainder, but the hw doesn't like the fractional
+			 * part being zero.  decrement the integer part and
+			 * have the fraction add a whole symbol back
+			 */
+			VTUa = 0;
+			VTUf = 1;
+			VTUi--;
+		}
+
+		if (diff < best_diff) {
+			best_diff = diff;
+			bestTU = TU;
+			bestVTUa = VTUa;
+			bestVTUf = VTUf;
+			bestVTUi = VTUi;
+			if (diff == 0)
+				break;
+		}
+	}
+
+	if (!bestTU) {
+		nv_error(priv, "unable to find suitable dp config\n");
+		return;
+	}
+
+	/* XXX close to vbios numbers, but not right */
+	unk  = (symbol - link_ratio) * bestTU;
+	unk *= link_ratio;
+	r = do_div(unk, symbol);
+	r = do_div(unk, symbol);
+	unk += 6;
+
+	nv_mask(priv, 0x61c10c + loff, 0x000001fc, bestTU << 2);
+	nv_mask(priv, 0x61c128 + loff, 0x010f7f3f, bestVTUa << 24 |
+						   bestVTUf << 16 |
+						   bestVTUi << 8 | unk);
+}
+
+static void
+nv50_disp_intr_unk20(struct nv50_disp_priv *priv, u32 super)
+{
+	struct dcb_output outp;
+	u32 addr, mask, data;
+	int head;
+
+	/* finish detaching encoder? */
+	head = ffs((super & 0x00000180) >> 7) - 1;
+	if (head >= 0)
+		exec_script(priv, head, 2);
+
+	/* check whether a vpll change is required */
+	head = ffs((super & 0x00000600) >> 9) - 1;
+	if (head >= 0) {
+		u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff;
+		if (pclk) {
+			struct nouveau_clock *clk = nouveau_clock(priv);
+			clk->pll_set(clk, PLL_VPLL0 + head, pclk);
+		}
+
+		nv_mask(priv, 0x614200 + head * 0x800, 0x0000000f, 0x00000000);
+	}
+
+	/* (re)attach the relevant OR to the head */
+	head = ffs((super & 0x00000180) >> 7) - 1;
+	if (head >= 0) {
+		u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff;
+		u32 conf = exec_clkcmp(priv, head, 0, pclk, &outp);
+		if (conf) {
+			if (outp.type == DCB_OUTPUT_ANALOG) {
+				addr = 0x614280 + (ffs(outp.or) - 1) * 0x800;
+				mask = 0xffffffff;
+				data = 0x00000000;
+			} else {
+				if (outp.type == DCB_OUTPUT_DP)
+					nv50_disp_intr_unk20_dp(priv, &outp, pclk);
+				addr = 0x614300 + (ffs(outp.or) - 1) * 0x800;
+				mask = 0x00000707;
+				data = (conf & 0x0100) ? 0x0101 : 0x0000;
+			}
+
+			nv_mask(priv, addr, mask, data);
+		}
+	}
+
+	nv_wr32(priv, 0x610024, 0x00000020);
+	nv_wr32(priv, 0x610030, 0x80000000);
+}
+
+/* If programming a TMDS output on a SOR that can also be configured for
+ * DisplayPort, make sure NV50_SOR_DP_CTRL_ENABLE is forced off.
+ *
+ * It looks like the VBIOS TMDS scripts make an attempt at this; however,
+ * the scripts on at least one board I have only switch it off on
+ * link 0, causing a blank display if the output has previously been
+ * programmed for DisplayPort.
+ */
+static void
+nv50_disp_intr_unk40_tmds(struct nv50_disp_priv *priv, struct dcb_output *outp)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const int link = !(outp->sorconf.link & 1);
+	const int   or = ffs(outp->or) - 1;
+	const u32 loff = (or * 0x800) + (link * 0x80);
+	const u16 mask = (outp->sorconf.link << 6) | outp->or;
+	u8  ver, hdr;
+
+	if (dcb_outp_match(bios, DCB_OUTPUT_DP, mask, &ver, &hdr, outp))
+		nv_mask(priv, 0x61c10c + loff, 0x00000001, 0x00000000);
+}
+
+static void
+nv50_disp_intr_unk40(struct nv50_disp_priv *priv, u32 super)
+{
+	int head = ffs((super & 0x00000180) >> 7) - 1;
+	if (head >= 0) {
+		struct dcb_output outp;
+		u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff;
+		if (pclk && exec_clkcmp(priv, head, 1, pclk, &outp)) {
+			if (outp.type == DCB_OUTPUT_TMDS)
+				nv50_disp_intr_unk40_tmds(priv, &outp);
+		}
+	}
+
+	nv_wr32(priv, 0x610024, 0x00000040);
+	nv_wr32(priv, 0x610030, 0x80000000);
+}
+
+static void
+nv50_disp_intr_super(struct nv50_disp_priv *priv, u32 intr1)
+{
+	u32 super = nv_rd32(priv, 0x610030);
+
+	nv_debug(priv, "supervisor 0x%08x 0x%08x\n", intr1, super);
+
+	if (intr1 & 0x00000010)
+		nv50_disp_intr_unk10(priv, super);
+	if (intr1 & 0x00000020)
+		nv50_disp_intr_unk20(priv, super);
+	if (intr1 & 0x00000040)
+		nv50_disp_intr_unk40(priv, super);
+}
+
+void
 nv50_disp_intr(struct nouveau_subdev *subdev)
 {
 	struct nv50_disp_priv *priv = (void *)subdev;
-	u32 stat1 = nv_rd32(priv, 0x610024);
+	u32 intr0 = nv_rd32(priv, 0x610020);
+	u32 intr1 = nv_rd32(priv, 0x610024);
 
-	if (stat1 & 0x00000004) {
+	if (intr0 & 0x001f0000) {
+		nv50_disp_intr_error(priv);
+		intr0 &= ~0x001f0000;
+	}
+
+	if (intr1 & 0x00000004) {
 		nv50_disp_intr_vblank(priv, 0);
 		nv_wr32(priv, 0x610024, 0x00000004);
-		stat1 &= ~0x00000004;
+		intr1 &= ~0x00000004;
 	}
 
-	if (stat1 & 0x00000008) {
+	if (intr1 & 0x00000008) {
 		nv50_disp_intr_vblank(priv, 1);
 		nv_wr32(priv, 0x610024, 0x00000008);
-		stat1 &= ~0x00000008;
+		intr1 &= ~0x00000008;
 	}
 
+	if (intr1 & 0x00000070) {
+		nv50_disp_intr_super(priv, intr1);
+		intr1 &= ~0x00000070;
+	}
 }
 
 static int
 nv50_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-		  struct nouveau_oclass *oclass, void *data, u32 size,
-		  struct nouveau_object **pobject)
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
 {
 	struct nv50_disp_priv *priv;
 	int ret;
@@ -114,8 +1226,16 @@
 	if (ret)
 		return ret;
 
-	nv_engine(priv)->sclass = nv50_disp_sclass;
+	nv_engine(priv)->sclass = nv50_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nv50_disp_intr;
+	priv->sclass = nv50_disp_sclass;
+	priv->head.nr = 2;
+	priv->dac.nr = 3;
+	priv->sor.nr = 2;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
 
 	INIT_LIST_HEAD(&priv->base.vblank.list);
 	spin_lock_init(&priv->base.vblank.lock);
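
For reference, the arithmetic in nv50_disp_intr_unk20_dp() boils down to comparing the packed per-lane payload rate against the link symbol rate to size the transfer unit (TU). A worked standalone example with plausible numbers (the values are illustrative, not read from hardware):

#include <stdio.h>

int main(void)
{
	/* illustrative link parameters: 1080p60 8bpc RGB over 4 lanes at HBR */
	const unsigned long long pclk = 148500;    /* pixel clock, kHz */
	const unsigned long long bits = 24;        /* bits per pixel */
	const unsigned long long link_nr = 4;      /* lane count */
	const unsigned long long link_bw = 270000; /* symbol rate per lane, kHz */

	/* payload bytes (= link symbols) per second on each lane */
	unsigned long long rate = pclk * bits / 8 / link_nr;

	/* of every 64-symbol transfer unit, this many carry pixel data */
	unsigned long long valid = rate * 64 / link_bw;

	printf("per-lane data rate %llu kB/s, ~%llu valid symbols per TU\n",
	       rate, valid);
	return 0;
}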
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h
new file mode 100644
index 0000000..a6bb9314
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h
@@ -0,0 +1,142 @@
+#ifndef __NV50_DISP_H__
+#define __NV50_DISP_H__
+
+#include <core/parent.h>
+#include <core/namedb.h>
+#include <core/ramht.h>
+
+#include <engine/dmaobj.h>
+#include <engine/disp.h>
+
+struct dcb_output;
+
+struct nv50_disp_priv {
+	struct nouveau_disp base;
+	struct nouveau_oclass *sclass;
+	struct {
+		int nr;
+	} head;
+	struct {
+		int nr;
+		int (*power)(struct nv50_disp_priv *, int dac, u32 data);
+		int (*sense)(struct nv50_disp_priv *, int dac, u32 load);
+	} dac;
+	struct {
+		int nr;
+		int (*power)(struct nv50_disp_priv *, int sor, u32 data);
+		int (*hda_eld)(struct nv50_disp_priv *, int sor, u8 *, u32);
+		int (*hdmi)(struct nv50_disp_priv *, int head, int sor, u32);
+		int (*dp_train_init)(struct nv50_disp_priv *, int sor, int link,
+				     int head, u16 type, u16 mask, u32 data,
+				     struct dcb_output *);
+		int (*dp_train_fini)(struct nv50_disp_priv *, int sor, int link,
+				     int head, u16 type, u16 mask, u32 data,
+				     struct dcb_output *);
+		int (*dp_train)(struct nv50_disp_priv *, int sor, int link,
+				u16 type, u16 mask, u32 data,
+				struct dcb_output *);
+		int (*dp_lnkctl)(struct nv50_disp_priv *, int sor, int link,
+				 int head, u16 type, u16 mask, u32 data,
+				 struct dcb_output *);
+		int (*dp_drvctl)(struct nv50_disp_priv *, int sor, int link,
+				 int lane, u16 type, u16 mask, u32 data,
+				 struct dcb_output *);
+		u32 lvdsconf;
+	} sor;
+};
+
+#define DAC_MTHD(n) (n), (n) + 0x03
+
+int nv50_dac_mthd(struct nouveau_object *, u32, void *, u32);
+int nv50_dac_power(struct nv50_disp_priv *, int, u32);
+int nv50_dac_sense(struct nv50_disp_priv *, int, u32);
+
+#define SOR_MTHD(n) (n), (n) + 0x3f
+
+int nva3_hda_eld(struct nv50_disp_priv *, int, u8 *, u32);
+int nvd0_hda_eld(struct nv50_disp_priv *, int, u8 *, u32);
+
+int nv84_hdmi_ctrl(struct nv50_disp_priv *, int, int, u32);
+int nva3_hdmi_ctrl(struct nv50_disp_priv *, int, int, u32);
+int nvd0_hdmi_ctrl(struct nv50_disp_priv *, int, int, u32);
+
+int nv50_sor_mthd(struct nouveau_object *, u32, void *, u32);
+int nv50_sor_power(struct nv50_disp_priv *, int, u32);
+
+int nv94_sor_dp_train_init(struct nv50_disp_priv *, int, int, int, u16, u16,
+		           u32, struct dcb_output *);
+int nv94_sor_dp_train_fini(struct nv50_disp_priv *, int, int, int, u16, u16,
+		           u32, struct dcb_output *);
+int nv94_sor_dp_train(struct nv50_disp_priv *, int, int, u16, u16, u32,
+		      struct dcb_output *);
+int nv94_sor_dp_lnkctl(struct nv50_disp_priv *, int, int, int, u16, u16, u32,
+		       struct dcb_output *);
+int nv94_sor_dp_drvctl(struct nv50_disp_priv *, int, int, int, u16, u16, u32,
+		       struct dcb_output *);
+
+int nvd0_sor_dp_train(struct nv50_disp_priv *, int, int, u16, u16, u32,
+		      struct dcb_output *);
+int nvd0_sor_dp_lnkctl(struct nv50_disp_priv *, int, int, int, u16, u16, u32,
+		       struct dcb_output *);
+int nvd0_sor_dp_drvctl(struct nv50_disp_priv *, int, int, int, u16, u16, u32,
+		       struct dcb_output *);
+
+struct nv50_disp_base {
+	struct nouveau_parent base;
+	struct nouveau_ramht *ramht;
+	u32 chan;
+};
+
+struct nv50_disp_chan {
+	struct nouveau_namedb base;
+	int chid;
+};
+
+int  nv50_disp_chan_create_(struct nouveau_object *, struct nouveau_object *,
+			    struct nouveau_oclass *, int, int, void **);
+void nv50_disp_chan_destroy(struct nv50_disp_chan *);
+u32  nv50_disp_chan_rd32(struct nouveau_object *, u64);
+void nv50_disp_chan_wr32(struct nouveau_object *, u64, u32);
+
+#define nv50_disp_chan_init(a)                                                 \
+	nouveau_namedb_init(&(a)->base)
+#define nv50_disp_chan_fini(a,b)                                               \
+	nouveau_namedb_fini(&(a)->base, (b))
+
+int  nv50_disp_dmac_create_(struct nouveau_object *, struct nouveau_object *,
+			    struct nouveau_oclass *, u32, int, int, void **);
+void nv50_disp_dmac_dtor(struct nouveau_object *);
+
+struct nv50_disp_dmac {
+	struct nv50_disp_chan base;
+	struct nouveau_dmaobj *pushdma;
+	u32 push;
+};
+
+struct nv50_disp_pioc {
+	struct nv50_disp_chan base;
+};
+
+extern struct nouveau_ofuncs nv50_disp_mast_ofuncs;
+extern struct nouveau_ofuncs nv50_disp_sync_ofuncs;
+extern struct nouveau_ofuncs nv50_disp_ovly_ofuncs;
+extern struct nouveau_ofuncs nv50_disp_oimm_ofuncs;
+extern struct nouveau_ofuncs nv50_disp_curs_ofuncs;
+extern struct nouveau_ofuncs nv50_disp_base_ofuncs;
+extern struct nouveau_oclass nv50_disp_cclass;
+void nv50_disp_intr(struct nouveau_subdev *);
+
+extern struct nouveau_omthds nv84_disp_base_omthds[];
+
+extern struct nouveau_omthds nva3_disp_base_omthds[];
+
+extern struct nouveau_ofuncs nvd0_disp_mast_ofuncs;
+extern struct nouveau_ofuncs nvd0_disp_sync_ofuncs;
+extern struct nouveau_ofuncs nvd0_disp_ovly_ofuncs;
+extern struct nouveau_ofuncs nvd0_disp_oimm_ofuncs;
+extern struct nouveau_ofuncs nvd0_disp_curs_ofuncs;
+extern struct nouveau_ofuncs nvd0_disp_base_ofuncs;
+extern struct nouveau_oclass nvd0_disp_cclass;
+void nvd0_disp_intr(struct nouveau_subdev *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c
new file mode 100644
index 0000000..fc84eac
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <engine/software.h>
+#include <engine/disp.h>
+
+#include <core/class.h>
+
+#include "nv50.h"
+
+static struct nouveau_oclass
+nv84_disp_sclass[] = {
+	{ NV84_DISP_MAST_CLASS, &nv50_disp_mast_ofuncs },
+	{ NV84_DISP_SYNC_CLASS, &nv50_disp_sync_ofuncs },
+	{ NV84_DISP_OVLY_CLASS, &nv50_disp_ovly_ofuncs },
+	{ NV84_DISP_OIMM_CLASS, &nv50_disp_oimm_ofuncs },
+	{ NV84_DISP_CURS_CLASS, &nv50_disp_curs_ofuncs },
+	{}
+};
+
+struct nouveau_omthds
+nv84_disp_base_omthds[] = {
+	{ SOR_MTHD(NV50_DISP_SOR_PWR)         , nv50_sor_mthd },
+	{ SOR_MTHD(NV84_DISP_SOR_HDMI_PWR)    , nv50_sor_mthd },
+	{ SOR_MTHD(NV50_DISP_SOR_LVDS_SCRIPT) , nv50_sor_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_PWR)         , nv50_dac_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_LOAD)        , nv50_dac_mthd },
+	{},
+};
+
+static struct nouveau_oclass
+nv84_disp_base_oclass[] = {
+	{ NV84_DISP_CLASS, &nv50_disp_base_ofuncs, nv84_disp_base_omthds },
+	{}
+};
+
+static int
+nv84_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv;
+	int ret;
+
+	ret = nouveau_disp_create(parent, engine, oclass, "PDISP",
+				  "display", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_engine(priv)->sclass = nv84_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
+	nv_subdev(priv)->intr = nv50_disp_intr;
+	priv->sclass = nv84_disp_sclass;
+	priv->head.nr = 2;
+	priv->dac.nr = 3;
+	priv->sor.nr = 2;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
+	priv->sor.hdmi = nv84_hdmi_ctrl;
+
+	INIT_LIST_HEAD(&priv->base.vblank.list);
+	spin_lock_init(&priv->base.vblank.lock);
+	return 0;
+}
+
+struct nouveau_oclass
+nv84_disp_oclass = {
+	.handle = NV_ENGINE(DISP, 0x82),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv84_disp_ctor,
+		.dtor = _nouveau_disp_dtor,
+		.init = _nouveau_disp_init,
+		.fini = _nouveau_disp_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c
new file mode 100644
index 0000000..ba9dfd4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <engine/software.h>
+#include <engine/disp.h>
+
+#include <core/class.h>
+
+#include "nv50.h"
+
+static struct nouveau_oclass
+nv94_disp_sclass[] = {
+	{ NV94_DISP_MAST_CLASS, &nv50_disp_mast_ofuncs },
+	{ NV94_DISP_SYNC_CLASS, &nv50_disp_sync_ofuncs },
+	{ NV94_DISP_OVLY_CLASS, &nv50_disp_ovly_ofuncs },
+	{ NV94_DISP_OIMM_CLASS, &nv50_disp_oimm_ofuncs },
+	{ NV94_DISP_CURS_CLASS, &nv50_disp_curs_ofuncs },
+	{}
+};
+
+static struct nouveau_omthds
+nv94_disp_base_omthds[] = {
+	{ SOR_MTHD(NV50_DISP_SOR_PWR)         , nv50_sor_mthd },
+	{ SOR_MTHD(NV84_DISP_SOR_HDMI_PWR)    , nv50_sor_mthd },
+	{ SOR_MTHD(NV50_DISP_SOR_LVDS_SCRIPT) , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_TRAIN)    , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_LNKCTL)   , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(0)), nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(1)), nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(2)), nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(3)), nv50_sor_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_PWR)         , nv50_dac_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_LOAD)        , nv50_dac_mthd },
+	{},
+};
+
+static struct nouveau_oclass
+nv94_disp_base_oclass[] = {
+	{ NV94_DISP_CLASS, &nv50_disp_base_ofuncs, nv94_disp_base_omthds },
+	{}
+};
+
+static int
+nv94_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv;
+	int ret;
+
+	ret = nouveau_disp_create(parent, engine, oclass, "PDISP",
+				  "display", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_engine(priv)->sclass = nv94_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
+	nv_subdev(priv)->intr = nv50_disp_intr;
+	priv->sclass = nv94_disp_sclass;
+	priv->head.nr = 2;
+	priv->dac.nr = 3;
+	priv->sor.nr = 4;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
+	priv->sor.hdmi = nv84_hdmi_ctrl;
+	priv->sor.dp_train = nv94_sor_dp_train;
+	priv->sor.dp_train_init = nv94_sor_dp_train_init;
+	priv->sor.dp_train_fini = nv94_sor_dp_train_fini;
+	priv->sor.dp_lnkctl = nv94_sor_dp_lnkctl;
+	priv->sor.dp_drvctl = nv94_sor_dp_drvctl;
+
+	INIT_LIST_HEAD(&priv->base.vblank.list);
+	spin_lock_init(&priv->base.vblank.lock);
+	return 0;
+}
+
+struct nouveau_oclass
+nv94_disp_oclass = {
+	.handle = NV_ENGINE(DISP, 0x88),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv94_disp_ctor,
+		.dtor = _nouveau_disp_dtor,
+		.init = _nouveau_disp_init,
+		.fini = _nouveau_disp_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c
new file mode 100644
index 0000000..5d63902
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <engine/software.h>
+#include <engine/disp.h>
+
+#include <core/class.h>
+
+#include "nv50.h"
+
+static struct nouveau_oclass
+nva0_disp_sclass[] = {
+	{ NVA0_DISP_MAST_CLASS, &nv50_disp_mast_ofuncs },
+	{ NVA0_DISP_SYNC_CLASS, &nv50_disp_sync_ofuncs },
+	{ NVA0_DISP_OVLY_CLASS, &nv50_disp_ovly_ofuncs },
+	{ NVA0_DISP_OIMM_CLASS, &nv50_disp_oimm_ofuncs },
+	{ NVA0_DISP_CURS_CLASS, &nv50_disp_curs_ofuncs },
+	{}
+};
+
+static struct nouveau_oclass
+nva0_disp_base_oclass[] = {
+	{ NVA0_DISP_CLASS, &nv50_disp_base_ofuncs, nv84_disp_base_omthds },
+	{}
+};
+
+static int
+nva0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv;
+	int ret;
+
+	ret = nouveau_disp_create(parent, engine, oclass, "PDISP",
+				  "display", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_engine(priv)->sclass = nva0_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
+	nv_subdev(priv)->intr = nv50_disp_intr;
+	priv->sclass = nva0_disp_sclass;
+	priv->head.nr = 2;
+	priv->dac.nr = 3;
+	priv->sor.nr = 2;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
+	priv->sor.hdmi = nv84_hdmi_ctrl;
+
+	INIT_LIST_HEAD(&priv->base.vblank.list);
+	spin_lock_init(&priv->base.vblank.lock);
+	return 0;
+}
+
+struct nouveau_oclass
+nva0_disp_oclass = {
+	.handle = NV_ENGINE(DISP, 0x83),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nva0_disp_ctor,
+		.dtor = _nouveau_disp_dtor,
+		.init = _nouveau_disp_init,
+		.fini = _nouveau_disp_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c
new file mode 100644
index 0000000..e9192ca
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <engine/software.h>
+#include <engine/disp.h>
+
+#include <core/class.h>
+
+#include "nv50.h"
+
+static struct nouveau_oclass
+nva3_disp_sclass[] = {
+	{ NVA3_DISP_MAST_CLASS, &nv50_disp_mast_ofuncs },
+	{ NVA3_DISP_SYNC_CLASS, &nv50_disp_sync_ofuncs },
+	{ NVA3_DISP_OVLY_CLASS, &nv50_disp_ovly_ofuncs },
+	{ NVA3_DISP_OIMM_CLASS, &nv50_disp_oimm_ofuncs },
+	{ NVA3_DISP_CURS_CLASS, &nv50_disp_curs_ofuncs },
+	{}
+};
+
+struct nouveau_omthds
+nva3_disp_base_omthds[] = {
+	{ SOR_MTHD(NV50_DISP_SOR_PWR)         , nv50_sor_mthd },
+	{ SOR_MTHD(NVA3_DISP_SOR_HDA_ELD)     , nv50_sor_mthd },
+	{ SOR_MTHD(NV84_DISP_SOR_HDMI_PWR)    , nv50_sor_mthd },
+	{ SOR_MTHD(NV50_DISP_SOR_LVDS_SCRIPT) , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_TRAIN)    , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_LNKCTL)   , nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(0)), nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(1)), nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(2)), nv50_sor_mthd },
+	{ SOR_MTHD(NV94_DISP_SOR_DP_DRVCTL(3)), nv50_sor_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_PWR)         , nv50_dac_mthd },
+	{ DAC_MTHD(NV50_DISP_DAC_LOAD)        , nv50_dac_mthd },
+	{},
+};
+
+static struct nouveau_oclass
+nva3_disp_base_oclass[] = {
+	{ NVA3_DISP_CLASS, &nv50_disp_base_ofuncs, nva3_disp_base_omthds },
+	{}
+};
+
+static int
+nva3_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv;
+	int ret;
+
+	ret = nouveau_disp_create(parent, engine, oclass, "PDISP",
+				  "display", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_engine(priv)->sclass = nva3_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
+	nv_subdev(priv)->intr = nv50_disp_intr;
+	priv->sclass = nva3_disp_sclass;
+	priv->head.nr = 2;
+	priv->dac.nr = 3;
+	priv->sor.nr = 4;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
+	priv->sor.hda_eld = nva3_hda_eld;
+	priv->sor.hdmi = nva3_hdmi_ctrl;
+	priv->sor.dp_train = nv94_sor_dp_train;
+	priv->sor.dp_train_init = nv94_sor_dp_train_init;
+	priv->sor.dp_train_fini = nv94_sor_dp_train_fini;
+	priv->sor.dp_lnkctl = nv94_sor_dp_lnkctl;
+	priv->sor.dp_drvctl = nv94_sor_dp_drvctl;
+
+	INIT_LIST_HEAD(&priv->base.vblank.list);
+	spin_lock_init(&priv->base.vblank.lock);
+	return 0;
+}
+
+struct nouveau_oclass
+nva3_disp_oclass = {
+	.handle = NV_ENGINE(DISP, 0x85),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nva3_disp_ctor,
+		.dtor = _nouveau_disp_dtor,
+		.init = _nouveau_disp_init,
+		.fini = _nouveau_disp_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
index d93efbc..9e38ebf 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
@@ -22,22 +22,808 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/bar.h>
+#include <core/object.h>
+#include <core/parent.h>
+#include <core/handle.h>
+#include <core/class.h>
 
 #include <engine/software.h>
 #include <engine/disp.h>
 
-struct nvd0_disp_priv {
-	struct nouveau_disp base;
+#include <subdev/timer.h>
+#include <subdev/fb.h>
+#include <subdev/bar.h>
+#include <subdev/clock.h>
+
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/bios/disp.h>
+#include <subdev/bios/init.h>
+#include <subdev/bios/pll.h>
+
+#include "nv50.h"
+
+/*******************************************************************************
+ * EVO DMA channel base class
+ ******************************************************************************/
+
+static int
+nvd0_disp_dmac_object_attach(struct nouveau_object *parent,
+			     struct nouveau_object *object, u32 name)
+{
+	struct nv50_disp_base *base = (void *)parent->parent;
+	struct nv50_disp_chan *chan = (void *)parent;
+	u32 addr = nv_gpuobj(object)->node->offset;
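+	/* hash table entry: chid in the high bits, object offset at bit 9,
+	 * low bit marking the entry valid (assumed layout) */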
+	u32 data = (chan->chid << 27) | (addr << 9) | 0x00000001;
+	return nouveau_ramht_insert(base->ramht, chan->chid, name, data);
+}
+
+static void
+nvd0_disp_dmac_object_detach(struct nouveau_object *parent, int cookie)
+{
+	struct nv50_disp_base *base = (void *)parent->parent;
+	nouveau_ramht_remove(base->ramht, cookie);
+}
+
+static int
+nvd0_disp_dmac_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *dmac = (void *)object;
+	int chid = dmac->base.chid;
+	int ret;
+
+	ret = nv50_disp_chan_init(&dmac->base);
+	if (ret)
+		return ret;
+
+	/* enable error reporting */
+	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000001 << chid);
+	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+
+	/* initialise channel for dma command submission */
+	nv_wr32(priv, 0x610494 + (chid * 0x0010), dmac->push);
+	nv_wr32(priv, 0x610498 + (chid * 0x0010), 0x00010000);
+	nv_wr32(priv, 0x61049c + (chid * 0x0010), 0x00000001);
+	nv_mask(priv, 0x610490 + (chid * 0x0010), 0x00000010, 0x00000010);
+	nv_wr32(priv, 0x640000 + (chid * 0x1000), 0x00000000);
+	nv_wr32(priv, 0x610490 + (chid * 0x0010), 0x00000013);
+
+	/* wait for it to go inactive */
+	if (!nv_wait(priv, 0x610490 + (chid * 0x10), 0x80000000, 0x00000000)) {
+		nv_error(dmac, "init: 0x%08x\n",
+			 nv_rd32(priv, 0x610490 + (chid * 0x10)));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nvd0_disp_dmac_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *dmac = (void *)object;
+	int chid = dmac->base.chid;
+
+	/* deactivate channel */
+	nv_mask(priv, 0x610490 + (chid * 0x0010), 0x00001010, 0x00001000);
+	nv_mask(priv, 0x610490 + (chid * 0x0010), 0x00000003, 0x00000000);
+	if (!nv_wait(priv, 0x610490 + (chid * 0x10), 0x001e0000, 0x00000000)) {
+		nv_error(dmac, "fini: 0x%08x\n",
+			 nv_rd32(priv, 0x610490 + (chid * 0x10)));
+		if (suspend)
+			return -EBUSY;
+	}
+
+	/* disable error reporting */
+	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000000);
+	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000000);
+
+	return nv50_disp_chan_fini(&dmac->base, suspend);
+}
+
+/*******************************************************************************
+ * EVO master channel object
+ ******************************************************************************/
+
+static int
+nvd0_disp_mast_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_mast_class *args = data;
+	struct nv50_disp_dmac *mast;
+	int ret;
+
+	if (size < sizeof(*args))
+		return -EINVAL;
+
+	ret = nv50_disp_dmac_create_(parent, engine, oclass, args->pushbuf,
+				     0, sizeof(*mast), (void **)&mast);
+	*pobject = nv_object(mast);
+	if (ret)
+		return ret;
+
+	nv_parent(mast)->object_attach = nvd0_disp_dmac_object_attach;
+	nv_parent(mast)->object_detach = nvd0_disp_dmac_object_detach;
+	return 0;
+}
+
+static int
+nvd0_disp_mast_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *mast = (void *)object;
+	int ret;
+
+	ret = nv50_disp_chan_init(&mast->base);
+	if (ret)
+		return ret;
+
+	/* enable error reporting */
+	nv_mask(priv, 0x610090, 0x00000001, 0x00000001);
+	nv_mask(priv, 0x6100a0, 0x00000001, 0x00000001);
+
+	/* initialise channel for dma command submission */
+	nv_wr32(priv, 0x610494, mast->push);
+	nv_wr32(priv, 0x610498, 0x00010000);
+	nv_wr32(priv, 0x61049c, 0x00000001);
+	nv_mask(priv, 0x610490, 0x00000010, 0x00000010);
+	nv_wr32(priv, 0x640000, 0x00000000);
+	nv_wr32(priv, 0x610490, 0x01000013);
+
+	/* wait for it to go inactive */
+	if (!nv_wait(priv, 0x610490, 0x80000000, 0x00000000)) {
+		nv_error(mast, "init: 0x%08x\n", nv_rd32(priv, 0x610490));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nvd0_disp_mast_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_dmac *mast = (void *)object;
+
+	/* deactivate channel */
+	nv_mask(priv, 0x610490, 0x00000010, 0x00000000);
+	nv_mask(priv, 0x610490, 0x00000003, 0x00000000);
+	if (!nv_wait(priv, 0x610490, 0x001e0000, 0x00000000)) {
+		nv_error(mast, "fini: 0x%08x\n", nv_rd32(priv, 0x610490));
+		if (suspend)
+			return -EBUSY;
+	}
+
+	/* disable error reporting */
+	nv_mask(priv, 0x610090, 0x00000001, 0x00000000);
+	nv_mask(priv, 0x6100a0, 0x00000001, 0x00000000);
+
+	return nv50_disp_chan_fini(&mast->base, suspend);
+}
+
+struct nouveau_ofuncs
+nvd0_disp_mast_ofuncs = {
+	.ctor = nvd0_disp_mast_ctor,
+	.dtor = nv50_disp_dmac_dtor,
+	.init = nvd0_disp_mast_init,
+	.fini = nvd0_disp_mast_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO sync channel objects
+ ******************************************************************************/
+
+static int
+nvd0_disp_sync_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_sync_class *args = data;
+	struct nv50_disp_priv *priv = (void *)engine;
+	struct nv50_disp_dmac *dmac;
+	int ret;
+
+	if (size < sizeof(*args) || args->head >= priv->head.nr)
+		return -EINVAL;
+
+	ret = nv50_disp_dmac_create_(parent, engine, oclass, args->pushbuf,
+				     1 + args->head, sizeof(*dmac),
+				     (void **)&dmac);
+	*pobject = nv_object(dmac);
+	if (ret)
+		return ret;
+
+	nv_parent(dmac)->object_attach = nvd0_disp_dmac_object_attach;
+	nv_parent(dmac)->object_detach = nvd0_disp_dmac_object_detach;
+	return 0;
+}
+
+struct nouveau_ofuncs
+nvd0_disp_sync_ofuncs = {
+	.ctor = nvd0_disp_sync_ctor,
+	.dtor = nv50_disp_dmac_dtor,
+	.init = nvd0_disp_dmac_init,
+	.fini = nvd0_disp_dmac_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO overlay channel objects
+ ******************************************************************************/
+
+static int
+nvd0_disp_ovly_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_ovly_class *args = data;
+	struct nv50_disp_priv *priv = (void *)engine;
+	struct nv50_disp_dmac *dmac;
+	int ret;
+
+	if (size < sizeof(*args) || args->head >= priv->head.nr)
+		return -EINVAL;
+
+	ret = nv50_disp_dmac_create_(parent, engine, oclass, args->pushbuf,
+				     5 + args->head, sizeof(*dmac),
+				     (void **)&dmac);
+	*pobject = nv_object(dmac);
+	if (ret)
+		return ret;
+
+	nv_parent(dmac)->object_attach = nvd0_disp_dmac_object_attach;
+	nv_parent(dmac)->object_detach = nvd0_disp_dmac_object_detach;
+	return 0;
+}
+
+struct nouveau_ofuncs
+nvd0_disp_ovly_ofuncs = {
+	.ctor = nvd0_disp_ovly_ctor,
+	.dtor = nv50_disp_dmac_dtor,
+	.init = nvd0_disp_dmac_init,
+	.fini = nvd0_disp_dmac_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO PIO channel base class
+ ******************************************************************************/
+
+static int
+nvd0_disp_pioc_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass, int chid,
+		       int length, void **pobject)
+{
+	return nv50_disp_chan_create_(parent, engine, oclass, chid,
+				      length, pobject);
+}
+
+static void
+nvd0_disp_pioc_dtor(struct nouveau_object *object)
+{
+	struct nv50_disp_pioc *pioc = (void *)object;
+	nv50_disp_chan_destroy(&pioc->base);
+}
+
+static int
+nvd0_disp_pioc_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_pioc *pioc = (void *)object;
+	int chid = pioc->base.chid;
+	int ret;
+
+	ret = nv50_disp_chan_init(&pioc->base);
+	if (ret)
+		return ret;
+
+	/* enable error reporting */
+	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000001 << chid);
+	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+
+	/* activate channel */
+	nv_wr32(priv, 0x610490 + (chid * 0x10), 0x00000001);
+	if (!nv_wait(priv, 0x610490 + (chid * 0x10), 0x00030000, 0x00010000)) {
+		nv_error(pioc, "init: 0x%08x\n",
+			 nv_rd32(priv, 0x610490 + (chid * 0x10)));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nvd0_disp_pioc_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_pioc *pioc = (void *)object;
+	int chid = pioc->base.chid;
+
+	nv_mask(priv, 0x610490 + (chid * 0x10), 0x00000001, 0x00000000);
+	if (!nv_wait(priv, 0x610490 + (chid * 0x10), 0x00030000, 0x00000000)) {
+		nv_error(pioc, "timeout: 0x%08x\n",
+			 nv_rd32(priv, 0x610490 + (chid * 0x10)));
+		if (suspend)
+			return -EBUSY;
+	}
+
+	/* disable error reporting */
+	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000000);
+	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000000);
+
+	return nv50_disp_chan_fini(&pioc->base, suspend);
+}
+
+/*******************************************************************************
+ * EVO immediate overlay channel objects
+ ******************************************************************************/
+
+static int
+nvd0_disp_oimm_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_oimm_class *args = data;
+	struct nv50_disp_priv *priv = (void *)engine;
+	struct nv50_disp_pioc *pioc;
+	int ret;
+
+	if (size < sizeof(*args) || args->head >= priv->head.nr)
+		return -EINVAL;
+
+	ret = nvd0_disp_pioc_create_(parent, engine, oclass, 9 + args->head,
+				     sizeof(*pioc), (void **)&pioc);
+	*pobject = nv_object(pioc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+struct nouveau_ofuncs
+nvd0_disp_oimm_ofuncs = {
+	.ctor = nvd0_disp_oimm_ctor,
+	.dtor = nvd0_disp_pioc_dtor,
+	.init = nvd0_disp_pioc_init,
+	.fini = nvd0_disp_pioc_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * EVO cursor channel objects
+ ******************************************************************************/
+
+static int
+nvd0_disp_curs_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_display_curs_class *args = data;
+	struct nv50_disp_priv *priv = (void *)engine;
+	struct nv50_disp_pioc *pioc;
+	int ret;
+
+	if (size < sizeof(*args) || args->head >= priv->head.nr)
+		return -EINVAL;
+
+	ret = nvd0_disp_pioc_create_(parent, engine, oclass, 13 + args->head,
+				     sizeof(*pioc), (void **)&pioc);
+	*pobject = nv_object(pioc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+struct nouveau_ofuncs
+nvd0_disp_curs_ofuncs = {
+	.ctor = nvd0_disp_curs_ctor,
+	.dtor = nvd0_disp_pioc_dtor,
+	.init = nvd0_disp_pioc_init,
+	.fini = nvd0_disp_pioc_fini,
+	.rd32 = nv50_disp_chan_rd32,
+	.wr32 = nv50_disp_chan_wr32,
+};
+
+/*******************************************************************************
+ * Base display object
+ ******************************************************************************/
+
+static int
+nvd0_disp_base_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv = (void *)engine;
+	struct nv50_disp_base *base;
+	int ret;
+
+	ret = nouveau_parent_create(parent, engine, oclass, 0,
+				    priv->sclass, 0, &base);
+	*pobject = nv_object(base);
+	if (ret)
+		return ret;
+
+	return nouveau_ramht_new(parent, parent, 0x1000, 0, &base->ramht);
+}
+
+static void
+nvd0_disp_base_dtor(struct nouveau_object *object)
+{
+	struct nv50_disp_base *base = (void *)object;
+	nouveau_ramht_ref(NULL, &base->ramht);
+	nouveau_parent_destroy(&base->base);
+}
+
+static int
+nvd0_disp_base_init(struct nouveau_object *object)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_base *base = (void *)object;
+	int ret, i;
+	u32 tmp;
+
+	ret = nouveau_parent_init(&base->base);
+	if (ret)
+		return ret;
+
+	/* The register copies below appear to inform EVO of the display
+	 * capabilities, or something similar.
+	 */
+
+	/* ... CRTC caps */
+	for (i = 0; i < priv->head.nr; i++) {
+		tmp = nv_rd32(priv, 0x616104 + (i * 0x800));
+		nv_wr32(priv, 0x6101b4 + (i * 0x800), tmp);
+		tmp = nv_rd32(priv, 0x616108 + (i * 0x800));
+		nv_wr32(priv, 0x6101b8 + (i * 0x800), tmp);
+		tmp = nv_rd32(priv, 0x61610c + (i * 0x800));
+		nv_wr32(priv, 0x6101bc + (i * 0x800), tmp);
+	}
+
+	/* ... DAC caps */
+	for (i = 0; i < priv->dac.nr; i++) {
+		tmp = nv_rd32(priv, 0x61a000 + (i * 0x800));
+		nv_wr32(priv, 0x6101c0 + (i * 0x800), tmp);
+	}
+
+	/* ... SOR caps */
+	for (i = 0; i < priv->sor.nr; i++) {
+		tmp = nv_rd32(priv, 0x61c000 + (i * 0x800));
+		nv_wr32(priv, 0x6301c4 + (i * 0x800), tmp);
+	}
+
+	/* steal display away from vbios, or something like that */
+	if (nv_rd32(priv, 0x6100ac) & 0x00000100) {
+		nv_wr32(priv, 0x6100ac, 0x00000100);
+		nv_mask(priv, 0x6194e8, 0x00000001, 0x00000000);
+		if (!nv_wait(priv, 0x6194e8, 0x00000002, 0x00000000)) {
+			nv_error(priv, "timeout acquiring display\n");
+			return -EBUSY;
+		}
+	}
+
+	/* point at display engine memory area (hash table, objects) */
+	nv_wr32(priv, 0x610010, (nv_gpuobj(object->parent)->addr >> 8) | 9);
+
+	/* enable supervisor interrupts, disable everything else */
+	nv_wr32(priv, 0x610090, 0x00000000);
+	nv_wr32(priv, 0x6100a0, 0x00000000);
+	nv_wr32(priv, 0x6100b0, 0x00000307);
+
+	return 0;
+}
+
+static int
+nvd0_disp_base_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nv50_disp_base *base = (void *)object;
+
+	/* disable all interrupts */
+	nv_wr32(priv, 0x6100b0, 0x00000000);
+
+	return nouveau_parent_fini(&base->base, suspend);
+}
+
+struct nouveau_ofuncs
+nvd0_disp_base_ofuncs = {
+	.ctor = nvd0_disp_base_ctor,
+	.dtor = nvd0_disp_base_dtor,
+	.init = nvd0_disp_base_init,
+	.fini = nvd0_disp_base_fini,
+};
+
+static struct nouveau_oclass
+nvd0_disp_base_oclass[] = {
+	{ NVD0_DISP_CLASS, &nvd0_disp_base_ofuncs, nva3_disp_base_omthds },
+	{}
 };
 
 static struct nouveau_oclass
 nvd0_disp_sclass[] = {
-	{},
+	{ NVD0_DISP_MAST_CLASS, &nvd0_disp_mast_ofuncs },
+	{ NVD0_DISP_SYNC_CLASS, &nvd0_disp_sync_ofuncs },
+	{ NVD0_DISP_OVLY_CLASS, &nvd0_disp_ovly_ofuncs },
+	{ NVD0_DISP_OIMM_CLASS, &nvd0_disp_oimm_ofuncs },
+	{ NVD0_DISP_CURS_CLASS, &nvd0_disp_curs_ofuncs },
+	{}
 };
 
+/*******************************************************************************
+ * Display engine implementation
+ ******************************************************************************/
+
+static u16
+exec_lookup(struct nv50_disp_priv *priv, int head, int outp, u32 ctrl,
+	    struct dcb_output *dcb, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+	    struct nvbios_outp *info)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	u16 mask, type, data;
+
+	if (outp < 4) {
+		type = DCB_OUTPUT_ANALOG;
+		mask = 0;
+	} else {
+		outp -= 4;
+		switch (ctrl & 0x00000f00) {
+		case 0x00000000: type = DCB_OUTPUT_LVDS; mask = 1; break;
+		case 0x00000100: type = DCB_OUTPUT_TMDS; mask = 1; break;
+		case 0x00000200: type = DCB_OUTPUT_TMDS; mask = 2; break;
+		case 0x00000500: type = DCB_OUTPUT_TMDS; mask = 3; break;
+		case 0x00000800: type = DCB_OUTPUT_DP; mask = 1; break;
+		case 0x00000900: type = DCB_OUTPUT_DP; mask = 2; break;
+		default:
+			nv_error(priv, "unknown SOR mc 0x%08x\n", ctrl);
+			return 0x0000;
+		}
+		dcb->sorconf.link = mask;
+	}
+
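+	/* dcb match mask: sor link in bits 7:6, output index in the low
+	 * nibble, head in bits 11:8 (presumably what the dcb tables expect) */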
+	mask  = 0x00c0 & (mask << 6);
+	mask |= 0x0001 << outp;
+	mask |= 0x0100 << head;
+
+	data = dcb_outp_match(bios, type, mask, ver, hdr, dcb);
+	if (!data)
+		return 0x0000;
+
+	return nvbios_outp_match(bios, type, mask, ver, hdr, cnt, len, info);
+}
+
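+/* look up an output's vbios script table and execute the script with the
+ * given id, returning whether execution succeeded */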
+static bool
+exec_script(struct nv50_disp_priv *priv, int head, int outp, u32 ctrl, int id)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_outp info;
+	struct dcb_output dcb;
+	u8  ver, hdr, cnt, len;
+	u16 data;
+
+	data = exec_lookup(priv, head, outp, ctrl, &dcb, &ver, &hdr, &cnt, &len, &info);
+	if (data) {
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = bios,
+			.offset = info.script[id],
+			.outp = &dcb,
+			.crtc = head,
+			.execute = 1,
+		};
+
+		return nvbios_exec(&init) == 0;
+	}
+
+	return false;
+}
+
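+/* match the output's configuration against the vbios and, if a clock-
+ * comparison script covering the pixel clock exists, execute it (or so
+ * the table layout suggests) */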
+static u32
+exec_clkcmp(struct nv50_disp_priv *priv, int head, int outp,
+	    u32 ctrl, int id, u32 pclk)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_outp info1;
+	struct nvbios_ocfg info2;
+	struct dcb_output dcb;
+	u8  ver, hdr, cnt, len;
+	u16 data, conf;
+
+	data = exec_lookup(priv, head, outp, ctrl, &dcb, &ver, &hdr, &cnt, &len, &info1);
+	if (data == 0x0000)
+		return 0x0000;
+
+	switch (dcb.type) {
+	case DCB_OUTPUT_TMDS:
+		conf = (ctrl & 0x00000f00) >> 8;
+		if (pclk >= 165000)
+			conf |= 0x0100;
+		break;
+	case DCB_OUTPUT_LVDS:
+		conf = priv->sor.lvdsconf;
+		break;
+	case DCB_OUTPUT_DP:
+		conf = (ctrl & 0x00000f00) >> 8;
+		break;
+	case DCB_OUTPUT_ANALOG:
+	default:
+		conf = 0x00ff;
+		break;
+	}
+
+	data = nvbios_ocfg_match(bios, data, conf, &ver, &hdr, &cnt, &len, &info2);
+	if (data) {
+		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
+		if (data) {
+			struct nvbios_init init = {
+				.subdev = nv_subdev(priv),
+				.bios = bios,
+				.offset = data,
+				.outp = &dcb,
+				.crtc = head,
+				.execute = 1,
+			};
+
+			if (nvbios_exec(&init))
+				return 0x0000;
+			return conf;
+		}
+	}
+
+	return 0x0000;
+}
+
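+/* the unk[124] handlers below service the three display supervisor stages,
+ * which appear to bracket a modeset: scripts before, clock setup during,
+ * and final scripts after */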
 static void
-nvd0_disp_intr_vblank(struct nvd0_disp_priv *priv, int crtc)
+nvd0_display_unk1_handler(struct nv50_disp_priv *priv, u32 head, u32 mask)
+{
+	int i;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcc = nv_rd32(priv, 0x640180 + (i * 0x20));
+		if (mcc & (1 << head))
+			exec_script(priv, head, i, mcc, 1);
+	}
+
+	nv_wr32(priv, 0x6101d4, 0x00000000);
+	nv_wr32(priv, 0x6109d4, 0x00000000);
+	nv_wr32(priv, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_unk2_calc_tu(struct nv50_disp_priv *priv, int head, int or)
+{
+	const u32 ctrl = nv_rd32(priv, 0x660200 + (or   * 0x020));
+	const u32 conf = nv_rd32(priv, 0x660404 + (head * 0x300));
+	const u32 pclk = nv_rd32(priv, 0x660450 + (head * 0x300)) / 1000;
+	const u32 link = ((ctrl & 0xf00) == 0x800) ? 0 : 1;
+	const u32 hoff = (head * 0x800);
+	const u32 soff = (  or * 0x800);
+	const u32 loff = (link * 0x080) + soff;
+	const u32 symbol = 100000;
+	const u32 TU = 64;
+	u32 dpctrl = nv_rd32(priv, 0x61c10c + loff) & 0x000f0000;
+	u32 clksor = nv_rd32(priv, 0x612300 + soff);
+	u32 datarate, link_nr, link_bw, bits;
+	u64 ratio, value;
+
+	if      ((conf & 0x3c0) == 0x180) bits = 30;
+	else if ((conf & 0x3c0) == 0x140) bits = 24;
+	else                              bits = 18;
+	datarate = (pclk * bits) / 8;
+
+	if      (dpctrl > 0x00030000) link_nr = 4;
+	else if (dpctrl > 0x00010000) link_nr = 2;
+	else			      link_nr = 1;
+
+	link_bw  = (clksor & 0x007c0000) >> 18;
+	link_bw *= 27000;
+
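+	/* compute a symbols-per-TU watermark from the ratio of stream data
+	 * rate to total link bandwidth (presumably per the DP spec's TU
+	 * scheme) */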
+	ratio  = datarate;
+	ratio *= symbol;
+	do_div(ratio, link_nr * link_bw);
+
+	value  = (symbol - ratio) * TU;
+	value *= ratio;
+	do_div(value, symbol);
+	do_div(value, symbol);
+
+	value += 5;
+	value |= 0x08000000;
+
+	nv_wr32(priv, 0x616610 + hoff, value);
+}
+
+static void
+nvd0_display_unk2_handler(struct nv50_disp_priv *priv, u32 head, u32 mask)
+{
+	u32 pclk;
+	int i;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcc = nv_rd32(priv, 0x640180 + (i * 0x20));
+		if (mcc & (1 << head))
+			exec_script(priv, head, i, mcc, 2);
+	}
+
+	pclk = nv_rd32(priv, 0x660450 + (head * 0x300)) / 1000;
+	nv_debug(priv, "head %d pclk %d mask 0x%08x\n", head, pclk, mask);
+	if (pclk && (mask & 0x00010000)) {
+		struct nouveau_clock *clk = nouveau_clock(priv);
+		clk->pll_set(clk, PLL_VPLL0 + head, pclk);
+	}
+
+	nv_wr32(priv, 0x612200 + (head * 0x800), 0x00000000);
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcp = nv_rd32(priv, 0x660180 + (i * 0x20)), cfg;
+		if (mcp & (1 << head)) {
+			if ((cfg = exec_clkcmp(priv, head, i, mcp, 0, pclk))) {
+				u32 addr, mask, data = 0x00000000;
+				if (i < 4) {
+					addr = 0x612280 + ((i - 0) * 0x800);
+					mask = 0xffffffff;
+				} else {
+					switch (mcp & 0x00000f00) {
+					case 0x00000800:
+					case 0x00000900:
+						nvd0_display_unk2_calc_tu(priv, head, i - 4);
+						break;
+					default:
+						break;
+					}
+
+					addr = 0x612300 + ((i - 4) * 0x800);
+					mask = 0x00000707;
+					if (cfg & 0x00000100)
+						data = 0x00000101;
+				}
+				nv_mask(priv, addr, mask, data);
+			}
+			break;
+		}
+	}
+
+	nv_wr32(priv, 0x6101d4, 0x00000000);
+	nv_wr32(priv, 0x6109d4, 0x00000000);
+	nv_wr32(priv, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_unk4_handler(struct nv50_disp_priv *priv, u32 head, u32 mask)
+{
+	int pclk, i;
+
+	pclk = nv_rd32(priv, 0x660450 + (head * 0x300)) / 1000;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcp = nv_rd32(priv, 0x660180 + (i * 0x20));
+		if (mcp & (1 << head))
+			exec_clkcmp(priv, head, i, mcp, 1, pclk);
+	}
+
+	nv_wr32(priv, 0x6101d4, 0x00000000);
+	nv_wr32(priv, 0x6109d4, 0x00000000);
+	nv_wr32(priv, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_disp_intr_vblank(struct nv50_disp_priv *priv, int crtc)
 {
 	struct nouveau_bar *bar = nouveau_bar(priv);
 	struct nouveau_disp *disp = &priv->base;
@@ -65,14 +851,71 @@
 		disp->vblank.notify(disp->vblank.data, crtc);
 }
 
-static void
+void
 nvd0_disp_intr(struct nouveau_subdev *subdev)
 {
-	struct nvd0_disp_priv *priv = (void *)subdev;
+	struct nv50_disp_priv *priv = (void *)subdev;
 	u32 intr = nv_rd32(priv, 0x610088);
 	int i;
 
-	for (i = 0; i < 4; i++) {
+	if (intr & 0x00000001) {
+		u32 stat = nv_rd32(priv, 0x61008c);
+		nv_wr32(priv, 0x61008c, stat);
+		intr &= ~0x00000001;
+	}
+
+	if (intr & 0x00000002) {
+		u32 stat = nv_rd32(priv, 0x61009c);
+		int chid = ffs(stat) - 1;
+		if (chid >= 0) {
+			u32 mthd = nv_rd32(priv, 0x6101f0 + (chid * 12));
+			u32 data = nv_rd32(priv, 0x6101f4 + (chid * 12));
+			u32 unkn = nv_rd32(priv, 0x6101f8 + (chid * 12));
+
+			nv_error(priv, "chid %d mthd 0x%04x data 0x%08x "
+				       "0x%08x 0x%08x\n",
+				 chid, (mthd & 0x0000ffc), data, mthd, unkn);
+			nv_wr32(priv, 0x61009c, (1 << chid));
+			nv_wr32(priv, 0x6101f0 + (chid * 12), 0x90000000);
+		}
+
+		intr &= ~0x00000002;
+	}
+
+	if (intr & 0x00100000) {
+		u32 stat = nv_rd32(priv, 0x6100ac);
+		u32 mask = 0, crtc = ~0;
+
+		while (!mask && ++crtc < priv->head.nr)
+			mask = nv_rd32(priv, 0x6101d4 + (crtc * 0x800));
+
+		if (stat & 0x00000001) {
+			nv_wr32(priv, 0x6100ac, 0x00000001);
+			nvd0_display_unk1_handler(priv, crtc, mask);
+			stat &= ~0x00000001;
+		}
+
+		if (stat & 0x00000002) {
+			nv_wr32(priv, 0x6100ac, 0x00000002);
+			nvd0_display_unk2_handler(priv, crtc, mask);
+			stat &= ~0x00000002;
+		}
+
+		if (stat & 0x00000004) {
+			nv_wr32(priv, 0x6100ac, 0x00000004);
+			nvd0_display_unk4_handler(priv, crtc, mask);
+			stat &= ~0x00000004;
+		}
+
+		if (stat) {
+			nv_info(priv, "unknown intr24 0x%08x\n", stat);
+			nv_wr32(priv, 0x6100ac, stat);
+		}
+
+		intr &= ~0x00100000;
+	}
+
+	for (i = 0; i < priv->head.nr; i++) {
 		u32 mask = 0x01000000 << i;
 		if (mask & intr) {
 			u32 stat = nv_rd32(priv, 0x6100bc + (i * 0x800));
@@ -86,10 +929,10 @@
 
 static int
 nvd0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-		  struct nouveau_oclass *oclass, void *data, u32 size,
-		  struct nouveau_object **pobject)
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
 {
-	struct nvd0_disp_priv *priv;
+	struct nv50_disp_priv *priv;
 	int ret;
 
 	ret = nouveau_disp_create(parent, engine, oclass, "PDISP",
@@ -98,8 +941,23 @@
 	if (ret)
 		return ret;
 
-	nv_engine(priv)->sclass = nvd0_disp_sclass;
+	nv_engine(priv)->sclass = nvd0_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nvd0_disp_intr;
+	priv->sclass = nvd0_disp_sclass;
+	priv->head.nr = nv_rd32(priv, 0x022448);
+	priv->dac.nr = 3;
+	priv->sor.nr = 4;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
+	priv->sor.hda_eld = nvd0_hda_eld;
+	priv->sor.hdmi = nvd0_hdmi_ctrl;
+	priv->sor.dp_train = nvd0_sor_dp_train;
+	priv->sor.dp_train_init = nv94_sor_dp_train_init;
+	priv->sor.dp_train_fini = nv94_sor_dp_train_fini;
+	priv->sor.dp_lnkctl = nvd0_sor_dp_lnkctl;
+	priv->sor.dp_drvctl = nvd0_sor_dp_drvctl;
 
 	INIT_LIST_HEAD(&priv->base.vblank.list);
 	spin_lock_init(&priv->base.vblank.lock);
@@ -108,7 +966,7 @@
 
 struct nouveau_oclass
 nvd0_disp_oclass = {
-	.handle = NV_ENGINE(DISP, 0xd0),
+	.handle = NV_ENGINE(DISP, 0x90),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nvd0_disp_ctor,
 		.dtor = _nouveau_disp_dtor,
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
new file mode 100644
index 0000000..259537c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <engine/software.h>
+#include <engine/disp.h>
+
+#include <core/class.h>
+
+#include "nv50.h"
+
+static struct nouveau_oclass
+nve0_disp_sclass[] = {
+	{ NVE0_DISP_MAST_CLASS, &nvd0_disp_mast_ofuncs },
+	{ NVE0_DISP_SYNC_CLASS, &nvd0_disp_sync_ofuncs },
+	{ NVE0_DISP_OVLY_CLASS, &nvd0_disp_ovly_ofuncs },
+	{ NVE0_DISP_OIMM_CLASS, &nvd0_disp_oimm_ofuncs },
+	{ NVE0_DISP_CURS_CLASS, &nvd0_disp_curs_ofuncs },
+	{}
+};
+
+static struct nouveau_oclass
+nve0_disp_base_oclass[] = {
+	{ NVE0_DISP_CLASS, &nvd0_disp_base_ofuncs, nva3_disp_base_omthds },
+	{}
+};
+
+static int
+nve0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct nv50_disp_priv *priv;
+	int ret;
+
+	ret = nouveau_disp_create(parent, engine, oclass, "PDISP",
+				  "display", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_engine(priv)->sclass = nve0_disp_base_oclass;
+	nv_engine(priv)->cclass = &nv50_disp_cclass;
+	nv_subdev(priv)->intr = nvd0_disp_intr;
+	priv->sclass = nve0_disp_sclass;
+	priv->head.nr = nv_rd32(priv, 0x022448);
+	priv->dac.nr = 3;
+	priv->sor.nr = 4;
+	priv->dac.power = nv50_dac_power;
+	priv->dac.sense = nv50_dac_sense;
+	priv->sor.power = nv50_sor_power;
+	priv->sor.hda_eld = nvd0_hda_eld;
+	priv->sor.hdmi = nvd0_hdmi_ctrl;
+	priv->sor.dp_train = nvd0_sor_dp_train;
+	priv->sor.dp_train_init = nv94_sor_dp_train_init;
+	priv->sor.dp_train_fini = nv94_sor_dp_train_fini;
+	priv->sor.dp_lnkctl = nvd0_sor_dp_lnkctl;
+	priv->sor.dp_drvctl = nvd0_sor_dp_drvctl;
+
+	INIT_LIST_HEAD(&priv->base.vblank.list);
+	spin_lock_init(&priv->base.vblank.lock);
+	return 0;
+}
+
+struct nouveau_oclass
+nve0_disp_oclass = {
+	.handle = NV_ENGINE(DISP, 0x91),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nve0_disp_ctor,
+		.dtor = _nouveau_disp_dtor,
+		.init = _nouveau_disp_init,
+		.fini = _nouveau_disp_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c
new file mode 100644
index 0000000..39b6b677
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/timer.h>
+
+#include "nv50.h"
+
+int
+nv50_sor_power(struct nv50_disp_priv *priv, int or, u32 data)
+{
+	const u32 stat = data & NV50_DISP_SOR_PWR_STATE;
+	const u32 soff = (or * 0x800);
+	nv_wait(priv, 0x61c004 + soff, 0x80000000, 0x00000000);
+	nv_mask(priv, 0x61c004 + soff, 0x80000001, 0x80000000 | stat);
+	nv_wait(priv, 0x61c004 + soff, 0x80000000, 0x00000000);
+	nv_wait(priv, 0x61c030 + soff, 0x10000000, 0x00000000);
+	return 0;
+}
+
+int
+nv50_sor_mthd(struct nouveau_object *object, u32 mthd, void *args, u32 size)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const u16 type = (mthd & NV50_DISP_SOR_MTHD_TYPE) >> 12;
+	const u8  head = (mthd & NV50_DISP_SOR_MTHD_HEAD) >> 3;
+	const u8  link = (mthd & NV50_DISP_SOR_MTHD_LINK) >> 2;
+	const u8    or = (mthd & NV50_DISP_SOR_MTHD_OR);
+	const u16 mask = (0x0100 << head) | (0x0040 << link) | (0x0001 << or);
+	struct dcb_output outp;
+	u8  ver, hdr;
+	u32 data;
+	int ret = -EINVAL;
+
+	if (size < sizeof(u32))
+		return -EINVAL;
+	data = *(u32 *)args;
+
+	if (type && !dcb_outp_match(bios, type, mask, &ver, &hdr, &outp))
+		return -ENODEV;
+
+	switch (mthd & ~0x3f) {
+	case NV50_DISP_SOR_PWR:
+		ret = priv->sor.power(priv, or, data);
+		break;
+	case NVA3_DISP_SOR_HDA_ELD:
+		ret = priv->sor.hda_eld(priv, or, args, size);
+		break;
+	case NV84_DISP_SOR_HDMI_PWR:
+		ret = priv->sor.hdmi(priv, head, or, data);
+		break;
+	case NV50_DISP_SOR_LVDS_SCRIPT:
+		priv->sor.lvdsconf = data & NV50_DISP_SOR_LVDS_SCRIPT_ID;
+		ret = 0;
+		break;
+	case NV94_DISP_SOR_DP_TRAIN:
+		switch (data & NV94_DISP_SOR_DP_TRAIN_OP) {
+		case NV94_DISP_SOR_DP_TRAIN_OP_PATTERN:
+			ret = priv->sor.dp_train(priv, or, link, type, mask, data, &outp);
+			break;
+		case NV94_DISP_SOR_DP_TRAIN_OP_INIT:
+			ret = priv->sor.dp_train_init(priv, or, link, head, type, mask, data, &outp);
+			break;
+		case NV94_DISP_SOR_DP_TRAIN_OP_FINI:
+			ret = priv->sor.dp_train_fini(priv, or, link, head, type, mask, data, &outp);
+			break;
+		default:
+			break;
+		}
+		break;
+	case NV94_DISP_SOR_DP_LNKCTL:
+		ret = priv->sor.dp_lnkctl(priv, or, link, head, type, mask, data, &outp);
+		break;
+	case NV94_DISP_SOR_DP_DRVCTL(0):
+	case NV94_DISP_SOR_DP_DRVCTL(1):
+	case NV94_DISP_SOR_DP_DRVCTL(2):
+	case NV94_DISP_SOR_DP_DRVCTL(3):
+		ret = priv->sor.dp_drvctl(priv, or, link, (mthd & 0xc0) >> 6,
+				          type, mask, data, &outp);
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/sornv94.c b/drivers/gpu/drm/nouveau/core/engine/disp/sornv94.c
new file mode 100644
index 0000000..f6edd00
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/sornv94.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/bios/dp.h>
+#include <subdev/bios/init.h>
+
+#include "nv50.h"
+
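+/* map a logical dp lane to its byte lane within the drive/preemphasis
+ * registers (the mapping apparently differs on nvaf) */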
+static inline u32
+nv94_sor_dp_lane_map(struct nv50_disp_priv *priv, u8 lane)
+{
+	static const u8 nvaf[] = { 24, 16, 8, 0 }; /* thanks, apple.. */
+	static const u8 nv94[] = { 16, 8, 0, 24 };
+	if (nv_device(priv)->chipset == 0xaf)
+		return nvaf[lane];
+	return nv94[lane];
+}
+
+int
+nv94_sor_dp_train_init(struct nv50_disp_priv *priv, int or, int link, int head,
+		       u16 type, u16 mask, u32 data, struct dcb_output *dcbo)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_dpout info;
+	u8  ver, hdr, cnt, len;
+	u16 outp;
+
+	outp = nvbios_dpout_match(bios, type, mask, &ver, &hdr, &cnt, &len, &info);
+	if (outp) {
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = bios,
+			.outp = dcbo,
+			.crtc = head,
+			.execute = 1,
+		};
+
+		if (data & NV94_DISP_SOR_DP_TRAIN_INIT_SPREAD_ON)
+			init.offset = info.script[2];
+		else
+			init.offset = info.script[3];
+		nvbios_exec(&init);
+
+		init.offset = info.script[0];
+		nvbios_exec(&init);
+	}
+
+	return 0;
+}
+
+int
+nv94_sor_dp_train_fini(struct nv50_disp_priv *priv, int or, int link, int head,
+		       u16 type, u16 mask, u32 data, struct dcb_output *dcbo)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_dpout info;
+	u8  ver, hdr, cnt, len;
+	u16 outp;
+
+	outp = nvbios_dpout_match(bios, type, mask, &ver, &hdr, &cnt, &len, &info);
+	if (outp) {
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = bios,
+			.offset = info.script[1],
+			.outp = dcbo,
+			.crtc = head,
+			.execute = 1,
+		};
+
+		nvbios_exec(&init);
+	}
+
+	return 0;
+}
+
+int
+nv94_sor_dp_train(struct nv50_disp_priv *priv, int or, int link,
+		  u16 type, u16 mask, u32 data, struct dcb_output *info)
+{
+	const u32 loff = (or * 0x800) + (link * 0x80);
+	const u32 patt = (data & NV94_DISP_SOR_DP_TRAIN_PATTERN);
+	nv_mask(priv, 0x61c10c + loff, 0x0f000000, patt << 24);
+	return 0;
+}
+
+int
+nv94_sor_dp_lnkctl(struct nv50_disp_priv *priv, int or, int link, int head,
+		   u16 type, u16 mask, u32 data, struct dcb_output *dcbo)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const u32 loff = (or * 0x800) + (link * 0x80);
+	const u32 soff = (or * 0x800);
+	u16 link_bw = (data & NV94_DISP_SOR_DP_LNKCTL_WIDTH) >> 8;
+	u8  link_nr = (data & NV94_DISP_SOR_DP_LNKCTL_COUNT);
+	u32 dpctrl = 0x00000000;
+	u32 clksor = 0x00000000;
+	u32 outp, lane = 0;
+	u8  ver, hdr, cnt, len;
+	struct nvbios_dpout info;
+	int i;
+
+	/* -> 10KHz units */
+	link_bw *= 2700;
+
+	outp = nvbios_dpout_match(bios, type, mask, &ver, &hdr, &cnt, &len, &info);
+	if (outp && info.lnkcmp) {
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = bios,
+			.offset = 0x0000,
+			.outp = dcbo,
+			.crtc = head,
+			.execute = 1,
+		};
+
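+		/* scan the link-compare table for the first entry rated at or
+		 * above the requested bandwidth, then run its script (assumed
+		 * format: u16 bandwidth, u16 script pointer per entry) */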
+		while (link_bw < nv_ro16(bios, info.lnkcmp))
+			info.lnkcmp += 4;
+		init.offset = nv_ro16(bios, info.lnkcmp + 2);
+
+		nvbios_exec(&init);
+	}
+
+	dpctrl |= ((1 << link_nr) - 1) << 16;
+	if (data & NV94_DISP_SOR_DP_LNKCTL_FRAME_ENH)
+		dpctrl |= 0x00004000;
+	if (link_bw > 16200)
+		clksor |= 0x00040000;
+
+	for (i = 0; i < link_nr; i++)
+		lane |= 1 << (nv94_sor_dp_lane_map(priv, i) >> 3);
+
+	nv_mask(priv, 0x614300 + soff, 0x000c0000, clksor);
+	nv_mask(priv, 0x61c10c + loff, 0x001f4000, dpctrl);
+	nv_mask(priv, 0x61c130 + loff, 0x0000000f, lane);
+	return 0;
+}
+
+int
+nv94_sor_dp_drvctl(struct nv50_disp_priv *priv, int or, int link, int lane,
+		   u16 type, u16 mask, u32 data, struct dcb_output *dcbo)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const u32 loff = (or * 0x800) + (link * 0x80);
+	const u8 swing = (data & NV94_DISP_SOR_DP_DRVCTL_VS) >> 8;
+	const u8 preem = (data & NV94_DISP_SOR_DP_DRVCTL_PE);
+	u32 addr, shift = nv94_sor_dp_lane_map(priv, lane);
+	u8  ver, hdr, cnt, len;
+	struct nvbios_dpout outp;
+	struct nvbios_dpcfg ocfg;
+
+	addr = nvbios_dpout_match(bios, type, mask, &ver, &hdr, &cnt, &len, &outp);
+	if (!addr)
+		return -ENODEV;
+
+	addr = nvbios_dpcfg_match(bios, addr, 0, swing, preem, &ver, &hdr, &cnt, &len, &ocfg);
+	if (!addr)
+		return -EINVAL;
+
+	nv_mask(priv, 0x61c118 + loff, 0x000000ff << shift, ocfg.drv << shift);
+	nv_mask(priv, 0x61c120 + loff, 0x000000ff << shift, ocfg.pre << shift);
+	nv_mask(priv, 0x61c130 + loff, 0x0000ff00, ocfg.unk << 8);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/sornvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/sornvd0.c
new file mode 100644
index 0000000..c37ce7e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/sornvd0.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/os.h>
+#include <core/class.h>
+
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/bios/dp.h>
+#include <subdev/bios/init.h>
+
+#include "nv50.h"
+
+static inline u32
+nvd0_sor_dp_lane_map(struct nv50_disp_priv *priv, u8 lane)
+{
+	static const u8 nvd0[] = { 16, 8, 0, 24 };
+	return nvd0[lane];
+}
+
+int
+nvd0_sor_dp_train(struct nv50_disp_priv *priv, int or, int link,
+		  u16 type, u16 mask, u32 data, struct dcb_output *info)
+{
+	const u32 loff = (or * 0x800) + (link * 0x80);
+	const u32 patt = (data & NV94_DISP_SOR_DP_TRAIN_PATTERN);
+	nv_mask(priv, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * patt);
+	return 0;
+}
+
+int
+nvd0_sor_dp_lnkctl(struct nv50_disp_priv *priv, int or, int link, int head,
+		   u16 type, u16 mask, u32 data, struct dcb_output *dcbo)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const u32 loff = (or * 0x800) + (link * 0x80);
+	const u32 soff = (or * 0x800);
+	const u8  link_bw = (data & NV94_DISP_SOR_DP_LNKCTL_WIDTH) >> 8;
+	const u8  link_nr = (data & NV94_DISP_SOR_DP_LNKCTL_COUNT);
+	u32 dpctrl = 0x00000000;
+	u32 clksor = 0x00000000;
+	u32 outp, lane = 0;
+	u8  ver, hdr, cnt, len;
+	struct nvbios_dpout info;
+	int i;
+
+	outp = nvbios_dpout_match(bios, type, mask, &ver, &hdr, &cnt, &len, &info);
+	if (outp && info.lnkcmp) {
+		struct nvbios_init init = {
+			.subdev = nv_subdev(priv),
+			.bios = bios,
+			.offset = 0x0000,
+			.outp = dcbo,
+			.crtc = head,
+			.execute = 1,
+		};
+
+		while (nv_ro08(bios, info.lnkcmp) < link_bw)
+			info.lnkcmp += 3;
+		init.offset = nv_ro16(bios, info.lnkcmp + 1);
+
+		nvbios_exec(&init);
+	}
+
+	clksor |= link_bw << 18;
+	dpctrl |= ((1 << link_nr) - 1) << 16;
+	if (data & NV94_DISP_SOR_DP_LNKCTL_FRAME_ENH)
+		dpctrl |= 0x00004000;
+
+	for (i = 0; i < link_nr; i++)
+		lane |= 1 << (nvd0_sor_dp_lane_map(priv, i) >> 3);
+
+	nv_mask(priv, 0x612300 + soff, 0x007c0000, clksor);
+	nv_mask(priv, 0x61c10c + loff, 0x001f4000, dpctrl);
+	nv_mask(priv, 0x61c130 + loff, 0x0000000f, lane);
+	return 0;
+}
+
+int
+nvd0_sor_dp_drvctl(struct nv50_disp_priv *priv, int or, int link, int lane,
+		   u16 type, u16 mask, u32 data, struct dcb_output *dcbo)
+{
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	const u32 loff = (or * 0x800) + (link * 0x80);
+	const u8 swing = (data & NV94_DISP_SOR_DP_DRVCTL_VS) >> 8;
+	const u8 preem = (data & NV94_DISP_SOR_DP_DRVCTL_PE);
+	u32 addr, shift = nvd0_sor_dp_lane_map(priv, lane);
+	u8  ver, hdr, cnt, len;
+	struct nvbios_dpout outp;
+	struct nvbios_dpcfg ocfg;
+
+	addr = nvbios_dpout_match(bios, type, mask, &ver, &hdr, &cnt, &len, &outp);
+	if (!addr)
+		return -ENODEV;
+
+	addr = nvbios_dpcfg_match(bios, addr, 0, swing, preem,
+				  &ver, &hdr, &cnt, &len, &ocfg);
+	if (!addr)
+		return -EINVAL;
+
+	nv_mask(priv, 0x61c118 + loff, 0x000000ff << shift, ocfg.drv << shift);
+	nv_mask(priv, 0x61c120 + loff, 0x000000ff << shift, ocfg.pre << shift);
+	nv_mask(priv, 0x61c130 + loff, 0x0000ff00, ocfg.unk << 8);
+	nv_mask(priv, 0x61c13c + loff, 0x00000000, 0x00000000);
+	return 0;
+}
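
nvd0_sor_dp_lnkctl above differs from the nv94 path in two ways: its lnkcmp table holds byte-wide rate codes at a stride of 3 (scanned ascending, script pointer at entry + 1), and the raw rate code is written straight into bits 22:18 of 0x612300 instead of being converted to 10KHz units first. Both generations share the lane-enable computation: each lane's register bit offset, divided by 8, selects one of four enable bits. A minimal sketch using the nvd0 mapping table from the code above:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        static const uint8_t nvd0_map[] = { 16, 8, 0, 24 }; /* per-lane bit offsets */
        int link_nr = 4;   /* e.g. a four-lane DP link */
        uint32_t lane = 0;
        int i;

        for (i = 0; i < link_nr; i++)
            lane |= 1u << (nvd0_map[i] >> 3); /* 16->2, 8->1, 0->0, 24->3 */

        printf("lane mask 0x%x\n", lane); /* 0xf: all four enables set */
        return 0;
    }
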
diff --git a/drivers/gpu/drm/nouveau/core/engine/dmaobj/base.c b/drivers/gpu/drm/nouveau/core/engine/dmaobj/base.c
index e1f013d..5103e88 100644
--- a/drivers/gpu/drm/nouveau/core/engine/dmaobj/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/dmaobj/base.c
@@ -28,37 +28,39 @@
 #include <subdev/fb.h>
 #include <engine/dmaobj.h>
 
-int
-nouveau_dmaobj_create_(struct nouveau_object *parent,
-		       struct nouveau_object *engine,
-		       struct nouveau_oclass *oclass,
-		       void *data, u32 size, int len, void **pobject)
+static int
+nouveau_dmaobj_ctor(struct nouveau_object *parent,
+		    struct nouveau_object *engine,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
+		    struct nouveau_object **pobject)
 {
+	struct nouveau_dmaeng *dmaeng = (void *)engine;
+	struct nouveau_dmaobj *dmaobj;
+	struct nouveau_gpuobj *gpuobj;
 	struct nv_dma_class *args = data;
-	struct nouveau_dmaobj *object;
 	int ret;
 
 	if (size < sizeof(*args))
 		return -EINVAL;
 
-	ret = nouveau_object_create_(parent, engine, oclass, 0, len, pobject);
-	object = *pobject;
+	ret = nouveau_object_create(parent, engine, oclass, 0, &dmaobj);
+	*pobject = nv_object(dmaobj);
 	if (ret)
 		return ret;
 
 	switch (args->flags & NV_DMA_TARGET_MASK) {
 	case NV_DMA_TARGET_VM:
-		object->target = NV_MEM_TARGET_VM;
+		dmaobj->target = NV_MEM_TARGET_VM;
 		break;
 	case NV_DMA_TARGET_VRAM:
-		object->target = NV_MEM_TARGET_VRAM;
+		dmaobj->target = NV_MEM_TARGET_VRAM;
 		break;
 	case NV_DMA_TARGET_PCI:
-		object->target = NV_MEM_TARGET_PCI;
+		dmaobj->target = NV_MEM_TARGET_PCI;
 		break;
 	case NV_DMA_TARGET_PCI_US:
 	case NV_DMA_TARGET_AGP:
-		object->target = NV_MEM_TARGET_PCI_NOSNOOP;
+		dmaobj->target = NV_MEM_TARGET_PCI_NOSNOOP;
 		break;
 	default:
 		return -EINVAL;
@@ -66,22 +68,53 @@
 
 	switch (args->flags & NV_DMA_ACCESS_MASK) {
 	case NV_DMA_ACCESS_VM:
-		object->access = NV_MEM_ACCESS_VM;
+		dmaobj->access = NV_MEM_ACCESS_VM;
 		break;
 	case NV_DMA_ACCESS_RD:
-		object->access = NV_MEM_ACCESS_RO;
+		dmaobj->access = NV_MEM_ACCESS_RO;
 		break;
 	case NV_DMA_ACCESS_WR:
-		object->access = NV_MEM_ACCESS_WO;
+		dmaobj->access = NV_MEM_ACCESS_WO;
 		break;
 	case NV_DMA_ACCESS_RDWR:
-		object->access = NV_MEM_ACCESS_RW;
+		dmaobj->access = NV_MEM_ACCESS_RW;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	object->start = args->start;
-	object->limit = args->limit;
-	return 0;
+	dmaobj->start = args->start;
+	dmaobj->limit = args->limit;
+	dmaobj->conf0 = args->conf0;
+
+	switch (nv_mclass(parent)) {
+	case NV_DEVICE_CLASS:
+		/* delayed, or no, binding */
+		break;
+	default:
+		ret = dmaeng->bind(dmaeng, *pobject, dmaobj, &gpuobj);
+		if (ret == 0) {
+			nouveau_object_ref(NULL, pobject);
+			*pobject = nv_object(gpuobj);
+		}
+		break;
+	}
+
+	return ret;
 }
+
+static struct nouveau_ofuncs
+nouveau_dmaobj_ofuncs = {
+	.ctor = nouveau_dmaobj_ctor,
+	.dtor = nouveau_object_destroy,
+	.init = nouveau_object_init,
+	.fini = nouveau_object_fini,
+};
+
+struct nouveau_oclass
+nouveau_dmaobj_sclass[] = {
+	{ NV_DMA_FROM_MEMORY_CLASS, &nouveau_dmaobj_ofuncs },
+	{ NV_DMA_TO_MEMORY_CLASS, &nouveau_dmaobj_ofuncs },
+	{ NV_DMA_IN_MEMORY_CLASS, &nouveau_dmaobj_ofuncs },
+	{}
+};
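
The refactor above folds the old per-chipset constructors into one generic nouveau_dmaobj_ctor: it parses the NV_DMA_* flags, and unless the parent is the device itself (delayed, or no, binding) it immediately calls the engine's bind hook and returns the resulting gpuobj in place of the generic object. A minimal sketch of that replace-on-construct pattern, with simplified stand-in types rather than the nouveau ones:

    #include <stdlib.h>
    #include <errno.h>

    struct obj { int dummy; };

    /* backend hook: builds a bound object from the generic description */
    typedef int (*bind_fn)(const struct obj *dma, struct obj **gpu);

    static int
    dmaobj_ctor(int parent_is_device, bind_fn bind, struct obj **pobject)
    {
        struct obj *dma = calloc(1, sizeof(*dma));
        struct obj *gpu = NULL;
        int ret = 0;

        if (!dma)
            return -ENOMEM;
        *pobject = dma;

        if (!parent_is_device) {
            ret = bind(dma, &gpu);
            if (ret == 0) {
                free(dma);      /* stands in for nouveau_object_ref(NULL, ...) */
                *pobject = gpu; /* caller receives the bound object instead */
            }
        }
        return ret;
    }
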
diff --git a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv04.c b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv04.c
index 9f4cc2f..027d821 100644
--- a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv04.c
@@ -34,10 +34,6 @@
 	struct nouveau_dmaeng base;
 };
 
-struct nv04_dmaobj_priv {
-	struct nouveau_dmaobj base;
-};
-
 static int
 nv04_dmaobj_bind(struct nouveau_dmaeng *dmaeng,
 		 struct nouveau_object *parent,
@@ -53,6 +49,18 @@
 	u32 length = dmaobj->limit - dmaobj->start;
 	int ret;
 
+	if (!nv_iclass(parent, NV_ENGCTX_CLASS)) {
+		switch (nv_mclass(parent->parent)) {
+		case NV03_CHANNEL_DMA_CLASS:
+		case NV10_CHANNEL_DMA_CLASS:
+		case NV17_CHANNEL_DMA_CLASS:
+		case NV40_CHANNEL_DMA_CLASS:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
 	if (dmaobj->target == NV_MEM_TARGET_VM) {
 		if (nv_object(vmm)->oclass == &nv04_vmmgr_oclass) {
 			struct nouveau_gpuobj *pgt = vmm->vm->pgt[0].obj[0];
@@ -106,56 +114,6 @@
 }
 
 static int
-nv04_dmaobj_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-		 struct nouveau_oclass *oclass, void *data, u32 size,
-		 struct nouveau_object **pobject)
-{
-	struct nouveau_dmaeng *dmaeng = (void *)engine;
-	struct nv04_dmaobj_priv *dmaobj;
-	struct nouveau_gpuobj *gpuobj;
-	int ret;
-
-	ret = nouveau_dmaobj_create(parent, engine, oclass,
-				    data, size, &dmaobj);
-	*pobject = nv_object(dmaobj);
-	if (ret)
-		return ret;
-
-	switch (nv_mclass(parent)) {
-	case NV_DEVICE_CLASS:
-		break;
-	case NV03_CHANNEL_DMA_CLASS:
-	case NV10_CHANNEL_DMA_CLASS:
-	case NV17_CHANNEL_DMA_CLASS:
-	case NV40_CHANNEL_DMA_CLASS:
-		ret = dmaeng->bind(dmaeng, *pobject, &dmaobj->base, &gpuobj);
-		nouveau_object_ref(NULL, pobject);
-		*pobject = nv_object(gpuobj);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return ret;
-}
-
-static struct nouveau_ofuncs
-nv04_dmaobj_ofuncs = {
-	.ctor = nv04_dmaobj_ctor,
-	.dtor = _nouveau_dmaobj_dtor,
-	.init = _nouveau_dmaobj_init,
-	.fini = _nouveau_dmaobj_fini,
-};
-
-static struct nouveau_oclass
-nv04_dmaobj_sclass[] = {
-	{ 0x0002, &nv04_dmaobj_ofuncs },
-	{ 0x0003, &nv04_dmaobj_ofuncs },
-	{ 0x003d, &nv04_dmaobj_ofuncs },
-	{}
-};
-
-static int
 nv04_dmaeng_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		 struct nouveau_oclass *oclass, void *data, u32 size,
 		 struct nouveau_object **pobject)
@@ -168,7 +126,7 @@
 	if (ret)
 		return ret;
 
-	priv->base.base.sclass = nv04_dmaobj_sclass;
+	nv_engine(priv)->sclass = nouveau_dmaobj_sclass;
 	priv->base.bind = nv04_dmaobj_bind;
 	return 0;
 }
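
With the per-chipset constructor gone, the parent-class whitelist that used to live in nv04_dmaobj_ctor moves into the bind hook above: engine contexts always pass, and any other parent must be one of the DMA channel classes this chipset supports. A minimal sketch of that gate (the class constants are placeholders, not the real NV*_CHANNEL_DMA_CLASS values):

    #include <errno.h>

    enum parent_iclass { ICLASS_ENGCTX, ICLASS_OTHER };
    enum { CHAN_CLASS_A = 0x006b, CHAN_CLASS_B = 0x006e }; /* hypothetical */

    static int
    bind_parent_ok(enum parent_iclass iclass, int mclass)
    {
        if (iclass == ICLASS_ENGCTX)
            return 0;          /* engine context: always allowed */

        switch (mclass) {
        case CHAN_CLASS_A:
        case CHAN_CLASS_B:
            return 0;          /* known channel class */
        default:
            return -EINVAL;
        }
    }
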
diff --git a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv50.c b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv50.c
index 045d256..750183f 100644
--- a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nv50.c
@@ -32,36 +32,74 @@
 	struct nouveau_dmaeng base;
 };
 
-struct nv50_dmaobj_priv {
-	struct nouveau_dmaobj base;
-};
-
 static int
 nv50_dmaobj_bind(struct nouveau_dmaeng *dmaeng,
 		 struct nouveau_object *parent,
 		 struct nouveau_dmaobj *dmaobj,
 		 struct nouveau_gpuobj **pgpuobj)
 {
-	u32 flags = nv_mclass(dmaobj);
+	u32 flags0 = nv_mclass(dmaobj);
+	u32 flags5 = 0x00000000;
 	int ret;
 
+	if (!nv_iclass(parent, NV_ENGCTX_CLASS)) {
+		switch (nv_mclass(parent->parent)) {
+		case NV50_CHANNEL_DMA_CLASS:
+		case NV84_CHANNEL_DMA_CLASS:
+		case NV50_CHANNEL_IND_CLASS:
+		case NV84_CHANNEL_IND_CLASS:
+		case NV50_DISP_MAST_CLASS:
+		case NV84_DISP_MAST_CLASS:
+		case NV94_DISP_MAST_CLASS:
+		case NVA0_DISP_MAST_CLASS:
+		case NVA3_DISP_MAST_CLASS:
+		case NV50_DISP_SYNC_CLASS:
+		case NV84_DISP_SYNC_CLASS:
+		case NV94_DISP_SYNC_CLASS:
+		case NVA0_DISP_SYNC_CLASS:
+		case NVA3_DISP_SYNC_CLASS:
+		case NV50_DISP_OVLY_CLASS:
+		case NV84_DISP_OVLY_CLASS:
+		case NV94_DISP_OVLY_CLASS:
+		case NVA0_DISP_OVLY_CLASS:
+		case NVA3_DISP_OVLY_CLASS:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (!(dmaobj->conf0 & NV50_DMA_CONF0_ENABLE)) {
+		if (dmaobj->target == NV_MEM_TARGET_VM) {
+			dmaobj->conf0  = NV50_DMA_CONF0_PRIV_VM;
+			dmaobj->conf0 |= NV50_DMA_CONF0_PART_VM;
+			dmaobj->conf0 |= NV50_DMA_CONF0_COMP_VM;
+			dmaobj->conf0 |= NV50_DMA_CONF0_TYPE_VM;
+		} else {
+			dmaobj->conf0  = NV50_DMA_CONF0_PRIV_US;
+			dmaobj->conf0 |= NV50_DMA_CONF0_PART_256;
+			dmaobj->conf0 |= NV50_DMA_CONF0_COMP_NONE;
+			dmaobj->conf0 |= NV50_DMA_CONF0_TYPE_LINEAR;
+		}
+	}
+
+	flags0 |= (dmaobj->conf0 & NV50_DMA_CONF0_COMP) << 22;
+	flags0 |= (dmaobj->conf0 & NV50_DMA_CONF0_TYPE) << 22;
+	flags0 |= (dmaobj->conf0 & NV50_DMA_CONF0_PRIV);
+	flags5 |= (dmaobj->conf0 & NV50_DMA_CONF0_PART);
+
 	switch (dmaobj->target) {
 	case NV_MEM_TARGET_VM:
-		flags |= 0x00000000;
-		flags |= 0x60000000; /* COMPRESSION_USEVM */
-		flags |= 0x1fc00000; /* STORAGE_TYPE_USEVM */
+		flags0 |= 0x00000000;
 		break;
 	case NV_MEM_TARGET_VRAM:
-		flags |= 0x00010000;
-		flags |= 0x00100000; /* ACCESSUS_USER_SYSTEM */
+		flags0 |= 0x00010000;
 		break;
 	case NV_MEM_TARGET_PCI:
-		flags |= 0x00020000;
-		flags |= 0x00100000; /* ACCESSUS_USER_SYSTEM */
+		flags0 |= 0x00020000;
 		break;
 	case NV_MEM_TARGET_PCI_NOSNOOP:
-		flags |= 0x00030000;
-		flags |= 0x00100000; /* ACCESSUS_USER_SYSTEM */
+		flags0 |= 0x00030000;
 		break;
 	default:
 		return -EINVAL;
@@ -71,79 +109,29 @@
 	case NV_MEM_ACCESS_VM:
 		break;
 	case NV_MEM_ACCESS_RO:
-		flags |= 0x00040000;
+		flags0 |= 0x00040000;
 		break;
 	case NV_MEM_ACCESS_WO:
 	case NV_MEM_ACCESS_RW:
-		flags |= 0x00080000;
+		flags0 |= 0x00080000;
 		break;
 	}
 
 	ret = nouveau_gpuobj_new(parent, parent, 24, 32, 0, pgpuobj);
 	if (ret == 0) {
-		nv_wo32(*pgpuobj, 0x00, flags);
+		nv_wo32(*pgpuobj, 0x00, flags0);
 		nv_wo32(*pgpuobj, 0x04, lower_32_bits(dmaobj->limit));
 		nv_wo32(*pgpuobj, 0x08, lower_32_bits(dmaobj->start));
 		nv_wo32(*pgpuobj, 0x0c, upper_32_bits(dmaobj->limit) << 24 |
 					upper_32_bits(dmaobj->start));
 		nv_wo32(*pgpuobj, 0x10, 0x00000000);
-		nv_wo32(*pgpuobj, 0x14, 0x00000000);
+		nv_wo32(*pgpuobj, 0x14, flags5);
 	}
 
 	return ret;
 }
 
 static int
-nv50_dmaobj_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-		 struct nouveau_oclass *oclass, void *data, u32 size,
-		 struct nouveau_object **pobject)
-{
-	struct nouveau_dmaeng *dmaeng = (void *)engine;
-	struct nv50_dmaobj_priv *dmaobj;
-	struct nouveau_gpuobj *gpuobj;
-	int ret;
-
-	ret = nouveau_dmaobj_create(parent, engine, oclass,
-				    data, size, &dmaobj);
-	*pobject = nv_object(dmaobj);
-	if (ret)
-		return ret;
-
-	switch (nv_mclass(parent)) {
-	case NV_DEVICE_CLASS:
-		break;
-	case NV50_CHANNEL_DMA_CLASS:
-	case NV84_CHANNEL_DMA_CLASS:
-	case NV50_CHANNEL_IND_CLASS:
-	case NV84_CHANNEL_IND_CLASS:
-		ret = dmaeng->bind(dmaeng, *pobject, &dmaobj->base, &gpuobj);
-		nouveau_object_ref(NULL, pobject);
-		*pobject = nv_object(gpuobj);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return ret;
-}
-
-static struct nouveau_ofuncs
-nv50_dmaobj_ofuncs = {
-	.ctor = nv50_dmaobj_ctor,
-	.dtor = _nouveau_dmaobj_dtor,
-	.init = _nouveau_dmaobj_init,
-	.fini = _nouveau_dmaobj_fini,
-};
-
-static struct nouveau_oclass
-nv50_dmaobj_sclass[] = {
-	{ 0x0002, &nv50_dmaobj_ofuncs },
-	{ 0x0003, &nv50_dmaobj_ofuncs },
-	{ 0x003d, &nv50_dmaobj_ofuncs },
-	{}
-};
-
-static int
 nv50_dmaeng_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		 struct nouveau_oclass *oclass, void *data, u32 size,
 		 struct nouveau_object **pobject)
@@ -156,7 +144,7 @@
 	if (ret)
 		return ret;
 
-	priv->base.base.sclass = nv50_dmaobj_sclass;
+	nv_engine(priv)->sclass = nouveau_dmaobj_sclass;
 	priv->base.bind = nv50_dmaobj_bind;
 	return 0;
 }
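
The nv50 bind hook above now also honours a user-supplied conf0 word: if NV50_DMA_CONF0_ENABLE is clear, it picks VM-friendly or linear defaults, then scatters the compression/type/privilege fields into word 0 and the partition field into word 5 of the 24-byte ctxdma. A minimal sketch of the six-word layout written by the nv_wo32 calls, assuming flags0 and flags5 were already packed:

    #include <stdint.h>

    static void
    write_nv50_ctxdma(uint32_t obj[6], uint32_t flags0, uint32_t flags5,
                      uint64_t start, uint64_t limit)
    {
        obj[0] = flags0;                 /* class + target/access/conf0 bits */
        obj[1] = (uint32_t)limit;        /* limit, low 32 bits */
        obj[2] = (uint32_t)start;        /* start, low 32 bits */
        obj[3] = (uint32_t)(limit >> 32) << 24 | /* high bytes, packed */
                 (uint32_t)(start >> 32);
        obj[4] = 0x00000000;
        obj[5] = flags5;                 /* partition-size field */
    }
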
diff --git a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvc0.c
index 5baa086..cd3970d 100644
--- a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvc0.c
@@ -22,7 +22,9 @@
  * Authors: Ben Skeggs
  */
 
+#include <core/device.h>
 #include <core/gpuobj.h>
+#include <core/class.h>
 
 #include <subdev/fb.h>
 #include <engine/dmaobj.h>
@@ -31,45 +33,86 @@
 	struct nouveau_dmaeng base;
 };
 
-struct nvc0_dmaobj_priv {
-	struct nouveau_dmaobj base;
-};
-
 static int
-nvc0_dmaobj_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-		 struct nouveau_oclass *oclass, void *data, u32 size,
-		 struct nouveau_object **pobject)
+nvc0_dmaobj_bind(struct nouveau_dmaeng *dmaeng,
+		 struct nouveau_object *parent,
+		 struct nouveau_dmaobj *dmaobj,
+		 struct nouveau_gpuobj **pgpuobj)
 {
-	struct nvc0_dmaobj_priv *dmaobj;
+	u32 flags0 = nv_mclass(dmaobj);
+	u32 flags5 = 0x00000000;
 	int ret;
 
-	ret = nouveau_dmaobj_create(parent, engine, oclass, data, size, &dmaobj);
-	*pobject = nv_object(dmaobj);
-	if (ret)
-		return ret;
+	if (!nv_iclass(parent, NV_ENGCTX_CLASS)) {
+		switch (nv_mclass(parent->parent)) {
+		case NVA3_DISP_MAST_CLASS:
+		case NVA3_DISP_SYNC_CLASS:
+		case NVA3_DISP_OVLY_CLASS:
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else
+		return 0;
 
-	if (dmaobj->base.target != NV_MEM_TARGET_VM || dmaobj->base.start)
+	if (!(dmaobj->conf0 & NVC0_DMA_CONF0_ENABLE)) {
+		if (dmaobj->target == NV_MEM_TARGET_VM) {
+			dmaobj->conf0  = NVC0_DMA_CONF0_PRIV_VM;
+			dmaobj->conf0 |= NVC0_DMA_CONF0_TYPE_VM;
+		} else {
+			dmaobj->conf0  = NVC0_DMA_CONF0_PRIV_US;
+			dmaobj->conf0 |= NVC0_DMA_CONF0_TYPE_LINEAR;
+			dmaobj->conf0 |= 0x00020000;
+		}
+	}
+
+	flags0 |= (dmaobj->conf0 & NVC0_DMA_CONF0_TYPE) << 22;
+	flags0 |= (dmaobj->conf0 & NVC0_DMA_CONF0_PRIV);
+	flags5 |= (dmaobj->conf0 & NVC0_DMA_CONF0_UNKN);
+
+	switch (dmaobj->target) {
+	case NV_MEM_TARGET_VM:
+		flags0 |= 0x00000000;
+		break;
+	case NV_MEM_TARGET_VRAM:
+		flags0 |= 0x00010000;
+		break;
+	case NV_MEM_TARGET_PCI:
+		flags0 |= 0x00020000;
+		break;
+	case NV_MEM_TARGET_PCI_NOSNOOP:
+		flags0 |= 0x00030000;
+		break;
+	default:
 		return -EINVAL;
+	}
 
-	return 0;
+	switch (dmaobj->access) {
+	case NV_MEM_ACCESS_VM:
+		break;
+	case NV_MEM_ACCESS_RO:
+		flags0 |= 0x00040000;
+		break;
+	case NV_MEM_ACCESS_WO:
+	case NV_MEM_ACCESS_RW:
+		flags0 |= 0x00080000;
+		break;
+	}
+
+	ret = nouveau_gpuobj_new(parent, parent, 24, 32, 0, pgpuobj);
+	if (ret == 0) {
+		nv_wo32(*pgpuobj, 0x00, flags0);
+		nv_wo32(*pgpuobj, 0x04, lower_32_bits(dmaobj->limit));
+		nv_wo32(*pgpuobj, 0x08, lower_32_bits(dmaobj->start));
+		nv_wo32(*pgpuobj, 0x0c, upper_32_bits(dmaobj->limit) << 24 |
+					upper_32_bits(dmaobj->start));
+		nv_wo32(*pgpuobj, 0x10, 0x00000000);
+		nv_wo32(*pgpuobj, 0x14, flags5);
+	}
+
+	return ret;
 }
 
-static struct nouveau_ofuncs
-nvc0_dmaobj_ofuncs = {
-	.ctor = nvc0_dmaobj_ctor,
-	.dtor = _nouveau_dmaobj_dtor,
-	.init = _nouveau_dmaobj_init,
-	.fini = _nouveau_dmaobj_fini,
-};
-
-static struct nouveau_oclass
-nvc0_dmaobj_sclass[] = {
-	{ 0x0002, &nvc0_dmaobj_ofuncs },
-	{ 0x0003, &nvc0_dmaobj_ofuncs },
-	{ 0x003d, &nvc0_dmaobj_ofuncs },
-	{}
-};
-
 static int
 nvc0_dmaeng_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		 struct nouveau_oclass *oclass, void *data, u32 size,
@@ -83,7 +126,8 @@
 	if (ret)
 		return ret;
 
-	priv->base.base.sclass = nvc0_dmaobj_sclass;
+	nv_engine(priv)->sclass = nouveau_dmaobj_sclass;
+	priv->base.bind = nvc0_dmaobj_bind;
 	return 0;
 }
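
Note the inverted gate in nvc0_dmaobj_bind above: an engine-context parent returns success without building any object at all (PFIFO no longer consumes ctxdmas here), and only the NVA3 display classes listed in the switch may actually bind. A minimal sketch of that control flow (class identifiers are placeholders):

    #include <errno.h>

    enum { ICLASS_ENGCTX, ICLASS_OTHER };
    enum { DISP_MAST = 1, DISP_SYNC, DISP_OVLY }; /* stand-ins for NVA3_DISP_* */

    static int
    nvc0_bind_gate(int iclass, int mclass, int *build)
    {
        if (iclass == ICLASS_ENGCTX) {
            *build = 0;        /* fifo path: no ctxdma needed, succeed */
            return 0;
        }

        switch (mclass) {
        case DISP_MAST:
        case DISP_SYNC:
        case DISP_OVLY:
            *build = 1;        /* display path: build the object */
            return 0;
        default:
            return -EINVAL;
        }
    }
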
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvd0.c b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvd0.c
new file mode 100644
index 0000000..d152875
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/dmaobj/nvd0.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/device.h>
+#include <core/gpuobj.h>
+#include <core/class.h>
+
+#include <subdev/fb.h>
+#include <engine/dmaobj.h>
+
+struct nvd0_dmaeng_priv {
+	struct nouveau_dmaeng base;
+};
+
+static int
+nvd0_dmaobj_bind(struct nouveau_dmaeng *dmaeng,
+		 struct nouveau_object *parent,
+		 struct nouveau_dmaobj *dmaobj,
+		 struct nouveau_gpuobj **pgpuobj)
+{
+	u32 flags0 = 0x00000000;
+	int ret;
+
+	if (!nv_iclass(parent, NV_ENGCTX_CLASS)) {
+		switch (nv_mclass(parent->parent)) {
+		case NVD0_DISP_MAST_CLASS:
+		case NVD0_DISP_SYNC_CLASS:
+		case NVD0_DISP_OVLY_CLASS:
+		case NVE0_DISP_MAST_CLASS:
+		case NVE0_DISP_SYNC_CLASS:
+		case NVE0_DISP_OVLY_CLASS:
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else
+		return 0;
+
+	if (!(dmaobj->conf0 & NVD0_DMA_CONF0_ENABLE)) {
+		if (dmaobj->target == NV_MEM_TARGET_VM) {
+			dmaobj->conf0 |= NVD0_DMA_CONF0_TYPE_VM;
+			dmaobj->conf0 |= NVD0_DMA_CONF0_PAGE_LP;
+		} else {
+			dmaobj->conf0 |= NVD0_DMA_CONF0_TYPE_LINEAR;
+			dmaobj->conf0 |= NVD0_DMA_CONF0_PAGE_SP;
+		}
+	}
+
+	flags0 |= (dmaobj->conf0 & NVD0_DMA_CONF0_TYPE) << 20;
+	flags0 |= (dmaobj->conf0 & NVD0_DMA_CONF0_PAGE) >> 4;
+
+	switch (dmaobj->target) {
+	case NV_MEM_TARGET_VRAM:
+		flags0 |= 0x00000009;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = nouveau_gpuobj_new(parent, parent, 24, 32, 0, pgpuobj);
+	if (ret == 0) {
+		nv_wo32(*pgpuobj, 0x00, flags0);
+		nv_wo32(*pgpuobj, 0x04, dmaobj->start >> 8);
+		nv_wo32(*pgpuobj, 0x08, dmaobj->limit >> 8);
+		nv_wo32(*pgpuobj, 0x0c, 0x00000000);
+		nv_wo32(*pgpuobj, 0x10, 0x00000000);
+		nv_wo32(*pgpuobj, 0x14, 0x00000000);
+	}
+
+	return ret;
+}
+
+static int
+nvd0_dmaeng_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		 struct nouveau_oclass *oclass, void *data, u32 size,
+		 struct nouveau_object **pobject)
+{
+	struct nvd0_dmaeng_priv *priv;
+	int ret;
+
+	ret = nouveau_dmaeng_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_engine(priv)->sclass = nouveau_dmaobj_sclass;
+	priv->base.bind = nvd0_dmaobj_bind;
+	return 0;
+}
+
+struct nouveau_oclass
+nvd0_dmaeng_oclass = {
+	.handle = NV_ENGINE(DMAOBJ, 0xd0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvd0_dmaeng_ctor,
+		.dtor = _nouveau_dmaeng_dtor,
+		.init = _nouveau_dmaeng_init,
+		.fini = _nouveau_dmaeng_fini,
+	},
+};
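
The nvd0 object layout above is new: base and limit are stored in 256-byte units, with start at 0x04 and limit at 0x08 (the reverse of the nv50 ordering), and the remaining words zeroed. A minimal sketch of the address conversion, assuming 256-byte-aligned addresses as the >> 8 implies:

    #include <stdint.h>
    #include <assert.h>

    static void
    nvd0_addr_words(uint64_t start, uint64_t limit, uint32_t *w1, uint32_t *w2)
    {
        assert((start & 0xff) == 0);  /* must be 256-byte aligned */
        *w1 = (uint32_t)(start >> 8); /* written at 0x04 */
        *w2 = (uint32_t)(limit >> 8); /* written at 0x08 */
    }
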
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/base.c b/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
index bbb43c6..c2b9db3 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
@@ -24,6 +24,7 @@
 
 #include <core/object.h>
 #include <core/handle.h>
+#include <core/class.h>
 
 #include <engine/dmaobj.h>
 #include <engine/fifo.h>
@@ -33,7 +34,7 @@
 			     struct nouveau_object *engine,
 			     struct nouveau_oclass *oclass,
 			     int bar, u32 addr, u32 size, u32 pushbuf,
-			     u32 engmask, int len, void **ptr)
+			     u64 engmask, int len, void **ptr)
 {
 	struct nouveau_device *device = nv_device(engine);
 	struct nouveau_fifo *priv = (void *)engine;
@@ -56,18 +57,16 @@
 
 	dmaeng = (void *)chan->pushdma->base.engine;
 	switch (chan->pushdma->base.oclass->handle) {
-	case 0x0002:
-	case 0x003d:
+	case NV_DMA_FROM_MEMORY_CLASS:
+	case NV_DMA_IN_MEMORY_CLASS:
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (dmaeng->bind) {
-		ret = dmaeng->bind(dmaeng, parent, chan->pushdma, &chan->pushgpu);
-		if (ret)
-			return ret;
-	}
+	ret = dmaeng->bind(dmaeng, parent, chan->pushdma, &chan->pushgpu);
+	if (ret)
+		return ret;
 
 	/* find a free fifo channel */
 	spin_lock_irqsave(&priv->lock, flags);
@@ -119,14 +118,14 @@
 }
 
 u32
-_nouveau_fifo_channel_rd32(struct nouveau_object *object, u32 addr)
+_nouveau_fifo_channel_rd32(struct nouveau_object *object, u64 addr)
 {
 	struct nouveau_fifo_chan *chan = (void *)object;
 	return ioread32_native(chan->user + addr);
 }
 
 void
-_nouveau_fifo_channel_wr32(struct nouveau_object *object, u32 addr, u32 data)
+_nouveau_fifo_channel_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	struct nouveau_fifo_chan *chan = (void *)object;
 	iowrite32_native(data, chan->user + addr);
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c
index ea76e3e..a47a854 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv04.c
@@ -126,9 +126,9 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0x800000,
 					  0x10000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR), &chan);
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
@@ -440,7 +440,7 @@
 			}
 
 			if (!nv04_fifo_swmthd(priv, chid, mthd, data)) {
-				nv_info(priv, "CACHE_ERROR - Ch %d/%d "
+				nv_error(priv, "CACHE_ERROR - Ch %d/%d "
 					      "Mthd 0x%04x Data 0x%08x\n",
 					chid, (mthd >> 13) & 7, mthd & 0x1ffc,
 					data);
@@ -476,7 +476,7 @@
 				u32 ib_get = nv_rd32(priv, 0x003334);
 				u32 ib_put = nv_rd32(priv, 0x003330);
 
-				nv_info(priv, "DMA_PUSHER - Ch %d Get 0x%02x%08x "
+				nv_error(priv, "DMA_PUSHER - Ch %d Get 0x%02x%08x "
 				     "Put 0x%02x%08x IbGet 0x%08x IbPut 0x%08x "
 				     "State 0x%08x (err: %s) Push 0x%08x\n",
 					chid, ho_get, dma_get, ho_put,
@@ -494,7 +494,7 @@
 					nv_wr32(priv, 0x003334, ib_put);
 				}
 			} else {
-				nv_info(priv, "DMA_PUSHER - Ch %d Get 0x%08x "
+				nv_error(priv, "DMA_PUSHER - Ch %d Get 0x%08x "
 					     "Put 0x%08x State 0x%08x (err: %s) Push 0x%08x\n",
 					chid, dma_get, dma_put, state,
 					nv_dma_state_err(state), push);
@@ -525,14 +525,13 @@
 
 		if (device->card_type == NV_50) {
 			if (status & 0x00000010) {
-				nv50_fb_trap(nouveau_fb(priv), 1);
 				status &= ~0x00000010;
 				nv_wr32(priv, 0x002100, 0x00000010);
 			}
 		}
 
 		if (status) {
-			nv_info(priv, "unknown intr 0x%08x, ch %d\n",
+			nv_warn(priv, "unknown intr 0x%08x, ch %d\n",
 				status, chid);
 			nv_wr32(priv, NV03_PFIFO_INTR_0, status);
 			status = 0;
@@ -542,7 +541,7 @@
 	}
 
 	if (status) {
-		nv_info(priv, "still angry after %d spins, halt\n", cnt);
+		nv_error(priv, "still angry after %d spins, halt\n", cnt);
 		nv_wr32(priv, 0x002140, 0);
 		nv_wr32(priv, 0x000140, 0);
 	}
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv10.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv10.c
index 4ba7542..2c927c1 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv10.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv10.c
@@ -69,9 +69,9 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0x800000,
 					  0x10000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR), &chan);
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv17.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv17.c
index b96e6b0..a9cb51d 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv17.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv17.c
@@ -74,10 +74,10 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0x800000,
 					  0x10000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR) |
-					  (1 << NVDEV_ENGINE_MPEG), /* NV31- */
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR) |
+					  (1ULL << NVDEV_ENGINE_MPEG), /* NV31- */
 					  &chan);
 	*pobject = nv_object(chan);
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
index 559c3b4..2b1f917 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
@@ -192,10 +192,10 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0xc00000,
 					  0x1000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR) |
-					  (1 << NVDEV_ENGINE_MPEG), &chan);
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR) |
+					  (1ULL << NVDEV_ENGINE_MPEG), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv50.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv50.c
index 536e763..bd09636 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv50.c
@@ -112,14 +112,6 @@
 		return -EINVAL;
 	}
 
-	nv_wo32(base->eng, addr + 0x00, 0x00000000);
-	nv_wo32(base->eng, addr + 0x04, 0x00000000);
-	nv_wo32(base->eng, addr + 0x08, 0x00000000);
-	nv_wo32(base->eng, addr + 0x0c, 0x00000000);
-	nv_wo32(base->eng, addr + 0x10, 0x00000000);
-	nv_wo32(base->eng, addr + 0x14, 0x00000000);
-	bar->flush(bar);
-
 	/* HW bug workaround:
 	 *
 	 * PFIFO will hang forever if the connected engines don't report
@@ -141,8 +133,18 @@
 		if (suspend)
 			ret = -EBUSY;
 	}
-
 	nv_wr32(priv, 0x00b860, me);
+
+	if (ret == 0) {
+		nv_wo32(base->eng, addr + 0x00, 0x00000000);
+		nv_wo32(base->eng, addr + 0x04, 0x00000000);
+		nv_wo32(base->eng, addr + 0x08, 0x00000000);
+		nv_wo32(base->eng, addr + 0x0c, 0x00000000);
+		nv_wo32(base->eng, addr + 0x10, 0x00000000);
+		nv_wo32(base->eng, addr + 0x14, 0x00000000);
+		bar->flush(bar);
+	}
+
 	return ret;
 }
 
@@ -194,10 +196,10 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0xc00000,
 					  0x2000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR) |
-					  (1 << NVDEV_ENGINE_MPEG), &chan);
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR) |
+					  (1ULL << NVDEV_ENGINE_MPEG), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
@@ -247,10 +249,10 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0xc00000,
 					  0x2000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR) |
-					  (1 << NVDEV_ENGINE_MPEG), &chan);
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR) |
+					  (1ULL << NVDEV_ENGINE_MPEG), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
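
The engine-context teardown hunks above (and the nv84/nvc0/nve0 ones below) all make the same fix: the context pointers used to be zeroed before the channel kick, so a timed-out save could still reference scrubbed state; now the kick runs first and the clears happen only once it is safe. A minimal sketch of the nv50 variant of that ordering (function and type names here are stand-ins):

    #include <errno.h>
    #include <string.h>

    struct engctx { unsigned int eng[6]; }; /* the six words at addr + 0x00..0x14 */

    static int kick_engine(struct engctx *c) { (void)c; return 0; /* or -EBUSY */ }

    static int
    context_detach(struct engctx *c, int suspend)
    {
        int ret = kick_engine(c);       /* ask the engine to save and leave */

        if (ret && suspend)
            return -EBUSY;              /* resume still needs the context */
        if (ret == 0)
            memset(c->eng, 0, sizeof(c->eng)); /* safe to scrub now */
        return ret;
    }
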
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
index b4fd26d..1eb1c51 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
@@ -95,14 +95,6 @@
 		return -EINVAL;
 	}
 
-	nv_wo32(base->eng, addr + 0x00, 0x00000000);
-	nv_wo32(base->eng, addr + 0x04, 0x00000000);
-	nv_wo32(base->eng, addr + 0x08, 0x00000000);
-	nv_wo32(base->eng, addr + 0x0c, 0x00000000);
-	nv_wo32(base->eng, addr + 0x10, 0x00000000);
-	nv_wo32(base->eng, addr + 0x14, 0x00000000);
-	bar->flush(bar);
-
 	save = nv_mask(priv, 0x002520, 0x0000003f, 1 << engn);
 	nv_wr32(priv, 0x0032fc, nv_gpuobj(base)->addr >> 12);
 	done = nv_wait_ne(priv, 0x0032fc, 0xffffffff, 0xffffffff);
@@ -112,6 +104,14 @@
 		if (suspend)
 			return -EBUSY;
 	}
+
+	nv_wo32(base->eng, addr + 0x00, 0x00000000);
+	nv_wo32(base->eng, addr + 0x04, 0x00000000);
+	nv_wo32(base->eng, addr + 0x08, 0x00000000);
+	nv_wo32(base->eng, addr + 0x0c, 0x00000000);
+	nv_wo32(base->eng, addr + 0x10, 0x00000000);
+	nv_wo32(base->eng, addr + 0x14, 0x00000000);
+	bar->flush(bar);
 	return 0;
 }
 
@@ -163,17 +163,17 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0xc00000,
 					  0x2000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR) |
-					  (1 << NVDEV_ENGINE_MPEG) |
-					  (1 << NVDEV_ENGINE_ME) |
-					  (1 << NVDEV_ENGINE_VP) |
-					  (1 << NVDEV_ENGINE_CRYPT) |
-					  (1 << NVDEV_ENGINE_BSP) |
-					  (1 << NVDEV_ENGINE_PPP) |
-					  (1 << NVDEV_ENGINE_COPY0) |
-					  (1 << NVDEV_ENGINE_UNK1C1), &chan);
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR) |
+					  (1ULL << NVDEV_ENGINE_MPEG) |
+					  (1ULL << NVDEV_ENGINE_ME) |
+					  (1ULL << NVDEV_ENGINE_VP) |
+					  (1ULL << NVDEV_ENGINE_CRYPT) |
+					  (1ULL << NVDEV_ENGINE_BSP) |
+					  (1ULL << NVDEV_ENGINE_PPP) |
+					  (1ULL << NVDEV_ENGINE_COPY0) |
+					  (1ULL << NVDEV_ENGINE_UNK1C1), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
@@ -225,17 +225,17 @@
 
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0, 0xc00000,
 					  0x2000, args->pushbuf,
-					  (1 << NVDEV_ENGINE_DMAOBJ) |
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR) |
-					  (1 << NVDEV_ENGINE_MPEG) |
-					  (1 << NVDEV_ENGINE_ME) |
-					  (1 << NVDEV_ENGINE_VP) |
-					  (1 << NVDEV_ENGINE_CRYPT) |
-					  (1 << NVDEV_ENGINE_BSP) |
-					  (1 << NVDEV_ENGINE_PPP) |
-					  (1 << NVDEV_ENGINE_COPY0) |
-					  (1 << NVDEV_ENGINE_UNK1C1), &chan);
+					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR) |
+					  (1ULL << NVDEV_ENGINE_MPEG) |
+					  (1ULL << NVDEV_ENGINE_ME) |
+					  (1ULL << NVDEV_ENGINE_VP) |
+					  (1ULL << NVDEV_ENGINE_CRYPT) |
+					  (1ULL << NVDEV_ENGINE_BSP) |
+					  (1ULL << NVDEV_ENGINE_PPP) |
+					  (1ULL << NVDEV_ENGINE_COPY0) |
+					  (1ULL << NVDEV_ENGINE_UNK1C1), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c
index 6f21be6..b4365dd 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nvc0.c
@@ -103,6 +103,9 @@
 	case NVDEV_ENGINE_GR   : addr = 0x0210; break;
 	case NVDEV_ENGINE_COPY0: addr = 0x0230; break;
 	case NVDEV_ENGINE_COPY1: addr = 0x0240; break;
+	case NVDEV_ENGINE_BSP  : addr = 0x0270; break;
+	case NVDEV_ENGINE_VP   : addr = 0x0250; break;
+	case NVDEV_ENGINE_PPP  : addr = 0x0260; break;
 	default:
 		return -EINVAL;
 	}
@@ -137,14 +140,13 @@
 	case NVDEV_ENGINE_GR   : addr = 0x0210; break;
 	case NVDEV_ENGINE_COPY0: addr = 0x0230; break;
 	case NVDEV_ENGINE_COPY1: addr = 0x0240; break;
+	case NVDEV_ENGINE_BSP  : addr = 0x0270; break;
+	case NVDEV_ENGINE_VP   : addr = 0x0250; break;
+	case NVDEV_ENGINE_PPP  : addr = 0x0260; break;
 	default:
 		return -EINVAL;
 	}
 
-	nv_wo32(base, addr + 0x00, 0x00000000);
-	nv_wo32(base, addr + 0x04, 0x00000000);
-	bar->flush(bar);
-
 	nv_wr32(priv, 0x002634, chan->base.chid);
 	if (!nv_wait(priv, 0x002634, 0xffffffff, chan->base.chid)) {
 		nv_error(priv, "channel %d kick timeout\n", chan->base.chid);
@@ -152,6 +154,9 @@
 			return -EBUSY;
 	}
 
+	nv_wo32(base, addr + 0x00, 0x00000000);
+	nv_wo32(base, addr + 0x04, 0x00000000);
+	bar->flush(bar);
 	return 0;
 }
 
@@ -175,10 +180,13 @@
 	ret = nouveau_fifo_channel_create(parent, engine, oclass, 1,
 					  priv->user.bar.offset, 0x1000,
 					  args->pushbuf,
-					  (1 << NVDEV_ENGINE_SW) |
-					  (1 << NVDEV_ENGINE_GR) |
-					  (1 << NVDEV_ENGINE_COPY0) |
-					  (1 << NVDEV_ENGINE_COPY1), &chan);
+					  (1ULL << NVDEV_ENGINE_SW) |
+					  (1ULL << NVDEV_ENGINE_GR) |
+					  (1ULL << NVDEV_ENGINE_COPY0) |
+					  (1ULL << NVDEV_ENGINE_COPY1) |
+					  (1ULL << NVDEV_ENGINE_BSP) |
+					  (1ULL << NVDEV_ENGINE_VP) |
+					  (1ULL << NVDEV_ENGINE_PPP), &chan);
 	*pobject = nv_object(chan);
 	if (ret)
 		return ret;
@@ -494,7 +502,7 @@
 	u32 stat = nv_rd32(priv, 0x002100) & mask;
 
 	if (stat & 0x00000100) {
-		nv_info(priv, "unknown status 0x00000100\n");
+		nv_warn(priv, "unknown status 0x00000100\n");
 		nv_wr32(priv, 0x002100, 0x00000100);
 		stat &= ~0x00000100;
 	}
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c
index 36e81b6..c930da9 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c
@@ -38,12 +38,12 @@
 #include <engine/dmaobj.h>
 #include <engine/fifo.h>
 
-#define _(a,b) { (a), ((1 << (a)) | (b)) }
+#define _(a,b) { (a), ((1ULL << (a)) | (b)) }
 static const struct {
-	int subdev;
-	u32 mask;
+	u64 subdev;
+	u64 mask;
 } fifo_engine[] = {
-	_(NVDEV_ENGINE_GR      , (1 << NVDEV_ENGINE_SW)),
+	_(NVDEV_ENGINE_GR      , (1ULL << NVDEV_ENGINE_SW)),
 	_(NVDEV_ENGINE_VP      , 0),
 	_(NVDEV_ENGINE_PPP     , 0),
 	_(NVDEV_ENGINE_BSP     , 0),
@@ -138,6 +138,9 @@
 	case NVDEV_ENGINE_GR   :
 	case NVDEV_ENGINE_COPY0:
 	case NVDEV_ENGINE_COPY1: addr = 0x0210; break;
+	case NVDEV_ENGINE_BSP  : addr = 0x0270; break;
+	case NVDEV_ENGINE_VP   : addr = 0x0250; break;
+	case NVDEV_ENGINE_PPP  : addr = 0x0260; break;
 	default:
 		return -EINVAL;
 	}
@@ -172,14 +175,13 @@
 	case NVDEV_ENGINE_GR   :
 	case NVDEV_ENGINE_COPY0:
 	case NVDEV_ENGINE_COPY1: addr = 0x0210; break;
+	case NVDEV_ENGINE_BSP  : addr = 0x0270; break;
+	case NVDEV_ENGINE_VP   : addr = 0x0250; break;
+	case NVDEV_ENGINE_PPP  : addr = 0x0260; break;
 	default:
 		return -EINVAL;
 	}
 
-	nv_wo32(base, addr + 0x00, 0x00000000);
-	nv_wo32(base, addr + 0x04, 0x00000000);
-	bar->flush(bar);
-
 	nv_wr32(priv, 0x002634, chan->base.chid);
 	if (!nv_wait(priv, 0x002634, 0xffffffff, chan->base.chid)) {
 		nv_error(priv, "channel %d kick timeout\n", chan->base.chid);
@@ -187,6 +189,9 @@
 			return -EBUSY;
 	}
 
+	nv_wo32(base, addr + 0x00, 0x00000000);
+	nv_wo32(base, addr + 0x04, 0x00000000);
+	bar->flush(bar);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv04.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv04.c
index 6185282..e30a9c5 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv04.c
@@ -787,168 +787,168 @@
 
 static struct nouveau_omthds
 nv03_graph_gdi_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_patt },
-	{ 0x0188, nv04_graph_mthd_bind_rop },
-	{ 0x018c, nv04_graph_mthd_bind_beta1 },
-	{ 0x0190, nv04_graph_mthd_bind_surf_dst },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_patt },
+	{ 0x0188, 0x0188, nv04_graph_mthd_bind_rop },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_beta1 },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_surf_dst },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_gdi_omthds[] = {
-	{ 0x0188, nv04_graph_mthd_bind_patt },
-	{ 0x018c, nv04_graph_mthd_bind_rop },
-	{ 0x0190, nv04_graph_mthd_bind_beta1 },
-	{ 0x0194, nv04_graph_mthd_bind_beta4 },
-	{ 0x0198, nv04_graph_mthd_bind_surf2d },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0188, 0x0188, nv04_graph_mthd_bind_patt },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_rop },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_beta1 },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta4 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_surf2d },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv01_graph_blit_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_chroma },
-	{ 0x0188, nv01_graph_mthd_bind_clip },
-	{ 0x018c, nv01_graph_mthd_bind_patt },
-	{ 0x0190, nv04_graph_mthd_bind_rop },
-	{ 0x0194, nv04_graph_mthd_bind_beta1 },
-	{ 0x0198, nv04_graph_mthd_bind_surf_dst },
-	{ 0x019c, nv04_graph_mthd_bind_surf_src },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_chroma },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_clip },
+	{ 0x018c, 0x018c, nv01_graph_mthd_bind_patt },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_rop },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta1 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_surf_dst },
+	{ 0x019c, 0x019c, nv04_graph_mthd_bind_surf_src },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_blit_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_chroma },
-	{ 0x0188, nv01_graph_mthd_bind_clip },
-	{ 0x018c, nv04_graph_mthd_bind_patt },
-	{ 0x0190, nv04_graph_mthd_bind_rop },
-	{ 0x0194, nv04_graph_mthd_bind_beta1 },
-	{ 0x0198, nv04_graph_mthd_bind_beta4 },
-	{ 0x019c, nv04_graph_mthd_bind_surf2d },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_chroma },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_clip },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_patt },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_rop },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta1 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_beta4 },
+	{ 0x019c, 0x019c, nv04_graph_mthd_bind_surf2d },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_iifc_omthds[] = {
-	{ 0x0188, nv01_graph_mthd_bind_chroma },
-	{ 0x018c, nv01_graph_mthd_bind_clip },
-	{ 0x0190, nv04_graph_mthd_bind_patt },
-	{ 0x0194, nv04_graph_mthd_bind_rop },
-	{ 0x0198, nv04_graph_mthd_bind_beta1 },
-	{ 0x019c, nv04_graph_mthd_bind_beta4 },
-	{ 0x01a0, nv04_graph_mthd_bind_surf2d_swzsurf },
-	{ 0x03e4, nv04_graph_mthd_set_operation },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_chroma },
+	{ 0x018c, 0x018c, nv01_graph_mthd_bind_clip },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_patt },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_rop },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_beta1 },
+	{ 0x019c, 0x019c, nv04_graph_mthd_bind_beta4 },
+	{ 0x01a0, 0x01a0, nv04_graph_mthd_bind_surf2d_swzsurf },
+	{ 0x03e4, 0x03e4, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv01_graph_ifc_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_chroma },
-	{ 0x0188, nv01_graph_mthd_bind_clip },
-	{ 0x018c, nv01_graph_mthd_bind_patt },
-	{ 0x0190, nv04_graph_mthd_bind_rop },
-	{ 0x0194, nv04_graph_mthd_bind_beta1 },
-	{ 0x0198, nv04_graph_mthd_bind_surf_dst },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_chroma },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_clip },
+	{ 0x018c, 0x018c, nv01_graph_mthd_bind_patt },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_rop },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta1 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_surf_dst },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_ifc_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_chroma },
-	{ 0x0188, nv01_graph_mthd_bind_clip },
-	{ 0x018c, nv04_graph_mthd_bind_patt },
-	{ 0x0190, nv04_graph_mthd_bind_rop },
-	{ 0x0194, nv04_graph_mthd_bind_beta1 },
-	{ 0x0198, nv04_graph_mthd_bind_beta4 },
-	{ 0x019c, nv04_graph_mthd_bind_surf2d },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_chroma },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_clip },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_patt },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_rop },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta1 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_beta4 },
+	{ 0x019c, 0x019c, nv04_graph_mthd_bind_surf2d },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv03_graph_sifc_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_chroma },
-	{ 0x0188, nv01_graph_mthd_bind_patt },
-	{ 0x018c, nv04_graph_mthd_bind_rop },
-	{ 0x0190, nv04_graph_mthd_bind_beta1 },
-	{ 0x0194, nv04_graph_mthd_bind_surf_dst },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_chroma },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_patt },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_rop },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_beta1 },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_surf_dst },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_sifc_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_chroma },
-	{ 0x0188, nv04_graph_mthd_bind_patt },
-	{ 0x018c, nv04_graph_mthd_bind_rop },
-	{ 0x0190, nv04_graph_mthd_bind_beta1 },
-	{ 0x0194, nv04_graph_mthd_bind_beta4 },
-	{ 0x0198, nv04_graph_mthd_bind_surf2d },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_chroma },
+	{ 0x0188, 0x0188, nv04_graph_mthd_bind_patt },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_rop },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_beta1 },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta4 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_surf2d },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv03_graph_sifm_omthds[] = {
-	{ 0x0188, nv01_graph_mthd_bind_patt },
-	{ 0x018c, nv04_graph_mthd_bind_rop },
-	{ 0x0190, nv04_graph_mthd_bind_beta1 },
-	{ 0x0194, nv04_graph_mthd_bind_surf_dst },
-	{ 0x0304, nv04_graph_mthd_set_operation },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_patt },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_rop },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_beta1 },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_surf_dst },
+	{ 0x0304, 0x0304, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_sifm_omthds[] = {
-	{ 0x0188, nv04_graph_mthd_bind_patt },
-	{ 0x018c, nv04_graph_mthd_bind_rop },
-	{ 0x0190, nv04_graph_mthd_bind_beta1 },
-	{ 0x0194, nv04_graph_mthd_bind_beta4 },
-	{ 0x0198, nv04_graph_mthd_bind_surf2d },
-	{ 0x0304, nv04_graph_mthd_set_operation },
+	{ 0x0188, 0x0188, nv04_graph_mthd_bind_patt },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_rop },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_beta1 },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta4 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_surf2d },
+	{ 0x0304, 0x0304, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_surf3d_omthds[] = {
-	{ 0x02f8, nv04_graph_mthd_surf3d_clip_h },
-	{ 0x02fc, nv04_graph_mthd_surf3d_clip_v },
+	{ 0x02f8, 0x02f8, nv04_graph_mthd_surf3d_clip_h },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_surf3d_clip_v },
 	{}
 };
 
 static struct nouveau_omthds
 nv03_graph_ttri_omthds[] = {
-	{ 0x0188, nv01_graph_mthd_bind_clip },
-	{ 0x018c, nv04_graph_mthd_bind_surf_color },
-	{ 0x0190, nv04_graph_mthd_bind_surf_zeta },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_clip },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_surf_color },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_surf_zeta },
 	{}
 };
 
 static struct nouveau_omthds
 nv01_graph_prim_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_clip },
-	{ 0x0188, nv01_graph_mthd_bind_patt },
-	{ 0x018c, nv04_graph_mthd_bind_rop },
-	{ 0x0190, nv04_graph_mthd_bind_beta1 },
-	{ 0x0194, nv04_graph_mthd_bind_surf_dst },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_clip },
+	{ 0x0188, 0x0188, nv01_graph_mthd_bind_patt },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_rop },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_beta1 },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_surf_dst },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
 static struct nouveau_omthds
 nv04_graph_prim_omthds[] = {
-	{ 0x0184, nv01_graph_mthd_bind_clip },
-	{ 0x0188, nv04_graph_mthd_bind_patt },
-	{ 0x018c, nv04_graph_mthd_bind_rop },
-	{ 0x0190, nv04_graph_mthd_bind_beta1 },
-	{ 0x0194, nv04_graph_mthd_bind_beta4 },
-	{ 0x0198, nv04_graph_mthd_bind_surf2d },
-	{ 0x02fc, nv04_graph_mthd_set_operation },
+	{ 0x0184, 0x0184, nv01_graph_mthd_bind_clip },
+	{ 0x0188, 0x0188, nv04_graph_mthd_bind_patt },
+	{ 0x018c, 0x018c, nv04_graph_mthd_bind_rop },
+	{ 0x0190, 0x0190, nv04_graph_mthd_bind_beta1 },
+	{ 0x0194, 0x0194, nv04_graph_mthd_bind_beta4 },
+	{ 0x0198, 0x0198, nv04_graph_mthd_bind_surf2d },
+	{ 0x02fc, 0x02fc, nv04_graph_mthd_set_operation },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv10.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv10.c
index 92521c8..5c0f843 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv10.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv10.c
@@ -570,11 +570,11 @@
 
 static struct nouveau_omthds
 nv17_celcius_omthds[] = {
-	{ 0x1638, nv17_graph_mthd_lma_window },
-	{ 0x163c, nv17_graph_mthd_lma_window },
-	{ 0x1640, nv17_graph_mthd_lma_window },
-	{ 0x1644, nv17_graph_mthd_lma_window },
-	{ 0x1658, nv17_graph_mthd_lma_enable },
+	{ 0x1638, 0x1638, nv17_graph_mthd_lma_window },
+	{ 0x163c, 0x163c, nv17_graph_mthd_lma_window },
+	{ 0x1640, 0x1640, nv17_graph_mthd_lma_window },
+	{ 0x1644, 0x1644, nv17_graph_mthd_lma_window },
+	{ 0x1658, 0x1658, nv17_graph_mthd_lma_enable },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv20.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv20.c
index 8f3f619..5b20401 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv20.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv20.c
@@ -183,7 +183,7 @@
 	nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
 	nv_wr32(priv, NV10_PGRAPH_RDI_DATA, tile->addr);
 
-	if (nv_device(engine)->card_type == NV_20) {
+	if (nv_device(engine)->chipset != 0x34) {
 		nv_wr32(priv, NV20_PGRAPH_ZCOMP(i), tile->zcomp);
 		nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i);
 		nv_wr32(priv, NV10_PGRAPH_RDI_DATA, tile->zcomp);
@@ -224,14 +224,14 @@
 	nv_wr32(priv, NV04_PGRAPH_FIFO, 0x00000001);
 
 	if (show) {
-		nv_info(priv, "");
+		nv_error(priv, "");
 		nouveau_bitfield_print(nv10_graph_intr_name, show);
 		printk(" nsource:");
 		nouveau_bitfield_print(nv04_graph_nsource, nsource);
 		printk(" nstatus:");
 		nouveau_bitfield_print(nv10_graph_nstatus, nstatus);
 		printk("\n");
-		nv_info(priv, "ch %d/%d class 0x%04x mthd 0x%04x data 0x%08x\n",
+		nv_error(priv, "ch %d/%d class 0x%04x mthd 0x%04x data 0x%08x\n",
 			chid, subc, class, mthd, data);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c
index cc6574e..0b36dd3 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c
@@ -216,10 +216,10 @@
 
 	switch (nv_device(priv)->chipset) {
 	case 0x40:
-	case 0x41: /* guess */
+	case 0x41:
 	case 0x42:
 	case 0x43:
-	case 0x45: /* guess */
+	case 0x45:
 	case 0x4e:
 		nv_wr32(priv, NV20_PGRAPH_TSIZE(i), tile->pitch);
 		nv_wr32(priv, NV20_PGRAPH_TLIMIT(i), tile->limit);
@@ -227,6 +227,21 @@
 		nv_wr32(priv, NV40_PGRAPH_TSIZE1(i), tile->pitch);
 		nv_wr32(priv, NV40_PGRAPH_TLIMIT1(i), tile->limit);
 		nv_wr32(priv, NV40_PGRAPH_TILE1(i), tile->addr);
+		switch (nv_device(priv)->chipset) {
+		case 0x40:
+		case 0x45:
+			nv_wr32(priv, NV20_PGRAPH_ZCOMP(i), tile->zcomp);
+			nv_wr32(priv, NV40_PGRAPH_ZCOMP1(i), tile->zcomp);
+			break;
+		case 0x41:
+		case 0x42:
+		case 0x43:
+			nv_wr32(priv, NV41_PGRAPH_ZCOMP0(i), tile->zcomp);
+			nv_wr32(priv, NV41_PGRAPH_ZCOMP1(i), tile->zcomp);
+			break;
+		default:
+			break;
+		}
 		break;
 	case 0x44:
 	case 0x4a:
@@ -235,18 +250,31 @@
 		nv_wr32(priv, NV20_PGRAPH_TILE(i), tile->addr);
 		break;
 	case 0x46:
+	case 0x4c:
 	case 0x47:
 	case 0x49:
 	case 0x4b:
-	case 0x4c:
+	case 0x63:
 	case 0x67:
-	default:
+	case 0x68:
 		nv_wr32(priv, NV47_PGRAPH_TSIZE(i), tile->pitch);
 		nv_wr32(priv, NV47_PGRAPH_TLIMIT(i), tile->limit);
 		nv_wr32(priv, NV47_PGRAPH_TILE(i), tile->addr);
 		nv_wr32(priv, NV40_PGRAPH_TSIZE1(i), tile->pitch);
 		nv_wr32(priv, NV40_PGRAPH_TLIMIT1(i), tile->limit);
 		nv_wr32(priv, NV40_PGRAPH_TILE1(i), tile->addr);
+		switch (nv_device(priv)->chipset) {
+		case 0x47:
+		case 0x49:
+		case 0x4b:
+			nv_wr32(priv, NV47_PGRAPH_ZCOMP0(i), tile->zcomp);
+			nv_wr32(priv, NV47_PGRAPH_ZCOMP1(i), tile->zcomp);
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
 		break;
 	}
 
@@ -293,7 +321,7 @@
 	nv_wr32(priv, NV04_PGRAPH_FIFO, 0x00000001);
 
 	if (show) {
-		nv_info(priv, "");
+		nv_error(priv, "");
 		nouveau_bitfield_print(nv10_graph_intr_name, show);
 		printk(" nsource:");
 		nouveau_bitfield_print(nv04_graph_nsource, nsource);
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
index ab3b9dc..b1c3d83 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
@@ -184,6 +184,65 @@
 	return 0;
 }
 
+static const struct nouveau_bitfield nv50_pgraph_status[] = {
+	{ 0x00000001, "BUSY" }, /* set when any bit is set */
+	{ 0x00000002, "DISPATCH" },
+	{ 0x00000004, "UNK2" },
+	{ 0x00000008, "UNK3" },
+	{ 0x00000010, "UNK4" },
+	{ 0x00000020, "UNK5" },
+	{ 0x00000040, "M2MF" },
+	{ 0x00000080, "UNK7" },
+	{ 0x00000100, "CTXPROG" },
+	{ 0x00000200, "VFETCH" },
+	{ 0x00000400, "CCACHE_UNK4" },
+	{ 0x00000800, "STRMOUT_GSCHED_UNK5" },
+	{ 0x00001000, "UNK14XX" },
+	{ 0x00002000, "UNK24XX_CSCHED" },
+	{ 0x00004000, "UNK1CXX" },
+	{ 0x00008000, "CLIPID" },
+	{ 0x00010000, "ZCULL" },
+	{ 0x00020000, "ENG2D" },
+	{ 0x00040000, "UNK34XX" },
+	{ 0x00080000, "TPRAST" },
+	{ 0x00100000, "TPROP" },
+	{ 0x00200000, "TEX" },
+	{ 0x00400000, "TPVP" },
+	{ 0x00800000, "MP" },
+	{ 0x01000000, "ROP" },
+	{}
+};
+
+static const char *const nv50_pgraph_vstatus_0[] = {
+	"VFETCH", "CCACHE", "UNK4", "UNK5", "GSCHED", "STRMOUT", "UNK14XX", NULL
+};
+
+static const char *const nv50_pgraph_vstatus_1[] = {
+	"TPRAST", "TPROP", "TEXTURE", "TPVP", "MP", NULL
+};
+
+static const char *const nv50_pgraph_vstatus_2[] = {
+	"UNK24XX", "CSCHED", "UNK1CXX", "CLIPID", "ZCULL", "ENG2D", "UNK34XX",
+	"ROP", NULL
+};
+
+static void nouveau_pgraph_vstatus_print(struct nv50_graph_priv *priv, int r,
+		const char *const units[], u32 status)
+{
+	int i;
+
+	nv_error(priv, "PGRAPH_VSTATUS%d: 0x%08x", r, status);
+
+	for (i = 0; units[i] && status; i++) {
+		if ((status & 7) == 1)
+			pr_cont(" %s", units[i]);
+		status >>= 3;
+	}
+	if (status)
+		pr_cont(" (invalid: 0x%x)", status);
+	pr_cont("\n");
+}
+
 static int
 nv84_graph_tlb_flush(struct nouveau_engine *engine)
 {
@@ -219,10 +278,19 @@
 		 !(timeout = ptimer->read(ptimer) - start > 2000000000));
 
 	if (timeout) {
-		nv_error(priv, "PGRAPH TLB flush idle timeout fail: "
-			      "0x%08x 0x%08x 0x%08x 0x%08x\n",
-			 nv_rd32(priv, 0x400700), nv_rd32(priv, 0x400380),
-			 nv_rd32(priv, 0x400384), nv_rd32(priv, 0x400388));
+		nv_error(priv, "PGRAPH TLB flush idle timeout fail\n");
+
+		tmp = nv_rd32(priv, 0x400700);
+		nv_error(priv, "PGRAPH_STATUS  : 0x%08x", tmp);
+		nouveau_bitfield_print(nv50_pgraph_status, tmp);
+		pr_cont("\n");
+
+		nouveau_pgraph_vstatus_print(priv, 0, nv50_pgraph_vstatus_0,
+				nv_rd32(priv, 0x400380));
+		nouveau_pgraph_vstatus_print(priv, 1, nv50_pgraph_vstatus_1,
+				nv_rd32(priv, 0x400384));
+		nouveau_pgraph_vstatus_print(priv, 2, nv50_pgraph_vstatus_2,
+				nv_rd32(priv, 0x400388));
 	}
 
 	nv50_vm_flush_engine(&engine->base, 0x00);
@@ -453,13 +521,13 @@
 		}
 		if (ustatus) {
 			if (display)
-				nv_info(priv, "%s - TP%d: Unhandled ustatus 0x%08x\n", name, i, ustatus);
+				nv_error(priv, "%s - TP%d: Unhandled ustatus 0x%08x\n", name, i, ustatus);
 		}
 		nv_wr32(priv, ustatus_addr, 0xc0000000);
 	}
 
 	if (!tps && display)
-		nv_info(priv, "%s - No TPs claiming errors?\n", name);
+		nv_warn(priv, "%s - No TPs claiming errors?\n", name);
 }
 
 static int
@@ -718,13 +786,12 @@
 	nv_wr32(priv, 0x400500, 0x00010001);
 
 	if (show) {
-		nv_info(priv, "");
+		nv_error(priv, "");
 		nouveau_bitfield_print(nv50_graph_intr_name, show);
 		printk("\n");
 		nv_error(priv, "ch %d [0x%010llx] subc %d class 0x%04x "
 			       "mthd 0x%04x data 0x%08x\n",
 			 chid, (u64)inst << 12, subc, class, mthd, data);
-		nv50_fb_trap(nouveau_fb(priv), 1);
 	}
 
 	if (nv_rd32(priv, 0x400824) & (1 << 31))
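
The VSTATUS decode added above packs one 3-bit state per unit into each register, and the printer names a unit when its field reads 1. A minimal standalone sketch of that loop, using the nv50_pgraph_vstatus_0 names from the diff and a hypothetical status value:

    #include <stdio.h>

    int main(void)
    {
        static const char *const units[] = {
            "VFETCH", "CCACHE", "UNK4", "UNK5",
            "GSCHED", "STRMOUT", "UNK14XX", NULL
        };
        unsigned int status = 0x201;   /* fields 0 and 3 set to 1 */
        int i;

        for (i = 0; units[i] && status; i++) {
            if ((status & 7) == 1)
                printf(" %s", units[i]);
            status >>= 3;
        }
        if (status)
            printf(" (invalid: 0x%x)", status);
        printf("\n");                  /* prints: " VFETCH UNK5" */
        return 0;
    }
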
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
index c62f2d0..47a0208 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
@@ -814,7 +814,7 @@
 		nv_wr32(priv, 0x41a100, 0x00000002);
 		nv_wr32(priv, 0x409100, 0x00000002);
 		if (!nv_wait(priv, 0x409800, 0x00000001, 0x00000001))
-			nv_info(priv, "0x409800 wait failed\n");
+			nv_warn(priv, "0x409800 wait failed\n");
 
 		nv_wr32(priv, 0x409840, 0xffffffff);
 		nv_wr32(priv, 0x409500, 0x7fffffff);
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/regs.h b/drivers/gpu/drm/nouveau/core/engine/graph/regs.h
index 9c715a2..fde8e24 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/regs.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/regs.h
@@ -205,6 +205,7 @@
 #define NV20_PGRAPH_TSIZE(i)                               (0x00400908 + (i*16))
 #define NV20_PGRAPH_TSTATUS(i)                             (0x0040090C + (i*16))
 #define NV20_PGRAPH_ZCOMP(i)                               (0x00400980 + 4*(i))
+#define NV41_PGRAPH_ZCOMP0(i)                              (0x004009c0 + 4*(i))
 #define NV10_PGRAPH_TILE(i)                                (0x00400B00 + (i*16))
 #define NV10_PGRAPH_TLIMIT(i)                              (0x00400B04 + (i*16))
 #define NV10_PGRAPH_TSIZE(i)                               (0x00400B08 + (i*16))
@@ -216,6 +217,7 @@
 #define NV47_PGRAPH_TSTATUS(i)                             (0x00400D0C + (i*16))
 #define NV04_PGRAPH_V_RAM                                  0x00400D40
 #define NV04_PGRAPH_W_RAM                                  0x00400D80
+#define NV47_PGRAPH_ZCOMP0(i)                              (0x00400e00 + 4*(i))
 #define NV10_PGRAPH_COMBINER0_IN_ALPHA                     0x00400E40
 #define NV10_PGRAPH_COMBINER1_IN_ALPHA                     0x00400E44
 #define NV10_PGRAPH_COMBINER0_IN_RGB                       0x00400E48
@@ -261,9 +263,12 @@
 #define NV04_PGRAPH_DMA_B_OFFSET                           0x00401098
 #define NV04_PGRAPH_DMA_B_SIZE                             0x0040109C
 #define NV04_PGRAPH_DMA_B_Y_SIZE                           0x004010A0
+#define NV47_PGRAPH_ZCOMP1(i)                              (0x004068c0 + 4*(i))
 #define NV40_PGRAPH_TILE1(i)                               (0x00406900 + (i*16))
 #define NV40_PGRAPH_TLIMIT1(i)                             (0x00406904 + (i*16))
 #define NV40_PGRAPH_TSIZE1(i)                              (0x00406908 + (i*16))
 #define NV40_PGRAPH_TSTATUS1(i)                            (0x0040690C + (i*16))
+#define NV40_PGRAPH_ZCOMP1(i)                              (0x00406980 + 4*(i))
+#define NV41_PGRAPH_ZCOMP1(i)                              (0x004069c0 + 4*(i))
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c
index 1f394a2..9fd8637 100644
--- a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c
+++ b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c
@@ -121,9 +121,9 @@
 
 static struct nouveau_omthds
 nv31_mpeg_omthds[] = {
-	{ 0x0190, nv31_mpeg_mthd_dma },
-	{ 0x01a0, nv31_mpeg_mthd_dma },
-	{ 0x01b0, nv31_mpeg_mthd_dma },
+	{ 0x0190, 0x0190, nv31_mpeg_mthd_dma },
+	{ 0x01a0, 0x01a0, nv31_mpeg_mthd_dma },
+	{ 0x01b0, 0x01b0, nv31_mpeg_mthd_dma },
 	{}
 };
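
The omthds tables in this diff gain a second column because struct
nouveau_omthds now carries a (start, limit) method range instead of a
single method number -- see the core/object.h hunk further below, where
nv_exec() performs the range match.  A minimal standalone sketch of that
dispatch, assuming only the semantics visible in this diff (the handler
and table contents are illustrative; the kernel tables here mostly keep
degenerate start == limit entries):

  #include <errno.h>
  #include <stdio.h>

  /* Sketch of the range-based method dispatch nv_exec() now performs. */
  typedef int (*mthd_handler)(unsigned mthd, void *data, unsigned size);

  struct omthd_range {
      unsigned start;   /* first method the handler claims */
      unsigned limit;   /* last method the handler claims (inclusive) */
      mthd_handler call;
  };

  static int mthd_dma(unsigned mthd, void *data, unsigned size)
  {
      printf("dma method 0x%04x, %u bytes\n", mthd, size);
      return 0;
  }

  static const struct omthd_range omthds[] = {
      { 0x0190, 0x01b0, mthd_dma },  /* one entry may claim a whole window */
      { 0, 0, NULL },
  };

  static int dispatch(unsigned mthd, void *data, unsigned size)
  {
      const struct omthd_range *m;

      for (m = omthds; m->call; m++)
          if (mthd >= m->start && mthd <= m->limit)
              return m->call(mthd, data, size);
      return -EINVAL;  /* mirrors nv_exec()'s no-handler case */
  }

  int main(void)
  {
      unsigned dummy = 0;
      return dispatch(0x01a0, &dummy, sizeof(dummy)) ? 1 : 0;
  }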
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv50.c b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv50.c
index 8678a99..bc7d12b3 100644
--- a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv50.c
@@ -157,7 +157,6 @@
 
 	nv_wr32(priv, 0x00b100, stat);
 	nv_wr32(priv, 0x00b230, 0x00000001);
-	nv50_fb_trap(nouveau_fb(priv), 1);
 }
 
 static void
diff --git a/drivers/gpu/drm/nouveau/core/engine/ppp/nv98.c b/drivers/gpu/drm/nouveau/core/engine/ppp/nv98.c
index 50e7e0d..5a5b2a7 100644
--- a/drivers/gpu/drm/nouveau/core/engine/ppp/nv98.c
+++ b/drivers/gpu/drm/nouveau/core/engine/ppp/nv98.c
@@ -22,18 +22,18 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/os.h>
-#include <core/class.h>
+#include <core/engine.h>
 #include <core/engctx.h>
+#include <core/class.h>
 
 #include <engine/ppp.h>
 
 struct nv98_ppp_priv {
-	struct nouveau_ppp base;
+	struct nouveau_engine base;
 };
 
 struct nv98_ppp_chan {
-	struct nouveau_ppp_chan base;
+	struct nouveau_engctx base;
 };
 
 /*******************************************************************************
@@ -49,61 +49,16 @@
  * PPPP context
  ******************************************************************************/
 
-static int
-nv98_ppp_context_ctor(struct nouveau_object *parent,
-		      struct nouveau_object *engine,
-		      struct nouveau_oclass *oclass, void *data, u32 size,
-		      struct nouveau_object **pobject)
-{
-	struct nv98_ppp_chan *priv;
-	int ret;
-
-	ret = nouveau_ppp_context_create(parent, engine, oclass, NULL,
-					 0, 0, 0, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static void
-nv98_ppp_context_dtor(struct nouveau_object *object)
-{
-	struct nv98_ppp_chan *priv = (void *)object;
-	nouveau_ppp_context_destroy(&priv->base);
-}
-
-static int
-nv98_ppp_context_init(struct nouveau_object *object)
-{
-	struct nv98_ppp_chan *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_ppp_context_init(&priv->base);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int
-nv98_ppp_context_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv98_ppp_chan *priv = (void *)object;
-	return nouveau_ppp_context_fini(&priv->base, suspend);
-}
-
 static struct nouveau_oclass
 nv98_ppp_cclass = {
 	.handle = NV_ENGCTX(PPP, 0x98),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv98_ppp_context_ctor,
-		.dtor = nv98_ppp_context_dtor,
-		.init = nv98_ppp_context_init,
-		.fini = nv98_ppp_context_fini,
-		.rd32 = _nouveau_ppp_context_rd32,
-		.wr32 = _nouveau_ppp_context_wr32,
+		.ctor = _nouveau_engctx_ctor,
+		.dtor = _nouveau_engctx_dtor,
+		.init = _nouveau_engctx_init,
+		.fini = _nouveau_engctx_fini,
+		.rd32 = _nouveau_engctx_rd32,
+		.wr32 = _nouveau_engctx_wr32,
 	},
 };
 
@@ -111,11 +66,6 @@
  * PPPP engine/subdev functions
  ******************************************************************************/
 
-static void
-nv98_ppp_intr(struct nouveau_subdev *subdev)
-{
-}
-
 static int
 nv98_ppp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	      struct nouveau_oclass *oclass, void *data, u32 size,
@@ -124,52 +74,25 @@
 	struct nv98_ppp_priv *priv;
 	int ret;
 
-	ret = nouveau_ppp_create(parent, engine, oclass, &priv);
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PPPP", "ppp", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00400002;
-	nv_subdev(priv)->intr = nv98_ppp_intr;
 	nv_engine(priv)->cclass = &nv98_ppp_cclass;
 	nv_engine(priv)->sclass = nv98_ppp_sclass;
 	return 0;
 }
 
-static void
-nv98_ppp_dtor(struct nouveau_object *object)
-{
-	struct nv98_ppp_priv *priv = (void *)object;
-	nouveau_ppp_destroy(&priv->base);
-}
-
-static int
-nv98_ppp_init(struct nouveau_object *object)
-{
-	struct nv98_ppp_priv *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_ppp_init(&priv->base);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int
-nv98_ppp_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv98_ppp_priv *priv = (void *)object;
-	return nouveau_ppp_fini(&priv->base, suspend);
-}
-
 struct nouveau_oclass
 nv98_ppp_oclass = {
 	.handle = NV_ENGINE(PPP, 0x98),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv98_ppp_ctor,
-		.dtor = nv98_ppp_dtor,
-		.init = nv98_ppp_init,
-		.fini = nv98_ppp_fini,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
new file mode 100644
index 0000000..ebf0d86
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Maarten Lankhorst
+ */
+
+#include <core/falcon.h>
+
+#include <engine/ppp.h>
+
+struct nvc0_ppp_priv {
+	struct nouveau_falcon base;
+};
+
+/*******************************************************************************
+ * PPP object classes
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nvc0_ppp_sclass[] = {
+	{ 0x90b3, &nouveau_object_ofuncs },
+	{},
+};
+
+/*******************************************************************************
+ * PPPP context
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nvc0_ppp_cclass = {
+	.handle = NV_ENGCTX(PPP, 0xc0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_falcon_context_ctor,
+		.dtor = _nouveau_falcon_context_dtor,
+		.init = _nouveau_falcon_context_init,
+		.fini = _nouveau_falcon_context_fini,
+		.rd32 = _nouveau_falcon_context_rd32,
+		.wr32 = _nouveau_falcon_context_wr32,
+	},
+};
+
+/*******************************************************************************
+ * PPPP engine/subdev functions
+ ******************************************************************************/
+
+static int
+nvc0_ppp_init(struct nouveau_object *object)
+{
+	struct nvc0_ppp_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_falcon_init(&priv->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x086010, 0x0000fff2);
+	nv_wr32(priv, 0x08601c, 0x0000fff2);
+	return 0;
+}
+
+static int
+nvc0_ppp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	      struct nouveau_oclass *oclass, void *data, u32 size,
+	      struct nouveau_object **pobject)
+{
+	struct nvc0_ppp_priv *priv;
+	int ret;
+
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x086000, true,
+				    "PPPP", "ppp", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x00000002;
+	nv_engine(priv)->cclass = &nvc0_ppp_cclass;
+	nv_engine(priv)->sclass = nvc0_ppp_sclass;
+	return 0;
+}
+
+struct nouveau_oclass
+nvc0_ppp_oclass = {
+	.handle = NV_ENGINE(PPP, 0xc0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvc0_ppp_ctor,
+		.dtor = _nouveau_falcon_dtor,
+		.init = nvc0_ppp_init,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nv04.c b/drivers/gpu/drm/nouveau/core/engine/software/nv04.c
index 3ca4c3a..2a859a3 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nv04.c
@@ -63,8 +63,8 @@
 
 static struct nouveau_omthds
 nv04_software_omthds[] = {
-	{ 0x0150, nv04_software_set_ref },
-	{ 0x0500, nv04_software_flip },
+	{ 0x0150, 0x0150, nv04_software_set_ref },
+	{ 0x0500, 0x0500, nv04_software_flip },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nv10.c b/drivers/gpu/drm/nouveau/core/engine/software/nv10.c
index 6e699af..a019364 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nv10.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nv10.c
@@ -52,7 +52,7 @@
 
 static struct nouveau_omthds
 nv10_software_omthds[] = {
-	{ 0x0500, nv10_software_flip },
+	{ 0x0500, 0x0500, nv10_software_flip },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
index a2edcd3..b0e7e1c 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
@@ -117,11 +117,11 @@
 
 static struct nouveau_omthds
 nv50_software_omthds[] = {
-	{ 0x018c, nv50_software_mthd_dma_vblsem },
-	{ 0x0400, nv50_software_mthd_vblsem_offset },
-	{ 0x0404, nv50_software_mthd_vblsem_value },
-	{ 0x0408, nv50_software_mthd_vblsem_release },
-	{ 0x0500, nv50_software_mthd_flip },
+	{ 0x018c, 0x018c, nv50_software_mthd_dma_vblsem },
+	{ 0x0400, 0x0400, nv50_software_mthd_vblsem_offset },
+	{ 0x0404, 0x0404, nv50_software_mthd_vblsem_value },
+	{ 0x0408, 0x0408, nv50_software_mthd_vblsem_release },
+	{ 0x0500, 0x0500, nv50_software_mthd_flip },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
index b7b0d7e..282a1cd 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
@@ -99,11 +99,11 @@
 
 static struct nouveau_omthds
 nvc0_software_omthds[] = {
-	{ 0x0400, nvc0_software_mthd_vblsem_offset },
-	{ 0x0404, nvc0_software_mthd_vblsem_offset },
-	{ 0x0408, nvc0_software_mthd_vblsem_value },
-	{ 0x040c, nvc0_software_mthd_vblsem_release },
-	{ 0x0500, nvc0_software_mthd_flip },
+	{ 0x0400, 0x0400, nvc0_software_mthd_vblsem_offset },
+	{ 0x0404, 0x0404, nvc0_software_mthd_vblsem_offset },
+	{ 0x0408, 0x0408, nvc0_software_mthd_vblsem_value },
+	{ 0x040c, 0x040c, nvc0_software_mthd_vblsem_release },
+	{ 0x0500, 0x0500, nvc0_software_mthd_flip },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c b/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c
index dd23c80..261cd96 100644
--- a/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c
@@ -22,18 +22,13 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/os.h>
-#include <core/class.h>
 #include <core/engctx.h>
+#include <core/class.h>
 
 #include <engine/vp.h>
 
 struct nv84_vp_priv {
-	struct nouveau_vp base;
-};
-
-struct nv84_vp_chan {
-	struct nouveau_vp_chan base;
+	struct nouveau_engine base;
 };
 
 /*******************************************************************************
@@ -49,61 +44,16 @@
  * PVP context
  ******************************************************************************/
 
-static int
-nv84_vp_context_ctor(struct nouveau_object *parent,
-		     struct nouveau_object *engine,
-		     struct nouveau_oclass *oclass, void *data, u32 size,
-		     struct nouveau_object **pobject)
-{
-	struct nv84_vp_chan *priv;
-	int ret;
-
-	ret = nouveau_vp_context_create(parent, engine, oclass, NULL,
-					0, 0, 0, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static void
-nv84_vp_context_dtor(struct nouveau_object *object)
-{
-	struct nv84_vp_chan *priv = (void *)object;
-	nouveau_vp_context_destroy(&priv->base);
-}
-
-static int
-nv84_vp_context_init(struct nouveau_object *object)
-{
-	struct nv84_vp_chan *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_vp_context_init(&priv->base);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int
-nv84_vp_context_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv84_vp_chan *priv = (void *)object;
-	return nouveau_vp_context_fini(&priv->base, suspend);
-}
-
 static struct nouveau_oclass
 nv84_vp_cclass = {
 	.handle = NV_ENGCTX(VP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv84_vp_context_ctor,
-		.dtor = nv84_vp_context_dtor,
-		.init = nv84_vp_context_init,
-		.fini = nv84_vp_context_fini,
-		.rd32 = _nouveau_vp_context_rd32,
-		.wr32 = _nouveau_vp_context_wr32,
+		.ctor = _nouveau_engctx_ctor,
+		.dtor = _nouveau_engctx_dtor,
+		.init = _nouveau_engctx_init,
+		.fini = _nouveau_engctx_fini,
+		.rd32 = _nouveau_engctx_rd32,
+		.wr32 = _nouveau_engctx_wr32,
 	},
 };
 
@@ -111,11 +61,6 @@
  * PVP engine/subdev functions
  ******************************************************************************/
 
-static void
-nv84_vp_intr(struct nouveau_subdev *subdev)
-{
-}
-
 static int
 nv84_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	     struct nouveau_oclass *oclass, void *data, u32 size,
@@ -124,52 +69,25 @@
 	struct nv84_vp_priv *priv;
 	int ret;
 
-	ret = nouveau_vp_create(parent, engine, oclass, &priv);
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PVP", "vp", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	nv_subdev(priv)->unit = 0x01020000;
-	nv_subdev(priv)->intr = nv84_vp_intr;
 	nv_engine(priv)->cclass = &nv84_vp_cclass;
 	nv_engine(priv)->sclass = nv84_vp_sclass;
 	return 0;
 }
 
-static void
-nv84_vp_dtor(struct nouveau_object *object)
-{
-	struct nv84_vp_priv *priv = (void *)object;
-	nouveau_vp_destroy(&priv->base);
-}
-
-static int
-nv84_vp_init(struct nouveau_object *object)
-{
-	struct nv84_vp_priv *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_vp_init(&priv->base);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int
-nv84_vp_fini(struct nouveau_object *object, bool suspend)
-{
-	struct nv84_vp_priv *priv = (void *)object;
-	return nouveau_vp_fini(&priv->base, suspend);
-}
-
 struct nouveau_oclass
 nv84_vp_oclass = {
 	.handle = NV_ENGINE(VP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv84_vp_ctor,
-		.dtor = nv84_vp_dtor,
-		.init = nv84_vp_init,
-		.fini = nv84_vp_fini,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
new file mode 100644
index 0000000..f761949
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Maarten Lankhorst
+ */
+
+#include <core/falcon.h>
+
+#include <engine/vp.h>
+
+struct nvc0_vp_priv {
+	struct nouveau_falcon base;
+};
+
+/*******************************************************************************
+ * VP object classes
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nvc0_vp_sclass[] = {
+	{ 0x90b2, &nouveau_object_ofuncs },
+	{},
+};
+
+/*******************************************************************************
+ * PVP context
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nvc0_vp_cclass = {
+	.handle = NV_ENGCTX(VP, 0xc0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_falcon_context_ctor,
+		.dtor = _nouveau_falcon_context_dtor,
+		.init = _nouveau_falcon_context_init,
+		.fini = _nouveau_falcon_context_fini,
+		.rd32 = _nouveau_falcon_context_rd32,
+		.wr32 = _nouveau_falcon_context_wr32,
+	},
+};
+
+/*******************************************************************************
+ * PVP engine/subdev functions
+ ******************************************************************************/
+
+static int
+nvc0_vp_init(struct nouveau_object *object)
+{
+	struct nvc0_vp_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_falcon_init(&priv->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x085010, 0x0000fff2);
+	nv_wr32(priv, 0x08501c, 0x0000fff2);
+	return 0;
+}
+
+static int
+nvc0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nvc0_vp_priv *priv;
+	int ret;
+
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x085000, true,
+				    "PVP", "vp", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x00020000;
+	nv_engine(priv)->cclass = &nvc0_vp_cclass;
+	nv_engine(priv)->sclass = nvc0_vp_sclass;
+	return 0;
+}
+
+struct nouveau_oclass
+nvc0_vp_oclass = {
+	.handle = NV_ENGINE(VP, 0xc0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvc0_vp_ctor,
+		.dtor = _nouveau_falcon_dtor,
+		.init = nvc0_vp_init,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
new file mode 100644
index 0000000..2384ce5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/falcon.h>
+
+#include <engine/vp.h>
+
+struct nve0_vp_priv {
+	struct nouveau_falcon base;
+};
+
+/*******************************************************************************
+ * VP object classes
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nve0_vp_sclass[] = {
+	{ 0x95b2, &nouveau_object_ofuncs },
+	{},
+};
+
+/*******************************************************************************
+ * PVP context
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nve0_vp_cclass = {
+	.handle = NV_ENGCTX(VP, 0xe0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_falcon_context_ctor,
+		.dtor = _nouveau_falcon_context_dtor,
+		.init = _nouveau_falcon_context_init,
+		.fini = _nouveau_falcon_context_fini,
+		.rd32 = _nouveau_falcon_context_rd32,
+		.wr32 = _nouveau_falcon_context_wr32,
+	},
+};
+
+/*******************************************************************************
+ * PVP engine/subdev functions
+ ******************************************************************************/
+
+static int
+nve0_vp_init(struct nouveau_object *object)
+{
+	struct nve0_vp_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_falcon_init(&priv->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x085010, 0x0000fff2);
+	nv_wr32(priv, 0x08501c, 0x0000fff2);
+	return 0;
+}
+
+static int
+nve0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nve0_vp_priv *priv;
+	int ret;
+
+	ret = nouveau_falcon_create(parent, engine, oclass, 0x085000, true,
+				    "PVP", "vp", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x00020000;
+	nv_engine(priv)->cclass = &nve0_vp_cclass;
+	nv_engine(priv)->sclass = nve0_vp_sclass;
+	return 0;
+}
+
+struct nouveau_oclass
+nve0_vp_oclass = {
+	.handle = NV_ENGINE(VP, 0xe0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nve0_vp_ctor,
+		.dtor = _nouveau_falcon_dtor,
+		.init = nve0_vp_init,
+		.fini = _nouveau_falcon_fini,
+		.rd32 = _nouveau_falcon_rd32,
+		.wr32 = _nouveau_falcon_wr32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/include/core/class.h b/drivers/gpu/drm/nouveau/core/include/core/class.h
index 6180ae9..47c4b3a 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/class.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/class.h
@@ -23,6 +23,7 @@
 #define NV_DEVICE_DISABLE_COPY0                           0x0000008000000000ULL
 #define NV_DEVICE_DISABLE_COPY1                           0x0000010000000000ULL
 #define NV_DEVICE_DISABLE_UNK1C1                          0x0000020000000000ULL
+#define NV_DEVICE_DISABLE_VENC                            0x0000040000000000ULL
 
 struct nv_device_class {
 	u64 device;	/* device identifier, ~0 for client default */
@@ -52,11 +53,49 @@
 #define NV_DMA_ACCESS_WR                                             0x00000200
 #define NV_DMA_ACCESS_RDWR                                           0x00000300
 
+/* NV50:NVC0 */
+#define NV50_DMA_CONF0_ENABLE                                        0x80000000
+#define NV50_DMA_CONF0_PRIV                                          0x00300000
+#define NV50_DMA_CONF0_PRIV_VM                                       0x00000000
+#define NV50_DMA_CONF0_PRIV_US                                       0x00100000
+#define NV50_DMA_CONF0_PRIV__S                                       0x00200000
+#define NV50_DMA_CONF0_PART                                          0x00030000
+#define NV50_DMA_CONF0_PART_VM                                       0x00000000
+#define NV50_DMA_CONF0_PART_256                                      0x00010000
+#define NV50_DMA_CONF0_PART_1KB                                      0x00020000
+#define NV50_DMA_CONF0_COMP                                          0x00000180
+#define NV50_DMA_CONF0_COMP_NONE                                     0x00000000
+#define NV50_DMA_CONF0_COMP_VM                                       0x00000180
+#define NV50_DMA_CONF0_TYPE                                          0x0000007f
+#define NV50_DMA_CONF0_TYPE_LINEAR                                   0x00000000
+#define NV50_DMA_CONF0_TYPE_VM                                       0x0000007f
+
+/* NVC0:NVD9 */
+#define NVC0_DMA_CONF0_ENABLE                                        0x80000000
+#define NVC0_DMA_CONF0_PRIV                                          0x00300000
+#define NVC0_DMA_CONF0_PRIV_VM                                       0x00000000
+#define NVC0_DMA_CONF0_PRIV_US                                       0x00100000
+#define NVC0_DMA_CONF0_PRIV__S                                       0x00200000
+#define NVC0_DMA_CONF0_UNKN /* PART? */                              0x00030000
+#define NVC0_DMA_CONF0_TYPE                                          0x000000ff
+#define NVC0_DMA_CONF0_TYPE_LINEAR                                   0x00000000
+#define NVC0_DMA_CONF0_TYPE_VM                                       0x000000ff
+
+/* NVD9- */
+#define NVD0_DMA_CONF0_ENABLE                                        0x80000000
+#define NVD0_DMA_CONF0_PAGE                                          0x00000400
+#define NVD0_DMA_CONF0_PAGE_LP                                       0x00000000
+#define NVD0_DMA_CONF0_PAGE_SP                                       0x00000400
+#define NVD0_DMA_CONF0_TYPE                                          0x000000ff
+#define NVD0_DMA_CONF0_TYPE_LINEAR                                   0x00000000
+#define NVD0_DMA_CONF0_TYPE_VM                                       0x000000ff
+
 struct nv_dma_class {
 	u32 flags;
 	u32 pad0;
 	u64 start;
 	u64 limit;
+	u32 conf0;
 };
 
 /* DMA FIFO channel classes
@@ -115,4 +154,190 @@
 	u32 engine;
 };
 
+/* 5070: NV50_DISP
+ * 8270: NV84_DISP
+ * 8370: NVA0_DISP
+ * 8870: NV94_DISP
+ * 8570: NVA3_DISP
+ * 9070: NVD0_DISP
+ * 9170: NVE0_DISP
+ */
+
+#define NV50_DISP_CLASS                                              0x00005070
+#define NV84_DISP_CLASS                                              0x00008270
+#define NVA0_DISP_CLASS                                              0x00008370
+#define NV94_DISP_CLASS                                              0x00008870
+#define NVA3_DISP_CLASS                                              0x00008570
+#define NVD0_DISP_CLASS                                              0x00009070
+#define NVE0_DISP_CLASS                                              0x00009170
+
+#define NV50_DISP_SOR_MTHD                                           0x00010000
+#define NV50_DISP_SOR_MTHD_TYPE                                      0x0000f000
+#define NV50_DISP_SOR_MTHD_HEAD                                      0x00000018
+#define NV50_DISP_SOR_MTHD_LINK                                      0x00000004
+#define NV50_DISP_SOR_MTHD_OR                                        0x00000003
+
+#define NV50_DISP_SOR_PWR                                            0x00010000
+#define NV50_DISP_SOR_PWR_STATE                                      0x00000001
+#define NV50_DISP_SOR_PWR_STATE_ON                                   0x00000001
+#define NV50_DISP_SOR_PWR_STATE_OFF                                  0x00000000
+#define NVA3_DISP_SOR_HDA_ELD                                        0x00010100
+#define NV84_DISP_SOR_HDMI_PWR                                       0x00012000
+#define NV84_DISP_SOR_HDMI_PWR_STATE                                 0x40000000
+#define NV84_DISP_SOR_HDMI_PWR_STATE_OFF                             0x00000000
+#define NV84_DISP_SOR_HDMI_PWR_STATE_ON                              0x40000000
+#define NV84_DISP_SOR_HDMI_PWR_MAX_AC_PACKET                         0x001f0000
+#define NV84_DISP_SOR_HDMI_PWR_REKEY                                 0x0000007f
+#define NV50_DISP_SOR_LVDS_SCRIPT                                    0x00013000
+#define NV50_DISP_SOR_LVDS_SCRIPT_ID                                 0x0000ffff
+#define NV94_DISP_SOR_DP_TRAIN                                       0x00016000
+#define NV94_DISP_SOR_DP_TRAIN_OP                                    0xf0000000
+#define NV94_DISP_SOR_DP_TRAIN_OP_PATTERN                            0x00000000
+#define NV94_DISP_SOR_DP_TRAIN_OP_INIT                               0x10000000
+#define NV94_DISP_SOR_DP_TRAIN_OP_FINI                               0x20000000
+#define NV94_DISP_SOR_DP_TRAIN_INIT_SPREAD                           0x00000001
+#define NV94_DISP_SOR_DP_TRAIN_INIT_SPREAD_OFF                       0x00000000
+#define NV94_DISP_SOR_DP_TRAIN_INIT_SPREAD_ON                        0x00000001
+#define NV94_DISP_SOR_DP_TRAIN_PATTERN                               0x00000003
+#define NV94_DISP_SOR_DP_TRAIN_PATTERN_DISABLED                      0x00000000
+#define NV94_DISP_SOR_DP_LNKCTL                                      0x00016040
+#define NV94_DISP_SOR_DP_LNKCTL_FRAME                                0x80000000
+#define NV94_DISP_SOR_DP_LNKCTL_FRAME_STD                            0x00000000
+#define NV94_DISP_SOR_DP_LNKCTL_FRAME_ENH                            0x80000000
+#define NV94_DISP_SOR_DP_LNKCTL_WIDTH                                0x00001f00
+#define NV94_DISP_SOR_DP_LNKCTL_COUNT                                0x00000007
+#define NV94_DISP_SOR_DP_DRVCTL(l)                     ((l) * 0x40 + 0x00016100)
+#define NV94_DISP_SOR_DP_DRVCTL_VS                                   0x00000300
+#define NV94_DISP_SOR_DP_DRVCTL_PE                                   0x00000003
+
+#define NV50_DISP_DAC_MTHD                                           0x00020000
+#define NV50_DISP_DAC_MTHD_TYPE                                      0x0000f000
+#define NV50_DISP_DAC_MTHD_OR                                        0x00000003
+
+#define NV50_DISP_DAC_PWR                                            0x00020000
+#define NV50_DISP_DAC_PWR_HSYNC                                      0x00000001
+#define NV50_DISP_DAC_PWR_HSYNC_ON                                   0x00000000
+#define NV50_DISP_DAC_PWR_HSYNC_LO                                   0x00000001
+#define NV50_DISP_DAC_PWR_VSYNC                                      0x00000004
+#define NV50_DISP_DAC_PWR_VSYNC_ON                                   0x00000000
+#define NV50_DISP_DAC_PWR_VSYNC_LO                                   0x00000004
+#define NV50_DISP_DAC_PWR_DATA                                       0x00000010
+#define NV50_DISP_DAC_PWR_DATA_ON                                    0x00000000
+#define NV50_DISP_DAC_PWR_DATA_LO                                    0x00000010
+#define NV50_DISP_DAC_PWR_STATE                                      0x00000040
+#define NV50_DISP_DAC_PWR_STATE_ON                                   0x00000000
+#define NV50_DISP_DAC_PWR_STATE_OFF                                  0x00000040
+#define NV50_DISP_DAC_LOAD                                           0x0002000c
+#define NV50_DISP_DAC_LOAD_VALUE                                     0x00000007
+
+struct nv50_display_class {
+};
+
+/* 507a: NV50_DISP_CURS
+ * 827a: NV84_DISP_CURS
+ * 837a: NVA0_DISP_CURS
+ * 887a: NV94_DISP_CURS
+ * 857a: NVA3_DISP_CURS
+ * 907a: NVD0_DISP_CURS
+ * 917a: NVE0_DISP_CURS
+ */
+
+#define NV50_DISP_CURS_CLASS                                         0x0000507a
+#define NV84_DISP_CURS_CLASS                                         0x0000827a
+#define NVA0_DISP_CURS_CLASS                                         0x0000837a
+#define NV94_DISP_CURS_CLASS                                         0x0000887a
+#define NVA3_DISP_CURS_CLASS                                         0x0000857a
+#define NVD0_DISP_CURS_CLASS                                         0x0000907a
+#define NVE0_DISP_CURS_CLASS                                         0x0000917a
+
+struct nv50_display_curs_class {
+	u32 head;
+};
+
+/* 507b: NV50_DISP_OIMM
+ * 827b: NV84_DISP_OIMM
+ * 837b: NVA0_DISP_OIMM
+ * 887b: NV94_DISP_OIMM
+ * 857b: NVA3_DISP_OIMM
+ * 907b: NVD0_DISP_OIMM
+ * 917b: NVE0_DISP_OIMM
+ */
+
+#define NV50_DISP_OIMM_CLASS                                         0x0000507b
+#define NV84_DISP_OIMM_CLASS                                         0x0000827b
+#define NVA0_DISP_OIMM_CLASS                                         0x0000837b
+#define NV94_DISP_OIMM_CLASS                                         0x0000887b
+#define NVA3_DISP_OIMM_CLASS                                         0x0000857b
+#define NVD0_DISP_OIMM_CLASS                                         0x0000907b
+#define NVE0_DISP_OIMM_CLASS                                         0x0000917b
+
+struct nv50_display_oimm_class {
+	u32 head;
+};
+
+/* 507c: NV50_DISP_SYNC
+ * 827c: NV84_DISP_SYNC
+ * 837c: NVA0_DISP_SYNC
+ * 887c: NV94_DISP_SYNC
+ * 857c: NVA3_DISP_SYNC
+ * 907c: NVD0_DISP_SYNC
+ * 917c: NVE0_DISP_SYNC
+ */
+
+#define NV50_DISP_SYNC_CLASS                                         0x0000507c
+#define NV84_DISP_SYNC_CLASS                                         0x0000827c
+#define NVA0_DISP_SYNC_CLASS                                         0x0000837c
+#define NV94_DISP_SYNC_CLASS                                         0x0000887c
+#define NVA3_DISP_SYNC_CLASS                                         0x0000857c
+#define NVD0_DISP_SYNC_CLASS                                         0x0000907c
+#define NVE0_DISP_SYNC_CLASS                                         0x0000917c
+
+struct nv50_display_sync_class {
+	u32 pushbuf;
+	u32 head;
+};
+
+/* 507d: NV50_DISP_MAST
+ * 827d: NV84_DISP_MAST
+ * 837d: NVA0_DISP_MAST
+ * 887d: NV94_DISP_MAST
+ * 857d: NVA3_DISP_MAST
+ * 907d: NVD0_DISP_MAST
+ * 917d: NVE0_DISP_MAST
+ */
+
+#define NV50_DISP_MAST_CLASS                                         0x0000507d
+#define NV84_DISP_MAST_CLASS                                         0x0000827d
+#define NVA0_DISP_MAST_CLASS                                         0x0000837d
+#define NV94_DISP_MAST_CLASS                                         0x0000887d
+#define NVA3_DISP_MAST_CLASS                                         0x0000857d
+#define NVD0_DISP_MAST_CLASS                                         0x0000907d
+#define NVE0_DISP_MAST_CLASS                                         0x0000917d
+
+struct nv50_display_mast_class {
+	u32 pushbuf;
+};
+
+/* 507e: NV50_DISP_OVLY
+ * 827e: NV84_DISP_OVLY
+ * 837e: NVA0_DISP_OVLY
+ * 887e: NV94_DISP_OVLY
+ * 857e: NVA3_DISP_OVLY
+ * 907e: NVD0_DISP_OVLY
+ * 917e: NVE0_DISP_OVLY
+ */
+
+#define NV50_DISP_OVLY_CLASS                                         0x0000507e
+#define NV84_DISP_OVLY_CLASS                                         0x0000827e
+#define NVA0_DISP_OVLY_CLASS                                         0x0000837e
+#define NV94_DISP_OVLY_CLASS                                         0x0000887e
+#define NVA3_DISP_OVLY_CLASS                                         0x0000857e
+#define NVD0_DISP_OVLY_CLASS                                         0x0000907e
+#define NVE0_DISP_OVLY_CLASS                                         0x0000917e
+
+struct nv50_display_ovly_class {
+	u32 pushbuf;
+	u32 head;
+};
+
 #endif
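
struct nv_dma_class picks up a conf0 word here, with per-generation flag
layouts (NV50:NVC0, NVC0:NVD9, NVD9-) spelled out above it.  A hedged
sketch of composing one such word from the NV50-era defines -- the
values are copied from this hunk, the helper itself is illustrative:

  #include <stdint.h>

  /* Values copied from the core/class.h hunk above; the helper is
   * illustrative, not part of the driver. */
  #define NV50_DMA_CONF0_ENABLE       0x80000000
  #define NV50_DMA_CONF0_PRIV_VM      0x00000000
  #define NV50_DMA_CONF0_PART_256     0x00010000
  #define NV50_DMA_CONF0_COMP_NONE    0x00000000
  #define NV50_DMA_CONF0_TYPE_LINEAR  0x00000000

  /* A linear, uncompressed, 256-byte-partition DMA object as the
   * NV50:NVC0 layout encodes it. */
  static inline uint32_t nv50_dma_conf0_linear(void)
  {
      return NV50_DMA_CONF0_ENABLE |
             NV50_DMA_CONF0_PRIV_VM |
             NV50_DMA_CONF0_PART_256 |
             NV50_DMA_CONF0_COMP_NONE |
             NV50_DMA_CONF0_TYPE_LINEAR;
  }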
diff --git a/drivers/gpu/drm/nouveau/core/include/core/engctx.h b/drivers/gpu/drm/nouveau/core/include/core/engctx.h
index 8a947b6..2fd48b5 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/engctx.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/engctx.h
@@ -39,6 +39,9 @@
 int  nouveau_engctx_init(struct nouveau_engctx *);
 int  nouveau_engctx_fini(struct nouveau_engctx *, bool suspend);
 
+int  _nouveau_engctx_ctor(struct nouveau_object *, struct nouveau_object *,
+			  struct nouveau_oclass *, void *, u32,
+			  struct nouveau_object **);
 void _nouveau_engctx_dtor(struct nouveau_object *);
 int  _nouveau_engctx_init(struct nouveau_object *);
 int  _nouveau_engctx_fini(struct nouveau_object *, bool suspend);
diff --git a/drivers/gpu/drm/nouveau/core/include/core/falcon.h b/drivers/gpu/drm/nouveau/core/include/core/falcon.h
new file mode 100644
index 0000000..1edec38
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/include/core/falcon.h
@@ -0,0 +1,81 @@
+#ifndef __NOUVEAU_FALCON_H__
+#define __NOUVEAU_FALCON_H__
+
+#include <core/engine.h>
+#include <core/engctx.h>
+#include <core/gpuobj.h>
+
+struct nouveau_falcon_chan {
+	struct nouveau_engctx base;
+};
+
+#define nouveau_falcon_context_create(p,e,c,g,s,a,f,d)                         \
+	nouveau_engctx_create((p), (e), (c), (g), (s), (a), (f), (d))
+#define nouveau_falcon_context_destroy(d)                                      \
+	nouveau_engctx_destroy(&(d)->base)
+#define nouveau_falcon_context_init(d)                                         \
+	nouveau_engctx_init(&(d)->base)
+#define nouveau_falcon_context_fini(d,s)                                       \
+	nouveau_engctx_fini(&(d)->base, (s))
+
+#define _nouveau_falcon_context_ctor _nouveau_engctx_ctor
+#define _nouveau_falcon_context_dtor _nouveau_engctx_dtor
+#define _nouveau_falcon_context_init _nouveau_engctx_init
+#define _nouveau_falcon_context_fini _nouveau_engctx_fini
+#define _nouveau_falcon_context_rd32 _nouveau_engctx_rd32
+#define _nouveau_falcon_context_wr32 _nouveau_engctx_wr32
+
+struct nouveau_falcon_data {
+	bool external;
+};
+
+struct nouveau_falcon {
+	struct nouveau_engine base;
+
+	u32 addr;
+	u8  version;
+	u8  secret;
+
+	struct nouveau_gpuobj *core;
+	bool external;
+
+	struct {
+		u32 limit;
+		u32 *data;
+		u32  size;
+	} code;
+
+	struct {
+		u32 limit;
+		u32 *data;
+		u32  size;
+	} data;
+};
+
+#define nv_falcon(priv) (&(priv)->base)
+
+#define nouveau_falcon_create(p,e,c,b,d,i,f,r)                                 \
+	nouveau_falcon_create_((p), (e), (c), (b), (d), (i), (f),              \
+			       sizeof(**r),(void **)r)
+#define nouveau_falcon_destroy(p)                                              \
+	nouveau_engine_destroy(&(p)->base)
+#define nouveau_falcon_init(p) ({                                              \
+	struct nouveau_falcon *falcon = (p);                                   \
+	_nouveau_falcon_init(nv_object(falcon));                               \
+})
+#define nouveau_falcon_fini(p,s) ({                                            \
+	struct nouveau_falcon *falcon = (p);                                   \
+	_nouveau_falcon_fini(nv_object(falcon), (s));                          \
+})
+
+int nouveau_falcon_create_(struct nouveau_object *, struct nouveau_object *,
+			   struct nouveau_oclass *, u32, bool, const char *,
+			   const char *, int, void **);
+
+#define _nouveau_falcon_dtor _nouveau_engine_dtor
+int  _nouveau_falcon_init(struct nouveau_object *);
+int  _nouveau_falcon_fini(struct nouveau_object *, bool);
+u32  _nouveau_falcon_rd32(struct nouveau_object *, u64);
+void _nouveau_falcon_wr32(struct nouveau_object *, u64, u32);
+
+#endif
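
core/falcon.h folds the per-engine boilerplate that the VP/PPP files
above used to duplicate into one base class: a falcon-backed engine now
supplies little more than a constructor, wiring everything else to the
_nouveau_falcon_* defaults.  A condensed sketch of the pattern; the
register base, engine names, and handle are placeholders, while the
calls and ofuncs are the ones declared in this header:

  #include <core/falcon.h>

  struct nvxx_eng_priv {
      struct nouveau_falcon base;
  };

  static int
  nvxx_eng_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
                struct nouveau_oclass *oclass, void *data, u32 size,
                struct nouveau_object **pobject)
  {
      struct nvxx_eng_priv *priv;
      int ret;

      /* 0x084000: hypothetical falcon register base; true = enabled */
      ret = nouveau_falcon_create(parent, engine, oclass, 0x084000, true,
                                  "PENG", "eng", &priv);
      *pobject = nv_object(priv);
      /* a real engine would also set nv_engine(priv)->cclass/sclass
       * here, as the nvc0 VP/PPP constructors above do */
      return ret;
  }

  struct nouveau_oclass
  nvxx_eng_oclass = {
      .handle = NV_ENGINE(VP, 0xff),  /* placeholder handle */
      .ofuncs = &(struct nouveau_ofuncs) {
          .ctor = nvxx_eng_ctor,
          .dtor = _nouveau_falcon_dtor,
          .init = _nouveau_falcon_init,
          .fini = _nouveau_falcon_fini,
          .rd32 = _nouveau_falcon_rd32,
          .wr32 = _nouveau_falcon_wr32,
      },
  };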
diff --git a/drivers/gpu/drm/nouveau/core/include/core/gpuobj.h b/drivers/gpu/drm/nouveau/core/include/core/gpuobj.h
index 6eaff79..b3b9ce4 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/gpuobj.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/gpuobj.h
@@ -65,7 +65,7 @@
 void _nouveau_gpuobj_dtor(struct nouveau_object *);
 int  _nouveau_gpuobj_init(struct nouveau_object *);
 int  _nouveau_gpuobj_fini(struct nouveau_object *, bool);
-u32  _nouveau_gpuobj_rd32(struct nouveau_object *, u32);
-void _nouveau_gpuobj_wr32(struct nouveau_object *, u32, u32);
+u32  _nouveau_gpuobj_rd32(struct nouveau_object *, u64);
+void _nouveau_gpuobj_wr32(struct nouveau_object *, u64, u32);
 
 #endif
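
The gpuobj accessors widen their offset argument from u32 to u64 here,
matching the same change to nouveau_ofuncs and the nv_ro*/nv_wo*
wrappers in core/object.h below.  A small sketch of what the wider type
permits -- a composed 64-bit read at an offset that no longer has to fit
in 32 bits (the helper name is illustrative, not in the driver):

  #include <core/object.h>

  /* Illustrative: with u64 offsets, accesses can address past the
   * 4GiB mark that the old u32 signature capped them at. */
  static inline u64
  nv_ro64_sketch(void *obj, u64 addr)
  {
      u64 lo = nv_ro32(obj, addr + 0);
      u64 hi = nv_ro32(obj, addr + 4);
      return (hi << 32) | lo;
  }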
diff --git a/drivers/gpu/drm/nouveau/core/include/core/mm.h b/drivers/gpu/drm/nouveau/core/include/core/mm.h
index 975137b..2514e81 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/mm.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/mm.h
@@ -21,6 +21,12 @@
 	int heap_nodes;
 };
 
+static inline bool
+nouveau_mm_initialised(struct nouveau_mm *mm)
+{
+	return mm->block_size != 0;
+}
+
 int  nouveau_mm_init(struct nouveau_mm *, u32 offset, u32 length, u32 block);
 int  nouveau_mm_fini(struct nouveau_mm *);
 int  nouveau_mm_head(struct nouveau_mm *, u8 type, u32 size_max, u32 size_min,
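
nouveau_mm_initialised() keys off block_size, which nouveau_mm_init()
sets to the allocator's non-zero block granularity, so it doubles as a
cheap has-init-run test.  A sketch of the guard pattern this enables
(the range and block size are placeholder values):

  #include <core/mm.h>

  /* Illustrative: initialise an allocator at most once. */
  static int
  example_mm_setup(struct nouveau_mm *mm)
  {
      if (nouveau_mm_initialised(mm))
          return 0;  /* block_size != 0: init already ran */
      return nouveau_mm_init(mm, 0x0000, 0x4000, 1);
  }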
diff --git a/drivers/gpu/drm/nouveau/core/include/core/object.h b/drivers/gpu/drm/nouveau/core/include/core/object.h
index 486f1a9..5982935 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/object.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/object.h
@@ -70,7 +70,8 @@
 }
 
 struct nouveau_omthds {
-	u32 method;
+	u32 start;
+	u32 limit;
 	int (*call)(struct nouveau_object *, u32, void *, u32);
 };
 
@@ -81,12 +82,12 @@
 	void (*dtor)(struct nouveau_object *);
 	int  (*init)(struct nouveau_object *);
 	int  (*fini)(struct nouveau_object *, bool suspend);
-	u8   (*rd08)(struct nouveau_object *, u32 offset);
-	u16  (*rd16)(struct nouveau_object *, u32 offset);
-	u32  (*rd32)(struct nouveau_object *, u32 offset);
-	void (*wr08)(struct nouveau_object *, u32 offset, u8 data);
-	void (*wr16)(struct nouveau_object *, u32 offset, u16 data);
-	void (*wr32)(struct nouveau_object *, u32 offset, u32 data);
+	u8   (*rd08)(struct nouveau_object *, u64 offset);
+	u16  (*rd16)(struct nouveau_object *, u64 offset);
+	u32  (*rd32)(struct nouveau_object *, u64 offset);
+	void (*wr08)(struct nouveau_object *, u64 offset, u8 data);
+	void (*wr16)(struct nouveau_object *, u64 offset, u16 data);
+	void (*wr32)(struct nouveau_object *, u64 offset, u32 data);
 };
 
 static inline struct nouveau_ofuncs *
@@ -109,21 +110,27 @@
 void nouveau_object_debug(void);
 
 static inline int
-nv_call(void *obj, u32 mthd, u32 data)
+nv_exec(void *obj, u32 mthd, void *data, u32 size)
 {
 	struct nouveau_omthds *method = nv_oclass(obj)->omthds;
 
 	while (method && method->call) {
-		if (method->method == mthd)
-			return method->call(obj, mthd, &data, sizeof(data));
+		if (mthd >= method->start && mthd <= method->limit)
+			return method->call(obj, mthd, data, size);
 		method++;
 	}
 
 	return -EINVAL;
 }
 
+static inline int
+nv_call(void *obj, u32 mthd, u32 data)
+{
+	return nv_exec(obj, mthd, &data, sizeof(data));
+}
+
 static inline u8
-nv_ro08(void *obj, u32 addr)
+nv_ro08(void *obj, u64 addr)
 {
 	u8 data = nv_ofuncs(obj)->rd08(obj, addr);
 	nv_spam(obj, "nv_ro08 0x%08x 0x%02x\n", addr, data);
@@ -131,7 +138,7 @@
 }
 
 static inline u16
-nv_ro16(void *obj, u32 addr)
+nv_ro16(void *obj, u64 addr)
 {
 	u16 data = nv_ofuncs(obj)->rd16(obj, addr);
 	nv_spam(obj, "nv_ro16 0x%08x 0x%04x\n", addr, data);
@@ -139,7 +146,7 @@
 }
 
 static inline u32
-nv_ro32(void *obj, u32 addr)
+nv_ro32(void *obj, u64 addr)
 {
 	u32 data = nv_ofuncs(obj)->rd32(obj, addr);
 	nv_spam(obj, "nv_ro32 0x%08x 0x%08x\n", addr, data);
@@ -147,28 +154,28 @@
 }
 
 static inline void
-nv_wo08(void *obj, u32 addr, u8 data)
+nv_wo08(void *obj, u64 addr, u8 data)
 {
 	nv_spam(obj, "nv_wo08 0x%08x 0x%02x\n", addr, data);
 	nv_ofuncs(obj)->wr08(obj, addr, data);
 }
 
 static inline void
-nv_wo16(void *obj, u32 addr, u16 data)
+nv_wo16(void *obj, u64 addr, u16 data)
 {
 	nv_spam(obj, "nv_wo16 0x%08x 0x%04x\n", addr, data);
 	nv_ofuncs(obj)->wr16(obj, addr, data);
 }
 
 static inline void
-nv_wo32(void *obj, u32 addr, u32 data)
+nv_wo32(void *obj, u64 addr, u32 data)
 {
 	nv_spam(obj, "nv_wo32 0x%08x 0x%08x\n", addr, data);
 	nv_ofuncs(obj)->wr32(obj, addr, data);
 }
 
 static inline u32
-nv_mo32(void *obj, u32 addr, u32 mask, u32 data)
+nv_mo32(void *obj, u64 addr, u32 mask, u32 data)
 {
 	u32 temp = nv_ro32(obj, addr);
 	nv_wo32(obj, addr, (temp & ~mask) | data);
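
With nv_call() reduced to a one-dword wrapper around the new nv_exec(),
method handlers can receive arbitrary-size payloads, and the
(start, limit) match lets one table entry claim a whole method window --
presumably why layouts like NV50_DISP_SOR_MTHD above pack head/link/or
selectors into the low bits of the method number.  A sketch of both
entry points (the object and method numbers are illustrative):

  #include <core/object.h>

  /* Illustrative caller exercising both dispatch paths. */
  static int
  example_dispatch(struct nouveau_object *obj)
  {
      u32 buf[4] = { 0 };
      int ret;

      /* arbitrary-size payload through the new generic path */
      ret = nv_exec(obj, 0x0190, buf, sizeof(buf));
      if (ret)
          return ret;

      /* single-dword convenience wrapper, now built on nv_exec() */
      return nv_call(obj, 0x0500, 1);
  }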
diff --git a/drivers/gpu/drm/nouveau/core/include/core/parent.h b/drivers/gpu/drm/nouveau/core/include/core/parent.h
index 3c2e940..31cd852 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/parent.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/parent.h
@@ -14,7 +14,7 @@
 	struct nouveau_object base;
 
 	struct nouveau_sclass *sclass;
-	u32 engine;
+	u64 engine;
 
 	int  (*context_attach)(struct nouveau_object *,
 			       struct nouveau_object *);
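
parent->engine widens from u32 to u64 here, in step with the u64
engmask in engine/fifo.h below and the NV_DEVICE_DISABLE_* flags in
core/class.h, which already occupy bits above 31 (NV_DEVICE_DISABLE_VENC
is bit 42).  A tiny sketch of why the width matters (the check itself is
illustrative):

  #include <stdint.h>

  /* Bit 42, copied from the core/class.h hunk in this diff. */
  #define NV_DEVICE_DISABLE_VENC 0x0000040000000000ULL

  /* Illustrative: a u32 mask would silently truncate any engine bit at
   * position >= 32; u64 keeps the whole set addressable. */
  static inline int engine_bit_set(uint64_t engmask, uint64_t bit)
  {
      return (engmask & bit) != 0;
  }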
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/bsp.h b/drivers/gpu/drm/nouveau/core/include/engine/bsp.h
index 75d1ed5..13ccdf5 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/bsp.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/bsp.h
@@ -1,45 +1,8 @@
 #ifndef __NOUVEAU_BSP_H__
 #define __NOUVEAU_BSP_H__
 
-#include <core/engine.h>
-#include <core/engctx.h>
-
-struct nouveau_bsp_chan {
-	struct nouveau_engctx base;
-};
-
-#define nouveau_bsp_context_create(p,e,c,g,s,a,f,d)                            \
-	nouveau_engctx_create((p), (e), (c), (g), (s), (a), (f), (d))
-#define nouveau_bsp_context_destroy(d)                                         \
-	nouveau_engctx_destroy(&(d)->base)
-#define nouveau_bsp_context_init(d)                                            \
-	nouveau_engctx_init(&(d)->base)
-#define nouveau_bsp_context_fini(d,s)                                          \
-	nouveau_engctx_fini(&(d)->base, (s))
-
-#define _nouveau_bsp_context_dtor _nouveau_engctx_dtor
-#define _nouveau_bsp_context_init _nouveau_engctx_init
-#define _nouveau_bsp_context_fini _nouveau_engctx_fini
-#define _nouveau_bsp_context_rd32 _nouveau_engctx_rd32
-#define _nouveau_bsp_context_wr32 _nouveau_engctx_wr32
-
-struct nouveau_bsp {
-	struct nouveau_engine base;
-};
-
-#define nouveau_bsp_create(p,e,c,d)                                            \
-	nouveau_engine_create((p), (e), (c), true, "PBSP", "bsp", (d))
-#define nouveau_bsp_destroy(d)                                                 \
-	nouveau_engine_destroy(&(d)->base)
-#define nouveau_bsp_init(d)                                                    \
-	nouveau_engine_init(&(d)->base)
-#define nouveau_bsp_fini(d,s)                                                  \
-	nouveau_engine_fini(&(d)->base, (s))
-
-#define _nouveau_bsp_dtor _nouveau_engine_dtor
-#define _nouveau_bsp_init _nouveau_engine_init
-#define _nouveau_bsp_fini _nouveau_engine_fini
-
 extern struct nouveau_oclass nv84_bsp_oclass;
+extern struct nouveau_oclass nvc0_bsp_oclass;
+extern struct nouveau_oclass nve0_bsp_oclass;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/copy.h b/drivers/gpu/drm/nouveau/core/include/engine/copy.h
index 70b9d8c..8cad2cf 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/copy.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/copy.h
@@ -1,44 +1,7 @@
 #ifndef __NOUVEAU_COPY_H__
 #define __NOUVEAU_COPY_H__
 
-#include <core/engine.h>
-#include <core/engctx.h>
-
-struct nouveau_copy_chan {
-	struct nouveau_engctx base;
-};
-
-#define nouveau_copy_context_create(p,e,c,g,s,a,f,d)                           \
-	nouveau_engctx_create((p), (e), (c), (g), (s), (a), (f), (d))
-#define nouveau_copy_context_destroy(d)                                        \
-	nouveau_engctx_destroy(&(d)->base)
-#define nouveau_copy_context_init(d)                                           \
-	nouveau_engctx_init(&(d)->base)
-#define nouveau_copy_context_fini(d,s)                                         \
-	nouveau_engctx_fini(&(d)->base, (s))
-
-#define _nouveau_copy_context_dtor _nouveau_engctx_dtor
-#define _nouveau_copy_context_init _nouveau_engctx_init
-#define _nouveau_copy_context_fini _nouveau_engctx_fini
-#define _nouveau_copy_context_rd32 _nouveau_engctx_rd32
-#define _nouveau_copy_context_wr32 _nouveau_engctx_wr32
-
-struct nouveau_copy {
-	struct nouveau_engine base;
-};
-
-#define nouveau_copy_create(p,e,c,y,i,d)                                       \
-	nouveau_engine_create((p), (e), (c), (y), "PCE"#i, "copy"#i, (d))
-#define nouveau_copy_destroy(d)                                                \
-	nouveau_engine_destroy(&(d)->base)
-#define nouveau_copy_init(d)                                                   \
-	nouveau_engine_init(&(d)->base)
-#define nouveau_copy_fini(d,s)                                                 \
-	nouveau_engine_fini(&(d)->base, (s))
-
-#define _nouveau_copy_dtor _nouveau_engine_dtor
-#define _nouveau_copy_init _nouveau_engine_init
-#define _nouveau_copy_fini _nouveau_engine_fini
+void nva3_copy_intr(struct nouveau_subdev *);
 
 extern struct nouveau_oclass nva3_copy_oclass;
 extern struct nouveau_oclass nvc0_copy0_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/crypt.h b/drivers/gpu/drm/nouveau/core/include/engine/crypt.h
index e3674743..db97561 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/crypt.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/crypt.h
@@ -1,45 +1,6 @@
 #ifndef __NOUVEAU_CRYPT_H__
 #define __NOUVEAU_CRYPT_H__
 
-#include <core/engine.h>
-#include <core/engctx.h>
-
-struct nouveau_crypt_chan {
-	struct nouveau_engctx base;
-};
-
-#define nouveau_crypt_context_create(p,e,c,g,s,a,f,d)                          \
-	nouveau_engctx_create((p), (e), (c), (g), (s), (a), (f), (d))
-#define nouveau_crypt_context_destroy(d)                                       \
-	nouveau_engctx_destroy(&(d)->base)
-#define nouveau_crypt_context_init(d)                                          \
-	nouveau_engctx_init(&(d)->base)
-#define nouveau_crypt_context_fini(d,s)                                        \
-	nouveau_engctx_fini(&(d)->base, (s))
-
-#define _nouveau_crypt_context_dtor _nouveau_engctx_dtor
-#define _nouveau_crypt_context_init _nouveau_engctx_init
-#define _nouveau_crypt_context_fini _nouveau_engctx_fini
-#define _nouveau_crypt_context_rd32 _nouveau_engctx_rd32
-#define _nouveau_crypt_context_wr32 _nouveau_engctx_wr32
-
-struct nouveau_crypt {
-	struct nouveau_engine base;
-};
-
-#define nouveau_crypt_create(p,e,c,d)                                          \
-	nouveau_engine_create((p), (e), (c), true, "PCRYPT", "crypt", (d))
-#define nouveau_crypt_destroy(d)                                               \
-	nouveau_engine_destroy(&(d)->base)
-#define nouveau_crypt_init(d)                                                  \
-	nouveau_engine_init(&(d)->base)
-#define nouveau_crypt_fini(d,s)                                                \
-	nouveau_engine_fini(&(d)->base, (s))
-
-#define _nouveau_crypt_dtor _nouveau_engine_dtor
-#define _nouveau_crypt_init _nouveau_engine_init
-#define _nouveau_crypt_fini _nouveau_engine_fini
-
 extern struct nouveau_oclass nv84_crypt_oclass;
 extern struct nouveau_oclass nv98_crypt_oclass;
 
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/disp.h b/drivers/gpu/drm/nouveau/core/include/engine/disp.h
index 38ec125..4694828 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/disp.h
@@ -39,6 +39,11 @@
 
 extern struct nouveau_oclass nv04_disp_oclass;
 extern struct nouveau_oclass nv50_disp_oclass;
+extern struct nouveau_oclass nv84_disp_oclass;
+extern struct nouveau_oclass nva0_disp_oclass;
+extern struct nouveau_oclass nv94_disp_oclass;
+extern struct nouveau_oclass nva3_disp_oclass;
 extern struct nouveau_oclass nvd0_disp_oclass;
+extern struct nouveau_oclass nve0_disp_oclass;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/dmaobj.h b/drivers/gpu/drm/nouveau/core/include/engine/dmaobj.h
index 700ccbb..b28914e 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/dmaobj.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/dmaobj.h
@@ -12,29 +12,17 @@
 	u32 access;
 	u64 start;
 	u64 limit;
+	u32 conf0;
 };
 
-#define nouveau_dmaobj_create(p,e,c,a,s,d)                                     \
-	nouveau_dmaobj_create_((p), (e), (c), (a), (s), sizeof(**d), (void **)d)
-#define nouveau_dmaobj_destroy(p)                                              \
-	nouveau_object_destroy(&(p)->base)
-#define nouveau_dmaobj_init(p)                                                 \
-	nouveau_object_init(&(p)->base)
-#define nouveau_dmaobj_fini(p,s)                                               \
-	nouveau_object_fini(&(p)->base, (s))
-
-int nouveau_dmaobj_create_(struct nouveau_object *, struct nouveau_object *,
-			   struct nouveau_oclass *, void *data, u32 size,
-			   int length, void **);
-
-#define _nouveau_dmaobj_dtor nouveau_object_destroy
-#define _nouveau_dmaobj_init nouveau_object_init
-#define _nouveau_dmaobj_fini nouveau_object_fini
-
 struct nouveau_dmaeng {
 	struct nouveau_engine base;
-	int (*bind)(struct nouveau_dmaeng *, struct nouveau_object *parent,
-		    struct nouveau_dmaobj *, struct nouveau_gpuobj **);
+
+	/* creates a "physical" dma object from a struct nouveau_dmaobj */
+	int (*bind)(struct nouveau_dmaeng *dmaeng,
+		    struct nouveau_object *parent,
+		    struct nouveau_dmaobj *dmaobj,
+		    struct nouveau_gpuobj **);
 };
 
 #define nouveau_dmaeng_create(p,e,c,d)                                         \
@@ -53,5 +41,8 @@
 extern struct nouveau_oclass nv04_dmaeng_oclass;
 extern struct nouveau_oclass nv50_dmaeng_oclass;
 extern struct nouveau_oclass nvc0_dmaeng_oclass;
+extern struct nouveau_oclass nvd0_dmaeng_oclass;
+
+extern struct nouveau_oclass nouveau_dmaobj_sclass[];
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
index d67fed1e..f18846c 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
@@ -33,15 +33,15 @@
 				  struct nouveau_object *,
 				  struct nouveau_oclass *,
 				  int bar, u32 addr, u32 size, u32 push,
-				  u32 engmask, int len, void **);
+				  u64 engmask, int len, void **);
 void nouveau_fifo_channel_destroy(struct nouveau_fifo_chan *);
 
 #define _nouveau_fifo_channel_init _nouveau_namedb_init
 #define _nouveau_fifo_channel_fini _nouveau_namedb_fini
 
 void _nouveau_fifo_channel_dtor(struct nouveau_object *);
-u32  _nouveau_fifo_channel_rd32(struct nouveau_object *, u32);
-void _nouveau_fifo_channel_wr32(struct nouveau_object *, u32, u32);
+u32  _nouveau_fifo_channel_rd32(struct nouveau_object *, u64);
+void _nouveau_fifo_channel_wr32(struct nouveau_object *, u64, u32);
 
 struct nouveau_fifo_base {
 	struct nouveau_gpuobj base;
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/ppp.h b/drivers/gpu/drm/nouveau/core/include/engine/ppp.h
index 74d554f..0a66781 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/ppp.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/ppp.h
@@ -1,45 +1,7 @@
 #ifndef __NOUVEAU_PPP_H__
 #define __NOUVEAU_PPP_H__
 
-#include <core/engine.h>
-#include <core/engctx.h>
-
-struct nouveau_ppp_chan {
-	struct nouveau_engctx base;
-};
-
-#define nouveau_ppp_context_create(p,e,c,g,s,a,f,d)                            \
-	nouveau_engctx_create((p), (e), (c), (g), (s), (a), (f), (d))
-#define nouveau_ppp_context_destroy(d)                                         \
-	nouveau_engctx_destroy(&(d)->base)
-#define nouveau_ppp_context_init(d)                                            \
-	nouveau_engctx_init(&(d)->base)
-#define nouveau_ppp_context_fini(d,s)                                          \
-	nouveau_engctx_fini(&(d)->base, (s))
-
-#define _nouveau_ppp_context_dtor _nouveau_engctx_dtor
-#define _nouveau_ppp_context_init _nouveau_engctx_init
-#define _nouveau_ppp_context_fini _nouveau_engctx_fini
-#define _nouveau_ppp_context_rd32 _nouveau_engctx_rd32
-#define _nouveau_ppp_context_wr32 _nouveau_engctx_wr32
-
-struct nouveau_ppp {
-	struct nouveau_engine base;
-};
-
-#define nouveau_ppp_create(p,e,c,d)                                            \
-	nouveau_engine_create((p), (e), (c), true, "PPPP", "ppp", (d))
-#define nouveau_ppp_destroy(d)                                                 \
-	nouveau_engine_destroy(&(d)->base)
-#define nouveau_ppp_init(d)                                                    \
-	nouveau_engine_init(&(d)->base)
-#define nouveau_ppp_fini(d,s)                                                  \
-	nouveau_engine_fini(&(d)->base, (s))
-
-#define _nouveau_ppp_dtor _nouveau_engine_dtor
-#define _nouveau_ppp_init _nouveau_engine_init
-#define _nouveau_ppp_fini _nouveau_engine_fini
-
 extern struct nouveau_oclass nv98_ppp_oclass;
+extern struct nouveau_oclass nvc0_ppp_oclass;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/vp.h b/drivers/gpu/drm/nouveau/core/include/engine/vp.h
index 05cd08f..d7b287b 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/vp.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/vp.h
@@ -1,45 +1,8 @@
 #ifndef __NOUVEAU_VP_H__
 #define __NOUVEAU_VP_H__
 
-#include <core/engine.h>
-#include <core/engctx.h>
-
-struct nouveau_vp_chan {
-	struct nouveau_engctx base;
-};
-
-#define nouveau_vp_context_create(p,e,c,g,s,a,f,d)                             \
-	nouveau_engctx_create((p), (e), (c), (g), (s), (a), (f), (d))
-#define nouveau_vp_context_destroy(d)                                          \
-	nouveau_engctx_destroy(&(d)->base)
-#define nouveau_vp_context_init(d)                                             \
-	nouveau_engctx_init(&(d)->base)
-#define nouveau_vp_context_fini(d,s)                                           \
-	nouveau_engctx_fini(&(d)->base, (s))
-
-#define _nouveau_vp_context_dtor _nouveau_engctx_dtor
-#define _nouveau_vp_context_init _nouveau_engctx_init
-#define _nouveau_vp_context_fini _nouveau_engctx_fini
-#define _nouveau_vp_context_rd32 _nouveau_engctx_rd32
-#define _nouveau_vp_context_wr32 _nouveau_engctx_wr32
-
-struct nouveau_vp {
-	struct nouveau_engine base;
-};
-
-#define nouveau_vp_create(p,e,c,d)                                             \
-	nouveau_engine_create((p), (e), (c), true, "PVP", "vp", (d))
-#define nouveau_vp_destroy(d)                                                  \
-	nouveau_engine_destroy(&(d)->base)
-#define nouveau_vp_init(d)                                                     \
-	nouveau_engine_init(&(d)->base)
-#define nouveau_vp_fini(d,s)                                                   \
-	nouveau_engine_fini(&(d)->base, (s))
-
-#define _nouveau_vp_dtor _nouveau_engine_dtor
-#define _nouveau_vp_init _nouveau_engine_init
-#define _nouveau_vp_fini _nouveau_engine_fini
-
 extern struct nouveau_oclass nv84_vp_oclass;
+extern struct nouveau_oclass nvc0_vp_oclass;
+extern struct nouveau_oclass nve0_vp_oclass;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/dcb.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/dcb.h
index d682fb6..b79025d 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/dcb.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/dcb.h
@@ -23,6 +23,7 @@
 	uint8_t bus;
 	uint8_t location;
 	uint8_t or;
+	uint8_t link;
 	bool duallink_possible;
 	union {
 		struct sor_conf {
@@ -55,36 +56,11 @@
 
 u16 dcb_table(struct nouveau_bios *, u8 *ver, u8 *hdr, u8 *ent, u8 *len);
 u16 dcb_outp(struct nouveau_bios *, u8 idx, u8 *ver, u8 *len);
+u16 dcb_outp_parse(struct nouveau_bios *, u8 idx, u8 *, u8 *,
+		   struct dcb_output *);
+u16 dcb_outp_match(struct nouveau_bios *, u16 type, u16 mask, u8 *, u8 *,
+		   struct dcb_output *);
 int dcb_outp_foreach(struct nouveau_bios *, void *data, int (*exec)
 		     (struct nouveau_bios *, void *, int index, u16 entry));
 
-
-/* BIT 'U'/'d' table encoder subtables have hashes matching them to
- * a particular set of encoders.
- *
- * This function returns true if a particular DCB entry matches.
- */
-static inline bool
-dcb_hash_match(struct dcb_output *dcb, u32 hash)
-{
-	if ((hash & 0x000000f0) != (dcb->location << 4))
-		return false;
-	if ((hash & 0x0000000f) != dcb->type)
-		return false;
-	if (!(hash & (dcb->or << 16)))
-		return false;
-
-	switch (dcb->type) {
-	case DCB_OUTPUT_TMDS:
-	case DCB_OUTPUT_LVDS:
-	case DCB_OUTPUT_DP:
-		if (hash & 0x00c00000) {
-			if (!(hash & (dcb->sorconf.link << 22)))
-				return false;
-		}
-	default:
-		return true;
-	}
-}
-
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/disp.h
new file mode 100644
index 0000000..c35937e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/disp.h
@@ -0,0 +1,48 @@
+#ifndef __NVBIOS_DISP_H__
+#define __NVBIOS_DISP_H__
+
+u16 nvbios_disp_table(struct nouveau_bios *,
+		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *sub);
+
+struct nvbios_disp {
+	u16 data;
+};
+
+u16 nvbios_disp_entry(struct nouveau_bios *, u8 idx,
+		      u8 *ver, u8 *hdr__, u8 *sub);
+u16 nvbios_disp_parse(struct nouveau_bios *, u8 idx,
+		      u8 *ver, u8 *hdr__, u8 *sub,
+		      struct nvbios_disp *);
+
+struct nvbios_outp {
+	u16 type;
+	u16 mask;
+	u16 script[3];
+};
+
+u16 nvbios_outp_entry(struct nouveau_bios *, u8 idx,
+		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u16 nvbios_outp_parse(struct nouveau_bios *, u8 idx,
+		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		      struct nvbios_outp *);
+u16 nvbios_outp_match(struct nouveau_bios *, u16 type, u16 mask,
+		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		      struct nvbios_outp *);
+
+
+struct nvbios_ocfg {
+	u16 match;
+	u16 clkcmp[2];
+};
+
+u16 nvbios_ocfg_entry(struct nouveau_bios *, u16 outp, u8 idx,
+		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u16 nvbios_ocfg_parse(struct nouveau_bios *, u16 outp, u8 idx,
+		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		      struct nvbios_ocfg *);
+u16 nvbios_ocfg_match(struct nouveau_bios *, u16 outp, u16 type,
+		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		      struct nvbios_ocfg *);
+u16 nvbios_oclk_match(struct nouveau_bios *, u16 cmp, u32 khz);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h
index 73b5e5d..6e54218 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/dp.h
@@ -1,8 +1,34 @@
 #ifndef __NVBIOS_DP_H__
 #define __NVBIOS_DP_H__
 
-u16 dp_table(struct nouveau_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u16 dp_outp(struct nouveau_bios *, u8 idx, u8 *ver, u8 *len);
-u16 dp_outp_match(struct nouveau_bios *, struct dcb_output *, u8 *ver, u8 *len);
+struct nvbios_dpout {
+	u16 type;
+	u16 mask;
+	u8  flags;
+	u32 script[5];
+	u32 lnkcmp;
+};
+
+u16 nvbios_dpout_parse(struct nouveau_bios *, u8 idx,
+		       u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		       struct nvbios_dpout *);
+u16 nvbios_dpout_match(struct nouveau_bios *, u16 type, u16 mask,
+		       u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		       struct nvbios_dpout *);
+
+struct nvbios_dpcfg {
+	u8 drv;
+	u8 pre;
+	u8 unk;
+};
+
+u16
+nvbios_dpcfg_parse(struct nouveau_bios *, u16 outp, u8 idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_dpcfg *);
+u16
+nvbios_dpcfg_match(struct nouveau_bios *, u16 outp, u8 un, u8 vs, u8 pe,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_dpcfg *);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
index 5c1b5e1..da470e6 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
@@ -69,8 +69,11 @@
 		} type;
 		u64 stolen;
 		u64 size;
-		int ranks;
 
+		int ranks;
+		int parts;
+
+		int  (*init)(struct nouveau_fb *);
 		int  (*get)(struct nouveau_fb *, u64 size, u32 align,
 			    u32 size_nc, u32 type, struct nouveau_mem **);
 		void (*put)(struct nouveau_fb *, struct nouveau_mem **);
@@ -84,6 +87,8 @@
 		int regions;
 		void (*init)(struct nouveau_fb *, int i, u32 addr, u32 size,
 			     u32 pitch, u32 flags, struct nouveau_fb_tile *);
+		void (*comp)(struct nouveau_fb *, int i, u32 size, u32 flags,
+			     struct nouveau_fb_tile *);
 		void (*fini)(struct nouveau_fb *, int i,
 			     struct nouveau_fb_tile *);
 		void (*prog)(struct nouveau_fb *, int i,
@@ -99,7 +104,7 @@
 
 #define nouveau_fb_create(p,e,c,d)                                             \
 	nouveau_subdev_create((p), (e), (c), 0, "PFB", "fb", (d))
-int  nouveau_fb_created(struct nouveau_fb *);
+int  nouveau_fb_preinit(struct nouveau_fb *);
 void nouveau_fb_destroy(struct nouveau_fb *);
 int  nouveau_fb_init(struct nouveau_fb *);
 #define nouveau_fb_fini(p,s)                                                   \
@@ -111,9 +116,19 @@
 
 extern struct nouveau_oclass nv04_fb_oclass;
 extern struct nouveau_oclass nv10_fb_oclass;
+extern struct nouveau_oclass nv1a_fb_oclass;
 extern struct nouveau_oclass nv20_fb_oclass;
+extern struct nouveau_oclass nv25_fb_oclass;
 extern struct nouveau_oclass nv30_fb_oclass;
+extern struct nouveau_oclass nv35_fb_oclass;
+extern struct nouveau_oclass nv36_fb_oclass;
 extern struct nouveau_oclass nv40_fb_oclass;
+extern struct nouveau_oclass nv41_fb_oclass;
+extern struct nouveau_oclass nv44_fb_oclass;
+extern struct nouveau_oclass nv46_fb_oclass;
+extern struct nouveau_oclass nv47_fb_oclass;
+extern struct nouveau_oclass nv49_fb_oclass;
+extern struct nouveau_oclass nv4e_fb_oclass;
 extern struct nouveau_oclass nv50_fb_oclass;
 extern struct nouveau_oclass nvc0_fb_oclass;
 
@@ -122,13 +137,35 @@
 
 bool nv04_fb_memtype_valid(struct nouveau_fb *, u32 memtype);
 
+void nv10_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
+		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
+void nv10_fb_tile_fini(struct nouveau_fb *, int i, struct nouveau_fb_tile *);
 void nv10_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
 
+int  nv20_fb_vram_init(struct nouveau_fb *);
+void nv20_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
+		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
+void nv20_fb_tile_fini(struct nouveau_fb *, int i, struct nouveau_fb_tile *);
+void nv20_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
+
+int  nv30_fb_init(struct nouveau_object *);
 void nv30_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
 		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
-void nv30_fb_tile_fini(struct nouveau_fb *, int i, struct nouveau_fb_tile *);
+
+void nv40_fb_tile_comp(struct nouveau_fb *, int i, u32 size, u32 flags,
+		       struct nouveau_fb_tile *);
+
+int  nv41_fb_vram_init(struct nouveau_fb *);
+int  nv41_fb_init(struct nouveau_object *);
+void nv41_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
+
+int  nv44_fb_vram_init(struct nouveau_fb *);
+int  nv44_fb_init(struct nouveau_object *);
+void nv44_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
+
+void nv46_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
+		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
 
 void nv50_fb_vram_del(struct nouveau_fb *, struct nouveau_mem **);
-void nv50_fb_trap(struct nouveau_fb *, int display);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
index cd01c53..d70ba34 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
@@ -65,14 +65,14 @@
 }
 
 static u32
-nouveau_barobj_rd32(struct nouveau_object *object, u32 addr)
+nouveau_barobj_rd32(struct nouveau_object *object, u64 addr)
 {
 	struct nouveau_barobj *barobj = (void *)object;
 	return ioread32_native(barobj->iomem + addr);
 }
 
 static void
-nouveau_barobj_wr32(struct nouveau_object *object, u32 addr, u32 data)
+nouveau_barobj_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	struct nouveau_barobj *barobj = (void *)object;
 	iowrite32_native(data, barobj->iomem + addr);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
index 70ca7d5a..dd11194 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
@@ -63,7 +63,7 @@
 	struct pci_dev *pdev = nv_device(bios)->pdev;
 	struct device_node *dn;
 	const u32 *data;
-	int size, i;
+	int size;
 
 	dn = pci_device_to_OF_node(pdev);
 	if (!dn) {
@@ -210,11 +210,19 @@
 		return;
 
 	bios->data = kmalloc(bios->size, GFP_KERNEL);
-	for (i = 0; bios->data && i < bios->size; i += cnt) {
-		cnt = min((bios->size - i), (u32)4096);
-		ret = nouveau_acpi_get_bios_chunk(bios->data, i, cnt);
-		if (ret != cnt)
-			break;
+	if (bios->data) {
+		/* disobey the acpi spec - much faster on at least w530 ... */
+		ret = nouveau_acpi_get_bios_chunk(bios->data, 0, bios->size);
+		if (ret != bios->size ||
+		    nvbios_checksum(bios->data, bios->size)) {
+			/* ... that didn't work, ok, i'll be good now */
+			for (i = 0; i < bios->size; i += cnt) {
+				cnt = min((bios->size - i), (u32)4096);
+				ret = nouveau_acpi_get_bios_chunk(bios->data, i, cnt);
+				if (ret != cnt)
+					break;
+			}
+		}
 	}
 }
 
@@ -358,42 +366,42 @@
 }
 
 static u8
-nouveau_bios_rd08(struct nouveau_object *object, u32 addr)
+nouveau_bios_rd08(struct nouveau_object *object, u64 addr)
 {
 	struct nouveau_bios *bios = (void *)object;
 	return bios->data[addr];
 }
 
 static u16
-nouveau_bios_rd16(struct nouveau_object *object, u32 addr)
+nouveau_bios_rd16(struct nouveau_object *object, u64 addr)
 {
 	struct nouveau_bios *bios = (void *)object;
 	return get_unaligned_le16(&bios->data[addr]);
 }
 
 static u32
-nouveau_bios_rd32(struct nouveau_object *object, u32 addr)
+nouveau_bios_rd32(struct nouveau_object *object, u64 addr)
 {
 	struct nouveau_bios *bios = (void *)object;
 	return get_unaligned_le32(&bios->data[addr]);
 }
 
 static void
-nouveau_bios_wr08(struct nouveau_object *object, u32 addr, u8 data)
+nouveau_bios_wr08(struct nouveau_object *object, u64 addr, u8 data)
 {
 	struct nouveau_bios *bios = (void *)object;
 	bios->data[addr] = data;
 }
 
 static void
-nouveau_bios_wr16(struct nouveau_object *object, u32 addr, u16 data)
+nouveau_bios_wr16(struct nouveau_object *object, u64 addr, u16 data)
 {
 	struct nouveau_bios *bios = (void *)object;
 	put_unaligned_le16(data, &bios->data[addr]);
 }
 
 static void
-nouveau_bios_wr32(struct nouveau_object *object, u32 addr, u32 data)
+nouveau_bios_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	struct nouveau_bios *bios = (void *)object;
 	put_unaligned_le32(data, &bios->data[addr]);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
index c511971..0fd87df 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
@@ -107,6 +107,69 @@
 	return 0x0000;
 }
 
+u16
+dcb_outp_parse(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len,
+	       struct dcb_output *outp)
+{
+	u16 dcb = dcb_outp(bios, idx, ver, len);
+	if (dcb) {
+		if (*ver >= 0x20) {
+			u32 conn = nv_ro32(bios, dcb + 0x00);
+			outp->or        = (conn & 0x0f000000) >> 24;
+			outp->location  = (conn & 0x00300000) >> 20;
+			outp->bus       = (conn & 0x000f0000) >> 16;
+			outp->connector = (conn & 0x0000f000) >> 12;
+			outp->heads     = (conn & 0x00000f00) >> 8;
+			outp->i2c_index = (conn & 0x000000f0) >> 4;
+			outp->type      = (conn & 0x0000000f);
+			outp->link      = 0;
+		} else {
+			dcb = 0x0000;
+		}
+
+		if (*ver >= 0x40) {
+			u32 conf = nv_ro32(bios, dcb + 0x04);
+			switch (outp->type) {
+			case DCB_OUTPUT_TMDS:
+			case DCB_OUTPUT_LVDS:
+			case DCB_OUTPUT_DP:
+				outp->link = (conf & 0x00000030) >> 4;
+				outp->sorconf.link = outp->link; /*XXX*/
+				break;
+			default:
+				break;
+			}
+		}
+	}
+	return dcb;
+}
+
+static inline u16
+dcb_outp_hasht(struct dcb_output *outp)
+{
+	return outp->type;
+}
+
+static inline u16
+dcb_outp_hashm(struct dcb_output *outp)
+{
+	return (outp->heads << 8) | (outp->link << 6) | outp->or;
+}
+
+u16
+dcb_outp_match(struct nouveau_bios *bios, u16 type, u16 mask,
+	       u8 *ver, u8 *len, struct dcb_output *outp)
+{
+	u16 dcb, idx = 0;
+	while ((dcb = dcb_outp_parse(bios, idx++, ver, len, outp))) {
+		if (dcb_outp_hasht(outp) == type) {
+			if ((dcb_outp_hashm(outp) & mask) == mask)
+				break;
+		}
+	}
+	return dcb;
+}
+
 int
 dcb_outp_foreach(struct nouveau_bios *bios, void *data,
 		 int (*exec)(struct nouveau_bios *, void *, int, u16))
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/disp.c b/drivers/gpu/drm/nouveau/core/subdev/bios/disp.c
new file mode 100644
index 0000000..7f16e52
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/disp.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/disp.h>
+
+u16
+nvbios_disp_table(struct nouveau_bios *bios,
+		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *sub)
+{
+	struct bit_entry U;
+
+	if (!bit_entry(bios, 'U', &U)) {
+		if (U.version == 1) {
+			u16 data = nv_ro16(bios, U.offset);
+			if (data) {
+				*ver = nv_ro08(bios, data + 0x00);
+				switch (*ver) {
+				case 0x20:
+				case 0x21:
+					*hdr = nv_ro08(bios, data + 0x01);
+					*len = nv_ro08(bios, data + 0x02);
+					*cnt = nv_ro08(bios, data + 0x03);
+					*sub = nv_ro08(bios, data + 0x04);
+					return data;
+				default:
+					break;
+				}
+			}
+		}
+	}
+
+	return 0x0000;
+}
+
+u16
+nvbios_disp_entry(struct nouveau_bios *bios, u8 idx,
+		  u8 *ver, u8 *len, u8 *sub)
+{
+	u8  hdr, cnt;
+	u16 data = nvbios_disp_table(bios, ver, &hdr, &cnt, len, sub);
+	if (data && idx < cnt)
+		return data + hdr + (idx * *len);
+	*ver = 0x00;
+	return 0x0000;
+}
+
+u16
+nvbios_disp_parse(struct nouveau_bios *bios, u8 idx,
+		  u8 *ver, u8 *len, u8 *sub,
+		  struct nvbios_disp *info)
+{
+	u16 data = nvbios_disp_entry(bios, idx, ver, len, sub);
+	if (data && *len >= 2) {
+		info->data = nv_ro16(bios, data + 0);
+		return data;
+	}
+	return 0x0000;
+}
+
+u16
+nvbios_outp_entry(struct nouveau_bios *bios, u8 idx,
+		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+{
+	struct nvbios_disp info;
+	u16 data = nvbios_disp_parse(bios, idx, ver, len, hdr, &info);
+	if (data) {
+		*cnt = nv_ro08(bios, info.data + 0x05);
+		*len = 0x06;
+		data = info.data;
+	}
+	return data;
+}
+
+u16
+nvbios_outp_parse(struct nouveau_bios *bios, u8 idx,
+		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		  struct nvbios_outp *info)
+{
+	u16 data = nvbios_outp_entry(bios, idx, ver, hdr, cnt, len);
+	if (data && *hdr >= 0x0a) {
+		info->type      = nv_ro16(bios, data + 0x00);
+		info->mask      = nv_ro32(bios, data + 0x02);
+		if (*ver <= 0x20) /* match any link */
+			info->mask |= 0x00c0;
+		info->script[0] = nv_ro16(bios, data + 0x06);
+		info->script[1] = nv_ro16(bios, data + 0x08);
+		info->script[2] = 0x0000;
+		if (*hdr >= 0x0c)
+			info->script[2] = nv_ro16(bios, data + 0x0a);
+		return data;
+	}
+	return 0x0000;
+}
+
+u16
+nvbios_outp_match(struct nouveau_bios *bios, u16 type, u16 mask,
+		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		  struct nvbios_outp *info)
+{
+	u16 data, idx = 0;
+	while ((data = nvbios_outp_parse(bios, idx++, ver, hdr, cnt, len, info)) || *ver) {
+		if (data && info->type == type) {
+			if ((info->mask & mask) == mask)
+				break;
+		}
+	}
+	return data;
+}
+
+u16
+nvbios_ocfg_entry(struct nouveau_bios *bios, u16 outp, u8 idx,
+		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+{
+	if (idx < *cnt)
+		return outp + *hdr + (idx * *len);
+	return 0x0000;
+}
+
+u16
+nvbios_ocfg_parse(struct nouveau_bios *bios, u16 outp, u8 idx,
+		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		  struct nvbios_ocfg *info)
+{
+	u16 data = nvbios_ocfg_entry(bios, outp, idx, ver, hdr, cnt, len);
+	if (data) {
+		info->match     = nv_ro16(bios, data + 0x00);
+		info->clkcmp[0] = nv_ro16(bios, data + 0x02);
+		info->clkcmp[1] = nv_ro16(bios, data + 0x04);
+	}
+	return data;
+}
+
+u16
+nvbios_ocfg_match(struct nouveau_bios *bios, u16 outp, u16 type,
+		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		  struct nvbios_ocfg *info)
+{
+	u16 data, idx = 0;
+	while ((data = nvbios_ocfg_parse(bios, outp, idx++, ver, hdr, cnt, len, info))) {
+		if (info->match == type)
+			break;
+	}
+	return data;
+}
+
+u16
+nvbios_oclk_match(struct nouveau_bios *bios, u16 cmp, u32 khz)
+{
+	while (cmp) {
+		if (khz / 10 >= nv_ro16(bios, cmp + 0x00))
+			return  nv_ro16(bios, cmp + 0x02);
+		cmp += 0x04;
+	}
+	return 0x0000;
+}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c
index 3cbc0f3..663853b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/dp.c
@@ -25,23 +25,29 @@
 
 #include "subdev/bios.h"
 #include "subdev/bios/bit.h"
-#include "subdev/bios/dcb.h"
 #include "subdev/bios/dp.h"
 
-u16
-dp_table(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+static u16
+nvbios_dp_table(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
-	struct bit_entry bit_d;
+	struct bit_entry d;
 
-	if (!bit_entry(bios, 'd', &bit_d)) {
-		if (bit_d.version == 1) {
-			u16 data = nv_ro16(bios, bit_d.offset);
+	if (!bit_entry(bios, 'd', &d)) {
+		if (d.version == 1 && d.length >= 2) {
+			u16 data = nv_ro16(bios, d.offset);
 			if (data) {
-				*ver = nv_ro08(bios, data + 0);
-				*hdr = nv_ro08(bios, data + 1);
-				*len = nv_ro08(bios, data + 2);
-				*cnt = nv_ro08(bios, data + 3);
-				return data;
+				*ver = nv_ro08(bios, data + 0x00);
+				switch (*ver) {
+				case 0x21:
+				case 0x30:
+				case 0x40:
+					*hdr = nv_ro08(bios, data + 0x01);
+					*len = nv_ro08(bios, data + 0x02);
+					*cnt = nv_ro08(bios, data + 0x03);
+					return data;
+				default:
+					break;
+				}
 			}
 		}
 	}
@@ -49,28 +55,150 @@
 	return 0x0000;
 }
 
-u16
-dp_outp(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len)
+static u16
+nvbios_dpout_entry(struct nouveau_bios *bios, u8 idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
-	u8  hdr, cnt;
-	u16 table = dp_table(bios, ver, &hdr, &cnt, len);
-	if (table && idx < cnt)
-		return nv_ro16(bios, table + hdr + (idx * *len));
-	return 0xffff;
+	u16 data = nvbios_dp_table(bios, ver, hdr, cnt, len);
+	if (data && idx < *cnt) {
+		u16 outp = nv_ro16(bios, data + *hdr + idx * *len);
+		switch (*ver * !!outp) {
+		case 0x21:
+		case 0x30:
+			*hdr = nv_ro08(bios, data + 0x04);
+			*len = nv_ro08(bios, data + 0x05);
+			*cnt = nv_ro08(bios, outp + 0x04);
+			break;
+		case 0x40:
+			*hdr = nv_ro08(bios, data + 0x04);
+			*cnt = 0;
+			*len = 0;
+			break;
+		default:
+			break;
+		}
+		return outp;
+	}
+	*ver = 0x00;
+	return 0x0000;
 }
 
 u16
-dp_outp_match(struct nouveau_bios *bios, struct dcb_output *outp,
-	      u8 *ver, u8 *len)
+nvbios_dpout_parse(struct nouveau_bios *bios, u8 idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_dpout *info)
 {
-	u8  idx = 0;
-	u16 data;
-	while ((data = dp_outp(bios, idx++, ver, len)) != 0xffff) {
-		if (data) {
-			u32 hash = nv_ro32(bios, data);
-			if (dcb_hash_match(outp, hash))
-				return data;
+	u16 data = nvbios_dpout_entry(bios, idx, ver, hdr, cnt, len);
+	if (data && *ver) {
+		info->type = nv_ro16(bios, data + 0x00);
+		info->mask = nv_ro16(bios, data + 0x02);
+		switch (*ver) {
+		case 0x21:
+		case 0x30:
+			info->flags     = nv_ro08(bios, data + 0x05);
+			info->script[0] = nv_ro16(bios, data + 0x06);
+			info->script[1] = nv_ro16(bios, data + 0x08);
+			info->lnkcmp    = nv_ro16(bios, data + 0x0a);
+			info->script[2] = nv_ro16(bios, data + 0x0c);
+			info->script[3] = nv_ro16(bios, data + 0x0e);
+			info->script[4] = nv_ro16(bios, data + 0x10);
+			break;
+		case 0x40:
+			info->flags     = nv_ro08(bios, data + 0x04);
+			info->script[0] = nv_ro16(bios, data + 0x05);
+			info->script[1] = nv_ro16(bios, data + 0x07);
+			info->lnkcmp    = nv_ro16(bios, data + 0x09);
+			info->script[2] = nv_ro16(bios, data + 0x0b);
+			info->script[3] = nv_ro16(bios, data + 0x0d);
+			info->script[4] = nv_ro16(bios, data + 0x0f);
+			break;
+		default:
+			data = 0x0000;
+			break;
 		}
 	}
+	return data;
+}
+
+u16
+nvbios_dpout_match(struct nouveau_bios *bios, u16 type, u16 mask,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_dpout *info)
+{
+	u16 data, idx = 0;
+	while ((data = nvbios_dpout_parse(bios, idx++, ver, hdr, cnt, len, info)) || *ver) {
+		if (data && info->type == type) {
+			if ((info->mask & mask) == mask)
+				break;
+		}
+	}
+	return data;
+}
+
+static u16
+nvbios_dpcfg_entry(struct nouveau_bios *bios, u16 outp, u8 idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+{
+	if (*ver >= 0x40) {
+		outp = nvbios_dp_table(bios, ver, hdr, cnt, len);
+		*hdr = *hdr + (*len * *cnt);
+		*len = nv_ro08(bios, outp + 0x06);
+		*cnt = nv_ro08(bios, outp + 0x07);
+	}
+
+	if (idx < *cnt)
+		return outp + *hdr + (idx * *len);
+
 	return 0x0000;
 }
+
+u16
+nvbios_dpcfg_parse(struct nouveau_bios *bios, u16 outp, u8 idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_dpcfg *info)
+{
+	u16 data = nvbios_dpcfg_entry(bios, outp, idx, ver, hdr, cnt, len);
+	if (data) {
+		switch (*ver) {
+		case 0x21:
+			info->drv = nv_ro08(bios, data + 0x02);
+			info->pre = nv_ro08(bios, data + 0x03);
+			info->unk = nv_ro08(bios, data + 0x04);
+			break;
+		case 0x30:
+		case 0x40:
+			info->drv = nv_ro08(bios, data + 0x01);
+			info->pre = nv_ro08(bios, data + 0x02);
+			info->unk = nv_ro08(bios, data + 0x03);
+			break;
+		default:
+			data = 0x0000;
+			break;
+		}
+	}
+	return data;
+}
+
+u16
+nvbios_dpcfg_match(struct nouveau_bios *bios, u16 outp, u8 un, u8 vs, u8 pe,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_dpcfg *info)
+{
+	u8 idx = 0x00;
+	u16 data;
+
+	if (*ver >= 0x30) {
+		const u8 vsoff[] = { 0, 4, 7, 9 };
+		idx = (un * 10) + vsoff[vs] + pe;
+	} else {
+		while ((data = nvbios_dpcfg_entry(bios, outp, idx,
+						  ver, hdr, cnt, len))) {
+			if (nv_ro08(bios, data + 0x00) == vs &&
+			    nv_ro08(bios, data + 0x01) == pe)
+				break;
+			idx++;
+		}
+	}
+
+	return nvbios_dpcfg_parse(bios, outp, idx, ver, hdr, cnt, len, info);
+}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c b/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c
index 4c9f1e5..c90d4aa 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c
@@ -101,8 +101,8 @@
 	}
 
 	/* DCB 2.2, fixed TVDAC GPIO data */
-	if ((entry = dcb_table(bios, &ver, &hdr, &cnt, &len)) && ver >= 0x22) {
-		if (func == DCB_GPIO_TVDAC0) {
+	if ((entry = dcb_table(bios, &ver, &hdr, &cnt, &len))) {
+		if (ver >= 0x22 && ver < 0x30 && func == DCB_GPIO_TVDAC0) {
 			u8 conf = nv_ro08(bios, entry - 5);
 			u8 addr = nv_ro08(bios, entry - 4);
 			if (conf & 0x01) {
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
index 6be8c32..ae168bb 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
@@ -743,9 +743,10 @@
 init_dp_condition(struct nvbios_init *init)
 {
 	struct nouveau_bios *bios = init->bios;
+	struct nvbios_dpout info;
 	u8  cond = nv_ro08(bios, init->offset + 1);
 	u8  unkn = nv_ro08(bios, init->offset + 2);
-	u8  ver, len;
+	u8  ver, hdr, cnt, len;
 	u16 data;
 
 	trace("DP_CONDITION\t0x%02x 0x%02x\n", cond, unkn);
@@ -759,10 +760,12 @@
 	case 1:
 	case 2:
 		if ( init->outp &&
-		    (data = dp_outp_match(bios, init->outp, &ver, &len))) {
-			if (ver <= 0x40 && !(nv_ro08(bios, data + 5) & cond))
-				init_exec_set(init, false);
-			if (ver == 0x40 && !(nv_ro08(bios, data + 4) & cond))
+		    (data = nvbios_dpout_match(bios, DCB_OUTPUT_DP,
+					       (init->outp->or << 0) |
+					       (init->outp->sorconf.link << 6),
+					       &ver, &hdr, &cnt, &len, &info)))
+		{
+			if (!(info.flags & cond))
 				init_exec_set(init, false);
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/base.c b/drivers/gpu/drm/nouveau/core/subdev/device/base.c
index ca9a464..f8a7ed4 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/base.c
@@ -25,7 +25,6 @@
 #include <core/object.h>
 #include <core/device.h>
 #include <core/client.h>
-#include <core/device.h>
 #include <core/option.h>
 
 #include <core/class.h>
@@ -61,19 +60,24 @@
 
 static const u64 disable_map[] = {
 	[NVDEV_SUBDEV_VBIOS]	= NV_DEVICE_DISABLE_VBIOS,
+	[NVDEV_SUBDEV_DEVINIT]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_GPIO]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_I2C]	= NV_DEVICE_DISABLE_CORE,
-	[NVDEV_SUBDEV_DEVINIT]	= NV_DEVICE_DISABLE_CORE,
+	[NVDEV_SUBDEV_CLOCK]	= NV_DEVICE_DISABLE_CORE,
+	[NVDEV_SUBDEV_MXM]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_MC]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_TIMER]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_FB]	= NV_DEVICE_DISABLE_CORE,
-	[NVDEV_SUBDEV_VM]	= NV_DEVICE_DISABLE_CORE,
+	[NVDEV_SUBDEV_LTCG]	= NV_DEVICE_DISABLE_CORE,
+	[NVDEV_SUBDEV_IBUS]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_INSTMEM]	= NV_DEVICE_DISABLE_CORE,
+	[NVDEV_SUBDEV_VM]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_BAR]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_VOLT]	= NV_DEVICE_DISABLE_CORE,
-	[NVDEV_SUBDEV_CLOCK]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_SUBDEV_THERM]	= NV_DEVICE_DISABLE_CORE,
 	[NVDEV_ENGINE_DMAOBJ]	= NV_DEVICE_DISABLE_CORE,
+	[NVDEV_ENGINE_FIFO]	= NV_DEVICE_DISABLE_FIFO,
+	[NVDEV_ENGINE_SW]	= NV_DEVICE_DISABLE_FIFO,
 	[NVDEV_ENGINE_GR]	= NV_DEVICE_DISABLE_GRAPH,
 	[NVDEV_ENGINE_MPEG]	= NV_DEVICE_DISABLE_MPEG,
 	[NVDEV_ENGINE_ME]	= NV_DEVICE_DISABLE_ME,
@@ -84,7 +88,7 @@
 	[NVDEV_ENGINE_COPY0]	= NV_DEVICE_DISABLE_COPY0,
 	[NVDEV_ENGINE_COPY1]	= NV_DEVICE_DISABLE_COPY1,
 	[NVDEV_ENGINE_UNK1C1]	= NV_DEVICE_DISABLE_UNK1C1,
-	[NVDEV_ENGINE_FIFO]	= NV_DEVICE_DISABLE_FIFO,
+	[NVDEV_ENGINE_VENC]	= NV_DEVICE_DISABLE_VENC,
 	[NVDEV_ENGINE_DISP]	= NV_DEVICE_DISABLE_DISP,
 	[NVDEV_SUBDEV_NR]	= 0,
 };
@@ -208,7 +212,7 @@
 
 		/* determine frequency of timing crystal */
 		if ( device->chipset < 0x17 ||
-		    (device->chipset >= 0x20 && device->chipset <= 0x25))
+		    (device->chipset >= 0x20 && device->chipset < 0x25))
 			strap &= 0x00000040;
 		else
 			strap &= 0x00400040;
@@ -356,37 +360,37 @@
 }
 
 static u8
-nouveau_devobj_rd08(struct nouveau_object *object, u32 addr)
+nouveau_devobj_rd08(struct nouveau_object *object, u64 addr)
 {
 	return nv_rd08(object->engine, addr);
 }
 
 static u16
-nouveau_devobj_rd16(struct nouveau_object *object, u32 addr)
+nouveau_devobj_rd16(struct nouveau_object *object, u64 addr)
 {
 	return nv_rd16(object->engine, addr);
 }
 
 static u32
-nouveau_devobj_rd32(struct nouveau_object *object, u32 addr)
+nouveau_devobj_rd32(struct nouveau_object *object, u64 addr)
 {
 	return nv_rd32(object->engine, addr);
 }
 
 static void
-nouveau_devobj_wr08(struct nouveau_object *object, u32 addr, u8 data)
+nouveau_devobj_wr08(struct nouveau_object *object, u64 addr, u8 data)
 {
 	nv_wr08(object->engine, addr, data);
 }
 
 static void
-nouveau_devobj_wr16(struct nouveau_object *object, u32 addr, u16 data)
+nouveau_devobj_wr16(struct nouveau_object *object, u64 addr, u16 data)
 {
 	nv_wr16(object->engine, addr, data);
 }
 
 static void
-nouveau_devobj_wr32(struct nouveau_object *object, u32 addr, u32 data)
+nouveau_devobj_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	nv_wr32(object->engine, addr, data);
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nv10.c b/drivers/gpu/drm/nouveau/core/subdev/device/nv10.c
index f09accf..9c40b0f 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nv10.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nv10.c
@@ -105,7 +105,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv10_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv1a_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -159,7 +159,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv10_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv1a_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nv20.c b/drivers/gpu/drm/nouveau/core/subdev/device/nv20.c
index 5fa58b7..74f88f4 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nv20.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nv20.c
@@ -72,7 +72,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv20_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv25_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -90,7 +90,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv20_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv25_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -108,7 +108,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv20_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv25_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nv30.c b/drivers/gpu/drm/nouveau/core/subdev/device/nv30.c
index 7f4b8fe..0ac1b2c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nv30.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nv30.c
@@ -72,7 +72,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv30_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv35_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -109,7 +109,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv20_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv30_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv36_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -128,7 +128,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv10_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv30_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv10_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv04_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv04_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/device/nv40.c
index 42deadc..41d5968 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nv40.c
@@ -76,7 +76,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv41_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv41_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -96,7 +96,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv41_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv41_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -116,7 +116,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv41_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv41_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -156,7 +156,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv47_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv41_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -176,7 +176,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv49_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv41_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -196,7 +196,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv04_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv49_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv41_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -216,7 +216,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv44_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -236,7 +236,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv46_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -256,7 +256,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv44_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -276,7 +276,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv46_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -296,7 +296,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv4e_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -316,7 +316,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv46_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -336,7 +336,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv46_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
@@ -356,7 +356,7 @@
 		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv1a_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv44_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
-		device->oclass[NVDEV_SUBDEV_FB     ] = &nv40_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nv46_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv40_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nv44_vmmgr_oclass;
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nv04_dmaeng_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/device/nv50.c
index fec3bcc..6ccfd85 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nv50.c
@@ -98,7 +98,7 @@
 		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv84_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv84_disp_oclass;
 		break;
 	case 0x86:
 		device->cname = "G86";
@@ -123,7 +123,7 @@
 		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv84_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv84_disp_oclass;
 		break;
 	case 0x92:
 		device->cname = "G92";
@@ -148,7 +148,7 @@
 		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv84_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv84_disp_oclass;
 		break;
 	case 0x94:
 		device->cname = "G94";
@@ -173,7 +173,7 @@
 		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv84_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
 	case 0x96:
 		device->cname = "G96";
@@ -198,7 +198,7 @@
 		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv84_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
 	case 0x98:
 		device->cname = "G98";
@@ -223,7 +223,7 @@
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv98_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
 	case 0xa0:
 		device->cname = "G200";
@@ -248,7 +248,7 @@
 		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv84_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva0_disp_oclass;
 		break;
 	case 0xaa:
 		device->cname = "MCP77/MCP78";
@@ -273,7 +273,7 @@
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv98_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
 	case 0xac:
 		device->cname = "MCP79/MCP7A";
@@ -298,7 +298,7 @@
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv98_crypt_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
 	case 0xa3:
 		device->cname = "GT215";
@@ -324,7 +324,7 @@
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xa5:
 		device->cname = "GT216";
@@ -349,7 +349,7 @@
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xa8:
 		device->cname = "GT218";
@@ -374,7 +374,7 @@
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xaf:
 		device->cname = "MCP89";
@@ -399,7 +399,7 @@
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	default:
 		nv_fatal(device, "unknown Tesla chipset\n");
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/device/nvc0.c
index 6697f0f..f046168 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nvc0.c
@@ -74,12 +74,12 @@
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nvc0_copy1_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xc4:
 		device->cname = "GF104";
@@ -102,12 +102,12 @@
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nvc0_copy1_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xc3:
 		device->cname = "GF106";
@@ -130,12 +130,12 @@
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nvc0_copy1_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xce:
 		device->cname = "GF114";
@@ -158,12 +158,12 @@
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nvc0_copy1_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xcf:
 		device->cname = "GF116";
@@ -186,12 +186,12 @@
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nvc0_copy1_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xc1:
 		device->cname = "GF108";
@@ -214,12 +214,12 @@
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nvc0_copy1_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xc8:
 		device->cname = "GF110";
@@ -242,12 +242,12 @@
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nvc0_copy1_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nv50_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
 		break;
 	case 0xd9:
 		device->cname = "GF119";
@@ -266,13 +266,13 @@
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv50_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
 		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
-		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvc0_dmaeng_oclass;
+		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass;
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nvc0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nvc0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
-		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nvc0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nvc0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nvc0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nvd0_disp_oclass;
 		break;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c b/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c
index 4a280b7..9b7881e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c
@@ -45,6 +45,9 @@
 #include <engine/graph.h>
 #include <engine/disp.h>
 #include <engine/copy.h>
+#include <engine/bsp.h>
+#include <engine/vp.h>
+#include <engine/ppp.h>
 
 int
 nve0_identify(struct nouveau_device *device)
@@ -67,13 +70,16 @@
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv50_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
 		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
-		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvc0_dmaeng_oclass;
+		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass;
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nve0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nve0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nvd0_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nve0_disp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		break;
 	case 0xe7:
 		device->cname = "GK107";
@@ -92,13 +98,16 @@
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv50_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
 		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
-		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvc0_dmaeng_oclass;
+		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass;
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nve0_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nve0_graph_oclass;
-		device->oclass[NVDEV_ENGINE_DISP   ] = &nvd0_disp_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nve0_disp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		break;
 	default:
 		nv_fatal(device, "unknown Kepler chipset\n");
diff --git a/drivers/gpu/drm/nouveau/core/subdev/devinit/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/devinit/nv50.c
index 61becfa..ae7249b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/devinit/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/devinit/nv50.c
@@ -22,6 +22,10 @@
  * Authors: Ben Skeggs
  */
 
+#include <subdev/bios.h>
+#include <subdev/bios/dcb.h>
+#include <subdev/bios/disp.h>
+#include <subdev/bios/init.h>
 #include <subdev/devinit.h>
 #include <subdev/vga.h>
 
@@ -55,7 +59,12 @@
 static int
 nv50_devinit_init(struct nouveau_object *object)
 {
+	struct nouveau_bios *bios = nouveau_bios(object);
 	struct nv50_devinit_priv *priv = (void *)object;
+	struct nvbios_outp info;
+	struct dcb_output outp;
+	u8  ver = 0xff, hdr, cnt, len;
+	int ret, i = 0;
 
 	if (!priv->base.post) {
 		if (!nv_rdvgac(priv, 0, 0x00) &&
@@ -65,7 +74,30 @@
 		}
 	}
 
-	return nouveau_devinit_init(&priv->base);
+	ret = nouveau_devinit_init(&priv->base);
+	if (ret)
+		return ret;
+
+	/* if we ran the init tables, execute first script pointer for each
+	 * display table output entry that has a matching dcb entry.
+	 */
+	while (priv->base.post && ver) {
+		u16 data = nvbios_outp_parse(bios, i++, &ver, &hdr, &cnt, &len, &info);
+		if (data && dcb_outp_match(bios, info.type, info.mask, &ver, &len, &outp)) {
+			struct nvbios_init init = {
+				.subdev = nv_subdev(priv),
+				.bios = bios,
+				.offset = info.script[0],
+				.outp = &outp,
+				.crtc = -1,
+				.execute = 1,
+			};
+
+			nvbios_exec(&init);
+		}
+	}
+
+	return 0;
 }
 
 static int
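
The devinit hunk above walks the VBIOS display table and fires the first script of every
entry that also has a matching DCB output, terminating once nvbios_outp_parse() reports
version 0. A self-contained toy sketch of that loop shape (parse_outp(), match_dcb() and
the two-entry table are made-up stand-ins, not the real nouveau API):

#include <stdio.h>

/* Toy stand-ins for nvbios_outp_parse()/dcb_outp_match(), simulating
 * a two-entry display table; only entry 0 has a DCB match.
 */
struct outp { int type; int mask; unsigned short script0; };

static unsigned short
parse_outp(int idx, unsigned char *ver, struct outp *o)
{
	static const struct outp table[] = {
		{ 1, 0x01, 0x1234 },
		{ 2, 0x02, 0x5678 },
	};

	if (idx >= 2) {
		*ver = 0;	/* end of table terminates the loop */
		return 0;
	}
	*o = table[idx];
	return 0xbeef;		/* non-zero "data" offset */
}

static int match_dcb(int type, int mask) { return type == 1; }

int main(void)
{
	unsigned char ver = 0xff;
	struct outp o;
	int i = 0;

	while (ver) {	/* assume the board was POSTed */
		unsigned short data = parse_outp(i++, &ver, &o);
		if (data && match_dcb(o.type, o.mask))
			printf("exec script 0x%04x\n", o.script0);
	}
	return 0;
}
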
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/base.c b/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
index f0086de..d6d1600 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
@@ -57,6 +57,67 @@
 }
 
 int
+nouveau_fb_preinit(struct nouveau_fb *pfb)
+{
+	static const char *name[] = {
+		[NV_MEM_TYPE_UNKNOWN] = "unknown",
+		[NV_MEM_TYPE_STOLEN ] = "stolen system memory",
+		[NV_MEM_TYPE_SGRAM  ] = "SGRAM",
+		[NV_MEM_TYPE_SDRAM  ] = "SDRAM",
+		[NV_MEM_TYPE_DDR1   ] = "DDR1",
+		[NV_MEM_TYPE_DDR2   ] = "DDR2",
+		[NV_MEM_TYPE_DDR3   ] = "DDR3",
+		[NV_MEM_TYPE_GDDR2  ] = "GDDR2",
+		[NV_MEM_TYPE_GDDR3  ] = "GDDR3",
+		[NV_MEM_TYPE_GDDR4  ] = "GDDR4",
+		[NV_MEM_TYPE_GDDR5  ] = "GDDR5",
+	};
+	int ret, tags;
+
+	tags = pfb->ram.init(pfb);
+	if (tags < 0 || !pfb->ram.size) {
+		nv_fatal(pfb, "error detecting memory configuration!!\n");
+		return (tags < 0) ? tags : -ERANGE;
+	}
+
+	if (!nouveau_mm_initialised(&pfb->vram)) {
+		ret = nouveau_mm_init(&pfb->vram, 0, pfb->ram.size >> 12, 1);
+		if (ret)
+			return ret;
+	}
+
+	if (!nouveau_mm_initialised(&pfb->tags) && tags) {
+		ret = nouveau_mm_init(&pfb->tags, 0, ++tags, 1);
+		if (ret)
+			return ret;
+	}
+
+	nv_info(pfb, "RAM type: %s\n", name[pfb->ram.type]);
+	nv_info(pfb, "RAM size: %d MiB\n", (int)(pfb->ram.size >> 20));
+	nv_info(pfb, "   ZCOMP: %d tags\n", tags);
+	return 0;
+}
+
+void
+nouveau_fb_destroy(struct nouveau_fb *pfb)
+{
+	int i;
+
+	for (i = 0; i < pfb->tile.regions; i++)
+		pfb->tile.fini(pfb, i, &pfb->tile.region[i]);
+	nouveau_mm_fini(&pfb->tags);
+	nouveau_mm_fini(&pfb->vram);
+
+	nouveau_subdev_destroy(&pfb->base);
+}
+
+void
+_nouveau_fb_dtor(struct nouveau_object *object)
+{
+	struct nouveau_fb *pfb = (void *)object;
+	nouveau_fb_destroy(pfb);
+}
+int
 nouveau_fb_init(struct nouveau_fb *pfb)
 {
 	int ret, i;
@@ -77,54 +138,3 @@
 	struct nouveau_fb *pfb = (void *)object;
 	return nouveau_fb_init(pfb);
 }
-
-void
-nouveau_fb_destroy(struct nouveau_fb *pfb)
-{
-	int i;
-
-	for (i = 0; i < pfb->tile.regions; i++)
-		pfb->tile.fini(pfb, i, &pfb->tile.region[i]);
-
-	if (pfb->tags.block_size)
-		nouveau_mm_fini(&pfb->tags);
-
-	if (pfb->vram.block_size)
-		nouveau_mm_fini(&pfb->vram);
-
-	nouveau_subdev_destroy(&pfb->base);
-}
-
-void
-_nouveau_fb_dtor(struct nouveau_object *object)
-{
-	struct nouveau_fb *pfb = (void *)object;
-	nouveau_fb_destroy(pfb);
-}
-
-int
-nouveau_fb_created(struct nouveau_fb *pfb)
-{
-	static const char *name[] = {
-		[NV_MEM_TYPE_UNKNOWN] = "unknown",
-		[NV_MEM_TYPE_STOLEN ] = "stolen system memory",
-		[NV_MEM_TYPE_SGRAM  ] = "SGRAM",
-		[NV_MEM_TYPE_SDRAM  ] = "SDRAM",
-		[NV_MEM_TYPE_DDR1   ] = "DDR1",
-		[NV_MEM_TYPE_DDR2   ] = "DDR2",
-		[NV_MEM_TYPE_DDR3   ] = "DDR3",
-		[NV_MEM_TYPE_GDDR2  ] = "GDDR2",
-		[NV_MEM_TYPE_GDDR3  ] = "GDDR3",
-		[NV_MEM_TYPE_GDDR4  ] = "GDDR4",
-		[NV_MEM_TYPE_GDDR5  ] = "GDDR5",
-	};
-
-	if (pfb->ram.size == 0) {
-		nv_fatal(pfb, "no vram detected!!\n");
-		return -ERANGE;
-	}
-
-	nv_info(pfb, "RAM type: %s\n", name[pfb->ram.type]);
-	nv_info(pfb, "RAM size: %d MiB\n", (int)(pfb->ram.size >> 20));
-	return 0;
-}
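
The fb/base.c rework above inverts the old flow: instead of each chipset constructor
probing VRAM itself and then calling nouveau_fb_created(), constructors now only install
a ram.init hook and call nouveau_fb_preinit(), which runs the hook, validates the result,
and owns the vram/tag allocators. A minimal sketch of that contract, with hypothetical
stand-in names rather than the real nouveau types:

#include <errno.h>
#include <stdio.h>

/* Sketch only: the common layer owns allocator setup, the per-chipset
 * hook just detects the memory and returns the ZCOMP tag count
 * (negative errno on failure), mirroring nouveau_fb_preinit().
 */
struct fb {
	int (*ram_init)(struct fb *pfb);
	unsigned long long ram_size;	/* bytes, filled in by the hook */
};

static int toy_ram_init(struct fb *pfb)
{
	pfb->ram_size = 64ULL << 20;	/* pretend 64 MiB was detected */
	return 0x2000;			/* pretend 0x2000 tags exist */
}

static int fb_preinit(struct fb *pfb)
{
	int tags = pfb->ram_init(pfb);

	if (tags < 0 || !pfb->ram_size)
		return tags < 0 ? tags : -ERANGE;

	/* the real code would now build a vram allocator over
	 * ram_size >> 12 pages and a tag allocator of tags + 1 slots */
	printf("%llu MiB, %d tags\n", pfb->ram_size >> 20, tags);
	return 0;
}

int main(void)
{
	struct fb pfb = { .ram_init = toy_ram_init };
	return fb_preinit(&pfb);
}
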
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv04.c
index eb06836..6e369f8 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv04.c
@@ -56,6 +56,37 @@
 }
 
 static int
+nv04_fb_vram_init(struct nouveau_fb *pfb)
+{
+	u32 boot0 = nv_rd32(pfb, NV04_PFB_BOOT_0);
+	if (boot0 & 0x00000100) {
+		pfb->ram.size  = ((boot0 >> 12) & 0xf) * 2 + 2;
+		pfb->ram.size *= 1024 * 1024;
+	} else {
+		switch (boot0 & NV04_PFB_BOOT_0_RAM_AMOUNT) {
+		case NV04_PFB_BOOT_0_RAM_AMOUNT_32MB:
+			pfb->ram.size = 32 * 1024 * 1024;
+			break;
+		case NV04_PFB_BOOT_0_RAM_AMOUNT_16MB:
+			pfb->ram.size = 16 * 1024 * 1024;
+			break;
+		case NV04_PFB_BOOT_0_RAM_AMOUNT_8MB:
+			pfb->ram.size = 8 * 1024 * 1024;
+			break;
+		case NV04_PFB_BOOT_0_RAM_AMOUNT_4MB:
+			pfb->ram.size = 4 * 1024 * 1024;
+			break;
+		}
+	}
+
+	if ((boot0 & 0x00000038) <= 0x10)
+		pfb->ram.type = NV_MEM_TYPE_SGRAM;
+	else
+		pfb->ram.type = NV_MEM_TYPE_SDRAM;
+	return 0;
+}
+
+static int
 nv04_fb_init(struct nouveau_object *object)
 {
 	struct nv04_fb_priv *priv = (void *)object;
@@ -79,7 +110,6 @@
 	     struct nouveau_object **pobject)
 {
 	struct nv04_fb_priv *priv;
-	u32 boot0;
 	int ret;
 
 	ret = nouveau_fb_create(parent, engine, oclass, &priv);
@@ -87,35 +117,9 @@
 	if (ret)
 		return ret;
 
-	boot0 = nv_rd32(priv, NV04_PFB_BOOT_0);
-	if (boot0 & 0x00000100) {
-		priv->base.ram.size  = ((boot0 >> 12) & 0xf) * 2 + 2;
-		priv->base.ram.size *= 1024 * 1024;
-	} else {
-		switch (boot0 & NV04_PFB_BOOT_0_RAM_AMOUNT) {
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_32MB:
-			priv->base.ram.size = 32 * 1024 * 1024;
-			break;
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_16MB:
-			priv->base.ram.size = 16 * 1024 * 1024;
-			break;
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_8MB:
-			priv->base.ram.size = 8 * 1024 * 1024;
-			break;
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_4MB:
-			priv->base.ram.size = 4 * 1024 * 1024;
-			break;
-		}
-	}
-
-	if ((boot0 & 0x00000038) <= 0x10)
-		priv->base.ram.type = NV_MEM_TYPE_SGRAM;
-	else
-		priv->base.ram.type = NV_MEM_TYPE_SDRAM;
-
-
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	return nouveau_fb_created(&priv->base);
+	priv->base.ram.init = nv04_fb_vram_init;
+	return nouveau_fb_preinit(&priv->base);
 }
 
 struct nouveau_oclass
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv10.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv10.c
index f037a42..edbbe26 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv10.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv10.c
@@ -30,7 +30,20 @@
 	struct nouveau_fb base;
 };
 
-static void
+static int
+nv10_fb_vram_init(struct nouveau_fb *pfb)
+{
+	u32 cfg0 = nv_rd32(pfb, 0x100200);
+	if (cfg0 & 0x00000001)
+		pfb->ram.type = NV_MEM_TYPE_DDR1;
+	else
+		pfb->ram.type = NV_MEM_TYPE_SDRAM;
+
+	pfb->ram.size = nv_rd32(pfb, 0x10020c) & 0xff000000;
+	return 0;
+}
+
+void
 nv10_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
 		  u32 flags, struct nouveau_fb_tile *tile)
 {
@@ -39,7 +52,7 @@
 	tile->pitch = pitch;
 }
 
-static void
+void
 nv10_fb_tile_fini(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
 {
 	tile->addr  = 0;
@@ -54,6 +67,7 @@
 	nv_wr32(pfb, 0x100244 + (i * 0x10), tile->limit);
 	nv_wr32(pfb, 0x100248 + (i * 0x10), tile->pitch);
 	nv_wr32(pfb, 0x100240 + (i * 0x10), tile->addr);
+	nv_rd32(pfb, 0x100240 + (i * 0x10));
 }
 
 static int
@@ -61,7 +75,6 @@
 	     struct nouveau_oclass *oclass, void *data, u32 size,
 	     struct nouveau_object **pobject)
 {
-	struct nouveau_device *device = nv_device(parent);
 	struct nv10_fb_priv *priv;
 	int ret;
 
@@ -70,42 +83,13 @@
 	if (ret)
 		return ret;
 
-	if (device->chipset == 0x1a ||  device->chipset == 0x1f) {
-		struct pci_dev *bridge;
-		u32 mem, mib;
-
-		bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 1));
-		if (!bridge) {
-			nv_fatal(device, "no bridge device\n");
-			return 0;
-		}
-
-		if (device->chipset == 0x1a) {
-			pci_read_config_dword(bridge, 0x7c, &mem);
-			mib = ((mem >> 6) & 31) + 1;
-		} else {
-			pci_read_config_dword(bridge, 0x84, &mem);
-			mib = ((mem >> 4) & 127) + 1;
-		}
-
-		priv->base.ram.type = NV_MEM_TYPE_STOLEN;
-		priv->base.ram.size = mib * 1024 * 1024;
-	} else {
-		u32 cfg0 = nv_rd32(priv, 0x100200);
-		if (cfg0 & 0x00000001)
-			priv->base.ram.type = NV_MEM_TYPE_DDR1;
-		else
-			priv->base.ram.type = NV_MEM_TYPE_SDRAM;
-
-		priv->base.ram.size = nv_rd32(priv, 0x10020c) & 0xff000000;
-	}
-
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv10_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv10_fb_tile_init;
 	priv->base.tile.fini = nv10_fb_tile_fini;
 	priv->base.tile.prog = nv10_fb_tile_prog;
-	return nouveau_fb_created(&priv->base);
+	return nouveau_fb_preinit(&priv->base);
 }
 
 struct nouveau_oclass
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c
new file mode 100644
index 0000000..4836684
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv1a_fb_priv {
+	struct nouveau_fb base;
+};
+
+static int
+nv1a_fb_vram_init(struct nouveau_fb *pfb)
+{
+	struct pci_dev *bridge;
+	u32 mem, mib;
+
+	bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 1));
+	if (!bridge) {
+		nv_fatal(pfb, "no bridge device\n");
+		return -ENODEV;
+	}
+
+	if (nv_device(pfb)->chipset == 0x1a) {
+		pci_read_config_dword(bridge, 0x7c, &mem);
+		mib = ((mem >> 6) & 31) + 1;
+	} else {
+		pci_read_config_dword(bridge, 0x84, &mem);
+		mib = ((mem >> 4) & 127) + 1;
+	}
+
+	pfb->ram.type = NV_MEM_TYPE_STOLEN;
+	pfb->ram.size = mib * 1024 * 1024;
+	return 0;
+}
+
+static int
+nv1a_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv1a_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv1a_fb_vram_init;
+	priv->base.tile.regions = 8;
+	priv->base.tile.init = nv10_fb_tile_init;
+	priv->base.tile.fini = nv10_fb_tile_fini;
+	priv->base.tile.prog = nv10_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+struct nouveau_oclass
+nv1a_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x1a),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv1a_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = _nouveau_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
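
On the nForce IGPs split out into nv1a.c above there is no dedicated VRAM; the
stolen-memory size is read from a bitfield in the host bridge's PCI config space
(dword 0x7c on nv1a, 0x84 on nv1f), stored as MiB minus one. A tiny worked example
of that decoding, using an invented register value:

#include <stdio.h>

/* Worked example of the stolen-memory decode above; the config-space
 * value is invented, not read from real hardware.
 */
int main(void)
{
	unsigned int mem = 0x000003f0;	/* pretend dword at 0x84 (nv1f) */
	unsigned int mib = ((mem >> 4) & 127) + 1;

	printf("IGP stolen memory: %u MiB\n", mib);	/* prints 64 */
	return 0;
}
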
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c
index 4b3578f..5d14612 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c
@@ -30,43 +30,54 @@
 	struct nouveau_fb base;
 };
 
-static void
+int
+nv20_fb_vram_init(struct nouveau_fb *pfb)
+{
+	u32 pbus1218 = nv_rd32(pfb, 0x001218);
+
+	switch (pbus1218 & 0x00000300) {
+	case 0x00000000: pfb->ram.type = NV_MEM_TYPE_SDRAM; break;
+	case 0x00000100: pfb->ram.type = NV_MEM_TYPE_DDR1; break;
+	case 0x00000200: pfb->ram.type = NV_MEM_TYPE_GDDR3; break;
+	case 0x00000300: pfb->ram.type = NV_MEM_TYPE_GDDR2; break;
+	}
+	pfb->ram.size  = (nv_rd32(pfb, 0x10020c) & 0xff000000);
+	pfb->ram.parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
+
+	return nv_rd32(pfb, 0x100320);
+}
+
+void
 nv20_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
 		  u32 flags, struct nouveau_fb_tile *tile)
 {
-	struct nouveau_device *device = nv_device(pfb);
-	int bpp = (flags & 2) ? 32 : 16;
-
 	tile->addr  = 0x00000001 | addr;
 	tile->limit = max(1u, addr + size) - 1;
 	tile->pitch = pitch;
-
-	/* Allocate some of the on-die tag memory, used to store Z
-	 * compression meta-data (most likely just a bitmap determining
-	 * if a given tile is compressed or not).
-	 */
-	size /= 256;
 	if (flags & 4) {
-		if (!nouveau_mm_head(&pfb->tags, 1, size, size, 1, &tile->tag)) {
-			/* Enable Z compression */
-			tile->zcomp = tile->tag->offset;
-			if (device->chipset >= 0x25) {
-				if (bpp == 16)
-					tile->zcomp |= 0x00100000;
-				else
-					tile->zcomp |= 0x00200000;
-			} else {
-				tile->zcomp |= 0x80000000;
-				if (bpp != 16)
-					tile->zcomp |= 0x04000000;
-			}
-		}
-
+		pfb->tile.comp(pfb, i, size, flags, tile);
 		tile->addr |= 2;
 	}
 }
 
 static void
+nv20_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
+		  struct nouveau_fb_tile *tile)
+{
+	u32 tiles = DIV_ROUND_UP(size, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+		if (!(flags & 2)) tile->zcomp = 0x00000000; /* Z16 */
+		else              tile->zcomp = 0x04000000; /* Z24S8 */
+		tile->zcomp |= tile->tag->offset;
+		tile->zcomp |= 0x80000000; /* enable */
+#ifdef __BIG_ENDIAN
+		tile->zcomp |= 0x08000000;
+#endif
+	}
+}
+
+void
 nv20_fb_tile_fini(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
 {
 	tile->addr  = 0;
@@ -76,12 +87,13 @@
 	nouveau_mm_free(&pfb->tags, &tile->tag);
 }
 
-static void
+void
 nv20_fb_tile_prog(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
 {
 	nv_wr32(pfb, 0x100244 + (i * 0x10), tile->limit);
 	nv_wr32(pfb, 0x100248 + (i * 0x10), tile->pitch);
 	nv_wr32(pfb, 0x100240 + (i * 0x10), tile->addr);
+	nv_rd32(pfb, 0x100240 + (i * 0x10));
 	nv_wr32(pfb, 0x100300 + (i * 0x04), tile->zcomp);
 }
 
@@ -90,9 +102,7 @@
 	     struct nouveau_oclass *oclass, void *data, u32 size,
 	     struct nouveau_object **pobject)
 {
-	struct nouveau_device *device = nv_device(parent);
 	struct nv20_fb_priv *priv;
-	u32 pbus1218;
 	int ret;
 
 	ret = nouveau_fb_create(parent, engine, oclass, &priv);
@@ -100,28 +110,14 @@
 	if (ret)
 		return ret;
 
-	pbus1218 = nv_rd32(priv, 0x001218);
-	switch (pbus1218 & 0x00000300) {
-	case 0x00000000: priv->base.ram.type = NV_MEM_TYPE_SDRAM; break;
-	case 0x00000100: priv->base.ram.type = NV_MEM_TYPE_DDR1; break;
-	case 0x00000200: priv->base.ram.type = NV_MEM_TYPE_GDDR3; break;
-	case 0x00000300: priv->base.ram.type = NV_MEM_TYPE_GDDR2; break;
-	}
-	priv->base.ram.size = nv_rd32(priv, 0x10020c) & 0xff000000;
-
-	if (device->chipset >= 0x25)
-		ret = nouveau_mm_init(&priv->base.tags, 0, 64 * 1024, 1);
-	else
-		ret = nouveau_mm_init(&priv->base.tags, 0, 32 * 1024, 1);
-	if (ret)
-		return ret;
-
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv20_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv20_fb_tile_init;
+	priv->base.tile.comp = nv20_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv20_fb_tile_prog;
-	return nouveau_fb_created(&priv->base);
+	return nouveau_fb_preinit(&priv->base);
 }
 
 struct nouveau_oclass
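
The nv20 tile_comp hook above sizes the ZCOMP allocation as one tag per 0x40-byte
tile, divided across the memory partitions and padded up to a 0x40 boundary. A small
worked example of that arithmetic (the region size and partition count are invented
for the demo):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define ROUND_UP(x, y)		(DIV_ROUND_UP(x, y) * (y))

int main(void)
{
	unsigned int size  = 0x00100000;	/* 1 MiB tile region */
	unsigned int parts = 2;			/* pfb->ram.parts */
	unsigned int tiles = DIV_ROUND_UP(size, 0x40);
	unsigned int tags  = ROUND_UP(tiles / parts, 0x40);

	printf("tiles=0x%x tags=0x%x\n", tiles, tags);	/* 0x4000, 0x2000 */
	return 0;
}
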
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c
new file mode 100644
index 0000000..0042ace
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv25_fb_priv {
+	struct nouveau_fb base;
+};
+
+static void
+nv25_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
+		  struct nouveau_fb_tile *tile)
+{
+	u32 tiles = DIV_ROUND_UP(size, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+		if (!(flags & 2)) tile->zcomp = 0x00100000; /* Z16 */
+		else              tile->zcomp = 0x00200000; /* Z24S8 */
+		tile->zcomp |= tile->tag->offset;
+#ifdef __BIG_ENDIAN
+		tile->zcomp |= 0x01000000;
+#endif
+	}
+}
+
+static int
+nv25_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv25_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv20_fb_vram_init;
+	priv->base.tile.regions = 8;
+	priv->base.tile.init = nv20_fb_tile_init;
+	priv->base.tile.comp = nv25_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv20_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+struct nouveau_oclass
+nv25_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x25),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv25_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = _nouveau_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c
index cba67bc..a7ba0d0 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c
@@ -34,17 +34,36 @@
 nv30_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
 		  u32 flags, struct nouveau_fb_tile *tile)
 {
-	tile->addr = addr | 1;
+	/* for performance, select alternate bank offset for zeta */
+	if (!(flags & 4)) {
+		tile->addr = (0 << 4);
+	} else {
+		if (pfb->tile.comp) /* z compression */
+			pfb->tile.comp(pfb, i, size, flags, tile);
+		tile->addr = (1 << 4);
+	}
+
+	tile->addr |= 0x00000001; /* enable */
+	tile->addr |= addr;
 	tile->limit = max(1u, addr + size) - 1;
 	tile->pitch = pitch;
 }
 
-void
-nv30_fb_tile_fini(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
+static void
+nv30_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
+		  struct nouveau_fb_tile *tile)
 {
-	tile->addr  = 0;
-	tile->limit = 0;
-	tile->pitch = 0;
+	u32 tiles = DIV_ROUND_UP(size, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+		if (flags & 2) tile->zcomp |= 0x01000000; /* Z16 */
+		else           tile->zcomp |= 0x02000000; /* Z24S8 */
+		tile->zcomp |= ((tile->tag->offset           ) >> 6);
+		tile->zcomp |= ((tile->tag->offset + tags - 1) >> 6) << 12;
+#ifdef __BIG_ENDIAN
+		tile->zcomp |= 0x10000000;
+#endif
+	}
 }
 
 static int
@@ -72,7 +91,7 @@
 	return x;
 }
 
-static int
+int
 nv30_fb_init(struct nouveau_object *object)
 {
 	struct nouveau_device *device = nv_device(object);
@@ -111,7 +130,6 @@
 	     struct nouveau_object **pobject)
 {
 	struct nv30_fb_priv *priv;
-	u32 pbus1218;
 	int ret;
 
 	ret = nouveau_fb_create(parent, engine, oclass, &priv);
@@ -119,21 +137,14 @@
 	if (ret)
 		return ret;
 
-	pbus1218 = nv_rd32(priv, 0x001218);
-	switch (pbus1218 & 0x00000300) {
-	case 0x00000000: priv->base.ram.type = NV_MEM_TYPE_SDRAM; break;
-	case 0x00000100: priv->base.ram.type = NV_MEM_TYPE_DDR1; break;
-	case 0x00000200: priv->base.ram.type = NV_MEM_TYPE_GDDR3; break;
-	case 0x00000300: priv->base.ram.type = NV_MEM_TYPE_GDDR2; break;
-	}
-	priv->base.ram.size = nv_rd32(priv, 0x10020c) & 0xff000000;
-
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv20_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv30_fb_tile_init;
-	priv->base.tile.fini = nv30_fb_tile_fini;
-	priv->base.tile.prog = nv10_fb_tile_prog;
-	return nouveau_fb_created(&priv->base);
+	priv->base.tile.comp = nv30_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv20_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
 }
 
 struct nouveau_oclass
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c
new file mode 100644
index 0000000..092f6f4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv35_fb_priv {
+	struct nouveau_fb base;
+};
+
+static void
+nv35_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
+		  struct nouveau_fb_tile *tile)
+{
+	u32 tiles = DIV_ROUND_UP(size, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+		if (flags & 2) tile->zcomp |= 0x04000000; /* Z16 */
+		else           tile->zcomp |= 0x08000000; /* Z24S8 */
+		tile->zcomp |= ((tile->tag->offset           ) >> 6);
+		tile->zcomp |= ((tile->tag->offset + tags - 1) >> 6) << 13;
+#ifdef __BIG_ENDIAN
+		tile->zcomp |= 0x40000000;
+#endif
+	}
+}
+
+static int
+nv35_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv35_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv20_fb_vram_init;
+	priv->base.tile.regions = 8;
+	priv->base.tile.init = nv30_fb_tile_init;
+	priv->base.tile.comp = nv35_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv20_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+struct nouveau_oclass
+nv35_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x35),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv35_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv30_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c
new file mode 100644
index 0000000..797ab3b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv36_fb_priv {
+	struct nouveau_fb base;
+};
+
+static void
+nv36_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
+		  struct nouveau_fb_tile *tile)
+{
+	u32 tiles = DIV_ROUND_UP(size, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+		if (flags & 2) tile->zcomp |= 0x10000000; /* Z16 */
+		else           tile->zcomp |= 0x20000000; /* Z24S8 */
+		tile->zcomp |= ((tile->tag->offset           ) >> 6);
+		tile->zcomp |= ((tile->tag->offset + tags - 1) >> 6) << 14;
+#ifdef __BIG_ENDIAN
+		tile->zcomp |= 0x80000000;
+#endif
+	}
+}
+
+static int
+nv36_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv36_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv20_fb_vram_init;
+	priv->base.tile.regions = 8;
+	priv->base.tile.init = nv30_fb_tile_init;
+	priv->base.tile.comp = nv36_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv20_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+struct nouveau_oclass
+nv36_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x36),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv36_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv30_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c
index 347a496..65e131b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c
@@ -30,34 +30,37 @@
 	struct nouveau_fb base;
 };
 
-static inline int
-nv44_graph_class(struct nouveau_device *device)
+static int
+nv40_fb_vram_init(struct nouveau_fb *pfb)
 {
-	if ((device->chipset & 0xf0) == 0x60)
-		return 1;
+	u32 pbus1218 = nv_rd32(pfb, 0x001218);
+	switch (pbus1218 & 0x00000300) {
+	case 0x00000000: pfb->ram.type = NV_MEM_TYPE_SDRAM; break;
+	case 0x00000100: pfb->ram.type = NV_MEM_TYPE_DDR1; break;
+	case 0x00000200: pfb->ram.type = NV_MEM_TYPE_GDDR3; break;
+	case 0x00000300: pfb->ram.type = NV_MEM_TYPE_DDR2; break;
+	}
 
-	return !(0x0baf & (1 << (device->chipset & 0x0f)));
+	pfb->ram.size  =  nv_rd32(pfb, 0x10020c) & 0xff000000;
+	pfb->ram.parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
+	return nv_rd32(pfb, 0x100320);
 }
 
-static void
-nv40_fb_tile_prog(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
+void
+nv40_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
+		  struct nouveau_fb_tile *tile)
 {
-	nv_wr32(pfb, 0x100604 + (i * 0x10), tile->limit);
-	nv_wr32(pfb, 0x100608 + (i * 0x10), tile->pitch);
-	nv_wr32(pfb, 0x100600 + (i * 0x10), tile->addr);
-}
-
-static void
-nv40_fb_init_gart(struct nv40_fb_priv *priv)
-{
-	nv_wr32(priv, 0x100800, 0x00000001);
-}
-
-static void
-nv44_fb_init_gart(struct nv40_fb_priv *priv)
-{
-	nv_wr32(priv, 0x100850, 0x80000000);
-	nv_wr32(priv, 0x100800, 0x00000001);
+	u32 tiles = DIV_ROUND_UP(size, 0x80);
+	u32 tags  = round_up(tiles / pfb->ram.parts, 0x100);
+	if ( (flags & 2) &&
+	    !nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+		tile->zcomp  = 0x28000000; /* Z24S8_SPLIT_GRAD */
+		tile->zcomp |= ((tile->tag->offset           ) >> 8);
+		tile->zcomp |= ((tile->tag->offset + tags - 1) >> 8) << 13;
+#ifdef __BIG_ENDIAN
+		tile->zcomp |= 0x40000000;
+#endif
+	}
 }
 
 static int
@@ -70,19 +73,7 @@
 	if (ret)
 		return ret;
 
-	switch (nv_device(priv)->chipset) {
-	case 0x40:
-	case 0x45:
-		nv_mask(priv, 0x10033c, 0x00008000, 0x00000000);
-		break;
-	default:
-		if (nv44_graph_class(nv_device(priv)))
-			nv44_fb_init_gart(priv);
-		else
-			nv40_fb_init_gart(priv);
-		break;
-	}
-
+	nv_mask(priv, 0x10033c, 0x00008000, 0x00000000);
 	return 0;
 }
 
@@ -91,7 +82,6 @@
 	     struct nouveau_oclass *oclass, void *data, u32 size,
 	     struct nouveau_object **pobject)
 {
-	struct nouveau_device *device = nv_device(parent);
 	struct nv40_fb_priv *priv;
 	int ret;
 
@@ -100,69 +90,14 @@
 	if (ret)
 		return ret;
 
-	/* 0x001218 is actually present on a few other NV4X I looked at,
-	 * and even contains sane values matching 0x100474.  From looking
-	 * at various vbios images however, this isn't the case everywhere.
-	 * So, I chose to use the same regs I've seen NVIDIA reading around
-	 * the memory detection, hopefully that'll get us the right numbers
-	 */
-	if (device->chipset == 0x40) {
-		u32 pbus1218 = nv_rd32(priv, 0x001218);
-		switch (pbus1218 & 0x00000300) {
-		case 0x00000000: priv->base.ram.type = NV_MEM_TYPE_SDRAM; break;
-		case 0x00000100: priv->base.ram.type = NV_MEM_TYPE_DDR1; break;
-		case 0x00000200: priv->base.ram.type = NV_MEM_TYPE_GDDR3; break;
-		case 0x00000300: priv->base.ram.type = NV_MEM_TYPE_DDR2; break;
-		}
-	} else
-	if (device->chipset == 0x49 || device->chipset == 0x4b) {
-		u32 pfb914 = nv_rd32(priv, 0x100914);
-		switch (pfb914 & 0x00000003) {
-		case 0x00000000: priv->base.ram.type = NV_MEM_TYPE_DDR1; break;
-		case 0x00000001: priv->base.ram.type = NV_MEM_TYPE_DDR2; break;
-		case 0x00000002: priv->base.ram.type = NV_MEM_TYPE_GDDR3; break;
-		case 0x00000003: break;
-		}
-	} else
-	if (device->chipset != 0x4e) {
-		u32 pfb474 = nv_rd32(priv, 0x100474);
-		if (pfb474 & 0x00000004)
-			priv->base.ram.type = NV_MEM_TYPE_GDDR3;
-		if (pfb474 & 0x00000002)
-			priv->base.ram.type = NV_MEM_TYPE_DDR2;
-		if (pfb474 & 0x00000001)
-			priv->base.ram.type = NV_MEM_TYPE_DDR1;
-	} else {
-		priv->base.ram.type = NV_MEM_TYPE_STOLEN;
-	}
-
-	priv->base.ram.size = nv_rd32(priv, 0x10020c) & 0xff000000;
-
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	switch (device->chipset) {
-	case 0x40:
-	case 0x45:
-		priv->base.tile.regions = 8;
-		break;
-	case 0x46:
-	case 0x47:
-	case 0x49:
-	case 0x4b:
-	case 0x4c:
-		priv->base.tile.regions = 15;
-		break;
-	default:
-		priv->base.tile.regions = 12;
-		break;
-	}
+	priv->base.ram.init = nv40_fb_vram_init;
+	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv30_fb_tile_init;
-	priv->base.tile.fini = nv30_fb_tile_fini;
-	if (device->chipset == 0x40)
-		priv->base.tile.prog = nv10_fb_tile_prog;
-	else
-		priv->base.tile.prog = nv40_fb_tile_prog;
-
-	return nouveau_fb_created(&priv->base);
+	priv->base.tile.comp = nv40_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv20_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
 }
 
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv41.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv41.c
new file mode 100644
index 0000000..e9e5a08
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv41.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv41_fb_priv {
+	struct nouveau_fb base;
+};
+
+int
+nv41_fb_vram_init(struct nouveau_fb *pfb)
+{
+	u32 pfb474 = nv_rd32(pfb, 0x100474);
+	if (pfb474 & 0x00000004)
+		pfb->ram.type = NV_MEM_TYPE_GDDR3;
+	if (pfb474 & 0x00000002)
+		pfb->ram.type = NV_MEM_TYPE_DDR2;
+	if (pfb474 & 0x00000001)
+		pfb->ram.type = NV_MEM_TYPE_DDR1;
+
+	pfb->ram.size =   nv_rd32(pfb, 0x10020c) & 0xff000000;
+	pfb->ram.parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
+	return nv_rd32(pfb, 0x100320);
+}
+
+void
+nv41_fb_tile_prog(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
+{
+	nv_wr32(pfb, 0x100604 + (i * 0x10), tile->limit);
+	nv_wr32(pfb, 0x100608 + (i * 0x10), tile->pitch);
+	nv_wr32(pfb, 0x100600 + (i * 0x10), tile->addr);
+	nv_rd32(pfb, 0x100600 + (i * 0x10));
+	nv_wr32(pfb, 0x100700 + (i * 0x04), tile->zcomp);
+}
+
+int
+nv41_fb_init(struct nouveau_object *object)
+{
+	struct nv41_fb_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_fb_init(&priv->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x100800, 0x00000001);
+	return 0;
+}
+
+static int
+nv41_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv41_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv41_fb_vram_init;
+	priv->base.tile.regions = 12;
+	priv->base.tile.init = nv30_fb_tile_init;
+	priv->base.tile.comp = nv40_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv41_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+
+struct nouveau_oclass
+nv41_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x41),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv41_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv41_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv44.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv44.c
new file mode 100644
index 0000000..ae89b50
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv44.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv44_fb_priv {
+	struct nouveau_fb base;
+};
+
+int
+nv44_fb_vram_init(struct nouveau_fb *pfb)
+{
+	u32 pfb474 = nv_rd32(pfb, 0x100474);
+	if (pfb474 & 0x00000004)
+		pfb->ram.type = NV_MEM_TYPE_GDDR3;
+	if (pfb474 & 0x00000002)
+		pfb->ram.type = NV_MEM_TYPE_DDR2;
+	if (pfb474 & 0x00000001)
+		pfb->ram.type = NV_MEM_TYPE_DDR1;
+
+	pfb->ram.size = nv_rd32(pfb, 0x10020c) & 0xff000000;
+	return 0;
+}
+
+static void
+nv44_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
+		  u32 flags, struct nouveau_fb_tile *tile)
+{
+	tile->addr  = 0x00000001; /* mode = vram */
+	tile->addr |= addr;
+	tile->limit = max(1u, addr + size) - 1;
+	tile->pitch = pitch;
+}
+
+void
+nv44_fb_tile_prog(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
+{
+	nv_wr32(pfb, 0x100604 + (i * 0x10), tile->limit);
+	nv_wr32(pfb, 0x100608 + (i * 0x10), tile->pitch);
+	nv_wr32(pfb, 0x100600 + (i * 0x10), tile->addr);
+	nv_rd32(pfb, 0x100600 + (i * 0x10));
+}
+
+int
+nv44_fb_init(struct nouveau_object *object)
+{
+	struct nv44_fb_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_fb_init(&priv->base);
+	if (ret)
+		return ret;
+
+	nv_wr32(priv, 0x100850, 0x80000000);
+	nv_wr32(priv, 0x100800, 0x00000001);
+	return 0;
+}
+
+static int
+nv44_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv44_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv44_fb_vram_init;
+	priv->base.tile.regions = 12;
+	priv->base.tile.init = nv44_fb_tile_init;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv44_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+
+struct nouveau_oclass
+nv44_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x44),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv44_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv44_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv46.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv46.c
new file mode 100644
index 0000000..589b93e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv46.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv46_fb_priv {
+	struct nouveau_fb base;
+};
+
+void
+nv46_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
+		  u32 flags, struct nouveau_fb_tile *tile)
+{
+	/* for performance, select alternate bank offset for zeta */
+	if (!(flags & 4)) tile->addr = (0 << 3);
+	else              tile->addr = (1 << 3);
+
+	tile->addr |= 0x00000001; /* mode = vram */
+	tile->addr |= addr;
+	tile->limit = max(1u, addr + size) - 1;
+	tile->pitch = pitch;
+}
+
+static int
+nv46_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv46_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv44_fb_vram_init;
+	priv->base.tile.regions = 15;
+	priv->base.tile.init = nv46_fb_tile_init;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv44_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+
+struct nouveau_oclass
+nv46_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x46),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv46_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv44_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv47.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv47.c
new file mode 100644
index 0000000..818bba3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv47.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv47_fb_priv {
+	struct nouveau_fb base;
+};
+
+static int
+nv47_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv47_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv41_fb_vram_init;
+	priv->base.tile.regions = 15;
+	priv->base.tile.init = nv30_fb_tile_init;
+	priv->base.tile.comp = nv40_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv41_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+
+struct nouveau_oclass
+nv47_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x47),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv47_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv41_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv49.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv49.c
new file mode 100644
index 0000000..84a31af
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv49.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv49_fb_priv {
+	struct nouveau_fb base;
+};
+
+static int
+nv49_fb_vram_init(struct nouveau_fb *pfb)
+{
+	u32 pfb914 = nv_rd32(pfb, 0x100914);
+
+	switch (pfb914 & 0x00000003) {
+	case 0x00000000: pfb->ram.type = NV_MEM_TYPE_DDR1; break;
+	case 0x00000001: pfb->ram.type = NV_MEM_TYPE_DDR2; break;
+	case 0x00000002: pfb->ram.type = NV_MEM_TYPE_GDDR3; break;
+	case 0x00000003: break;
+	}
+
+	pfb->ram.size =   nv_rd32(pfb, 0x10020c) & 0xff000000;
+	pfb->ram.parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
+	return nv_rd32(pfb, 0x100320);
+}
+
+static int
+nv49_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv49_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv49_fb_vram_init;
+	priv->base.tile.regions = 15;
+	priv->base.tile.init = nv30_fb_tile_init;
+	priv->base.tile.comp = nv40_fb_tile_comp;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv41_fb_tile_prog;
+
+	return nouveau_fb_preinit(&priv->base);
+}
+
+
+struct nouveau_oclass
+nv49_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x49),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv49_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv41_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv4e.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv4e.c
new file mode 100644
index 0000000..797fd55
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv4e.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <subdev/fb.h>
+
+struct nv4e_fb_priv {
+	struct nouveau_fb base;
+};
+
+static int
+nv4e_fb_vram_init(struct nouveau_fb *pfb)
+{
+	pfb->ram.size = nv_rd32(pfb, 0x10020c) & 0xff000000;
+	pfb->ram.type = NV_MEM_TYPE_STOLEN;
+	return 0;
+}
+
+static int
+nv4e_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv4e_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.memtype_valid = nv04_fb_memtype_valid;
+	priv->base.ram.init = nv4e_fb_vram_init;
+	priv->base.tile.regions = 12;
+	priv->base.tile.init = nv46_fb_tile_init;
+	priv->base.tile.fini = nv20_fb_tile_fini;
+	priv->base.tile.prog = nv44_fb_tile_prog;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+struct nouveau_oclass
+nv4e_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x4e),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv4e_fb_ctor,
+		.dtor = _nouveau_fb_dtor,
+		.init = nv44_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
index 5f570806..487cb8c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
@@ -51,6 +51,101 @@
 	return types[(memtype & 0xff00) >> 8] != 0;
 }
 
+static u32
+nv50_fb_vram_rblock(struct nouveau_fb *pfb)
+{
+	int i, parts, colbits, rowbitsa, rowbitsb, banks;
+	u64 rowsize, predicted;
+	u32 r0, r4, rt, ru, rblock_size;
+
+	r0 = nv_rd32(pfb, 0x100200);
+	r4 = nv_rd32(pfb, 0x100204);
+	rt = nv_rd32(pfb, 0x100250);
+	ru = nv_rd32(pfb, 0x001540);
+	nv_debug(pfb, "memcfg 0x%08x 0x%08x 0x%08x 0x%08x\n", r0, r4, rt, ru);
+
+	for (i = 0, parts = 0; i < 8; i++) {
+		if (ru & (0x00010000 << i))
+			parts++;
+	}
+
+	colbits  =  (r4 & 0x0000f000) >> 12;
+	rowbitsa = ((r4 & 0x000f0000) >> 16) + 8;
+	rowbitsb = ((r4 & 0x00f00000) >> 20) + 8;
+	banks    = 1 << (((r4 & 0x03000000) >> 24) + 2);
+
+	rowsize = parts * banks * (1 << colbits) * 8;
+	predicted = rowsize << rowbitsa;
+	if (r0 & 0x00000004)
+		predicted += rowsize << rowbitsb;
+
+	if (predicted != pfb->ram.size) {
+		nv_warn(pfb, "memory controller reports %d MiB VRAM\n",
+			(u32)(pfb->ram.size >> 20));
+	}
+
+	rblock_size = rowsize;
+	if (rt & 1)
+		rblock_size *= 3;
+
+	nv_debug(pfb, "rblock %d bytes\n", rblock_size);
+	return rblock_size;
+}
+
+static int
+nv50_fb_vram_init(struct nouveau_fb *pfb)
+{
+	struct nouveau_device *device = nv_device(pfb);
+	struct nouveau_bios *bios = nouveau_bios(device);
+	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
+	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
+	u32 size;
+	int ret;
+
+	pfb->ram.size = nv_rd32(pfb, 0x10020c);
+	pfb->ram.size = (pfb->ram.size & 0xffffff00) |
+		       ((pfb->ram.size & 0x000000ff) << 32);
+
+	size = (pfb->ram.size >> 12) - rsvd_head - rsvd_tail;
+	switch (device->chipset) {
+	case 0xaa:
+	case 0xac:
+	case 0xaf: /* IGPs, no reordering, no real VRAM */
+		ret = nouveau_mm_init(&pfb->vram, rsvd_head, size, 1);
+		if (ret)
+			return ret;
+
+		pfb->ram.type   = NV_MEM_TYPE_STOLEN;
+		pfb->ram.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12;
+		break;
+	default:
+		switch (nv_rd32(pfb, 0x100714) & 0x00000007) {
+		case 0: pfb->ram.type = NV_MEM_TYPE_DDR1; break;
+		case 1:
+			if (nouveau_fb_bios_memtype(bios) == NV_MEM_TYPE_DDR3)
+				pfb->ram.type = NV_MEM_TYPE_DDR3;
+			else
+				pfb->ram.type = NV_MEM_TYPE_DDR2;
+			break;
+		case 2: pfb->ram.type = NV_MEM_TYPE_GDDR3; break;
+		case 3: pfb->ram.type = NV_MEM_TYPE_GDDR4; break;
+		case 4: pfb->ram.type = NV_MEM_TYPE_GDDR5; break;
+		default:
+			break;
+		}
+
+		ret = nouveau_mm_init(&pfb->vram, rsvd_head, size,
+				      nv50_fb_vram_rblock(pfb) >> 12);
+		if (ret)
+			return ret;
+
+		pfb->ram.ranks = (nv_rd32(pfb, 0x100200) & 0x4) ? 2 : 1;
+		break;
+	}
+
+	return nv_rd32(pfb, 0x100320);
+}
+
 static int
 nv50_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 		 u32 memtype, struct nouveau_mem **pmem)
@@ -140,195 +235,6 @@
 	kfree(mem);
 }
 
-static u32
-nv50_vram_rblock(struct nv50_fb_priv *priv)
-{
-	int i, parts, colbits, rowbitsa, rowbitsb, banks;
-	u64 rowsize, predicted;
-	u32 r0, r4, rt, ru, rblock_size;
-
-	r0 = nv_rd32(priv, 0x100200);
-	r4 = nv_rd32(priv, 0x100204);
-	rt = nv_rd32(priv, 0x100250);
-	ru = nv_rd32(priv, 0x001540);
-	nv_debug(priv, "memcfg 0x%08x 0x%08x 0x%08x 0x%08x\n", r0, r4, rt, ru);
-
-	for (i = 0, parts = 0; i < 8; i++) {
-		if (ru & (0x00010000 << i))
-			parts++;
-	}
-
-	colbits  =  (r4 & 0x0000f000) >> 12;
-	rowbitsa = ((r4 & 0x000f0000) >> 16) + 8;
-	rowbitsb = ((r4 & 0x00f00000) >> 20) + 8;
-	banks    = 1 << (((r4 & 0x03000000) >> 24) + 2);
-
-	rowsize = parts * banks * (1 << colbits) * 8;
-	predicted = rowsize << rowbitsa;
-	if (r0 & 0x00000004)
-		predicted += rowsize << rowbitsb;
-
-	if (predicted != priv->base.ram.size) {
-		nv_warn(priv, "memory controller reports %d MiB VRAM\n",
-			(u32)(priv->base.ram.size >> 20));
-	}
-
-	rblock_size = rowsize;
-	if (rt & 1)
-		rblock_size *= 3;
-
-	nv_debug(priv, "rblock %d bytes\n", rblock_size);
-	return rblock_size;
-}
-
-static int
-nv50_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-	     struct nouveau_oclass *oclass, void *data, u32 size,
-	     struct nouveau_object **pobject)
-{
-	struct nouveau_device *device = nv_device(parent);
-	struct nouveau_bios *bios = nouveau_bios(device);
-	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
-	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
-	struct nv50_fb_priv *priv;
-	u32 tags;
-	int ret;
-
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
-		return ret;
-
-	switch (nv_rd32(priv, 0x100714) & 0x00000007) {
-	case 0: priv->base.ram.type = NV_MEM_TYPE_DDR1; break;
-	case 1:
-		if (nouveau_fb_bios_memtype(bios) == NV_MEM_TYPE_DDR3)
-			priv->base.ram.type = NV_MEM_TYPE_DDR3;
-		else
-			priv->base.ram.type = NV_MEM_TYPE_DDR2;
-		break;
-	case 2: priv->base.ram.type = NV_MEM_TYPE_GDDR3; break;
-	case 3: priv->base.ram.type = NV_MEM_TYPE_GDDR4; break;
-	case 4: priv->base.ram.type = NV_MEM_TYPE_GDDR5; break;
-	default:
-		break;
-	}
-
-	priv->base.ram.size = nv_rd32(priv, 0x10020c);
-	priv->base.ram.size = (priv->base.ram.size & 0xffffff00) |
-			     ((priv->base.ram.size & 0x000000ff) << 32);
-
-	tags = nv_rd32(priv, 0x100320);
-	ret = nouveau_mm_init(&priv->base.tags, 0, tags, 1);
-	if (ret)
-		return ret;
-
-	nv_debug(priv, "%d compression tags\n", tags);
-
-	size = (priv->base.ram.size >> 12) - rsvd_head - rsvd_tail;
-	switch (device->chipset) {
-	case 0xaa:
-	case 0xac:
-	case 0xaf: /* IGPs, no reordering, no real VRAM */
-		ret = nouveau_mm_init(&priv->base.vram, rsvd_head, size, 1);
-		if (ret)
-			return ret;
-
-		priv->base.ram.stolen = (u64)nv_rd32(priv, 0x100e10) << 12;
-		priv->base.ram.type = NV_MEM_TYPE_STOLEN;
-		break;
-	default:
-		ret = nouveau_mm_init(&priv->base.vram, rsvd_head, size,
-				      nv50_vram_rblock(priv) >> 12);
-		if (ret)
-			return ret;
-
-		priv->base.ram.ranks = (nv_rd32(priv, 0x100200) & 0x4) ? 2 : 1;
-		break;
-	}
-
-	priv->r100c08_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (priv->r100c08_page) {
-		priv->r100c08 = pci_map_page(device->pdev, priv->r100c08_page,
-					     0, PAGE_SIZE,
-					     PCI_DMA_BIDIRECTIONAL);
-		if (pci_dma_mapping_error(device->pdev, priv->r100c08))
-			nv_warn(priv, "failed 0x100c08 page map\n");
-	} else {
-		nv_warn(priv, "failed 0x100c08 page alloc\n");
-	}
-
-	priv->base.memtype_valid = nv50_fb_memtype_valid;
-	priv->base.ram.get = nv50_fb_vram_new;
-	priv->base.ram.put = nv50_fb_vram_del;
-	return nouveau_fb_created(&priv->base);
-}
-
-static void
-nv50_fb_dtor(struct nouveau_object *object)
-{
-	struct nouveau_device *device = nv_device(object);
-	struct nv50_fb_priv *priv = (void *)object;
-
-	if (priv->r100c08_page) {
-		pci_unmap_page(device->pdev, priv->r100c08, PAGE_SIZE,
-			       PCI_DMA_BIDIRECTIONAL);
-		__free_page(priv->r100c08_page);
-	}
-
-	nouveau_fb_destroy(&priv->base);
-}
-
-static int
-nv50_fb_init(struct nouveau_object *object)
-{
-	struct nouveau_device *device = nv_device(object);
-	struct nv50_fb_priv *priv = (void *)object;
-	int ret;
-
-	ret = nouveau_fb_init(&priv->base);
-	if (ret)
-		return ret;
-
-	/* Not a clue what this is exactly.  Without pointing it at a
-	 * scratch page, VRAM->GART blits with M2MF (as in DDX DFS)
-	 * cause IOMMU "read from address 0" errors (rh#561267)
-	 */
-	nv_wr32(priv, 0x100c08, priv->r100c08 >> 8);
-
-	/* This is needed to get meaningful information from 100c90
-	 * on traps. No idea what these values mean exactly. */
-	switch (device->chipset) {
-	case 0x50:
-		nv_wr32(priv, 0x100c90, 0x000707ff);
-		break;
-	case 0xa3:
-	case 0xa5:
-	case 0xa8:
-		nv_wr32(priv, 0x100c90, 0x000d0fff);
-		break;
-	case 0xaf:
-		nv_wr32(priv, 0x100c90, 0x089d1fff);
-		break;
-	default:
-		nv_wr32(priv, 0x100c90, 0x001d07ff);
-		break;
-	}
-
-	return 0;
-}
-
-struct nouveau_oclass
-nv50_fb_oclass = {
-	.handle = NV_SUBDEV(FB, 0x50),
-	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = nv50_fb_ctor,
-		.dtor = nv50_fb_dtor,
-		.init = nv50_fb_init,
-		.fini = _nouveau_fb_fini,
-	},
-};
-
 static const struct nouveau_enum vm_dispatch_subclients[] = {
 	{ 0x00000000, "GRCTX", NULL },
 	{ 0x00000001, "NOTIFY", NULL },
@@ -424,11 +330,11 @@
 	{}
 };
 
-void
-nv50_fb_trap(struct nouveau_fb *pfb, int display)
+static void
+nv50_fb_intr(struct nouveau_subdev *subdev)
 {
-	struct nouveau_device *device = nv_device(pfb);
-	struct nv50_fb_priv *priv = (void *)pfb;
+	struct nouveau_device *device = nv_device(subdev);
+	struct nv50_fb_priv *priv = (void *)subdev;
 	const struct nouveau_enum *en, *cl;
 	u32 trap[6], idx, chan;
 	u8 st0, st1, st2, st3;
@@ -445,9 +351,6 @@
 	}
 	nv_wr32(priv, 0x100c90, idx | 0x80000000);
 
-	if (!display)
-		return;
-
 	/* decode status bits into something more useful */
 	if (device->chipset  < 0xa3 ||
 	    device->chipset == 0xaa || device->chipset == 0xac) {
@@ -494,3 +397,101 @@
 	else
 		printk("0x%08x\n", st1);
 }
+
+static int
+nv50_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nouveau_device *device = nv_device(parent);
+	struct nv50_fb_priv *priv;
+	int ret;
+
+	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->r100c08_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (priv->r100c08_page) {
+		priv->r100c08 = pci_map_page(device->pdev, priv->r100c08_page,
+					     0, PAGE_SIZE,
+					     PCI_DMA_BIDIRECTIONAL);
+		if (pci_dma_mapping_error(device->pdev, priv->r100c08))
+			nv_warn(priv, "failed 0x100c08 page map\n");
+	} else {
+		nv_warn(priv, "failed 0x100c08 page alloc\n");
+	}
+
+	priv->base.memtype_valid = nv50_fb_memtype_valid;
+	priv->base.ram.init = nv50_fb_vram_init;
+	priv->base.ram.get = nv50_fb_vram_new;
+	priv->base.ram.put = nv50_fb_vram_del;
+	nv_subdev(priv)->intr = nv50_fb_intr;
+	return nouveau_fb_preinit(&priv->base);
+}
+
+static void
+nv50_fb_dtor(struct nouveau_object *object)
+{
+	struct nouveau_device *device = nv_device(object);
+	struct nv50_fb_priv *priv = (void *)object;
+
+	if (priv->r100c08_page) {
+		pci_unmap_page(device->pdev, priv->r100c08, PAGE_SIZE,
+			       PCI_DMA_BIDIRECTIONAL);
+		__free_page(priv->r100c08_page);
+	}
+
+	nouveau_fb_destroy(&priv->base);
+}
+
+static int
+nv50_fb_init(struct nouveau_object *object)
+{
+	struct nouveau_device *device = nv_device(object);
+	struct nv50_fb_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_fb_init(&priv->base);
+	if (ret)
+		return ret;
+
+	/* Not a clue what this is exactly.  Without pointing it at a
+	 * scratch page, VRAM->GART blits with M2MF (as in DDX DFS)
+	 * cause IOMMU "read from address 0" errors (rh#561267)
+	 */
+	nv_wr32(priv, 0x100c08, priv->r100c08 >> 8);
+
+	/* This is needed to get meaningful information from 100c90
+	 * on traps. No idea what these values mean exactly. */
+	switch (device->chipset) {
+	case 0x50:
+		nv_wr32(priv, 0x100c90, 0x000707ff);
+		break;
+	case 0xa3:
+	case 0xa5:
+	case 0xa8:
+		nv_wr32(priv, 0x100c90, 0x000d0fff);
+		break;
+	case 0xaf:
+		nv_wr32(priv, 0x100c90, 0x089d1fff);
+		break;
+	default:
+		nv_wr32(priv, 0x100c90, 0x001d07ff);
+		break;
+	}
+
+	return 0;
+}
+
+struct nouveau_oclass
+nv50_fb_oclass = {
+	.handle = NV_SUBDEV(FB, 0x50),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv50_fb_ctor,
+		.dtor = nv50_fb_dtor,
+		.init = nv50_fb_init,
+		.fini = _nouveau_fb_fini,
+	},
+};
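
To make the register decode in nv50_fb_vram_rblock() concrete, here is a
worked example with illustrative (not measured) values.  Assume two
partition-enable bits set in 0x001540[23:16] (parts = 2) and
r4 = 0x01059000:

    colbits  =  (r4 & 0x0000f000) >> 12              = 9
    rowbitsa = ((r4 & 0x000f0000) >> 16) + 8         = 13
    banks    = 1 << (((r4 & 0x03000000) >> 24) + 2)  = 8

    rowsize   = 2 * 8 * (1 << 9) * 8  = 65536 bytes
    predicted = 65536 << 13           = 512 MiB

predicted is only cross-checked against pfb->ram.size (a mismatch merely
warns), and rblock_size comes out as 65536 bytes, tripled to 196608 if
bit 0 of 0x100250 is set.
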
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
index 9f59f2b..306bdf1 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
@@ -62,6 +62,65 @@
 }
 
 static int
+nvc0_fb_vram_init(struct nouveau_fb *pfb)
+{
+	struct nouveau_bios *bios = nouveau_bios(pfb);
+	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
+	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
+	u32 parts = nv_rd32(pfb, 0x022438);
+	u32 pmask = nv_rd32(pfb, 0x022554);
+	u32 bsize = nv_rd32(pfb, 0x10f20c);
+	u32 offset, length;
+	bool uniform = true;
+	int ret, part;
+
+	nv_debug(pfb, "0x100800: 0x%08x\n", nv_rd32(pfb, 0x100800));
+	nv_debug(pfb, "parts 0x%08x mask 0x%08x\n", parts, pmask);
+
+	pfb->ram.type = nouveau_fb_bios_memtype(bios);
+	pfb->ram.ranks = (nv_rd32(pfb, 0x10f200) & 0x00000004) ? 2 : 1;
+
+	/* read amount of vram attached to each memory controller */
+	for (part = 0; part < parts; part++) {
+		if (!(pmask & (1 << part))) {
+			u32 psize = nv_rd32(pfb, 0x11020c + (part * 0x1000));
+			if (psize != bsize) {
+				if (psize < bsize)
+					bsize = psize;
+				uniform = false;
+			}
+
+			nv_debug(pfb, "%d: mem_amount 0x%08x\n", part, psize);
+			pfb->ram.size += (u64)psize << 20;
+		}
+	}
+
+	/* if all controllers have the same amount attached, there are no holes */
+	if (uniform) {
+		offset = rsvd_head;
+		length = (pfb->ram.size >> 12) - rsvd_head - rsvd_tail;
+		return nouveau_mm_init(&pfb->vram, offset, length, 1);
+	}
+
+	/* otherwise, address lowest common amount from 0GiB */
+	ret = nouveau_mm_init(&pfb->vram, rsvd_head, (bsize << 8) * parts, 1);
+	if (ret)
+		return ret;
+
+	/* and the rest starting from (8GiB + common_size) */
+	offset = (0x0200000000ULL >> 12) + (bsize << 8);
+	length = (pfb->ram.size >> 12) - (bsize << 8) - rsvd_tail;
+
+	ret = nouveau_mm_init(&pfb->vram, offset, length, 0);
+	if (ret) {
+		nouveau_mm_fini(&pfb->vram);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int
 nvc0_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 		 u32 memtype, struct nouveau_mem **pmem)
 {
@@ -139,66 +198,6 @@
 }
 
 static int
-nvc0_vram_detect(struct nvc0_fb_priv *priv)
-{
-	struct nouveau_bios *bios = nouveau_bios(priv);
-	struct nouveau_fb *pfb = &priv->base;
-	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
-	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
-	u32 parts = nv_rd32(priv, 0x022438);
-	u32 pmask = nv_rd32(priv, 0x022554);
-	u32 bsize = nv_rd32(priv, 0x10f20c);
-	u32 offset, length;
-	bool uniform = true;
-	int ret, part;
-
-	nv_debug(priv, "0x100800: 0x%08x\n", nv_rd32(priv, 0x100800));
-	nv_debug(priv, "parts 0x%08x mask 0x%08x\n", parts, pmask);
-
-	priv->base.ram.type = nouveau_fb_bios_memtype(bios);
-	priv->base.ram.ranks = (nv_rd32(priv, 0x10f200) & 0x00000004) ? 2 : 1;
-
-	/* read amount of vram attached to each memory controller */
-	for (part = 0; part < parts; part++) {
-		if (!(pmask & (1 << part))) {
-			u32 psize = nv_rd32(priv, 0x11020c + (part * 0x1000));
-			if (psize != bsize) {
-				if (psize < bsize)
-					bsize = psize;
-				uniform = false;
-			}
-
-			nv_debug(priv, "%d: mem_amount 0x%08x\n", part, psize);
-			priv->base.ram.size += (u64)psize << 20;
-		}
-	}
-
-	/* if all controllers have the same amount attached, there's no holes */
-	if (uniform) {
-		offset = rsvd_head;
-		length = (priv->base.ram.size >> 12) - rsvd_head - rsvd_tail;
-		return nouveau_mm_init(&pfb->vram, offset, length, 1);
-	}
-
-	/* otherwise, address lowest common amount from 0GiB */
-	ret = nouveau_mm_init(&pfb->vram, rsvd_head, (bsize << 8) * parts, 1);
-	if (ret)
-		return ret;
-
-	/* and the rest starting from (8GiB + common_size) */
-	offset = (0x0200000000ULL >> 12) + (bsize << 8);
-	length = (priv->base.ram.size >> 12) - (bsize << 8) - rsvd_tail;
-
-	ret = nouveau_mm_init(&pfb->vram, offset, length, 0);
-	if (ret) {
-		nouveau_mm_fini(&pfb->vram);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int
 nvc0_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	     struct nouveau_oclass *oclass, void *data, u32 size,
 	     struct nouveau_object **pobject)
@@ -213,13 +212,10 @@
 		return ret;
 
 	priv->base.memtype_valid = nvc0_fb_memtype_valid;
+	priv->base.ram.init = nvc0_fb_vram_init;
 	priv->base.ram.get = nvc0_fb_vram_new;
 	priv->base.ram.put = nv50_fb_vram_del;
 
-	ret = nvc0_vram_detect(priv);
-	if (ret)
-		return ret;
-
 	priv->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!priv->r100c10_page)
 		return -ENOMEM;
@@ -229,7 +225,7 @@
 	if (pci_dma_mapping_error(device->pdev, priv->r100c10))
 		return -EFAULT;
 
-	return nouveau_fb_created(&priv->base);
+	return nouveau_fb_preinit(&priv->base);
 }
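
A worked example of the nvc0 layout logic above, using made-up partition
sizes: three partitions reporting 1024, 1024 and 512 MiB give bsize = 512,
uniform = false and ram.size = 2560 MiB.  The code then creates two
regions (offsets and lengths in 4 KiB pages, exactly as computed above;
rsvd_head = 64, rsvd_tail = 256):

    /* lowest common amount, addressed linearly from the start of VRAM */
    nouveau_mm_init(&pfb->vram, 64, (512 << 8) * 3 /* = 393216 */, 1);

    /* the remainder, in the window at 8 GiB + common size */
    nouveau_mm_init(&pfb->vram, (0x0200000000ULL >> 12) + (512 << 8),
                    (2560 << 8) - (512 << 8) - 256 /* = 524032 */, 0);
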
 
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
index fe1ebf1..dc27e79 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
@@ -50,7 +50,7 @@
 		ctrl = nv_rd32(aux, 0x00e4e4 + (ch * 0x50));
 		udelay(1);
 		if (!timeout--) {
-			AUX_ERR("begin idle timeout 0x%08x", ctrl);
+			AUX_ERR("begin idle timeout 0x%08x\n", ctrl);
 			return -EBUSY;
 		}
 	} while (ctrl & 0x03010000);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c
index ba4d28b..f5bbd38 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c
@@ -63,14 +63,14 @@
 }
 
 static u32
-nv04_instobj_rd32(struct nouveau_object *object, u32 addr)
+nv04_instobj_rd32(struct nouveau_object *object, u64 addr)
 {
 	struct nv04_instobj_priv *node = (void *)object;
 	return nv_ro32(object->engine, node->mem->offset + addr);
 }
 
 static void
-nv04_instobj_wr32(struct nouveau_object *object, u32 addr, u32 data)
+nv04_instobj_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	struct nv04_instobj_priv *node = (void *)object;
 	nv_wo32(object->engine, node->mem->offset + addr, data);
@@ -173,13 +173,13 @@
 }
 
 static u32
-nv04_instmem_rd32(struct nouveau_object *object, u32 addr)
+nv04_instmem_rd32(struct nouveau_object *object, u64 addr)
 {
 	return nv_rd32(object, 0x700000 + addr);
 }
 
 static void
-nv04_instmem_wr32(struct nouveau_object *object, u32 addr, u32 data)
+nv04_instmem_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	return nv_wr32(object, 0x700000 + addr, data);
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv40.c
index 73c52eb..da64253 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv40.c
@@ -111,14 +111,14 @@
 }
 
 static u32
-nv40_instmem_rd32(struct nouveau_object *object, u32 addr)
+nv40_instmem_rd32(struct nouveau_object *object, u64 addr)
 {
 	struct nv04_instmem_priv *priv = (void *)object;
 	return ioread32_native(priv->iomem + addr);
 }
 
 static void
-nv40_instmem_wr32(struct nouveau_object *object, u32 addr, u32 data)
+nv40_instmem_wr32(struct nouveau_object *object, u64 addr, u32 data)
 {
 	struct nv04_instmem_priv *priv = (void *)object;
 	iowrite32_native(data, priv->iomem + addr);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv50.c
index 27ef089..cfc7e31 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv50.c
@@ -76,7 +76,7 @@
 }
 
 static u32
-nv50_instobj_rd32(struct nouveau_object *object, u32 offset)
+nv50_instobj_rd32(struct nouveau_object *object, u64 offset)
 {
 	struct nv50_instmem_priv *priv = (void *)object->engine;
 	struct nv50_instobj_priv *node = (void *)object;
@@ -96,7 +96,7 @@
 }
 
 static void
-nv50_instobj_wr32(struct nouveau_object *object, u32 offset, u32 data)
+nv50_instobj_wr32(struct nouveau_object *object, u64 offset, u32 data)
 {
 	struct nv50_instmem_priv *priv = (void *)object->engine;
 	struct nv50_instobj_priv *node = (void *)object;
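
All three instmem implementations (nv04, nv40, nv50) receive the same
mechanical conversion: the accessor offset widens from u32 to u64.  The
common ofuncs slots they fill are assumed to now read:

    u32  (*rd32)(struct nouveau_object *, u64 addr);
    void (*wr32)(struct nouveau_object *, u64 addr, u32 data);
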
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
index de5721c..8379aaf 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
@@ -30,20 +30,20 @@
 	struct nouveau_mc *pmc = nouveau_mc(subdev);
 	const struct nouveau_mc_intr *map = pmc->intr_map;
 	struct nouveau_subdev *unit;
-	u32 stat;
+	u32 stat, intr;
 
-	stat = nv_rd32(pmc, 0x000100);
+	intr = stat = nv_rd32(pmc, 0x000100);
 	while (stat && map->stat) {
 		if (stat & map->stat) {
 			unit = nouveau_subdev(subdev, map->unit);
 			if (unit && unit->intr)
 				unit->intr(unit);
-			stat &= ~map->stat;
+			intr &= ~map->stat;
 		}
 		map++;
 	}
 
-	if (stat) {
+	if (intr) {
 		nv_error(pmc, "unknown intr 0x%08x\n", stat);
 	}
 }
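
The stat/intr split lets interrupt-map entries share bits, which the FB
entries added to the nv50/nv98 maps below rely on (0x0000d101 overlaps
bit 0x00000100, the usual FIFO entry).  Worked example, assuming a map
containing both entries and a status word of 0x00000101: FIFO matches and
its handler runs, FB matches on bits 0x0101 and its handler runs too, and
intr = 0x101 & ~0x100 & ~0xd101 = 0, so no spurious "unknown intr" is
logged.  Previously, clearing stat itself inside the loop meant an
overlapping entry could strip another unit's bit before that unit was
tested, silently dropping its interrupt.
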
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
index cedf33b..8d759f83 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
@@ -39,6 +39,7 @@
 	{ 0x00200000, NVDEV_SUBDEV_GPIO },
 	{ 0x04000000, NVDEV_ENGINE_DISP },
 	{ 0x80000000, NVDEV_ENGINE_SW },
+	{ 0x0000d101, NVDEV_SUBDEV_FB },
 	{},
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
index a001e4c4..ceb5c83 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
@@ -40,6 +40,7 @@
 	{ 0x00400000, NVDEV_ENGINE_COPY0 },	/* NVA3-     */
 	{ 0x04000000, NVDEV_ENGINE_DISP },
 	{ 0x80000000, NVDEV_ENGINE_SW },
+	{ 0x0040d101, NVDEV_SUBDEV_FB },
 	{},
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
index c2b81e3..9279668 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
@@ -36,6 +36,7 @@
 	{ 0x00000100, NVDEV_ENGINE_FIFO },
 	{ 0x00001000, NVDEV_ENGINE_GR },
 	{ 0x00008000, NVDEV_ENGINE_BSP },
+	{ 0x00020000, NVDEV_ENGINE_VP },
 	{ 0x00100000, NVDEV_SUBDEV_TIMER },
 	{ 0x00200000, NVDEV_SUBDEV_GPIO },
 	{ 0x02000000, NVDEV_SUBDEV_LTCG },
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index cbf1fc6..4124192 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -246,14 +246,26 @@
 		return nouveau_abi16_put(abi16, -ENODEV);
 
 	client = nv_client(abi16->client);
-
-	if (init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0)
-		return nouveau_abi16_put(abi16, -EINVAL);
-
 	device = nv_device(abi16->device);
 	imem   = nouveau_instmem(device);
 	pfb    = nouveau_fb(device);
 
+	/* hack to allow channel engine type specification on kepler */
+	if (device->card_type >= NV_E0) {
+		if (init->fb_ctxdma_handle != ~0)
+			init->fb_ctxdma_handle = NVE0_CHANNEL_IND_ENGINE_GR;
+		else
+			init->fb_ctxdma_handle = init->tt_ctxdma_handle;
+
+		/* allow flips to be executed if this is a graphics channel */
+		init->tt_ctxdma_handle = 0;
+		if (init->fb_ctxdma_handle == NVE0_CHANNEL_IND_ENGINE_GR)
+			init->tt_ctxdma_handle = 1;
+	}
+
+	if (init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0)
+		return nouveau_abi16_put(abi16, -EINVAL);
+
 	/* allocate "abi16 channel" data and make up a handle for it */
 	init->channel = ffsll(~abi16->handles);
 	if (!init->channel--)
@@ -268,11 +280,6 @@
 	abi16->handles |= (1 << init->channel);
 
 	/* create channel object and initialise dma and fence management */
-	if (device->card_type >= NV_E0) {
-		init->fb_ctxdma_handle = NVE0_CHANNEL_IND_ENGINE_GR;
-		init->tt_ctxdma_handle = 0;
-	}
-
 	ret = nouveau_channel_new(drm, cli, NVDRM_DEVICE, NVDRM_CHAN |
 				  init->channel, init->fb_ctxdma_handle,
 				  init->tt_ctxdma_handle, &chan->chan);
@@ -382,7 +389,7 @@
 	struct nouveau_abi16_chan *chan, *temp;
 	struct nouveau_abi16_ntfy *ntfy;
 	struct nouveau_object *object;
-	struct nv_dma_class args;
+	struct nv_dma_class args = {};
 	int ret;
 
 	if (unlikely(!abi16))
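
The Kepler hack above overloads the existing ABI fields rather than adding
new ones.  A hedged sketch of the two request styles on NV_E0 (field names
from the real uapi struct; vram_handle/gart_handle are placeholders, and
the CE0 constant is assumed from core/class.h):

    struct drm_nouveau_channel_alloc req = {};

    /* old-style userspace: real ctxdma handles; the kernel rewrites
     * this into a GR channel with flips enabled */
    req.fb_ctxdma_handle = vram_handle;
    req.tt_ctxdma_handle = gart_handle;

    /* new-style userspace: fb = ~0 flags "engine type in tt" */
    req.fb_ctxdma_handle = ~0;
    req.tt_ctxdma_handle = NVE0_CHANNEL_IND_ENGINE_CE0;
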
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 48783e1..d97f200 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -35,6 +35,14 @@
 	acpi_handle rom_handle;
 } nouveau_dsm_priv;
 
+bool nouveau_is_optimus(void) {
+	return nouveau_dsm_priv.optimus_detected;
+}
+
+bool nouveau_is_v1_dsm(void) {
+	return nouveau_dsm_priv.dsm_detected;
+}
+
 #define NOUVEAU_DSM_HAS_MUX 0x1
 #define NOUVEAU_DSM_HAS_OPT 0x2
 
@@ -183,9 +191,7 @@
 
 static int nouveau_dsm_switchto(enum vga_switcheroo_client_id id)
 {
-	/* perhaps the _DSM functions are mutually exclusive, but prepare for
-	 * the future */
-	if (!nouveau_dsm_priv.dsm_detected && nouveau_dsm_priv.optimus_detected)
+	if (!nouveau_dsm_priv.dsm_detected)
 		return 0;
 	if (id == VGA_SWITCHEROO_IGD)
 		return nouveau_dsm_switch_mux(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_LED_STAMINA);
@@ -201,7 +207,7 @@
 
 	/* Optimus laptops have the card already disabled in
 	 * nouveau_switcheroo_set_state */
-	if (!nouveau_dsm_priv.dsm_detected && nouveau_dsm_priv.optimus_detected)
+	if (!nouveau_dsm_priv.dsm_detected)
 		return 0;
 
 	return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dhandle, state);
@@ -283,7 +289,15 @@
 			has_optimus = 1;
 	}
 
-	if (vga_count == 2 && has_dsm && guid_valid) {
+	/* find the optimus DSM or the old v1 DSM */
+	if (has_optimus == 1) {
+		acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
+			&buffer);
+		printk(KERN_INFO "VGA switcheroo: detected Optimus DSM method %s handle\n",
+			acpi_method_name);
+		nouveau_dsm_priv.optimus_detected = true;
+		ret = true;
+	} else if (vga_count == 2 && has_dsm && guid_valid) {
 		acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
 			&buffer);
 		printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n",
@@ -292,14 +306,6 @@
 		ret = true;
 	}
 
-	if (has_optimus == 1) {
-		acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
-			&buffer);
-		printk(KERN_INFO "VGA switcheroo: detected Optimus DSM method %s handle\n",
-			acpi_method_name);
-		nouveau_dsm_priv.optimus_detected = true;
-		ret = true;
-	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.h b/drivers/gpu/drm/nouveau/nouveau_acpi.h
index 08af677..d0da230 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.h
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.h
@@ -4,6 +4,8 @@
 #define ROM_BIOS_PAGE 4096
 
 #if defined(CONFIG_ACPI)
+bool nouveau_is_optimus(void);
+bool nouveau_is_v1_dsm(void);
 void nouveau_register_dsm_handler(void);
 void nouveau_unregister_dsm_handler(void);
 void nouveau_switcheroo_optimus_dsm(void);
@@ -11,6 +13,8 @@
 bool nouveau_acpi_rom_supported(struct pci_dev *pdev);
 void *nouveau_acpi_edid(struct drm_device *, struct drm_connector *);
 #else
+static inline bool nouveau_is_optimus(void) { return false; };
+static inline bool nouveau_is_v1_dsm(void) { return false; };
 static inline void nouveau_register_dsm_handler(void) {}
 static inline void nouveau_unregister_dsm_handler(void) {}
 static inline void nouveau_switcheroo_optimus_dsm(void) {}
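
A usage sketch for the two new predicates (the call site is hypothetical;
the inline stubs above make it safe without CONFIG_ACPI):

    if (nouveau_is_optimus()) {
            /* Optimus: power control via the DSM, no display mux */
            nouveau_switcheroo_optimus_dsm();
    } else if (nouveau_is_v1_dsm()) {
            /* v1 _DSM: mux-based output switching is available */
    }
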
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 09fdef2..865eddf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -624,206 +624,6 @@
 	return 0;
 }
 
-/* BIT 'U'/'d' table encoder subtables have hashes matching them to
- * a particular set of encoders.
- *
- * This function returns true if a particular DCB entry matches.
- */
-bool
-bios_encoder_match(struct dcb_output *dcb, u32 hash)
-{
-	if ((hash & 0x000000f0) != (dcb->location << 4))
-		return false;
-	if ((hash & 0x0000000f) != dcb->type)
-		return false;
-	if (!(hash & (dcb->or << 16)))
-		return false;
-
-	switch (dcb->type) {
-	case DCB_OUTPUT_TMDS:
-	case DCB_OUTPUT_LVDS:
-	case DCB_OUTPUT_DP:
-		if (hash & 0x00c00000) {
-			if (!(hash & (dcb->sorconf.link << 22)))
-				return false;
-		}
-	default:
-		return true;
-	}
-}
-
-int
-nouveau_bios_run_display_table(struct drm_device *dev, u16 type, int pclk,
-			       struct dcb_output *dcbent, int crtc)
-{
-	/*
-	 * The display script table is located by the BIT 'U' table.
-	 *
-	 * It contains an array of pointers to various tables describing
-	 * a particular output type.  The first 32-bits of the output
-	 * tables contains similar information to a DCB entry, and is
-	 * used to decide whether that particular table is suitable for
-	 * the output you want to access.
-	 *
-	 * The "record header length" field here seems to indicate the
-	 * offset of the first configuration entry in the output tables.
-	 * This is 10 on most cards I've seen, but 12 has been witnessed
-	 * on DP cards, and there's another script pointer within the
-	 * header.
-	 *
-	 * offset + 0   ( 8 bits): version
-	 * offset + 1   ( 8 bits): header length
-	 * offset + 2   ( 8 bits): record length
-	 * offset + 3   ( 8 bits): number of records
-	 * offset + 4   ( 8 bits): record header length
-	 * offset + 5   (16 bits): pointer to first output script table
-	 */
-
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvbios *bios = &drm->vbios;
-	uint8_t *table = &bios->data[bios->display.script_table_ptr];
-	uint8_t *otable = NULL;
-	uint16_t script;
-	int i;
-
-	if (!bios->display.script_table_ptr) {
-		NV_ERROR(drm, "No pointer to output script table\n");
-		return 1;
-	}
-
-	/*
-	 * Nothing useful has been in any of the pre-2.0 tables I've seen,
-	 * so until they are, we really don't need to care.
-	 */
-	if (table[0] < 0x20)
-		return 1;
-
-	if (table[0] != 0x20 && table[0] != 0x21) {
-		NV_ERROR(drm, "Output script table version 0x%02x unknown\n",
-			 table[0]);
-		return 1;
-	}
-
-	/*
-	 * The output script tables describing a particular output type
-	 * look as follows:
-	 *
-	 * offset + 0   (32 bits): output this table matches (hash of DCB)
-	 * offset + 4   ( 8 bits): unknown
-	 * offset + 5   ( 8 bits): number of configurations
-	 * offset + 6   (16 bits): pointer to some script
-	 * offset + 8   (16 bits): pointer to some script
-	 *
-	 * headerlen == 10
-	 * offset + 10           : configuration 0
-	 *
-	 * headerlen == 12
-	 * offset + 10           : pointer to some script
-	 * offset + 12           : configuration 0
-	 *
-	 * Each config entry is as follows:
-	 *
-	 * offset + 0   (16 bits): unknown, assumed to be a match value
-	 * offset + 2   (16 bits): pointer to script table (clock set?)
-	 * offset + 4   (16 bits): pointer to script table (reset?)
-	 *
-	 * There doesn't appear to be a count value to say how many
-	 * entries exist in each script table, instead, a 0 value in
-	 * the first 16-bit word seems to indicate both the end of the
-	 * list and the default entry.  The second 16-bit word in the
-	 * script tables is a pointer to the script to execute.
-	 */
-
-	NV_DEBUG(drm, "Searching for output entry for %d %d %d\n",
-			dcbent->type, dcbent->location, dcbent->or);
-	for (i = 0; i < table[3]; i++) {
-		otable = ROMPTR(dev, table[table[1] + (i * table[2])]);
-		if (otable && bios_encoder_match(dcbent, ROM32(otable[0])))
-			break;
-	}
-
-	if (!otable) {
-		NV_DEBUG(drm, "failed to match any output table\n");
-		return 1;
-	}
-
-	if (pclk < -2 || pclk > 0) {
-		/* Try to find matching script table entry */
-		for (i = 0; i < otable[5]; i++) {
-			if (ROM16(otable[table[4] + i*6]) == type)
-				break;
-		}
-
-		if (i == otable[5]) {
-			NV_ERROR(drm, "Table 0x%04x not found for %d/%d, "
-				      "using first\n",
-				 type, dcbent->type, dcbent->or);
-			i = 0;
-		}
-	}
-
-	if (pclk == 0) {
-		script = ROM16(otable[6]);
-		if (!script) {
-			NV_DEBUG(drm, "output script 0 not found\n");
-			return 1;
-		}
-
-		NV_DEBUG(drm, "0x%04X: parsing output script 0\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
-	} else
-	if (pclk == -1) {
-		script = ROM16(otable[8]);
-		if (!script) {
-			NV_DEBUG(drm, "output script 1 not found\n");
-			return 1;
-		}
-
-		NV_DEBUG(drm, "0x%04X: parsing output script 1\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
-	} else
-	if (pclk == -2) {
-		if (table[4] >= 12)
-			script = ROM16(otable[10]);
-		else
-			script = 0;
-		if (!script) {
-			NV_DEBUG(drm, "output script 2 not found\n");
-			return 1;
-		}
-
-		NV_DEBUG(drm, "0x%04X: parsing output script 2\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
-	} else
-	if (pclk > 0) {
-		script = ROM16(otable[table[4] + i*6 + 2]);
-		if (script)
-			script = clkcmptable(bios, script, pclk);
-		if (!script) {
-			NV_DEBUG(drm, "clock script 0 not found\n");
-			return 1;
-		}
-
-		NV_DEBUG(drm, "0x%04X: parsing clock script 0\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
-	} else
-	if (pclk < 0) {
-		script = ROM16(otable[table[4] + i*6 + 4]);
-		if (script)
-			script = clkcmptable(bios, script, -pclk);
-		if (!script) {
-			NV_DEBUG(drm, "clock script 1 not found\n");
-			return 1;
-		}
-
-		NV_DEBUG(drm, "0x%04X: parsing clock script 1\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
-	}
-
-	return 0;
-}
-
-
 int run_tmds_table(struct drm_device *dev, struct dcb_output *dcbent, int head, int pxclk)
 {
 	/*
@@ -1212,31 +1012,6 @@
 	return 0;
 }
 
-static int
-parse_bit_U_tbl_entry(struct drm_device *dev, struct nvbios *bios,
-		      struct bit_entry *bitentry)
-{
-	/*
-	 * Parses the pointer to the G80 output script tables
-	 *
-	 * Starting at bitentry->offset:
-	 *
-	 * offset + 0  (16 bits): output script table pointer
-	 */
-
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	uint16_t outputscripttableptr;
-
-	if (bitentry->length != 3) {
-		NV_ERROR(drm, "Do not understand BIT U table\n");
-		return -EINVAL;
-	}
-
-	outputscripttableptr = ROM16(bios->data[bitentry->offset]);
-	bios->display.script_table_ptr = outputscripttableptr;
-	return 0;
-}
-
 struct bit_table {
 	const char id;
 	int (* const parse_fn)(struct drm_device *, struct nvbios *, struct bit_entry *);
@@ -1313,7 +1088,6 @@
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('M', M)); /* memory? */
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('L', lvds));
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('T', tmds));
-	parse_bit_table(bios, bitoffset, &BIT_TABLE('U', U));
 
 	return 0;
 }
@@ -2324,7 +2098,7 @@
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nvbios *bios = &drm->vbios;
-	int i, ret = 0;
+	int ret = 0;
 
 	/* Reset the BIOS head to 0. */
 	bios->state.crtchead = 0;
@@ -2337,13 +2111,6 @@
 		bios->fp.lvds_init_run = false;
 	}
 
-	if (nv_device(drm->device)->card_type >= NV_50) {
-		for (i = 0; bios->execute && i < bios->dcb.entries; i++) {
-			nouveau_bios_run_display_table(dev, 0, 0,
-						       &bios->dcb.entry[i], -1);
-		}
-	}
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 3befbb8..f68c54c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -128,12 +128,6 @@
 	} state;
 
 	struct {
-		struct dcb_output *output;
-		int crtc;
-		uint16_t script_table_ptr;
-	} display;
-
-	struct {
 		uint16_t fptablepointer;	/* also used by tmds */
 		uint16_t fpxlatetableptr;
 		int xlatwidth;
@@ -185,8 +179,6 @@
 int nouveau_run_vbios_init(struct drm_device *);
 struct dcb_connector_table_entry *
 nouveau_bios_connector_entry(struct drm_device *, int index);
-int nouveau_bios_run_display_table(struct drm_device *, u16 id, int clk,
-					  struct dcb_output *, int crtc);
 bool nouveau_bios_fp_mode(struct drm_device *, struct drm_display_mode *);
 uint8_t *nouveau_bios_embedded_edid(struct drm_device *);
 int nouveau_bios_parse_lvds_table(struct drm_device *, int pxclk,
@@ -195,6 +187,5 @@
 			  int head, int pxclk);
 int call_lvds_script(struct drm_device *, struct dcb_output *, int head,
 			    enum LVDS_script, int pxclk);
-bool bios_encoder_match(struct dcb_output *, u32 hash);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 35ac57f..5614c89 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -225,7 +225,7 @@
 
 	ret = ttm_bo_init(&drm->ttm.bdev, &nvbo->bo, size,
 			  type, &nvbo->placement,
-			  align >> PAGE_SHIFT, 0, false, NULL, acc_size, sg,
+			  align >> PAGE_SHIFT, false, NULL, acc_size, sg,
 			  nouveau_bo_del_ttm);
 	if (ret) {
 		/* ttm will call nouveau_bo_del_ttm if it fails.. */
@@ -315,7 +315,7 @@
 
 	nouveau_bo_placement_set(nvbo, memtype, 0);
 
-	ret = nouveau_bo_validate(nvbo, false, false, false);
+	ret = nouveau_bo_validate(nvbo, false, false);
 	if (ret == 0) {
 		switch (bo->mem.mem_type) {
 		case TTM_PL_VRAM:
@@ -351,7 +351,7 @@
 
 	nouveau_bo_placement_set(nvbo, bo->mem.placement, 0);
 
-	ret = nouveau_bo_validate(nvbo, false, false, false);
+	ret = nouveau_bo_validate(nvbo, false, false);
 	if (ret == 0) {
 		switch (bo->mem.mem_type) {
 		case TTM_PL_VRAM:
@@ -392,12 +392,12 @@
 
 int
 nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
-		    bool no_wait_reserve, bool no_wait_gpu)
+		    bool no_wait_gpu)
 {
 	int ret;
 
-	ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement, interruptible,
-			      no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement,
+			      interruptible, no_wait_gpu);
 	if (ret)
 		return ret;
 
@@ -556,8 +556,7 @@
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 			      struct nouveau_bo *nvbo, bool evict,
-			      bool no_wait_reserve, bool no_wait_gpu,
-			      struct ttm_mem_reg *new_mem)
+			      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_fence *fence = NULL;
 	int ret;
@@ -566,8 +565,8 @@
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL, evict,
-					no_wait_reserve, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, evict,
+					no_wait_gpu, new_mem);
 	nouveau_fence_unref(&fence);
 	return ret;
 }
@@ -965,8 +964,7 @@
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-		     bool no_wait_reserve, bool no_wait_gpu,
-		     struct ttm_mem_reg *new_mem)
+		     bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_channel *chan = drm->channel;
@@ -995,7 +993,6 @@
 	ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
 	if (ret == 0) {
 		ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
-						    no_wait_reserve,
 						    no_wait_gpu, new_mem);
 	}
 
@@ -1064,8 +1061,7 @@
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_reserve, bool no_wait_gpu,
-		      struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
 	struct ttm_placement placement;
@@ -1078,7 +1074,7 @@
 
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
@@ -1086,11 +1082,11 @@
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, &tmp_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_mem);
 	if (ret)
 		goto out;
 
-	ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
 out:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return ret;
@@ -1098,8 +1094,7 @@
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_reserve, bool no_wait_gpu,
-		      struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
 	struct ttm_placement placement;
@@ -1112,15 +1107,15 @@
 
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+	ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, new_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_mem);
 	if (ret)
 		goto out;
 
@@ -1195,8 +1190,7 @@
 
 static int
 nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
-		bool no_wait_reserve, bool no_wait_gpu,
-		struct ttm_mem_reg *new_mem)
+		bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
@@ -1220,23 +1214,26 @@
 
 	/* CPU copy if we have no accelerated method available */
 	if (!drm->ttm.move) {
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 		goto out;
 	}
 
 	/* Hardware assisted copy. */
 	if (new_mem->mem_type == TTM_PL_SYSTEM)
-		ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = nouveau_bo_move_flipd(bo, evict, intr,
+					    no_wait_gpu, new_mem);
 	else if (old_mem->mem_type == TTM_PL_SYSTEM)
-		ret = nouveau_bo_move_flips(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = nouveau_bo_move_flips(bo, evict, intr,
+					    no_wait_gpu, new_mem);
 	else
-		ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+		ret = nouveau_bo_move_m2mf(bo, evict, intr,
+					   no_wait_gpu, new_mem);
 
 	if (!ret)
 		goto out;
 
 	/* Fallback to software copy. */
-	ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 
 out:
 	if (nv_device(drm->device)->card_type < NV_50) {
@@ -1343,7 +1340,7 @@
 	nvbo->placement.fpfn = 0;
 	nvbo->placement.lpfn = mappable;
 	nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_VRAM, 0);
-	return nouveau_bo_validate(nvbo, false, true, false);
+	return nouveau_bo_validate(nvbo, false, false);
 }
 
 static int
@@ -1472,19 +1469,19 @@
 }
 
 static bool
-nouveau_bo_fence_signalled(void *sync_obj, void *sync_arg)
+nouveau_bo_fence_signalled(void *sync_obj)
 {
 	return nouveau_fence_done(sync_obj);
 }
 
 static int
-nouveau_bo_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
+nouveau_bo_fence_wait(void *sync_obj, bool lazy, bool intr)
 {
 	return nouveau_fence_wait(sync_obj, lazy, intr);
 }
 
 static int
-nouveau_bo_fence_flush(void *sync_obj, void *sync_arg)
+nouveau_bo_fence_flush(void *sync_obj)
 {
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index dec51b1..25ca379 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -76,7 +76,7 @@
 void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
 void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
 int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
-			 bool no_wait_reserve, bool no_wait_gpu);
+			 bool no_wait_gpu);
 
 struct nouveau_vma *
 nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *);
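
The no_wait_reserve argument is dropped from the whole validate/move call
chain here, tracking the matching ttm core change; callers convert
mechanically:

    /* before */ ret = nouveau_bo_validate(nvbo, false, true, false);
    /* after  */ ret = nouveau_bo_validate(nvbo, false, false);
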
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index c1d7301..174300b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -76,6 +76,8 @@
 		nouveau_object_del(client, NVDRM_DEVICE, chan->push.handle);
 		nouveau_bo_vma_del(chan->push.buffer, &chan->push.vma);
 		nouveau_bo_unmap(chan->push.buffer);
+		if (chan->push.buffer && chan->push.buffer->pin_refcnt)
+			nouveau_bo_unpin(chan->push.buffer);
 		nouveau_bo_ref(NULL, &chan->push.buffer);
 		kfree(chan);
 	}
@@ -267,7 +269,7 @@
 	struct nouveau_fb *pfb = nouveau_fb(device);
 	struct nouveau_software_chan *swch;
 	struct nouveau_object *object;
-	struct nv_dma_class args;
+	struct nv_dma_class args = {};
 	int ret, i;
 
 	/* allocate dma objects to cover all allowed vram, and gart */
@@ -346,7 +348,7 @@
 	/* allocate software object class (used for fences on <= nv05, and
 	 * to signal flip completion), bind it to a subchannel.
 	 */
-	if (chan != chan->drm->cechan) {
+	if ((device->card_type < NV_E0) || gart /* nve0: want_nvsw */) {
 		ret = nouveau_object_new(nv_object(client), chan->handle,
 					 NvSw, nouveau_abi16_swclass(chan->drm),
 					 NULL, 0, &object);
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index d3595b2..ac340ba 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -110,7 +110,6 @@
 	dev  = nv_connector->base.dev;
 	drm  = nouveau_drm(dev);
 	gpio = nouveau_gpio(drm->device);
-	NV_DEBUG(drm, "\n");
 
 	if (gpio && nv_connector->hpd != DCB_GPIO_UNUSED) {
 		gpio->isr_del(gpio, 0, nv_connector->hpd, 0xff,
@@ -221,7 +220,7 @@
 	}
 
 	if (nv_connector->type == DCB_CONNECTOR_DVI_I) {
-		drm_connector_property_set_value(connector,
+		drm_object_property_set_value(&connector->base,
 			dev->mode_config.dvi_i_subconnector_property,
 			nv_encoder->dcb->type == DCB_OUTPUT_TMDS ?
 			DRM_MODE_SUBCONNECTOR_DVID :
@@ -929,8 +928,6 @@
 	int type, ret = 0;
 	bool dummy;
 
-	NV_DEBUG(drm, "\n");
-
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		nv_connector = nouveau_connector(connector);
 		if (nv_connector->index == index)
@@ -1043,7 +1040,7 @@
 
 	/* Init DVI-I specific properties */
 	if (nv_connector->type == DCB_CONNECTOR_DVI_I)
-		drm_connector_attach_property(connector, dev->mode_config.dvi_i_subconnector_property, 0);
+		drm_object_attach_property(&connector->base, dev->mode_config.dvi_i_subconnector_property, 0);
 
 	/* Add overscan compensation options to digital outputs */
 	if (disp->underscan_property &&
@@ -1051,31 +1048,31 @@
 	     type == DRM_MODE_CONNECTOR_DVII ||
 	     type == DRM_MODE_CONNECTOR_HDMIA ||
 	     type == DRM_MODE_CONNECTOR_DisplayPort)) {
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      disp->underscan_property,
 					      UNDERSCAN_OFF);
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      disp->underscan_hborder_property,
 					      0);
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      disp->underscan_vborder_property,
 					      0);
 	}
 
 	/* Add hue and saturation options */
 	if (disp->vibrant_hue_property)
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      disp->vibrant_hue_property,
 					      90);
 	if (disp->color_vibrance_property)
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 					      disp->color_vibrance_property,
 					      150);
 
 	switch (nv_connector->type) {
 	case DCB_CONNECTOR_VGA:
 		if (nv_device(drm->device)->card_type >= NV_50) {
-			drm_connector_attach_property(connector,
+			drm_object_attach_property(&connector->base,
 					dev->mode_config.scaling_mode_property,
 					nv_connector->scaling_mode);
 		}
@@ -1088,18 +1085,18 @@
 	default:
 		nv_connector->scaling_mode = DRM_MODE_SCALE_FULLSCREEN;
 
-		drm_connector_attach_property(connector,
+		drm_object_attach_property(&connector->base,
 				dev->mode_config.scaling_mode_property,
 				nv_connector->scaling_mode);
 		if (disp->dithering_mode) {
 			nv_connector->dithering_mode = DITHERING_MODE_AUTO;
-			drm_connector_attach_property(connector,
+			drm_object_attach_property(&connector->base,
 						disp->dithering_mode,
 						nv_connector->dithering_mode);
 		}
 		if (disp->dithering_depth) {
 			nv_connector->dithering_depth = DITHERING_DEPTH_AUTO;
-			drm_connector_attach_property(connector,
+			drm_object_attach_property(&connector->base,
 						disp->dithering_depth,
 						nv_connector->dithering_depth);
 		}
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index ebdb876..20eb84c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -28,6 +28,7 @@
 #define __NOUVEAU_CONNECTOR_H__
 
 #include <drm/drm_edid.h>
+#include "nouveau_crtc.h"
 
 struct nouveau_i2c_port;
 
@@ -80,6 +81,21 @@
 	return container_of(con, struct nouveau_connector, base);
 }
 
+static inline struct nouveau_connector *
+nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc)
+{
+	struct drm_device *dev = nv_crtc->base.dev;
+	struct drm_connector *connector;
+	struct drm_crtc *crtc = to_drm_crtc(nv_crtc);
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder && connector->encoder->crtc == crtc)
+			return nouveau_connector(connector);
+	}
+
+	return NULL;
+}
+
 struct drm_connector *
 nouveau_connector_create(struct drm_device *, int index);
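
A hedged usage sketch for the helper now inlined in this header (the call
site is hypothetical; scaling_mode is a real nouveau_connector field used
elsewhere in this diff):

    struct nouveau_connector *nv_connector =
            nouveau_crtc_connector_get(nv_crtc);

    if (nv_connector &&
        nv_connector->scaling_mode != DRM_MODE_SCALE_NONE)
            /* fetch the panel's native mode from the connector, etc. */;
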
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index e6d0d1e..d1e5890 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h
@@ -82,16 +82,6 @@
 	return &crtc->base;
 }
 
-int nv50_crtc_create(struct drm_device *dev, int index);
-int nv50_crtc_cursor_set(struct drm_crtc *drm_crtc, struct drm_file *file_priv,
-			 uint32_t buffer_handle, uint32_t width,
-			 uint32_t height);
-int nv50_crtc_cursor_move(struct drm_crtc *drm_crtc, int x, int y);
-
 int nv04_cursor_init(struct nouveau_crtc *);
-int nv50_cursor_init(struct nouveau_crtc *);
-
-struct nouveau_connector *
-nouveau_crtc_connector_get(struct nouveau_crtc *crtc);
 
 #endif /* __NOUVEAU_CRTC_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 86124b1..e4188f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -98,12 +98,12 @@
 			nv_fb->r_dma = NvEvoVRAM_LP;
 
 		switch (fb->depth) {
-		case  8: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_8; break;
-		case 15: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_15; break;
-		case 16: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_16; break;
+		case  8: nv_fb->r_format = 0x1e00; break;
+		case 15: nv_fb->r_format = 0xe900; break;
+		case 16: nv_fb->r_format = 0xe800; break;
 		case 24:
-		case 32: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_24; break;
-		case 30: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_30; break;
+		case 32: nv_fb->r_format = 0xcf00; break;
+		case 30: nv_fb->r_format = 0xd100; break;
 		default:
 			 NV_ERROR(drm, "unknown depth %d\n", fb->depth);
 			 return -EINVAL;
@@ -324,7 +324,7 @@
 	disp->underscan_vborder_property =
 		drm_property_create_range(dev, 0, "underscan vborder", 0, 128);
 
-	if (gen == 1) {
+	if (gen >= 1) {
 		disp->vibrant_hue_property =
 			drm_property_create(dev, DRM_MODE_PROP_RANGE,
 					    "vibrant hue", 2);
@@ -366,10 +366,7 @@
 		if (nv_device(drm->device)->card_type < NV_50)
 			ret = nv04_display_create(dev);
 		else
-		if (nv_device(drm->device)->card_type < NV_D0)
 			ret = nv50_display_create(dev);
-		else
-			ret = nvd0_display_create(dev);
 		if (ret)
 			goto disp_create_err;
 
@@ -400,11 +397,12 @@
 	nouveau_backlight_exit(dev);
 	drm_vblank_cleanup(dev);
 
+	drm_kms_helper_poll_fini(dev);
+	drm_mode_config_cleanup(dev);
+
 	if (disp->dtor)
 		disp->dtor(dev);
 
-	drm_kms_helper_poll_fini(dev);
-	drm_mode_config_cleanup(dev);
 	nouveau_drm(dev)->display = NULL;
 	kfree(disp);
 }
@@ -659,10 +657,7 @@
 
 	/* Emit a page flip */
 	if (nv_device(drm->device)->card_type >= NV_50) {
-		if (nv_device(drm->device)->card_type >= NV_D0)
-			ret = nvd0_display_flip_next(crtc, fb, chan, 0);
-		else
-			ret = nv50_display_flip_next(crtc, fb, chan);
+		ret = nv50_display_flip_next(crtc, fb, chan, 0);
 		if (ret) {
 			mutex_unlock(&chan->cli->mutex);
 			goto fail_unreserve;
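
The raw values substituted in the framebuffer-depth switch earlier in this
file's diff are the former named EVO constants, kept here for reference:

    depth  8    -> 0x1e00  (was NV50_EVO_CRTC_FB_DEPTH_8)
    depth 15    -> 0xe900  (was NV50_EVO_CRTC_FB_DEPTH_15)
    depth 16    -> 0xe800  (was NV50_EVO_CRTC_FB_DEPTH_16)
    depth 24/32 -> 0xcf00  (was NV50_EVO_CRTC_FB_DEPTH_24)
    depth 30    -> 0xd100  (was NV50_EVO_CRTC_FB_DEPTH_30)
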
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 978a108..5983865 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -30,60 +30,17 @@
 #include "nouveau_encoder.h"
 #include "nouveau_crtc.h"
 
+#include <core/class.h>
+
 #include <subdev/gpio.h>
 #include <subdev/i2c.h>
 
-u8 *
-nouveau_dp_bios_data(struct drm_device *dev, struct dcb_output *dcb, u8 **entry)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct bit_entry d;
-	u8 *table;
-	int i;
-
-	if (bit_table(dev, 'd', &d)) {
-		NV_ERROR(drm, "BIT 'd' table not found\n");
-		return NULL;
-	}
-
-	if (d.version != 1) {
-		NV_ERROR(drm, "BIT 'd' table version %d unknown\n", d.version);
-		return NULL;
-	}
-
-	table = ROMPTR(dev, d.data[0]);
-	if (!table) {
-		NV_ERROR(drm, "displayport table pointer invalid\n");
-		return NULL;
-	}
-
-	switch (table[0]) {
-	case 0x20:
-	case 0x21:
-	case 0x30:
-	case 0x40:
-		break;
-	default:
-		NV_ERROR(drm, "displayport table 0x%02x unknown\n", table[0]);
-		return NULL;
-	}
-
-	for (i = 0; i < table[3]; i++) {
-		*entry = ROMPTR(dev, table[table[1] + (i * table[2])]);
-		if (*entry && bios_encoder_match(dcb, ROM32((*entry)[0])))
-			return table;
-	}
-
-	NV_ERROR(drm, "displayport encoder table not found\n");
-	return NULL;
-}
-
 /******************************************************************************
  * link training
  *****************************************************************************/
 struct dp_state {
 	struct nouveau_i2c_port *auxch;
-	struct dp_train_func *func;
+	struct nouveau_object *core;
 	struct dcb_output *dcb;
 	int crtc;
 	u8 *dpcd;
@@ -97,13 +54,20 @@
 dp_set_link_config(struct drm_device *dev, struct dp_state *dp)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct dcb_output *dcb = dp->dcb;
+	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
+	const u32 moff = (dp->crtc << 3) | (link << 2) | or;
 	u8 sink[2];
+	u32 data;
 
 	NV_DEBUG(drm, "%d lanes at %d KB/s\n", dp->link_nr, dp->link_bw);
 
 	/* set desired link configuration on the source */
-	dp->func->link_set(dev, dp->dcb, dp->crtc, dp->link_nr, dp->link_bw,
-			   dp->dpcd[2] & DP_ENHANCED_FRAME_CAP);
+	data = ((dp->link_bw / 27000) << 8) | dp->link_nr;
+	if (dp->dpcd[2] & DP_ENHANCED_FRAME_CAP)
+		data |= NV94_DISP_SOR_DP_LNKCTL_FRAME_ENH;
+
+	nv_call(dp->core, NV94_DISP_SOR_DP_LNKCTL + moff, data);
 
 	/* inform the sink of the new configuration */
 	sink[0] = dp->link_bw / 27000;
@@ -118,11 +82,14 @@
 dp_set_training_pattern(struct drm_device *dev, struct dp_state *dp, u8 pattern)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct dcb_output *dcb = dp->dcb;
+	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
+	const u32 moff = (dp->crtc << 3) | (link << 2) | or;
 	u8 sink_tp;
 
 	NV_DEBUG(drm, "training pattern %d\n", pattern);
 
-	dp->func->train_set(dev, dp->dcb, pattern);
+	nv_call(dp->core, NV94_DISP_SOR_DP_TRAIN + moff, pattern);
 
 	nv_rdaux(dp->auxch, DP_TRAINING_PATTERN_SET, &sink_tp, 1);
 	sink_tp &= ~DP_TRAINING_PATTERN_MASK;
@@ -134,6 +101,9 @@
 dp_link_train_commit(struct drm_device *dev, struct dp_state *dp)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct dcb_output *dcb = dp->dcb;
+	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
+	const u32 moff = (dp->crtc << 3) | (link << 2) | or;
 	int i;
 
 	for (i = 0; i < dp->link_nr; i++) {
@@ -148,7 +118,8 @@
 			dp->conf[i] |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
 
 		NV_DEBUG(drm, "config lane %d %02x\n", i, dp->conf[i]);
-		dp->func->train_adj(dev, dp->dcb, i, lvsw, lpre);
+
+		nv_call(dp->core, NV94_DISP_SOR_DP_DRVCTL(i) + moff, (lvsw << 8) | lpre);
 	}
 
 	return nv_wraux(dp->auxch, DP_TRAINING_LANE0_SET, dp->conf, 4);
@@ -234,59 +205,32 @@
 }
 
 static void
-dp_set_downspread(struct drm_device *dev, struct dp_state *dp, bool enable)
+dp_link_train_init(struct drm_device *dev, struct dp_state *dp, bool spread)
 {
-	u16 script = 0x0000;
-	u8 *entry, *table = nouveau_dp_bios_data(dev, dp->dcb, &entry);
-	if (table) {
-		if (table[0] >= 0x20 && table[0] <= 0x30) {
-			if (enable) script = ROM16(entry[12]);
-			else        script = ROM16(entry[14]);
-		} else
-		if (table[0] == 0x40) {
-			if (enable) script = ROM16(entry[11]);
-			else        script = ROM16(entry[13]);
-		}
-	}
+	struct dcb_output *dcb = dp->dcb;
+	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
+	const u32 moff = (dp->crtc << 3) | (link << 2) | or;
 
-	nouveau_bios_run_init_table(dev, script, dp->dcb, dp->crtc);
-}
-
-static void
-dp_link_train_init(struct drm_device *dev, struct dp_state *dp)
-{
-	u16 script = 0x0000;
-	u8 *entry, *table = nouveau_dp_bios_data(dev, dp->dcb, &entry);
-	if (table) {
-		if (table[0] >= 0x20 && table[0] <= 0x30)
-			script = ROM16(entry[6]);
-		else
-		if (table[0] == 0x40)
-			script = ROM16(entry[5]);
-	}
-
-	nouveau_bios_run_init_table(dev, script, dp->dcb, dp->crtc);
+	nv_call(dp->core, NV94_DISP_SOR_DP_TRAIN + moff, (spread ?
+			  NV94_DISP_SOR_DP_TRAIN_INIT_SPREAD_ON :
+			  NV94_DISP_SOR_DP_TRAIN_INIT_SPREAD_OFF) |
+			  NV94_DISP_SOR_DP_TRAIN_OP_INIT);
 }
 
 static void
 dp_link_train_fini(struct drm_device *dev, struct dp_state *dp)
 {
-	u16 script = 0x0000;
-	u8 *entry, *table = nouveau_dp_bios_data(dev, dp->dcb, &entry);
-	if (table) {
-		if (table[0] >= 0x20 && table[0] <= 0x30)
-			script = ROM16(entry[8]);
-		else
-		if (table[0] == 0x40)
-			script = ROM16(entry[7]);
-	}
+	struct dcb_output *dcb = dp->dcb;
+	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
+	const u32 moff = (dp->crtc << 3) | (link << 2) | or;
 
-	nouveau_bios_run_init_table(dev, script, dp->dcb, dp->crtc);
+	nv_call(dp->core, NV94_DISP_SOR_DP_TRAIN + moff,
+			  NV94_DISP_SOR_DP_TRAIN_OP_FINI);
 }
 
 static bool
 nouveau_dp_link_train(struct drm_encoder *encoder, u32 datarate,
-		      struct dp_train_func *func)
+		      struct nouveau_object *core)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
@@ -304,7 +248,7 @@
 	if (!dp.auxch)
 		return false;
 
-	dp.func = func;
+	dp.core = core;
 	dp.dcb = nv_encoder->dcb;
 	dp.crtc = nv_crtc->index;
 	dp.dpcd = nv_encoder->dp.dpcd;
@@ -318,11 +262,8 @@
 	 */
 	gpio->irq(gpio, 0, nv_connector->hpd, 0xff, false);
 
-	/* enable down-spreading, if possible */
-	dp_set_downspread(dev, &dp, nv_encoder->dp.dpcd[3] & 1);
-
-	/* execute pre-train script from vbios */
-	dp_link_train_init(dev, &dp);
+	/* enable down-spreading and execute pre-train script from vbios */
+	dp_link_train_init(dev, &dp, nv_encoder->dp.dpcd[3] & 1);
 
 	/* start off at highest link rate supported by encoder and display */
 	while (*link_bw > nv_encoder->dp.link_bw)
@@ -365,7 +306,7 @@
 
 void
 nouveau_dp_dpms(struct drm_encoder *encoder, int mode, u32 datarate,
-		struct dp_train_func *func)
+		struct nouveau_object *core)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
@@ -385,7 +326,7 @@
 	nv_wraux(auxch, DP_SET_POWER, &status, 1);
 
 	if (mode == DRM_MODE_DPMS_ON)
-		nouveau_dp_link_train(encoder, datarate, func);
+		nouveau_dp_link_train(encoder, datarate, core);
 }
 
 static void
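
The method offset used throughout the new link-training path packs CRTC,
link and OR index into one value.  A worked example: for dcb->or = 0x4
(or = ffs(4) - 1 = 2), sorconf.link = 2 (link = !(2 & 1) = 1) and crtc 1:

    moff = (1 << 3) | (1 << 2) | 2;  /* = 0xe */
    nv_call(dp->core, NV94_DISP_SOR_DP_TRAIN + 0xe,
            NV94_DISP_SOR_DP_TRAIN_OP_INIT);
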
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 8503b2e..01c403d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -49,8 +49,6 @@
 #include "nouveau_fbcon.h"
 #include "nouveau_fence.h"
 
-#include "nouveau_ttm.h"
-
 MODULE_PARM_DESC(config, "option string to pass to driver core");
 static char *nouveau_config;
 module_param_named(config, nouveau_config, charp, 0400);
@@ -149,7 +147,7 @@
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
 
 		arg0 = NVE0_CHANNEL_IND_ENGINE_GR;
-		arg1 = 0;
+		arg1 = 1;
 	} else {
 		arg0 = NvDmaFB;
 		arg1 = NvDmaTT;
@@ -224,6 +222,7 @@
 	boot = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
 #endif
 	remove_conflicting_framebuffers(aper, "nouveaufb", boot);
+	kfree(aper);
 
 	ret = nouveau_device_create(pdev, nouveau_name(pdev), pci_name(pdev),
 				    nouveau_config, nouveau_debug, &device);
@@ -395,17 +394,12 @@
 }
 
 int
-nouveau_drm_suspend(struct pci_dev *pdev, pm_message_t pm_state)
+nouveau_do_suspend(struct drm_device *dev)
 {
-	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_cli *cli;
 	int ret;
 
-	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
-	    pm_state.event == PM_EVENT_PRETHAW)
-		return 0;
-
 	if (dev->mode_config.num_crtc) {
 		NV_INFO(drm, "suspending fbcon...\n");
 		nouveau_fbcon_set_suspend(dev, 1);
@@ -436,13 +430,6 @@
 		goto fail_client;
 
 	nouveau_agp_fini(drm);
-
-	pci_save_state(pdev);
-	if (pm_state.event == PM_EVENT_SUSPEND) {
-		pci_disable_device(pdev);
-		pci_set_power_state(pdev, PCI_D3hot);
-	}
-
 	return 0;
 
 fail_client:
@@ -457,24 +444,33 @@
 	return ret;
 }
 
-int
-nouveau_drm_resume(struct pci_dev *pdev)
+int nouveau_pmops_suspend(struct device *dev)
 {
-	struct drm_device *dev = pci_get_drvdata(pdev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_cli *cli;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	int ret;
 
-	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
-	NV_INFO(drm, "re-enabling device...\n");
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	ret = pci_enable_device(pdev);
+	ret = nouveau_do_suspend(drm_dev);
 	if (ret)
 		return ret;
-	pci_set_master(pdev);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+
+	return 0;
+}
+
+int
+nouveau_do_resume(struct drm_device *dev)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_cli *cli;
+
+	NV_INFO(drm, "re-enabling device...\n");
 
 	nouveau_agp_reset(drm);
 
@@ -500,6 +496,42 @@
 	return 0;
 }
 
+int nouveau_pmops_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	int ret;
+
+	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		return 0;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+	pci_set_master(pdev);
+
+	return nouveau_do_resume(drm_dev);
+}
+
+static int nouveau_pmops_freeze(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+
+	return nouveau_do_suspend(drm_dev);
+}
+
+static int nouveau_pmops_thaw(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+
+	return nouveau_do_resume(drm_dev);
+}
+
+
 static int
 nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 {
@@ -652,14 +684,22 @@
 	{}
 };
 
+static const struct dev_pm_ops nouveau_pm_ops = {
+	.suspend = nouveau_pmops_suspend,
+	.resume = nouveau_pmops_resume,
+	.freeze = nouveau_pmops_freeze,
+	.thaw = nouveau_pmops_thaw,
+	.poweroff = nouveau_pmops_freeze,
+	.restore = nouveau_pmops_resume,
+};
+
 static struct pci_driver
 nouveau_drm_pci_driver = {
 	.name = "nouveau",
 	.id_table = nouveau_drm_pci_table,
 	.probe = nouveau_drm_probe,
 	.remove = nouveau_drm_remove,
-	.suspend = nouveau_drm_suspend,
-	.resume = nouveau_drm_resume,
+	.driver.pm = &nouveau_pm_ops,
 };
 
 static int __init
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index a101699..aa89eb9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -129,8 +129,8 @@
 	return nv_device(nouveau_drm(dev)->device);
 }
 
-int nouveau_drm_suspend(struct pci_dev *, pm_message_t);
-int nouveau_drm_resume(struct pci_dev *);
+int nouveau_pmops_suspend(struct device *);
+int nouveau_pmops_resume(struct device *);
 
 #define NV_FATAL(cli, fmt, args...) nv_fatal((cli), fmt, ##args)
 #define NV_ERROR(cli, fmt, args...) nv_error((cli), fmt, ##args)
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index 6a17bf2..d0d95bd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -93,14 +93,9 @@
 /* nouveau_dp.c */
 bool nouveau_dp_detect(struct drm_encoder *);
 void nouveau_dp_dpms(struct drm_encoder *, int mode, u32 datarate,
-		     struct dp_train_func *);
-u8 *nouveau_dp_bios_data(struct drm_device *, struct dcb_output *, u8 **);
+		     struct nouveau_object *);
 
 struct nouveau_connector *
 nouveau_encoder_connector_get(struct nouveau_encoder *encoder);
-int nv50_sor_create(struct drm_connector *, struct dcb_output *);
-void nv50_sor_dp_calc_tu(struct drm_device *, int, int, u32, u32);
-int nv50_dac_create(struct drm_connector *, struct dcb_output *);
-
 
 #endif /* __NOUVEAU_ENCODER_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 5e2f521..8bf695c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -433,7 +433,7 @@
 			return ret;
 		}
 
-		ret = nouveau_bo_validate(nvbo, true, false, false);
+		ret = nouveau_bo_validate(nvbo, true, false);
 		if (unlikely(ret)) {
 			if (ret != -ERESTARTSYS)
 				NV_ERROR(drm, "fail ttm_validate\n");
diff --git a/drivers/gpu/drm/nouveau/nouveau_hdmi.c b/drivers/gpu/drm/nouveau/nouveau_hdmi.c
deleted file mode 100644
index 2c672ce..0000000
--- a/drivers/gpu/drm/nouveau/nouveau_hdmi.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright 2011 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-
-#include <drm/drmP.h>
-#include "nouveau_drm.h"
-#include "nouveau_connector.h"
-#include "nouveau_encoder.h"
-#include "nouveau_crtc.h"
-
-static bool
-hdmi_sor(struct drm_encoder *encoder)
-{
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	if (nv_device(drm->device)->chipset <  0xa3 ||
-	    nv_device(drm->device)->chipset == 0xaa ||
-	    nv_device(drm->device)->chipset == 0xac)
-		return false;
-	return true;
-}
-
-static inline u32
-hdmi_base(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
-	if (!hdmi_sor(encoder))
-		return 0x616500 + (nv_crtc->index * 0x800);
-	return 0x61c500 + (nv_encoder->or * 0x800);
-}
-
-static void
-hdmi_wr32(struct drm_encoder *encoder, u32 reg, u32 val)
-{
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	nv_wr32(device, hdmi_base(encoder) + reg, val);
-}
-
-static u32
-hdmi_rd32(struct drm_encoder *encoder, u32 reg)
-{
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	return nv_rd32(device, hdmi_base(encoder) + reg);
-}
-
-static u32
-hdmi_mask(struct drm_encoder *encoder, u32 reg, u32 mask, u32 val)
-{
-	u32 tmp = hdmi_rd32(encoder, reg);
-	hdmi_wr32(encoder, reg, (tmp & ~mask) | val);
-	return tmp;
-}
-
-static void
-nouveau_audio_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	u32 or = nv_encoder->or * 0x800;
-
-	if (hdmi_sor(encoder))
-		nv_mask(device, 0x61c448 + or, 0x00000003, 0x00000000);
-}
-
-static void
-nouveau_audio_mode_set(struct drm_encoder *encoder,
-		       struct drm_display_mode *mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	struct nouveau_connector *nv_connector;
-	u32 or = nv_encoder->or * 0x800;
-	int i;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!drm_detect_monitor_audio(nv_connector->edid)) {
-		nouveau_audio_disconnect(encoder);
-		return;
-	}
-
-	if (hdmi_sor(encoder)) {
-		nv_mask(device, 0x61c448 + or, 0x00000001, 0x00000001);
-
-		drm_edid_to_eld(&nv_connector->base, nv_connector->edid);
-		if (nv_connector->base.eld[0]) {
-			u8 *eld = nv_connector->base.eld;
-			for (i = 0; i < eld[2] * 4; i++)
-				nv_wr32(device, 0x61c440 + or, (i << 8) | eld[i]);
-			for (i = eld[2] * 4; i < 0x60; i++)
-				nv_wr32(device, 0x61c440 + or, (i << 8) | 0x00);
-			nv_mask(device, 0x61c448 + or, 0x00000002, 0x00000002);
-		}
-	}
-}
-
-static void
-nouveau_hdmi_infoframe(struct drm_encoder *encoder, u32 ctrl, u8 *frame)
-{
-	/* calculate checksum for the infoframe */
-	u8 sum = 0, i;
-	for (i = 0; i < frame[2]; i++)
-		sum += frame[i];
-	frame[3] = 256 - sum;
-
-	/* disable infoframe, and write header */
-	hdmi_mask(encoder, ctrl + 0x00, 0x00000001, 0x00000000);
-	hdmi_wr32(encoder, ctrl + 0x08, *(u32 *)frame & 0xffffff);
-
-	/* register scans tell me the audio infoframe has only one set of
-	 * subpack regs, according to tegra (gee nvidia, it'd be nice if we
-	 * could get those docs too!), the hdmi block pads out the rest of
-	 * the packet on its own.
-	 */
-	if (ctrl == 0x020)
-		frame[2] = 6;
-
-	/* write out checksum and data, weird weird 7 byte register pairs */
-	for (i = 0; i < frame[2] + 1; i += 7) {
-		u32 rsubpack = ctrl + 0x0c + ((i / 7) * 8);
-		u32 *subpack = (u32 *)&frame[3 + i];
-		hdmi_wr32(encoder, rsubpack + 0, subpack[0]);
-		hdmi_wr32(encoder, rsubpack + 4, subpack[1] & 0xffffff);
-	}
-
-	/* enable the infoframe */
-	hdmi_mask(encoder, ctrl, 0x00000001, 0x00000001);
-}
-
-static void
-nouveau_hdmi_video_infoframe(struct drm_encoder *encoder,
-			     struct drm_display_mode *mode)
-{
-	const u8 Y = 0, A = 0, B = 0, S = 0, C = 0, M = 0, R = 0;
-	const u8 ITC = 0, EC = 0, Q = 0, SC = 0, VIC = 0, PR = 0;
-	const u8 bar_top = 0, bar_bottom = 0, bar_left = 0, bar_right = 0;
-	u8 frame[20];
-
-	frame[0x00] = 0x82; /* AVI infoframe */
-	frame[0x01] = 0x02; /* version */
-	frame[0x02] = 0x0d; /* length */
-	frame[0x03] = 0x00;
-	frame[0x04] = (Y << 5) | (A << 4) | (B << 2) | S;
-	frame[0x05] = (C << 6) | (M << 4) | R;
-	frame[0x06] = (ITC << 7) | (EC << 4) | (Q << 2) | SC;
-	frame[0x07] = VIC;
-	frame[0x08] = PR;
-	frame[0x09] = bar_top & 0xff;
-	frame[0x0a] = bar_top >> 8;
-	frame[0x0b] = bar_bottom & 0xff;
-	frame[0x0c] = bar_bottom >> 8;
-	frame[0x0d] = bar_left & 0xff;
-	frame[0x0e] = bar_left >> 8;
-	frame[0x0f] = bar_right & 0xff;
-	frame[0x10] = bar_right >> 8;
-	frame[0x11] = 0x00;
-	frame[0x12] = 0x00;
-	frame[0x13] = 0x00;
-
-	nouveau_hdmi_infoframe(encoder, 0x020, frame);
-}
-
-static void
-nouveau_hdmi_audio_infoframe(struct drm_encoder *encoder,
-			     struct drm_display_mode *mode)
-{
-	const u8 CT = 0x00, CC = 0x01, ceaSS = 0x00, SF = 0x00, FMT = 0x00;
-	const u8 CA = 0x00, DM_INH = 0, LSV = 0x00;
-	u8 frame[12];
-
-	frame[0x00] = 0x84;	/* Audio infoframe */
-	frame[0x01] = 0x01;	/* version */
-	frame[0x02] = 0x0a;	/* length */
-	frame[0x03] = 0x00;
-	frame[0x04] = (CT << 4) | CC;
-	frame[0x05] = (SF << 2) | ceaSS;
-	frame[0x06] = FMT;
-	frame[0x07] = CA;
-	frame[0x08] = (DM_INH << 7) | (LSV << 3);
-	frame[0x09] = 0x00;
-	frame[0x0a] = 0x00;
-	frame[0x0b] = 0x00;
-
-	nouveau_hdmi_infoframe(encoder, 0x000, frame);
-}
-
-static void
-nouveau_hdmi_disconnect(struct drm_encoder *encoder)
-{
-	nouveau_audio_disconnect(encoder);
-
-	/* disable audio and avi infoframes */
-	hdmi_mask(encoder, 0x000, 0x00000001, 0x00000000);
-	hdmi_mask(encoder, 0x020, 0x00000001, 0x00000000);
-
-	/* disable hdmi */
-	hdmi_mask(encoder, 0x0a4, 0x40000000, 0x00000000);
-}
-
-void
-nouveau_hdmi_mode_set(struct drm_encoder *encoder,
-		      struct drm_display_mode *mode)
-{
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *nv_connector;
-	u32 max_ac_packet, rekey;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!mode || !nv_connector || !nv_connector->edid ||
-	    !drm_detect_hdmi_monitor(nv_connector->edid)) {
-		nouveau_hdmi_disconnect(encoder);
-		return;
-	}
-
-	nouveau_hdmi_video_infoframe(encoder, mode);
-	nouveau_hdmi_audio_infoframe(encoder, mode);
-
-	hdmi_mask(encoder, 0x0d0, 0x00070001, 0x00010001); /* SPARE, HW_CTS */
-	hdmi_mask(encoder, 0x068, 0x00010101, 0x00000000); /* ACR_CTRL, ?? */
-	hdmi_mask(encoder, 0x078, 0x80000000, 0x80000000); /* ACR_0441_ENABLE */
-
-	nv_mask(device, 0x61733c, 0x00100000, 0x00100000); /* RESETF */
-	nv_mask(device, 0x61733c, 0x10000000, 0x10000000); /* LOOKUP_EN */
-	nv_mask(device, 0x61733c, 0x00100000, 0x00000000); /* !RESETF */
-
-	/* value matches nvidia binary driver, and tegra constant */
-	rekey = 56;
-
-	max_ac_packet  = mode->htotal - mode->hdisplay;
-	max_ac_packet -= rekey;
-	max_ac_packet -= 18; /* constant from tegra */
-	max_ac_packet /= 32;
-
-	/* enable hdmi */
-	hdmi_mask(encoder, 0x0a4, 0x5f1f003f, 0x40000000 | /* enable */
-					      0x1f000000 | /* unknown */
-					      max_ac_packet << 16 |
-					      rekey);
-
-	nouveau_audio_mode_set(encoder, mode);
-}
diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
index 1d8cb50..1303680 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -60,18 +60,6 @@
 		return IRQ_NONE;
 
 	nv_subdev(pmc)->intr(nv_subdev(pmc));
-
-	if (dev->mode_config.num_crtc) {
-		if (device->card_type >= NV_D0) {
-			if (nv_rd32(device, 0x000100) & 0x04000000)
-				nvd0_display_intr(dev);
-		} else
-		if (device->card_type >= NV_50) {
-			if (nv_rd32(device, 0x000100) & 0x04000000)
-				nv50_display_intr(dev);
-		}
-	}
-
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index 366462c..3543fec 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -155,10 +155,6 @@
 		return ret;
 	nvbo = *pnvbo;
 
-	/* we restrict allowed domains on nv50+ to only the types
-	 * that were requested at creation time.  not possibly on
-	 * earlier chips without busting the ABI.
-	 */
 	nvbo->valid_domains = NOUVEAU_GEM_DOMAIN_GART;
 	nvbo->gem = drm_gem_object_alloc(dev, nvbo->bo.mem.size);
 	if (!nvbo->gem) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index 6f0ac64..25d3495 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -31,12 +31,11 @@
 			     enum vga_switcheroo_state state)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
 
 	if (state == VGA_SWITCHEROO_ON) {
 		printk(KERN_ERR "VGA switcheroo: switched nouveau on\n");
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
-		nouveau_drm_resume(pdev);
+		nouveau_pmops_resume(&pdev->dev);
 		drm_kms_helper_poll_enable(dev);
 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	} else {
@@ -44,7 +43,7 @@
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		drm_kms_helper_poll_disable(dev);
 		nouveau_switcheroo_optimus_dsm();
-		nouveau_drm_suspend(pdev, pmm);
+		nouveau_pmops_suspend(&pdev->dev);
 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index 82a0d9c..6578cd2 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -730,6 +730,7 @@
 	drm_crtc_cleanup(crtc);
 
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
+	nouveau_bo_unpin(nv_crtc->cursor.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
 	kfree(nv_crtc);
 }
@@ -1056,8 +1057,11 @@
 			     0, 0x0000, NULL, &nv_crtc->cursor.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
-		if (!ret)
+		if (!ret) {
 			ret = nouveau_bo_map(nv_crtc->cursor.nvbo);
+			if (ret)
+				nouveau_bo_unpin(nv_crtc->cursor.nvbo);
+		}
 		if (ret)
 			nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
 	}
diff --git a/drivers/gpu/drm/nouveau/nv04_display.c b/drivers/gpu/drm/nouveau/nv04_display.c
index 846050f..2cd6fb8 100644
--- a/drivers/gpu/drm/nouveau/nv04_display.c
+++ b/drivers/gpu/drm/nouveau/nv04_display.c
@@ -60,8 +60,6 @@
 	struct nv04_display *disp;
 	int i, ret;
 
-	NV_DEBUG(drm, "\n");
-
 	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
 	if (!disp)
 		return -ENOMEM;
@@ -132,13 +130,10 @@
 void
 nv04_display_destroy(struct drm_device *dev)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nv04_display *disp = nv04_display(dev);
 	struct drm_encoder *encoder;
 	struct drm_crtc *crtc;
 
-	NV_DEBUG(drm, "\n");
-
 	/* Turn every CRTC off. */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct drm_mode_set modeset = {
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c
index ce752bf..7ae7f97 100644
--- a/drivers/gpu/drm/nouveau/nv10_fence.c
+++ b/drivers/gpu/drm/nouveau/nv10_fence.c
@@ -155,6 +155,8 @@
 {
 	struct nv10_fence_priv *priv = drm->fence;
 	nouveau_bo_unmap(priv->bo);
+	if (priv->bo)
+		nouveau_bo_unpin(priv->bo);
 	nouveau_bo_ref(NULL, &priv->bo);
 	drm->fence = NULL;
 	kfree(priv);
@@ -183,8 +185,11 @@
 				     0, 0x0000, NULL, &priv->bo);
 		if (!ret) {
 			ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
-			if (!ret)
+			if (!ret) {
 				ret = nouveau_bo_map(priv->bo);
+				if (ret)
+					nouveau_bo_unpin(priv->bo);
+			}
 			if (ret)
 				nouveau_bo_ref(NULL, &priv->bo);
 		}
diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
index 897b636..2ca276a 100644
--- a/drivers/gpu/drm/nouveau/nv17_tv.c
+++ b/drivers/gpu/drm/nouveau/nv17_tv.c
@@ -195,7 +195,7 @@
 		break;
 	}
 
-	drm_connector_property_set_value(connector,
+	drm_object_property_set_value(&connector->base,
 					 conf->tv_subconnector_property,
 					 tv_enc->subconnector);
 
@@ -672,25 +672,25 @@
 
 	drm_mode_create_tv_properties(dev, num_tv_norms, nv17_tv_norm_names);
 
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					conf->tv_select_subconnector_property,
 					tv_enc->select_subconnector);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					conf->tv_subconnector_property,
 					tv_enc->subconnector);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					conf->tv_mode_property,
 					tv_enc->tv_norm);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					conf->tv_flicker_reduction_property,
 					tv_enc->flicker);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					conf->tv_saturation_property,
 					tv_enc->saturation);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					conf->tv_hue_property,
 					tv_enc->hue);
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 					conf->tv_overscan_property,
 					tv_enc->overscan);
 
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
deleted file mode 100644
index 222de77..0000000
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ /dev/null
@@ -1,764 +0,0 @@
-/*
- * Copyright (C) 2008 Maarten Maathuis.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
-
-#include "nouveau_reg.h"
-#include "nouveau_drm.h"
-#include "nouveau_dma.h"
-#include "nouveau_gem.h"
-#include "nouveau_hw.h"
-#include "nouveau_encoder.h"
-#include "nouveau_crtc.h"
-#include "nouveau_connector.h"
-#include "nv50_display.h"
-
-#include <subdev/clock.h>
-
-static void
-nv50_crtc_lut_load(struct drm_crtc *crtc)
-{
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	void __iomem *lut = nvbo_kmap_obj_iovirtual(nv_crtc->lut.nvbo);
-	int i;
-
-	NV_DEBUG(drm, "\n");
-
-	for (i = 0; i < 256; i++) {
-		writew(nv_crtc->lut.r[i] >> 2, lut + 8*i + 0);
-		writew(nv_crtc->lut.g[i] >> 2, lut + 8*i + 2);
-		writew(nv_crtc->lut.b[i] >> 2, lut + 8*i + 4);
-	}
-
-	if (nv_crtc->lut.depth == 30) {
-		writew(nv_crtc->lut.r[i - 1] >> 2, lut + 8*i + 0);
-		writew(nv_crtc->lut.g[i - 1] >> 2, lut + 8*i + 2);
-		writew(nv_crtc->lut.b[i - 1] >> 2, lut + 8*i + 4);
-	}
-}
-
-int
-nv50_crtc_blank(struct nouveau_crtc *nv_crtc, bool blanked)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	int index = nv_crtc->index, ret;
-
-	NV_DEBUG(drm, "index %d\n", nv_crtc->index);
-	NV_DEBUG(drm, "%s\n", blanked ? "blanked" : "unblanked");
-
-	if (blanked) {
-		nv_crtc->cursor.hide(nv_crtc, false);
-
-		ret = RING_SPACE(evo, nv_device(drm->device)->chipset != 0x50 ? 7 : 5);
-		if (ret) {
-			NV_ERROR(drm, "no space while blanking crtc\n");
-			return ret;
-		}
-		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, CLUT_MODE), 2);
-		OUT_RING(evo, NV50_EVO_CRTC_CLUT_MODE_BLANK);
-		OUT_RING(evo, 0);
-		if (nv_device(drm->device)->chipset != 0x50) {
-			BEGIN_NV04(evo, 0, NV84_EVO_CRTC(index, CLUT_DMA), 1);
-			OUT_RING(evo, NV84_EVO_CRTC_CLUT_DMA_HANDLE_NONE);
-		}
-
-		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, FB_DMA), 1);
-		OUT_RING(evo, NV50_EVO_CRTC_FB_DMA_HANDLE_NONE);
-	} else {
-		if (nv_crtc->cursor.visible)
-			nv_crtc->cursor.show(nv_crtc, false);
-		else
-			nv_crtc->cursor.hide(nv_crtc, false);
-
-		ret = RING_SPACE(evo, nv_device(drm->device)->chipset != 0x50 ? 10 : 8);
-		if (ret) {
-			NV_ERROR(drm, "no space while unblanking crtc\n");
-			return ret;
-		}
-		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, CLUT_MODE), 2);
-		OUT_RING(evo, nv_crtc->lut.depth == 8 ?
-				NV50_EVO_CRTC_CLUT_MODE_OFF :
-				NV50_EVO_CRTC_CLUT_MODE_ON);
-		OUT_RING(evo, nv_crtc->lut.nvbo->bo.offset >> 8);
-		if (nv_device(drm->device)->chipset != 0x50) {
-			BEGIN_NV04(evo, 0, NV84_EVO_CRTC(index, CLUT_DMA), 1);
-			OUT_RING(evo, NvEvoVRAM);
-		}
-
-		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, FB_OFFSET), 2);
-		OUT_RING(evo, nv_crtc->fb.offset >> 8);
-		OUT_RING(evo, 0);
-		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, FB_DMA), 1);
-		if (nv_device(drm->device)->chipset != 0x50)
-			if (nv_crtc->fb.tile_flags == 0x7a00 ||
-			    nv_crtc->fb.tile_flags == 0xfe00)
-				OUT_RING(evo, NvEvoFB32);
-			else
-			if (nv_crtc->fb.tile_flags == 0x7000)
-				OUT_RING(evo, NvEvoFB16);
-			else
-				OUT_RING(evo, NvEvoVRAM_LP);
-		else
-			OUT_RING(evo, NvEvoVRAM_LP);
-	}
-
-	nv_crtc->fb.blanked = blanked;
-	return 0;
-}
-
-static int
-nv50_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool update)
-{
-	struct nouveau_channel *evo = nv50_display(nv_crtc->base.dev)->master;
-	struct nouveau_connector *nv_connector;
-	struct drm_connector *connector;
-	int head = nv_crtc->index, ret;
-	u32 mode = 0x00;
-
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	connector = &nv_connector->base;
-	if (nv_connector->dithering_mode == DITHERING_MODE_AUTO) {
-		if (nv_crtc->base.fb->depth > connector->display_info.bpc * 3)
-			mode = DITHERING_MODE_DYNAMIC2X2;
-	} else {
-		mode = nv_connector->dithering_mode;
-	}
-
-	if (nv_connector->dithering_depth == DITHERING_DEPTH_AUTO) {
-		if (connector->display_info.bpc >= 8)
-			mode |= DITHERING_DEPTH_8BPC;
-	} else {
-		mode |= nv_connector->dithering_depth;
-	}
-
-	ret = RING_SPACE(evo, 2 + (update ? 2 : 0));
-	if (ret == 0) {
-		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(head, DITHER_CTRL), 1);
-		OUT_RING  (evo, mode);
-		if (update) {
-			BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
-			OUT_RING  (evo, 0);
-			FIRE_RING (evo);
-		}
-	}
-
-	return ret;
-}
-
-static int
-nv50_crtc_set_color_vibrance(struct nouveau_crtc *nv_crtc, bool update)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	int ret;
-	int adj;
-	u32 hue, vib;
-
-	NV_DEBUG(drm, "vibrance = %i, hue = %i\n",
-		     nv_crtc->color_vibrance, nv_crtc->vibrant_hue);
-
-	ret = RING_SPACE(evo, 2 + (update ? 2 : 0));
-	if (ret) {
-		NV_ERROR(drm, "no space while setting color vibrance\n");
-		return ret;
-	}
-
-	adj = (nv_crtc->color_vibrance > 0) ? 50 : 0;
-	vib = ((nv_crtc->color_vibrance * 2047 + adj) / 100) & 0xfff;
-
-	hue = ((nv_crtc->vibrant_hue * 2047) / 100) & 0xfff;
-
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, COLOR_CTRL), 1);
-	OUT_RING  (evo, (hue << 20) | (vib << 8));
-
-	if (update) {
-		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
-		OUT_RING  (evo, 0);
-		FIRE_RING (evo);
-	}
-
-	return 0;
-}
-
-struct nouveau_connector *
-nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct drm_connector *connector;
-	struct drm_crtc *crtc = to_drm_crtc(nv_crtc);
-
-	/* The safest approach is to find an encoder with the right crtc, that
-	 * is also linked to a connector. */
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		if (connector->encoder)
-			if (connector->encoder->crtc == crtc)
-				return nouveau_connector(connector);
-	}
-
-	return NULL;
-}
-
-static int
-nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, bool update)
-{
-	struct nouveau_connector *nv_connector;
-	struct drm_crtc *crtc = &nv_crtc->base;
-	struct drm_device *dev = crtc->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	struct drm_display_mode *umode = &crtc->mode;
-	struct drm_display_mode *omode;
-	int scaling_mode, ret;
-	u32 ctrl = 0, oX, oY;
-
-	NV_DEBUG(drm, "\n");
-
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	if (!nv_connector || !nv_connector->native_mode) {
-		NV_ERROR(drm, "no native mode, forcing panel scaling\n");
-		scaling_mode = DRM_MODE_SCALE_NONE;
-	} else {
-		scaling_mode = nv_connector->scaling_mode;
-	}
-
-	/* start off at the resolution we programmed the crtc for, this
-	 * effectively handles NONE/FULL scaling
-	 */
-	if (scaling_mode != DRM_MODE_SCALE_NONE)
-		omode = nv_connector->native_mode;
-	else
-		omode = umode;
-
-	oX = omode->hdisplay;
-	oY = omode->vdisplay;
-	if (omode->flags & DRM_MODE_FLAG_DBLSCAN)
-		oY *= 2;
-
-	/* add overscan compensation if necessary, will keep the aspect
-	 * ratio the same as the backend mode unless overridden by the
-	 * user setting both hborder and vborder properties.
-	 */
-	if (nv_connector && ( nv_connector->underscan == UNDERSCAN_ON ||
-			     (nv_connector->underscan == UNDERSCAN_AUTO &&
-			      nv_connector->edid &&
-			      drm_detect_hdmi_monitor(nv_connector->edid)))) {
-		u32 bX = nv_connector->underscan_hborder;
-		u32 bY = nv_connector->underscan_vborder;
-		u32 aspect = (oY << 19) / oX;
-
-		if (bX) {
-			oX -= (bX * 2);
-			if (bY) oY -= (bY * 2);
-			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
-		} else {
-			oX -= (oX >> 4) + 32;
-			if (bY) oY -= (bY * 2);
-			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
-		}
-	}
-
-	/* handle CENTER/ASPECT scaling, taking into account the areas
-	 * removed already for overscan compensation
-	 */
-	switch (scaling_mode) {
-	case DRM_MODE_SCALE_CENTER:
-		oX = min((u32)umode->hdisplay, oX);
-		oY = min((u32)umode->vdisplay, oY);
-		/* fall-through */
-	case DRM_MODE_SCALE_ASPECT:
-		if (oY < oX) {
-			u32 aspect = (umode->hdisplay << 19) / umode->vdisplay;
-			oX = ((oY * aspect) + (aspect / 2)) >> 19;
-		} else {
-			u32 aspect = (umode->vdisplay << 19) / umode->hdisplay;
-			oY = ((oX * aspect) + (aspect / 2)) >> 19;
-		}
-		break;
-	default:
-		break;
-	}
-
-	if (umode->hdisplay != oX || umode->vdisplay != oY ||
-	    umode->flags & DRM_MODE_FLAG_INTERLACE ||
-	    umode->flags & DRM_MODE_FLAG_DBLSCAN)
-		ctrl |= NV50_EVO_CRTC_SCALE_CTRL_ACTIVE;
-
-	ret = RING_SPACE(evo, 5);
-	if (ret)
-		return ret;
-
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, SCALE_CTRL), 1);
-	OUT_RING  (evo, ctrl);
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, SCALE_RES1), 2);
-	OUT_RING  (evo, oY << 16 | oX);
-	OUT_RING  (evo, oY << 16 | oX);
-
-	if (update) {
-		nv50_display_flip_stop(crtc);
-		nv50_display_sync(dev);
-		nv50_display_flip_next(crtc, crtc->fb, NULL);
-	}
-
-	return 0;
-}
-
-int
-nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_clock *clk = nouveau_clock(device);
-
-	return clk->pll_set(clk, PLL_VPLL0 + head, pclk);
-}
-
-static void
-nv50_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-
-	NV_DEBUG(drm, "\n");
-
-	nouveau_bo_unmap(nv_crtc->lut.nvbo);
-	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
-	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
-	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
-	drm_crtc_cleanup(&nv_crtc->base);
-	kfree(nv_crtc);
-}
-
-int
-nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
-		     uint32_t buffer_handle, uint32_t width, uint32_t height)
-{
-	struct drm_device *dev = crtc->dev;
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nouveau_bo *cursor = NULL;
-	struct drm_gem_object *gem;
-	int ret = 0, i;
-
-	if (!buffer_handle) {
-		nv_crtc->cursor.hide(nv_crtc, true);
-		return 0;
-	}
-
-	if (width != 64 || height != 64)
-		return -EINVAL;
-
-	gem = drm_gem_object_lookup(dev, file_priv, buffer_handle);
-	if (!gem)
-		return -ENOENT;
-	cursor = nouveau_gem_object(gem);
-
-	ret = nouveau_bo_map(cursor);
-	if (ret)
-		goto out;
-
-	/* The simple will do for now. */
-	for (i = 0; i < 64 * 64; i++)
-		nouveau_bo_wr32(nv_crtc->cursor.nvbo, i, nouveau_bo_rd32(cursor, i));
-
-	nouveau_bo_unmap(cursor);
-
-	nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.nvbo->bo.offset);
-	nv_crtc->cursor.show(nv_crtc, true);
-
-out:
-	drm_gem_object_unreference_unlocked(gem);
-	return ret;
-}
-
-int
-nv50_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-
-	nv_crtc->cursor.set_pos(nv_crtc, x, y);
-	return 0;
-}
-
-static void
-nv50_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
-		    uint32_t start, uint32_t size)
-{
-	int end = (start + size > 256) ? 256 : start + size, i;
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-
-	for (i = start; i < end; i++) {
-		nv_crtc->lut.r[i] = r[i];
-		nv_crtc->lut.g[i] = g[i];
-		nv_crtc->lut.b[i] = b[i];
-	}
-
-	/* We need to know the depth before we upload, but it's possible to
-	 * get called before a framebuffer is bound.  If this is the case,
-	 * mark the lut values as dirty by setting depth==0, and it'll be
-	 * uploaded on the first mode_set_base()
-	 */
-	if (!nv_crtc->base.fb) {
-		nv_crtc->lut.depth = 0;
-		return;
-	}
-
-	nv50_crtc_lut_load(crtc);
-}
-
-static void
-nv50_crtc_save(struct drm_crtc *crtc)
-{
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-	NV_ERROR(drm, "!!\n");
-}
-
-static void
-nv50_crtc_restore(struct drm_crtc *crtc)
-{
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-	NV_ERROR(drm, "!!\n");
-}
-
-static const struct drm_crtc_funcs nv50_crtc_funcs = {
-	.save = nv50_crtc_save,
-	.restore = nv50_crtc_restore,
-	.cursor_set = nv50_crtc_cursor_set,
-	.cursor_move = nv50_crtc_cursor_move,
-	.gamma_set = nv50_crtc_gamma_set,
-	.set_config = drm_crtc_helper_set_config,
-	.page_flip = nouveau_crtc_page_flip,
-	.destroy = nv50_crtc_destroy,
-};
-
-static void
-nv50_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
-}
-
-static void
-nv50_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct drm_device *dev = crtc->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-
-	NV_DEBUG(drm, "index %d\n", nv_crtc->index);
-
-	nv50_display_flip_stop(crtc);
-	drm_vblank_pre_modeset(dev, nv_crtc->index);
-	nv50_crtc_blank(nv_crtc, true);
-}
-
-static void
-nv50_crtc_commit(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-
-	NV_DEBUG(drm, "index %d\n", nv_crtc->index);
-
-	nv50_crtc_blank(nv_crtc, false);
-	drm_vblank_post_modeset(dev, nv_crtc->index);
-	nv50_display_sync(dev);
-	nv50_display_flip_next(crtc, crtc->fb, NULL);
-}
-
-static bool
-nv50_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode,
-		     struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
-static int
-nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
-			   struct drm_framebuffer *passed_fb,
-			   int x, int y, bool atomic)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	struct drm_framebuffer *drm_fb;
-	struct nouveau_framebuffer *fb;
-	int ret;
-
-	NV_DEBUG(drm, "index %d\n", nv_crtc->index);
-
-	/* no fb bound */
-	if (!atomic && !crtc->fb) {
-		NV_DEBUG(drm, "No FB bound\n");
-		return 0;
-	}
-
-	/* If atomic, we want to switch to the fb we were passed, so
-	 * now we update pointers to do that.  (We don't pin; just
-	 * assume we're already pinned and update the base address.)
-	 */
-	if (atomic) {
-		drm_fb = passed_fb;
-		fb = nouveau_framebuffer(passed_fb);
-	} else {
-		drm_fb = crtc->fb;
-		fb = nouveau_framebuffer(crtc->fb);
-		/* If not atomic, we can go ahead and pin, and unpin the
-		 * old fb we were passed.
-		 */
-		ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM);
-		if (ret)
-			return ret;
-
-		if (passed_fb) {
-			struct nouveau_framebuffer *ofb = nouveau_framebuffer(passed_fb);
-			nouveau_bo_unpin(ofb->nvbo);
-		}
-	}
-
-	nv_crtc->fb.offset = fb->nvbo->bo.offset;
-	nv_crtc->fb.tile_flags = nouveau_bo_tile_layout(fb->nvbo);
-	nv_crtc->fb.cpp = drm_fb->bits_per_pixel / 8;
-	if (!nv_crtc->fb.blanked && nv_device(drm->device)->chipset != 0x50) {
-		ret = RING_SPACE(evo, 2);
-		if (ret)
-			return ret;
-
-		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_DMA), 1);
-		OUT_RING  (evo, fb->r_dma);
-	}
-
-	ret = RING_SPACE(evo, 12);
-	if (ret)
-		return ret;
-
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_OFFSET), 5);
-	OUT_RING  (evo, nv_crtc->fb.offset >> 8);
-	OUT_RING  (evo, 0);
-	OUT_RING  (evo, (drm_fb->height << 16) | drm_fb->width);
-	OUT_RING  (evo, fb->r_pitch);
-	OUT_RING  (evo, fb->r_format);
-
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CLUT_MODE), 1);
-	OUT_RING  (evo, fb->base.depth == 8 ?
-		   NV50_EVO_CRTC_CLUT_MODE_OFF : NV50_EVO_CRTC_CLUT_MODE_ON);
-
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_POS), 1);
-	OUT_RING  (evo, (y << 16) | x);
-
-	if (nv_crtc->lut.depth != fb->base.depth) {
-		nv_crtc->lut.depth = fb->base.depth;
-		nv50_crtc_lut_load(crtc);
-	}
-
-	return 0;
-}
-
-static int
-nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
-		   struct drm_display_mode *mode, int x, int y,
-		   struct drm_framebuffer *old_fb)
-{
-	struct drm_device *dev = crtc->dev;
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	u32 head = nv_crtc->index * 0x400;
-	u32 ilace = (mode->flags & DRM_MODE_FLAG_INTERLACE) ? 2 : 1;
-	u32 vscan = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1;
-	u32 hactive, hsynce, hbackp, hfrontp, hblanke, hblanks;
-	u32 vactive, vsynce, vbackp, vfrontp, vblanke, vblanks;
-	u32 vblan2e = 0, vblan2s = 1;
-	int ret;
-
-	/* hw timing description looks like this:
-	 *
-	 * <sync> <back porch> <---------display---------> <front porch>
-	 * ______
-	 *       |____________|---------------------------|____________|
-	 *
-	 *       ^ synce      ^ blanke                    ^ blanks     ^ active
-	 *
-	 * interlaced modes also have 2 additional values pointing at the end
-	 * and start of the next field's blanking period.
-	 */
-
-	hactive = mode->htotal;
-	hsynce  = mode->hsync_end - mode->hsync_start - 1;
-	hbackp  = mode->htotal - mode->hsync_end;
-	hblanke = hsynce + hbackp;
-	hfrontp = mode->hsync_start - mode->hdisplay;
-	hblanks = mode->htotal - hfrontp - 1;
-
-	vactive = mode->vtotal * vscan / ilace;
-	vsynce  = ((mode->vsync_end - mode->vsync_start) * vscan / ilace) - 1;
-	vbackp  = (mode->vtotal - mode->vsync_end) * vscan / ilace;
-	vblanke = vsynce + vbackp;
-	vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
-	vblanks = vactive - vfrontp - 1;
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
-		vblan2e = vactive + vsynce + vbackp;
-		vblan2s = vblan2e + (mode->vdisplay * vscan / ilace);
-		vactive = (vactive * 2) + 1;
-	}
-
-	ret = RING_SPACE(evo, 18);
-	if (ret == 0) {
-		BEGIN_NV04(evo, 0, 0x0804 + head, 2);
-		OUT_RING  (evo, 0x00800000 | mode->clock);
-		OUT_RING  (evo, (ilace == 2) ? 2 : 0);
-		BEGIN_NV04(evo, 0, 0x0810 + head, 6);
-		OUT_RING  (evo, 0x00000000); /* border colour */
-		OUT_RING  (evo, (vactive << 16) | hactive);
-		OUT_RING  (evo, ( vsynce << 16) | hsynce);
-		OUT_RING  (evo, (vblanke << 16) | hblanke);
-		OUT_RING  (evo, (vblanks << 16) | hblanks);
-		OUT_RING  (evo, (vblan2e << 16) | vblan2s);
-		BEGIN_NV04(evo, 0, 0x082c + head, 1);
-		OUT_RING  (evo, 0x00000000);
-		BEGIN_NV04(evo, 0, 0x0900 + head, 1);
-		OUT_RING  (evo, 0x00000311); /* makes sync channel work */
-		BEGIN_NV04(evo, 0, 0x08c8 + head, 1);
-		OUT_RING  (evo, (umode->vdisplay << 16) | umode->hdisplay);
-		BEGIN_NV04(evo, 0, 0x08d4 + head, 1);
-		OUT_RING  (evo, 0x00000000); /* screen position */
-	}
-
-	nv_crtc->set_dither(nv_crtc, false);
-	nv_crtc->set_scale(nv_crtc, false);
-	nv_crtc->set_color_vibrance(nv_crtc, false);
-
-	return nv50_crtc_do_mode_set_base(crtc, old_fb, x, y, false);
-}
-
-static int
-nv50_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
-			struct drm_framebuffer *old_fb)
-{
-	int ret;
-
-	nv50_display_flip_stop(crtc);
-	ret = nv50_crtc_do_mode_set_base(crtc, old_fb, x, y, false);
-	if (ret)
-		return ret;
-
-	ret = nv50_display_sync(crtc->dev);
-	if (ret)
-		return ret;
-
-	return nv50_display_flip_next(crtc, crtc->fb, NULL);
-}
-
-static int
-nv50_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb,
-			       int x, int y, enum mode_set_atomic state)
-{
-	int ret;
-
-	nv50_display_flip_stop(crtc);
-	ret = nv50_crtc_do_mode_set_base(crtc, fb, x, y, true);
-	if (ret)
-		return ret;
-
-	return nv50_display_sync(crtc->dev);
-}
-
-static const struct drm_crtc_helper_funcs nv50_crtc_helper_funcs = {
-	.dpms = nv50_crtc_dpms,
-	.prepare = nv50_crtc_prepare,
-	.commit = nv50_crtc_commit,
-	.mode_fixup = nv50_crtc_mode_fixup,
-	.mode_set = nv50_crtc_mode_set,
-	.mode_set_base = nv50_crtc_mode_set_base,
-	.mode_set_base_atomic = nv50_crtc_mode_set_base_atomic,
-	.load_lut = nv50_crtc_lut_load,
-};
-
-int
-nv50_crtc_create(struct drm_device *dev, int index)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_crtc *nv_crtc = NULL;
-	int ret, i;
-
-	NV_DEBUG(drm, "\n");
-
-	nv_crtc = kzalloc(sizeof(*nv_crtc), GFP_KERNEL);
-	if (!nv_crtc)
-		return -ENOMEM;
-
-	nv_crtc->index = index;
-	nv_crtc->set_dither = nv50_crtc_set_dither;
-	nv_crtc->set_scale = nv50_crtc_set_scale;
-	nv_crtc->set_color_vibrance = nv50_crtc_set_color_vibrance;
-	nv_crtc->color_vibrance = 50;
-	nv_crtc->vibrant_hue = 0;
-	nv_crtc->lut.depth = 0;
-	for (i = 0; i < 256; i++) {
-		nv_crtc->lut.r[i] = i << 8;
-		nv_crtc->lut.g[i] = i << 8;
-		nv_crtc->lut.b[i] = i << 8;
-	}
-
-	drm_crtc_init(dev, &nv_crtc->base, &nv50_crtc_funcs);
-	drm_crtc_helper_add(&nv_crtc->base, &nv50_crtc_helper_funcs);
-	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
-
-	ret = nouveau_bo_new(dev, 4096, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, &nv_crtc->lut.nvbo);
-	if (!ret) {
-		ret = nouveau_bo_pin(nv_crtc->lut.nvbo, TTM_PL_FLAG_VRAM);
-		if (!ret)
-			ret = nouveau_bo_map(nv_crtc->lut.nvbo);
-		if (ret)
-			nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
-	}
-
-	if (ret)
-		goto out;
-
-
-	ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, &nv_crtc->cursor.nvbo);
-	if (!ret) {
-		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
-		if (!ret)
-			ret = nouveau_bo_map(nv_crtc->cursor.nvbo);
-		if (ret)
-			nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
-	}
-
-	if (ret)
-		goto out;
-
-	nv50_cursor_init(nv_crtc);
-out:
-	if (ret)
-		nv50_crtc_destroy(&nv_crtc->base);
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nv50_cursor.c b/drivers/gpu/drm/nouveau/nv50_cursor.c
deleted file mode 100644
index 223da11..0000000
--- a/drivers/gpu/drm/nouveau/nv50_cursor.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (C) 2008 Maarten Maathuis.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drmP.h>
-
-#include "nouveau_drm.h"
-#include "nouveau_dma.h"
-#include "nouveau_crtc.h"
-#include "nv50_display.h"
-
-static void
-nv50_cursor_show(struct nouveau_crtc *nv_crtc, bool update)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	int ret;
-
-	NV_DEBUG(drm, "\n");
-
-	if (update && nv_crtc->cursor.visible)
-		return;
-
-	ret = RING_SPACE(evo, (nv_device(drm->device)->chipset != 0x50 ? 5 : 3) + update * 2);
-	if (ret) {
-		NV_ERROR(drm, "no space while unhiding cursor\n");
-		return;
-	}
-
-	if (nv_device(drm->device)->chipset != 0x50) {
-		BEGIN_NV04(evo, 0, NV84_EVO_CRTC(nv_crtc->index, CURSOR_DMA), 1);
-		OUT_RING(evo, NvEvoVRAM);
-	}
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CURSOR_CTRL), 2);
-	OUT_RING(evo, NV50_EVO_CRTC_CURSOR_CTRL_SHOW);
-	OUT_RING(evo, nv_crtc->cursor.offset >> 8);
-
-	if (update) {
-		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
-		OUT_RING(evo, 0);
-		FIRE_RING(evo);
-		nv_crtc->cursor.visible = true;
-	}
-}
-
-static void
-nv50_cursor_hide(struct nouveau_crtc *nv_crtc, bool update)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	int ret;
-
-	NV_DEBUG(drm, "\n");
-
-	if (update && !nv_crtc->cursor.visible)
-		return;
-
-	ret = RING_SPACE(evo, (nv_device(drm->device)->chipset != 0x50 ? 5 : 3) + update * 2);
-	if (ret) {
-		NV_ERROR(drm, "no space while hiding cursor\n");
-		return;
-	}
-	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CURSOR_CTRL), 2);
-	OUT_RING(evo, NV50_EVO_CRTC_CURSOR_CTRL_HIDE);
-	OUT_RING(evo, 0);
-	if (nv_device(drm->device)->chipset != 0x50) {
-		BEGIN_NV04(evo, 0, NV84_EVO_CRTC(nv_crtc->index, CURSOR_DMA), 1);
-		OUT_RING(evo, NV84_EVO_CRTC_CURSOR_DMA_HANDLE_NONE);
-	}
-
-	if (update) {
-		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
-		OUT_RING(evo, 0);
-		FIRE_RING(evo);
-		nv_crtc->cursor.visible = false;
-	}
-}
-
-static void
-nv50_cursor_set_pos(struct nouveau_crtc *nv_crtc, int x, int y)
-{
-	struct nouveau_device *device = nouveau_dev(nv_crtc->base.dev);
-
-	nv_crtc->cursor_saved_x = x; nv_crtc->cursor_saved_y = y;
-	nv_wr32(device, NV50_PDISPLAY_CURSOR_USER_POS(nv_crtc->index),
-		((y & 0xFFFF) << 16) | (x & 0xFFFF));
-	/* Needed to make the cursor move. */
-	nv_wr32(device, NV50_PDISPLAY_CURSOR_USER_POS_CTRL(nv_crtc->index), 0);
-}
-
-static void
-nv50_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
-{
-	if (offset == nv_crtc->cursor.offset)
-		return;
-
-	nv_crtc->cursor.offset = offset;
-	if (nv_crtc->cursor.visible) {
-		nv_crtc->cursor.visible = false;
-		nv_crtc->cursor.show(nv_crtc, true);
-	}
-}
-
-int
-nv50_cursor_init(struct nouveau_crtc *nv_crtc)
-{
-	nv_crtc->cursor.set_offset = nv50_cursor_set_offset;
-	nv_crtc->cursor.set_pos = nv50_cursor_set_pos;
-	nv_crtc->cursor.hide = nv50_cursor_hide;
-	nv_crtc->cursor.show = nv50_cursor_show;
-	return 0;
-}
diff --git a/drivers/gpu/drm/nouveau/nv50_dac.c b/drivers/gpu/drm/nouveau/nv50_dac.c
deleted file mode 100644
index 6a30a17..0000000
--- a/drivers/gpu/drm/nouveau/nv50_dac.c
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright (C) 2008 Maarten Maathuis.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
-
-#define NOUVEAU_DMA_DEBUG (nouveau_reg_debug & NOUVEAU_REG_DEBUG_EVO)
-#include "nouveau_reg.h"
-#include "nouveau_drm.h"
-#include "nouveau_dma.h"
-#include "nouveau_encoder.h"
-#include "nouveau_connector.h"
-#include "nouveau_crtc.h"
-#include "nv50_display.h"
-
-#include <subdev/timer.h>
-
-static void
-nv50_dac_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	int ret;
-
-	if (!nv_encoder->crtc)
-		return;
-	nv50_crtc_blank(nouveau_crtc(nv_encoder->crtc), true);
-
-	NV_DEBUG(drm, "Disconnecting DAC %d\n", nv_encoder->or);
-
-	ret = RING_SPACE(evo, 4);
-	if (ret) {
-		NV_ERROR(drm, "no space while disconnecting DAC\n");
-		return;
-	}
-	BEGIN_NV04(evo, 0, NV50_EVO_DAC(nv_encoder->or, MODE_CTRL), 1);
-	OUT_RING  (evo, 0);
-	BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
-	OUT_RING  (evo, 0);
-
-	nv_encoder->crtc = NULL;
-}
-
-static enum drm_connector_status
-nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	enum drm_connector_status status = connector_status_disconnected;
-	uint32_t dpms_state, load_pattern, load_state;
-	int or = nv_encoder->or;
-
-	nv_wr32(device, NV50_PDISPLAY_DAC_CLK_CTRL1(or), 0x00000001);
-	dpms_state = nv_rd32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or));
-
-	nv_wr32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or),
-		0x00150000 | NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING);
-	if (!nv_wait(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or),
-		     NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING, 0)) {
-		NV_ERROR(drm, "timeout: DAC_DPMS_CTRL_PENDING(%d) == 0\n", or);
-		NV_ERROR(drm, "DAC_DPMS_CTRL(%d) = 0x%08x\n", or,
-			  nv_rd32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or)));
-		return status;
-	}
-
-	/* Use bios provided value if possible. */
-	if (drm->vbios.dactestval) {
-		load_pattern = drm->vbios.dactestval;
-		NV_DEBUG(drm, "Using bios provided load_pattern of %d\n",
-			  load_pattern);
-	} else {
-		load_pattern = 340;
-		NV_DEBUG(drm, "Using default load_pattern of %d\n",
-			 load_pattern);
-	}
-
-	nv_wr32(device, NV50_PDISPLAY_DAC_LOAD_CTRL(or),
-		NV50_PDISPLAY_DAC_LOAD_CTRL_ACTIVE | load_pattern);
-	mdelay(45); /* give it some time to process */
-	load_state = nv_rd32(device, NV50_PDISPLAY_DAC_LOAD_CTRL(or));
-
-	nv_wr32(device, NV50_PDISPLAY_DAC_LOAD_CTRL(or), 0);
-	nv_wr32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or), dpms_state |
-		NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING);
-
-	if ((load_state & NV50_PDISPLAY_DAC_LOAD_CTRL_PRESENT) ==
-			  NV50_PDISPLAY_DAC_LOAD_CTRL_PRESENT)
-		status = connector_status_connected;
-
-	if (status == connector_status_connected)
-		NV_DEBUG(drm, "Load was detected on output with or %d\n", or);
-	else
-		NV_DEBUG(drm, "Load was not detected on output with or %d\n", or);
-
-	return status;
-}
-
-static void
-nv50_dac_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	uint32_t val;
-	int or = nv_encoder->or;
-
-	NV_DEBUG(drm, "or %d mode %d\n", or, mode);
-
-	/* wait for it to be done */
-	if (!nv_wait(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or),
-		     NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING, 0)) {
-		NV_ERROR(drm, "timeout: DAC_DPMS_CTRL_PENDING(%d) == 0\n", or);
-		NV_ERROR(drm, "DAC_DPMS_CTRL(%d) = 0x%08x\n", or,
-			 nv_rd32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or)));
-		return;
-	}
-
-	val = nv_rd32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or)) & ~0x7F;
-
-	if (mode != DRM_MODE_DPMS_ON)
-		val |= NV50_PDISPLAY_DAC_DPMS_CTRL_BLANKED;
-
-	switch (mode) {
-	case DRM_MODE_DPMS_STANDBY:
-		val |= NV50_PDISPLAY_DAC_DPMS_CTRL_HSYNC_OFF;
-		break;
-	case DRM_MODE_DPMS_SUSPEND:
-		val |= NV50_PDISPLAY_DAC_DPMS_CTRL_VSYNC_OFF;
-		break;
-	case DRM_MODE_DPMS_OFF:
-		val |= NV50_PDISPLAY_DAC_DPMS_CTRL_OFF;
-		val |= NV50_PDISPLAY_DAC_DPMS_CTRL_HSYNC_OFF;
-		val |= NV50_PDISPLAY_DAC_DPMS_CTRL_VSYNC_OFF;
-		break;
-	default:
-		break;
-	}
-
-	nv_wr32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(or), val |
-		NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING);
-}
-
-static void
-nv50_dac_save(struct drm_encoder *encoder)
-{
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	NV_ERROR(drm, "!!\n");
-}
-
-static void
-nv50_dac_restore(struct drm_encoder *encoder)
-{
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	NV_ERROR(drm, "!!\n");
-}
-
-static bool
-nv50_dac_mode_fixup(struct drm_encoder *encoder,
-		    const struct drm_display_mode *mode,
-		    struct drm_display_mode *adjusted_mode)
-{
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *connector;
-
-	NV_DEBUG(drm, "or %d\n", nv_encoder->or);
-
-	connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!connector) {
-		NV_ERROR(drm, "Encoder has no connector\n");
-		return false;
-	}
-
-	if (connector->scaling_mode != DRM_MODE_SCALE_NONE &&
-	     connector->native_mode)
-		drm_mode_copy(adjusted_mode, connector->native_mode);
-
-	return true;
-}
-
-static void
-nv50_dac_commit(struct drm_encoder *encoder)
-{
-}
-
-static void
-nv50_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
-		  struct drm_display_mode *adjusted_mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	struct nouveau_crtc *crtc = nouveau_crtc(encoder->crtc);
-	uint32_t mode_ctl = 0, mode_ctl2 = 0;
-	int ret;
-
-	NV_DEBUG(drm, "or %d type %d crtc %d\n",
-		     nv_encoder->or, nv_encoder->dcb->type, crtc->index);
-
-	nv50_dac_dpms(encoder, DRM_MODE_DPMS_ON);
-
-	if (crtc->index == 1)
-		mode_ctl |= NV50_EVO_DAC_MODE_CTRL_CRTC1;
-	else
-		mode_ctl |= NV50_EVO_DAC_MODE_CTRL_CRTC0;
-
-	/* Lacking a working tv-out, this is not a 100% sure. */
-	if (nv_encoder->dcb->type == DCB_OUTPUT_ANALOG)
-		mode_ctl |= 0x40;
-	else
-	if (nv_encoder->dcb->type == DCB_OUTPUT_TV)
-		mode_ctl |= 0x100;
-
-	if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
-		mode_ctl2 |= NV50_EVO_DAC_MODE_CTRL2_NHSYNC;
-
-	if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
-		mode_ctl2 |= NV50_EVO_DAC_MODE_CTRL2_NVSYNC;
-
-	ret = RING_SPACE(evo, 3);
-	if (ret) {
-		NV_ERROR(drm, "no space while connecting DAC\n");
-		return;
-	}
-	BEGIN_NV04(evo, 0, NV50_EVO_DAC(nv_encoder->or, MODE_CTRL), 2);
-	OUT_RING(evo, mode_ctl);
-	OUT_RING(evo, mode_ctl2);
-
-	nv_encoder->crtc = encoder->crtc;
-}
-
-static struct drm_crtc *
-nv50_dac_crtc_get(struct drm_encoder *encoder)
-{
-	return nouveau_encoder(encoder)->crtc;
-}
-
-static const struct drm_encoder_helper_funcs nv50_dac_helper_funcs = {
-	.dpms = nv50_dac_dpms,
-	.save = nv50_dac_save,
-	.restore = nv50_dac_restore,
-	.mode_fixup = nv50_dac_mode_fixup,
-	.prepare = nv50_dac_disconnect,
-	.commit = nv50_dac_commit,
-	.mode_set = nv50_dac_mode_set,
-	.get_crtc = nv50_dac_crtc_get,
-	.detect = nv50_dac_detect,
-	.disable = nv50_dac_disconnect
-};
-
-static void
-nv50_dac_destroy(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-
-	if (!encoder)
-		return;
-
-	NV_DEBUG(drm, "\n");
-
-	drm_encoder_cleanup(encoder);
-	kfree(nv_encoder);
-}
-
-static const struct drm_encoder_funcs nv50_dac_encoder_funcs = {
-	.destroy = nv50_dac_destroy,
-};
-
-int
-nv50_dac_create(struct drm_connector *connector, struct dcb_output *entry)
-{
-	struct nouveau_encoder *nv_encoder;
-	struct drm_encoder *encoder;
-
-	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
-	if (!nv_encoder)
-		return -ENOMEM;
-	encoder = to_drm_encoder(nv_encoder);
-
-	nv_encoder->dcb = entry;
-	nv_encoder->or = ffs(entry->or) - 1;
-
-	drm_encoder_init(connector->dev, encoder, &nv50_dac_encoder_funcs,
-			 DRM_MODE_ENCODER_DAC);
-	drm_encoder_helper_add(encoder, &nv50_dac_helper_funcs);
-
-	encoder->possible_crtcs = entry->heads;
-	encoder->possible_clones = 0;
-
-	drm_mode_connector_attach_encoder(connector, encoder);
-	return 0;
-}
-
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index f97b42c..3587408 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -1,969 +1,2058 @@
-/*
- * Copyright (C) 2008 Maarten Maathuis.
- * All Rights Reserved.
+/*
+ * Copyright 2011 Red Hat Inc.
  *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  *
+ * Authors: Ben Skeggs
  */
 
+#include <linux/dma-mapping.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
 #include "nouveau_drm.h"
 #include "nouveau_dma.h"
-
-#include "nv50_display.h"
-#include "nouveau_crtc.h"
-#include "nouveau_encoder.h"
+#include "nouveau_gem.h"
 #include "nouveau_connector.h"
-#include "nouveau_fbcon.h"
-#include <drm/drm_crtc_helper.h>
+#include "nouveau_encoder.h"
+#include "nouveau_crtc.h"
 #include "nouveau_fence.h"
+#include "nv50_display.h"
 
+#include <core/client.h>
 #include <core/gpuobj.h>
+#include <core/class.h>
+
 #include <subdev/timer.h>
+#include <subdev/bar.h>
+#include <subdev/fb.h>
 
-static void nv50_display_bh(unsigned long);
+#define EVO_DMA_NR 9
 
-static inline int
-nv50_sor_nr(struct drm_device *dev)
+#define EVO_MASTER  (0x00)
+#define EVO_FLIP(c) (0x01 + (c))
+#define EVO_OVLY(c) (0x05 + (c))
+#define EVO_OIMM(c) (0x09 + (c))
+#define EVO_CURS(c) (0x0d + (c))
+
+/* offsets in shared sync bo of various structures */
+#define EVO_SYNC(c, o) ((c) * 0x0100 + (o))
+#define EVO_MAST_NTFY     EVO_SYNC(  0, 0x00)
+#define EVO_FLIP_SEM0(c)  EVO_SYNC((c), 0x00)
+#define EVO_FLIP_SEM1(c)  EVO_SYNC((c), 0x10)
+
+#define EVO_CORE_HANDLE      (0xd1500000)
+#define EVO_CHAN_HANDLE(t,i) (0xd15c0000 | (((t) & 0x00ff) << 8) | (i))
+#define EVO_CHAN_OCLASS(t,c) ((nv_hclass(c) & 0xff00) | ((t) & 0x00ff))
+#define EVO_PUSH_HANDLE(t,i) (0xd15b0000 | (i) |                               \
+			      (((NV50_DISP_##t##_CLASS) & 0x00ff) << 8))
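+
+/* For illustration: EVO_CHAN_HANDLE(t, i) composes 0xd15c0000 with the
+ * channel type's low byte in bits 8..15 and the head index in the low
+ * bits, so every EVO channel object gets a unique, recognisable handle
+ * under the EVO_CORE_HANDLE parent. */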
+
+/******************************************************************************
+ * EVO channel
+ *****************************************************************************/
+
+struct nv50_chan {
+	struct nouveau_object *user;
+	u32 handle;
+};
+
+static int
+nv50_chan_create(struct nouveau_object *core, u32 bclass, u8 head,
+		 void *data, u32 size, struct nv50_chan *chan)
 {
-	struct nouveau_device *device = nouveau_dev(dev);
+	struct nouveau_object *client = nv_pclass(core, NV_CLIENT_CLASS);
+	const u32 oclass = EVO_CHAN_OCLASS(bclass, core);
+	const u32 handle = EVO_CHAN_HANDLE(bclass, head);
+	int ret;
 
-	if (device->chipset  < 0x90 ||
-	    device->chipset == 0x92 ||
-	    device->chipset == 0xa0)
-		return 2;
+	ret = nouveau_object_new(client, EVO_CORE_HANDLE, handle,
+				 oclass, data, size, &chan->user);
+	if (ret)
+		return ret;
 
-	return 4;
-}
-
-u32
-nv50_display_active_crtcs(struct drm_device *dev)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	u32 mask = 0;
-	int i;
-
-	if (device->chipset  < 0x90 ||
-	    device->chipset == 0x92 ||
-	    device->chipset == 0xa0) {
-		for (i = 0; i < 2; i++)
-			mask |= nv_rd32(device, NV50_PDISPLAY_SOR_MODE_CTRL_C(i));
-	} else {
-		for (i = 0; i < 4; i++)
-			mask |= nv_rd32(device, NV90_PDISPLAY_SOR_MODE_CTRL_C(i));
-	}
-
-	for (i = 0; i < 3; i++)
-		mask |= nv_rd32(device, NV50_PDISPLAY_DAC_MODE_CTRL_C(i));
-
-	return mask & 3;
-}
-
-int
-nv50_display_early_init(struct drm_device *dev)
-{
+	chan->handle = handle;
 	return 0;
 }
 
-void
-nv50_display_late_takedown(struct drm_device *dev)
+static void
+nv50_chan_destroy(struct nouveau_object *core, struct nv50_chan *chan)
 {
+	struct nouveau_object *client = nv_pclass(core, NV_CLIENT_CLASS);
+	if (chan->handle)
+		nouveau_object_del(client, EVO_CORE_HANDLE, chan->handle);
 }
 
-int
-nv50_display_sync(struct drm_device *dev)
+/******************************************************************************
+ * PIO EVO channel
+ *****************************************************************************/
+
+struct nv50_pioc {
+	struct nv50_chan base;
+};
+
+static void
+nv50_pioc_destroy(struct nouveau_object *core, struct nv50_pioc *pioc)
 {
-	struct nv50_display *disp = nv50_display(dev);
-	struct nouveau_channel *evo = disp->master;
+	nv50_chan_destroy(core, &pioc->base);
+}
+
+static int
+nv50_pioc_create(struct nouveau_object *core, u32 bclass, u8 head,
+		 void *data, u32 size, struct nv50_pioc *pioc)
+{
+	return nv50_chan_create(core, bclass, head, data, size, &pioc->base);
+}
+
+/******************************************************************************
+ * DMA EVO channel
+ *****************************************************************************/
+
+struct nv50_dmac {
+	struct nv50_chan base;
+	dma_addr_t handle;
+	u32 *ptr;
+};
+
+static void
+nv50_dmac_destroy(struct nouveau_object *core, struct nv50_dmac *dmac)
+{
+	if (dmac->ptr) {
+		struct pci_dev *pdev = nv_device(core)->pdev;
+		pci_free_consistent(pdev, PAGE_SIZE, dmac->ptr, dmac->handle);
+	}
+
+	nv50_chan_destroy(core, &dmac->base);
+}
+
+static int
+nv50_dmac_create_fbdma(struct nouveau_object *core, u32 parent)
+{
+	struct nouveau_fb *pfb = nouveau_fb(core);
+	struct nouveau_object *client = nv_pclass(core, NV_CLIENT_CLASS);
+	struct nouveau_object *object;
+	int ret = nouveau_object_new(client, parent, NvEvoVRAM_LP,
+				     NV_DMA_IN_MEMORY_CLASS,
+				     &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NV50_DMA_CONF0_ENABLE |
+					         NV50_DMA_CONF0_PART_256,
+				     }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_new(client, parent, NvEvoFB16,
+				 NV_DMA_IN_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NV50_DMA_CONF0_ENABLE | 0x70 |
+					         NV50_DMA_CONF0_PART_256,
+				 }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_new(client, parent, NvEvoFB32,
+				 NV_DMA_IN_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NV50_DMA_CONF0_ENABLE | 0x7a |
+					         NV50_DMA_CONF0_PART_256,
+				 }, sizeof(struct nv_dma_class), &object);
+	return ret;
+}
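+
+/* The three ctxdmas above give the nv50 EVO core its framebuffer views:
+ * a linear low-page VRAM view (NvEvoVRAM_LP) plus two views whose conf0
+ * kind bytes (0x70 and 0x7a here) appear to select the tiled layouts
+ * used for 16- and 32-bit colour scanout respectively. */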
+
+static int
+nvc0_dmac_create_fbdma(struct nouveau_object *core, u32 parent)
+{
+	struct nouveau_fb *pfb = nouveau_fb(core);
+	struct nouveau_object *client = nv_pclass(core, NV_CLIENT_CLASS);
+	struct nouveau_object *object;
+	int ret = nouveau_object_new(client, parent, NvEvoVRAM_LP,
+				     NV_DMA_IN_MEMORY_CLASS,
+				     &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NVC0_DMA_CONF0_ENABLE,
+				     }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_new(client, parent, NvEvoFB16,
+				 NV_DMA_IN_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NVC0_DMA_CONF0_ENABLE | 0xfe,
+				 }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_new(client, parent, NvEvoFB32,
+				 NV_DMA_IN_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NVC0_DMA_CONF0_ENABLE | 0xfe,
+				 }, sizeof(struct nv_dma_class), &object);
+	return ret;
+}
+
+static int
+nvd0_dmac_create_fbdma(struct nouveau_object *core, u32 parent)
+{
+	struct nouveau_fb *pfb = nouveau_fb(core);
+	struct nouveau_object *client = nv_pclass(core, NV_CLIENT_CLASS);
+	struct nouveau_object *object;
+	int ret = nouveau_object_new(client, parent, NvEvoVRAM_LP,
+				     NV_DMA_IN_MEMORY_CLASS,
+				     &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NVD0_DMA_CONF0_ENABLE |
+						 NVD0_DMA_CONF0_PAGE_LP,
+				     }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_new(client, parent, NvEvoFB32,
+				 NV_DMA_IN_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+					.conf0 = NVD0_DMA_CONF0_ENABLE | 0xfe |
+						 NVD0_DMA_CONF0_PAGE_LP,
+				 }, sizeof(struct nv_dma_class), &object);
+	return ret;
+}
+
+static int
+nv50_dmac_create(struct nouveau_object *core, u32 bclass, u8 head,
+		 void *data, u32 size, u64 syncbuf,
+		 struct nv50_dmac *dmac)
+{
+	struct nouveau_fb *pfb = nouveau_fb(core);
+	struct nouveau_object *client = nv_pclass(core, NV_CLIENT_CLASS);
+	struct nouveau_object *object;
+	u32 pushbuf = *(u32 *)data;
 	int ret;
 
-	ret = RING_SPACE(evo, 6);
-	if (ret == 0) {
-		BEGIN_NV04(evo, 0, 0x0084, 1);
-		OUT_RING  (evo, 0x80000000);
-		BEGIN_NV04(evo, 0, 0x0080, 1);
-		OUT_RING  (evo, 0);
-		BEGIN_NV04(evo, 0, 0x0084, 1);
-		OUT_RING  (evo, 0x00000000);
+	dmac->ptr = pci_alloc_consistent(nv_device(core)->pdev, PAGE_SIZE,
+					&dmac->handle);
+	if (!dmac->ptr)
+		return -ENOMEM;
 
-		nv_wo32(disp->ramin, 0x2000, 0x00000000);
-		FIRE_RING (evo);
+	ret = nouveau_object_new(client, NVDRM_DEVICE, pushbuf,
+				 NV_DMA_FROM_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_PCI_US |
+						 NV_DMA_ACCESS_RD,
+					.start = dmac->handle + 0x0000,
+					.limit = dmac->handle + 0x0fff,
+				 }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
 
-		if (nv_wait_ne(disp->ramin, 0x2000, 0xffffffff, 0x00000000))
+	ret = nv50_chan_create(core, bclass, head, data, size, &dmac->base);
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_new(client, dmac->base.handle, NvEvoSync,
+				 NV_DMA_IN_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = syncbuf + 0x0000,
+					.limit = syncbuf + 0x0fff,
+				 }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_new(client, dmac->base.handle, NvEvoVRAM,
+				 NV_DMA_IN_MEMORY_CLASS,
+				 &(struct nv_dma_class) {
+					.flags = NV_DMA_TARGET_VRAM |
+						 NV_DMA_ACCESS_RDWR,
+					.start = 0,
+					.limit = pfb->ram.size - 1,
+				 }, sizeof(struct nv_dma_class), &object);
+	if (ret)
+		return ret;
+
+	if (nv_device(core)->card_type < NV_C0)
+		ret = nv50_dmac_create_fbdma(core, dmac->base.handle);
+	else
+	if (nv_device(core)->card_type < NV_D0)
+		ret = nvc0_dmac_create_fbdma(core, dmac->base.handle);
+	else
+		ret = nvd0_dmac_create_fbdma(core, dmac->base.handle);
+	return ret;
+}
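+
+/* Note the ordering in nv50_dmac_create(): the push buffer ctxdma is
+ * created under the device before the channel that references it, and
+ * the sync/VRAM ctxdmas (plus the per-generation framebuffer ctxdmas)
+ * are then created as children of the new channel. */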
+
+struct nv50_mast {
+	struct nv50_dmac base;
+};
+
+struct nv50_curs {
+	struct nv50_pioc base;
+};
+
+struct nv50_sync {
+	struct nv50_dmac base;
+	struct {
+		u32 offset;
+		u16 value;
+	} sem;
+};
+
+struct nv50_ovly {
+	struct nv50_dmac base;
+};
+
+struct nv50_oimm {
+	struct nv50_pioc base;
+};
+
+struct nv50_head {
+	struct nouveau_crtc base;
+	struct nv50_curs curs;
+	struct nv50_sync sync;
+	struct nv50_ovly ovly;
+	struct nv50_oimm oimm;
+};
+
+#define nv50_head(c) ((struct nv50_head *)nouveau_crtc(c))
+#define nv50_curs(c) (&nv50_head(c)->curs)
+#define nv50_sync(c) (&nv50_head(c)->sync)
+#define nv50_ovly(c) (&nv50_head(c)->ovly)
+#define nv50_oimm(c) (&nv50_head(c)->oimm)
+#define nv50_chan(c) (&(c)->base.base)
+#define nv50_vers(c) nv_mclass(nv50_chan(c)->user)
+
+struct nv50_disp {
+	struct nouveau_object *core;
+	struct nv50_mast mast;
+
+	u32 modeset;
+
+	struct nouveau_bo *sync;
+};
+
+static struct nv50_disp *
+nv50_disp(struct drm_device *dev)
+{
+	return nouveau_display(dev)->priv;
+}
+
+#define nv50_mast(d) (&nv50_disp(d)->mast)
+
+static struct drm_crtc *
+nv50_display_crtc_get(struct drm_encoder *encoder)
+{
+	return nouveau_encoder(encoder)->crtc;
+}
+
+/******************************************************************************
+ * EVO channel helpers
+ *****************************************************************************/
+static u32 *
+evo_wait(void *evoc, int nr)
+{
+	struct nv50_dmac *dmac = evoc;
+	u32 put = nv_ro32(dmac->base.user, 0x0000) / 4;
+
+	if (put + nr >= (PAGE_SIZE / 4) - 8) {
+		dmac->ptr[put] = 0x20000000;
+
+		nv_wo32(dmac->base.user, 0x0000, 0x00000000);
+		if (!nv_wait(dmac->base.user, 0x0004, ~0, 0x00000000)) {
+			NV_ERROR(dmac->base.user, "channel stalled\n");
+			return NULL;
+		}
+
+		put = 0;
+	}
+
+	return dmac->ptr + put;
+}
+
+static void
+evo_kick(u32 *push, void *evoc)
+{
+	struct nv50_dmac *dmac = evoc;
+	nv_wo32(dmac->base.user, 0x0000, (push - dmac->ptr) << 2);
+}
+
+#define evo_mthd(p,m,s) *((p)++) = (((s) << 18) | (m))
+#define evo_data(p,d)   *((p)++) = (d)
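+
+/* A minimal usage sketch for the helpers above: reserve space with
+ * evo_wait(), emit a method header and its data words, then submit by
+ * advancing the channel's PUT pointer via evo_kick():
+ *
+ *	u32 *push = evo_wait(mast, 2);
+ *	if (push) {
+ *		evo_mthd(push, 0x0080, 1);	(UPDATE)
+ *		evo_data(push, 0x00000000);
+ *		evo_kick(push, mast);
+ *	}
+ */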
+
+static bool
+evo_sync_wait(void *data)
+{
+	return nouveau_bo_rd32(data, EVO_MAST_NTFY) != 0x00000000;
+}
+
+static int
+evo_sync(struct drm_device *dev)
+{
+	struct nouveau_device *device = nouveau_dev(dev);
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct nv50_mast *mast = nv50_mast(dev);
+	u32 *push = evo_wait(mast, 8);
+	if (push) {
+		nouveau_bo_wr32(disp->sync, EVO_MAST_NTFY, 0x00000000);
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, 0x80000000 | EVO_MAST_NTFY);
+		evo_mthd(push, 0x0080, 2);
+		evo_data(push, 0x00000000);
+		evo_data(push, 0x00000000);
+		evo_kick(push, mast);
+		if (nv_wait_cb(device, evo_sync_wait, disp->sync))
 			return 0;
 	}
 
-	return 0;
+	return -EBUSY;
 }
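+
+/* evo_sync() above is effectively a CPU-side fence on the core channel:
+ * it arms the notifier (0x80000000 | EVO_MAST_NTFY), submits an update,
+ * and busy-waits until the display engine writes a non-zero value back
+ * into the shared sync buffer. */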
 
-int
-nv50_display_init(struct drm_device *dev)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_channel *evo;
-	int ret, i;
-	u32 val;
-
-	NV_DEBUG(drm, "\n");
-
-	nv_wr32(device, 0x00610184, nv_rd32(device, 0x00614004));
-
-	/*
-	 * I think the 0x006101XX range is some kind of main control area
-	 * that enables things.
-	 */
-	/* CRTC? */
-	for (i = 0; i < 2; i++) {
-		val = nv_rd32(device, 0x00616100 + (i * 0x800));
-		nv_wr32(device, 0x00610190 + (i * 0x10), val);
-		val = nv_rd32(device, 0x00616104 + (i * 0x800));
-		nv_wr32(device, 0x00610194 + (i * 0x10), val);
-		val = nv_rd32(device, 0x00616108 + (i * 0x800));
-		nv_wr32(device, 0x00610198 + (i * 0x10), val);
-		val = nv_rd32(device, 0x0061610c + (i * 0x800));
-		nv_wr32(device, 0x0061019c + (i * 0x10), val);
-	}
-
-	/* DAC */
-	for (i = 0; i < 3; i++) {
-		val = nv_rd32(device, 0x0061a000 + (i * 0x800));
-		nv_wr32(device, 0x006101d0 + (i * 0x04), val);
-	}
-
-	/* SOR */
-	for (i = 0; i < nv50_sor_nr(dev); i++) {
-		val = nv_rd32(device, 0x0061c000 + (i * 0x800));
-		nv_wr32(device, 0x006101e0 + (i * 0x04), val);
-	}
-
-	/* EXT */
-	for (i = 0; i < 3; i++) {
-		val = nv_rd32(device, 0x0061e000 + (i * 0x800));
-		nv_wr32(device, 0x006101f0 + (i * 0x04), val);
-	}
-
-	for (i = 0; i < 3; i++) {
-		nv_wr32(device, NV50_PDISPLAY_DAC_DPMS_CTRL(i), 0x00550000 |
-			NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING);
-		nv_wr32(device, NV50_PDISPLAY_DAC_CLK_CTRL1(i), 0x00000001);
-	}
-
-	/* The precise purpose is unknown, i suspect it has something to do
-	 * with text mode.
-	 */
-	if (nv_rd32(device, NV50_PDISPLAY_INTR_1) & 0x100) {
-		nv_wr32(device, NV50_PDISPLAY_INTR_1, 0x100);
-		nv_wr32(device, 0x006194e8, nv_rd32(device, 0x006194e8) & ~1);
-		if (!nv_wait(device, 0x006194e8, 2, 0)) {
-			NV_ERROR(drm, "timeout: (0x6194e8 & 2) != 0\n");
-			NV_ERROR(drm, "0x6194e8 = 0x%08x\n",
-						nv_rd32(device, 0x6194e8));
-			return -EBUSY;
-		}
-	}
-
-	for (i = 0; i < 2; i++) {
-		nv_wr32(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), 0x2000);
-		if (!nv_wait(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
-			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
-			NV_ERROR(drm, "timeout: CURSOR_CTRL2_STATUS == 0\n");
-			NV_ERROR(drm, "CURSOR_CTRL2 = 0x%08x\n",
-				 nv_rd32(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i)));
-			return -EBUSY;
-		}
-
-		nv_wr32(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
-			NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_ON);
-		if (!nv_wait(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
-			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS,
-			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS_ACTIVE)) {
-			NV_ERROR(drm, "timeout: "
-				      "CURSOR_CTRL2_STATUS_ACTIVE(%d)\n", i);
-			NV_ERROR(drm, "CURSOR_CTRL2(%d) = 0x%08x\n", i,
-				 nv_rd32(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i)));
-			return -EBUSY;
-		}
-	}
-
-	nv_wr32(device, NV50_PDISPLAY_PIO_CTRL, 0x00000000);
-	nv_mask(device, NV50_PDISPLAY_INTR_0, 0x00000000, 0x00000000);
-	nv_wr32(device, NV50_PDISPLAY_INTR_EN_0, 0x00000000);
-	nv_mask(device, NV50_PDISPLAY_INTR_1, 0x00000000, 0x00000000);
-	nv_wr32(device, NV50_PDISPLAY_INTR_EN_1,
-		     NV50_PDISPLAY_INTR_EN_1_CLK_UNK10 |
-		     NV50_PDISPLAY_INTR_EN_1_CLK_UNK20 |
-		     NV50_PDISPLAY_INTR_EN_1_CLK_UNK40);
-
-	ret = nv50_evo_init(dev);
-	if (ret)
-		return ret;
-	evo = nv50_display(dev)->master;
-
-	nv_wr32(device, NV50_PDISPLAY_OBJECTS, (nv50_display(dev)->ramin->addr >> 8) | 9);
-
-	ret = RING_SPACE(evo, 3);
-	if (ret)
-		return ret;
-	BEGIN_NV04(evo, 0, NV50_EVO_UNK84, 2);
-	OUT_RING  (evo, NV50_EVO_UNK84_NOTIFY_DISABLED);
-	OUT_RING  (evo, NvEvoSync);
-
-	return nv50_display_sync(dev);
-}
-
-void
-nv50_display_fini(struct drm_device *dev)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nv50_display *disp = nv50_display(dev);
-	struct nouveau_channel *evo = disp->master;
-	struct drm_crtc *drm_crtc;
-	int ret, i;
-
-	NV_DEBUG(drm, "\n");
-
-	list_for_each_entry(drm_crtc, &dev->mode_config.crtc_list, head) {
-		struct nouveau_crtc *crtc = nouveau_crtc(drm_crtc);
-
-		nv50_crtc_blank(crtc, true);
-	}
-
-	ret = RING_SPACE(evo, 2);
-	if (ret == 0) {
-		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
-		OUT_RING(evo, 0);
-	}
-	FIRE_RING(evo);
-
-	/* Almost like ack'ing a vblank interrupt, maybe in the spirit of
-	 * cleaning up?
-	 */
-	list_for_each_entry(drm_crtc, &dev->mode_config.crtc_list, head) {
-		struct nouveau_crtc *crtc = nouveau_crtc(drm_crtc);
-		uint32_t mask = NV50_PDISPLAY_INTR_1_VBLANK_CRTC_(crtc->index);
-
-		if (!crtc->base.enabled)
-			continue;
-
-		nv_wr32(device, NV50_PDISPLAY_INTR_1, mask);
-		if (!nv_wait(device, NV50_PDISPLAY_INTR_1, mask, mask)) {
-			NV_ERROR(drm, "timeout: (0x610024 & 0x%08x) == "
-				      "0x%08x\n", mask, mask);
-			NV_ERROR(drm, "0x610024 = 0x%08x\n",
-				 nv_rd32(device, NV50_PDISPLAY_INTR_1));
-		}
-	}
-
-	for (i = 0; i < 2; i++) {
-		nv_wr32(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), 0);
-		if (!nv_wait(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
-			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
-			NV_ERROR(drm, "timeout: CURSOR_CTRL2_STATUS == 0\n");
-			NV_ERROR(drm, "CURSOR_CTRL2 = 0x%08x\n",
-				 nv_rd32(device, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i)));
-		}
-	}
-
-	nv50_evo_fini(dev);
-
-	for (i = 0; i < 3; i++) {
-		if (!nv_wait(device, NV50_PDISPLAY_SOR_DPMS_STATE(i),
-			     NV50_PDISPLAY_SOR_DPMS_STATE_WAIT, 0)) {
-			NV_ERROR(drm, "timeout: SOR_DPMS_STATE_WAIT(%d) == 0\n", i);
-			NV_ERROR(drm, "SOR_DPMS_STATE(%d) = 0x%08x\n", i,
-				  nv_rd32(device, NV50_PDISPLAY_SOR_DPMS_STATE(i)));
-		}
-	}
-
-	/* disable interrupts. */
-	nv_wr32(device, NV50_PDISPLAY_INTR_EN_1, 0x00000000);
-}
-
-int
-nv50_display_create(struct drm_device *dev)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct dcb_table *dcb = &drm->vbios.dcb;
-	struct drm_connector *connector, *ct;
-	struct nv50_display *priv;
-	int ret, i;
-
-	NV_DEBUG(drm, "\n");
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	nouveau_display(dev)->priv = priv;
-	nouveau_display(dev)->dtor = nv50_display_destroy;
-	nouveau_display(dev)->init = nv50_display_init;
-	nouveau_display(dev)->fini = nv50_display_fini;
-
-	/* Create CRTC objects */
-	for (i = 0; i < 2; i++) {
-		ret = nv50_crtc_create(dev, i);
-		if (ret)
-			return ret;
-	}
-
-	/* We setup the encoders from the BIOS table */
-	for (i = 0 ; i < dcb->entries; i++) {
-		struct dcb_output *entry = &dcb->entry[i];
-
-		if (entry->location != DCB_LOC_ON_CHIP) {
-			NV_WARN(drm, "Off-chip encoder %d/%d unsupported\n",
-				entry->type, ffs(entry->or) - 1);
-			continue;
-		}
-
-		connector = nouveau_connector_create(dev, entry->connector);
-		if (IS_ERR(connector))
-			continue;
-
-		switch (entry->type) {
-		case DCB_OUTPUT_TMDS:
-		case DCB_OUTPUT_LVDS:
-		case DCB_OUTPUT_DP:
-			nv50_sor_create(connector, entry);
-			break;
-		case DCB_OUTPUT_ANALOG:
-			nv50_dac_create(connector, entry);
-			break;
-		default:
-			NV_WARN(drm, "DCB encoder %d unknown\n", entry->type);
-			continue;
-		}
-	}
-
-	list_for_each_entry_safe(connector, ct,
-				 &dev->mode_config.connector_list, head) {
-		if (!connector->encoder_ids[0]) {
-			NV_WARN(drm, "%s has no encoders, removing\n",
-				drm_get_connector_name(connector));
-			connector->funcs->destroy(connector);
-		}
-	}
-
-	tasklet_init(&priv->tasklet, nv50_display_bh, (unsigned long)dev);
-
-	ret = nv50_evo_create(dev);
-	if (ret) {
-		nv50_display_destroy(dev);
-		return ret;
-	}
-
-	return 0;
-}
-
-void
-nv50_display_destroy(struct drm_device *dev)
-{
-	struct nv50_display *disp = nv50_display(dev);
-
-	nv50_evo_destroy(dev);
-	kfree(disp);
-}
-
+/******************************************************************************
+ * Page flipping channel
+ *****************************************************************************/
 struct nouveau_bo *
 nv50_display_crtc_sema(struct drm_device *dev, int crtc)
 {
-	return nv50_display(dev)->crtc[crtc].sem.bo;
+	return nv50_disp(dev)->sync;
 }
 
 void
 nv50_display_flip_stop(struct drm_crtc *crtc)
 {
-	struct nv50_display *disp = nv50_display(crtc->dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nv50_display_crtc *dispc = &disp->crtc[nv_crtc->index];
-	struct nouveau_channel *evo = dispc->sync;
-	int ret;
+	struct nv50_sync *sync = nv50_sync(crtc);
+	u32 *push;
 
-	ret = RING_SPACE(evo, 8);
-	if (ret) {
-		WARN_ON(1);
-		return;
+	push = evo_wait(sync, 8);
+	if (push) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x0094, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x0080, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, sync);
 	}
-
-	BEGIN_NV04(evo, 0, 0x0084, 1);
-	OUT_RING  (evo, 0x00000000);
-	BEGIN_NV04(evo, 0, 0x0094, 1);
-	OUT_RING  (evo, 0x00000000);
-	BEGIN_NV04(evo, 0, 0x00c0, 1);
-	OUT_RING  (evo, 0x00000000);
-	BEGIN_NV04(evo, 0, 0x0080, 1);
-	OUT_RING  (evo, 0x00000000);
-	FIRE_RING (evo);
 }
 
 int
 nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-		       struct nouveau_channel *chan)
+		       struct nouveau_channel *chan, u32 swap_interval)
 {
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
-	struct nv50_display *disp = nv50_display(crtc->dev);
+	struct nv50_disp *disp = nv50_disp(crtc->dev);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nv50_display_crtc *dispc = &disp->crtc[nv_crtc->index];
-	struct nouveau_channel *evo = dispc->sync;
+	struct nv50_sync *sync = nv50_sync(crtc);
+	u32 *push;
 	int ret;
 
-	ret = RING_SPACE(evo, chan ? 25 : 27);
-	if (unlikely(ret))
-		return ret;
+	swap_interval <<= 4;
+	if (swap_interval == 0)
+		swap_interval |= 0x100;
+
+	push = evo_wait(sync, 128);
+	if (unlikely(push == NULL))
+		return -EBUSY;
 
 	/* synchronise with the rendering channel, if necessary */
 	if (likely(chan)) {
 		ret = RING_SPACE(chan, 10);
-		if (ret) {
-			WIND_RING(evo);
+		if (ret)
 			return ret;
-		}
 
-		if (nv_device(drm->device)->chipset < 0xc0) {
-			BEGIN_NV04(chan, 0, 0x0060, 2);
+		if (nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) {
+			BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
 			OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
-			OUT_RING  (chan, dispc->sem.offset);
-			BEGIN_NV04(chan, 0, 0x006c, 1);
-			OUT_RING  (chan, 0xf00d0000 | dispc->sem.value);
-			BEGIN_NV04(chan, 0, 0x0064, 2);
-			OUT_RING  (chan, dispc->sem.offset ^ 0x10);
+			OUT_RING  (chan, sync->sem.offset);
+			BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
+			OUT_RING  (chan, 0xf00d0000 | sync->sem.value);
+			BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2);
+			OUT_RING  (chan, sync->sem.offset ^ 0x10);
 			OUT_RING  (chan, 0x74b1e000);
-			BEGIN_NV04(chan, 0, 0x0060, 1);
-			if (nv_device(drm->device)->chipset < 0x84)
+			BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
+			if (nv_mclass(chan->object) < NV84_CHANNEL_DMA_CLASS)
 				OUT_RING  (chan, NvSema);
 			else
 				OUT_RING  (chan, chan->vram);
 		} else {
 			u64 offset = nvc0_fence_crtc(chan, nv_crtc->index);
-			offset += dispc->sem.offset;
-			BEGIN_NVC0(chan, 0, 0x0010, 4);
+			offset += sync->sem.offset;
+
+			BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
 			OUT_RING  (chan, upper_32_bits(offset));
 			OUT_RING  (chan, lower_32_bits(offset));
-			OUT_RING  (chan, 0xf00d0000 | dispc->sem.value);
+			OUT_RING  (chan, 0xf00d0000 | sync->sem.value);
 			OUT_RING  (chan, 0x1002);
-			BEGIN_NVC0(chan, 0, 0x0010, 4);
+			BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
 			OUT_RING  (chan, upper_32_bits(offset));
 			OUT_RING  (chan, lower_32_bits(offset ^ 0x10));
 			OUT_RING  (chan, 0x74b1e000);
 			OUT_RING  (chan, 0x1001);
 		}
+
 		FIRE_RING (chan);
 	} else {
-		nouveau_bo_wr32(dispc->sem.bo, dispc->sem.offset / 4,
-				0xf00d0000 | dispc->sem.value);
+		nouveau_bo_wr32(disp->sync, sync->sem.offset / 4,
+				0xf00d0000 | sync->sem.value);
+		evo_sync(crtc->dev);
 	}
 
-	/* queue the flip on the crtc's "display sync" channel */
-	BEGIN_NV04(evo, 0, 0x0100, 1);
-	OUT_RING  (evo, 0xfffe0000);
-	if (chan) {
-		BEGIN_NV04(evo, 0, 0x0084, 1);
-		OUT_RING  (evo, 0x00000100);
+	/* queue the flip */
+	evo_mthd(push, 0x0100, 1);
+	evo_data(push, 0xfffe0000);
+	evo_mthd(push, 0x0084, 1);
+	evo_data(push, swap_interval);
+	if (!(swap_interval & 0x00000100)) {
+		evo_mthd(push, 0x00e0, 1);
+		evo_data(push, 0x40000000);
+	}
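+	/* The four words sent to 0x0088 below program the flip semaphore:
+	 * wait for the render channel's release (0xf00d0000 | value) at
+	 * sem.offset, then write 0x74b1e000 back once the flip completes;
+	 * sem.offset is toggled (^= 0x10) after each flip so successive
+	 * flips alternate between two slots of the shared sync buffer. */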
+	evo_mthd(push, 0x0088, 4);
+	evo_data(push, sync->sem.offset);
+	evo_data(push, 0xf00d0000 | sync->sem.value);
+	evo_data(push, 0x74b1e000);
+	evo_data(push, NvEvoSync);
+	evo_mthd(push, 0x00a0, 2);
+	evo_data(push, 0x00000000);
+	evo_data(push, 0x00000000);
+	evo_mthd(push, 0x00c0, 1);
+	evo_data(push, nv_fb->r_dma);
+	evo_mthd(push, 0x0110, 2);
+	evo_data(push, 0x00000000);
+	evo_data(push, 0x00000000);
+	if (nv50_vers(sync) < NVD0_DISP_SYNC_CLASS) {
+		evo_mthd(push, 0x0800, 5);
+		evo_data(push, nv_fb->nvbo->bo.offset >> 8);
+		evo_data(push, 0);
+		evo_data(push, (fb->height << 16) | fb->width);
+		evo_data(push, nv_fb->r_pitch);
+		evo_data(push, nv_fb->r_format);
 	} else {
-		BEGIN_NV04(evo, 0, 0x0084, 1);
-		OUT_RING  (evo, 0x00000010);
-		/* allows gamma somehow, PDISP will bitch at you if
-		 * you don't wait for vblank before changing this..
-		 */
-		BEGIN_NV04(evo, 0, 0x00e0, 1);
-		OUT_RING  (evo, 0x40000000);
+		evo_mthd(push, 0x0400, 5);
+		evo_data(push, nv_fb->nvbo->bo.offset >> 8);
+		evo_data(push, 0);
+		evo_data(push, (fb->height << 16) | fb->width);
+		evo_data(push, nv_fb->r_pitch);
+		evo_data(push, nv_fb->r_format);
 	}
-	BEGIN_NV04(evo, 0, 0x0088, 4);
-	OUT_RING  (evo, dispc->sem.offset);
-	OUT_RING  (evo, 0xf00d0000 | dispc->sem.value);
-	OUT_RING  (evo, 0x74b1e000);
-	OUT_RING  (evo, NvEvoSync);
-	BEGIN_NV04(evo, 0, 0x00a0, 2);
-	OUT_RING  (evo, 0x00000000);
-	OUT_RING  (evo, 0x00000000);
-	BEGIN_NV04(evo, 0, 0x00c0, 1);
-	OUT_RING  (evo, nv_fb->r_dma);
-	BEGIN_NV04(evo, 0, 0x0110, 2);
-	OUT_RING  (evo, 0x00000000);
-	OUT_RING  (evo, 0x00000000);
-	BEGIN_NV04(evo, 0, 0x0800, 5);
-	OUT_RING  (evo, nv_fb->nvbo->bo.offset >> 8);
-	OUT_RING  (evo, 0);
-	OUT_RING  (evo, (fb->height << 16) | fb->width);
-	OUT_RING  (evo, nv_fb->r_pitch);
-	OUT_RING  (evo, nv_fb->r_format);
-	BEGIN_NV04(evo, 0, 0x0080, 1);
-	OUT_RING  (evo, 0x00000000);
-	FIRE_RING (evo);
+	evo_mthd(push, 0x0080, 1);
+	evo_data(push, 0x00000000);
+	evo_kick(push, sync);
 
-	dispc->sem.offset ^= 0x10;
-	dispc->sem.value++;
+	sync->sem.offset ^= 0x10;
+	sync->sem.value++;
 	return 0;
 }
 
-static u16
-nv50_display_script_select(struct drm_device *dev, struct dcb_output *dcb,
-			   u32 mc, int pxclk)
+/******************************************************************************
+ * CRTC
+ *****************************************************************************/
+static int
+nv50_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool update)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_connector *nv_connector = NULL;
-	struct drm_encoder *encoder;
-	struct nvbios *bios = &drm->vbios;
-	u32 script = 0, or;
+	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	struct nouveau_connector *nv_connector;
+	struct drm_connector *connector;
+	u32 *push, mode = 0x00;
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-
-		if (nv_encoder->dcb != dcb)
-			continue;
-
-		nv_connector = nouveau_encoder_connector_get(nv_encoder);
-		break;
+	nv_connector = nouveau_crtc_connector_get(nv_crtc);
+	connector = &nv_connector->base;
+	if (nv_connector->dithering_mode == DITHERING_MODE_AUTO) {
+		if (nv_crtc->base.fb->depth > connector->display_info.bpc * 3)
+			mode = DITHERING_MODE_DYNAMIC2X2;
+	} else {
+		mode = nv_connector->dithering_mode;
 	}
 
-	or = ffs(dcb->or) - 1;
-	switch (dcb->type) {
-	case DCB_OUTPUT_LVDS:
-		script = (mc >> 8) & 0xf;
-		if (bios->fp_no_ddc) {
-			if (bios->fp.dual_link)
-				script |= 0x0100;
-			if (bios->fp.if_is_24bit)
-				script |= 0x0200;
+	if (nv_connector->dithering_depth == DITHERING_DEPTH_AUTO) {
+		if (connector->display_info.bpc >= 8)
+			mode |= DITHERING_DEPTH_8BPC;
+	} else {
+		mode |= nv_connector->dithering_depth;
+	}
+
+	push = evo_wait(mast, 4);
+	if (push) {
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x08a0 + (nv_crtc->index * 0x0400), 1);
+			evo_data(push, mode);
+		} else
+		if (nv50_vers(mast) < NVE0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0490 + (nv_crtc->index * 0x0300), 1);
+			evo_data(push, mode);
 		} else {
-			/* determine number of lvds links */
-			if (nv_connector && nv_connector->edid &&
-			    nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) {
-				/* http://www.spwg.org */
-				if (((u8 *)nv_connector->edid)[121] == 2)
-					script |= 0x0100;
-			} else
-			if (pxclk >= bios->fp.duallink_transition_clk) {
-				script |= 0x0100;
-			}
-
-			/* determine panel depth */
-			if (script & 0x0100) {
-				if (bios->fp.strapless_is_24bit & 2)
-					script |= 0x0200;
-			} else {
-				if (bios->fp.strapless_is_24bit & 1)
-					script |= 0x0200;
-			}
-
-			if (nv_connector && nv_connector->edid &&
-			    (nv_connector->edid->revision >= 4) &&
-			    (nv_connector->edid->input & 0x70) >= 0x20)
-				script |= 0x0200;
+			evo_mthd(push, 0x04a0 + (nv_crtc->index * 0x0300), 1);
+			evo_data(push, mode);
 		}
-		break;
-	case DCB_OUTPUT_TMDS:
-		script = (mc >> 8) & 0xf;
-		if (pxclk >= 165000)
-			script |= 0x0100;
-		break;
-	case DCB_OUTPUT_DP:
-		script = (mc >> 8) & 0xf;
-		break;
-	case DCB_OUTPUT_ANALOG:
-		script = 0xff;
+
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, mast);
+	}
+
+	return 0;
+}
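+
+/* The word written above packs dithering method and depth together, e.g.
+ * DITHERING_MODE_DYNAMIC2X2 | DITHERING_DEPTH_8BPC when both are
+ * auto-selected for a deep framebuffer on an 8bpc-or-better sink. */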
+
+static int
+nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, bool update)
+{
+	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	struct drm_display_mode *omode, *umode = &nv_crtc->base.mode;
+	struct drm_crtc *crtc = &nv_crtc->base;
+	struct nouveau_connector *nv_connector;
+	int mode = DRM_MODE_SCALE_NONE;
+	u32 oX, oY, *push;
+
+	/* Start off at the resolution we programmed the crtc for; this
+	 * effectively handles NONE/FULL scaling.
+	 */
+	nv_connector = nouveau_crtc_connector_get(nv_crtc);
+	if (nv_connector && nv_connector->native_mode)
+		mode = nv_connector->scaling_mode;
+
+	if (mode != DRM_MODE_SCALE_NONE)
+		omode = nv_connector->native_mode;
+	else
+		omode = umode;
+
+	oX = omode->hdisplay;
+	oY = omode->vdisplay;
+	if (omode->flags & DRM_MODE_FLAG_DBLSCAN)
+		oY *= 2;
+
+	/* Add overscan compensation if necessary; this will keep the aspect
+	 * ratio the same as the backend mode unless overridden by the
+	 * user setting both hborder and vborder properties.
+	 */
+	if (nv_connector && ( nv_connector->underscan == UNDERSCAN_ON ||
+			     (nv_connector->underscan == UNDERSCAN_AUTO &&
+			      nv_connector->edid &&
+			      drm_detect_hdmi_monitor(nv_connector->edid)))) {
+		u32 bX = nv_connector->underscan_hborder;
+		u32 bY = nv_connector->underscan_vborder;
+		u32 aspect = (oY << 19) / oX;
+
+		if (bX) {
+			oX -= (bX * 2);
+			if (bY) oY -= (bY * 2);
+			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
+		} else {
+			oX -= (oX >> 4) + 32;
+			if (bY) oY -= (bY * 2);
+			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
+		}
+	}
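+
+	/* The aspect value above is 0.19 fixed point: for a 1920x1080 mode,
+	 * aspect = (1080 << 19) / 1920 = 294912 (i.e. 0.5625), so oX = 1800
+	 * gives oY = ((1800 * 294912) + 147456) >> 19 = 1012, keeping the
+	 * borders in proportion (the +aspect/2 term provides rounding). */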
+
+	/* handle CENTER/ASPECT scaling, taking into account the areas
+	 * removed already for overscan compensation
+	 */
+	switch (mode) {
+	case DRM_MODE_SCALE_CENTER:
+		oX = min((u32)umode->hdisplay, oX);
+		oY = min((u32)umode->vdisplay, oY);
+		/* fall-through */
+	case DRM_MODE_SCALE_ASPECT:
+		if (oY < oX) {
+			u32 aspect = (umode->hdisplay << 19) / umode->vdisplay;
+			oX = ((oY * aspect) + (aspect / 2)) >> 19;
+		} else {
+			u32 aspect = (umode->vdisplay << 19) / umode->hdisplay;
+			oY = ((oX * aspect) + (aspect / 2)) >> 19;
+		}
 		break;
 	default:
-		NV_ERROR(drm, "modeset on unsupported output type!\n");
 		break;
 	}
 
-	return script;
+	push = evo_wait(mast, 8);
+	if (push) {
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			/*XXX: SCALE_CTRL_ACTIVE??? */
+			evo_mthd(push, 0x08d8 + (nv_crtc->index * 0x400), 2);
+			evo_data(push, (oY << 16) | oX);
+			evo_data(push, (oY << 16) | oX);
+			evo_mthd(push, 0x08a4 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x08c8 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, umode->vdisplay << 16 | umode->hdisplay);
+		} else {
+			evo_mthd(push, 0x04c0 + (nv_crtc->index * 0x300), 3);
+			evo_data(push, (oY << 16) | oX);
+			evo_data(push, (oY << 16) | oX);
+			evo_data(push, (oY << 16) | oX);
+			evo_mthd(push, 0x0494 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x04b8 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, umode->vdisplay << 16 | umode->hdisplay);
+		}
+
+		evo_kick(push, mast);
+
+		if (update) {
+			nv50_display_flip_stop(crtc);
+			nv50_display_flip_next(crtc, crtc->fb, NULL, 1);
+		}
+	}
+
+	return 0;
+}
+
+static int
+nv50_crtc_set_color_vibrance(struct nouveau_crtc *nv_crtc, bool update)
+{
+	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	u32 *push, hue, vib;
+	int adj;
+
+	adj = (nv_crtc->color_vibrance > 0) ? 50 : 0;
+	vib = ((nv_crtc->color_vibrance * 2047 + adj) / 100) & 0xfff;
+	hue = ((nv_crtc->vibrant_hue * 2047) / 100) & 0xfff;
+
+	push = evo_wait(mast, 16);
+	if (push) {
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x08a8 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, (hue << 20) | (vib << 8));
+		} else {
+			evo_mthd(push, 0x0498 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, (hue << 20) | (vib << 8));
+		}
+
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, mast);
+	}
+
+	return 0;
+}
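+
+/* For the encoding above: color_vibrance and vibrant_hue are percentages
+ * scaled onto the hardware's 12-bit fields, e.g. 50 maps to
+ * ((50 * 2047) + 50) / 100 = 1024; the & 0xfff keeps the two's-complement
+ * form when a negative vibrance is programmed. */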
+
+static int
+nv50_crtc_set_image(struct nouveau_crtc *nv_crtc, struct drm_framebuffer *fb,
+		    int x, int y, bool update)
+{
+	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(fb);
+	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	u32 *push;
+
+	push = evo_wait(mast, 16);
+	if (push) {
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0860 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, nvfb->nvbo->bo.offset >> 8);
+			evo_mthd(push, 0x0868 + (nv_crtc->index * 0x400), 3);
+			evo_data(push, (fb->height << 16) | fb->width);
+			evo_data(push, nvfb->r_pitch);
+			evo_data(push, nvfb->r_format);
+			evo_mthd(push, 0x08c0 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, (y << 16) | x);
+			if (nv50_vers(mast) > NV50_DISP_MAST_CLASS) {
+				evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
+				evo_data(push, nvfb->r_dma);
+			}
+		} else {
+			evo_mthd(push, 0x0460 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, nvfb->nvbo->bo.offset >> 8);
+			evo_mthd(push, 0x0468 + (nv_crtc->index * 0x300), 4);
+			evo_data(push, (fb->height << 16) | fb->width);
+			evo_data(push, nvfb->r_pitch);
+			evo_data(push, nvfb->r_format);
+			evo_data(push, nvfb->r_dma);
+			evo_mthd(push, 0x04b0 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, (y << 16) | x);
+		}
+
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, mast);
+	}
+
+	nv_crtc->fb.tile_flags = nvfb->r_dma;
+	return 0;
 }
 
 static void
-nv50_display_unk10_handler(struct drm_device *dev)
+nv50_crtc_cursor_show(struct nouveau_crtc *nv_crtc)
 {
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_display *disp = nv50_display(dev);
-	u32 unk30 = nv_rd32(device, 0x610030), mc;
-	int i, crtc, or = 0, type = DCB_OUTPUT_ANY;
-
-	NV_DEBUG(drm, "0x610030: 0x%08x\n", unk30);
-	disp->irq.dcb = NULL;
-
-	nv_wr32(device, 0x619494, nv_rd32(device, 0x619494) & ~8);
-
-	/* Determine which CRTC we're dealing with, only 1 ever will be
-	 * signalled at the same time with the current nouveau code.
-	 */
-	crtc = ffs((unk30 & 0x00000060) >> 5) - 1;
-	if (crtc < 0)
-		goto ack;
-
-	/* Nothing needs to be done for the encoder */
-	crtc = ffs((unk30 & 0x00000180) >> 7) - 1;
-	if (crtc < 0)
-		goto ack;
-
-	/* Find which encoder was connected to the CRTC */
-	for (i = 0; type == DCB_OUTPUT_ANY && i < 3; i++) {
-		mc = nv_rd32(device, NV50_PDISPLAY_DAC_MODE_CTRL_C(i));
-		NV_DEBUG(drm, "DAC-%d mc: 0x%08x\n", i, mc);
-		if (!(mc & (1 << crtc)))
-			continue;
-
-		switch ((mc & 0x00000f00) >> 8) {
-		case 0: type = DCB_OUTPUT_ANALOG; break;
-		case 1: type = DCB_OUTPUT_TV; break;
-		default:
-			NV_ERROR(drm, "invalid mc, DAC-%d: 0x%08x\n", i, mc);
-			goto ack;
+	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	u32 *push = evo_wait(mast, 16);
+	if (push) {
+		if (nv50_vers(mast) < NV84_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 2);
+			evo_data(push, 0x85000000);
+			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
+		} else
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 2);
+			evo_data(push, 0x85000000);
+			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
+			evo_mthd(push, 0x089c + (nv_crtc->index * 0x400), 1);
+			evo_data(push, NvEvoVRAM);
+		} else {
+			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 2);
+			evo_data(push, 0x85000000);
+			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
+			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, NvEvoVRAM);
 		}
-
-		or = i;
+		evo_kick(push, mast);
 	}
-
-	for (i = 0; type == DCB_OUTPUT_ANY && i < nv50_sor_nr(dev); i++) {
-		if (nv_device(drm->device)->chipset  < 0x90 ||
-		    nv_device(drm->device)->chipset == 0x92 ||
-		    nv_device(drm->device)->chipset == 0xa0)
-			mc = nv_rd32(device, NV50_PDISPLAY_SOR_MODE_CTRL_C(i));
-		else
-			mc = nv_rd32(device, NV90_PDISPLAY_SOR_MODE_CTRL_C(i));
-
-		NV_DEBUG(drm, "SOR-%d mc: 0x%08x\n", i, mc);
-		if (!(mc & (1 << crtc)))
-			continue;
-
-		switch ((mc & 0x00000f00) >> 8) {
-		case 0: type = DCB_OUTPUT_LVDS; break;
-		case 1: type = DCB_OUTPUT_TMDS; break;
-		case 2: type = DCB_OUTPUT_TMDS; break;
-		case 5: type = DCB_OUTPUT_TMDS; break;
-		case 8: type = DCB_OUTPUT_DP; break;
-		case 9: type = DCB_OUTPUT_DP; break;
-		default:
-			NV_ERROR(drm, "invalid mc, SOR-%d: 0x%08x\n", i, mc);
-			goto ack;
-		}
-
-		or = i;
-	}
-
-	/* There was no encoder to disable */
-	if (type == DCB_OUTPUT_ANY)
-		goto ack;
-
-	/* Disable the encoder */
-	for (i = 0; i < drm->vbios.dcb.entries; i++) {
-		struct dcb_output *dcb = &drm->vbios.dcb.entry[i];
-
-		if (dcb->type == type && (dcb->or & (1 << or))) {
-			nouveau_bios_run_display_table(dev, 0, -1, dcb, -1);
-			disp->irq.dcb = dcb;
-			goto ack;
-		}
-	}
-
-	NV_ERROR(drm, "no dcb for %d %d 0x%08x\n", or, type, mc);
-ack:
-	nv_wr32(device, NV50_PDISPLAY_INTR_1, NV50_PDISPLAY_INTR_1_CLK_UNK10);
-	nv_wr32(device, 0x610030, 0x80000000);
 }
 
 static void
-nv50_display_unk20_handler(struct drm_device *dev)
+nv50_crtc_cursor_hide(struct nouveau_crtc *nv_crtc)
 {
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_display *disp = nv50_display(dev);
-	u32 unk30 = nv_rd32(device, 0x610030), tmp, pclk, script, mc = 0;
-	struct dcb_output *dcb;
-	int i, crtc, or = 0, type = DCB_OUTPUT_ANY;
-
-	NV_DEBUG(drm, "0x610030: 0x%08x\n", unk30);
-	dcb = disp->irq.dcb;
-	if (dcb) {
-		nouveau_bios_run_display_table(dev, 0, -2, dcb, -1);
-		disp->irq.dcb = NULL;
-	}
-
-	/* CRTC clock change requested? */
-	crtc = ffs((unk30 & 0x00000600) >> 9) - 1;
-	if (crtc >= 0) {
-		pclk  = nv_rd32(device, NV50_PDISPLAY_CRTC_P(crtc, CLOCK));
-		pclk &= 0x003fffff;
-		if (pclk)
-			nv50_crtc_set_clock(dev, crtc, pclk);
-
-		tmp = nv_rd32(device, NV50_PDISPLAY_CRTC_CLK_CTRL2(crtc));
-		tmp &= ~0x000000f;
-		nv_wr32(device, NV50_PDISPLAY_CRTC_CLK_CTRL2(crtc), tmp);
-	}
-
-	/* Nothing needs to be done for the encoder */
-	crtc = ffs((unk30 & 0x00000180) >> 7) - 1;
-	if (crtc < 0)
-		goto ack;
-	pclk  = nv_rd32(device, NV50_PDISPLAY_CRTC_P(crtc, CLOCK)) & 0x003fffff;
-
-	/* Find which encoder is connected to the CRTC */
-	for (i = 0; type == DCB_OUTPUT_ANY && i < 3; i++) {
-		mc = nv_rd32(device, NV50_PDISPLAY_DAC_MODE_CTRL_P(i));
-		NV_DEBUG(drm, "DAC-%d mc: 0x%08x\n", i, mc);
-		if (!(mc & (1 << crtc)))
-			continue;
-
-		switch ((mc & 0x00000f00) >> 8) {
-		case 0: type = DCB_OUTPUT_ANALOG; break;
-		case 1: type = DCB_OUTPUT_TV; break;
-		default:
-			NV_ERROR(drm, "invalid mc, DAC-%d: 0x%08x\n", i, mc);
-			goto ack;
+	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	u32 *push = evo_wait(mast, 16);
+	if (push) {
+		if (nv50_vers(mast) < NV84_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x05000000);
+		} else
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x05000000);
+			evo_mthd(push, 0x089c + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x00000000);
+		} else {
+			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x05000000);
+			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x00000000);
 		}
-
-		or = i;
+		evo_kick(push, mast);
 	}
-
-	for (i = 0; type == DCB_OUTPUT_ANY && i < nv50_sor_nr(dev); i++) {
-		if (nv_device(drm->device)->chipset  < 0x90 ||
-		    nv_device(drm->device)->chipset == 0x92 ||
-		    nv_device(drm->device)->chipset == 0xa0)
-			mc = nv_rd32(device, NV50_PDISPLAY_SOR_MODE_CTRL_P(i));
-		else
-			mc = nv_rd32(device, NV90_PDISPLAY_SOR_MODE_CTRL_P(i));
-
-		NV_DEBUG(drm, "SOR-%d mc: 0x%08x\n", i, mc);
-		if (!(mc & (1 << crtc)))
-			continue;
-
-		switch ((mc & 0x00000f00) >> 8) {
-		case 0: type = DCB_OUTPUT_LVDS; break;
-		case 1: type = DCB_OUTPUT_TMDS; break;
-		case 2: type = DCB_OUTPUT_TMDS; break;
-		case 5: type = DCB_OUTPUT_TMDS; break;
-		case 8: type = DCB_OUTPUT_DP; break;
-		case 9: type = DCB_OUTPUT_DP; break;
-		default:
-			NV_ERROR(drm, "invalid mc, SOR-%d: 0x%08x\n", i, mc);
-			goto ack;
-		}
-
-		or = i;
-	}
-
-	if (type == DCB_OUTPUT_ANY)
-		goto ack;
-
-	/* Enable the encoder */
-	for (i = 0; i < drm->vbios.dcb.entries; i++) {
-		dcb = &drm->vbios.dcb.entry[i];
-		if (dcb->type == type && (dcb->or & (1 << or)))
-			break;
-	}
-
-	if (i == drm->vbios.dcb.entries) {
-		NV_ERROR(drm, "no dcb for %d %d 0x%08x\n", or, type, mc);
-		goto ack;
-	}
-
-	script = nv50_display_script_select(dev, dcb, mc, pclk);
-	nouveau_bios_run_display_table(dev, script, pclk, dcb, -1);
-
-	if (type == DCB_OUTPUT_DP) {
-		int link = !(dcb->dpconf.sor.link & 1);
-		if ((mc & 0x000f0000) == 0x00020000)
-			nv50_sor_dp_calc_tu(dev, or, link, pclk, 18);
-		else
-			nv50_sor_dp_calc_tu(dev, or, link, pclk, 24);
-	}
-
-	if (dcb->type != DCB_OUTPUT_ANALOG) {
-		tmp = nv_rd32(device, NV50_PDISPLAY_SOR_CLK_CTRL2(or));
-		tmp &= ~0x00000f0f;
-		if (script & 0x0100)
-			tmp |= 0x00000101;
-		nv_wr32(device, NV50_PDISPLAY_SOR_CLK_CTRL2(or), tmp);
-	} else {
-		nv_wr32(device, NV50_PDISPLAY_DAC_CLK_CTRL2(or), 0);
-	}
-
-	disp->irq.dcb = dcb;
-	disp->irq.pclk = pclk;
-	disp->irq.script = script;
-
-ack:
-	nv_wr32(device, NV50_PDISPLAY_INTR_1, NV50_PDISPLAY_INTR_1_CLK_UNK20);
-	nv_wr32(device, 0x610030, 0x80000000);
 }
 
-/* If programming a TMDS output on a SOR that can also be configured for
- * DisplayPort, make sure NV50_SOR_DP_CTRL_ENABLE is forced off.
- *
- * It looks like the VBIOS TMDS scripts make an attempt at this, however,
- * the VBIOS scripts on at least one board I have only switch it off on
- * link 0, causing a blank display if the output has previously been
- * programmed for DisplayPort.
- */
 static void
-nv50_display_unk40_dp_set_tmds(struct drm_device *dev, struct dcb_output *dcb)
+nv50_crtc_cursor_show_hide(struct nouveau_crtc *nv_crtc, bool show, bool update)
 {
-	struct nouveau_device *device = nouveau_dev(dev);
-	int or = ffs(dcb->or) - 1, link = !(dcb->dpconf.sor.link & 1);
+	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+
+	if (show)
+		nv50_crtc_cursor_show(nv_crtc);
+	else
+		nv50_crtc_cursor_hide(nv_crtc);
+
+	if (update) {
+		u32 *push = evo_wait(mast, 2);
+		if (push) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+			evo_kick(push, mast);
+		}
+	}
+}
+
+static void
+nv50_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+}
+
+static void
+nv50_crtc_prepare(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nv50_mast *mast = nv50_mast(crtc->dev);
+	u32 *push;
+
+	nv50_display_flip_stop(crtc);
+
+	push = evo_wait(mast, 2);
+	if (push) {
+		if (nv50_vers(mast) < NV84_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x40000000);
+		} else
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x40000000);
+			evo_mthd(push, 0x085c + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x00000000);
+		} else {
+			evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x03000000);
+			evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x00000000);
+		}
+
+		evo_kick(push, mast);
+	}
+
+	nv50_crtc_cursor_show_hide(nv_crtc, false, false);
+}
+
+static void
+nv50_crtc_commit(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nv50_mast *mast = nv50_mast(crtc->dev);
+	u32 *push;
+
+	push = evo_wait(mast, 32);
+	if (push) {
+		if (nv50_vers(mast) < NV84_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, NvEvoVRAM_LP);
+			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 2);
+			evo_data(push, 0xc0000000);
+			evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
+		} else
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
+			evo_data(push, nv_crtc->fb.tile_flags);
+			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 2);
+			evo_data(push, 0xc0000000);
+			evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
+			evo_mthd(push, 0x085c + (nv_crtc->index * 0x400), 1);
+			evo_data(push, NvEvoVRAM);
+		} else {
+			evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, nv_crtc->fb.tile_flags);
+			evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 4);
+			evo_data(push, 0x83000000);
+			evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
+			evo_data(push, 0x00000000);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, NvEvoVRAM);
+			evo_mthd(push, 0x0430 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0xffffff00);
+		}
+
+		evo_kick(push, mast);
+	}
+
+	nv50_crtc_cursor_show_hide(nv_crtc, nv_crtc->cursor.visible, true);
+	nv50_display_flip_next(crtc, crtc->fb, NULL, 1);
+}
+
+static bool
+nv50_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode,
+		     struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static int
+nv50_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
+{
+	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+	int ret;
+
+	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
+	if (ret)
+		return ret;
+
+	if (old_fb) {
+		nvfb = nouveau_framebuffer(old_fb);
+		nouveau_bo_unpin(nvfb->nvbo);
+	}
+
+	return 0;
+}
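+
+/* nv50_crtc_swap_fbs() pins the new scanout buffer in VRAM before
+ * unpinning the old one, so a framebuffer being displayed is never left
+ * unpinned during the switch. */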
+
+static int
+nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
+		   struct drm_display_mode *mode, int x, int y,
+		   struct drm_framebuffer *old_fb)
+{
+	struct nv50_mast *mast = nv50_mast(crtc->dev);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nouveau_connector *nv_connector;
+	u32 ilace = (mode->flags & DRM_MODE_FLAG_INTERLACE) ? 2 : 1;
+	u32 vscan = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1;
+	u32 hactive, hsynce, hbackp, hfrontp, hblanke, hblanks;
+	u32 vactive, vsynce, vbackp, vfrontp, vblanke, vblanks;
+	u32 vblan2e = 0, vblan2s = 1;
+	u32 *push;
+	int ret;
+
+	hactive = mode->htotal;
+	hsynce  = mode->hsync_end - mode->hsync_start - 1;
+	hbackp  = mode->htotal - mode->hsync_end;
+	hblanke = hsynce + hbackp;
+	hfrontp = mode->hsync_start - mode->hdisplay;
+	hblanks = mode->htotal - hfrontp - 1;
+
+	vactive = mode->vtotal * vscan / ilace;
+	vsynce  = ((mode->vsync_end - mode->vsync_start) * vscan / ilace) - 1;
+	vbackp  = (mode->vtotal - mode->vsync_end) * vscan / ilace;
+	vblanke = vsynce + vbackp;
+	vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
+	vblanks = vactive - vfrontp - 1;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+		vblan2e = vactive + vsynce + vbackp;
+		vblan2s = vblan2e + (mode->vdisplay * vscan / ilace);
+		vactive = (vactive * 2) + 1;
+	}
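+
+	/* Worked example for a CEA 1920x1080@60 mode (htotal 2200, hsync
+	 * 2008-2052): hsynce = 43, hbackp = 148, hblanke = 191, hfrontp = 88,
+	 * hblanks = 2111 -- the hardware takes blanking positions measured
+	 * from the sync edge rather than raw porch widths. */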
+
+	ret = nv50_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
+
+	push = evo_wait(mast, 64);
+	if (push) {
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			evo_mthd(push, 0x0804 + (nv_crtc->index * 0x400), 2);
+			evo_data(push, 0x00800000 | mode->clock);
+			evo_data(push, (ilace == 2) ? 2 : 0);
+			evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 6);
+			evo_data(push, 0x00000000);
+			evo_data(push, (vactive << 16) | hactive);
+			evo_data(push, ( vsynce << 16) | hsynce);
+			evo_data(push, (vblanke << 16) | hblanke);
+			evo_data(push, (vblanks << 16) | hblanks);
+			evo_data(push, (vblan2e << 16) | vblan2s);
+			evo_mthd(push, 0x082c + (nv_crtc->index * 0x400), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0900 + (nv_crtc->index * 0x400), 2);
+			evo_data(push, 0x00000311);
+			evo_data(push, 0x00000100);
+		} else {
+			evo_mthd(push, 0x0410 + (nv_crtc->index * 0x300), 6);
+			evo_data(push, 0x00000000);
+			evo_data(push, (vactive << 16) | hactive);
+			evo_data(push, ( vsynce << 16) | hsynce);
+			evo_data(push, (vblanke << 16) | hblanke);
+			evo_data(push, (vblanks << 16) | hblanks);
+			evo_data(push, (vblan2e << 16) | vblan2s);
+			evo_mthd(push, 0x042c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x00000000); /* ??? */
+			evo_mthd(push, 0x0450 + (nv_crtc->index * 0x300), 3);
+			evo_data(push, mode->clock * 1000);
+			evo_data(push, 0x00200000); /* ??? */
+			evo_data(push, mode->clock * 1000);
+			evo_mthd(push, 0x04d0 + (nv_crtc->index * 0x300), 2);
+			evo_data(push, 0x00000311);
+			evo_data(push, 0x00000100);
+		}
+
+		evo_kick(push, mast);
+	}
+
+	nv_connector = nouveau_crtc_connector_get(nv_crtc);
+	nv50_crtc_set_dither(nv_crtc, false);
+	nv50_crtc_set_scale(nv_crtc, false);
+	nv50_crtc_set_color_vibrance(nv_crtc, false);
+	nv50_crtc_set_image(nv_crtc, crtc->fb, x, y, false);
+	return 0;
+}
+
+static int
+nv50_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+			struct drm_framebuffer *old_fb)
+{
+	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	int ret;
+
+	if (!crtc->fb) {
+		NV_DEBUG(drm, "No FB bound\n");
+		return 0;
+	}
+
+	ret = nv50_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
+
+	nv50_display_flip_stop(crtc);
+	nv50_crtc_set_image(nv_crtc, crtc->fb, x, y, true);
+	nv50_display_flip_next(crtc, crtc->fb, NULL, 1);
+	return 0;
+}
+
+static int
+nv50_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
+			       struct drm_framebuffer *fb, int x, int y,
+			       enum mode_set_atomic state)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	nv50_display_flip_stop(crtc);
+	nv50_crtc_set_image(nv_crtc, fb, x, y, true);
+	return 0;
+}
+
+static void
+nv50_crtc_lut_load(struct drm_crtc *crtc)
+{
+	struct nv50_disp *disp = nv50_disp(crtc->dev);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	void __iomem *lut = nvbo_kmap_obj_iovirtual(nv_crtc->lut.nvbo);
+	int i;
+
+	for (i = 0; i < 256; i++) {
+		u16 r = nv_crtc->lut.r[i] >> 2;
+		u16 g = nv_crtc->lut.g[i] >> 2;
+		u16 b = nv_crtc->lut.b[i] >> 2;
+
+		if (nv_mclass(disp->core) < NVD0_DISP_CLASS) {
+			writew(r + 0x0000, lut + (i * 0x08) + 0);
+			writew(g + 0x0000, lut + (i * 0x08) + 2);
+			writew(b + 0x0000, lut + (i * 0x08) + 4);
+		} else {
+			writew(r + 0x6000, lut + (i * 0x20) + 0);
+			writew(g + 0x6000, lut + (i * 0x20) + 2);
+			writew(b + 0x6000, lut + (i * 0x20) + 4);
+		}
+	}
+}
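+
+/* The two branches above reflect different LUT layouts: pre-NVD0 cores
+ * use 8-byte entries written verbatim, while NVD0+ uses 32-byte entries
+ * with a 0x6000 bias on each component; both take the 14-bit values
+ * produced by the >> 2 on the 16-bit DRM gamma entries. */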
+
+static int
+nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
+		     uint32_t handle, uint32_t width, uint32_t height)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_gem_object *gem;
+	struct nouveau_bo *nvbo;
+	bool visible = (handle != 0);
+	int i, ret = 0;
+
+	if (visible) {
+		if (width != 64 || height != 64)
+			return -EINVAL;
+
+		gem = drm_gem_object_lookup(dev, file_priv, handle);
+		if (unlikely(!gem))
+			return -ENOENT;
+		nvbo = nouveau_gem_object(gem);
+
+		ret = nouveau_bo_map(nvbo);
+		if (ret == 0) {
+			for (i = 0; i < 64 * 64; i++) {
+				u32 v = nouveau_bo_rd32(nvbo, i);
+				nouveau_bo_wr32(nv_crtc->cursor.nvbo, i, v);
+			}
+			nouveau_bo_unmap(nvbo);
+		}
+
+		drm_gem_object_unreference_unlocked(gem);
+	}
+
+	if (visible != nv_crtc->cursor.visible) {
+		nv50_crtc_cursor_show_hide(nv_crtc, visible, true);
+		nv_crtc->cursor.visible = visible;
+	}
+
+	return ret;
+}
+
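+/* Cursor moves bypass the EVO push buffer: the position is written straight
+ * into the cursor PIO channel's user area, with the follow-up write to the
+ * UPDATE method (0x0080) submitting it.
+ */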
+static int
+nv50_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct nv50_curs *curs = nv50_curs(crtc);
+	struct nv50_chan *chan = nv50_chan(curs);
+	nv_wo32(chan->user, 0x0084, (y << 16) | (x & 0xffff));
+	nv_wo32(chan->user, 0x0080, 0x00000000);
+	return 0;
+}
+
+static void
+nv50_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+		    uint32_t start, uint32_t size)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	u32 end = min(start + size, (u32)256);
+	u32 i;
+
+	for (i = start; i < end; i++) {
+		nv_crtc->lut.r[i] = r[i];
+		nv_crtc->lut.g[i] = g[i];
+		nv_crtc->lut.b[i] = b[i];
+	}
+
+	nv50_crtc_lut_load(crtc);
+}
+
+static void
+nv50_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nv50_disp *disp = nv50_disp(crtc->dev);
+	struct nv50_head *head = nv50_head(crtc);
+	nv50_dmac_destroy(disp->core, &head->ovly.base);
+	nv50_pioc_destroy(disp->core, &head->oimm.base);
+	nv50_dmac_destroy(disp->core, &head->sync.base);
+	nv50_pioc_destroy(disp->core, &head->curs.base);
+	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
+	if (nv_crtc->cursor.nvbo)
+		nouveau_bo_unpin(nv_crtc->cursor.nvbo);
+	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+	nouveau_bo_unmap(nv_crtc->lut.nvbo);
+	if (nv_crtc->lut.nvbo)
+		nouveau_bo_unpin(nv_crtc->lut.nvbo);
+	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+	drm_crtc_cleanup(crtc);
+	kfree(crtc);
+}
+
+static const struct drm_crtc_helper_funcs nv50_crtc_hfunc = {
+	.dpms = nv50_crtc_dpms,
+	.prepare = nv50_crtc_prepare,
+	.commit = nv50_crtc_commit,
+	.mode_fixup = nv50_crtc_mode_fixup,
+	.mode_set = nv50_crtc_mode_set,
+	.mode_set_base = nv50_crtc_mode_set_base,
+	.mode_set_base_atomic = nv50_crtc_mode_set_base_atomic,
+	.load_lut = nv50_crtc_lut_load,
+};
+
+static const struct drm_crtc_funcs nv50_crtc_func = {
+	.cursor_set = nv50_crtc_cursor_set,
+	.cursor_move = nv50_crtc_cursor_move,
+	.gamma_set = nv50_crtc_gamma_set,
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = nv50_crtc_destroy,
+	.page_flip = nouveau_crtc_page_flip,
+};
+
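+/* The shared nouveau_crtc cursor hooks aren't needed on this path (the
+ * cursor channel is driven directly), so they're deliberately empty stubs.
+ */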
+static void
+nv50_cursor_set_pos(struct nouveau_crtc *nv_crtc, int x, int y)
+{
+}
+
+static void
+nv50_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
+{
+}
+
+static int
+nv50_crtc_create(struct drm_device *dev, struct nouveau_object *core, int index)
+{
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct nv50_head *head;
+	struct drm_crtc *crtc;
+	int ret, i;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (!head)
+		return -ENOMEM;
+
+	head->base.index = index;
+	head->base.set_dither = nv50_crtc_set_dither;
+	head->base.set_scale = nv50_crtc_set_scale;
+	head->base.set_color_vibrance = nv50_crtc_set_color_vibrance;
+	head->base.color_vibrance = 50;
+	head->base.vibrant_hue = 0;
+	head->base.cursor.set_offset = nv50_cursor_set_offset;
+	head->base.cursor.set_pos = nv50_cursor_set_pos;
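+	/* seed the software LUT with a linear ramp; i << 8 widens the 8-bit
+	 * index to the 16-bit range used by the DRM gamma interface */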
+	for (i = 0; i < 256; i++) {
+		head->base.lut.r[i] = i << 8;
+		head->base.lut.g[i] = i << 8;
+		head->base.lut.b[i] = i << 8;
+	}
+
+	crtc = &head->base.base;
+	drm_crtc_init(dev, crtc, &nv50_crtc_func);
+	drm_crtc_helper_add(crtc, &nv50_crtc_hfunc);
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
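+	/* allocate, pin and map the LUT buffer, unwinding each step should a
+	 * later one fail */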
+	ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, NULL, &head->base.lut.nvbo);
+	if (!ret) {
+		ret = nouveau_bo_pin(head->base.lut.nvbo, TTM_PL_FLAG_VRAM);
+		if (!ret) {
+			ret = nouveau_bo_map(head->base.lut.nvbo);
+			if (ret)
+				nouveau_bo_unpin(head->base.lut.nvbo);
+		}
+		if (ret)
+			nouveau_bo_ref(NULL, &head->base.lut.nvbo);
+	}
+
+	if (ret)
+		goto out;
+
+	nv50_crtc_lut_load(crtc);
+
+	/* allocate cursor resources */
+	ret = nv50_pioc_create(disp->core, NV50_DISP_CURS_CLASS, index,
+			      &(struct nv50_display_curs_class) {
+					.head = index,
+			      }, sizeof(struct nv50_display_curs_class),
+			      &head->curs.base);
+	if (ret)
+		goto out;
+
+	ret = nouveau_bo_new(dev, 64 * 64 * 4, 0x100, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, NULL, &head->base.cursor.nvbo);
+	if (!ret) {
+		ret = nouveau_bo_pin(head->base.cursor.nvbo, TTM_PL_FLAG_VRAM);
+		if (!ret) {
+			ret = nouveau_bo_map(head->base.cursor.nvbo);
+			if (ret)
+				nouveau_bo_unpin(head->base.cursor.nvbo);
+		}
+		if (ret)
+			nouveau_bo_ref(NULL, &head->base.cursor.nvbo);
+	}
+
+	if (ret)
+		goto out;
+
+	/* allocate page flip / sync resources */
+	ret = nv50_dmac_create(disp->core, NV50_DISP_SYNC_CLASS, index,
+			      &(struct nv50_display_sync_class) {
+					.pushbuf = EVO_PUSH_HANDLE(SYNC, index),
+					.head = index,
+			      }, sizeof(struct nv50_display_sync_class),
+			      disp->sync->bo.offset, &head->sync.base);
+	if (ret)
+		goto out;
+
+	head->sync.sem.offset = EVO_SYNC(1 + index, 0x00);
+
+	/* allocate overlay resources */
+	ret = nv50_pioc_create(disp->core, NV50_DISP_OIMM_CLASS, index,
+			      &(struct nv50_display_oimm_class) {
+					.head = index,
+			      }, sizeof(struct nv50_display_oimm_class),
+			      &head->oimm.base);
+	if (ret)
+		goto out;
+
+	ret = nv50_dmac_create(disp->core, NV50_DISP_OVLY_CLASS, index,
+			      &(struct nv50_display_ovly_class) {
+					.pushbuf = EVO_PUSH_HANDLE(OVLY, index),
+					.head = index,
+			      }, sizeof(struct nv50_display_ovly_class),
+			      disp->sync->bo.offset, &head->ovly.base);
+	if (ret)
+		goto out;
+
+out:
+	if (ret)
+		nv50_crtc_destroy(crtc);
+	return ret;
+}
+
+/******************************************************************************
+ * DAC
+ *****************************************************************************/
+static void
+nv50_dac_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	int or = nv_encoder->or;
+	u32 dpms_ctrl;
+
+	dpms_ctrl = 0x00000000;
+	if (mode == DRM_MODE_DPMS_STANDBY || mode == DRM_MODE_DPMS_OFF)
+		dpms_ctrl |= 0x00000001;
+	if (mode == DRM_MODE_DPMS_SUSPEND || mode == DRM_MODE_DPMS_OFF)
+		dpms_ctrl |= 0x00000004;
+
+	nv_call(disp->core, NV50_DISP_DAC_PWR + or, dpms_ctrl);
+}
+
+static bool
+nv50_dac_mode_fixup(struct drm_encoder *encoder,
+		    const struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_connector *nv_connector;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (nv_connector && nv_connector->native_mode) {
+		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
+			int id = adjusted_mode->base.id;
+			*adjusted_mode = *nv_connector->native_mode;
+			adjusted_mode->base.id = id;
+		}
+	}
+
+	return true;
+}
+
+static void
+nv50_dac_commit(struct drm_encoder *encoder)
+{
+}
+
+static void
+nv50_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		  struct drm_display_mode *adjusted_mode)
+{
+	struct nv50_mast *mast = nv50_mast(encoder->dev);
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	u32 *push;
+
+	nv50_dac_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	push = evo_wait(mast, 8);
+	if (push) {
+		if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+			u32 syncs = 0x00000000;
+
+			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+				syncs |= 0x00000001;
+			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+				syncs |= 0x00000002;
+
+			evo_mthd(push, 0x0400 + (nv_encoder->or * 0x080), 2);
+			evo_data(push, 1 << nv_crtc->index);
+			evo_data(push, syncs);
+		} else {
+			u32 magic = 0x31ec6000 | (nv_crtc->index << 25);
+			u32 syncs = 0x00000001;
+
+			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+				syncs |= 0x00000008;
+			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+				syncs |= 0x00000010;
+
+			if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+				magic |= 0x00000001;
+
+			evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2);
+			evo_data(push, syncs);
+			evo_data(push, magic);
+			evo_mthd(push, 0x0180 + (nv_encoder->or * 0x020), 1);
+			evo_data(push, 1 << nv_crtc->index);
+		}
+
+		evo_kick(push, mast);
+	}
+
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static void
+nv50_dac_disconnect(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_mast *mast = nv50_mast(encoder->dev);
+	const int or = nv_encoder->or;
+	u32 *push;
+
+	if (nv_encoder->crtc) {
+		nv50_crtc_prepare(nv_encoder->crtc);
+
+		push = evo_wait(mast, 4);
+		if (push) {
+			if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+				evo_mthd(push, 0x0400 + (or * 0x080), 1);
+				evo_data(push, 0x00000000);
+			} else {
+				evo_mthd(push, 0x0180 + (or * 0x020), 1);
+				evo_data(push, 0x00000000);
+			}
+
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+			evo_kick(push, mast);
+		}
+	}
+
+	nv_encoder->crtc = NULL;
+}
+
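+/* DAC load detection: the core method applies a test voltage and reports
+ * which sense lines see a load; anything other than 7 (presumably a full
+ * load on all three colour lines) is treated as disconnected.
+ */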
+static enum drm_connector_status
+nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	int ret, or = nouveau_encoder(encoder)->or;
+	u32 load = 0;
+
+	ret = nv_exec(disp->core, NV50_DISP_DAC_LOAD + or, &load, sizeof(load));
+	if (ret || load != 7)
+		return connector_status_disconnected;
+
+	return connector_status_connected;
+}
+
+static void
+nv50_dac_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_helper_funcs nv50_dac_hfunc = {
+	.dpms = nv50_dac_dpms,
+	.mode_fixup = nv50_dac_mode_fixup,
+	.prepare = nv50_dac_disconnect,
+	.commit = nv50_dac_commit,
+	.mode_set = nv50_dac_mode_set,
+	.disable = nv50_dac_disconnect,
+	.get_crtc = nv50_display_crtc_get,
+	.detect = nv50_dac_detect
+};
+
+static const struct drm_encoder_funcs nv50_dac_func = {
+	.destroy = nv50_dac_destroy,
+};
+
+static int
+nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
-	u32 tmp;
 
-	if (dcb->type != DCB_OUTPUT_TMDS)
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->or = ffs(dcbe->or) - 1;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(dev, encoder, &nv50_dac_func, DRM_MODE_ENCODER_DAC);
+	drm_encoder_helper_add(encoder, &nv50_dac_hfunc);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * Audio
+ *****************************************************************************/
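+/* HDA ELD handling: audio is enabled only if the sink's EDID advertises it.
+ * The ELD built from the EDID is handed to the core via the SOR_HDA_ELD
+ * method; writing an empty ELD (see the disconnect path below) disables it.
+ */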
+static void
+nv50_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_connector *nv_connector;
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (!drm_detect_monitor_audio(nv_connector->edid))
 		return;
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	drm_edid_to_eld(&nv_connector->base, nv_connector->edid);
 
-		if (nv_encoder->dcb->type == DCB_OUTPUT_DP &&
-		    nv_encoder->dcb->or & (1 << or)) {
-			tmp  = nv_rd32(device, NV50_SOR_DP_CTRL(or, link));
-			tmp &= ~NV50_SOR_DP_CTRL_ENABLED;
-			nv_wr32(device, NV50_SOR_DP_CTRL(or, link), tmp);
+	nv_exec(disp->core, NVA3_DISP_SOR_HDA_ELD + nv_encoder->or,
+			    nv_connector->base.eld,
+			    nv_connector->base.eld[2] * 4);
+}
+
+static void
+nv50_audio_disconnect(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+
+	nv_exec(disp->core, NVA3_DISP_SOR_HDA_ELD + nv_encoder->or, NULL, 0);
+}
+
+/******************************************************************************
+ * HDMI
+ *****************************************************************************/
+static void
+nv50_hdmi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	const u32 moff = (nv_crtc->index << 3) | nv_encoder->or;
+	u32 rekey = 56; /* value used by both the binary driver and tegra */
+	u32 max_ac_packet;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (!drm_detect_hdmi_monitor(nv_connector->edid))
+		return;
+
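+	/* size audio/aux packets to whatever fits in the hblank period once
+	 * the rekey window and a fixed 18-unit overhead are accounted for,
+	 * in 32-unit packets */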
+	max_ac_packet  = mode->htotal - mode->hdisplay;
+	max_ac_packet -= rekey;
+	max_ac_packet -= 18; /* constant from tegra */
+	max_ac_packet /= 32;
+
+	nv_call(disp->core, NV84_DISP_SOR_HDMI_PWR + moff,
+			    NV84_DISP_SOR_HDMI_PWR_STATE_ON |
+			    (max_ac_packet << 16) | rekey);
+
+	nv50_audio_mode_set(encoder, mode);
+}
+
+static void
+nv50_hdmi_disconnect(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	const u32 moff = (nv_crtc->index << 3) | nv_encoder->or;
+
+	nv50_audio_disconnect(encoder);
+
+	nv_call(disp->core, NV84_DISP_SOR_HDMI_PWR + moff, 0x00000000);
+}
+
+/******************************************************************************
+ * SOR
+ *****************************************************************************/
+static void
+nv50_sor_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct drm_encoder *partner;
+	int or = nv_encoder->or;
+
+	nv_encoder->last_dpms = mode;
+
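+	/* an OR may be shared between encoders; if another encoder on the
+	 * same OR is still on, record our state but leave the OR powered */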
+	list_for_each_entry(partner, &dev->mode_config.encoder_list, head) {
+		struct nouveau_encoder *nv_partner = nouveau_encoder(partner);
+
+		if (partner->encoder_type != DRM_MODE_ENCODER_TMDS)
+			continue;
+
+		if (nv_partner != nv_encoder &&
+		    nv_partner->dcb->or == nv_encoder->dcb->or) {
+			if (nv_partner->last_dpms == DRM_MODE_DPMS_ON)
+				return;
 			break;
 		}
 	}
+
+	nv_call(disp->core, NV50_DISP_SOR_PWR + or, (mode == DRM_MODE_DPMS_ON));
+
+	if (nv_encoder->dcb->type == DCB_OUTPUT_DP)
+		nouveau_dp_dpms(encoder, mode, nv_encoder->dp.datarate, disp->core);
 }
 
-static void
-nv50_display_unk40_handler(struct drm_device *dev)
+static bool
+nv50_sor_mode_fixup(struct drm_encoder *encoder,
+		    const struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode)
 {
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_display *disp = nv50_display(dev);
-	struct dcb_output *dcb = disp->irq.dcb;
-	u16 script = disp->irq.script;
-	u32 unk30 = nv_rd32(device, 0x610030), pclk = disp->irq.pclk;
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_connector *nv_connector;
 
-	NV_DEBUG(drm, "0x610030: 0x%08x\n", unk30);
-	disp->irq.dcb = NULL;
-	if (!dcb)
-		goto ack;
-
-	nouveau_bios_run_display_table(dev, script, -pclk, dcb, -1);
-	nv50_display_unk40_dp_set_tmds(dev, dcb);
-
-ack:
-	nv_wr32(device, NV50_PDISPLAY_INTR_1, NV50_PDISPLAY_INTR_1_CLK_UNK40);
-	nv_wr32(device, 0x610030, 0x80000000);
-	nv_wr32(device, 0x619494, nv_rd32(device, 0x619494) | 8);
-}
-
-static void
-nv50_display_bh(unsigned long data)
-{
-	struct drm_device *dev = (struct drm_device *)data;
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-
-	for (;;) {
-		uint32_t intr0 = nv_rd32(device, NV50_PDISPLAY_INTR_0);
-		uint32_t intr1 = nv_rd32(device, NV50_PDISPLAY_INTR_1);
-
-		NV_DEBUG(drm, "PDISPLAY_INTR_BH 0x%08x 0x%08x\n", intr0, intr1);
-
-		if (intr1 & NV50_PDISPLAY_INTR_1_CLK_UNK10)
-			nv50_display_unk10_handler(dev);
-		else
-		if (intr1 & NV50_PDISPLAY_INTR_1_CLK_UNK20)
-			nv50_display_unk20_handler(dev);
-		else
-		if (intr1 & NV50_PDISPLAY_INTR_1_CLK_UNK40)
-			nv50_display_unk40_handler(dev);
-		else
-			break;
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (nv_connector && nv_connector->native_mode) {
+		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
+			int id = adjusted_mode->base.id;
+			*adjusted_mode = *nv_connector->native_mode;
+			adjusted_mode->base.id = id;
+		}
 	}
 
-	nv_wr32(device, NV03_PMC_INTR_EN_0, 1);
+	return true;
 }
 
 static void
-nv50_display_error_handler(struct drm_device *dev)
+nv50_sor_disconnect(struct drm_encoder *encoder)
 {
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	u32 channels = (nv_rd32(device, NV50_PDISPLAY_INTR_0) & 0x001f0000) >> 16;
-	u32 addr, data;
-	int chid;
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_mast *mast = nv50_mast(encoder->dev);
+	const int or = nv_encoder->or;
+	u32 *push;
 
-	for (chid = 0; chid < 5; chid++) {
-		if (!(channels & (1 << chid)))
-			continue;
+	if (nv_encoder->crtc) {
+		nv50_crtc_prepare(nv_encoder->crtc);
 
-		nv_wr32(device, NV50_PDISPLAY_INTR_0, 0x00010000 << chid);
-		addr = nv_rd32(device, NV50_PDISPLAY_TRAPPED_ADDR(chid));
-		data = nv_rd32(device, NV50_PDISPLAY_TRAPPED_DATA(chid));
-		NV_ERROR(drm, "EvoCh %d Mthd 0x%04x Data 0x%08x "
-			      "(0x%04x 0x%02x)\n", chid,
-			 addr & 0xffc, data, addr >> 16, (addr >> 12) & 0xf);
+		push = evo_wait(mast, 4);
+		if (push) {
+			if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) {
+				evo_mthd(push, 0x0600 + (or * 0x40), 1);
+				evo_data(push, 0x00000000);
+			} else {
+				evo_mthd(push, 0x0200 + (or * 0x20), 1);
+				evo_data(push, 0x00000000);
+			}
 
-		nv_wr32(device, NV50_PDISPLAY_TRAPPED_ADDR(chid), 0x90000000);
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+			evo_kick(push, mast);
+		}
+
+		nv50_hdmi_disconnect(encoder);
 	}
+
+	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
+	nv_encoder->crtc = NULL;
+}
+
+static void
+nv50_sor_prepare(struct drm_encoder *encoder)
+{
+	nv50_sor_disconnect(encoder);
+	if (nouveau_encoder(encoder)->dcb->type == DCB_OUTPUT_DP)
+		evo_sync(encoder->dev);
+}
+
+static void
+nv50_sor_commit(struct drm_encoder *encoder)
+{
+}
+
+static void
+nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
+		  struct drm_display_mode *mode)
+{
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct nv50_mast *mast = nv50_mast(encoder->dev);
+	struct drm_device *dev = encoder->dev;
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
+	struct nvbios *bios = &drm->vbios;
+	u32 *push, lvds = 0;
+	u8 owner = 1 << nv_crtc->index;
+	u8 proto = 0xf;
+	u8 depth = 0x0;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	switch (nv_encoder->dcb->type) {
+	case DCB_OUTPUT_TMDS:
+		if (nv_encoder->dcb->sorconf.link & 1) {
+			if (mode->clock < 165000)
+				proto = 0x1;
+			else
+				proto = 0x5;
+		} else {
+			proto = 0x2;
+		}
+
+		nv50_hdmi_mode_set(encoder, mode);
+		break;
+	case DCB_OUTPUT_LVDS:
+		proto = 0x0;
+
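+		/* build the LVDS script argument: 0x0100 selects dual-link,
+		 * 0x0200 a 24-bit panel; taken from VBIOS straps when DDC
+		 * isn't trusted, otherwise from SPWG EDID data or the
+		 * dual-link transition clock */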
+		if (bios->fp_no_ddc) {
+			if (bios->fp.dual_link)
+				lvds |= 0x0100;
+			if (bios->fp.if_is_24bit)
+				lvds |= 0x0200;
+		} else {
+			if (nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) {
+				if (((u8 *)nv_connector->edid)[121] == 2)
+					lvds |= 0x0100;
+			} else
+			if (mode->clock >= bios->fp.duallink_transition_clk) {
+				lvds |= 0x0100;
+			}
+
+			if (lvds & 0x0100) {
+				if (bios->fp.strapless_is_24bit & 2)
+					lvds |= 0x0200;
+			} else {
+				if (bios->fp.strapless_is_24bit & 1)
+					lvds |= 0x0200;
+			}
+
+			if (nv_connector->base.display_info.bpc == 8)
+				lvds |= 0x0200;
+		}
+
+		nv_call(disp->core, NV50_DISP_SOR_LVDS_SCRIPT + nv_encoder->or, lvds);
+		break;
+	case DCB_OUTPUT_DP:
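+		/* choose output depth from the sink's bpc, and derive the
+		 * payload bandwidth (pixel clock * bits-per-pixel / 8) that
+		 * link training will have to satisfy */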
+		if (nv_connector->base.display_info.bpc == 6) {
+			nv_encoder->dp.datarate = mode->clock * 18 / 8;
+			depth = 0x2;
+		} else
+		if (nv_connector->base.display_info.bpc == 8) {
+			nv_encoder->dp.datarate = mode->clock * 24 / 8;
+			depth = 0x5;
+		} else {
+			nv_encoder->dp.datarate = mode->clock * 30 / 8;
+			depth = 0x6;
+		}
+
+		if (nv_encoder->dcb->sorconf.link & 1)
+			proto = 0x8;
+		else
+			proto = 0x9;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	push = evo_wait(nv50_mast(dev), 8);
+	if (push) {
+		if (nv50_vers(mast) < NVD0_DISP_CLASS) {
+			evo_mthd(push, 0x0600 + (nv_encoder->or * 0x040), 1);
+			evo_data(push, (depth << 16) | (proto << 8) | owner);
+		} else {
+			u32 magic = 0x31ec6000 | (nv_crtc->index << 25);
+			u32 syncs = 0x00000001;
+
+			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+				syncs |= 0x00000008;
+			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+				syncs |= 0x00000010;
+
+			if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+				magic |= 0x00000001;
+
+			evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2);
+			evo_data(push, syncs | (depth << 6));
+			evo_data(push, magic);
+			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x020), 1);
+			evo_data(push, owner | (proto << 8));
+		}
+
+		evo_kick(push, mast);
+	}
+
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static void
+nv50_sor_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_helper_funcs nv50_sor_hfunc = {
+	.dpms = nv50_sor_dpms,
+	.mode_fixup = nv50_sor_mode_fixup,
+	.prepare = nv50_sor_prepare,
+	.commit = nv50_sor_commit,
+	.mode_set = nv50_sor_mode_set,
+	.disable = nv50_sor_disconnect,
+	.get_crtc = nv50_display_crtc_get,
+};
+
+static const struct drm_encoder_funcs nv50_sor_func = {
+	.destroy = nv50_sor_destroy,
+};
+
+static int
+nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->or = ffs(dcbe->or) - 1;
+	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(dev, encoder, &nv50_sor_func, DRM_MODE_ENCODER_TMDS);
+	drm_encoder_helper_add(encoder, &nv50_sor_hfunc);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * Init
+ *****************************************************************************/
+void
+nv50_display_fini(struct drm_device *dev)
+{
+}
+
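+/* Bind the core channel's notifier context (DMA_NOTIFY, method 0x0088) to
+ * the shared sync object, then synchronise to prove the channel is alive
+ * before modesetting begins.
+ */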
+int
+nv50_display_init(struct drm_device *dev)
+{
+	u32 *push = evo_wait(nv50_mast(dev), 32);
+	if (push) {
+		evo_mthd(push, 0x0088, 1);
+		evo_data(push, NvEvoSync);
+		evo_kick(push, nv50_mast(dev));
+		return evo_sync(dev);
+	}
+
+	return -EBUSY;
 }
 
 void
-nv50_display_intr(struct drm_device *dev)
+nv50_display_destroy(struct drm_device *dev)
 {
+	struct nv50_disp *disp = nv50_disp(dev);
+
+	nv50_dmac_destroy(disp->core, &disp->mast.base);
+
+	nouveau_bo_unmap(disp->sync);
+	if (disp->sync)
+		nouveau_bo_unpin(disp->sync);
+	nouveau_bo_ref(NULL, &disp->sync);
+
+	nouveau_display(dev)->priv = NULL;
+	kfree(disp);
+}
+
+int
+nv50_display_create(struct drm_device *dev)
+{
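+	/* candidate display classes, in order of preference; the probe loop
+	 * below binds the first one the core accepts for this chipset */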
+	static const u16 oclass[] = {
+		NVE0_DISP_CLASS,
+		NVD0_DISP_CLASS,
+		NVA3_DISP_CLASS,
+		NV94_DISP_CLASS,
+		NVA0_DISP_CLASS,
+		NV84_DISP_CLASS,
+		NV50_DISP_CLASS,
+	};
 	struct nouveau_device *device = nouveau_dev(dev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_display *disp = nv50_display(dev);
-	uint32_t delayed = 0;
+	struct dcb_table *dcb = &drm->vbios.dcb;
+	struct drm_connector *connector, *tmp;
+	struct nv50_disp *disp;
+	struct dcb_output *dcbe;
+	int crtcs, ret, i;
 
-	while (nv_rd32(device, NV50_PMC_INTR_0) & NV50_PMC_INTR_0_DISPLAY) {
-		uint32_t intr0 = nv_rd32(device, NV50_PDISPLAY_INTR_0);
-		uint32_t intr1 = nv_rd32(device, NV50_PDISPLAY_INTR_1);
-		uint32_t clock;
+	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
+	if (!disp)
+		return -ENOMEM;
 
-		NV_DEBUG(drm, "PDISPLAY_INTR 0x%08x 0x%08x\n", intr0, intr1);
+	nouveau_display(dev)->priv = disp;
+	nouveau_display(dev)->dtor = nv50_display_destroy;
+	nouveau_display(dev)->init = nv50_display_init;
+	nouveau_display(dev)->fini = nv50_display_fini;
 
-		if (!intr0 && !(intr1 & ~delayed))
+	/* small shared memory area we use for notifiers and semaphores */
+	ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, NULL, &disp->sync);
+	if (!ret) {
+		ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM);
+		if (!ret) {
+			ret = nouveau_bo_map(disp->sync);
+			if (ret)
+				nouveau_bo_unpin(disp->sync);
+		}
+		if (ret)
+			nouveau_bo_ref(NULL, &disp->sync);
+	}
+
+	if (ret)
+		goto out;
+
+	/* attempt to allocate a supported evo display class */
+	ret = -ENODEV;
+	for (i = 0; ret && i < ARRAY_SIZE(oclass); i++) {
+		ret = nouveau_object_new(nv_object(drm), NVDRM_DEVICE,
+					 0xd1500000, oclass[i], NULL, 0,
+					 &disp->core);
+	}
+
+	if (ret)
+		goto out;
+
+	/* allocate master evo channel */
+	ret = nv50_dmac_create(disp->core, NV50_DISP_MAST_CLASS, 0,
+			      &(struct nv50_display_mast_class) {
+					.pushbuf = EVO_PUSH_HANDLE(MAST, 0),
+			      }, sizeof(struct nv50_display_mast_class),
+			      disp->sync->bo.offset, &disp->mast.base);
+	if (ret)
+		goto out;
+
+	/* create crtc objects to represent the hw heads */
+	if (nv_mclass(disp->core) >= NVD0_DISP_CLASS)
+		crtcs = nv_rd32(device, 0x022448);
+	else
+		crtcs = 2;
+
+	for (i = 0; i < crtcs; i++) {
+		ret = nv50_crtc_create(dev, disp->core, i);
+		if (ret)
+			goto out;
+	}
+
+	/* create encoder/connector objects based on VBIOS DCB table */
+	for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) {
+		connector = nouveau_connector_create(dev, dcbe->connector);
+		if (IS_ERR(connector))
+			continue;
+
+		if (dcbe->location != DCB_LOC_ON_CHIP) {
+			NV_WARN(drm, "skipping off-chip encoder %d/%d\n",
+				dcbe->type, ffs(dcbe->or) - 1);
+			continue;
+		}
+
+		switch (dcbe->type) {
+		case DCB_OUTPUT_TMDS:
+		case DCB_OUTPUT_LVDS:
+		case DCB_OUTPUT_DP:
+			nv50_sor_create(connector, dcbe);
 			break;
-
-		if (intr0 & 0x001f0000) {
-			nv50_display_error_handler(dev);
-			intr0 &= ~0x001f0000;
-		}
-
-		if (intr1 & NV50_PDISPLAY_INTR_1_VBLANK_CRTC) {
-			intr1 &= ~NV50_PDISPLAY_INTR_1_VBLANK_CRTC;
-			delayed |= NV50_PDISPLAY_INTR_1_VBLANK_CRTC;
-		}
-
-		clock = (intr1 & (NV50_PDISPLAY_INTR_1_CLK_UNK10 |
-				  NV50_PDISPLAY_INTR_1_CLK_UNK20 |
-				  NV50_PDISPLAY_INTR_1_CLK_UNK40));
-		if (clock) {
-			nv_wr32(device, NV03_PMC_INTR_EN_0, 0);
-			tasklet_schedule(&disp->tasklet);
-			delayed |= clock;
-			intr1 &= ~clock;
-		}
-
-		if (intr0) {
-			NV_ERROR(drm, "unknown PDISPLAY_INTR_0: 0x%08x\n", intr0);
-			nv_wr32(device, NV50_PDISPLAY_INTR_0, intr0);
-		}
-
-		if (intr1) {
-			NV_ERROR(drm,
-				 "unknown PDISPLAY_INTR_1: 0x%08x\n", intr1);
-			nv_wr32(device, NV50_PDISPLAY_INTR_1, intr1);
+		case DCB_OUTPUT_ANALOG:
+			nv50_dac_create(connector, dcbe);
+			break;
+		default:
+			NV_WARN(drm, "skipping unsupported encoder %d/%d\n",
+				dcbe->type, ffs(dcbe->or) - 1);
+			continue;
 		}
 	}
+
+	/* cull any connectors we created that don't have an encoder */
+	list_for_each_entry_safe(connector, tmp,
+				 &dev->mode_config.connector_list, head) {
+		if (connector->encoder_ids[0])
+			continue;
+
+		NV_WARN(drm, "%s has no encoders, removing\n",
+			drm_get_connector_name(connector));
+		connector->funcs->destroy(connector);
+	}
+
+out:
+	if (ret)
+		nv50_display_destroy(dev);
+	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_display.h b/drivers/gpu/drm/nouveau/nv50_display.h
index 973554d..70da347 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.h
+++ b/drivers/gpu/drm/nouveau/nv50_display.h
@@ -30,77 +30,16 @@
 #include "nouveau_display.h"
 #include "nouveau_crtc.h"
 #include "nouveau_reg.h"
-#include "nv50_evo.h"
 
-struct nv50_display_crtc {
-	struct nouveau_channel *sync;
-	struct {
-		struct nouveau_bo *bo;
-		u32 offset;
-		u16 value;
-	} sem;
-};
+int  nv50_display_create(struct drm_device *);
+void nv50_display_destroy(struct drm_device *);
+int  nv50_display_init(struct drm_device *);
+void nv50_display_fini(struct drm_device *);
 
-struct nv50_display {
-	struct nouveau_channel *master;
-
-	struct nouveau_gpuobj *ramin;
-	u32 dmao;
-	u32 hash;
-
-	struct nv50_display_crtc crtc[2];
-
-	struct tasklet_struct tasklet;
-	struct {
-		struct dcb_output *dcb;
-		u16 script;
-		u32 pclk;
-	} irq;
-};
-
-static inline struct nv50_display *
-nv50_display(struct drm_device *dev)
-{
-	return nouveau_display(dev)->priv;
-}
-
-int nv50_display_early_init(struct drm_device *dev);
-void nv50_display_late_takedown(struct drm_device *dev);
-int nv50_display_create(struct drm_device *dev);
-int nv50_display_init(struct drm_device *dev);
-void nv50_display_fini(struct drm_device *dev);
-void nv50_display_destroy(struct drm_device *dev);
-void nv50_display_intr(struct drm_device *);
-int nv50_crtc_blank(struct nouveau_crtc *, bool blank);
-int nv50_crtc_set_clock(struct drm_device *, int head, int pclk);
-
-u32  nv50_display_active_crtcs(struct drm_device *);
-
-int  nv50_display_sync(struct drm_device *);
-int  nv50_display_flip_next(struct drm_crtc *, struct drm_framebuffer *,
-			    struct nouveau_channel *chan);
 void nv50_display_flip_stop(struct drm_crtc *);
-
-int  nv50_evo_create(struct drm_device *dev);
-void nv50_evo_destroy(struct drm_device *dev);
-int  nv50_evo_init(struct drm_device *dev);
-void nv50_evo_fini(struct drm_device *dev);
-void nv50_evo_dmaobj_init(struct nouveau_gpuobj *, u32 memtype, u64 base,
-			  u64 size);
-int  nv50_evo_dmaobj_new(struct nouveau_channel *, u32 handle, u32 memtype,
-			 u64 base, u64 size, struct nouveau_gpuobj **);
-
-int  nvd0_display_create(struct drm_device *);
-void nvd0_display_destroy(struct drm_device *);
-int  nvd0_display_init(struct drm_device *);
-void nvd0_display_fini(struct drm_device *);
-void nvd0_display_intr(struct drm_device *);
-
-void nvd0_display_flip_stop(struct drm_crtc *);
-int  nvd0_display_flip_next(struct drm_crtc *, struct drm_framebuffer *,
+int  nv50_display_flip_next(struct drm_crtc *, struct drm_framebuffer *,
 			    struct nouveau_channel *, u32 swap_interval);
 
 struct nouveau_bo *nv50_display_crtc_sema(struct drm_device *, int head);
-struct nouveau_bo *nvd0_display_crtc_sema(struct drm_device *, int head);
 
 #endif /* __NV50_DISPLAY_H__ */
diff --git a/drivers/gpu/drm/nouveau/nv50_evo.c b/drivers/gpu/drm/nouveau/nv50_evo.c
deleted file mode 100644
index 9f6f55c..0000000
--- a/drivers/gpu/drm/nouveau/nv50_evo.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-
-#include <drm/drmP.h>
-
-#include "nouveau_drm.h"
-#include "nouveau_dma.h"
-#include "nv50_display.h"
-
-#include <core/gpuobj.h>
-
-#include <subdev/timer.h>
-#include <subdev/fb.h>
-
-static u32
-nv50_evo_rd32(struct nouveau_object *object, u32 addr)
-{
-	void __iomem *iomem = object->oclass->ofuncs->rd08;
-	return ioread32_native(iomem + addr);
-}
-
-static void
-nv50_evo_wr32(struct nouveau_object *object, u32 addr, u32 data)
-{
-	void __iomem *iomem = object->oclass->ofuncs->rd08;
-	iowrite32_native(data, iomem + addr);
-}
-
-static void
-nv50_evo_channel_del(struct nouveau_channel **pevo)
-{
-	struct nouveau_channel *evo = *pevo;
-
-	if (!evo)
-		return;
-	*pevo = NULL;
-
-	nouveau_bo_unmap(evo->push.buffer);
-	nouveau_bo_ref(NULL, &evo->push.buffer);
-
-	if (evo->object)
-		iounmap(evo->object->oclass->ofuncs);
-
-	kfree(evo);
-}
-
-int
-nv50_evo_dmaobj_new(struct nouveau_channel *evo, u32 handle, u32 memtype,
-		    u64 base, u64 size, struct nouveau_gpuobj **pobj)
-{
-	struct drm_device *dev = evo->fence;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_display *disp = nv50_display(dev);
-	u32 dmao = disp->dmao;
-	u32 hash = disp->hash;
-	u32 flags5;
-
-	if (nv_device(drm->device)->chipset < 0xc0) {
-		/* not supported on 0x50, specified in format mthd */
-		if (nv_device(drm->device)->chipset == 0x50)
-			memtype = 0;
-		flags5 = 0x00010000;
-	} else {
-		if (memtype & 0x80000000)
-			flags5 = 0x00000000; /* large pages */
-		else
-			flags5 = 0x00020000;
-	}
-
-	nv_wo32(disp->ramin, dmao + 0x00, 0x0019003d | (memtype << 22));
-	nv_wo32(disp->ramin, dmao + 0x04, lower_32_bits(base + size - 1));
-	nv_wo32(disp->ramin, dmao + 0x08, lower_32_bits(base));
-	nv_wo32(disp->ramin, dmao + 0x0c, upper_32_bits(base + size - 1) << 24 |
-					  upper_32_bits(base));
-	nv_wo32(disp->ramin, dmao + 0x10, 0x00000000);
-	nv_wo32(disp->ramin, dmao + 0x14, flags5);
-
-	nv_wo32(disp->ramin, hash + 0x00, handle);
-	nv_wo32(disp->ramin, hash + 0x04, (evo->handle << 28) | (dmao << 10) |
-					   evo->handle);
-
-	disp->dmao += 0x20;
-	disp->hash += 0x08;
-	return 0;
-}
-
-static int
-nv50_evo_channel_new(struct drm_device *dev, int chid,
-		     struct nouveau_channel **pevo)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_display *disp = nv50_display(dev);
-	struct nouveau_channel *evo;
-	int ret;
-
-	evo = kzalloc(sizeof(struct nouveau_channel), GFP_KERNEL);
-	if (!evo)
-		return -ENOMEM;
-	*pevo = evo;
-
-	evo->drm = drm;
-	evo->handle = chid;
-	evo->fence = dev;
-	evo->user_get = 4;
-	evo->user_put = 0;
-
-	ret = nouveau_bo_new(dev, 4096, 0, TTM_PL_FLAG_VRAM, 0, 0, NULL,
-			     &evo->push.buffer);
-	if (ret == 0)
-		ret = nouveau_bo_pin(evo->push.buffer, TTM_PL_FLAG_VRAM);
-	if (ret) {
-		NV_ERROR(drm, "Error creating EVO DMA push buffer: %d\n", ret);
-		nv50_evo_channel_del(pevo);
-		return ret;
-	}
-
-	ret = nouveau_bo_map(evo->push.buffer);
-	if (ret) {
-		NV_ERROR(drm, "Error mapping EVO DMA push buffer: %d\n", ret);
-		nv50_evo_channel_del(pevo);
-		return ret;
-	}
-
-	evo->object = kzalloc(sizeof(*evo->object), GFP_KERNEL);
-#ifdef NOUVEAU_OBJECT_MAGIC
-	evo->object->_magic = NOUVEAU_OBJECT_MAGIC;
-#endif
-	evo->object->parent = nv_object(disp->ramin)->parent;
-	evo->object->engine = nv_object(disp->ramin)->engine;
-	evo->object->oclass =
-		kzalloc(sizeof(*evo->object->oclass), GFP_KERNEL);
-	evo->object->oclass->ofuncs =
-		kzalloc(sizeof(*evo->object->oclass->ofuncs), GFP_KERNEL);
-	evo->object->oclass->ofuncs->rd32 = nv50_evo_rd32;
-	evo->object->oclass->ofuncs->wr32 = nv50_evo_wr32;
-	evo->object->oclass->ofuncs->rd08 =
-		ioremap(pci_resource_start(dev->pdev, 0) +
-			NV50_PDISPLAY_USER(evo->handle), PAGE_SIZE);
-	return 0;
-}
-
-static int
-nv50_evo_channel_init(struct nouveau_channel *evo)
-{
-	struct nouveau_drm *drm = evo->drm;
-	struct nouveau_device *device = nv_device(drm->device);
-	int id = evo->handle, ret, i;
-	u64 pushbuf = evo->push.buffer->bo.offset;
-	u32 tmp;
-
-	tmp = nv_rd32(device, NV50_PDISPLAY_EVO_CTRL(id));
-	if ((tmp & 0x009f0000) == 0x00020000)
-		nv_wr32(device, NV50_PDISPLAY_EVO_CTRL(id), tmp | 0x00800000);
-
-	tmp = nv_rd32(device, NV50_PDISPLAY_EVO_CTRL(id));
-	if ((tmp & 0x003f0000) == 0x00030000)
-		nv_wr32(device, NV50_PDISPLAY_EVO_CTRL(id), tmp | 0x00600000);
-
-	/* initialise fifo */
-	nv_wr32(device, NV50_PDISPLAY_EVO_DMA_CB(id), pushbuf >> 8 |
-		     NV50_PDISPLAY_EVO_DMA_CB_LOCATION_VRAM |
-		     NV50_PDISPLAY_EVO_DMA_CB_VALID);
-	nv_wr32(device, NV50_PDISPLAY_EVO_UNK2(id), 0x00010000);
-	nv_wr32(device, NV50_PDISPLAY_EVO_HASH_TAG(id), id);
-	nv_mask(device, NV50_PDISPLAY_EVO_CTRL(id), NV50_PDISPLAY_EVO_CTRL_DMA,
-		     NV50_PDISPLAY_EVO_CTRL_DMA_ENABLED);
-
-	nv_wr32(device, NV50_PDISPLAY_USER_PUT(id), 0x00000000);
-	nv_wr32(device, NV50_PDISPLAY_EVO_CTRL(id), 0x01000003 |
-		     NV50_PDISPLAY_EVO_CTRL_DMA_ENABLED);
-	if (!nv_wait(device, NV50_PDISPLAY_EVO_CTRL(id), 0x80000000, 0x00000000)) {
-		NV_ERROR(drm, "EvoCh %d init timeout: 0x%08x\n", id,
-			 nv_rd32(device, NV50_PDISPLAY_EVO_CTRL(id)));
-		return -EBUSY;
-	}
-
-	/* enable error reporting on the channel */
-	nv_mask(device, 0x610028, 0x00000000, 0x00010001 << id);
-
-	evo->dma.max = (4096/4) - 2;
-	evo->dma.max &= ~7;
-	evo->dma.put = 0;
-	evo->dma.cur = evo->dma.put;
-	evo->dma.free = evo->dma.max - evo->dma.cur;
-
-	ret = RING_SPACE(evo, NOUVEAU_DMA_SKIPS);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < NOUVEAU_DMA_SKIPS; i++)
-		OUT_RING(evo, 0);
-
-	return 0;
-}
-
-static void
-nv50_evo_channel_fini(struct nouveau_channel *evo)
-{
-	struct nouveau_drm *drm = evo->drm;
-	struct nouveau_device *device = nv_device(drm->device);
-	int id = evo->handle;
-
-	nv_mask(device, 0x610028, 0x00010001 << id, 0x00000000);
-	nv_mask(device, NV50_PDISPLAY_EVO_CTRL(id), 0x00001010, 0x00001000);
-	nv_wr32(device, NV50_PDISPLAY_INTR_0, (1 << id));
-	nv_mask(device, NV50_PDISPLAY_EVO_CTRL(id), 0x00000003, 0x00000000);
-	if (!nv_wait(device, NV50_PDISPLAY_EVO_CTRL(id), 0x001e0000, 0x00000000)) {
-		NV_ERROR(drm, "EvoCh %d takedown timeout: 0x%08x\n", id,
-			 nv_rd32(device, NV50_PDISPLAY_EVO_CTRL(id)));
-	}
-}
-
-void
-nv50_evo_destroy(struct drm_device *dev)
-{
-	struct nv50_display *disp = nv50_display(dev);
-	int i;
-
-	for (i = 0; i < 2; i++) {
-		if (disp->crtc[i].sem.bo) {
-			nouveau_bo_unmap(disp->crtc[i].sem.bo);
-			nouveau_bo_ref(NULL, &disp->crtc[i].sem.bo);
-		}
-		nv50_evo_channel_del(&disp->crtc[i].sync);
-	}
-	nv50_evo_channel_del(&disp->master);
-	nouveau_gpuobj_ref(NULL, &disp->ramin);
-}
-
-int
-nv50_evo_create(struct drm_device *dev)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_fb *pfb = nouveau_fb(drm->device);
-	struct nv50_display *disp = nv50_display(dev);
-	struct nouveau_channel *evo;
-	int ret, i, j;
-
-	/* setup object management on it, any other evo channel will
-	 * use this also as there's no per-channel support on the
-	 * hardware
-	 */
-	ret = nouveau_gpuobj_new(drm->device, NULL, 32768, 65536,
-				 NVOBJ_FLAG_ZERO_ALLOC, &disp->ramin);
-	if (ret) {
-		NV_ERROR(drm, "Error allocating EVO channel memory: %d\n", ret);
-		goto err;
-	}
-
-	disp->hash = 0x0000;
-	disp->dmao = 0x1000;
-
-	/* create primary evo channel, the one we use for modesetting
-	 * purporses
-	 */
-	ret = nv50_evo_channel_new(dev, 0, &disp->master);
-	if (ret)
-		return ret;
-	evo = disp->master;
-
-	ret = nv50_evo_dmaobj_new(disp->master, NvEvoSync, 0x0000,
-				  disp->ramin->addr + 0x2000, 0x1000, NULL);
-	if (ret)
-		goto err;
-
-	/* create some default objects for the scanout memtypes we support */
-	ret = nv50_evo_dmaobj_new(disp->master, NvEvoVRAM, 0x0000,
-				  0, pfb->ram.size, NULL);
-	if (ret)
-		goto err;
-
-	ret = nv50_evo_dmaobj_new(disp->master, NvEvoVRAM_LP, 0x80000000,
-				  0, pfb->ram.size, NULL);
-	if (ret)
-		goto err;
-
-	ret = nv50_evo_dmaobj_new(disp->master, NvEvoFB32, 0x80000000 |
-				  (nv_device(drm->device)->chipset < 0xc0 ? 0x7a : 0xfe),
-				  0, pfb->ram.size, NULL);
-	if (ret)
-		goto err;
-
-	ret = nv50_evo_dmaobj_new(disp->master, NvEvoFB16, 0x80000000 |
-				  (nv_device(drm->device)->chipset < 0xc0 ? 0x70 : 0xfe),
-				  0, pfb->ram.size, NULL);
-	if (ret)
-		goto err;
-
-	/* create "display sync" channels and other structures we need
-	 * to implement page flipping
-	 */
-	for (i = 0; i < 2; i++) {
-		struct nv50_display_crtc *dispc = &disp->crtc[i];
-		u64 offset;
-
-		ret = nv50_evo_channel_new(dev, 1 + i, &dispc->sync);
-		if (ret)
-			goto err;
-
-		ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
-				     0, 0x0000, NULL, &dispc->sem.bo);
-		if (!ret) {
-			ret = nouveau_bo_pin(dispc->sem.bo, TTM_PL_FLAG_VRAM);
-			if (!ret)
-				ret = nouveau_bo_map(dispc->sem.bo);
-			if (ret)
-				nouveau_bo_ref(NULL, &dispc->sem.bo);
-			offset = dispc->sem.bo->bo.offset;
-		}
-
-		if (ret)
-			goto err;
-
-		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoSync, 0x0000,
-					  offset, 4096, NULL);
-		if (ret)
-			goto err;
-
-		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoVRAM_LP, 0x80000000,
-					  0, pfb->ram.size, NULL);
-		if (ret)
-			goto err;
-
-		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoFB32, 0x80000000 |
-					  (nv_device(drm->device)->chipset < 0xc0 ?
-					  0x7a : 0xfe),
-					  0, pfb->ram.size, NULL);
-		if (ret)
-			goto err;
-
-		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoFB16, 0x80000000 |
-					  (nv_device(drm->device)->chipset < 0xc0 ?
-					  0x70 : 0xfe),
-					  0, pfb->ram.size, NULL);
-		if (ret)
-			goto err;
-
-		for (j = 0; j < 4096; j += 4)
-			nouveau_bo_wr32(dispc->sem.bo, j / 4, 0x74b1e000);
-		dispc->sem.offset = 0;
-	}
-
-	return 0;
-
-err:
-	nv50_evo_destroy(dev);
-	return ret;
-}
-
-int
-nv50_evo_init(struct drm_device *dev)
-{
-	struct nv50_display *disp = nv50_display(dev);
-	int ret, i;
-
-	ret = nv50_evo_channel_init(disp->master);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < 2; i++) {
-		ret = nv50_evo_channel_init(disp->crtc[i].sync);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-void
-nv50_evo_fini(struct drm_device *dev)
-{
-	struct nv50_display *disp = nv50_display(dev);
-	int i;
-
-	for (i = 0; i < 2; i++) {
-		if (disp->crtc[i].sync)
-			nv50_evo_channel_fini(disp->crtc[i].sync);
-	}
-
-	if (disp->master)
-		nv50_evo_channel_fini(disp->master);
-}
diff --git a/drivers/gpu/drm/nouveau/nv50_evo.h b/drivers/gpu/drm/nouveau/nv50_evo.h
deleted file mode 100644
index 771d879..0000000
--- a/drivers/gpu/drm/nouveau/nv50_evo.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (C) 2008 Maarten Maathuis.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __NV50_EVO_H__
-#define __NV50_EVO_H__
-
-#define NV50_EVO_UPDATE                                              0x00000080
-#define NV50_EVO_UNK84                                               0x00000084
-#define NV50_EVO_UNK84_NOTIFY                                        0x40000000
-#define NV50_EVO_UNK84_NOTIFY_DISABLED                               0x00000000
-#define NV50_EVO_UNK84_NOTIFY_ENABLED                                0x40000000
-#define NV50_EVO_DMA_NOTIFY                                          0x00000088
-#define NV50_EVO_DMA_NOTIFY_HANDLE                                   0xffffffff
-#define NV50_EVO_DMA_NOTIFY_HANDLE_NONE                              0x00000000
-#define NV50_EVO_UNK8C                                               0x0000008C
-
-#define NV50_EVO_DAC(n, r)                       ((n) * 0x80 + NV50_EVO_DAC_##r)
-#define NV50_EVO_DAC_MODE_CTRL                                       0x00000400
-#define NV50_EVO_DAC_MODE_CTRL_CRTC0                                 0x00000001
-#define NV50_EVO_DAC_MODE_CTRL_CRTC1                                 0x00000002
-#define NV50_EVO_DAC_MODE_CTRL2                                      0x00000404
-#define NV50_EVO_DAC_MODE_CTRL2_NHSYNC                               0x00000001
-#define NV50_EVO_DAC_MODE_CTRL2_NVSYNC                               0x00000002
-
-#define NV50_EVO_SOR(n, r)                       ((n) * 0x40 + NV50_EVO_SOR_##r)
-#define NV50_EVO_SOR_MODE_CTRL                                       0x00000600
-#define NV50_EVO_SOR_MODE_CTRL_CRTC0                                 0x00000001
-#define NV50_EVO_SOR_MODE_CTRL_CRTC1                                 0x00000002
-#define NV50_EVO_SOR_MODE_CTRL_TMDS                                  0x00000100
-#define NV50_EVO_SOR_MODE_CTRL_TMDS_DUAL_LINK                        0x00000400
-#define NV50_EVO_SOR_MODE_CTRL_NHSYNC                                0x00001000
-#define NV50_EVO_SOR_MODE_CTRL_NVSYNC                                0x00002000
-
-#define NV50_EVO_CRTC(n, r)                    ((n) * 0x400 + NV50_EVO_CRTC_##r)
-#define NV84_EVO_CRTC(n, r)                    ((n) * 0x400 + NV84_EVO_CRTC_##r)
-#define NV50_EVO_CRTC_UNK0800                                        0x00000800
-#define NV50_EVO_CRTC_CLOCK                                          0x00000804
-#define NV50_EVO_CRTC_INTERLACE                                      0x00000808
-#define NV50_EVO_CRTC_DISPLAY_START                                  0x00000810
-#define NV50_EVO_CRTC_DISPLAY_TOTAL                                  0x00000814
-#define NV50_EVO_CRTC_SYNC_DURATION                                  0x00000818
-#define NV50_EVO_CRTC_SYNC_START_TO_BLANK_END                        0x0000081c
-#define NV50_EVO_CRTC_UNK0820                                        0x00000820
-#define NV50_EVO_CRTC_UNK0824                                        0x00000824
-#define NV50_EVO_CRTC_UNK082C                                        0x0000082c
-#define NV50_EVO_CRTC_CLUT_MODE                                      0x00000840
-/* You can't have a palette in 8 bit mode (=OFF) */
-#define NV50_EVO_CRTC_CLUT_MODE_BLANK                                0x00000000
-#define NV50_EVO_CRTC_CLUT_MODE_OFF                                  0x80000000
-#define NV50_EVO_CRTC_CLUT_MODE_ON                                   0xC0000000
-#define NV50_EVO_CRTC_CLUT_OFFSET                                    0x00000844
-#define NV84_EVO_CRTC_CLUT_DMA                                       0x0000085C
-#define NV84_EVO_CRTC_CLUT_DMA_HANDLE                                0xffffffff
-#define NV84_EVO_CRTC_CLUT_DMA_HANDLE_NONE                           0x00000000
-#define NV50_EVO_CRTC_FB_OFFSET                                      0x00000860
-#define NV50_EVO_CRTC_FB_SIZE                                        0x00000868
-#define NV50_EVO_CRTC_FB_CONFIG                                      0x0000086c
-#define NV50_EVO_CRTC_FB_CONFIG_MODE                                 0x00100000
-#define NV50_EVO_CRTC_FB_CONFIG_MODE_TILE                            0x00000000
-#define NV50_EVO_CRTC_FB_CONFIG_MODE_PITCH                           0x00100000
-#define NV50_EVO_CRTC_FB_DEPTH                                       0x00000870
-#define NV50_EVO_CRTC_FB_DEPTH_8                                     0x00001e00
-#define NV50_EVO_CRTC_FB_DEPTH_15                                    0x0000e900
-#define NV50_EVO_CRTC_FB_DEPTH_16                                    0x0000e800
-#define NV50_EVO_CRTC_FB_DEPTH_24                                    0x0000cf00
-#define NV50_EVO_CRTC_FB_DEPTH_30                                    0x0000d100
-#define NV50_EVO_CRTC_FB_DMA                                         0x00000874
-#define NV50_EVO_CRTC_FB_DMA_HANDLE                                  0xffffffff
-#define NV50_EVO_CRTC_FB_DMA_HANDLE_NONE                             0x00000000
-#define NV50_EVO_CRTC_CURSOR_CTRL                                    0x00000880
-#define NV50_EVO_CRTC_CURSOR_CTRL_HIDE                               0x05000000
-#define NV50_EVO_CRTC_CURSOR_CTRL_SHOW                               0x85000000
-#define NV50_EVO_CRTC_CURSOR_OFFSET                                  0x00000884
-#define NV84_EVO_CRTC_CURSOR_DMA                                     0x0000089c
-#define NV84_EVO_CRTC_CURSOR_DMA_HANDLE                              0xffffffff
-#define NV84_EVO_CRTC_CURSOR_DMA_HANDLE_NONE                         0x00000000
-#define NV50_EVO_CRTC_DITHER_CTRL                                    0x000008a0
-#define NV50_EVO_CRTC_DITHER_CTRL_OFF                                0x00000000
-#define NV50_EVO_CRTC_DITHER_CTRL_ON                                 0x00000011
-#define NV50_EVO_CRTC_SCALE_CTRL                                     0x000008a4
-#define NV50_EVO_CRTC_SCALE_CTRL_INACTIVE                            0x00000000
-#define NV50_EVO_CRTC_SCALE_CTRL_ACTIVE                              0x00000009
-#define NV50_EVO_CRTC_COLOR_CTRL                                     0x000008a8
-#define NV50_EVO_CRTC_COLOR_CTRL_VIBRANCE                            0x000fff00
-#define NV50_EVO_CRTC_COLOR_CTRL_HUE                                 0xfff00000
-#define NV50_EVO_CRTC_FB_POS                                         0x000008c0
-#define NV50_EVO_CRTC_REAL_RES                                       0x000008c8
-#define NV50_EVO_CRTC_SCALE_CENTER_OFFSET                            0x000008d4
-#define NV50_EVO_CRTC_SCALE_CENTER_OFFSET_VAL(x, y) \
-	((((unsigned)y << 16) & 0xFFFF0000) | (((unsigned)x) & 0x0000FFFF))
-/* Both of these are needed, otherwise nothing happens. */
-#define NV50_EVO_CRTC_SCALE_RES1                                     0x000008d8
-#define NV50_EVO_CRTC_SCALE_RES2                                     0x000008dc
-#define NV50_EVO_CRTC_UNK900                                         0x00000900
-#define NV50_EVO_CRTC_UNK904                                         0x00000904
-
-#endif
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index e0763ea..c20f272 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -110,8 +110,11 @@
 			     0, 0x0000, NULL, &priv->bo);
 	if (!ret) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
-		if (!ret)
+		if (!ret) {
 			ret = nouveau_bo_map(priv->bo);
+			if (ret)
+				nouveau_bo_unpin(priv->bo);
+		}
 		if (ret)
 			nouveau_bo_ref(NULL, &priv->bo);
 	}
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c
index c4a6503..8bd5d27 100644
--- a/drivers/gpu/drm/nouveau/nv50_pm.c
+++ b/drivers/gpu/drm/nouveau/nv50_pm.c
@@ -546,7 +546,7 @@
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_device *device = nouveau_dev(dev);
-	u32 crtc_mask = nv50_display_active_crtcs(dev);
+	u32 crtc_mask = 0; /* XXX: nv50_display_active_crtcs(dev); */
 	struct nouveau_mem_exec_func exec = {
 		.dev = dev,
 		.precharge = mclk_precharge,
diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
deleted file mode 100644
index b562b59..0000000
--- a/drivers/gpu/drm/nouveau/nv50_sor.c
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
- * Copyright (C) 2008 Maarten Maathuis.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
-
-#define NOUVEAU_DMA_DEBUG (nouveau_reg_debug & NOUVEAU_REG_DEBUG_EVO)
-#include "nouveau_reg.h"
-#include "nouveau_drm.h"
-#include "nouveau_dma.h"
-#include "nouveau_encoder.h"
-#include "nouveau_connector.h"
-#include "nouveau_crtc.h"
-#include "nv50_display.h"
-
-#include <subdev/timer.h>
-
-static u32
-nv50_sor_dp_lane_map(struct drm_device *dev, struct dcb_output *dcb, u8 lane)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	static const u8 nvaf[] = { 24, 16, 8, 0 }; /* thanks, apple.. */
-	static const u8 nv50[] = { 16, 8, 0, 24 };
-	if (nv_device(drm->device)->chipset == 0xaf)
-		return nvaf[lane];
-	return nv50[lane];
-}
-
-static void
-nv50_sor_dp_train_set(struct drm_device *dev, struct dcb_output *dcb, u8 pattern)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
-	nv_mask(device, NV50_SOR_DP_CTRL(or, link), 0x0f000000, pattern << 24);
-}
-
-static void
-nv50_sor_dp_train_adj(struct drm_device *dev, struct dcb_output *dcb,
-		      u8 lane, u8 swing, u8 preem)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
-	u32 shift = nv50_sor_dp_lane_map(dev, dcb, lane);
-	u32 mask = 0x000000ff << shift;
-	u8 *table, *entry, *config;
-
-	table = nouveau_dp_bios_data(dev, dcb, &entry);
-	if (!table || (table[0] != 0x20 && table[0] != 0x21)) {
-		NV_ERROR(drm, "PDISP: unsupported DP table for chipset\n");
-		return;
-	}
-
-	config = entry + table[4];
-	while (config[0] != swing || config[1] != preem) {
-		config += table[5];
-		if (config >= entry + table[4] + entry[4] * table[5])
-			return;
-	}
-
-	nv_mask(device, NV50_SOR_DP_UNK118(or, link), mask, config[2] << shift);
-	nv_mask(device, NV50_SOR_DP_UNK120(or, link), mask, config[3] << shift);
-	nv_mask(device, NV50_SOR_DP_UNK130(or, link), 0x0000ff00, config[4] << 8);
-}
-
-static void
-nv50_sor_dp_link_set(struct drm_device *dev, struct dcb_output *dcb, int crtc,
-		     int link_nr, u32 link_bw, bool enhframe)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
-	u32 dpctrl = nv_rd32(device, NV50_SOR_DP_CTRL(or, link)) & ~0x001f4000;
-	u32 clksor = nv_rd32(device, 0x614300 + (or * 0x800)) & ~0x000c0000;
-	u8 *table, *entry, mask;
-	int i;
-
-	table = nouveau_dp_bios_data(dev, dcb, &entry);
-	if (!table || (table[0] != 0x20 && table[0] != 0x21)) {
-		NV_ERROR(drm, "PDISP: unsupported DP table for chipset\n");
-		return;
-	}
-
-	entry = ROMPTR(dev, entry[10]);
-	if (entry) {
-		while (link_bw < ROM16(entry[0]) * 10)
-			entry += 4;
-
-		nouveau_bios_run_init_table(dev, ROM16(entry[2]), dcb, crtc);
-	}
-
-	dpctrl |= ((1 << link_nr) - 1) << 16;
-	if (enhframe)
-		dpctrl |= 0x00004000;
-
-	if (link_bw > 162000)
-		clksor |= 0x00040000;
-
-	nv_wr32(device, 0x614300 + (or * 0x800), clksor);
-	nv_wr32(device, NV50_SOR_DP_CTRL(or, link), dpctrl);
-
-	mask = 0;
-	for (i = 0; i < link_nr; i++)
-		mask |= 1 << (nv50_sor_dp_lane_map(dev, dcb, i) >> 3);
-	nv_mask(device, NV50_SOR_DP_UNK130(or, link), 0x0000000f, mask);
-}
-
-static void
-nv50_sor_dp_link_get(struct drm_device *dev, u32 or, u32 link, u32 *nr, u32 *bw)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	u32 dpctrl = nv_rd32(device, NV50_SOR_DP_CTRL(or, link)) & 0x000f0000;
-	u32 clksor = nv_rd32(device, 0x614300 + (or * 0x800));
-	if (clksor & 0x000c0000)
-		*bw = 270000;
-	else
-		*bw = 162000;
-
-	if      (dpctrl > 0x00030000) *nr = 4;
-	else if (dpctrl > 0x00010000) *nr = 2;
-	else			      *nr = 1;
-}
-
-void
-nv50_sor_dp_calc_tu(struct drm_device *dev, int or, int link, u32 clk, u32 bpp)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	const u32 symbol = 100000;
-	int bestTU = 0, bestVTUi = 0, bestVTUf = 0, bestVTUa = 0;
-	int TU, VTUi, VTUf, VTUa;
-	u64 link_data_rate, link_ratio, unk;
-	u32 best_diff = 64 * symbol;
-	u32 link_nr, link_bw, r;
-
-	/* calculate packed data rate for each lane */
-	nv50_sor_dp_link_get(dev, or, link, &link_nr, &link_bw);
-	link_data_rate = (clk * bpp / 8) / link_nr;
-
-	/* calculate ratio of packed data rate to link symbol rate */
-	link_ratio = link_data_rate * symbol;
-	r = do_div(link_ratio, link_bw);
-
-	for (TU = 64; TU >= 32; TU--) {
-		/* calculate average number of valid symbols in each TU */
-		u32 tu_valid = link_ratio * TU;
-		u32 calc, diff;
-
-		/* find a hw representation for the fraction.. */
-		VTUi = tu_valid / symbol;
-		calc = VTUi * symbol;
-		diff = tu_valid - calc;
-		if (diff) {
-			if (diff >= (symbol / 2)) {
-				VTUf = symbol / (symbol - diff);
-				if (symbol - (VTUf * diff))
-					VTUf++;
-
-				if (VTUf <= 15) {
-					VTUa  = 1;
-					calc += symbol - (symbol / VTUf);
-				} else {
-					VTUa  = 0;
-					VTUf  = 1;
-					calc += symbol;
-				}
-			} else {
-				VTUa  = 0;
-				VTUf  = min((int)(symbol / diff), 15);
-				calc += symbol / VTUf;
-			}
-
-			diff = calc - tu_valid;
-		} else {
-			/* no remainder, but the hw doesn't like the fractional
-			 * part to be zero.  decrement the integer part and
-			 * have the fraction add a whole symbol back
-			 */
-			VTUa = 0;
-			VTUf = 1;
-			VTUi--;
-		}
-
-		if (diff < best_diff) {
-			best_diff = diff;
-			bestTU = TU;
-			bestVTUa = VTUa;
-			bestVTUf = VTUf;
-			bestVTUi = VTUi;
-			if (diff == 0)
-				break;
-		}
-	}
-
-	if (!bestTU) {
-		NV_ERROR(drm, "DP: unable to find suitable config\n");
-		return;
-	}
-
-	/* XXX close to vbios numbers, but not right */
-	unk  = (symbol - link_ratio) * bestTU;
-	unk *= link_ratio;
-	r = do_div(unk, symbol);
-	r = do_div(unk, symbol);
-	unk += 6;
-
-	nv_mask(device, NV50_SOR_DP_CTRL(or, link), 0x000001fc, bestTU << 2);
-	nv_mask(device, NV50_SOR_DP_SCFG(or, link), 0x010f7f3f, bestVTUa << 24 |
-							     bestVTUf << 16 |
-							     bestVTUi << 8 |
-							     unk);
-}
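
The TU search above balances the packed per-lane data rate against the link symbol rate in 1/100000 fixed-point steps. As a rough illustration, assuming a hypothetical 148.5 MHz mode at 24bpp over four 2.7 Gb/s lanes (values chosen for illustration, not taken from this patch), the arithmetic reduces to:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		const uint64_t symbol = 100000;
		/* hypothetical inputs: 148500 kHz pixel clock, 24bpp, 4 lanes @ 2.7Gb/s */
		uint64_t clk = 148500, bpp = 24, link_nr = 4, link_bw = 270000;

		uint64_t link_data_rate = (clk * bpp / 8) / link_nr;     /* 111375 */
		uint64_t link_ratio = link_data_rate * symbol / link_bw; /* 41250, i.e. 0.4125 */

		/* a 56-symbol TU carries 56 * 0.4125 = 23.1 valid symbols, which the
		 * integer/fraction split represents exactly as VTUi = 23, VTUf = 10
		 * (23 + 1/10), so for these inputs the search terminates at TU = 56 */
		uint64_t tu_valid = link_ratio * 56;                     /* 2310000 */
		printf("ratio %llu, VTUi %llu, remainder %llu\n",
		       (unsigned long long)link_ratio,
		       (unsigned long long)(tu_valid / symbol),
		       (unsigned long long)(tu_valid % symbol));
		return 0;
	}
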
-
-static void
-nv50_sor_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_channel *evo = nv50_display(dev)->master;
-	int ret;
-
-	if (!nv_encoder->crtc)
-		return;
-	nv50_crtc_blank(nouveau_crtc(nv_encoder->crtc), true);
-
-	NV_DEBUG(drm, "Disconnecting SOR %d\n", nv_encoder->or);
-
-	ret = RING_SPACE(evo, 4);
-	if (ret) {
-		NV_ERROR(drm, "no space while disconnecting SOR\n");
-		return;
-	}
-	BEGIN_NV04(evo, 0, NV50_EVO_SOR(nv_encoder->or, MODE_CTRL), 1);
-	OUT_RING  (evo, 0);
-	BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
-	OUT_RING  (evo, 0);
-
-	nouveau_hdmi_mode_set(encoder, NULL);
-
-	nv_encoder->crtc = NULL;
-	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
-}
-
-static void
-nv50_sor_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct nouveau_device *device = nouveau_dev(encoder->dev);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_encoder *enc;
-	uint32_t val;
-	int or = nv_encoder->or;
-
-	NV_DEBUG(drm, "or %d type %d mode %d\n", or, nv_encoder->dcb->type, mode);
-
-	nv_encoder->last_dpms = mode;
-	list_for_each_entry(enc, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nvenc = nouveau_encoder(enc);
-
-		if (nvenc == nv_encoder ||
-		    (nvenc->dcb->type != DCB_OUTPUT_TMDS &&
-		     nvenc->dcb->type != DCB_OUTPUT_LVDS &&
-		     nvenc->dcb->type != DCB_OUTPUT_DP) ||
-		    nvenc->dcb->or != nv_encoder->dcb->or)
-			continue;
-
-		if (nvenc->last_dpms == DRM_MODE_DPMS_ON)
-			return;
-	}
-
-	/* wait for it to be done */
-	if (!nv_wait(device, NV50_PDISPLAY_SOR_DPMS_CTRL(or),
-		     NV50_PDISPLAY_SOR_DPMS_CTRL_PENDING, 0)) {
-		NV_ERROR(drm, "timeout: SOR_DPMS_CTRL_PENDING(%d) == 0\n", or);
-		NV_ERROR(drm, "SOR_DPMS_CTRL(%d) = 0x%08x\n", or,
-			 nv_rd32(device, NV50_PDISPLAY_SOR_DPMS_CTRL(or)));
-	}
-
-	val = nv_rd32(device, NV50_PDISPLAY_SOR_DPMS_CTRL(or));
-
-	if (mode == DRM_MODE_DPMS_ON)
-		val |= NV50_PDISPLAY_SOR_DPMS_CTRL_ON;
-	else
-		val &= ~NV50_PDISPLAY_SOR_DPMS_CTRL_ON;
-
-	nv_wr32(device, NV50_PDISPLAY_SOR_DPMS_CTRL(or), val |
-		NV50_PDISPLAY_SOR_DPMS_CTRL_PENDING);
-	if (!nv_wait(device, NV50_PDISPLAY_SOR_DPMS_STATE(or),
-		     NV50_PDISPLAY_SOR_DPMS_STATE_WAIT, 0)) {
-		NV_ERROR(drm, "timeout: SOR_DPMS_STATE_WAIT(%d) == 0\n", or);
-		NV_ERROR(drm, "SOR_DPMS_STATE(%d) = 0x%08x\n", or,
-			 nv_rd32(device, NV50_PDISPLAY_SOR_DPMS_STATE(or)));
-	}
-
-	if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
-		struct dp_train_func func = {
-			.link_set = nv50_sor_dp_link_set,
-			.train_set = nv50_sor_dp_train_set,
-			.train_adj = nv50_sor_dp_train_adj
-		};
-
-		nouveau_dp_dpms(encoder, mode, nv_encoder->dp.datarate, &func);
-	}
-}
-
-static void
-nv50_sor_save(struct drm_encoder *encoder)
-{
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	NV_ERROR(drm, "!!\n");
-}
-
-static void
-nv50_sor_restore(struct drm_encoder *encoder)
-{
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	NV_ERROR(drm, "!!\n");
-}
-
-static bool
-nv50_sor_mode_fixup(struct drm_encoder *encoder,
-		    const struct drm_display_mode *mode,
-		    struct drm_display_mode *adjusted_mode)
-{
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *connector;
-
-	NV_DEBUG(drm, "or %d\n", nv_encoder->or);
-
-	connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!connector) {
-		NV_ERROR(drm, "Encoder has no connector\n");
-		return false;
-	}
-
-	if (connector->scaling_mode != DRM_MODE_SCALE_NONE &&
-	     connector->native_mode)
-		drm_mode_copy(adjusted_mode, connector->native_mode);
-
-	return true;
-}
-
-static void
-nv50_sor_prepare(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	nv50_sor_disconnect(encoder);
-	if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
-		/* avoid race between link training and supervisor intr */
-		nv50_display_sync(encoder->dev);
-	}
-}
-
-static void
-nv50_sor_commit(struct drm_encoder *encoder)
-{
-}
-
-static void
-nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
-		  struct drm_display_mode *mode)
-{
-	struct nouveau_channel *evo = nv50_display(encoder->dev)->master;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-	struct nouveau_crtc *crtc = nouveau_crtc(encoder->crtc);
-	struct nouveau_connector *nv_connector;
-	uint32_t mode_ctl = 0;
-	int ret;
-
-	NV_DEBUG(drm, "or %d type %d -> crtc %d\n",
-		     nv_encoder->or, nv_encoder->dcb->type, crtc->index);
-	nv_encoder->crtc = encoder->crtc;
-
-	switch (nv_encoder->dcb->type) {
-	case DCB_OUTPUT_TMDS:
-		if (nv_encoder->dcb->sorconf.link & 1) {
-			if (mode->clock < 165000)
-				mode_ctl = 0x0100;
-			else
-				mode_ctl = 0x0500;
-		} else
-			mode_ctl = 0x0200;
-
-		nouveau_hdmi_mode_set(encoder, mode);
-		break;
-	case DCB_OUTPUT_DP:
-		nv_connector = nouveau_encoder_connector_get(nv_encoder);
-		if (nv_connector && nv_connector->base.display_info.bpc == 6) {
-			nv_encoder->dp.datarate = mode->clock * 18 / 8;
-			mode_ctl |= 0x00020000;
-		} else {
-			nv_encoder->dp.datarate = mode->clock * 24 / 8;
-			mode_ctl |= 0x00050000;
-		}
-
-		if (nv_encoder->dcb->sorconf.link & 1)
-			mode_ctl |= 0x00000800;
-		else
-			mode_ctl |= 0x00000900;
-		break;
-	default:
-		break;
-	}
-
-	if (crtc->index == 1)
-		mode_ctl |= NV50_EVO_SOR_MODE_CTRL_CRTC1;
-	else
-		mode_ctl |= NV50_EVO_SOR_MODE_CTRL_CRTC0;
-
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		mode_ctl |= NV50_EVO_SOR_MODE_CTRL_NHSYNC;
-
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		mode_ctl |= NV50_EVO_SOR_MODE_CTRL_NVSYNC;
-
-	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
-
-	ret = RING_SPACE(evo, 2);
-	if (ret) {
-		NV_ERROR(drm, "no space while connecting SOR\n");
-		nv_encoder->crtc = NULL;
-		return;
-	}
-	BEGIN_NV04(evo, 0, NV50_EVO_SOR(nv_encoder->or, MODE_CTRL), 1);
-	OUT_RING(evo, mode_ctl);
-}
-
-static struct drm_crtc *
-nv50_sor_crtc_get(struct drm_encoder *encoder)
-{
-	return nouveau_encoder(encoder)->crtc;
-}
-
-static const struct drm_encoder_helper_funcs nv50_sor_helper_funcs = {
-	.dpms = nv50_sor_dpms,
-	.save = nv50_sor_save,
-	.restore = nv50_sor_restore,
-	.mode_fixup = nv50_sor_mode_fixup,
-	.prepare = nv50_sor_prepare,
-	.commit = nv50_sor_commit,
-	.mode_set = nv50_sor_mode_set,
-	.get_crtc = nv50_sor_crtc_get,
-	.detect = NULL,
-	.disable = nv50_sor_disconnect
-};
-
-static void
-nv50_sor_destroy(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-
-	NV_DEBUG(drm, "\n");
-
-	drm_encoder_cleanup(encoder);
-
-	kfree(nv_encoder);
-}
-
-static const struct drm_encoder_funcs nv50_sor_encoder_funcs = {
-	.destroy = nv50_sor_destroy,
-};
-
-int
-nv50_sor_create(struct drm_connector *connector, struct dcb_output *entry)
-{
-	struct nouveau_encoder *nv_encoder = NULL;
-	struct drm_device *dev = connector->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct drm_encoder *encoder;
-	int type;
-
-	NV_DEBUG(drm, "\n");
-
-	switch (entry->type) {
-	case DCB_OUTPUT_TMDS:
-	case DCB_OUTPUT_DP:
-		type = DRM_MODE_ENCODER_TMDS;
-		break;
-	case DCB_OUTPUT_LVDS:
-		type = DRM_MODE_ENCODER_LVDS;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
-	if (!nv_encoder)
-		return -ENOMEM;
-	encoder = to_drm_encoder(nv_encoder);
-
-	nv_encoder->dcb = entry;
-	nv_encoder->or = ffs(entry->or) - 1;
-	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
-
-	drm_encoder_init(dev, encoder, &nv50_sor_encoder_funcs, type);
-	drm_encoder_helper_add(encoder, &nv50_sor_helper_funcs);
-
-	encoder->possible_crtcs = entry->heads;
-	encoder->possible_clones = 0;
-
-	drm_mode_connector_attach_encoder(connector, encoder);
-	return 0;
-}
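
Throughout this file and the nvd0 code below, the DCB "or" field is a bitmask with a single bit set per output resource, and the recurring ffs(entry->or) - 1 idiom converts it to a zero-based index. A minimal illustration:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	int main(void)
	{
		/* DCB or-masks 0x1, 0x2, 0x4 map to OR indices 0, 1, 2 */
		unsigned masks[] = { 0x1, 0x2, 0x4 };
		for (int i = 0; i < 3; i++)
			printf("or-mask 0x%x -> index %d\n", masks[i], ffs(masks[i]) - 1);
		return 0;
	}
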
diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c
index 53299ea..2a56b1b 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fence.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
@@ -114,17 +114,9 @@
 	struct nvc0_fence_chan *fctx = chan->fence;
 	int i;
 
-	if (nv_device(chan->drm->device)->card_type >= NV_D0) {
-		for (i = 0; i < dev->mode_config.num_crtc; i++) {
-			struct nouveau_bo *bo = nvd0_display_crtc_sema(dev, i);
-			nouveau_bo_vma_del(bo, &fctx->dispc_vma[i]);
-		}
-	} else
-	if (nv_device(chan->drm->device)->card_type >= NV_50) {
-		for (i = 0; i < dev->mode_config.num_crtc; i++) {
-			struct nouveau_bo *bo = nv50_display_crtc_sema(dev, i);
-			nouveau_bo_vma_del(bo, &fctx->dispc_vma[i]);
-		}
+	for (i = 0; i < dev->mode_config.num_crtc; i++) {
+		struct nouveau_bo *bo = nv50_display_crtc_sema(dev, i);
+		nouveau_bo_vma_del(bo, &fctx->dispc_vma[i]);
 	}
 
 	nouveau_bo_vma_del(priv->bo, &fctx->vma);
@@ -154,12 +146,7 @@
 
 	/* map display semaphore buffers into channel's vm */
 	for (i = 0; !ret && i < chan->drm->dev->mode_config.num_crtc; i++) {
-		struct nouveau_bo *bo;
-		if (nv_device(chan->drm->device)->card_type >= NV_D0)
-			bo = nvd0_display_crtc_sema(chan->drm->dev, i);
-		else
-			bo = nv50_display_crtc_sema(chan->drm->dev, i);
-
+		struct nouveau_bo *bo = nv50_display_crtc_sema(chan->drm->dev, i);
 		ret = nouveau_bo_vma_add(bo, client->vm, &fctx->dispc_vma[i]);
 	}
 
@@ -203,6 +190,8 @@
 {
 	struct nvc0_fence_priv *priv = drm->fence;
 	nouveau_bo_unmap(priv->bo);
+	if (priv->bo)
+		nouveau_bo_unpin(priv->bo);
 	nouveau_bo_ref(NULL, &priv->bo);
 	drm->fence = NULL;
 	kfree(priv);
@@ -232,8 +221,11 @@
 			     TTM_PL_FLAG_VRAM, 0, 0, NULL, &priv->bo);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
-		if (ret == 0)
+		if (ret == 0) {
 			ret = nouveau_bo_map(priv->bo);
+			if (ret)
+				nouveau_bo_unpin(priv->bo);
+		}
 		if (ret)
 			nouveau_bo_ref(NULL, &priv->bo);
 	}
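
Both hunks above tighten the buffer-object lifecycle so that every successful pin is undone exactly once, whether teardown happens on the error path (map failure) or at destruction time. A minimal sketch of the pattern, with illustrative stand-in names rather than the real nouveau API:

	#include <stdio.h>

	struct bo { int pinned; };

	static int  bo_pin(struct bo *b)   { b->pinned = 1; return 0; }
	static void bo_unpin(struct bo *b) { b->pinned = 0; }
	static int  bo_map(struct bo *b)   { (void)b; return -1; /* simulate failure */ }

	int main(void)
	{
		struct bo b = { 0 };
		int ret = bo_pin(&b);
		if (ret == 0) {
			ret = bo_map(&b);
			if (ret)
				bo_unpin(&b);	/* map failed: undo the pin, as the hunk adds */
		}
		printf("ret=%d pinned=%d\n", ret, b.pinned);	/* ret=-1 pinned=0 */
		return 0;
	}
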
diff --git a/drivers/gpu/drm/nouveau/nvd0_display.c b/drivers/gpu/drm/nouveau/nvd0_display.c
deleted file mode 100644
index c402fca..0000000
--- a/drivers/gpu/drm/nouveau/nvd0_display.c
+++ /dev/null
@@ -1,2141 +0,0 @@
-/*
- * Copyright 2011 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-
-#include <linux/dma-mapping.h>
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
-
-#include "nouveau_drm.h"
-#include "nouveau_dma.h"
-#include "nouveau_gem.h"
-#include "nouveau_connector.h"
-#include "nouveau_encoder.h"
-#include "nouveau_crtc.h"
-#include "nouveau_fence.h"
-#include "nv50_display.h"
-
-#include <core/gpuobj.h>
-
-#include <subdev/timer.h>
-#include <subdev/bar.h>
-#include <subdev/fb.h>
-
-#define EVO_DMA_NR 9
-
-#define EVO_MASTER  (0x00)
-#define EVO_FLIP(c) (0x01 + (c))
-#define EVO_OVLY(c) (0x05 + (c))
-#define EVO_OIMM(c) (0x09 + (c))
-#define EVO_CURS(c) (0x0d + (c))
-
-/* offsets in shared sync bo of various structures */
-#define EVO_SYNC(c, o) ((c) * 0x0100 + (o))
-#define EVO_MAST_NTFY     EVO_SYNC(  0, 0x00)
-#define EVO_FLIP_SEM0(c)  EVO_SYNC((c), 0x00)
-#define EVO_FLIP_SEM1(c)  EVO_SYNC((c), 0x10)
-
-struct evo {
-	int idx;
-	dma_addr_t handle;
-	u32 *ptr;
-	struct {
-		u32 offset;
-		u16 value;
-	} sem;
-};
-
-struct nvd0_display {
-	struct nouveau_gpuobj *mem;
-	struct nouveau_bo *sync;
-	struct evo evo[9];
-
-	struct tasklet_struct tasklet;
-	u32 modeset;
-};
-
-static struct nvd0_display *
-nvd0_display(struct drm_device *dev)
-{
-	return nouveau_display(dev)->priv;
-}
-
-static struct drm_crtc *
-nvd0_display_crtc_get(struct drm_encoder *encoder)
-{
-	return nouveau_encoder(encoder)->crtc;
-}
-
-/******************************************************************************
- * EVO channel helpers
- *****************************************************************************/
-static inline int
-evo_icmd(struct drm_device *dev, int id, u32 mthd, u32 data)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	int ret = 0;
-	nv_mask(device, 0x610700 + (id * 0x10), 0x00000001, 0x00000001);
-	nv_wr32(device, 0x610704 + (id * 0x10), data);
-	nv_mask(device, 0x610704 + (id * 0x10), 0x80000ffc, 0x80000000 | mthd);
-	if (!nv_wait(device, 0x610704 + (id * 0x10), 0x80000000, 0x00000000))
-		ret = -EBUSY;
-	nv_mask(device, 0x610700 + (id * 0x10), 0x00000001, 0x00000000);
-	return ret;
-}
-
-static u32 *
-evo_wait(struct drm_device *dev, int id, int nr)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvd0_display *disp = nvd0_display(dev);
-	u32 put = nv_rd32(device, 0x640000 + (id * 0x1000)) / 4;
-
-	if (put + nr >= (PAGE_SIZE / 4)) {
-		disp->evo[id].ptr[put] = 0x20000000;
-
-		nv_wr32(device, 0x640000 + (id * 0x1000), 0x00000000);
-		if (!nv_wait(device, 0x640004 + (id * 0x1000), ~0, 0x00000000)) {
-			NV_ERROR(drm, "evo %d dma stalled\n", id);
-			return NULL;
-		}
-
-		put = 0;
-	}
-
-	return disp->evo[id].ptr + put;
-}
-
-static void
-evo_kick(u32 *push, struct drm_device *dev, int id)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nvd0_display *disp = nvd0_display(dev);
-
-	nv_wr32(device, 0x640000 + (id * 0x1000), (push - disp->evo[id].ptr) << 2);
-}
-
-#define evo_mthd(p,m,s) *((p)++) = (((s) << 18) | (m))
-#define evo_data(p,d)   *((p)++) = (d)
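
evo_mthd() packs a method header as (word count << 18) | method, with the payload words following. For example, a single-word write of zero to method 0x0080 (the method this file uses to latch updates) encodes as:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t buf[2], *p = buf;
		*(p++) = (1 << 18) | 0x0080;	/* evo_mthd(p, 0x0080, 1) */
		*(p++) = 0x00000000;		/* evo_data(p, 0x00000000) */
		printf("0x%08x 0x%08x\n", buf[0], buf[1]);	/* 0x00040080 0x00000000 */
		return 0;
	}
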
-
-static int
-evo_init_dma(struct drm_device *dev, int ch)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvd0_display *disp = nvd0_display(dev);
-	u32 flags;
-
-	flags = 0x00000000;
-	if (ch == EVO_MASTER)
-		flags |= 0x01000000;
-
-	nv_wr32(device, 0x610494 + (ch * 0x0010), (disp->evo[ch].handle >> 8) | 3);
-	nv_wr32(device, 0x610498 + (ch * 0x0010), 0x00010000);
-	nv_wr32(device, 0x61049c + (ch * 0x0010), 0x00000001);
-	nv_mask(device, 0x610490 + (ch * 0x0010), 0x00000010, 0x00000010);
-	nv_wr32(device, 0x640000 + (ch * 0x1000), 0x00000000);
-	nv_wr32(device, 0x610490 + (ch * 0x0010), 0x00000013 | flags);
-	if (!nv_wait(device, 0x610490 + (ch * 0x0010), 0x80000000, 0x00000000)) {
-		NV_ERROR(drm, "PDISP: ch%d 0x%08x\n", ch,
-			      nv_rd32(device, 0x610490 + (ch * 0x0010)));
-		return -EBUSY;
-	}
-
-	nv_mask(device, 0x610090, (1 << ch), (1 << ch));
-	nv_mask(device, 0x6100a0, (1 << ch), (1 << ch));
-	return 0;
-}
-
-static void
-evo_fini_dma(struct drm_device *dev, int ch)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-
-	if (!(nv_rd32(device, 0x610490 + (ch * 0x0010)) & 0x00000010))
-		return;
-
-	nv_mask(device, 0x610490 + (ch * 0x0010), 0x00000010, 0x00000000);
-	nv_mask(device, 0x610490 + (ch * 0x0010), 0x00000003, 0x00000000);
-	nv_wait(device, 0x610490 + (ch * 0x0010), 0x80000000, 0x00000000);
-	nv_mask(device, 0x610090, (1 << ch), 0x00000000);
-	nv_mask(device, 0x6100a0, (1 << ch), 0x00000000);
-}
-
-static inline void
-evo_piow(struct drm_device *dev, int ch, u16 mthd, u32 data)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	nv_wr32(device, 0x640000 + (ch * 0x1000) + mthd, data);
-}
-
-static int
-evo_init_pio(struct drm_device *dev, int ch)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-
-	nv_wr32(device, 0x610490 + (ch * 0x0010), 0x00000001);
-	if (!nv_wait(device, 0x610490 + (ch * 0x0010), 0x00010000, 0x00010000)) {
-		NV_ERROR(drm, "PDISP: ch%d 0x%08x\n", ch,
-			      nv_rd32(device, 0x610490 + (ch * 0x0010)));
-		return -EBUSY;
-	}
-
-	nv_mask(device, 0x610090, (1 << ch), (1 << ch));
-	nv_mask(device, 0x6100a0, (1 << ch), (1 << ch));
-	return 0;
-}
-
-static void
-evo_fini_pio(struct drm_device *dev, int ch)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-
-	if (!(nv_rd32(device, 0x610490 + (ch * 0x0010)) & 0x00000001))
-		return;
-
-	nv_mask(device, 0x610490 + (ch * 0x0010), 0x00000010, 0x00000010);
-	nv_mask(device, 0x610490 + (ch * 0x0010), 0x00000001, 0x00000000);
-	nv_wait(device, 0x610490 + (ch * 0x0010), 0x00010000, 0x00000000);
-	nv_mask(device, 0x610090, (1 << ch), 0x00000000);
-	nv_mask(device, 0x6100a0, (1 << ch), 0x00000000);
-}
-
-static bool
-evo_sync_wait(void *data)
-{
-	return nouveau_bo_rd32(data, EVO_MAST_NTFY) != 0x00000000;
-}
-
-static int
-evo_sync(struct drm_device *dev, int ch)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nvd0_display *disp = nvd0_display(dev);
-	u32 *push = evo_wait(dev, ch, 8);
-	if (push) {
-		nouveau_bo_wr32(disp->sync, EVO_MAST_NTFY, 0x00000000);
-		evo_mthd(push, 0x0084, 1);
-		evo_data(push, 0x80000000 | EVO_MAST_NTFY);
-		evo_mthd(push, 0x0080, 2);
-		evo_data(push, 0x00000000);
-		evo_data(push, 0x00000000);
-		evo_kick(push, dev, ch);
-		if (nv_wait_cb(device, evo_sync_wait, disp->sync))
-			return 0;
-	}
-
-	return -EBUSY;
-}
-
-/******************************************************************************
- * Page flipping channel
- *****************************************************************************/
-struct nouveau_bo *
-nvd0_display_crtc_sema(struct drm_device *dev, int crtc)
-{
-	return nvd0_display(dev)->sync;
-}
-
-void
-nvd0_display_flip_stop(struct drm_crtc *crtc)
-{
-	struct nvd0_display *disp = nvd0_display(crtc->dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct evo *evo = &disp->evo[EVO_FLIP(nv_crtc->index)];
-	u32 *push;
-
-	push = evo_wait(crtc->dev, evo->idx, 8);
-	if (push) {
-		evo_mthd(push, 0x0084, 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x0094, 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x00c0, 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x0080, 1);
-		evo_data(push, 0x00000000);
-		evo_kick(push, crtc->dev, evo->idx);
-	}
-}
-
-int
-nvd0_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-		       struct nouveau_channel *chan, u32 swap_interval)
-{
-	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
-	struct nvd0_display *disp = nvd0_display(crtc->dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct evo *evo = &disp->evo[EVO_FLIP(nv_crtc->index)];
-	u64 offset;
-	u32 *push;
-	int ret;
-
-	swap_interval <<= 4;
-	if (swap_interval == 0)
-		swap_interval |= 0x100;
-
-	push = evo_wait(crtc->dev, evo->idx, 128);
-	if (unlikely(push == NULL))
-		return -EBUSY;
-
-	/* synchronise with the rendering channel, if necessary */
-	if (likely(chan)) {
-		ret = RING_SPACE(chan, 10);
-		if (ret)
-			return ret;
-
-		offset  = nvc0_fence_crtc(chan, nv_crtc->index);
-		offset += evo->sem.offset;
-
-		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 0xf00d0000 | evo->sem.value);
-		OUT_RING  (chan, 0x1002);
-		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset ^ 0x10));
-		OUT_RING  (chan, 0x74b1e000);
-		OUT_RING  (chan, 0x1001);
-		FIRE_RING (chan);
-	} else {
-		nouveau_bo_wr32(disp->sync, evo->sem.offset / 4,
-				0xf00d0000 | evo->sem.value);
-		evo_sync(crtc->dev, EVO_MASTER);
-	}
-
-	/* queue the flip */
-	evo_mthd(push, 0x0100, 1);
-	evo_data(push, 0xfffe0000);
-	evo_mthd(push, 0x0084, 1);
-	evo_data(push, swap_interval);
-	if (!(swap_interval & 0x00000100)) {
-		evo_mthd(push, 0x00e0, 1);
-		evo_data(push, 0x40000000);
-	}
-	evo_mthd(push, 0x0088, 4);
-	evo_data(push, evo->sem.offset);
-	evo_data(push, 0xf00d0000 | evo->sem.value);
-	evo_data(push, 0x74b1e000);
-	evo_data(push, NvEvoSync);
-	evo_mthd(push, 0x00a0, 2);
-	evo_data(push, 0x00000000);
-	evo_data(push, 0x00000000);
-	evo_mthd(push, 0x00c0, 1);
-	evo_data(push, nv_fb->r_dma);
-	evo_mthd(push, 0x0110, 2);
-	evo_data(push, 0x00000000);
-	evo_data(push, 0x00000000);
-	evo_mthd(push, 0x0400, 5);
-	evo_data(push, nv_fb->nvbo->bo.offset >> 8);
-	evo_data(push, 0);
-	evo_data(push, (fb->height << 16) | fb->width);
-	evo_data(push, nv_fb->r_pitch);
-	evo_data(push, nv_fb->r_format);
-	evo_mthd(push, 0x0080, 1);
-	evo_data(push, 0x00000000);
-	evo_kick(push, crtc->dev, evo->idx);
-
-	evo->sem.offset ^= 0x10;
-	evo->sem.value++;
-	return 0;
-}
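
Each flip waits on one of the two per-CRTC semaphore slots (EVO_FLIP_SEM0/SEM1 above, 0x10 bytes apart) and releases the other, so consecutive flips alternate slots while the expected value increments; the final two lines implement that by toggling the low offset bit and bumping the counter. A sketch of the resulting sequence (the starting slot here is illustrative):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t sem_offset = 0x00, sem_value = 0;	/* per-CRTC state */
		for (int flip = 0; flip < 4; flip++) {
			printf("flip %d: slot +0x%02x, value 0x%04x\n",
			       flip, sem_offset, sem_value);
			sem_offset ^= 0x10;	/* alternate SEM0/SEM1 */
			sem_value++;		/* next expected release value */
		}
		return 0;
	}
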
-
-/******************************************************************************
- * CRTC
- *****************************************************************************/
-static int
-nvd0_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool update)
-{
-	struct nouveau_drm *drm = nouveau_drm(nv_crtc->base.dev);
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct nouveau_connector *nv_connector;
-	struct drm_connector *connector;
-	u32 *push, mode = 0x00;
-	u32 mthd;
-
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	connector = &nv_connector->base;
-	if (nv_connector->dithering_mode == DITHERING_MODE_AUTO) {
-		if (nv_crtc->base.fb->depth > connector->display_info.bpc * 3)
-			mode = DITHERING_MODE_DYNAMIC2X2;
-	} else {
-		mode = nv_connector->dithering_mode;
-	}
-
-	if (nv_connector->dithering_depth == DITHERING_DEPTH_AUTO) {
-		if (connector->display_info.bpc >= 8)
-			mode |= DITHERING_DEPTH_8BPC;
-	} else {
-		mode |= nv_connector->dithering_depth;
-	}
-
-	if (nv_device(drm->device)->card_type < NV_E0)
-		mthd = 0x0490 + (nv_crtc->index * 0x0300);
-	else
-		mthd = 0x04a0 + (nv_crtc->index * 0x0300);
-
-	push = evo_wait(dev, EVO_MASTER, 4);
-	if (push) {
-		evo_mthd(push, mthd, 1);
-		evo_data(push, mode);
-		if (update) {
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-		}
-		evo_kick(push, dev, EVO_MASTER);
-	}
-
-	return 0;
-}
-
-static int
-nvd0_crtc_set_scale(struct nouveau_crtc *nv_crtc, bool update)
-{
-	struct drm_display_mode *omode, *umode = &nv_crtc->base.mode;
-	struct drm_device *dev = nv_crtc->base.dev;
-	struct drm_crtc *crtc = &nv_crtc->base;
-	struct nouveau_connector *nv_connector;
-	int mode = DRM_MODE_SCALE_NONE;
-	u32 oX, oY, *push;
-
-	/* start off at the resolution we programmed the crtc for, this
-	 * effectively handles NONE/FULL scaling
-	 */
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	if (nv_connector && nv_connector->native_mode)
-		mode = nv_connector->scaling_mode;
-
-	if (mode != DRM_MODE_SCALE_NONE)
-		omode = nv_connector->native_mode;
-	else
-		omode = umode;
-
-	oX = omode->hdisplay;
-	oY = omode->vdisplay;
-	if (omode->flags & DRM_MODE_FLAG_DBLSCAN)
-		oY *= 2;
-
-	/* add overscan compensation if necessary, will keep the aspect
-	 * ratio the same as the backend mode unless overridden by the
-	 * user setting both hborder and vborder properties.
-	 */
-	if (nv_connector && ( nv_connector->underscan == UNDERSCAN_ON ||
-			     (nv_connector->underscan == UNDERSCAN_AUTO &&
-			      nv_connector->edid &&
-			      drm_detect_hdmi_monitor(nv_connector->edid)))) {
-		u32 bX = nv_connector->underscan_hborder;
-		u32 bY = nv_connector->underscan_vborder;
-		u32 aspect = (oY << 19) / oX;
-
-		if (bX) {
-			oX -= (bX * 2);
-			if (bY) oY -= (bY * 2);
-			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
-		} else {
-			oX -= (oX >> 4) + 32;
-			if (bY) oY -= (bY * 2);
-			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
-		}
-	}
-
-	/* handle CENTER/ASPECT scaling, taking into account the areas
-	 * removed already for overscan compensation
-	 */
-	switch (mode) {
-	case DRM_MODE_SCALE_CENTER:
-		oX = min((u32)umode->hdisplay, oX);
-		oY = min((u32)umode->vdisplay, oY);
-		/* fall-through */
-	case DRM_MODE_SCALE_ASPECT:
-		if (oY < oX) {
-			u32 aspect = (umode->hdisplay << 19) / umode->vdisplay;
-			oX = ((oY * aspect) + (aspect / 2)) >> 19;
-		} else {
-			u32 aspect = (umode->vdisplay << 19) / umode->hdisplay;
-			oY = ((oX * aspect) + (aspect / 2)) >> 19;
-		}
-		break;
-	default:
-		break;
-	}
-
-	push = evo_wait(dev, EVO_MASTER, 8);
-	if (push) {
-		evo_mthd(push, 0x04c0 + (nv_crtc->index * 0x300), 3);
-		evo_data(push, (oY << 16) | oX);
-		evo_data(push, (oY << 16) | oX);
-		evo_data(push, (oY << 16) | oX);
-		evo_mthd(push, 0x0494 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x04b8 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, (umode->vdisplay << 16) | umode->hdisplay);
-		evo_kick(push, dev, EVO_MASTER);
-		if (update) {
-			nvd0_display_flip_stop(crtc);
-			nvd0_display_flip_next(crtc, crtc->fb, NULL, 1);
-		}
-	}
-
-	return 0;
-}
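
The border math above uses 19-bit fixed point for the aspect ratio. For a hypothetical 1920x1080 HDMI sink with no explicit borders set (illustrative values only), the automatic compensation removes (1920 >> 4) + 32 = 152 horizontal pixels and rescales the height to match:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t oX = 1920, oY = 1080;
		uint32_t aspect = (oY << 19) / oX;	/* 294912 == 0.5625 in .19 fixed point */

		oX -= (oX >> 4) + 32;			/* 1920 -> 1768 */
		oY = ((oX * aspect) + (aspect / 2)) >> 19;	/* rounds to 994 */
		printf("%ux%u\n", oX, oY);		/* 1768x994, still ~16:9 */
		return 0;
	}
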
-
-static int
-nvd0_crtc_set_image(struct nouveau_crtc *nv_crtc, struct drm_framebuffer *fb,
-		    int x, int y, bool update)
-{
-	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(fb);
-	u32 *push;
-
-	push = evo_wait(fb->dev, EVO_MASTER, 16);
-	if (push) {
-		evo_mthd(push, 0x0460 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, nvfb->nvbo->bo.offset >> 8);
-		evo_mthd(push, 0x0468 + (nv_crtc->index * 0x300), 4);
-		evo_data(push, (fb->height << 16) | fb->width);
-		evo_data(push, nvfb->r_pitch);
-		evo_data(push, nvfb->r_format);
-		evo_data(push, nvfb->r_dma);
-		evo_mthd(push, 0x04b0 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, (y << 16) | x);
-		if (update) {
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-		}
-		evo_kick(push, fb->dev, EVO_MASTER);
-	}
-
-	nv_crtc->fb.tile_flags = nvfb->r_dma;
-	return 0;
-}
-
-static void
-nvd0_crtc_cursor_show(struct nouveau_crtc *nv_crtc, bool show, bool update)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	u32 *push = evo_wait(dev, EVO_MASTER, 16);
-	if (push) {
-		if (show) {
-			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 2);
-			evo_data(push, 0x85000000);
-			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
-			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
-			evo_data(push, NvEvoVRAM);
-		} else {
-			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 1);
-			evo_data(push, 0x05000000);
-			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
-			evo_data(push, 0x00000000);
-		}
-
-		if (update) {
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-		}
-
-		evo_kick(push, dev, EVO_MASTER);
-	}
-}
-
-static void
-nvd0_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
-}
-
-static void
-nvd0_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	u32 *push;
-
-	nvd0_display_flip_stop(crtc);
-
-	push = evo_wait(crtc->dev, EVO_MASTER, 2);
-	if (push) {
-		evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, 0x03000000);
-		evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
-		evo_data(push, 0x00000000);
-		evo_kick(push, crtc->dev, EVO_MASTER);
-	}
-
-	nvd0_crtc_cursor_show(nv_crtc, false, false);
-}
-
-static void
-nvd0_crtc_commit(struct drm_crtc *crtc)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	u32 *push;
-
-	push = evo_wait(crtc->dev, EVO_MASTER, 32);
-	if (push) {
-		evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, nv_crtc->fb.tile_flags);
-		evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 4);
-		evo_data(push, 0x83000000);
-		evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
-		evo_data(push, 0x00000000);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
-		evo_data(push, NvEvoVRAM);
-		evo_mthd(push, 0x0430 + (nv_crtc->index * 0x300), 1);
-		evo_data(push, 0xffffff00);
-		evo_kick(push, crtc->dev, EVO_MASTER);
-	}
-
-	nvd0_crtc_cursor_show(nv_crtc, nv_crtc->cursor.visible, true);
-	nvd0_display_flip_next(crtc, crtc->fb, NULL, 1);
-}
-
-static bool
-nvd0_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode,
-		     struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
-static int
-nvd0_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
-{
-	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
-	int ret;
-
-	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
-	if (ret)
-		return ret;
-
-	if (old_fb) {
-		nvfb = nouveau_framebuffer(old_fb);
-		nouveau_bo_unpin(nvfb->nvbo);
-	}
-
-	return 0;
-}
-
-static int
-nvd0_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
-		   struct drm_display_mode *mode, int x, int y,
-		   struct drm_framebuffer *old_fb)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nouveau_connector *nv_connector;
-	u32 ilace = (mode->flags & DRM_MODE_FLAG_INTERLACE) ? 2 : 1;
-	u32 vscan = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1;
-	u32 hactive, hsynce, hbackp, hfrontp, hblanke, hblanks;
-	u32 vactive, vsynce, vbackp, vfrontp, vblanke, vblanks;
-	u32 vblan2e = 0, vblan2s = 1;
-	u32 *push;
-	int ret;
-
-	hactive = mode->htotal;
-	hsynce  = mode->hsync_end - mode->hsync_start - 1;
-	hbackp  = mode->htotal - mode->hsync_end;
-	hblanke = hsynce + hbackp;
-	hfrontp = mode->hsync_start - mode->hdisplay;
-	hblanks = mode->htotal - hfrontp - 1;
-
-	vactive = mode->vtotal * vscan / ilace;
-	vsynce  = ((mode->vsync_end - mode->vsync_start) * vscan / ilace) - 1;
-	vbackp  = (mode->vtotal - mode->vsync_end) * vscan / ilace;
-	vblanke = vsynce + vbackp;
-	vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
-	vblanks = vactive - vfrontp - 1;
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
-		vblan2e = vactive + vsynce + vbackp;
-		vblan2s = vblan2e + (mode->vdisplay * vscan / ilace);
-		vactive = (vactive * 2) + 1;
-	}
-
-	ret = nvd0_crtc_swap_fbs(crtc, old_fb);
-	if (ret)
-		return ret;
-
-	push = evo_wait(crtc->dev, EVO_MASTER, 64);
-	if (push) {
-		evo_mthd(push, 0x0410 + (nv_crtc->index * 0x300), 6);
-		evo_data(push, 0x00000000);
-		evo_data(push, (vactive << 16) | hactive);
-		evo_data(push, ( vsynce << 16) | hsynce);
-		evo_data(push, (vblanke << 16) | hblanke);
-		evo_data(push, (vblanks << 16) | hblanks);
-		evo_data(push, (vblan2e << 16) | vblan2s);
-		evo_mthd(push, 0x042c + (nv_crtc->index * 0x300), 1);
-		evo_data(push, 0x00000000); /* ??? */
-		evo_mthd(push, 0x0450 + (nv_crtc->index * 0x300), 3);
-		evo_data(push, mode->clock * 1000);
-		evo_data(push, 0x00200000); /* ??? */
-		evo_data(push, mode->clock * 1000);
-		evo_mthd(push, 0x04d0 + (nv_crtc->index * 0x300), 2);
-		evo_data(push, 0x00000311);
-		evo_data(push, 0x00000100);
-		evo_kick(push, crtc->dev, EVO_MASTER);
-	}
-
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	nvd0_crtc_set_dither(nv_crtc, false);
-	nvd0_crtc_set_scale(nv_crtc, false);
-	nvd0_crtc_set_image(nv_crtc, crtc->fb, x, y, false);
-	return 0;
-}
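
The blanking arithmetic above derives the hardware timing words from the DRM mode. Using the standard CEA 1080p60 timings as a worked example (1920/2008/2052/2200 horizontal, 1080/1084/1089/1125 vertical, progressive):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* CEA-861 1080p60: hdisplay/hsync_start/hsync_end/htotal, same for v */
		uint32_t hdisplay = 1920, hsync_start = 2008, hsync_end = 2052, htotal = 2200;
		uint32_t vdisplay = 1080, vsync_start = 1084, vsync_end = 1089, vtotal = 1125;
		uint32_t ilace = 1, vscan = 1;	/* progressive, no doublescan */

		uint32_t hsynce  = hsync_end - hsync_start - 1;			/* 43 */
		uint32_t hbackp  = htotal - hsync_end;				/* 148 */
		uint32_t hblanke = hsynce + hbackp;				/* 191 */
		uint32_t hfrontp = hsync_start - hdisplay;			/* 88 */
		uint32_t hblanks = htotal - hfrontp - 1;			/* 2111 */

		uint32_t vactive = vtotal * vscan / ilace;			/* 1125 */
		uint32_t vsynce  = (vsync_end - vsync_start) * vscan / ilace - 1; /* 4 */
		uint32_t vbackp  = (vtotal - vsync_end) * vscan / ilace;	/* 36 */
		uint32_t vblanke = vsynce + vbackp;				/* 40 */
		uint32_t vfrontp = (vsync_start - vdisplay) * vscan / ilace;	/* 4 */
		uint32_t vblanks = vactive - vfrontp - 1;			/* 1120 */

		printf("h: synce %u blanke %u blanks %u\n", hsynce, hblanke, hblanks);
		printf("v: synce %u blanke %u blanks %u\n", vsynce, vblanke, vblanks);
		return 0;
	}
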
-
-static int
-nvd0_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
-			struct drm_framebuffer *old_fb)
-{
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	int ret;
-
-	if (!crtc->fb) {
-		NV_DEBUG(drm, "No FB bound\n");
-		return 0;
-	}
-
-	ret = nvd0_crtc_swap_fbs(crtc, old_fb);
-	if (ret)
-		return ret;
-
-	nvd0_display_flip_stop(crtc);
-	nvd0_crtc_set_image(nv_crtc, crtc->fb, x, y, true);
-	nvd0_display_flip_next(crtc, crtc->fb, NULL, 1);
-	return 0;
-}
-
-static int
-nvd0_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb, int x, int y,
-			       enum mode_set_atomic state)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	nvd0_display_flip_stop(crtc);
-	nvd0_crtc_set_image(nv_crtc, fb, x, y, true);
-	return 0;
-}
-
-static void
-nvd0_crtc_lut_load(struct drm_crtc *crtc)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	void __iomem *lut = nvbo_kmap_obj_iovirtual(nv_crtc->lut.nvbo);
-	int i;
-
-	for (i = 0; i < 256; i++) {
-		writew(0x6000 + (nv_crtc->lut.r[i] >> 2), lut + (i * 0x20) + 0);
-		writew(0x6000 + (nv_crtc->lut.g[i] >> 2), lut + (i * 0x20) + 2);
-		writew(0x6000 + (nv_crtc->lut.b[i] >> 2), lut + (i * 0x20) + 4);
-	}
-}
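
Each LUT slot above is a 0x20-byte record holding 16-bit red/green/blue words; the 16-bit gamma values are scaled down to a 14-bit range and biased by 0x6000 before being written. A few sample components pack as:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint16_t gamma[3] = { 0x0000, 0x8000, 0xffff };
		for (int i = 0; i < 3; i++)
			printf("0x%04x -> 0x%04x\n", gamma[i],
			       (uint16_t)(0x6000 + (gamma[i] >> 2)));
		/* 0x0000 -> 0x6000, 0x8000 -> 0x8000, 0xffff -> 0x9fff */
		return 0;
	}
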
-
-static int
-nvd0_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
-		     uint32_t handle, uint32_t width, uint32_t height)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct drm_device *dev = crtc->dev;
-	struct drm_gem_object *gem;
-	struct nouveau_bo *nvbo;
-	bool visible = (handle != 0);
-	int i, ret = 0;
-
-	if (visible) {
-		if (width != 64 || height != 64)
-			return -EINVAL;
-
-		gem = drm_gem_object_lookup(dev, file_priv, handle);
-		if (unlikely(!gem))
-			return -ENOENT;
-		nvbo = nouveau_gem_object(gem);
-
-		ret = nouveau_bo_map(nvbo);
-		if (ret == 0) {
-			for (i = 0; i < 64 * 64; i++) {
-				u32 v = nouveau_bo_rd32(nvbo, i);
-				nouveau_bo_wr32(nv_crtc->cursor.nvbo, i, v);
-			}
-			nouveau_bo_unmap(nvbo);
-		}
-
-		drm_gem_object_unreference_unlocked(gem);
-	}
-
-	if (visible != nv_crtc->cursor.visible) {
-		nvd0_crtc_cursor_show(nv_crtc, visible, true);
-		nv_crtc->cursor.visible = visible;
-	}
-
-	return ret;
-}
-
-static int
-nvd0_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	int ch = EVO_CURS(nv_crtc->index);
-
-	evo_piow(crtc->dev, ch, 0x0084, (y << 16) | (x & 0xffff));
-	evo_piow(crtc->dev, ch, 0x0080, 0x00000000);
-	return 0;
-}
-
-static void
-nvd0_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
-		    uint32_t start, uint32_t size)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	u32 end = min(start + size, (u32)256);
-	u32 i;
-
-	for (i = start; i < end; i++) {
-		nv_crtc->lut.r[i] = r[i];
-		nv_crtc->lut.g[i] = g[i];
-		nv_crtc->lut.b[i] = b[i];
-	}
-
-	nvd0_crtc_lut_load(crtc);
-}
-
-static void
-nvd0_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
-	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
-	nouveau_bo_unmap(nv_crtc->lut.nvbo);
-	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
-	drm_crtc_cleanup(crtc);
-	kfree(crtc);
-}
-
-static const struct drm_crtc_helper_funcs nvd0_crtc_hfunc = {
-	.dpms = nvd0_crtc_dpms,
-	.prepare = nvd0_crtc_prepare,
-	.commit = nvd0_crtc_commit,
-	.mode_fixup = nvd0_crtc_mode_fixup,
-	.mode_set = nvd0_crtc_mode_set,
-	.mode_set_base = nvd0_crtc_mode_set_base,
-	.mode_set_base_atomic = nvd0_crtc_mode_set_base_atomic,
-	.load_lut = nvd0_crtc_lut_load,
-};
-
-static const struct drm_crtc_funcs nvd0_crtc_func = {
-	.cursor_set = nvd0_crtc_cursor_set,
-	.cursor_move = nvd0_crtc_cursor_move,
-	.gamma_set = nvd0_crtc_gamma_set,
-	.set_config = drm_crtc_helper_set_config,
-	.destroy = nvd0_crtc_destroy,
-	.page_flip = nouveau_crtc_page_flip,
-};
-
-static void
-nvd0_cursor_set_pos(struct nouveau_crtc *nv_crtc, int x, int y)
-{
-}
-
-static void
-nvd0_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
-{
-}
-
-static int
-nvd0_crtc_create(struct drm_device *dev, int index)
-{
-	struct nouveau_crtc *nv_crtc;
-	struct drm_crtc *crtc;
-	int ret, i;
-
-	nv_crtc = kzalloc(sizeof(*nv_crtc), GFP_KERNEL);
-	if (!nv_crtc)
-		return -ENOMEM;
-
-	nv_crtc->index = index;
-	nv_crtc->set_dither = nvd0_crtc_set_dither;
-	nv_crtc->set_scale = nvd0_crtc_set_scale;
-	nv_crtc->cursor.set_offset = nvd0_cursor_set_offset;
-	nv_crtc->cursor.set_pos = nvd0_cursor_set_pos;
-	for (i = 0; i < 256; i++) {
-		nv_crtc->lut.r[i] = i << 8;
-		nv_crtc->lut.g[i] = i << 8;
-		nv_crtc->lut.b[i] = i << 8;
-	}
-
-	crtc = &nv_crtc->base;
-	drm_crtc_init(dev, crtc, &nvd0_crtc_func);
-	drm_crtc_helper_add(crtc, &nvd0_crtc_hfunc);
-	drm_mode_crtc_set_gamma_size(crtc, 256);
-
-	ret = nouveau_bo_new(dev, 64 * 64 * 4, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, &nv_crtc->cursor.nvbo);
-	if (!ret) {
-		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
-		if (!ret)
-			ret = nouveau_bo_map(nv_crtc->cursor.nvbo);
-		if (ret)
-			nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
-	}
-
-	if (ret)
-		goto out;
-
-	ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, &nv_crtc->lut.nvbo);
-	if (!ret) {
-		ret = nouveau_bo_pin(nv_crtc->lut.nvbo, TTM_PL_FLAG_VRAM);
-		if (!ret)
-			ret = nouveau_bo_map(nv_crtc->lut.nvbo);
-		if (ret)
-			nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
-	}
-
-	if (ret)
-		goto out;
-
-	nvd0_crtc_lut_load(crtc);
-
-out:
-	if (ret)
-		nvd0_crtc_destroy(crtc);
-	return ret;
-}
-
-/******************************************************************************
- * DAC
- *****************************************************************************/
-static void
-nvd0_dac_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_device *device = nouveau_dev(dev);
-	int or = nv_encoder->or;
-	u32 dpms_ctrl;
-
-	dpms_ctrl = 0x80000000;
-	if (mode == DRM_MODE_DPMS_STANDBY || mode == DRM_MODE_DPMS_OFF)
-		dpms_ctrl |= 0x00000001;
-	if (mode == DRM_MODE_DPMS_SUSPEND || mode == DRM_MODE_DPMS_OFF)
-		dpms_ctrl |= 0x00000004;
-
-	nv_wait(device, 0x61a004 + (or * 0x0800), 0x80000000, 0x00000000);
-	nv_mask(device, 0x61a004 + (or * 0x0800), 0xc000007f, dpms_ctrl);
-	nv_wait(device, 0x61a004 + (or * 0x0800), 0x80000000, 0x00000000);
-}
-
-static bool
-nvd0_dac_mode_fixup(struct drm_encoder *encoder,
-		    const struct drm_display_mode *mode,
-		    struct drm_display_mode *adjusted_mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *nv_connector;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (nv_connector && nv_connector->native_mode) {
-		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
-			int id = adjusted_mode->base.id;
-			*adjusted_mode = *nv_connector->native_mode;
-			adjusted_mode->base.id = id;
-		}
-	}
-
-	return true;
-}
-
-static void
-nvd0_dac_commit(struct drm_encoder *encoder)
-{
-}
-
-static void
-nvd0_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
-		  struct drm_display_mode *adjusted_mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	u32 syncs, magic, *push;
-
-	syncs = 0x00000001;
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		syncs |= 0x00000008;
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		syncs |= 0x00000010;
-
-	magic = 0x31ec6000 | (nv_crtc->index << 25);
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-		magic |= 0x00000001;
-
-	nvd0_dac_dpms(encoder, DRM_MODE_DPMS_ON);
-
-	push = evo_wait(encoder->dev, EVO_MASTER, 8);
-	if (push) {
-		evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2);
-		evo_data(push, syncs);
-		evo_data(push, magic);
-		evo_mthd(push, 0x0180 + (nv_encoder->or * 0x020), 2);
-		evo_data(push, 1 << nv_crtc->index);
-		evo_data(push, 0x00ff);
-		evo_kick(push, encoder->dev, EVO_MASTER);
-	}
-
-	nv_encoder->crtc = encoder->crtc;
-}
-
-static void
-nvd0_dac_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	u32 *push;
-
-	if (nv_encoder->crtc) {
-		nvd0_crtc_prepare(nv_encoder->crtc);
-
-		push = evo_wait(dev, EVO_MASTER, 4);
-		if (push) {
-			evo_mthd(push, 0x0180 + (nv_encoder->or * 0x20), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-			evo_kick(push, dev, EVO_MASTER);
-		}
-
-		nv_encoder->crtc = NULL;
-	}
-}
-
-static enum drm_connector_status
-nvd0_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
-{
-	enum drm_connector_status status = connector_status_disconnected;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_device *device = nouveau_dev(dev);
-	int or = nv_encoder->or;
-	u32 load;
-
-	nv_wr32(device, 0x61a00c + (or * 0x800), 0x00100000);
-	udelay(9500);
-	nv_wr32(device, 0x61a00c + (or * 0x800), 0x80000000);
-
-	load = nv_rd32(device, 0x61a00c + (or * 0x800));
-	if ((load & 0x38000000) == 0x38000000)
-		status = connector_status_connected;
-
-	nv_wr32(device, 0x61a00c + (or * 0x800), 0x00000000);
-	return status;
-}
-
-static void
-nvd0_dac_destroy(struct drm_encoder *encoder)
-{
-	drm_encoder_cleanup(encoder);
-	kfree(encoder);
-}
-
-static const struct drm_encoder_helper_funcs nvd0_dac_hfunc = {
-	.dpms = nvd0_dac_dpms,
-	.mode_fixup = nvd0_dac_mode_fixup,
-	.prepare = nvd0_dac_disconnect,
-	.commit = nvd0_dac_commit,
-	.mode_set = nvd0_dac_mode_set,
-	.disable = nvd0_dac_disconnect,
-	.get_crtc = nvd0_display_crtc_get,
-	.detect = nvd0_dac_detect
-};
-
-static const struct drm_encoder_funcs nvd0_dac_func = {
-	.destroy = nvd0_dac_destroy,
-};
-
-static int
-nvd0_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
-{
-	struct drm_device *dev = connector->dev;
-	struct nouveau_encoder *nv_encoder;
-	struct drm_encoder *encoder;
-
-	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
-	if (!nv_encoder)
-		return -ENOMEM;
-	nv_encoder->dcb = dcbe;
-	nv_encoder->or = ffs(dcbe->or) - 1;
-
-	encoder = to_drm_encoder(nv_encoder);
-	encoder->possible_crtcs = dcbe->heads;
-	encoder->possible_clones = 0;
-	drm_encoder_init(dev, encoder, &nvd0_dac_func, DRM_MODE_ENCODER_DAC);
-	drm_encoder_helper_add(encoder, &nvd0_dac_hfunc);
-
-	drm_mode_connector_attach_encoder(connector, encoder);
-	return 0;
-}
-
-/******************************************************************************
- * Audio
- *****************************************************************************/
-static void
-nvd0_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *nv_connector;
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_device *device = nouveau_dev(dev);
-	int i, or = nv_encoder->or * 0x30;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!drm_detect_monitor_audio(nv_connector->edid))
-		return;
-
-	nv_mask(device, 0x10ec10 + or, 0x80000003, 0x80000001);
-
-	drm_edid_to_eld(&nv_connector->base, nv_connector->edid);
-	if (nv_connector->base.eld[0]) {
-		u8 *eld = nv_connector->base.eld;
-
-		for (i = 0; i < eld[2] * 4; i++)
-			nv_wr32(device, 0x10ec00 + or, (i << 8) | eld[i]);
-		for (i = eld[2] * 4; i < 0x60; i++)
-			nv_wr32(device, 0x10ec00 + or, (i << 8) | 0x00);
-
-		nv_mask(device, 0x10ec10 + or, 0x80000002, 0x80000002);
-	}
-}
-
-static void
-nvd0_audio_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_device *device = nouveau_dev(dev);
-	int or = nv_encoder->or * 0x30;
-
-	nv_mask(device, 0x10ec10 + or, 0x80000003, 0x80000000);
-}
-
-/******************************************************************************
- * HDMI
- *****************************************************************************/
-static void
-nvd0_hdmi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	struct nouveau_connector *nv_connector;
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_device *device = nouveau_dev(dev);
-	int head = nv_crtc->index * 0x800;
-	u32 rekey = 56; /* binary driver, and tegra constant */
-	u32 max_ac_packet;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!drm_detect_hdmi_monitor(nv_connector->edid))
-		return;
-
-	max_ac_packet  = mode->htotal - mode->hdisplay;
-	max_ac_packet -= rekey;
-	max_ac_packet -= 18; /* constant from tegra */
-	max_ac_packet /= 32;
-
-	/* AVI InfoFrame */
-	nv_mask(device, 0x616714 + head, 0x00000001, 0x00000000);
-	nv_wr32(device, 0x61671c + head, 0x000d0282);
-	nv_wr32(device, 0x616720 + head, 0x0000006f);
-	nv_wr32(device, 0x616724 + head, 0x00000000);
-	nv_wr32(device, 0x616728 + head, 0x00000000);
-	nv_wr32(device, 0x61672c + head, 0x00000000);
-	nv_mask(device, 0x616714 + head, 0x00000001, 0x00000001);
-
-	/* ??? InfoFrame? */
-	nv_mask(device, 0x6167a4 + head, 0x00000001, 0x00000000);
-	nv_wr32(device, 0x6167ac + head, 0x00000010);
-	nv_mask(device, 0x6167a4 + head, 0x00000001, 0x00000001);
-
-	/* HDMI_CTRL */
-	nv_mask(device, 0x616798 + head, 0x401f007f, 0x40000000 | rekey |
-						  max_ac_packet << 16);
-
-	/* NFI, audio doesn't work without it though.. */
-	nv_mask(device, 0x616548 + head, 0x00000070, 0x00000000);
-
-	nvd0_audio_mode_set(encoder, mode);
-}
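
The max_ac_packet value above is the horizontal blanking budget left for HDMI data-island packets, in 32-pixel units, after the 56-pixel rekey window and the 18-pixel constant are subtracted. For a hypothetical 1080p mode (2200 pixels total, 1920 active):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t htotal = 2200, hdisplay = 1920, rekey = 56;
		uint32_t max_ac_packet = htotal - hdisplay;	/* 280 */
		max_ac_packet -= rekey;				/* 224 */
		max_ac_packet -= 18;				/* 206 */
		max_ac_packet /= 32;				/* 6 */
		printf("max_ac_packet = %u\n", max_ac_packet);
		return 0;
	}
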
-
-static void
-nvd0_hdmi_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_device *device = nouveau_dev(dev);
-	int head = nv_crtc->index * 0x800;
-
-	nvd0_audio_disconnect(encoder);
-
-	nv_mask(device, 0x616798 + head, 0x40000000, 0x00000000);
-	nv_mask(device, 0x6167a4 + head, 0x00000001, 0x00000000);
-	nv_mask(device, 0x616714 + head, 0x00000001, 0x00000000);
-}
-
-/******************************************************************************
- * SOR
- *****************************************************************************/
-static inline u32
-nvd0_sor_dp_lane_map(struct drm_device *dev, struct dcb_output *dcb, u8 lane)
-{
-	static const u8 nvd0[] = { 16, 8, 0, 24 };
-	return nvd0[lane];
-}
-
-static void
-nvd0_sor_dp_train_set(struct drm_device *dev, struct dcb_output *dcb, u8 pattern)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
-	const u32 loff = (or * 0x800) + (link * 0x80);
-	nv_mask(device, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern);
-}
-
-static void
-nvd0_sor_dp_train_adj(struct drm_device *dev, struct dcb_output *dcb,
-		      u8 lane, u8 swing, u8 preem)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
-	const u32 loff = (or * 0x800) + (link * 0x80);
-	u32 shift = nvd0_sor_dp_lane_map(dev, dcb, lane);
-	u32 mask = 0x000000ff << shift;
-	u8 *table, *entry, *config = NULL;
-
-	switch (swing) {
-	case 0: preem += 0; break;
-	case 1: preem += 4; break;
-	case 2: preem += 7; break;
-	case 3: preem += 9; break;
-	}
-
-	table = nouveau_dp_bios_data(dev, dcb, &entry);
-	if (table) {
-		if (table[0] == 0x30) {
-			config  = entry + table[4];
-			config += table[5] * preem;
-		} else
-		if (table[0] == 0x40) {
-			config  = table + table[1];
-			config += table[2] * table[3];
-			config += table[6] * preem;
-		}
-	}
-
-	if (!config) {
-		NV_ERROR(drm, "PDISP: unsupported DP table for chipset\n");
-		return;
-	}
-
-	nv_mask(device, 0x61c118 + loff, mask, config[1] << shift);
-	nv_mask(device, 0x61c120 + loff, mask, config[2] << shift);
-	nv_mask(device, 0x61c130 + loff, 0x0000ff00, config[3] << 8);
-	nv_mask(device, 0x61c13c + loff, 0x00000000, 0x00000000);
-}
-
-static void
-nvd0_sor_dp_link_set(struct drm_device *dev, struct dcb_output *dcb, int crtc,
-		     int link_nr, u32 link_bw, bool enhframe)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
-	const u32 loff = (or * 0x800) + (link * 0x80);
-	const u32 soff = (or * 0x800);
-	u32 dpctrl = nv_rd32(device, 0x61c10c + loff) & ~0x001f4000;
-	u32 clksor = nv_rd32(device, 0x612300 + soff) & ~0x007c0000;
-	u32 script = 0x0000, lane_mask = 0;
-	u8 *table, *entry;
-	int i;
-
-	link_bw /= 27000;
-
-	table = nouveau_dp_bios_data(dev, dcb, &entry);
-	if (table) {
-		if      (table[0] == 0x30) entry = ROMPTR(dev, entry[10]);
-		else if (table[0] == 0x40) entry = ROMPTR(dev, entry[9]);
-		else                       entry = NULL;
-
-		while (entry) {
-			if (entry[0] >= link_bw)
-				break;
-			entry += 3;
-		}
-
-		nouveau_bios_run_init_table(dev, script, dcb, crtc);
-	}
-
-	clksor |= link_bw << 18;
-	dpctrl |= ((1 << link_nr) - 1) << 16;
-	if (enhframe)
-		dpctrl |= 0x00004000;
-
-	for (i = 0; i < link_nr; i++)
-		lane_mask |= 1 << (nvd0_sor_dp_lane_map(dev, dcb, i) >> 3);
-
-	nv_wr32(device, 0x612300 + soff, clksor);
-	nv_wr32(device, 0x61c10c + loff, dpctrl);
-	nv_mask(device, 0x61c130 + loff, 0x0000000f, lane_mask);
-}
-
-static void
-nvd0_sor_dp_link_get(struct drm_device *dev, struct dcb_output *dcb,
-		     u32 *link_nr, u32 *link_bw)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	const u32 or = ffs(dcb->or) - 1, link = !(dcb->sorconf.link & 1);
-	const u32 loff = (or * 0x800) + (link * 0x80);
-	const u32 soff = (or * 0x800);
-	u32 dpctrl = nv_rd32(device, 0x61c10c + loff) & 0x000f0000;
-	u32 clksor = nv_rd32(device, 0x612300 + soff);
-
-	if      (dpctrl > 0x00030000) *link_nr = 4;
-	else if (dpctrl > 0x00010000) *link_nr = 2;
-	else			      *link_nr = 1;
-
-	*link_bw  = (clksor & 0x007c0000) >> 18;
-	*link_bw *= 27000;
-}
-
-static void
-nvd0_sor_dp_calc_tu(struct drm_device *dev, struct dcb_output *dcb,
-		    u32 crtc, u32 datarate)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	const u32 symbol = 100000;
-	const u32 TU = 64;
-	u32 link_nr, link_bw;
-	u64 ratio, value;
-
-	nvd0_sor_dp_link_get(dev, dcb, &link_nr, &link_bw);
-
-	ratio  = datarate;
-	ratio *= symbol;
-	do_div(ratio, link_nr * link_bw);
-
-	value  = (symbol - ratio) * TU;
-	value *= ratio;
-	do_div(value, symbol);
-	do_div(value, symbol);
-
-	value += 5;
-	value |= 0x08000000;
-
-	nv_wr32(device, 0x616610 + (crtc * 0x800), value);
-}
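
Unlike the nv50 path, which searches TU sizes, this version fixes TU at 64 and computes the fill value directly. With the same hypothetical 1080p/24bpp/4-lane numbers used earlier (illustrative only), the arithmetic gives:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		const uint64_t symbol = 100000, TU = 64;
		/* hypothetical: 148500 kHz * 24bpp / 8 bits, 4 lanes @ 2.7Gb/s */
		uint64_t datarate = 148500 * 24 / 8, link_nr = 4, link_bw = 270000;

		uint64_t ratio = datarate * symbol / (link_nr * link_bw); /* 41250 */
		uint64_t value = (symbol - ratio) * TU;                   /* 3760000 */
		value *= ratio;
		value /= symbol;
		value /= symbol;                                          /* 15 */
		value += 5;
		value |= 0x08000000;
		printf("0x%08llx\n", (unsigned long long)value);          /* 0x08000014 */
		return 0;
	}
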
-
-static void
-nvd0_sor_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct drm_encoder *partner;
-	int or = nv_encoder->or;
-	u32 dpms_ctrl;
-
-	nv_encoder->last_dpms = mode;
-
-	list_for_each_entry(partner, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nv_partner = nouveau_encoder(partner);
-
-		if (partner->encoder_type != DRM_MODE_ENCODER_TMDS)
-			continue;
-
-		if (nv_partner != nv_encoder &&
-		    nv_partner->dcb->or == nv_encoder->dcb->or) {
-			if (nv_partner->last_dpms == DRM_MODE_DPMS_ON)
-				return;
-			break;
-		}
-	}
-
-	dpms_ctrl  = (mode == DRM_MODE_DPMS_ON);
-	dpms_ctrl |= 0x80000000;
-
-	nv_wait(device, 0x61c004 + (or * 0x0800), 0x80000000, 0x00000000);
-	nv_mask(device, 0x61c004 + (or * 0x0800), 0x80000001, dpms_ctrl);
-	nv_wait(device, 0x61c004 + (or * 0x0800), 0x80000000, 0x00000000);
-	nv_wait(device, 0x61c030 + (or * 0x0800), 0x10000000, 0x00000000);
-
-	if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
-		struct dp_train_func func = {
-			.link_set = nvd0_sor_dp_link_set,
-			.train_set = nvd0_sor_dp_train_set,
-			.train_adj = nvd0_sor_dp_train_adj
-		};
-
-		nouveau_dp_dpms(encoder, mode, nv_encoder->dp.datarate, &func);
-	}
-}
-
-static bool
-nvd0_sor_mode_fixup(struct drm_encoder *encoder,
-		    const struct drm_display_mode *mode,
-		    struct drm_display_mode *adjusted_mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *nv_connector;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (nv_connector && nv_connector->native_mode) {
-		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
-			int id = adjusted_mode->base.id;
-			*adjusted_mode = *nv_connector->native_mode;
-			adjusted_mode->base.id = id;
-		}
-	}
-
-	return true;
-}
-
-static void
-nvd0_sor_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	u32 *push;
-
-	if (nv_encoder->crtc) {
-		nvd0_crtc_prepare(nv_encoder->crtc);
-
-		push = evo_wait(dev, EVO_MASTER, 4);
-		if (push) {
-			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-			evo_kick(push, dev, EVO_MASTER);
-		}
-
-		nvd0_hdmi_disconnect(encoder);
-
-		nv_encoder->crtc = NULL;
-		nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
-	}
-}
-
-static void
-nvd0_sor_prepare(struct drm_encoder *encoder)
-{
-	nvd0_sor_disconnect(encoder);
-	if (nouveau_encoder(encoder)->dcb->type == DCB_OUTPUT_DP)
-		evo_sync(encoder->dev, EVO_MASTER);
-}
-
-static void
-nvd0_sor_commit(struct drm_encoder *encoder)
-{
-}
-
-static void
-nvd0_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
-		  struct drm_display_mode *mode)
-{
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	struct nouveau_connector *nv_connector;
-	struct nvbios *bios = &drm->vbios;
-	u32 mode_ctrl = (1 << nv_crtc->index);
-	u32 syncs, magic, *push;
-	u32 or_config;
-
-	syncs = 0x00000001;
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		syncs |= 0x00000008;
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		syncs |= 0x00000010;
-
-	magic = 0x31ec6000 | (nv_crtc->index << 25);
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-		magic |= 0x00000001;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	switch (nv_encoder->dcb->type) {
-	case DCB_OUTPUT_TMDS:
-		if (nv_encoder->dcb->sorconf.link & 1) {
-			if (mode->clock < 165000)
-				mode_ctrl |= 0x00000100;
-			else
-				mode_ctrl |= 0x00000500;
-		} else {
-			mode_ctrl |= 0x00000200;
-		}
-
-		or_config = (mode_ctrl & 0x00000f00) >> 8;
-		if (mode->clock >= 165000)
-			or_config |= 0x0100;
-
-		nvd0_hdmi_mode_set(encoder, mode);
-		break;
-	case DCB_OUTPUT_LVDS:
-		or_config = (mode_ctrl & 0x00000f00) >> 8;
-		if (bios->fp_no_ddc) {
-			if (bios->fp.dual_link)
-				or_config |= 0x0100;
-			if (bios->fp.if_is_24bit)
-				or_config |= 0x0200;
-		} else {
-			if (nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) {
-				if (((u8 *)nv_connector->edid)[121] == 2)
-					or_config |= 0x0100;
-			} else
-			if (mode->clock >= bios->fp.duallink_transition_clk) {
-				or_config |= 0x0100;
-			}
-
-			if (or_config & 0x0100) {
-				if (bios->fp.strapless_is_24bit & 2)
-					or_config |= 0x0200;
-			} else {
-				if (bios->fp.strapless_is_24bit & 1)
-					or_config |= 0x0200;
-			}
-
-			if (nv_connector->base.display_info.bpc == 8)
-				or_config |= 0x0200;
-
-		}
-		break;
-	case DCB_OUTPUT_DP:
-		if (nv_connector->base.display_info.bpc == 6) {
-			nv_encoder->dp.datarate = mode->clock * 18 / 8;
-			syncs |= 0x00000002 << 6;
-		} else {
-			nv_encoder->dp.datarate = mode->clock * 24 / 8;
-			syncs |= 0x00000005 << 6;
-		}
-
-		if (nv_encoder->dcb->sorconf.link & 1)
-			mode_ctrl |= 0x00000800;
-		else
-			mode_ctrl |= 0x00000900;
-
-		or_config = (mode_ctrl & 0x00000f00) >> 8;
-		break;
-	default:
-		BUG_ON(1);
-		break;
-	}
-
-	nvd0_sor_dpms(encoder, DRM_MODE_DPMS_ON);
-
-	if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
-		nvd0_sor_dp_calc_tu(dev, nv_encoder->dcb, nv_crtc->index,
-					 nv_encoder->dp.datarate);
-	}
-
-	push = evo_wait(dev, EVO_MASTER, 8);
-	if (push) {
-		evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2);
-		evo_data(push, syncs);
-		evo_data(push, magic);
-		evo_mthd(push, 0x0200 + (nv_encoder->or * 0x020), 2);
-		evo_data(push, mode_ctrl);
-		evo_data(push, or_config);
-		evo_kick(push, dev, EVO_MASTER);
-	}
-
-	nv_encoder->crtc = encoder->crtc;
-}
-
-static void
-nvd0_sor_destroy(struct drm_encoder *encoder)
-{
-	drm_encoder_cleanup(encoder);
-	kfree(encoder);
-}
-
-static const struct drm_encoder_helper_funcs nvd0_sor_hfunc = {
-	.dpms = nvd0_sor_dpms,
-	.mode_fixup = nvd0_sor_mode_fixup,
-	.prepare = nvd0_sor_prepare,
-	.commit = nvd0_sor_commit,
-	.mode_set = nvd0_sor_mode_set,
-	.disable = nvd0_sor_disconnect,
-	.get_crtc = nvd0_display_crtc_get,
-};
-
-static const struct drm_encoder_funcs nvd0_sor_func = {
-	.destroy = nvd0_sor_destroy,
-};
-
-static int
-nvd0_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
-{
-	struct drm_device *dev = connector->dev;
-	struct nouveau_encoder *nv_encoder;
-	struct drm_encoder *encoder;
-
-	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
-	if (!nv_encoder)
-		return -ENOMEM;
-	nv_encoder->dcb = dcbe;
-	nv_encoder->or = ffs(dcbe->or) - 1;
-	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
-
-	encoder = to_drm_encoder(nv_encoder);
-	encoder->possible_crtcs = dcbe->heads;
-	encoder->possible_clones = 0;
-	drm_encoder_init(dev, encoder, &nvd0_sor_func, DRM_MODE_ENCODER_TMDS);
-	drm_encoder_helper_add(encoder, &nvd0_sor_hfunc);
-
-	drm_mode_connector_attach_encoder(connector, encoder);
-	return 0;
-}
-
-/******************************************************************************
- * IRQ
- *****************************************************************************/
-static struct dcb_output *
-lookup_dcb(struct drm_device *dev, int id, u32 mc)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	int type, or, i, link = -1;
-
-	if (id < 4) {
-		type = DCB_OUTPUT_ANALOG;
-		or   = id;
-	} else {
-		switch (mc & 0x00000f00) {
-		case 0x00000000: link = 0; type = DCB_OUTPUT_LVDS; break;
-		case 0x00000100: link = 0; type = DCB_OUTPUT_TMDS; break;
-		case 0x00000200: link = 1; type = DCB_OUTPUT_TMDS; break;
-		case 0x00000500: link = 0; type = DCB_OUTPUT_TMDS; break;
-		case 0x00000800: link = 0; type = DCB_OUTPUT_DP; break;
-		case 0x00000900: link = 1; type = DCB_OUTPUT_DP; break;
-		default:
-			NV_ERROR(drm, "PDISP: unknown SOR mc 0x%08x\n", mc);
-			return NULL;
-		}
-
-		or = id - 4;
-	}
-
-	for (i = 0; i < drm->vbios.dcb.entries; i++) {
-		struct dcb_output *dcb = &drm->vbios.dcb.entry[i];
-		if (dcb->type == type && (dcb->or & (1 << or)) &&
-		    (link < 0 || link == !(dcb->sorconf.link & 1)))
-			return dcb;
-	}
-
-	NV_ERROR(drm, "PDISP: DCB for %d/0x%08x not found\n", id, mc);
-	return NULL;
-}
-
-static void
-nvd0_display_unk1_handler(struct drm_device *dev, u32 crtc, u32 mask)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct dcb_output *dcb;
-	int i;
-
-	for (i = 0; mask && i < 8; i++) {
-		u32 mcc = nv_rd32(device, 0x640180 + (i * 0x20));
-		if (!(mcc & (1 << crtc)))
-			continue;
-
-		dcb = lookup_dcb(dev, i, mcc);
-		if (!dcb)
-			continue;
-
-		nouveau_bios_run_display_table(dev, 0x0000, -1, dcb, crtc);
-	}
-
-	nv_wr32(device, 0x6101d4, 0x00000000);
-	nv_wr32(device, 0x6109d4, 0x00000000);
-	nv_wr32(device, 0x6101d0, 0x80000000);
-}
-
-static void
-nvd0_display_unk2_handler(struct drm_device *dev, u32 crtc, u32 mask)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct dcb_output *dcb;
-	u32 or, tmp, pclk;
-	int i;
-
-	for (i = 0; mask && i < 8; i++) {
-		u32 mcc = nv_rd32(device, 0x640180 + (i * 0x20));
-		if (!(mcc & (1 << crtc)))
-			continue;
-
-		dcb = lookup_dcb(dev, i, mcc);
-		if (!dcb)
-			continue;
-
-		nouveau_bios_run_display_table(dev, 0x0000, -2, dcb, crtc);
-	}
-
-	pclk = nv_rd32(device, 0x660450 + (crtc * 0x300)) / 1000;
-	NV_DEBUG(drm, "PDISP: crtc %d pclk %d mask 0x%08x\n",
-			  crtc, pclk, mask);
-	if (pclk && (mask & 0x00010000)) {
-		nv50_crtc_set_clock(dev, crtc, pclk);
-	}
-
-	for (i = 0; mask && i < 8; i++) {
-		u32 mcp = nv_rd32(device, 0x660180 + (i * 0x20));
-		u32 cfg = nv_rd32(device, 0x660184 + (i * 0x20));
-		if (!(mcp & (1 << crtc)))
-			continue;
-
-		dcb = lookup_dcb(dev, i, mcp);
-		if (!dcb)
-			continue;
-		or = ffs(dcb->or) - 1;
-
-		nouveau_bios_run_display_table(dev, cfg, pclk, dcb, crtc);
-
-		nv_wr32(device, 0x612200 + (crtc * 0x800), 0x00000000);
-		switch (dcb->type) {
-		case DCB_OUTPUT_ANALOG:
-			nv_wr32(device, 0x612280 + (or * 0x800), 0x00000000);
-			break;
-		case DCB_OUTPUT_TMDS:
-		case DCB_OUTPUT_LVDS:
-		case DCB_OUTPUT_DP:
-			if (cfg & 0x00000100)
-				tmp = 0x00000101;
-			else
-				tmp = 0x00000000;
-
-			nv_mask(device, 0x612300 + (or * 0x800), 0x00000707, tmp);
-			break;
-		default:
-			break;
-		}
-
-		break;
-	}
-
-	nv_wr32(device, 0x6101d4, 0x00000000);
-	nv_wr32(device, 0x6109d4, 0x00000000);
-	nv_wr32(device, 0x6101d0, 0x80000000);
-}
-
-static void
-nvd0_display_unk4_handler(struct drm_device *dev, u32 crtc, u32 mask)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct dcb_output *dcb;
-	int pclk, i;
-
-	pclk = nv_rd32(device, 0x660450 + (crtc * 0x300)) / 1000;
-
-	for (i = 0; mask && i < 8; i++) {
-		u32 mcp = nv_rd32(device, 0x660180 + (i * 0x20));
-		u32 cfg = nv_rd32(device, 0x660184 + (i * 0x20));
-		if (!(mcp & (1 << crtc)))
-			continue;
-
-		dcb = lookup_dcb(dev, i, mcp);
-		if (!dcb)
-			continue;
-
-		nouveau_bios_run_display_table(dev, cfg, -pclk, dcb, crtc);
-	}
-
-	nv_wr32(device, 0x6101d4, 0x00000000);
-	nv_wr32(device, 0x6109d4, 0x00000000);
-	nv_wr32(device, 0x6101d0, 0x80000000);
-}
-
-static void
-nvd0_display_bh(unsigned long data)
-{
-	struct drm_device *dev = (struct drm_device *)data;
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvd0_display *disp = nvd0_display(dev);
-	u32 mask = 0, crtc = ~0;
-	int i;
-
-	if (drm_debug & (DRM_UT_DRIVER | DRM_UT_KMS)) {
-		NV_INFO(drm, "PDISP: modeset req %d\n", disp->modeset);
-		NV_INFO(drm, " STAT: 0x%08x 0x%08x 0x%08x\n",
-			 nv_rd32(device, 0x6101d0),
-			 nv_rd32(device, 0x6101d4), nv_rd32(device, 0x6109d4));
-		for (i = 0; i < 8; i++) {
-			NV_INFO(drm, " %s%d: 0x%08x 0x%08x\n",
-				i < 4 ? "DAC" : "SOR", i,
-				nv_rd32(device, 0x640180 + (i * 0x20)),
-				nv_rd32(device, 0x660180 + (i * 0x20)));
-		}
-	}
-
-	while (!mask && ++crtc < dev->mode_config.num_crtc)
-		mask = nv_rd32(device, 0x6101d4 + (crtc * 0x800));
-
-	if (disp->modeset & 0x00000001)
-		nvd0_display_unk1_handler(dev, crtc, mask);
-	if (disp->modeset & 0x00000002)
-		nvd0_display_unk2_handler(dev, crtc, mask);
-	if (disp->modeset & 0x00000004)
-		nvd0_display_unk4_handler(dev, crtc, mask);
-}
-
-void
-nvd0_display_intr(struct drm_device *dev)
-{
-	struct nvd0_display *disp = nvd0_display(dev);
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	u32 intr = nv_rd32(device, 0x610088);
-
-	if (intr & 0x00000001) {
-		u32 stat = nv_rd32(device, 0x61008c);
-		nv_wr32(device, 0x61008c, stat);
-		intr &= ~0x00000001;
-	}
-
-	if (intr & 0x00000002) {
-		u32 stat = nv_rd32(device, 0x61009c);
-		int chid = ffs(stat) - 1;
-		if (chid >= 0) {
-			u32 mthd = nv_rd32(device, 0x6101f0 + (chid * 12));
-			u32 data = nv_rd32(device, 0x6101f4 + (chid * 12));
-			u32 unkn = nv_rd32(device, 0x6101f8 + (chid * 12));
-
-			NV_INFO(drm, "EvoCh: chid %d mthd 0x%04x data 0x%08x "
-				     "0x%08x 0x%08x\n",
-				chid, (mthd & 0x0000ffc), data, mthd, unkn);
-			nv_wr32(device, 0x61009c, (1 << chid));
-			nv_wr32(device, 0x6101f0 + (chid * 12), 0x90000000);
-		}
-
-		intr &= ~0x00000002;
-	}
-
-	if (intr & 0x00100000) {
-		u32 stat = nv_rd32(device, 0x6100ac);
-
-		if (stat & 0x00000007) {
-			disp->modeset = stat;
-			tasklet_schedule(&disp->tasklet);
-
-			nv_wr32(device, 0x6100ac, (stat & 0x00000007));
-			stat &= ~0x00000007;
-		}
-
-		if (stat) {
-			NV_INFO(drm, "PDISP: unknown intr24 0x%08x\n", stat);
-			nv_wr32(device, 0x6100ac, stat);
-		}
-
-		intr &= ~0x00100000;
-	}
-
-	intr &= ~0x0f000000; /* vblank, handled in core */
-	if (intr)
-		NV_INFO(drm, "PDISP: unknown intr 0x%08x\n", intr);
-}
-
-/******************************************************************************
- * Init
- *****************************************************************************/
-void
-nvd0_display_fini(struct drm_device *dev)
-{
-	int i;
-
-	/* fini cursors + overlays + flips */
-	for (i = 1; i >= 0; i--) {
-		evo_fini_pio(dev, EVO_CURS(i));
-		evo_fini_pio(dev, EVO_OIMM(i));
-		evo_fini_dma(dev, EVO_OVLY(i));
-		evo_fini_dma(dev, EVO_FLIP(i));
-	}
-
-	/* fini master */
-	evo_fini_dma(dev, EVO_MASTER);
-}
-
-int
-nvd0_display_init(struct drm_device *dev)
-{
-	struct nvd0_display *disp = nvd0_display(dev);
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	int ret, i;
-	u32 *push;
-
-	if (nv_rd32(device, 0x6100ac) & 0x00000100) {
-		nv_wr32(device, 0x6100ac, 0x00000100);
-		nv_mask(device, 0x6194e8, 0x00000001, 0x00000000);
-		if (!nv_wait(device, 0x6194e8, 0x00000002, 0x00000000)) {
-			NV_ERROR(drm, "PDISP: 0x6194e8 0x%08x\n",
-				 nv_rd32(device, 0x6194e8));
-			return -EBUSY;
-		}
-	}
-
-	/* not sure what these are exactly; I do know that SOR_MODE_CTRL won't
-	 * work at all unless you do the SOR part below.
-	 */
-	for (i = 0; i < 3; i++) {
-		u32 dac = nv_rd32(device, 0x61a000 + (i * 0x800));
-		nv_wr32(device, 0x6101c0 + (i * 0x800), dac);
-	}
-
-	for (i = 0; i < 4; i++) {
-		u32 sor = nv_rd32(device, 0x61c000 + (i * 0x800));
-		nv_wr32(device, 0x6301c4 + (i * 0x800), sor);
-	}
-
-	for (i = 0; i < dev->mode_config.num_crtc; i++) {
-		u32 crtc0 = nv_rd32(device, 0x616104 + (i * 0x800));
-		u32 crtc1 = nv_rd32(device, 0x616108 + (i * 0x800));
-		u32 crtc2 = nv_rd32(device, 0x61610c + (i * 0x800));
-		nv_wr32(device, 0x6101b4 + (i * 0x800), crtc0);
-		nv_wr32(device, 0x6101b8 + (i * 0x800), crtc1);
-		nv_wr32(device, 0x6101bc + (i * 0x800), crtc2);
-	}
-
-	/* point at our hash table / objects, enable interrupts */
-	nv_wr32(device, 0x610010, (disp->mem->addr >> 8) | 9);
-	nv_mask(device, 0x6100b0, 0x00000307, 0x00000307);
-
-	/* init master */
-	ret = evo_init_dma(dev, EVO_MASTER);
-	if (ret)
-		goto error;
-
-	/* init flips + overlays + cursors */
-	for (i = 0; i < dev->mode_config.num_crtc; i++) {
-		if ((ret = evo_init_dma(dev, EVO_FLIP(i))) ||
-		    (ret = evo_init_dma(dev, EVO_OVLY(i))) ||
-		    (ret = evo_init_pio(dev, EVO_OIMM(i))) ||
-		    (ret = evo_init_pio(dev, EVO_CURS(i))))
-			goto error;
-	}
-
-	push = evo_wait(dev, EVO_MASTER, 32);
-	if (!push) {
-		ret = -EBUSY;
-		goto error;
-	}
-	evo_mthd(push, 0x0088, 1);
-	evo_data(push, NvEvoSync);
-	evo_mthd(push, 0x0084, 1);
-	evo_data(push, 0x00000000);
-	evo_mthd(push, 0x0084, 1);
-	evo_data(push, 0x80000000);
-	evo_mthd(push, 0x008c, 1);
-	evo_data(push, 0x00000000);
-	evo_kick(push, dev, EVO_MASTER);
-
-error:
-	if (ret)
-		nvd0_display_fini(dev);
-	return ret;
-}
-
-void
-nvd0_display_destroy(struct drm_device *dev)
-{
-	struct nvd0_display *disp = nvd0_display(dev);
-	struct pci_dev *pdev = dev->pdev;
-	int i;
-
-	for (i = 0; i < EVO_DMA_NR; i++) {
-		struct evo *evo = &disp->evo[i];
-		pci_free_consistent(pdev, PAGE_SIZE, evo->ptr, evo->handle);
-	}
-
-	nouveau_gpuobj_ref(NULL, &disp->mem);
-	nouveau_bo_unmap(disp->sync);
-	nouveau_bo_ref(NULL, &disp->sync);
-
-	nouveau_display(dev)->priv = NULL;
-	kfree(disp);
-}
-
-int
-nvd0_display_create(struct drm_device *dev)
-{
-	struct nouveau_device *device = nouveau_dev(dev);
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_bar *bar = nouveau_bar(device);
-	struct nouveau_fb *pfb = nouveau_fb(device);
-	struct dcb_table *dcb = &drm->vbios.dcb;
-	struct drm_connector *connector, *tmp;
-	struct pci_dev *pdev = dev->pdev;
-	struct nvd0_display *disp;
-	struct dcb_output *dcbe;
-	int crtcs, ret, i;
-
-	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
-	if (!disp)
-		return -ENOMEM;
-
-	nouveau_display(dev)->priv = disp;
-	nouveau_display(dev)->dtor = nvd0_display_destroy;
-	nouveau_display(dev)->init = nvd0_display_init;
-	nouveau_display(dev)->fini = nvd0_display_fini;
-
-	/* create crtc objects to represent the hw heads */
-	crtcs = nv_rd32(device, 0x022448);
-	for (i = 0; i < crtcs; i++) {
-		ret = nvd0_crtc_create(dev, i);
-		if (ret)
-			goto out;
-	}
-
-	/* create encoder/connector objects based on VBIOS DCB table */
-	for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) {
-		connector = nouveau_connector_create(dev, dcbe->connector);
-		if (IS_ERR(connector))
-			continue;
-
-		if (dcbe->location != DCB_LOC_ON_CHIP) {
-			NV_WARN(drm, "skipping off-chip encoder %d/%d\n",
-				dcbe->type, ffs(dcbe->or) - 1);
-			continue;
-		}
-
-		switch (dcbe->type) {
-		case DCB_OUTPUT_TMDS:
-		case DCB_OUTPUT_LVDS:
-		case DCB_OUTPUT_DP:
-			nvd0_sor_create(connector, dcbe);
-			break;
-		case DCB_OUTPUT_ANALOG:
-			nvd0_dac_create(connector, dcbe);
-			break;
-		default:
-			NV_WARN(drm, "skipping unsupported encoder %d/%d\n",
-				dcbe->type, ffs(dcbe->or) - 1);
-			continue;
-		}
-	}
-
-	/* cull any connectors we created that don't have an encoder */
-	list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) {
-		if (connector->encoder_ids[0])
-			continue;
-
-		NV_WARN(drm, "%s has no encoders, removing\n",
-			drm_get_connector_name(connector));
-		connector->funcs->destroy(connector);
-	}
-
-	/* setup interrupt handling */
-	tasklet_init(&disp->tasklet, nvd0_display_bh, (unsigned long)dev);
-
-	/* small shared memory area we use for notifiers and semaphores */
-	ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, &disp->sync);
-	if (!ret) {
-		ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM);
-		if (!ret)
-			ret = nouveau_bo_map(disp->sync);
-		if (ret)
-			nouveau_bo_ref(NULL, &disp->sync);
-	}
-
-	if (ret)
-		goto out;
-
-	/* hash table and dma objects for the memory areas we care about */
-	ret = nouveau_gpuobj_new(nv_object(device), NULL, 0x4000, 0x10000,
-				 NVOBJ_FLAG_ZERO_ALLOC, &disp->mem);
-	if (ret)
-		goto out;
-
-	/* create evo dma channels */
-	for (i = 0; i < EVO_DMA_NR; i++) {
-		struct evo *evo = &disp->evo[i];
-		u64 offset = disp->sync->bo.offset;
-		u32 dmao = 0x1000 + (i * 0x100);
-		u32 hash = 0x0000 + (i * 0x040);
-
-		evo->idx = i;
-		evo->sem.offset = EVO_SYNC(evo->idx, 0x00);
-		evo->ptr = pci_alloc_consistent(pdev, PAGE_SIZE, &evo->handle);
-		if (!evo->ptr) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		nv_wo32(disp->mem, dmao + 0x00, 0x00000049);
-		nv_wo32(disp->mem, dmao + 0x04, (offset + 0x0000) >> 8);
-		nv_wo32(disp->mem, dmao + 0x08, (offset + 0x0fff) >> 8);
-		nv_wo32(disp->mem, dmao + 0x0c, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x10, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x14, 0x00000000);
-		nv_wo32(disp->mem, hash + 0x00, NvEvoSync);
-		nv_wo32(disp->mem, hash + 0x04, 0x00000001 | (i << 27) |
-						((dmao + 0x00) << 9));
-
-		nv_wo32(disp->mem, dmao + 0x20, 0x00000049);
-		nv_wo32(disp->mem, dmao + 0x24, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x28, (pfb->ram.size - 1) >> 8);
-		nv_wo32(disp->mem, dmao + 0x2c, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x30, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x34, 0x00000000);
-		nv_wo32(disp->mem, hash + 0x08, NvEvoVRAM);
-		nv_wo32(disp->mem, hash + 0x0c, 0x00000001 | (i << 27) |
-						((dmao + 0x20) << 9));
-
-		nv_wo32(disp->mem, dmao + 0x40, 0x00000009);
-		nv_wo32(disp->mem, dmao + 0x44, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x48, (pfb->ram.size - 1) >> 8);
-		nv_wo32(disp->mem, dmao + 0x4c, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x50, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x54, 0x00000000);
-		nv_wo32(disp->mem, hash + 0x10, NvEvoVRAM_LP);
-		nv_wo32(disp->mem, hash + 0x14, 0x00000001 | (i << 27) |
-						((dmao + 0x40) << 9));
-
-		nv_wo32(disp->mem, dmao + 0x60, 0x0fe00009);
-		nv_wo32(disp->mem, dmao + 0x64, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x68, (pfb->ram.size - 1) >> 8);
-		nv_wo32(disp->mem, dmao + 0x6c, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x70, 0x00000000);
-		nv_wo32(disp->mem, dmao + 0x74, 0x00000000);
-		nv_wo32(disp->mem, hash + 0x18, NvEvoFB32);
-		nv_wo32(disp->mem, hash + 0x1c, 0x00000001 | (i << 27) |
-						((dmao + 0x60) << 9));
-	}
-
-	bar->flush(bar);
-
-out:
-	if (ret)
-		nvd0_display_destroy(dev);
-	return ret;
-}
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 24d932f..9175615 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -561,6 +561,8 @@
 		/* use frac fb div on APUs */
 		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
+		if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
+			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 	} else {
 		radeon_crtc->pll_flags |= RADEON_PLL_LEGACY;
 
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index d5699fe..064023b 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -34,8 +34,7 @@
 
 /* move these to drm_dp_helper.c/h */
 #define DP_LINK_CONFIGURATION_SIZE 9
-#define DP_LINK_STATUS_SIZE	   6
-#define DP_DPCD_SIZE	           8
+#define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE
 
 static char *voltage_names[] = {
         "0.4V", "0.6V", "0.8V", "1.2V"
@@ -290,78 +289,6 @@
 
 /***** general DP utility functions *****/
 
-static u8 dp_link_status(u8 link_status[DP_LINK_STATUS_SIZE], int r)
-{
-	return link_status[r - DP_LANE0_1_STATUS];
-}
-
-static u8 dp_get_lane_status(u8 link_status[DP_LINK_STATUS_SIZE],
-			     int lane)
-{
-	int i = DP_LANE0_1_STATUS + (lane >> 1);
-	int s = (lane & 1) * 4;
-	u8 l = dp_link_status(link_status, i);
-	return (l >> s) & 0xf;
-}
-
-static bool dp_clock_recovery_ok(u8 link_status[DP_LINK_STATUS_SIZE],
-				 int lane_count)
-{
-	int lane;
-	u8 lane_status;
-
-	for (lane = 0; lane < lane_count; lane++) {
-		lane_status = dp_get_lane_status(link_status, lane);
-		if ((lane_status & DP_LANE_CR_DONE) == 0)
-			return false;
-	}
-	return true;
-}
-
-static bool dp_channel_eq_ok(u8 link_status[DP_LINK_STATUS_SIZE],
-			     int lane_count)
-{
-	u8 lane_align;
-	u8 lane_status;
-	int lane;
-
-	lane_align = dp_link_status(link_status,
-				    DP_LANE_ALIGN_STATUS_UPDATED);
-	if ((lane_align & DP_INTERLANE_ALIGN_DONE) == 0)
-		return false;
-	for (lane = 0; lane < lane_count; lane++) {
-		lane_status = dp_get_lane_status(link_status, lane);
-		if ((lane_status & DP_CHANNEL_EQ_BITS) != DP_CHANNEL_EQ_BITS)
-			return false;
-	}
-	return true;
-}
-
-static u8 dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE],
-					int lane)
-
-{
-	int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
-	int s = ((lane & 1) ?
-		 DP_ADJUST_VOLTAGE_SWING_LANE1_SHIFT :
-		 DP_ADJUST_VOLTAGE_SWING_LANE0_SHIFT);
-	u8 l = dp_link_status(link_status, i);
-
-	return ((l >> s) & 0x3) << DP_TRAIN_VOLTAGE_SWING_SHIFT;
-}
-
-static u8 dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE],
-					     int lane)
-{
-	int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
-	int s = ((lane & 1) ?
-		 DP_ADJUST_PRE_EMPHASIS_LANE1_SHIFT :
-		 DP_ADJUST_PRE_EMPHASIS_LANE0_SHIFT);
-	u8 l = dp_link_status(link_status, i);
-
-	return ((l >> s) & 0x3) << DP_TRAIN_PRE_EMPHASIS_SHIFT;
-}
-
 #define DP_VOLTAGE_MAX         DP_TRAIN_VOLTAGE_SWING_1200
 #define DP_PRE_EMPHASIS_MAX    DP_TRAIN_PRE_EMPHASIS_9_5
 
@@ -374,8 +301,8 @@
 	int lane;
 
 	for (lane = 0; lane < lane_count; lane++) {
-		u8 this_v = dp_get_adjust_request_voltage(link_status, lane);
-		u8 this_p = dp_get_adjust_request_pre_emphasis(link_status, lane);
+		u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
+		u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
 
 		DRM_DEBUG_KMS("requested signal parameters: lane %d voltage %s pre_emph %s\n",
 			  lane,
@@ -420,37 +347,6 @@
 	return (link_rate * lane_num * 8) / bpp;
 }
 
-static int dp_get_max_link_rate(u8 dpcd[DP_DPCD_SIZE])
-{
-	switch (dpcd[DP_MAX_LINK_RATE]) {
-	case DP_LINK_BW_1_62:
-	default:
-		return 162000;
-	case DP_LINK_BW_2_7:
-		return 270000;
-	case DP_LINK_BW_5_4:
-		return 540000;
-	}
-}
-
-static u8 dp_get_max_lane_number(u8 dpcd[DP_DPCD_SIZE])
-{
-	return dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK;
-}
-
-static u8 dp_get_dp_link_rate_coded(int link_rate)
-{
-	switch (link_rate) {
-	case 162000:
-	default:
-		return DP_LINK_BW_1_62;
-	case 270000:
-		return DP_LINK_BW_2_7;
-	case 540000:
-		return DP_LINK_BW_5_4;
-	}
-}
-
 /***** radeon specific DP functions *****/
 
 /* First get the min lane# when low rate is used according to pixel clock
@@ -462,8 +358,8 @@
 					int pix_clock)
 {
 	int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
-	int max_link_rate = dp_get_max_link_rate(dpcd);
-	int max_lane_num = dp_get_max_lane_number(dpcd);
+	int max_link_rate = drm_dp_max_link_rate(dpcd);
+	int max_lane_num = drm_dp_max_lane_count(dpcd);
 	int lane_num;
 	int max_dp_pix_clock;
 
@@ -500,7 +396,7 @@
 			return 540000;
 	}
 
-	return dp_get_max_link_rate(dpcd);
+	return drm_dp_max_link_rate(dpcd);
 }
 
 static u8 radeon_dp_encoder_service(struct radeon_device *rdev,
@@ -551,14 +447,15 @@
 bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector)
 {
 	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
-	u8 msg[25];
+	u8 msg[DP_DPCD_SIZE];
 	int ret, i;
 
-	ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg, 8, 0);
+	ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg,
+					DP_DPCD_SIZE, 0);
 	if (ret > 0) {
-		memcpy(dig_connector->dpcd, msg, 8);
+		memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
 		DRM_DEBUG_KMS("DPCD: ");
-		for (i = 0; i < 8; i++)
+		for (i = 0; i < DP_DPCD_SIZE; i++)
 			DRM_DEBUG_KMS("%02x ", msg[i]);
 		DRM_DEBUG_KMS("\n");
 
@@ -664,7 +561,7 @@
 
 	if (!radeon_dp_get_link_status(radeon_connector, link_status))
 		return false;
-	if (dp_channel_eq_ok(link_status, dig->dp_lane_count))
+	if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
 		return false;
 	return true;
 }
@@ -677,9 +574,8 @@
 	int enc_id;
 	int dp_clock;
 	int dp_lane_count;
-	int rd_interval;
 	bool tp3_supported;
-	u8 dpcd[8];
+	u8 dpcd[DP_RECEIVER_CAP_SIZE];
 	u8 train_set[4];
 	u8 link_status[DP_LINK_STATUS_SIZE];
 	u8 tries;
@@ -765,7 +661,7 @@
 	radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LANE_COUNT_SET, tmp);
 
 	/* set the link rate on the sink */
-	tmp = dp_get_dp_link_rate_coded(dp_info->dp_clock);
+	tmp = drm_dp_link_rate_to_bw_code(dp_info->dp_clock);
 	radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp);
 
 	/* start training on the source */
@@ -821,17 +717,14 @@
 	dp_info->tries = 0;
 	voltage = 0xff;
 	while (1) {
-		if (dp_info->rd_interval == 0)
-			udelay(100);
-		else
-			mdelay(dp_info->rd_interval * 4);
+		drm_dp_link_train_clock_recovery_delay(dp_info->dpcd);
 
 		if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
 			DRM_ERROR("displayport link status failed\n");
 			break;
 		}
 
-		if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+		if (drm_dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
 			clock_recovery = true;
 			break;
 		}
@@ -886,17 +779,14 @@
 	dp_info->tries = 0;
 	channel_eq = false;
 	while (1) {
-		if (dp_info->rd_interval == 0)
-			udelay(400);
-		else
-			mdelay(dp_info->rd_interval * 4);
+		drm_dp_link_train_channel_eq_delay(dp_info->dpcd);
 
 		if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
 			DRM_ERROR("displayport link status failed\n");
 			break;
 		}
 
-		if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+		if (drm_dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
 			channel_eq = true;
 			break;
 		}
@@ -974,14 +864,13 @@
 	else
 		dp_info.enc_id |= ATOM_DP_CONFIG_LINK_A;
 
-	dp_info.rd_interval = radeon_read_dpcd_reg(radeon_connector, DP_TRAINING_AUX_RD_INTERVAL);
 	tmp = radeon_read_dpcd_reg(radeon_connector, DP_MAX_LANE_COUNT);
 	if (ASIC_IS_DCE5(rdev) && (tmp & DP_TPS3_SUPPORTED))
 		dp_info.tp3_supported = true;
 	else
 		dp_info.tp3_supported = false;
 
-	memcpy(dp_info.dpcd, dig_connector->dpcd, 8);
+	memcpy(dp_info.dpcd, dig_connector->dpcd, DP_RECEIVER_CAP_SIZE);
 	dp_info.rdev = rdev;
 	dp_info.encoder = encoder;
 	dp_info.connector = connector;
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 010bae1..4552d4a 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -340,7 +340,7 @@
 	    ((radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
 	     (radeon_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE))) {
 		struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-		radeon_dp_set_link_config(connector, mode);
+		radeon_dp_set_link_config(connector, adjusted_mode);
 	}
 
 	return true;
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 5d1d21a..f95d7fc 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1821,7 +1821,7 @@
 	case CHIP_SUMO:
 		rdev->config.evergreen.num_ses = 1;
 		rdev->config.evergreen.max_pipes = 4;
-		rdev->config.evergreen.max_tile_pipes = 2;
+		rdev->config.evergreen.max_tile_pipes = 4;
 		if (rdev->pdev->device == 0x9648)
 			rdev->config.evergreen.max_simds = 3;
 		else if ((rdev->pdev->device == 0x9647) ||
@@ -1844,7 +1844,7 @@
 		rdev->config.evergreen.sc_prim_fifo_size = 0x40;
 		rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
 		rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
-		gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+		gb_addr_config = SUMO_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_SUMO2:
 		rdev->config.evergreen.num_ses = 1;
@@ -1866,7 +1866,7 @@
 		rdev->config.evergreen.sc_prim_fifo_size = 0x40;
 		rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
 		rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
-		gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+		gb_addr_config = SUMO2_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_BARTS:
 		rdev->config.evergreen.num_ses = 2;
@@ -1914,7 +1914,7 @@
 		break;
 	case CHIP_CAICOS:
 		rdev->config.evergreen.num_ses = 1;
-		rdev->config.evergreen.max_pipes = 4;
+		rdev->config.evergreen.max_pipes = 2;
 		rdev->config.evergreen.max_tile_pipes = 2;
 		rdev->config.evergreen.max_simds = 2;
 		rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses;
@@ -2034,6 +2034,7 @@
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG, gb_addr_config);
 
 	tmp = gb_addr_config & NUM_PIPES_MASK;
 	tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends,
@@ -2403,8 +2404,12 @@
 					 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 		cayman_cp_int_cntl_setup(rdev, 1, 0);
 		cayman_cp_int_cntl_setup(rdev, 2, 0);
+		tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+		WREG32(CAYMAN_DMA1_CNTL, tmp);
 	} else
 		WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+	WREG32(DMA_CNTL, tmp);
 	WREG32(GRBM_INT_CNTL, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -2457,6 +2462,7 @@
 	u32 grbm_int_cntl = 0;
 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
 	u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
+	u32 dma_cntl, dma_cntl1 = 0;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2484,6 +2490,8 @@
 	afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
 	afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
 
+	dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+
 	if (rdev->family >= CHIP_CAYMAN) {
 		/* enable CP interrupts on all rings */
 		if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
@@ -2506,6 +2514,19 @@
 		}
 	}
 
+	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+		DRM_DEBUG("r600_irq_set: sw int dma\n");
+		dma_cntl |= TRAP_ENABLE;
+	}
+
+	if (rdev->family >= CHIP_CAYMAN) {
+		dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+		if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+			DRM_DEBUG("r600_irq_set: sw int dma1\n");
+			dma_cntl1 |= TRAP_ENABLE;
+		}
+	}
+
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2591,6 +2612,12 @@
 		cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2);
 	} else
 		WREG32(CP_INT_CNTL, cp_int_cntl);
+
+	WREG32(DMA_CNTL, dma_cntl);
+
+	if (rdev->family >= CHIP_CAYMAN)
+		WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
+
 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3093,6 +3120,16 @@
 				break;
 			}
 			break;
+		case 146:
+		case 147:
+			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
+			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+			/* reset addr and status */
+			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
+			break;
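/*
 * Editor's note, not part of the patch: WREG32_P(reg, val, mask) is the
 * radeon read-modify-write helper; it keeps the register bits selected by
 * mask and merges in val, so the WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1) above
 * touches only bit 0, which (per the in-line comment) resets the latched
 * fault address and status.
 */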
 		case 176: /* CP_INT in ring buffer */
 		case 177: /* CP_INT in IB1 */
 		case 178: /* CP_INT in IB2 */
@@ -3116,9 +3153,19 @@
 			} else
 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
+		case 224: /* DMA trap event */
+			DRM_DEBUG("IH: DMA trap\n");
+			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
+		case 244: /* DMA trap event */
+			if (rdev->family >= CHIP_CAYMAN) {
+				DRM_DEBUG("IH: DMA1 trap\n");
+				radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+			}
+			break;
 		default:
 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
 			break;
@@ -3144,6 +3191,143 @@
 	return IRQ_HANDLED;
 }
 
+/**
+ * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write the fence
+ * seq number, and a DMA trap packet to generate an
+ * interrupt if needed (evergreen-SI).
+ */
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+				   struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+	/* write the fence */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+	radeon_ring_write(ring, fence->seq);
+	/* generate an interrupt */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+	/* flush HDP */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | HDP_MEM_COHERENCY_FLUSH_CNTL);
+	radeon_ring_write(ring, 1);
+}
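/*
 * Editor's note, not part of the patch: the stream emitted above is, in
 * order, a 4-dword FENCE packet (header, low address, upper address bits,
 * sequence value), a 1-dword TRAP packet for the optional interrupt, and
 * a 3-dword SRBM_WRITE poking HDP_MEM_COHERENCY_FLUSH_CNTL so the fence
 * value written by the DMA engine becomes visible to the CPU.
 */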
+
+/**
+ * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (evergreen).
+ */
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+				   struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
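/*
 * Editor's note, not part of the patch: a worked example of the 8-dword
 * alignment rule above.  If ring->wptr == 10 on entry (10 & 7 == 2),
 * three NOPs advance it to 13 (13 & 7 == 5); the 3-dword INDIRECT_BUFFER
 * packet then occupies dwords 13..15 and ends exactly on the 8-dword
 * boundary at 16, as the DMA engine requires.
 */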
+
+/**
+ * evergreen_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU pages using the DMA engine (evergreen-cayman).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int evergreen_copy_dma(struct radeon_device *rdev,
+		       uint64_t src_offset, uint64_t dst_offset,
+		       unsigned num_gpu_pages,
+		       struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFFF)
+			cur_size_in_dw = 0xFFFFF;
+		size_in_dw -= cur_size_in_dw;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
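/*
 * Editor's sketch, not part of the patch: one plausible way a caller with
 * radeon.h in scope could drive evergreen_copy_dma() and wait for the
 * copy to land.  The GPU addresses and page count are made-up
 * placeholders; radeon_fence_wait() and radeon_fence_unref() are the
 * existing radeon fence helpers.
 */
static int example_dma_copy(struct radeon_device *rdev,
			    uint64_t src_gpu, uint64_t dst_gpu,
			    unsigned num_pages)
{
	struct radeon_fence *fence = NULL;
	int r;

	r = evergreen_copy_dma(rdev, src_gpu, dst_gpu, num_pages, &fence);
	if (r)
		return r;

	/* block (non-interruptibly) until the DMA ring reaches the fence */
	r = radeon_fence_wait(fence, false);
	radeon_fence_unref(&fence);
	return r;
}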
+
 static int evergreen_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -3207,6 +3391,12 @@
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -3221,12 +3411,23 @@
 			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR, DMA_RB_WPTR,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = evergreen_cp_load_microcode(rdev);
 	if (r)
 		return r;
 	r = evergreen_cp_resume(rdev);
 	if (r)
 		return r;
+	r = r600_dma_resume(rdev);
+	if (r)
+		return r;
 
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
@@ -3273,11 +3474,9 @@
 
 int evergreen_suspend(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-
 	r600_audio_fini(rdev);
 	r700_cp_stop(rdev);
-	ring->ready = false;
+	r600_dma_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
@@ -3354,6 +3553,9 @@
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -3366,6 +3568,7 @@
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
+		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -3393,6 +3596,7 @@
 	r600_audio_fini(rdev);
 	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
+	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index c042e49..74c6b42 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
 #define MAX(a,b)                   (((a)>(b))?(a):(b))
 #define MIN(a,b)                   (((a)<(b))?(a):(b))
 
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc);
 static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
 					  struct radeon_cs_reloc **cs_reloc);
 
@@ -507,20 +509,28 @@
 		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
 		nby = round_up(nby, track->npipes * 8);
 	} else {
+		/* always assume 8x8 htile */
+		/* alignment is htile alignment * 8; htile alignment varies
+		 * with the number of pipes, the tile width, and nby
+		 */
 		switch (track->npipes) {
 		case 8:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 64 * 8);
 			nby = round_up(nby, 64 * 8);
 			break;
 		case 4:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 64 * 8);
 			nby = round_up(nby, 32 * 8);
 			break;
 		case 2:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 32 * 8);
 			nby = round_up(nby, 32 * 8);
 			break;
 		case 1:
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
 			nbx = round_up(nbx, 32 * 8);
 			nby = round_up(nby, 16 * 8);
 			break;
@@ -531,9 +541,10 @@
 		}
 	}
 	/* compute number of htile */
-	nbx = nbx / 8;
-	nby = nby / 8;
-	size = nbx * nby * 4;
+	nbx = nbx >> 3;
+	nby = nby >> 3;
+	/* size must be aligned on npipes * 2K boundary */
+	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
 	size += track->htile_offset;
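/*
 * Editor's sketch, not part of the patch: the computation above for a
 * hypothetical 4-pipe chip and a 256x256 depth surface.  nbx rounds up
 * to 64 * 8 = 512 and nby to 32 * 8 = 256; the shifts convert to 8x8
 * htile units (64 x 32), so nbx * nby * 4 = 8192 bytes, and
 * roundup(8192, 4 * 2048) leaves it at 8192 since it already sits on
 * the npipes * 2K boundary.
 */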
 
 	if (size > radeon_bo_size(track->htile_bo)) {
@@ -1790,6 +1801,8 @@
 	case DB_HTILE_SURFACE:
 		/* 8x8 only */
 		track->htile_surface = radeon_get_ib_value(p, idx);
+		/* force 8x8 htile width and height */
+		ib[idx] |= 3;
 		track->db_dirty = true;
 		break;
 	case CB_IMMED0_BASE:
@@ -2232,6 +2245,107 @@
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_CP_DMA:
+	{
+		u32 command, size, info;
+		u64 offset, tmp;
+		if (pkt->count != 4) {
+			DRM_ERROR("bad CP DMA\n");
+			return -EINVAL;
+		}
+		command = radeon_get_ib_value(p, idx+4);
+		size = command & 0x1fffff;
+		info = radeon_get_ib_value(p, idx+1);
+		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+		    ((((info & 0x00300000) >> 20) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+		    ((((info & 0x60000000) >> 29) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+			/* non mem-to-mem copies require a dw-aligned count */
+			if (size % 4) {
+				DRM_ERROR("CP DMA command requires dw count alignment\n");
+				return -EINVAL;
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_SAS) {
+			/* src address space is register */
+			/* GDS is ok */
+			if (((info & 0x60000000) >> 29) != 1) {
+				DRM_ERROR("CP DMA SAS not supported\n");
+				return -EINVAL;
+			}
+		} else {
+			if (command & PACKET3_CP_DMA_CMD_SAIC) {
+				DRM_ERROR("CP DMA SAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			/* src address space is memory */
+			if (((info & 0x60000000) >> 29) == 0) {
+				r = evergreen_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad CP DMA SRC\n");
+					return -EINVAL;
+				}
+
+				tmp = radeon_get_ib_value(p, idx) +
+					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+				offset = reloc->lobj.gpu_offset + tmp;
+
+				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+					dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+						 tmp + size, radeon_bo_size(reloc->robj));
+					return -EINVAL;
+				}
+
+				ib[idx] = offset;
+				ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+			} else if (((info & 0x60000000) >> 29) != 2) {
+				DRM_ERROR("bad CP DMA SRC_SEL\n");
+				return -EINVAL;
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_DAS) {
+			/* dst address space is register */
+			/* GDS is ok */
+			if (((info & 0x00300000) >> 20) != 1) {
+				DRM_ERROR("CP DMA DAS not supported\n");
+				return -EINVAL;
+			}
+		} else {
+			/* dst address space is memory */
+			if (command & PACKET3_CP_DMA_CMD_DAIC) {
+				DRM_ERROR("CP DMA DAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			if (((info & 0x00300000) >> 20) == 0) {
+				r = evergreen_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad CP DMA DST\n");
+					return -EINVAL;
+				}
+
+				tmp = radeon_get_ib_value(p, idx+2) +
+					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+				offset = reloc->lobj.gpu_offset + tmp;
+
+				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+					dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+						 tmp + size, radeon_bo_size(reloc->robj));
+					return -EINVAL;
+				}
+
+				ib[idx+2] = offset;
+				ib[idx+3] = upper_32_bits(offset) & 0xff;
+			} else {
+				DRM_ERROR("bad CP DMA DST_SEL\n");
+				return -EINVAL;
+			}
+		}
+		break;
+	}
 	case PACKET3_SURFACE_SYNC:
 		if (pkt->count != 3) {
 			DRM_ERROR("bad SURFACE_SYNC\n");
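/*
 * Editor's sketch, not part of the patch: the CP_DMA fields the checker
 * above tests, decoded in one helper.  Bit positions follow the masks in
 * the code, and PACKET3_CP_DMA_CMD_SAS/_DAS are assumed to be in scope
 * from evergreend.h.
 */
static bool example_cp_dma_is_mem_to_mem(u32 info, u32 command)
{
	u32 src_sel = (info & 0x60000000) >> 29;	/* 0 = memory */
	u32 dst_sel = (info & 0x00300000) >> 20;	/* 0 = memory */

	/* a plain copy reads and writes memory, with neither the SAS nor
	 * the DAS register-address-space flag set in the command word */
	return src_sel == 0 && dst_sel == 0 &&
	       !(command & (PACKET3_CP_DMA_CMD_SAS | PACKET3_CP_DMA_CMD_DAS));
}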
@@ -2715,6 +2829,455 @@
 	return 0;
 }
 
+/*
+ *  DMA
+ */
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
+#define GET_DMA_MISC(h) (((h) & 0x00700000) >> 20)
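/*
 * Editor's example, not part of the patch: decoding a sample header with
 * the macros above.  For header == 0x30800040:
 *   GET_DMA_CMD(header)   = 0x3   (DMA_PACKET_COPY, assuming the usual
 *                                  r600d.h packet encoding)
 *   GET_DMA_COUNT(header) = 0x40  (64 dwords)
 *   GET_DMA_T(header)     = 1     (tiled)
 *   GET_DMA_NEW(header)   = 0
 *   GET_DMA_MISC(header)  = 0
 */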
+
+/**
+ * evergreen_dma_cs_parse() - parse the DMA IB
+ * @p:		parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (Evergreen-Cayman)
+ * Returns 0 for success and an error on failure.
+ */
+int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
+	u32 header, cmd, count, tiled, new_cmd, misc;
+	volatile u32 *ib = p->ib.ptr;
+	u32 idx, idx_value;
+	u64 src_offset, dst_offset, dst2_offset;
+	int r;
+
+	do {
+		if (p->idx >= ib_chunk->length_dw) {
+			DRM_ERROR("Cannot parse packet at %d after CS end %d!\n",
+				  p->idx, ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		idx = p->idx;
+		header = radeon_get_ib_value(p, idx);
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+		new_cmd = GET_DMA_NEW(header);
+		misc = GET_DMA_MISC(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += count + 7;
+			} else {
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += count + 3;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+					 dst_offset, radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
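/*
 * Editor's note, not part of the patch, inferred from the parser above:
 * the tiled WRITE form keeps a 256-byte-aligned base (address >> 8) in
 * dword 1 and carries extra tiling parameters, so the parser advances
 * count + 7 dwords; the linear form splits a 64-bit address across
 * dwords 1 and 2 and advances only count + 3.
 */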
+		case DMA_PACKET_COPY:
+			r = r600_dma_cs_next_reloc(p, &src_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				idx_value = radeon_get_ib_value(p, idx + 2);
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2T, frame to fields */
+						if (idx_value & (1 << 31)) {
+							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset <<= 8;
+						dst2_offset = ib[idx+2];
+						dst2_offset <<= 8;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 10;
+						break;
+					case 1:
+						/* L2T, T2L partial */
+						if (p->family < CHIP_CAYMAN) {
+							DRM_ERROR("L2T, T2L Partial is cayman only!\n");
+							return -EINVAL;
+						}
+						/* detile bit */
+						if (idx_value & (1 << 31)) {
+							/* tiled src, linear dst */
+							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						} else {
+							/* linear src, tiled dst */
+							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						}
+						p->idx += 12;
+						break;
+					case 3:
+						/* L2T, broadcast */
+						if (idx_value & (1 << 31)) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset <<= 8;
+						dst2_offset = ib[idx+2];
+						dst2_offset <<= 8;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 10;
+						break;
+					case 4:
+						/* L2T, T2L */
+						/* detile bit */
+						if (idx_value & (1 << 31)) {
+							/* tiled src, linear dst */
+							src_offset = ib[idx+1];
+							src_offset <<= 8;
+							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+							dst_offset = ib[idx+7];
+							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						} else {
+							/* linear src, tiled dst */
+							src_offset = ib[idx+7];
+							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+							dst_offset = ib[idx+1];
+							dst_offset <<= 8;
+							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						}
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						p->idx += 9;
+						break;
+					case 5:
+						/* T2T partial */
+						if (p->family < CHIP_CAYMAN) {
+							DRM_ERROR("T2T Partial is cayman only!\n");
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						p->idx += 13;
+						break;
+					case 7:
+						/* L2T, broadcast */
+						if (idx_value & (1 << 31)) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset <<= 8;
+						dst2_offset = ib[idx+2];
+						dst2_offset <<= 8;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 10;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					switch (misc) {
+					case 0:
+						/* detile bit */
+						if (idx_value & (1 << 31)) {
+							/* tiled src, linear dst */
+							src_offset = ib[idx+1];
+							src_offset <<= 8;
+							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+							dst_offset = ib[idx+7];
+							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						} else {
+							/* linear src, tiled dst */
+							src_offset = ib[idx+7];
+							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+							dst_offset = ib[idx+1];
+							dst_offset <<= 8;
+							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+						}
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						p->idx += 9;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				}
+			} else {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2L, byte */
+						src_offset = ib[idx+2];
+						src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+						dst_offset = ib[idx+1];
+						dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+								 src_offset + count, radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+								 dst_offset + count, radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 5;
+						break;
+					case 1:
+						/* L2L, partial */
+						if (p->family < CHIP_CAYMAN) {
+							DRM_ERROR("L2L Partial is cayman only !\n");
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+						ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+						p->idx += 9;
+						break;
+					case 4:
+						/* L2L, dw, broadcast */
+						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+						if (r) {
+							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+							return -EINVAL;
+						}
+						dst_offset = ib[idx+1];
+						dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+						dst2_offset = ib[idx+2];
+						dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+						src_offset = ib[idx+3];
+						src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+							return -EINVAL;
+						}
+						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+							dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+							return -EINVAL;
+						}
+						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+						ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+						ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+						p->idx += 7;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					/* L2L, dw */
+					src_offset = ib[idx+2];
+					src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+					dst_offset = ib[idx+1];
+					dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+							 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+						return -EINVAL;
+					}
+					if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+						dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+							 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+						return -EINVAL;
+					}
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					p->idx += 5;
+				}
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+				return -EINVAL;
+			}
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			p->idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			p->idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib.length_dw; r++) {
+		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
+
 /* vm parser */
 static bool evergreen_vm_reg_valid(u32 reg)
 {
@@ -2843,6 +3406,7 @@
 	u32 idx = pkt->idx + 1;
 	u32 idx_value = ib[idx];
 	u32 start_reg, end_reg, reg, i;
+	u32 command, info;
 
 	switch (pkt->opcode) {
 	case PACKET3_NOP:
@@ -2917,6 +3481,64 @@
 				return -EINVAL;
 		}
 		break;
+	case PACKET3_CP_DMA:
+		command = ib[idx + 4];
+		info = ib[idx + 1];
+		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+		    ((((info & 0x00300000) >> 20) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+		    ((((info & 0x60000000) >> 29) == 0) &&
+		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+			/* non mem-to-mem copies require a dw-aligned count */
+			if ((command & 0x1fffff) % 4) {
+				DRM_ERROR("CP DMA command requires dw count alignment\n");
+				return -EINVAL;
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_SAS) {
+			/* src address space is register */
+			if (((info & 0x60000000) >> 29) == 0) {
+				start_reg = idx_value << 2;
+				if (command & PACKET3_CP_DMA_CMD_SAIC) {
+					reg = start_reg;
+					if (!evergreen_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad SRC register\n");
+						return -EINVAL;
+					}
+				} else {
+					for (i = 0; i < (command & 0x1fffff); i++) {
+						reg = start_reg + (4 * i);
+						if (!evergreen_vm_reg_valid(reg)) {
+							DRM_ERROR("CP DMA Bad SRC register\n");
+							return -EINVAL;
+						}
+					}
+				}
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_DAS) {
+			/* dst address space is register */
+			if (((info & 0x00300000) >> 20) == 0) {
+				start_reg = ib[idx + 2];
+				if (command & PACKET3_CP_DMA_CMD_DAIC) {
+					reg = start_reg;
+					if (!evergreen_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad DST register\n");
+						return -EINVAL;
+					}
+				} else {
+					for (i = 0; i < (command & 0x1fffff); i++) {
+						reg = start_reg + (4 * i);
+						if (!evergreen_vm_reg_valid(reg)) {
+							DRM_ERROR("CP DMA Bad DST register\n");
+							return -EINVAL;
+						}
+					}
+				}
+			}
+		}
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2958,3 +3580,114 @@
 
 	return ret;
 }
+
+/**
+ * evergreen_dma_ib_parse - parse the DMA IB for VM
+ * @rdev: radeon_device pointer
+ * @ib: radeon_ib pointer
+ *
+ * Parses the DMA IB from the VM CS ioctl and
+ * checks for errors (Cayman-SI).
+ * Returns 0 for success and an error on failure.
+ */
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	u32 idx = 0;
+	u32 header, cmd, count, tiled, new_cmd, misc;
+
+	do {
+		header = ib->ptr[idx];
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+		new_cmd = GET_DMA_NEW(header);
+		misc = GET_DMA_MISC(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			if (tiled)
+				idx += count + 7;
+			else
+				idx += count + 3;
+			break;
+		case DMA_PACKET_COPY:
+			if (tiled) {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2T, frame to fields */
+						idx += 10;
+						break;
+					case 1:
+						/* L2T, T2L partial */
+						idx += 12;
+						break;
+					case 3:
+						/* L2T, broadcast */
+						idx += 10;
+						break;
+					case 4:
+						/* L2T, T2L */
+						idx += 9;
+						break;
+					case 5:
+						/* T2T partial */
+						idx += 13;
+						break;
+					case 7:
+						/* L2T, broadcast */
+						idx += 10;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					switch (misc) {
+					case 0:
+						idx += 9;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				}
+			} else {
+				if (new_cmd) {
+					switch (misc) {
+					case 0:
+						/* L2L, byte */
+						idx += 5;
+						break;
+					case 1:
+						/* L2L, partial */
+						idx += 9;
+						break;
+					case 4:
+						/* L2L, dw, broadcast */
+						idx += 7;
+						break;
+					default:
+						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+						return -EINVAL;
+					}
+				} else {
+					/* L2L, dw */
+					idx += 5;
+				}
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (idx < ib->length_dw);
+
+	return 0;
+}
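For reference, a minimal user-space sketch of how the packet header fields
consumed by the two DMA parsers decode.  The cmd/count/tiled positions follow
the DMA_PACKET() macro added to evergreend.h below; the new (bit 26) and misc
(bits 22:20) positions are assumptions inferred from the parser code, and the
get_dma_* helpers are illustrative, not the driver's own macros.

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t get_dma_cmd(uint32_t h)   { return (h >> 28) & 0xf; }
    static uint32_t get_dma_count(uint32_t h) { return h & 0xfffff; }
    static uint32_t get_dma_t(uint32_t h)     { return (h >> 23) & 0x1; }
    static uint32_t get_dma_new(uint32_t h)   { return (h >> 26) & 0x1; }
    static uint32_t get_dma_misc(uint32_t h)  { return (h >> 20) & 0x7; }

    int main(void)
    {
        /* COPY (0x3), tiled, new-style, misc 0, count 16 dwords */
        uint32_t header = (0x3u << 28) | (1u << 26) | (1u << 23) | 16u;

        printf("cmd=%x count=%u t=%u new=%u misc=%u\n",
               get_dma_cmd(header), get_dma_count(header),
               get_dma_t(header), get_dma_new(header),
               get_dma_misc(header));
        return 0;
    }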
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 2bc0f6a..cb9baaa 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -45,6 +45,8 @@
 #define TURKS_GB_ADDR_CONFIG_GOLDEN          0x02010002
 #define CEDAR_GB_ADDR_CONFIG_GOLDEN          0x02010001
 #define CAICOS_GB_ADDR_CONFIG_GOLDEN         0x02010001
+#define SUMO_GB_ADDR_CONFIG_GOLDEN           0x02010002
+#define SUMO2_GB_ADDR_CONFIG_GOLDEN          0x02010002
 
 /* Registers */
 
@@ -355,6 +357,54 @@
 #       define AFMT_MPEG_INFO_UPDATE         (1 << 10)
 #define AFMT_GENERIC0_7                      0x7138
 
+/* DCE4/5 ELD audio interface */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0        0x5f84 /* LPCM */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1        0x5f88 /* AC3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2        0x5f8c /* MPEG1 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3        0x5f90 /* MP3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4        0x5f94 /* MPEG2 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5        0x5f98 /* AAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6        0x5f9c /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7        0x5fa0 /* ATRAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8        0x5fa4 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9        0x5fa8 /* Dolby Digital */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10       0x5fac /* DTS-HD */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11       0x5fb0 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12       0x5fb4 /* DST */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13       0x5fb8 /* WMA Pro */
+#       define MAX_CHANNELS(x)                            (((x) & 0x7) << 0)
+/* max channels minus one.  7 = 8 channels */
+#       define SUPPORTED_FREQUENCIES(x)                   (((x) & 0xff) << 8)
+#       define DESCRIPTOR_BYTE_2(x)                       (((x) & 0xff) << 16)
+#       define SUPPORTED_FREQUENCIES_STEREO(x)            (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
+#define AZ_HOT_PLUG_CONTROL                               0x5e78
+#       define AZ_FORCE_CODEC_WAKE                        (1 << 0)
+#       define PIN0_JACK_DETECTION_ENABLE                 (1 << 4)
+#       define PIN1_JACK_DETECTION_ENABLE                 (1 << 5)
+#       define PIN2_JACK_DETECTION_ENABLE                 (1 << 6)
+#       define PIN3_JACK_DETECTION_ENABLE                 (1 << 7)
+#       define PIN0_UNSOLICITED_RESPONSE_ENABLE           (1 << 8)
+#       define PIN1_UNSOLICITED_RESPONSE_ENABLE           (1 << 9)
+#       define PIN2_UNSOLICITED_RESPONSE_ENABLE           (1 << 10)
+#       define PIN3_UNSOLICITED_RESPONSE_ENABLE           (1 << 11)
+#       define CODEC_HOT_PLUG_ENABLE                      (1 << 12)
+#       define PIN0_AUDIO_ENABLED                         (1 << 24)
+#       define PIN1_AUDIO_ENABLED                         (1 << 25)
+#       define PIN2_AUDIO_ENABLED                         (1 << 26)
+#       define PIN3_AUDIO_ENABLED                         (1 << 27)
+#       define AUDIO_ENABLED                              (1 << 31)
+
+
 #define	GC_USER_SHADER_PIPE_CONFIG			0x8954
 #define		INACTIVE_QD_PIPES(x)				((x) << 8)
 #define		INACTIVE_QD_PIPES_MASK				0x0000FF00
@@ -651,6 +701,7 @@
 #define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
 #define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
 #define VM_CONTEXT1_CNTL				0x1414
+#define VM_CONTEXT1_CNTL2				0x1434
 #define	VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x153C
 #define	VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x157C
 #define	VM_CONTEXT0_PAGE_TABLE_START_ADDR		0x155C
@@ -672,6 +723,8 @@
 #define		CACHE_UPDATE_MODE(x)				((x) << 6)
 #define	VM_L2_STATUS					0x140C
 #define		L2_BUSY						(1 << 0)
+#define	VM_CONTEXT1_PROTECTION_FAULT_ADDR		0x14FC
+#define	VM_CONTEXT1_PROTECTION_FAULT_STATUS		0x14DC
 
 #define	WAIT_UNTIL					0x8040
 
@@ -854,6 +907,37 @@
 #       define DC_HPDx_RX_INT_TIMER(x)                    ((x) << 16)
 #       define DC_HPDx_EN                                 (1 << 28)
 
+/* ASYNC DMA */
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+#define DMA_CNTL                                          0xd02c
+#       define TRAP_ENABLE                                (1 << 0)
+#       define SEM_INCOMPLETE_INT_ENABLE                  (1 << 1)
+#       define SEM_WAIT_INT_ENABLE                        (1 << 2)
+#       define DATA_SWAP_ENABLE                           (1 << 3)
+#       define FENCE_SWAP_ENABLE                          (1 << 4)
+#       define CTXEMPTY_INT_ENABLE                        (1 << 28)
+#define DMA_TILING_CONFIG  				  0xD0B8
+
+#define CAYMAN_DMA1_CNTL                                  0xd82c
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
+					 (((t) & 0x1) << 23) |		\
+					 (((s) & 0x1) << 22) |		\
+					 (((n) & 0xFFFFF) << 0))
+/* async DMA Packet types */
+#define	DMA_PACKET_WRITE				  0x2
+#define	DMA_PACKET_COPY					  0x3
+#define	DMA_PACKET_INDIRECT_BUFFER			  0x4
+#define	DMA_PACKET_SEMAPHORE				  0x5
+#define	DMA_PACKET_FENCE				  0x6
+#define	DMA_PACKET_TRAP					  0x7
+#define	DMA_PACKET_SRBM_WRITE				  0x9
+#define	DMA_PACKET_CONSTANT_FILL			  0xd
+#define	DMA_PACKET_NOP					  0xf
+
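The DMA_PACKET() macro above packs the opcode into bits 31:28, the tiling and
semaphore bits into bits 23 and 22, and a 20-bit count into the low bits.  A
standalone sanity check, restating the macro as added here:

    #include <stdint.h>
    #include <assert.h>

    #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
                                      (((t) & 0x1) << 23) |   \
                                      (((s) & 0x1) << 22) |   \
                                      (((n) & 0xFFFFF) << 0))
    #define DMA_PACKET_WRITE 0x2

    int main(void)
    {
        /* header for a linear (t=0) write of one dword (n=1) */
        assert(DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1) == 0x20000001);
        return 0;
    }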
 /* PCIE link stuff */
 #define PCIE_LC_TRAINING_CNTL                             0xa1 /* PCIE_P */
 #define PCIE_LC_LINK_WIDTH_CNTL                           0xa2 /* PCIE_P */
@@ -951,6 +1035,53 @@
 #define	PACKET3_WAIT_REG_MEM				0x3C
 #define	PACKET3_MEM_WRITE				0x3D
 #define	PACKET3_INDIRECT_BUFFER				0x32
+#define	PACKET3_CP_DMA					0x41
+/* 1. header
+ * 2. SRC_ADDR_LO or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
+ *    SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+#              define PACKET3_CP_DMA_DST_SEL(x)    ((x) << 20)
+                /* 0 - SRC_ADDR
+		 * 1 - GDS
+		 */
+#              define PACKET3_CP_DMA_ENGINE(x)     ((x) << 27)
+                /* 0 - ME
+		 * 1 - PFP
+		 */
+#              define PACKET3_CP_DMA_SRC_SEL(x)    ((x) << 29)
+                /* 0 - SRC_ADDR
+		 * 1 - GDS
+		 * 2 - DATA
+		 */
+#              define PACKET3_CP_DMA_CP_SYNC       (1 << 31)
+/* COMMAND */
+#              define PACKET3_CP_DMA_DIS_WC        (1 << 21)
+#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+                /* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+                /* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_SAS       (1 << 26)
+                /* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_DAS       (1 << 27)
+                /* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_SAIC      (1 << 28)
+#              define PACKET3_CP_DMA_CMD_DAIC      (1 << 29)
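A simplified sketch of the byte-count rule the VM parser in evergreen_cs.c
above enforces for PACKET3_CP_DMA: register-space transfers need a
dword-aligned count, while plain memory-to-memory copies may use any byte
count.  cp_dma_count_ok is a hypothetical helper, and it deliberately ignores
the SRC_SEL/DST_SEL memory-vs-GDS cases the real check also considers:

    #include <stdint.h>
    #include <stdbool.h>
    #include <assert.h>

    #define PACKET3_CP_DMA_CMD_SAS (1 << 26)
    #define PACKET3_CP_DMA_CMD_DAS (1 << 27)

    static bool cp_dma_count_ok(uint32_t command)
    {
        uint32_t byte_count = command & 0x1fffff;

        if (command & (PACKET3_CP_DMA_CMD_SAS | PACKET3_CP_DMA_CMD_DAS))
            return (byte_count % 4) == 0; /* register transfer */
        return true;                      /* mem-to-mem */
    }

    int main(void)
    {
        assert(cp_dma_count_ok(8));                           /* mem-to-mem */
        assert(!cp_dma_count_ok(PACKET3_CP_DMA_CMD_SAS | 6)); /* misaligned */
        return 0;
    }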
 #define	PACKET3_SURFACE_SYNC				0x43
 #              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
 #              define PACKET3_CB1_DEST_BASE_ENA    (1 << 7)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index cda01f8..7bdbcb0 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
 
 	tmp = gb_addr_config & NUM_PIPES_MASK;
 	tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -784,10 +786,20 @@
 	/* enable context1-7 */
 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
 	       (u32)(rdev->dummy_page.addr >> 12));
-	WREG32(VM_CONTEXT1_CNTL2, 0);
-	WREG32(VM_CONTEXT1_CNTL, 0);
+	WREG32(VM_CONTEXT1_CNTL2, 4);
 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
 
 	cayman_pcie_gart_tlb_flush(rdev);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -905,6 +917,7 @@
 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
 		WREG32(SCRATCH_UMSK, 0);
+		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	}
 }
 
@@ -1118,6 +1131,181 @@
 	return 0;
 }
 
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine.  The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has its own packet format that is
+ * different from the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things.  It also
+ * has support for tiling/detiling of buffers.
+ * Cayman and newer support two asynchronous DMA engines.
+ */
+/**
+ * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (cayman-SI).
+ */
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+				struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
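The (ring->wptr & 7) != 5 padding loop works because the indirect-buffer
packet is 3 dwords and must end on an 8-dword boundary, so it has to start at
offset 5 mod 8.  A hedged sketch of the equivalent pad count (dma_ib_pad_nops
is a hypothetical helper):

    #include <stdint.h>
    #include <stdio.h>

    /* NOPs needed so a 3-dword IB packet starting at wptr ends on an
     * 8-dword boundary, i.e. so that (wptr & 7) == 5. */
    static uint32_t dma_ib_pad_nops(uint32_t wptr)
    {
        return (5u - (wptr & 7u)) & 7u;
    }

    int main(void)
    {
        uint32_t w;

        for (w = 0; w < 8; w++)
            printf("wptr %% 8 == %u -> %u NOPs\n", w, dma_ib_pad_nops(w));
        return 0;
    }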
+/**
+ * cayman_dma_stop - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines (cayman-SI).
+ */
+void cayman_dma_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl;
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	/* dma0 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+
+	/* dma1 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+}
+
+/**
+ * cayman_dma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffers and enable them (cayman-SI).
+ * Returns 0 for success, error for failure.
+ */
+int cayman_dma_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	u32 rb_cntl, dma_cntl;
+	u32 rb_bufsz;
+	u32 reg_offset, wb_offset;
+	int i, r;
+
+	/* Reset dma */
+	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0) {
+			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+			reg_offset = DMA0_REGISTER_OFFSET;
+			wb_offset = R600_WB_DMA_RPTR_OFFSET;
+		} else {
+			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+			reg_offset = DMA1_REGISTER_OFFSET;
+			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+		}
+
+		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+		/* Set ring buffer size in dwords */
+		rb_bufsz = drm_order(ring->ring_size / 4);
+		rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+
+		/* Initialize the ring buffer's read and write pointers */
+		WREG32(DMA_RB_RPTR + reg_offset, 0);
+		WREG32(DMA_RB_WPTR + reg_offset, 0);
+
+		/* set the wb address whether it's enabled or not */
+		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+		if (rdev->wb.enabled)
+			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+
+		/* enable DMA IBs */
+		WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
+
+		dma_cntl = RREG32(DMA_CNTL + reg_offset);
+		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+		WREG32(DMA_CNTL + reg_offset, dma_cntl);
+
+		ring->wptr = 0;
+		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+
+		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+
+		ring->ready = true;
+
+		r = radeon_ring_test(rdev, ring->idx, ring);
+		if (r) {
+			ring->ready = false;
+			return r;
+		}
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
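The ring-size field written into DMA_RB_CNTL is log2 of the ring size in
dwords; drm_order() supplies that log.  A small standalone check, with
order_base_2 as a stand-in for drm_order():

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t order_base_2(uint32_t x)
    {
        uint32_t o = 0;

        while ((1u << o) < x)
            o++;
        return o;
    }

    int main(void)
    {
        uint32_t ring_bytes = 64 * 1024; /* DMA ring size used above */

        /* 16384 dwords -> field value 14 */
        printf("DMA_RB_SIZE field = %u\n", order_base_2(ring_bytes / 4));
        return 0;
    }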
+/**
+ * cayman_dma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (cayman-SI).
+ */
+void cayman_dma_fini(struct radeon_device *rdev)
+{
+	cayman_dma_stop(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+}
+
 static int cayman_gpu_soft_reset(struct radeon_device *rdev)
 {
 	struct evergreen_mc_save save;
@@ -1208,6 +1396,32 @@
 	return cayman_gpu_soft_reset(rdev);
 }
 
+/**
+ * cayman_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (cayman-SI).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 dma_status_reg;
+
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
+	else
+		dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
+	if (dma_status_reg & DMA_IDLE) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
 static int cayman_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -1289,6 +1503,18 @@
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -1303,6 +1529,23 @@
 			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = cayman_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -1310,6 +1553,10 @@
 	if (r)
 		return r;
 
+	r = cayman_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1354,7 +1601,7 @@
 {
 	r600_audio_fini(rdev);
 	cayman_cp_enable(rdev, false);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	cayman_dma_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
@@ -1421,6 +1668,14 @@
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 1024 * 1024);
 
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1433,6 +1688,7 @@
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		cayman_cp_fini(rdev);
+		cayman_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		if (rdev->flags & RADEON_IS_IGP)
 			si_rlc_fini(rdev);
@@ -1463,6 +1719,7 @@
 {
 	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
+	cayman_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	if (rdev->flags & RADEON_IS_IGP)
 		si_rlc_fini(rdev);
@@ -1538,30 +1795,57 @@
 {
 	struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+	uint64_t value;
+	unsigned ndw;
 
-	while (count) {
-		unsigned ndw = 1 + count * 2;
-		if (ndw > 0x3FFF)
-			ndw = 0x3FFF;
+	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+		while (count) {
+			ndw = 1 + count * 2;
+			if (ndw > 0x3FFF)
+				ndw = 0x3FFF;
 
-		radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
-		radeon_ring_write(ring, pe);
-		radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
-		for (; ndw > 1; ndw -= 2, --count, pe += 8) {
-			uint64_t value = 0;
-			if (flags & RADEON_VM_PAGE_SYSTEM) {
-				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
+			radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
+			radeon_ring_write(ring, pe);
+			radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
+				if (flags & RADEON_VM_PAGE_SYSTEM) {
+					value = radeon_vm_map_gart(rdev, addr);
+					value &= 0xFFFFFFFFFFFFF000ULL;
+				} else if (flags & RADEON_VM_PAGE_VALID) {
+					value = addr;
+				} else {
+					value = 0;
+				}
 				addr += incr;
-
-			} else if (flags & RADEON_VM_PAGE_VALID) {
-				value = addr;
-				addr += incr;
+				value |= r600_flags;
+				radeon_ring_write(ring, value);
+				radeon_ring_write(ring, upper_32_bits(value));
 			}
+		}
+	} else {
+		while (count) {
+			ndw = count * 2;
+			if (ndw > 0xFFFFE)
+				ndw = 0xFFFFE;
 
-			value |= r600_flags;
-			radeon_ring_write(ring, value);
-			radeon_ring_write(ring, upper_32_bits(value));
+			/* for non-physically contiguous pages (system) */
+			radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
+			radeon_ring_write(ring, pe);
+			radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+				if (flags & RADEON_VM_PAGE_SYSTEM) {
+					value = radeon_vm_map_gart(rdev, addr);
+					value &= 0xFFFFFFFFFFFFF000ULL;
+				} else if (flags & RADEON_VM_PAGE_VALID) {
+					value = addr;
+				} else {
+					value = 0;
+				}
+				addr += incr;
+				value |= r600_flags;
+				radeon_ring_write(ring, value);
+				radeon_ring_write(ring, upper_32_bits(value));
+			}
 		}
 	}
 }
@@ -1596,3 +1880,26 @@
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 	radeon_ring_write(ring, 0x0);
 }
+
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+	struct radeon_ring *ring = &rdev->ring[ridx];
+
+	if (vm == NULL)
+		return;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+	radeon_ring_write(ring, 1);
+
+	/* bits 0-7 are the VM contexts0-7 */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+	radeon_ring_write(ring, 1 << vm->id);
+}
+
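The second dword of each SRBM_WRITE packet emitted above appears to pack a
byte-enable of 0xf in bits 19:16 and the dword index of the target register
(byte offset >> 2) in the low bits.  A hedged sketch; srbm_write_addr and the
register offset used here are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t srbm_write_addr(uint32_t byte_reg)
    {
        return (0xfu << 16) | (byte_reg >> 2);
    }

    int main(void)
    {
        uint32_t vm_invalidate_request = 0x1478; /* hypothetical offset */

        printf("payload = 0x%08x\n", srbm_write_addr(vm_invalidate_request));
        return 0;
    }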
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index cbef681..b93186b 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -50,6 +50,24 @@
 #define		VMID(x)						(((x) & 0x7) << 0)
 #define	SRBM_STATUS				        0x0E50
 
+#define	SRBM_SOFT_RESET				        0x0E60
+#define		SOFT_RESET_BIF				(1 << 1)
+#define		SOFT_RESET_CG				(1 << 2)
+#define		SOFT_RESET_DC				(1 << 5)
+#define		SOFT_RESET_DMA1				(1 << 6)
+#define		SOFT_RESET_GRBM				(1 << 8)
+#define		SOFT_RESET_HDP				(1 << 9)
+#define		SOFT_RESET_IH				(1 << 10)
+#define		SOFT_RESET_MC				(1 << 11)
+#define		SOFT_RESET_RLC				(1 << 13)
+#define		SOFT_RESET_ROM				(1 << 14)
+#define		SOFT_RESET_SEM				(1 << 15)
+#define		SOFT_RESET_VMC				(1 << 17)
+#define		SOFT_RESET_DMA				(1 << 20)
+#define		SOFT_RESET_TST				(1 << 21)
+#define		SOFT_RESET_REGBB		       	(1 << 22)
+#define		SOFT_RESET_ORB				(1 << 23)
+
 #define VM_CONTEXT0_REQUEST_RESPONSE			0x1470
 #define		REQUEST_TYPE(x)					(((x) & 0xf) << 0)
 #define		RESPONSE_TYPE_MASK				0x000000F0
@@ -80,7 +98,18 @@
 #define VM_CONTEXT0_CNTL				0x1410
 #define		ENABLE_CONTEXT					(1 << 0)
 #define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
+#define		RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 3)
 #define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
+#define		DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT	(1 << 6)
+#define		DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT	(1 << 7)
+#define		PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 9)
+#define		PDE0_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 10)
+#define		VALID_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 12)
+#define		VALID_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 13)
+#define		READ_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 15)
+#define		READ_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 16)
+#define		WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 18)
+#define		WRITE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 19)
 #define VM_CONTEXT1_CNTL				0x1414
 #define VM_CONTEXT0_CNTL2				0x1430
 #define VM_CONTEXT1_CNTL2				0x1434
@@ -588,5 +617,62 @@
 #define	PACKET3_SET_APPEND_CNT			        0x75
 #define	PACKET3_ME_WRITE				0x7A
 
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET                              0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET                              0x800 /* not a register */
+
+#define DMA_RB_CNTL                                       0xd000
+#       define DMA_RB_ENABLE                              (1 << 0)
+#       define DMA_RB_SIZE(x)                             ((x) << 1) /* log2 */
+#       define DMA_RB_SWAP_ENABLE                         (1 << 9) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_ENABLE                  (1 << 12)
+#       define DMA_RPTR_WRITEBACK_SWAP_ENABLE             (1 << 13)  /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_TIMER(x)                ((x) << 16) /* log2 */
+#define DMA_RB_BASE                                       0xd004
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI                               0xd01c
+#define DMA_RB_RPTR_ADDR_LO                               0xd020
+
+#define DMA_IB_CNTL                                       0xd024
+#       define DMA_IB_ENABLE                              (1 << 0)
+#       define DMA_IB_SWAP_ENABLE                         (1 << 4)
+#       define CMD_VMID_FORCE                             (1 << 31)
+#define DMA_IB_RPTR                                       0xd028
+#define DMA_CNTL                                          0xd02c
+#       define TRAP_ENABLE                                (1 << 0)
+#       define SEM_INCOMPLETE_INT_ENABLE                  (1 << 1)
+#       define SEM_WAIT_INT_ENABLE                        (1 << 2)
+#       define DATA_SWAP_ENABLE                           (1 << 3)
+#       define FENCE_SWAP_ENABLE                          (1 << 4)
+#       define CTXEMPTY_INT_ENABLE                        (1 << 28)
+#define DMA_STATUS_REG                                    0xd034
+#       define DMA_IDLE                                   (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL                     0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL                      0xd048
+#define DMA_TILING_CONFIG  				  0xd0b8
+#define DMA_MODE                                          0xd0bc
+
+#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
+					 (((t) & 0x1) << 23) |		\
+					 (((s) & 0x1) << 22) |		\
+					 (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n)	((((cmd) & 0xF) << 28) |	\
+					 (((vmid) & 0xF) << 20) |	\
+					 (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define	DMA_PACKET_WRITE				  0x2
+#define	DMA_PACKET_COPY					  0x3
+#define	DMA_PACKET_INDIRECT_BUFFER			  0x4
+#define	DMA_PACKET_SEMAPHORE				  0x5
+#define	DMA_PACKET_FENCE				  0x6
+#define	DMA_PACKET_TRAP					  0x7
+#define	DMA_PACKET_SRBM_WRITE				  0x9
+#define	DMA_PACKET_CONSTANT_FILL			  0xd
+#define	DMA_PACKET_NOP					  0xf
+
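DMA_IB_PACKET() additionally carries the VM id in bits 23:20, matching how
cayman_dma_ring_ib_execute() tags IBs with ib->vm->id.  A standalone sanity
check, restating the macro as added here:

    #include <stdint.h>
    #include <assert.h>

    #define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) |  \
                                         (((vmid) & 0xF) << 20) | \
                                         (((n) & 0xFFFFF) << 0))
    #define DMA_PACKET_INDIRECT_BUFFER 0x4

    int main(void)
    {
        /* indirect-buffer packet tagged with VM id 3 */
        assert(DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, 3, 0) == 0x40300000);
        return 0;
    }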
 #endif
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 376884f..8ff7cac 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4135,23 +4135,36 @@
 	return 0;
 }
 
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+		      bool always_indirect)
 {
-	if (reg < rdev->rmmio_size)
+	if (reg < rdev->rmmio_size && !always_indirect)
 		return readl(((void __iomem *)rdev->rmmio) + reg);
 	else {
+		unsigned long flags;
+		uint32_t ret;
+
+		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
 		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
-		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+
+		return ret;
 	}
 }
 
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+		  bool always_indirect)
 {
-	if (reg < rdev->rmmio_size)
+	if (reg < rdev->rmmio_size && !always_indirect)
 		writel(v, ((void __iomem *)rdev->rmmio) + reg);
 	else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
 		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
 		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
 	}
 }
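The indirect path above is the classic INDEX/DATA pattern: write the register
offset to RADEON_MM_INDEX, then access RADEON_MM_DATA, with mmio_idx_lock
keeping the two-step sequence atomic.  A hedged user-space sketch (no
locking; mmio stands in for the readl()/writel() mapping):

    #include <stdint.h>

    #define RADEON_MM_INDEX 0x0000
    #define RADEON_MM_DATA  0x0004

    static volatile uint32_t *mmio; /* hypothetical mapped register BAR */

    static uint32_t indirect_rreg(uint32_t reg)
    {
        mmio[RADEON_MM_INDEX / 4] = reg;
        return mmio[RADEON_MM_DATA / 4];
    }

    static void indirect_wreg(uint32_t reg, uint32_t v)
    {
        mmio[RADEON_MM_INDEX / 4] = reg;
        mmio[RADEON_MM_DATA / 4] = v;
    }

    int main(void)
    {
        (void)indirect_rreg;
        (void)indirect_wreg;
        return 0;
    }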
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index cda280d..2aaf147 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1370,6 +1370,29 @@
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
+/**
+ * r600_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (r6xx-evergreen).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 dma_status_reg;
+
+	dma_status_reg = RREG32(DMA_STATUS_REG);
+	if (dma_status_reg & DMA_IDLE) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
 int r600_asic_reset(struct radeon_device *rdev)
 {
 	return r600_gpu_soft_reset(rdev);
@@ -1424,13 +1447,7 @@
 
 int r600_count_pipe_bits(uint32_t val)
 {
-	int i, ret = 0;
-
-	for (i = 0; i < 32; i++) {
-		ret += val & 1;
-		val >>= 1;
-	}
-	return ret;
+	return hweight32(val);
 }
 
 static void r600_gpu_init(struct radeon_device *rdev)
@@ -1594,6 +1611,7 @@
 	WREG32(GB_TILING_CONFIG, tiling_config);
 	WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
 	WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+	WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff);
 
 	tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
 	WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
@@ -1871,6 +1889,7 @@
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
 	WREG32(SCRATCH_UMSK, 0);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 }
 
 int r600_init_microcode(struct radeon_device *rdev)
@@ -2196,6 +2215,128 @@
 	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine.  The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has its own packet format that is
+ * different from the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things.  It also
+ * has support for tiling/detiling of buffers.
+ */
+/**
+ * r600_dma_stop - stop the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine (r6xx-evergreen).
+ */
+void r600_dma_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl = RREG32(DMA_RB_CNTL);
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL, rb_cntl);
+
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+}
+
+/**
+ * r600_dma_resume - setup and start the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffer and enable it (r6xx-evergreen).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	u32 rb_cntl, dma_cntl;
+	u32 rb_bufsz;
+	int r;
+
+	/* Reset dma */
+	if (rdev->family >= CHIP_RV770)
+		WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+	else
+		WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+
+	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
+	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+
+	/* Set ring buffer size in dwords */
+	rb_bufsz = drm_order(ring->ring_size / 4);
+	rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+	rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+	WREG32(DMA_RB_CNTL, rb_cntl);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(DMA_RB_RPTR, 0);
+	WREG32(DMA_RB_WPTR, 0);
+
+	/* set the wb address whether it's enabled or not */
+	WREG32(DMA_RB_RPTR_ADDR_HI,
+	       upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
+	WREG32(DMA_RB_RPTR_ADDR_LO,
+	       ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
+
+	if (rdev->wb.enabled)
+		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+	WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
+
+	/* enable DMA IBs */
+	WREG32(DMA_IB_CNTL, DMA_IB_ENABLE);
+
+	dma_cntl = RREG32(DMA_CNTL);
+	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+	WREG32(DMA_CNTL, dma_cntl);
+
+	if (rdev->family >= CHIP_RV770)
+		WREG32(DMA_MODE, 1);
+
+	ring->wptr = 0;
+	WREG32(DMA_RB_WPTR, ring->wptr << 2);
+
+	ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
+
+	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
+
+	ring->ready = true;
+
+	r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
+	if (r) {
+		ring->ready = false;
+		return r;
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
+/**
+ * r600_dma_fini - tear down the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine and free the ring (r6xx-evergreen).
+ */
+void r600_dma_fini(struct radeon_device *rdev)
+{
+	r600_dma_stop(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+}
 
 /*
  * GPU scratch registers helpers function.
@@ -2252,6 +2393,64 @@
 	return r;
 }
 
+/**
+ * r600_dma_ring_test - simple async dma engine test
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test the DMA engine by using it to write a
+ * value to memory (r6xx-SI).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_ring_test(struct radeon_device *rdev,
+		       struct radeon_ring *ring)
+{
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ring_lock(rdev, ring, 4);
+	if (r) {
+		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+	} else {
+		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
+/*
+ * CP fences/semaphores
+ */
+
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence)
 {
@@ -2315,6 +2514,59 @@
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
 }
 
+/*
+ * DMA fences/semaphores
+ */
+
+/**
+ * r600_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and a DMA trap packet to generate
+ * an interrupt if needed (r6xx-r7xx).
+ */
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+			      struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	/* write the fence */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+	radeon_ring_write(ring, lower_32_bits(fence->seq));
+	/* generate an interrupt */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+}
+
+/**
+ * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @semaphore: radeon semaphore object
+ * @emit_wait: wait or signal semaphore
+ *
+ * Add a DMA semaphore packet to the ring to wait on or signal
+ * other rings (r6xx-SI).
+ */
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+				  struct radeon_ring *ring,
+				  struct radeon_semaphore *semaphore,
+				  bool emit_wait)
+{
+	u64 addr = semaphore->gpu_addr;
+	u32 s = emit_wait ? 0 : 1;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+}
+
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
@@ -2334,6 +2586,80 @@
 	return 0;
 }
 
+/**
+ * r600_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU pages using the DMA engine (r6xx-r7xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset, uint64_t dst_offset,
+		  unsigned num_gpu_pages,
+		  struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFF)
+			cur_size_in_dw = 0xFFFF;
+		size_in_dw -= cur_size_in_dw;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
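r600_copy_dma() splits a transfer into DMA_PACKET_COPY packets of at most
0xFFFF dwords each; a quick standalone check of the loop-count arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* 1024 GPU pages of 4 KiB, split into <= 0xFFFF-dword packets */
        uint32_t size_in_dw = 1024 * (4096 / 4);
        uint32_t num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);

        printf("%u dwords -> %u copy packets\n", size_in_dw, num_loops);
        /* prints: 1048576 dwords -> 17 copy packets */
        return 0;
    }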
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size)
@@ -2349,7 +2675,7 @@
 
 static int r600_startup(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_ring *ring;
 	int r;
 
 	/* enable pcie gen2 link */
@@ -2394,6 +2720,12 @@
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -2403,12 +2735,20 @@
 	}
 	r600_irq_set(rdev);
 
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
 			     0, 0xfffff, RADEON_CP_PACKET2);
-
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR, DMA_RB_WPTR,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = r600_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -2416,6 +2756,10 @@
 	if (r)
 		return r;
 
+	r = r600_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2471,7 +2815,7 @@
 {
 	r600_audio_fini(rdev);
 	r600_cp_stop(rdev);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	r600_dma_stop(rdev);
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
@@ -2544,6 +2888,9 @@
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -2556,6 +2903,7 @@
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r600_cp_fini(rdev);
+		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -2572,6 +2920,7 @@
 	r600_audio_fini(rdev);
 	r600_blit_fini(rdev);
 	r600_cp_fini(rdev);
+	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
@@ -2674,6 +3023,104 @@
 	return r;
 }
 
+/**
+ * r600_dma_ib_test - test an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (r6xx-SI).
+ * Returns 0 on success, error on failure.
+ */
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_ib ib;
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp = 0;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+	ib.ptr[3] = 0xDEADBEEF;
+	ib.length_dw = 4;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		return r;
+	}
+	r = radeon_fence_wait(ib.fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+		r = -EINVAL;
+	}
+	radeon_ib_free(rdev, &ib);
+	return r;
+}
+
+/**
+ * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (r6xx-r7xx).
+ */
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
 /*
  * Interrupts
  *
@@ -2865,6 +3312,8 @@
 	u32 tmp;
 
 	WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+	WREG32(DMA_CNTL, tmp);
 	WREG32(GRBM_INT_CNTL, 0);
 	WREG32(DxMODE_INT_MASK, 0);
 	WREG32(D1GRPH_INTERRUPT_CONTROL, 0);
@@ -3006,6 +3455,7 @@
 	u32 grbm_int_cntl = 0;
 	u32 hdmi0, hdmi1;
 	u32 d1grph = 0, d2grph = 0;
+	u32 dma_cntl;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3040,12 +3490,19 @@
 		hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
 		hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
 	}
+	dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
 
 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("r600_irq_set: sw int\n");
 		cp_int_cntl |= RB_INT_ENABLE;
 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
 	}
+
+	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+		DRM_DEBUG("r600_irq_set: sw int dma\n");
+		dma_cntl |= TRAP_ENABLE;
+	}
+
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("r600_irq_set: vblank 0\n");
@@ -3090,6 +3547,7 @@
 	}
 
 	WREG32(CP_INT_CNTL, cp_int_cntl);
+	WREG32(DMA_CNTL, dma_cntl);
 	WREG32(DxMODE_INT_MASK, mode_int);
 	WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph);
 	WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph);
@@ -3469,6 +3927,10 @@
 			DRM_DEBUG("IH: CP EOP\n");
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
+		case 224: /* DMA trap event */
+			DRM_DEBUG("IH: DMA trap\n");
+			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 2514123..be85f75 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -721,12 +721,7 @@
 
 static int r600_count_pipe_bits(uint32_t val)
 {
-	int i, ret = 0;
-	for (i = 0; i < 32; i++) {
-		ret += val & 1;
-		val >>= 1;
-	}
-	return ret;
+	return hweight32(val);
 }
 
 static void r600_gfx_init(struct drm_device *dev,
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 211c402..0be768b 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@
 			/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
 			nby = round_up(nby, track->npipes * 8);
 		} else {
-			/* htile widht & nby (8 or 4) make 2 bits number */
-			tmp = track->htile_surface & 3;
+			/* always assume 8x8 htile */
 			/* align is htile align * 8, htile align vary according to
 			 * number of pipe and tile width and nby
 			 */
 			switch (track->npipes) {
 			case 8:
-				switch (tmp) {
-				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-					nbx = round_up(nbx, 64 * 8);
-					nby = round_up(nby, 64 * 8);
-					break;
-				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 64 * 8);
-					nby = round_up(nby, 32 * 8);
-					break;
-				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 32 * 8);
-					nby = round_up(nby, 32 * 8);
-					break;
-				default:
-					return -EINVAL;
-				}
+				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				nbx = round_up(nbx, 64 * 8);
+				nby = round_up(nby, 64 * 8);
 				break;
 			case 4:
-				switch (tmp) {
-				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-					nbx = round_up(nbx, 64 * 8);
-					nby = round_up(nby, 32 * 8);
-					break;
-				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 32 * 8);
-					nby = round_up(nby, 32 * 8);
-					break;
-				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 32 * 8);
-					nby = round_up(nby, 16 * 8);
-					break;
-				default:
-					return -EINVAL;
-				}
+				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				nbx = round_up(nbx, 64 * 8);
+				nby = round_up(nby, 32 * 8);
 				break;
 			case 2:
-				switch (tmp) {
-				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-					nbx = round_up(nbx, 32 * 8);
-					nby = round_up(nby, 32 * 8);
-					break;
-				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 32 * 8);
-					nby = round_up(nby, 16 * 8);
-					break;
-				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 16 * 8);
-					nby = round_up(nby, 16 * 8);
-					break;
-				default:
-					return -EINVAL;
-				}
+				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				nbx = round_up(nbx, 32 * 8);
+				nby = round_up(nby, 32 * 8);
 				break;
 			case 1:
-				switch (tmp) {
-				case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-					nbx = round_up(nbx, 32 * 8);
-					nby = round_up(nby, 16 * 8);
-					break;
-				case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-				case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 16 * 8);
-					nby = round_up(nby, 16 * 8);
-					break;
-				case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-					nbx = round_up(nbx, 16 * 8);
-					nby = round_up(nby, 8 * 8);
-					break;
-				default:
-					return -EINVAL;
-				}
+				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				nbx = round_up(nbx, 32 * 8);
+				nby = round_up(nby, 16 * 8);
 				break;
 			default:
 				dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +689,10 @@
 			}
 		}
 		/* compute number of htile */
-		nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
-		nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
-		size = nbx * nby * 4;
+		nbx = nbx >> 3;
+		nby = nby >> 3;
+		/* size must be aligned on npipes * 2K boundary */
+		size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
 		size += track->htile_offset;
 
 		if (size > radeon_bo_size(track->htile_bo)) {
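As a worked example of the simplified computation (illustrative numbers): a 1024x768 depth surface on a 2-pipe part rounds both dimensions to multiples of 32 * 8 = 256 pixels, leaving 1024x768, which is 128x96 htiles at 8x8 pixels each:

	nbx = round_up(1024, 32 * 8) >> 3;	/* 1024 pixels -> 128 htiles */
	nby = round_up(768, 32 * 8) >> 3;	/*  768 pixels ->  96 htiles */
	size = roundup(128 * 96 * 4, 2 * 2048);	/* 49152, already aligned */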
@@ -1492,6 +1436,8 @@
 		break;
 	case DB_HTILE_SURFACE:
 		track->htile_surface = radeon_get_ib_value(p, idx);
+		/* force 8x8 htile width and height */
+		ib[idx] |= 3;
 		track->db_dirty = true;
 		break;
 	case SQ_PGM_START_FS:
@@ -1949,6 +1895,78 @@
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_CP_DMA:
+	{
+		u32 command, size;
+		u64 offset, tmp;
+		if (pkt->count != 4) {
+			DRM_ERROR("bad CP DMA\n");
+			return -EINVAL;
+		}
+		command = radeon_get_ib_value(p, idx+4);
+		size = command & 0x1fffff;
+		if (command & PACKET3_CP_DMA_CMD_SAS) {
+			/* src address space is register */
+			DRM_ERROR("CP DMA SAS not supported\n");
+			return -EINVAL;
+		} else {
+			if (command & PACKET3_CP_DMA_CMD_SAIC) {
+				DRM_ERROR("CP DMA SAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			/* src address space is memory */
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad CP DMA SRC\n");
+				return -EINVAL;
+			}
+
+			tmp = radeon_get_ib_value(p, idx) +
+				((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+			offset = reloc->lobj.gpu_offset + tmp;
+
+			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+				dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+					 tmp + size, radeon_bo_size(reloc->robj));
+				return -EINVAL;
+			}
+
+			ib[idx] = offset;
+			ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+		}
+		if (command & PACKET3_CP_DMA_CMD_DAS) {
+			/* dst address space is register */
+			DRM_ERROR("CP DMA DAS not supported\n");
+			return -EINVAL;
+		} else {
+			/* dst address space is memory */
+			if (command & PACKET3_CP_DMA_CMD_DAIC) {
+				DRM_ERROR("CP DMA DAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad CP DMA DST\n");
+				return -EINVAL;
+			}
+
+			tmp = radeon_get_ib_value(p, idx+2) +
+				((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+			offset = reloc->lobj.gpu_offset + tmp;
+
+			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+				dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+					 tmp + size, radeon_bo_size(reloc->robj));
+				return -EINVAL;
+			}
+
+			ib[idx+2] = offset;
+			ib[idx+3] = upper_32_bits(offset) & 0xff;
+		}
+		break;
+	}
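Both halves of the CP DMA check above use the same 40-bit addressing scheme: a full low dword plus the low 8 bits of the following dword. With made-up values:

	u32 lo = 0x12345678;			/* SRC/DST_ADDR_LO */
	u32 hi = 0x9a;				/* SRC/DST_ADDR_HI & 0xff */
	u64 va = ((u64)hi << 32) | lo;		/* 0x9a12345678 */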
 	case PACKET3_SURFACE_SYNC:
 		if (pkt->count != 3) {
 			DRM_ERROR("bad SURFACE_SYNC\n");
@@ -2496,3 +2514,196 @@
 {
 	r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
 }
+
+/*
+ *  DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p:		parser structure holding parsing context.
+ * @cs_reloc:		reloc information

+ *
+ * Return the next reloc so the caller can validate the BO and
+ * compute the patched GPU offset.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *relocs_chunk;
+	unsigned idx;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk!\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	idx = p->dma_reloc_idx;
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	*cs_reloc = p->relocs_ptr[idx];
+	p->dma_reloc_idx++;
+	return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
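For example, decoding a hypothetical header dword for a linear write of 16 dwords:

	u32 header = 0x20000010;		/* made-up example */
	u32 cmd   = GET_DMA_CMD(header);	/* 0x2 = DMA_PACKET_WRITE */
	u32 count = GET_DMA_COUNT(header);	/* 16 payload dwords */
	u32 tiled = GET_DMA_T(header);		/* 0 = linear addressing */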
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p:		parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl, updates the GPU addresses
+ * based on the reloc information, and checks for errors (R6xx-R7xx).
+ * Returns 0 on success, negative error code on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_reloc *src_reloc, *dst_reloc;
+	u32 header, cmd, count, tiled;
+	volatile u32 *ib = p->ib.ptr;
+	u32 idx, idx_value;
+	u64 src_offset, dst_offset;
+	int r;
+
+	do {
+		if (p->idx >= ib_chunk->length_dw) {
+			DRM_ERROR("Cannot parse packet at %d after CS end %d!\n",
+				  p->idx, ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		idx = p->idx;
+		header = radeon_get_ib_value(p, idx);
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += count + 5;
+			} else {
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += count + 3;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_COPY:
+			r = r600_dma_cs_next_reloc(p, &src_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				idx_value = radeon_get_ib_value(p, idx + 2);
+				/* detile bit */
+				if (idx_value & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = ib[idx+5];
+					dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					src_offset = ib[idx+5];
+					src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = ib[idx+1];
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				p->idx += 7;
+			} else {
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+			}
+			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+				return -EINVAL;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy dst buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			if (p->family < CHIP_RV770) {
+				DRM_ERROR("Constant Fill is 7xx only!\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+				return -EINVAL;
+			}
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			p->idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			p->idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib.length_dw; r++) {
+		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
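For reference, a linear DMA_PACKET_WRITE that this parser accepts would be laid out as below, built with the DMA_PACKET() macro added to r600d.h (payload contents are arbitrary):

	ib[idx+0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, count);
	ib[idx+1] = dst_addr & 0xfffffffc;		/* DST_ADDR_LO */
	ib[idx+2] = upper_32_bits(dst_addr) & 0xff;	/* DST_ADDR_HI */
	/* ib[idx+3] .. ib[idx+2+count] hold the payload;
	 * the parser advances p->idx by count + 3 */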
diff --git a/drivers/gpu/drm/radeon/r600_reg.h b/drivers/gpu/drm/radeon/r600_reg.h
index 2b960cb..909219b 100644
--- a/drivers/gpu/drm/radeon/r600_reg.h
+++ b/drivers/gpu/drm/radeon/r600_reg.h
@@ -96,6 +96,15 @@
 #define R600_CONFIG_F0_BASE                                     0x542C
 #define R600_CONFIG_APER_SIZE                                   0x5430
 
+#define	R600_BIF_FB_EN						0x5490
+#define		R600_FB_READ_EN					(1 << 0)
+#define		R600_FB_WRITE_EN				(1 << 1)
+
+#define R600_CITF_CNTL           				0x200c
+#define		R600_BLACKOUT_MASK				0x00000003
+
+#define R700_MC_CITF_CNTL           				0x25c0
+
 #define R600_ROM_CNTL                              0x1600
 #       define R600_SCK_OVERWRITE                  (1 << 1)
 #       define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index fa6f370..4a53402 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -590,9 +590,59 @@
 #define         WAIT_2D_IDLECLEAN_bit                           (1 << 16)
 #define         WAIT_3D_IDLECLEAN_bit                           (1 << 17)
 
+/* async DMA */
+#define DMA_TILING_CONFIG                                 0x3ec4
+#define DMA_CONFIG                                        0x3e4c
+
+#define DMA_RB_CNTL                                       0xd000
+#       define DMA_RB_ENABLE                              (1 << 0)
+#       define DMA_RB_SIZE(x)                             ((x) << 1) /* log2 */
+#       define DMA_RB_SWAP_ENABLE                         (1 << 9) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_ENABLE                  (1 << 12)
+#       define DMA_RPTR_WRITEBACK_SWAP_ENABLE             (1 << 13)  /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_TIMER(x)                ((x) << 16) /* log2 */
+#define DMA_RB_BASE                                       0xd004
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI                               0xd01c
+#define DMA_RB_RPTR_ADDR_LO                               0xd020
+
+#define DMA_IB_CNTL                                       0xd024
+#       define DMA_IB_ENABLE                              (1 << 0)
+#       define DMA_IB_SWAP_ENABLE                         (1 << 4)
+#define DMA_IB_RPTR                                       0xd028
+#define DMA_CNTL                                          0xd02c
+#       define TRAP_ENABLE                                (1 << 0)
+#       define SEM_INCOMPLETE_INT_ENABLE                  (1 << 1)
+#       define SEM_WAIT_INT_ENABLE                        (1 << 2)
+#       define DATA_SWAP_ENABLE                           (1 << 3)
+#       define FENCE_SWAP_ENABLE                          (1 << 4)
+#       define CTXEMPTY_INT_ENABLE                        (1 << 28)
+#define DMA_STATUS_REG                                    0xd034
+#       define DMA_IDLE                                   (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL                     0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL                      0xd048
+#define DMA_MODE                                          0xd0bc
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
+					 (((t) & 0x1) << 23) |		\
+					 (((s) & 0x1) << 22) |		\
+					 (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define	DMA_PACKET_WRITE				  0x2
+#define	DMA_PACKET_COPY					  0x3
+#define	DMA_PACKET_INDIRECT_BUFFER			  0x4
+#define	DMA_PACKET_SEMAPHORE				  0x5
+#define	DMA_PACKET_FENCE				  0x6
+#define	DMA_PACKET_TRAP					  0x7
+#define	DMA_PACKET_CONSTANT_FILL			  0xd /* 7xx only */
+#define	DMA_PACKET_NOP					  0xf
+
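The fields annotated "log2" take the base-2 logarithm of the size rather than the size itself; a plausible setup for a 64KB ring, assuming order_base_2() from <linux/log2.h>:

	u32 rb_bufsz = order_base_2(ring->ring_size / 4);	/* dwords -> log2 */
	u32 rb_cntl = DMA_RB_SIZE(rb_bufsz);	/* OR in DMA_RB_ENABLE only
						 * after base/rptr are set */
	WREG32(DMA_RB_CNTL, rb_cntl);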
 #define IH_RB_CNTL                                        0x3e00
 #       define IH_RB_ENABLE                               (1 << 0)
-#       define IH_IB_SIZE(x)                              ((x) << 1) /* log2 */
+#       define IH_RB_SIZE(x)                              ((x) << 1) /* log2 */
 #       define IH_RB_FULL_DRAIN_ENABLE                    (1 << 6)
 #       define IH_WPTR_WRITEBACK_ENABLE                   (1 << 8)
 #       define IH_WPTR_WRITEBACK_TIMER(x)                 ((x) << 9) /* log2 */
@@ -637,7 +687,9 @@
 #define TN_RLC_CLEAR_STATE_RESTORE_BASE                   0x3f20
 
 #define SRBM_SOFT_RESET                                   0xe60
+#       define SOFT_RESET_DMA                             (1 << 12)
 #       define SOFT_RESET_RLC                             (1 << 13)
+#       define RV770_SOFT_RESET_DMA                       (1 << 20)
 
 #define CP_INT_CNTL                                       0xc124
 #       define CNTX_BUSY_INT_ENABLE                       (1 << 19)
@@ -1134,6 +1186,38 @@
 #define	PACKET3_WAIT_REG_MEM				0x3C
 #define	PACKET3_MEM_WRITE				0x3D
 #define	PACKET3_INDIRECT_BUFFER				0x32
+#define	PACKET3_CP_DMA					0x41
+/* 1. header
+ * 2. SRC_ADDR_LO [31:0]
+ * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+#              define PACKET3_CP_DMA_CP_SYNC       (1 << 31)
+/* COMMAND */
+#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+                /* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+                /* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_SAS       (1 << 26)
+                /* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_DAS       (1 << 27)
+                /* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_SAIC      (1 << 28)
+#              define PACKET3_CP_DMA_CMD_DAIC      (1 << 29)
 #define	PACKET3_SURFACE_SYNC				0x43
 #              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
 #              define PACKET3_TC_ACTION_ENA        (1 << 23)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8c42d54..5dc744d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,7 +109,7 @@
 #define RADEON_BIOS_NUM_SCRATCH			8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS			3
+#define RADEON_NUM_RINGS			5
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ		0LL
@@ -122,6 +122,11 @@
 #define CAYMAN_RING_TYPE_CP1_INDEX		1
 #define CAYMAN_RING_TYPE_CP2_INDEX		2
 
+/* R600+ has an async dma ring */
+#define R600_RING_TYPE_DMA_INDEX		3
+/* cayman adds a second async dma ring */
+#define CAYMAN_RING_TYPE_DMA1_INDEX		4
+
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET			(1 << 20)
 #define RADEON_VA_RESERVED_SIZE			(8 << 20)
@@ -313,6 +318,7 @@
 	struct list_head		list;
 	/* Protected by tbo.reserved */
 	u32				placements[3];
+	u32				busy_placements[3];
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
@@ -787,6 +793,15 @@
 void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
 
 
+/* r600 async dma */
+void r600_dma_stop(struct radeon_device *rdev);
+int r600_dma_resume(struct radeon_device *rdev);
+void r600_dma_fini(struct radeon_device *rdev);
+
+void cayman_dma_stop(struct radeon_device *rdev);
+int cayman_dma_resume(struct radeon_device *rdev);
+void cayman_dma_fini(struct radeon_device *rdev);
+
 /*
  * CS.
  */
@@ -824,6 +839,7 @@
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
 	struct list_head	validated;
+	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
 	int			chunk_ib_idx;
 	int			chunk_relocs_idx;
@@ -883,7 +899,9 @@
 #define RADEON_WB_CP_RPTR_OFFSET 1024
 #define RADEON_WB_CP1_RPTR_OFFSET 1280
 #define RADEON_WB_CP2_RPTR_OFFSET 1536
+#define R600_WB_DMA_RPTR_OFFSET   1792
 #define R600_WB_IH_WPTR_OFFSET   2048
+#define CAYMAN_WB_DMA1_RPTR_OFFSET   2304
 #define R600_WB_EVENT_OFFSET     3072
 
 /**
@@ -1539,6 +1557,8 @@
 	/* Register mmio */
 	resource_size_t			rmmio_base;
 	resource_size_t			rmmio_size;
+	/* protects concurrent MM_INDEX/DATA based register access */
+	spinlock_t mmio_idx_lock;
 	void __iomem			*rmmio;
 	radeon_rreg_t			mc_rreg;
 	radeon_wreg_t			mc_wreg;
@@ -1614,8 +1634,10 @@
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+		      bool always_indirect);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+		  bool always_indirect);
 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
 void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
@@ -1631,9 +1653,11 @@
 #define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
 #define RREG16(reg) readw((rdev->rmmio) + (reg))
 #define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
-#define RREG32(reg) r100_mm_rreg(rdev, (reg))
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
-#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
+#define RREG32(reg) r100_mm_rreg(rdev, (reg), false)
+#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false))
+#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false)
+#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
@@ -1658,7 +1682,7 @@
 		tmp_ |= ((val) & ~(mask));			\
 		WREG32_PLL(reg, tmp_);				\
 	} while (0)
-#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
 #define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
 #define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
 
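The accessor bodies are not part of this header diff, but given the new mmio_idx_lock a plausible shape for the read side is: registers beyond the mapped window, or any access with always_indirect set, go through the MM_INDEX/MM_DATA pair under the lock:

	uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
			      bool always_indirect)
	{
		if (reg < rdev->rmmio_size && !always_indirect)
			return readl(rdev->rmmio + reg);
		else {
			unsigned long flags;
			uint32_t ret;

			spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
			writel(reg, rdev->rmmio + RADEON_MM_INDEX);
			ret = readl(rdev->rmmio + RADEON_MM_DATA);
			spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
			return ret;
		}
	}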
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 654520b..596bcbe 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -947,6 +947,15 @@
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &r600_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -963,10 +972,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &r600_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1022,6 +1031,15 @@
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &r600_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1038,10 +1056,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &r600_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1097,6 +1115,15 @@
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &r600_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1113,10 +1140,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &r600_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1172,6 +1199,15 @@
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &evergreen_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1188,10 +1224,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1248,6 +1284,15 @@
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &evergreen_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
+		}
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -1263,10 +1308,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1322,6 +1367,15 @@
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &evergreen_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1338,10 +1392,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1391,7 +1445,7 @@
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.pt_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
@@ -1427,6 +1481,28 @@
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 			.vm_flush = &cayman_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1443,10 +1519,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1496,7 +1572,7 @@
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.pt_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
@@ -1532,6 +1608,28 @@
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 			.vm_flush = &cayman_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1548,10 +1646,10 @@
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1601,7 +1699,7 @@
 	.vm = {
 		.init = &si_vm_init,
 		.fini = &si_vm_fini,
-		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.pt_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.set_page = &si_vm_set_page,
 	},
 	.ring = {
@@ -1637,6 +1735,28 @@
 			.ib_test = &r600_ib_test,
 			.is_lockup = &si_gpu_is_lockup,
 			.vm_flush = &si_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &si_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &si_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1653,10 +1773,10 @@
 	.copy = {
 		.blit = NULL,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = NULL,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &si_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &si_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5e3a0e5..5f4882c 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -263,6 +263,7 @@
 struct rv515_mc_save {
 	u32 vga_render_control;
 	u32 vga_hdp_control;
+	bool crtc_enabled[2];
 };
 
 int rv515_init(struct radeon_device *rdev);
@@ -303,12 +304,21 @@
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int r600_cs_parse(struct radeon_cs_parser *p);
+int r600_dma_cs_parse(struct radeon_cs_parser *p);
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
 			      struct radeon_ring *cp,
 			      struct radeon_semaphore *semaphore,
 			      bool emit_wait);
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+			      struct radeon_fence *fence);
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+				  struct radeon_ring *ring,
+				  struct radeon_semaphore *semaphore,
+				  bool emit_wait);
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
 bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_asic_reset(struct radeon_device *rdev);
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@@ -316,11 +326,16 @@
 			 uint32_t offset, uint32_t obj_size);
 void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
 int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset, uint64_t dst_offset,
+		  unsigned num_gpu_pages, struct radeon_fence **fence);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -416,6 +431,7 @@
 int evergreen_irq_set(struct radeon_device *rdev);
 int evergreen_irq_process(struct radeon_device *rdev);
 extern int evergreen_cs_parse(struct radeon_cs_parser *p);
+extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
 extern void evergreen_pm_misc(struct radeon_device *rdev);
 extern void evergreen_pm_prepare(struct radeon_device *rdev);
 extern void evergreen_pm_finish(struct radeon_device *rdev);
@@ -428,6 +444,14 @@
 void evergreen_disable_interrupt_state(struct radeon_device *rdev);
 int evergreen_blit_init(struct radeon_device *rdev);
 int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+				   struct radeon_fence *fence);
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+				   struct radeon_ib *ib);
+int evergreen_copy_dma(struct radeon_device *rdev,
+		       uint64_t src_offset, uint64_t dst_offset,
+		       unsigned num_gpu_pages,
+		       struct radeon_fence **fence);
 
 /*
  * cayman
@@ -449,6 +473,11 @@
 			uint64_t addr, unsigned count,
 			uint32_t incr, uint32_t flags);
 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+				struct radeon_ib *ib);
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 
 /* DCE6 - SI */
 void dce6_bandwidth_update(struct radeon_device *rdev);
@@ -476,5 +505,10 @@
 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 uint64_t si_get_gpu_clock(struct radeon_device *rdev);
+int si_copy_dma(struct radeon_device *rdev,
+		uint64_t src_offset, uint64_t dst_offset,
+		unsigned num_gpu_pages,
+		struct radeon_fence **fence);
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 
 #endif
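All of these per-ring callbacks are reached through the radeon_asic function-pointer tables, so generic code picks up the new DMA rings transparently; conceptually the copy path reduces to the sketch below (the real dispatch is a macro in radeon.h):

	int radeon_copy(struct radeon_device *rdev, u64 src, u64 dst,
			unsigned num_gpu_pages, struct radeon_fence **fence)
	{
		return rdev->asic->copy.copy(rdev, src, dst,
					     num_gpu_pages, fence);
	}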
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 45b660b..4af8912 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3246,11 +3246,9 @@
 	while (ram--) {
 		addr = ram * 1024 * 1024;
 		/* write to each page */
-		WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
-		WREG32(RADEON_MM_DATA, 0xdeadbeef);
+		WREG32_IDX((addr) | RADEON_MM_APER, 0xdeadbeef);
 		/* read back and verify */
-		WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
-		if (RREG32(RADEON_MM_DATA) != 0xdeadbeef)
+		if (RREG32_IDX((addr) | RADEON_MM_APER) != 0xdeadbeef)
 			return 0;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index b884c36..47bf162 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1599,7 +1599,7 @@
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			radeon_connector->dac_load_detect = true;
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.load_detect_property,
 						      1);
 			break;
@@ -1608,13 +1608,13 @@
 		case DRM_MODE_CONNECTOR_HDMIA:
 		case DRM_MODE_CONNECTOR_HDMIB:
 		case DRM_MODE_CONNECTOR_DisplayPort:
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.underscan_property,
 						      UNDERSCAN_OFF);
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.underscan_hborder_property,
 						      0);
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.underscan_vborder_property,
 						      0);
 			subpixel_order = SubPixelHorizontalRGB;
@@ -1625,14 +1625,14 @@
 				connector->doublescan_allowed = false;
 			if (connector_type == DRM_MODE_CONNECTOR_DVII) {
 				radeon_connector->dac_load_detect = true;
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.load_detect_property,
 							      1);
 			}
 			break;
 		case DRM_MODE_CONNECTOR_LVDS:
 		case DRM_MODE_CONNECTOR_eDP:
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      dev->mode_config.scaling_mode_property,
 						      DRM_MODE_SCALE_FULLSCREEN);
 			subpixel_order = SubPixelHorizontalRGB;
@@ -1651,7 +1651,7 @@
 					DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 			}
 			radeon_connector->dac_load_detect = true;
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.load_detect_property,
 						      1);
 			/* no HPD on analog connectors */
@@ -1669,7 +1669,7 @@
 					DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 			}
 			radeon_connector->dac_load_detect = true;
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.load_detect_property,
 						      1);
 			/* no HPD on analog connectors */
@@ -1692,23 +1692,23 @@
 					DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 			}
 			subpixel_order = SubPixelHorizontalRGB;
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.coherent_mode_property,
 						      1);
 			if (ASIC_IS_AVIVO(rdev)) {
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_property,
 							      UNDERSCAN_OFF);
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_hborder_property,
 							      0);
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_vborder_property,
 							      0);
 			}
 			if (connector_type == DRM_MODE_CONNECTOR_DVII) {
 				radeon_connector->dac_load_detect = true;
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.load_detect_property,
 							      1);
 			}
@@ -1732,17 +1732,17 @@
 				if (!radeon_connector->ddc_bus)
 					DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 			}
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.coherent_mode_property,
 						      1);
 			if (ASIC_IS_AVIVO(rdev)) {
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_property,
 							      UNDERSCAN_OFF);
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_hborder_property,
 							      0);
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_vborder_property,
 							      0);
 			}
@@ -1771,17 +1771,17 @@
 					DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 			}
 			subpixel_order = SubPixelHorizontalRGB;
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.coherent_mode_property,
 						      1);
 			if (ASIC_IS_AVIVO(rdev)) {
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_property,
 							      UNDERSCAN_OFF);
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_hborder_property,
 							      0);
-				drm_connector_attach_property(&radeon_connector->base,
+				drm_object_attach_property(&radeon_connector->base.base,
 							      rdev->mode_info.underscan_vborder_property,
 							      0);
 			}
@@ -1806,7 +1806,7 @@
 				if (!radeon_connector->ddc_bus)
 					DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 			}
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      dev->mode_config.scaling_mode_property,
 						      DRM_MODE_SCALE_FULLSCREEN);
 			subpixel_order = SubPixelHorizontalRGB;
@@ -1819,10 +1819,10 @@
 			drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type);
 			drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs);
 			radeon_connector->dac_load_detect = true;
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.load_detect_property,
 						      1);
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.tv_std_property,
 						      radeon_atombios_get_tv_info(rdev));
 			/* no HPD on analog connectors */
@@ -1843,7 +1843,7 @@
 				if (!radeon_connector->ddc_bus)
 					DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 			}
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      dev->mode_config.scaling_mode_property,
 						      DRM_MODE_SCALE_FULLSCREEN);
 			subpixel_order = SubPixelHorizontalRGB;
@@ -1922,7 +1922,7 @@
 				DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		radeon_connector->dac_load_detect = true;
-		drm_connector_attach_property(&radeon_connector->base,
+		drm_object_attach_property(&radeon_connector->base.base,
 					      rdev->mode_info.load_detect_property,
 					      1);
 		/* no HPD on analog connectors */
@@ -1940,7 +1940,7 @@
 				DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		radeon_connector->dac_load_detect = true;
-		drm_connector_attach_property(&radeon_connector->base,
+		drm_object_attach_property(&radeon_connector->base.base,
 					      rdev->mode_info.load_detect_property,
 					      1);
 		/* no HPD on analog connectors */
@@ -1959,7 +1959,7 @@
 		}
 		if (connector_type == DRM_MODE_CONNECTOR_DVII) {
 			radeon_connector->dac_load_detect = true;
-			drm_connector_attach_property(&radeon_connector->base,
+			drm_object_attach_property(&radeon_connector->base.base,
 						      rdev->mode_info.load_detect_property,
 						      1);
 		}
@@ -1983,10 +1983,10 @@
 		 */
 		if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480)
 			radeon_connector->dac_load_detect = false;
-		drm_connector_attach_property(&radeon_connector->base,
+		drm_object_attach_property(&radeon_connector->base.base,
 					      rdev->mode_info.load_detect_property,
 					      radeon_connector->dac_load_detect);
-		drm_connector_attach_property(&radeon_connector->base,
+		drm_object_attach_property(&radeon_connector->base.base,
 					      rdev->mode_info.tv_std_property,
 					      radeon_combios_get_tv_info(rdev));
 		/* no HPD on analog connectors */
@@ -2002,7 +2002,7 @@
 			if (!radeon_connector->ddc_bus)
 				DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
-		drm_connector_attach_property(&radeon_connector->base,
+		drm_object_attach_property(&radeon_connector->base.base,
 					      dev->mode_config.scaling_mode_property,
 					      DRM_MODE_SCALE_FULLSCREEN);
 		subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 8b2797d..9143fc4 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -116,20 +116,6 @@
 	}
 }
 
-u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr)
-{
-	u32 ret;
-
-	if (addr < 0x10000)
-		ret = DRM_READ32(dev_priv->mmio, addr);
-	else {
-		DRM_WRITE32(dev_priv->mmio, RADEON_MM_INDEX, addr);
-		ret = DRM_READ32(dev_priv->mmio, RADEON_MM_DATA);
-	}
-
-	return ret;
-}
-
 static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
 {
 	u32 ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc..396baba 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@
 		return 0;
 	}
 	chunk = &p->chunks[p->chunk_relocs_idx];
+	p->dma_reloc_idx = 0;
 	/* FIXME: we assume that each relocs use 4 dwords */
 	p->nrelocs = chunk->length_dw / 4;
 	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
@@ -111,6 +112,18 @@
 		} else
 			p->ring = RADEON_RING_TYPE_GFX_INDEX;
 		break;
+	case RADEON_CS_RING_DMA:
+		if (p->rdev->family >= CHIP_CAYMAN) {
+			if (p->priority > 0)
+				p->ring = R600_RING_TYPE_DMA_INDEX;
+			else
+				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
+		} else if (p->rdev->family >= CHIP_R600) {
+			p->ring = R600_RING_TYPE_DMA_INDEX;
+		} else {
+			return -EINVAL;
+		}
+		break;
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 0fe56c9f..ad6df62 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -66,24 +66,25 @@
 	struct radeon_device *rdev = crtc->dev->dev_private;
 
 	if (ASIC_IS_DCE4(rdev)) {
-		WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset);
-		WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
-		       EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
+		WREG32_IDX(EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset,
+			   EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
+			   EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
 	} else if (ASIC_IS_AVIVO(rdev)) {
-		WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
-		WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
+		WREG32_IDX(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset,
+			   (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
 	} else {
+		u32 reg;
 		switch (radeon_crtc->crtc_id) {
 		case 0:
-			WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL);
+			reg = RADEON_CRTC_GEN_CNTL;
 			break;
 		case 1:
-			WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL);
+			reg = RADEON_CRTC2_GEN_CNTL;
 			break;
 		default:
 			return;
 		}
-		WREG32_P(RADEON_MM_DATA, 0, ~RADEON_CRTC_CUR_EN);
+		WREG32_IDX(reg, RREG32_IDX(reg) & ~RADEON_CRTC_CUR_EN);
 	}
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f88..49b0659 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1059,6 +1059,7 @@
 
 	/* Registers mapping */
 	/* TODO: block userspace mapping of io register */
+	spin_lock_init(&rdev->mmio_idx_lock);
 	rdev->rmmio_base = pci_resource_start(rdev->pdev, 2);
 	rdev->rmmio_size = pci_resource_len(rdev->pdev, 2);
 	rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bfa2a60..310c0e5 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -378,8 +378,12 @@
 	work->old_rbo = rbo;
 	obj = new_radeon_fb->obj;
 	rbo = gem_to_radeon_bo(obj);
+
+	spin_lock(&rbo->tbo.bdev->fence_lock);
 	if (rbo->tbo.sync_obj)
 		work->fence = radeon_fence_ref(rbo->tbo.sync_obj);
+	spin_unlock(&rbo->tbo.bdev->fence_lock);
+
 	INIT_WORK(&work->work, radeon_unpin_work_func);
 
 	/* We borrow the event spin lock for protecting unpin_work */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 07eb84e..9b1a727 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -65,9 +65,12 @@
  *   2.22.0 - r600 only: RESOLVE_BOX allowed
  *   2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880
  *   2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures
+ *   2.25.0 - eg+: new info request for num SE and num SH
+ *   2.26.0 - r600-eg: fix htile size computation
+ *   2.27.0 - r600-SI: Add CS ioctl support for async DMA
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	24
+#define KMS_DRIVER_MINOR	27
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -281,12 +284,15 @@
 
 static struct drm_driver kms_driver;
 
-static void radeon_kick_out_firmware_fb(struct pci_dev *pdev)
+static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
 {
 	struct apertures_struct *ap;
 	bool primary = false;
 
 	ap = alloc_apertures(1);
+	if (!ap)
+		return -ENOMEM;
+
 	ap->ranges[0].base = pci_resource_start(pdev, 0);
 	ap->ranges[0].size = pci_resource_len(pdev, 0);
 
@@ -295,13 +301,19 @@
 #endif
 	remove_conflicting_framebuffers(ap, "radeondrmfb", primary);
 	kfree(ap);
+
+	return 0;
 }
 
 static int __devinit
 radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
+	int ret;
+
 	/* Get rid of things like offb */
-	radeon_kick_out_firmware_fb(pdev);
+	ret = radeon_kick_out_firmware_fb(pdev);
+	if (ret)
+		return ret;
 
 	return drm_get_pci_dev(pdev, ent, &kms_driver);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index a1b59ca..e7fdf16 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -366,7 +366,6 @@
 extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv);
 extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc);
 extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base);
-extern u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr);
 
 extern void radeon_freelist_reset(struct drm_device * dev);
 extern struct drm_buf *radeon_freelist_get(struct drm_device * dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2..410a975 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -772,7 +772,7 @@
 	int r;
 
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
-	if (rdev->wb.use_event) {
+	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
 		rdev->fence_drv[ring].scratch_reg = 0;
 		index = R600_WB_EVENT_OFFSET + ring * 4;
 	} else {
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 4debd60..6e24f84 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -1237,7 +1237,6 @@
 {
 	struct radeon_bo_va *bo_va;
 
-	BUG_ON(!atomic_read(&bo->tbo.reserved));
 	list_for_each_entry(bo_va, &bo->va, bo_list) {
 		bo_va->valid = false;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index dc781c4..9c312f9 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -361,6 +361,22 @@
 			return -EINVAL;
 		}
 		break;
+	case RADEON_INFO_MAX_SE:
+		if (rdev->family >= CHIP_TAHITI)
+			value = rdev->config.si.max_shader_engines;
+		else if (rdev->family >= CHIP_CAYMAN)
+			value = rdev->config.cayman.max_shader_engines;
+		else if (rdev->family >= CHIP_CEDAR)
+			value = rdev->config.evergreen.num_ses;
+		else
+			value = 1;
+		break;
+	case RADEON_INFO_MAX_SH_PER_SE:
+		if (rdev->family >= CHIP_TAHITI)
+			value = rdev->config.si.max_sh_per_se;
+		else
+			return -EINVAL;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 92c5f47..d818b50 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -427,7 +427,7 @@
 	uint32_t igp_lane_info;
 	/* displayport */
 	struct radeon_i2c_chan *dp_i2c_bus;
-	u8 dpcd[8];
+	u8 dpcd[DP_RECEIVER_CAP_SIZE];
 	u8 dp_sink_type;
 	int dp_clock;
 	int dp_lane_count;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index b91118c..883c95d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -84,17 +84,34 @@
 	rbo->placement.fpfn = 0;
 	rbo->placement.lpfn = 0;
 	rbo->placement.placement = rbo->placements;
-	rbo->placement.busy_placement = rbo->placements;
 	if (domain & RADEON_GEM_DOMAIN_VRAM)
 		rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 					TTM_PL_FLAG_VRAM;
-	if (domain & RADEON_GEM_DOMAIN_GTT)
-		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-	if (domain & RADEON_GEM_DOMAIN_CPU)
-		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+	if (domain & RADEON_GEM_DOMAIN_GTT) {
+		if (rbo->rdev->flags & RADEON_IS_AGP) {
+			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+		} else {
+			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+		}
+	}
+	if (domain & RADEON_GEM_DOMAIN_CPU) {
+		if (rbo->rdev->flags & RADEON_IS_AGP) {
+			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM;
+		} else {
+			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
+		}
+	}
 	if (!c)
 		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	rbo->placement.num_placement = c;
+
+	c = 0;
+	rbo->placement.busy_placement = rbo->busy_placements;
+	if (rbo->rdev->flags & RADEON_IS_AGP) {
+		rbo->busy_placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+	} else {
+		rbo->busy_placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+	}
 	rbo->placement.num_busy_placement = c;
 }
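With busy placements split out, a VRAM-preferring BO no longer needs the open-coded VRAM-to-GTT retry removed from radeon_bo_list_validate() later in this diff: TTM itself falls back to the busy placement when the preferred one cannot be satisfied. Illustrative use:

	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	/* placement      -> VRAM (preferred)
	 * busy_placement -> GTT  (fallback under memory pressure) */
	r = ttm_bo_validate(&rbo->tbo, &rbo->placement, true, false);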
 
@@ -140,7 +157,7 @@
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
-			&bo->placement, page_align, 0, !kernel, NULL,
+			&bo->placement, page_align, !kernel, NULL,
 			acc_size, sg, &radeon_ttm_bo_destroy);
 	up_read(&rdev->pm.mclk_lock);
 	if (unlikely(r != 0)) {
@@ -240,7 +257,7 @@
 	}
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		bo->pin_count = 1;
 		if (gpu_addr != NULL)
@@ -269,7 +286,7 @@
 		return 0;
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (unlikely(r != 0))
 		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
 	return r;
@@ -340,7 +357,6 @@
 {
 	struct radeon_bo_list *lobj;
 	struct radeon_bo *bo;
-	u32 domain;
 	int r;
 
 	r = ttm_eu_reserve_buffers(head);
@@ -350,17 +366,9 @@
 	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->bo;
 		if (!bo->pin_count) {
-			domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
-			
-		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement,
-						true, false, false);
+						true, false);
 			if (unlikely(r)) {
-				if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
-					domain |= RADEON_GEM_DOMAIN_GTT;
-					goto retry;
-				}
 				return r;
 			}
 		}
@@ -384,7 +392,7 @@
 	int steal;
 	int i;
 
-	BUG_ON(!atomic_read(&bo->tbo.reserved));
+	BUG_ON(!radeon_bo_is_reserved(bo));
 
 	if (!bo->tiling_flags)
 		return 0;
@@ -510,7 +518,7 @@
 				uint32_t *tiling_flags,
 				uint32_t *pitch)
 {
-	BUG_ON(!atomic_read(&bo->tbo.reserved));
+	BUG_ON(!radeon_bo_is_reserved(bo));
 	if (tiling_flags)
 		*tiling_flags = bo->tiling_flags;
 	if (pitch)
@@ -520,7 +528,7 @@
 int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop)
 {
-	BUG_ON(!atomic_read(&bo->tbo.reserved));
+	BUG_ON(!radeon_bo_is_reserved(bo) && !force_drop);
 
 	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
 		return 0;
@@ -575,7 +583,7 @@
 			/* hurrah, the memory is not visible! */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
 			rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
-			r = ttm_bo_validate(bo, &rbo->placement, false, true, false);
+			r = ttm_bo_validate(bo, &rbo->placement, false, false);
 			if (unlikely(r != 0))
 				return r;
 			offset = bo->mem.start << PAGE_SHIFT;
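
The reworked radeon_ttm_placement_from_domain() above encodes two decisions: GTT and CPU placements become write-combined on AGP (where cached mappings are unsafe) and cached otherwise, and a separate busy list always offers GTT as an eviction fallback, which is why the VRAM-to-GTT retry loop could be dropped from radeon_bo_list_validate(). A toy model of the flag selection; the PL_* values are illustrative stand-ins for the real TTM_PL_FLAG_* constants:

#include <stdint.h>
#include <stdio.h>

#define PL_FLAG_TT     (1u << 1)	/* stand-in for TTM_PL_FLAG_TT */
#define PL_FLAG_CACHED (1u << 16)	/* stand-in for TTM_PL_FLAG_CACHED */
#define PL_FLAG_WC     (1u << 18)	/* stand-in for TTM_PL_FLAG_WC */

static uint32_t gtt_flags(int is_agp)
{
	/* AGP apertures must not be mapped cached, hence write-combined */
	return (is_agp ? PL_FLAG_WC : PL_FLAG_CACHED) | PL_FLAG_TT;
}

int main(void)
{
	printf("AGP  GTT placement: %#x\n", gtt_flags(1));
	printf("PCIe GTT placement: %#x\n", gtt_flags(0));
	return 0;
}
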
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 93cd491..5fc86b0 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -80,7 +80,7 @@
 
 static inline bool radeon_bo_is_reserved(struct radeon_bo *bo)
 {
-	return !!atomic_read(&bo->tbo.reserved);
+	return ttm_bo_is_reserved(&bo->tbo);
 }
 
 static inline unsigned radeon_bo_ngpu_pages(struct radeon_bo *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 587c09a..fda09c9 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -26,16 +26,31 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 
+#define RADEON_TEST_COPY_BLIT 1
+#define RADEON_TEST_COPY_DMA  0
+
 
 /* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
-void radeon_test_moves(struct radeon_device *rdev)
+static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
 {
 	struct radeon_bo *vram_obj = NULL;
 	struct radeon_bo **gtt_obj = NULL;
 	struct radeon_fence *fence = NULL;
 	uint64_t gtt_addr, vram_addr;
 	unsigned i, n, size;
-	int r;
+	int r, ring;
+
+	switch (flag) {
+	case RADEON_TEST_COPY_DMA:
+		ring = radeon_copy_dma_ring_index(rdev);
+		break;
+	case RADEON_TEST_COPY_BLIT:
+		ring = radeon_copy_blit_ring_index(rdev);
+		break;
+	default:
+		DRM_ERROR("Unknown copy method\n");
+		return;
+	}
 
 	size = 1024 * 1024;
 
@@ -106,7 +121,10 @@
 
 		radeon_bo_kunmap(gtt_obj[i]);
 
-		r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+		if (ring == R600_RING_TYPE_DMA_INDEX)
+			r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+		else
+			r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
 		if (r) {
 			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
 			goto out_cleanup;
@@ -149,7 +167,10 @@
 
 		radeon_bo_kunmap(vram_obj);
 
-		r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+		if (ring == R600_RING_TYPE_DMA_INDEX)
+			r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+		else
+			r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
 		if (r) {
 			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
 			goto out_cleanup;
@@ -223,6 +244,14 @@
 	}
 }
 
+void radeon_test_moves(struct radeon_device *rdev)
+{
+	if (rdev->asic->copy.dma)
+		radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA);
+	if (rdev->asic->copy.blit)
+		radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
+}
+
 void radeon_test_ring_sync(struct radeon_device *rdev,
 			   struct radeon_ring *ringA,
 			   struct radeon_ring *ringB)
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 5ebe1b3..1d8ff2f 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -216,7 +216,7 @@
 }
 
 static int radeon_move_blit(struct ttm_buffer_object *bo,
-			bool evict, int no_wait_reserve, bool no_wait_gpu,
+			bool evict, bool no_wait_gpu,
 			struct ttm_mem_reg *new_mem,
 			struct ttm_mem_reg *old_mem)
 {
@@ -265,15 +265,15 @@
 			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
 			&fence);
 	/* FIXME: handle copy error */
-	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
-				      evict, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
+				      evict, no_wait_gpu, new_mem);
 	radeon_fence_unref(&fence);
 	return r;
 }
 
 static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 				bool evict, bool interruptible,
-				bool no_wait_reserve, bool no_wait_gpu,
+				bool no_wait_gpu,
 				struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
@@ -294,7 +294,7 @@
 	placement.busy_placement = &placements;
 	placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
-			     interruptible, no_wait_reserve, no_wait_gpu);
+			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
 		return r;
 	}
@@ -308,11 +308,11 @@
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem, old_mem);
+	r = radeon_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -320,7 +320,7 @@
 
 static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
 				bool evict, bool interruptible,
-				bool no_wait_reserve, bool no_wait_gpu,
+				bool no_wait_gpu,
 				struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
@@ -340,15 +340,16 @@
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu);
+	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
+			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+	r = radeon_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -359,7 +360,7 @@
 
 static int radeon_bo_move(struct ttm_buffer_object *bo,
 			bool evict, bool interruptible,
-			bool no_wait_reserve, bool no_wait_gpu,
+			bool no_wait_gpu,
 			struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
@@ -388,18 +389,18 @@
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
 		r = radeon_move_vram_ram(bo, evict, interruptible,
-					no_wait_reserve, no_wait_gpu, new_mem);
+					no_wait_gpu, new_mem);
 	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
 		   new_mem->mem_type == TTM_PL_VRAM) {
 		r = radeon_move_ram_vram(bo, evict, interruptible,
-					    no_wait_reserve, no_wait_gpu, new_mem);
+					    no_wait_gpu, new_mem);
 	} else {
-		r = radeon_move_blit(bo, evict, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+		r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
 	}
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 	}
 	return r;
 }
@@ -471,13 +472,12 @@
 {
 }
 
-static int radeon_sync_obj_wait(void *sync_obj, void *sync_arg,
-				bool lazy, bool interruptible)
+static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
 {
 	return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible);
 }
 
-static int radeon_sync_obj_flush(void *sync_obj, void *sync_arg)
+static int radeon_sync_obj_flush(void *sync_obj)
 {
 	return 0;
 }
@@ -492,7 +492,7 @@
 	return radeon_fence_ref((struct radeon_fence *)sync_obj);
 }
 
-static bool radeon_sync_obj_signaled(void *sync_obj, void *sync_arg)
+static bool radeon_sync_obj_signaled(void *sync_obj)
 {
 	return radeon_fence_signaled((struct radeon_fence *)sync_obj);
 }
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 785d095..2bb6d0e 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -40,6 +40,12 @@
 static void rv515_gpu_init(struct radeon_device *rdev);
 int rv515_mc_wait_for_idle(struct radeon_device *rdev);
 
+static const u32 crtc_offsets[2] =
+{
+	0,
+	AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL
+};
+
 void rv515_debugfs(struct radeon_device *rdev)
 {
 	if (r100_debugfs_rbbm_init(rdev)) {
@@ -281,30 +287,114 @@
 
 void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save)
 {
+	u32 crtc_enabled, tmp, frame_count, blackout;
+	int i, j;
+
 	save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL);
 	save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL);
 
-	/* Stop all video */
-	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
+	/* disable VGA render */
 	WREG32(R_000300_VGA_RENDER_CONTROL, 0);
-	WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1);
-	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1);
-	WREG32(R_006080_D1CRTC_CONTROL, 0);
-	WREG32(R_006880_D2CRTC_CONTROL, 0);
-	WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0);
-	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
-	WREG32(R_000330_D1VGA_CONTROL, 0);
-	WREG32(R_000338_D2VGA_CONTROL, 0);
+	/* blank the display controllers */
+	for (i = 0; i < rdev->num_crtc; i++) {
+		crtc_enabled = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]) & AVIVO_CRTC_EN;
+		if (crtc_enabled) {
+			save->crtc_enabled[i] = true;
+			tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
+			if (!(tmp & AVIVO_CRTC_DISP_READ_REQUEST_DISABLE)) {
+				radeon_wait_for_vblank(rdev, i);
+				tmp |= AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
+				WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
+			}
+			/* wait for the next frame */
+			frame_count = radeon_get_vblank_counter(rdev, i);
+			for (j = 0; j < rdev->usec_timeout; j++) {
+				if (radeon_get_vblank_counter(rdev, i) != frame_count)
+					break;
+				udelay(1);
+			}
+		} else {
+			save->crtc_enabled[i] = false;
+		}
+	}
+
+	radeon_mc_wait_for_idle(rdev);
+
+	if (rdev->family >= CHIP_R600) {
+		if (rdev->family >= CHIP_RV770)
+			blackout = RREG32(R700_MC_CITF_CNTL);
+		else
+			blackout = RREG32(R600_CITF_CNTL);
+		if ((blackout & R600_BLACKOUT_MASK) != R600_BLACKOUT_MASK) {
+			/* Block CPU access */
+			WREG32(R600_BIF_FB_EN, 0);
+			/* blackout the MC */
+			blackout |= R600_BLACKOUT_MASK;
+			if (rdev->family >= CHIP_RV770)
+				WREG32(R700_MC_CITF_CNTL, blackout);
+			else
+				WREG32(R600_CITF_CNTL, blackout);
+		}
+	}
 }
 
 void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save)
 {
-	WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start);
-	WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start);
-	WREG32(R_006910_D2GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start);
-	WREG32(R_006918_D2GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start);
-	WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
-	/* Unlock host access */
+	u32 tmp, frame_count;
+	int i, j;
+
+	/* update crtc base addresses */
+	for (i = 0; i < rdev->num_crtc; i++) {
+		if (rdev->family >= CHIP_RV770) {
+			if (i == 1) {
+				WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
+				       upper_32_bits(rdev->mc.vram_start));
+				WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
+				       upper_32_bits(rdev->mc.vram_start));
+			} else {
+				WREG32(R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
+				       upper_32_bits(rdev->mc.vram_start));
+				WREG32(R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
+				       upper_32_bits(rdev->mc.vram_start));
+			}
+		}
+		WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
+		       (u32)rdev->mc.vram_start);
+		WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
+		       (u32)rdev->mc.vram_start);
+	}
+	WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, (u32)rdev->mc.vram_start);
+
+	if (rdev->family >= CHIP_R600) {
+		/* unblackout the MC */
+		if (rdev->family >= CHIP_RV770)
+			tmp = RREG32(R700_MC_CITF_CNTL);
+		else
+			tmp = RREG32(R600_CITF_CNTL);
+		tmp &= ~R600_BLACKOUT_MASK;
+		if (rdev->family >= CHIP_RV770)
+			WREG32(R700_MC_CITF_CNTL, tmp);
+		else
+			WREG32(R600_CITF_CNTL, tmp);
+		/* allow CPU access */
+		WREG32(R600_BIF_FB_EN, R600_FB_READ_EN | R600_FB_WRITE_EN);
+	}
+
+	for (i = 0; i < rdev->num_crtc; i++) {
+		if (save->crtc_enabled[i]) {
+			tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
+			tmp &= ~AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
+			WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
+			/* wait for the next frame */
+			frame_count = radeon_get_vblank_counter(rdev, i);
+			for (j = 0; j < rdev->usec_timeout; j++) {
+				if (radeon_get_vblank_counter(rdev, i) != frame_count)
+					break;
+				udelay(1);
+			}
+		}
+	}
+	/* Unlock vga access */
 	WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control);
 	mdelay(1);
 	WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 79814a0..87c979c 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -316,6 +316,7 @@
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
 	WREG32(SCRATCH_UMSK, 0);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 }
 
 static int rv770_cp_load_microcode(struct radeon_device *rdev)
@@ -583,6 +584,8 @@
 	WREG32(GB_TILING_CONFIG, gb_tiling_config);
 	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+	WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
+	WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
 
 	WREG32(CGTS_SYS_TCC_DISABLE, 0);
 	WREG32(CGTS_TCC_DISABLE, 0);
@@ -886,7 +889,7 @@
 
 static int rv770_startup(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_ring *ring;
 	int r;
 
 	/* enable pcie gen2 link */
@@ -932,6 +935,12 @@
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -941,11 +950,20 @@
 	}
 	r600_irq_set(rdev);
 
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
 			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR, DMA_RB_WPTR,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = rv770_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -953,6 +971,10 @@
 	if (r)
 		return r;
 
+	r = r600_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -995,7 +1017,7 @@
 {
 	r600_audio_fini(rdev);
 	r700_cp_stop(rdev);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	r600_dma_stop(rdev);
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	rv770_pcie_gart_disable(rdev);
@@ -1066,6 +1088,9 @@
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1078,6 +1103,7 @@
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
+		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -1093,6 +1119,7 @@
 {
 	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
+	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
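
The radeon_ring_init() call added above passes ptr_reg_shift = 2 and ptr_reg_mask = 0x3fffc, which suggests the DMA ring's RPTR/WPTR registers hold a dword-aligned byte offset in bits [17:2]; that layout is inferred from the arguments rather than from documented register definitions. A quick sanity check of the assumed encoding:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t dw_index = 42;				/* ring position in dwords */
	uint32_t reg = (dw_index << 2) & 0x3fffc;	/* value as the register sees it */

	assert((reg & 0x3) == 0);		/* always 4-byte aligned */
	assert((reg >> 2) == dw_index);		/* shifting recovers the index */
	return 0;
}
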
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index b0adfc5..20e29d2 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -109,6 +109,9 @@
 #define     PIPE_TILING__SHIFT              1
 #define     PIPE_TILING__MASK               0x0000000e
 
+#define DMA_TILING_CONFIG                               0x3ec8
+#define DMA_TILING_CONFIG2                              0xd0b8
+
 #define	GC_USER_SHADER_PIPE_CONFIG			0x8954
 #define		INACTIVE_QD_PIPES(x)				((x) << 8)
 #define		INACTIVE_QD_PIPES_MASK				0x0000FF00
@@ -358,6 +361,26 @@
 
 #define	WAIT_UNTIL					0x8040
 
+/* async DMA */
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
+					 (((t) & 0x1) << 23) |		\
+					 (((s) & 0x1) << 22) |		\
+					 (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define	DMA_PACKET_WRITE				  0x2
+#define	DMA_PACKET_COPY					  0x3
+#define	DMA_PACKET_INDIRECT_BUFFER			  0x4
+#define	DMA_PACKET_SEMAPHORE				  0x5
+#define	DMA_PACKET_FENCE				  0x6
+#define	DMA_PACKET_TRAP					  0x7
+#define	DMA_PACKET_CONSTANT_FILL			  0xd
+#define	DMA_PACKET_NOP					  0xf
+
+
 #define	SRBM_STATUS				        0x0E50
 
 /* DCE 3.2 HDMI */
@@ -551,6 +574,54 @@
 #define HDMI_OFFSET0                      (0x7400 - 0x7400)
 #define HDMI_OFFSET1                      (0x7800 - 0x7400)
 
+/* DCE3.2 ELD audio interface */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0        0x71c8 /* LPCM */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1        0x71cc /* AC3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2        0x71d0 /* MPEG1 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3        0x71d4 /* MP3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4        0x71d8 /* MPEG2 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5        0x71dc /* AAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6        0x71e0 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7        0x71e4 /* ATRAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8        0x71e8 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9        0x71ec /* Dolby Digital */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10       0x71f0 /* DTS-HD */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11       0x71f4 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12       0x71f8 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13       0x71fc /* WMA Pro */
+#       define MAX_CHANNELS(x)                            (((x) & 0x7) << 0)
+/* max channels minus one.  7 = 8 channels */
+#       define SUPPORTED_FREQUENCIES(x)                   (((x) & 0xff) << 8)
+#       define DESCRIPTOR_BYTE_2(x)                       (((x) & 0xff) << 16)
+#       define SUPPORTED_FREQUENCIES_STEREO(x)            (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
+#define AZ_HOT_PLUG_CONTROL                               0x7300
+#       define AZ_FORCE_CODEC_WAKE                        (1 << 0)
+#       define PIN0_JACK_DETECTION_ENABLE                 (1 << 4)
+#       define PIN1_JACK_DETECTION_ENABLE                 (1 << 5)
+#       define PIN2_JACK_DETECTION_ENABLE                 (1 << 6)
+#       define PIN3_JACK_DETECTION_ENABLE                 (1 << 7)
+#       define PIN0_UNSOLICITED_RESPONSE_ENABLE           (1 << 8)
+#       define PIN1_UNSOLICITED_RESPONSE_ENABLE           (1 << 9)
+#       define PIN2_UNSOLICITED_RESPONSE_ENABLE           (1 << 10)
+#       define PIN3_UNSOLICITED_RESPONSE_ENABLE           (1 << 11)
+#       define CODEC_HOT_PLUG_ENABLE                      (1 << 12)
+#       define PIN0_AUDIO_ENABLED                         (1 << 24)
+#       define PIN1_AUDIO_ENABLED                         (1 << 25)
+#       define PIN2_AUDIO_ENABLED                         (1 << 26)
+#       define PIN3_AUDIO_ENABLED                         (1 << 27)
+#       define AUDIO_ENABLED                              (1 << 31)
+
+
 #define D1GRPH_PRIMARY_SURFACE_ADDRESS                    0x6110
 #define D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH               0x6914
 #define D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH               0x6114
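
The DMA_PACKET() layout added above packs the opcode into bits [31:28], two flag bits into [23] and [22], and a 16-bit dword count into [15:0]. A self-contained round-trip check of that encoding; the macro and opcode are copied from the hunk, everything else is scaffolding:

#include <assert.h>
#include <stdint.h>

#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
					 (((t) & 0x1) << 23) |		\
					 (((s) & 0x1) << 22) |		\
					 (((n) & 0xFFFF) << 0))
#define DMA_PACKET_COPY 0x3

int main(void)
{
	uint32_t hdr = DMA_PACKET(DMA_PACKET_COPY, 0, 1, 0x1000);

	assert((hdr >> 28) == DMA_PACKET_COPY);	/* opcode round-trips */
	assert(hdr & (1u << 22));		/* 's' flag round-trips */
	assert((hdr & 0xFFFF) == 0x1000);	/* dword count round-trips */
	return 0;
}
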
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 010156d..ef68365 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1660,6 +1660,8 @@
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
 
 	si_tiling_mode_table_init(rdev);
 
@@ -1836,6 +1838,9 @@
 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
 		WREG32(SCRATCH_UMSK, 0);
+		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
 	}
 	udelay(50);
 }
@@ -2426,9 +2431,20 @@
 	/* enable context1-15 */
 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
 	       (u32)(rdev->dummy_page.addr >> 12));
-	WREG32(VM_CONTEXT1_CNTL2, 0);
+	WREG32(VM_CONTEXT1_CNTL2, 4);
 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
 
 	si_pcie_gart_tlb_flush(rdev);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -2534,6 +2550,7 @@
 	u32 idx = pkt->idx + 1;
 	u32 idx_value = ib[idx];
 	u32 start_reg, end_reg, reg, i;
+	u32 command, info;
 
 	switch (pkt->opcode) {
 	case PACKET3_NOP:
@@ -2633,6 +2650,52 @@
 				return -EINVAL;
 		}
 		break;
+	case PACKET3_CP_DMA:
+		command = ib[idx + 4];
+		info = ib[idx + 1];
+		if (command & PACKET3_CP_DMA_CMD_SAS) {
+			/* src address space is register */
+			if (((info & 0x60000000) >> 29) == 0) {
+				start_reg = idx_value << 2;
+				if (command & PACKET3_CP_DMA_CMD_SAIC) {
+					reg = start_reg;
+					if (!si_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad SRC register\n");
+						return -EINVAL;
+					}
+				} else {
+					for (i = 0; i < (command & 0x1fffff); i++) {
+						reg = start_reg + (4 * i);
+						if (!si_vm_reg_valid(reg)) {
+							DRM_ERROR("CP DMA Bad SRC register\n");
+							return -EINVAL;
+						}
+					}
+				}
+			}
+		}
+		if (command & PACKET3_CP_DMA_CMD_DAS) {
+			/* dst address space is register */
+			if (((info & 0x00300000) >> 20) == 0) {
+				start_reg = ib[idx + 2];
+				if (command & PACKET3_CP_DMA_CMD_DAIC) {
+					reg = start_reg;
+					if (!si_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad DST register\n");
+						return -EINVAL;
+					}
+				} else {
+					for (i = 0; i < (command & 0x1fffff); i++) {
+						reg = start_reg + (4 * i);
+						if (!si_vm_reg_valid(reg)) {
+							DRM_ERROR("CP DMA Bad DST register\n");
+							return -EINVAL;
+						}
+					}
+				}
+			}
+		}
+		break;
 	default:
 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
 		return -EINVAL;
@@ -2809,30 +2872,86 @@
 {
 	struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+	uint64_t value;
+	unsigned ndw;
 
-	while (count) {
-		unsigned ndw = 2 + count * 2;
-		if (ndw > 0x3FFE)
-			ndw = 0x3FFE;
+	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+		while (count) {
+			ndw = 2 + count * 2;
+			if (ndw > 0x3FFE)
+				ndw = 0x3FFE;
 
-		radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
-		radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-					 WRITE_DATA_DST_SEL(1)));
-		radeon_ring_write(ring, pe);
-		radeon_ring_write(ring, upper_32_bits(pe));
-		for (; ndw > 2; ndw -= 2, --count, pe += 8) {
-			uint64_t value;
-			if (flags & RADEON_VM_PAGE_SYSTEM) {
-				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
-			} else if (flags & RADEON_VM_PAGE_VALID)
-				value = addr;
-			else
-				value = 0;
-			addr += incr;
-			value |= r600_flags;
-			radeon_ring_write(ring, value);
-			radeon_ring_write(ring, upper_32_bits(value));
+			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
+			radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+						 WRITE_DATA_DST_SEL(1)));
+			radeon_ring_write(ring, pe);
+			radeon_ring_write(ring, upper_32_bits(pe));
+			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
+				if (flags & RADEON_VM_PAGE_SYSTEM) {
+					value = radeon_vm_map_gart(rdev, addr);
+					value &= 0xFFFFFFFFFFFFF000ULL;
+				} else if (flags & RADEON_VM_PAGE_VALID) {
+					value = addr;
+				} else {
+					value = 0;
+				}
+				addr += incr;
+				value |= r600_flags;
+				radeon_ring_write(ring, value);
+				radeon_ring_write(ring, upper_32_bits(value));
+			}
+		}
+	} else {
+		/* DMA */
+		if (flags & RADEON_VM_PAGE_SYSTEM) {
+			while (count) {
+				ndw = count * 2;
+				if (ndw > 0xFFFFE)
+					ndw = 0xFFFFE;
+
+				/* for non-physically contiguous pages (system) */
+				radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
+				radeon_ring_write(ring, pe);
+				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+					if (flags & RADEON_VM_PAGE_SYSTEM) {
+						value = radeon_vm_map_gart(rdev, addr);
+						value &= 0xFFFFFFFFFFFFF000ULL;
+					} else if (flags & RADEON_VM_PAGE_VALID) {
+						value = addr;
+					} else {
+						value = 0;
+					}
+					addr += incr;
+					value |= r600_flags;
+					radeon_ring_write(ring, value);
+					radeon_ring_write(ring, upper_32_bits(value));
+				}
+			}
+		} else {
+			while (count) {
+				ndw = count * 2;
+				if (ndw > 0xFFFFE)
+					ndw = 0xFFFFE;
+
+				if (flags & RADEON_VM_PAGE_VALID)
+					value = addr;
+				else
+					value = 0;
+				/* for physically contiguous pages (vram) */
+				radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
+				radeon_ring_write(ring, pe); /* dst addr */
+				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+				radeon_ring_write(ring, r600_flags); /* mask */
+				radeon_ring_write(ring, 0);
+				radeon_ring_write(ring, value); /* value */
+				radeon_ring_write(ring, upper_32_bits(value));
+				radeon_ring_write(ring, incr); /* increment size */
+				radeon_ring_write(ring, 0);
+				pe += ndw * 4;
+				addr += (ndw / 2) * incr;
+				count -= ndw / 2;
+			}
 		}
 	}
 }
@@ -2880,6 +2999,32 @@
 	radeon_ring_write(ring, 0x0);
 }
 
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+	struct radeon_ring *ring = &rdev->ring[ridx];
+
+	if (vm == NULL)
+		return;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+	if (vm->id < 8) {
+		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+	} else {
+		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
+	}
+	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+	radeon_ring_write(ring, 1);
+
+	/* bits 0-7 are the VM contexts0-7 */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+	radeon_ring_write(ring, 1 << vm->id);
+}
+
 /*
  * RLC
  */
@@ -3048,6 +3193,10 @@
 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 	WREG32(CP_INT_CNTL_RING1, 0);
 	WREG32(CP_INT_CNTL_RING2, 0);
+	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
+	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
+	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
+	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
 	WREG32(GRBM_INT_CNTL, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -3167,6 +3316,7 @@
 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
 	u32 grbm_int_cntl = 0;
 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
+	u32 dma_cntl, dma_cntl1;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3187,6 +3337,9 @@
 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
 
+	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
+	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
+
 	/* enable CP interrupts on all rings */
 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int gfx\n");
@@ -3200,6 +3353,15 @@
 		DRM_DEBUG("si_irq_set: sw int cp2\n");
 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
 	}
+	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+		DRM_DEBUG("si_irq_set: sw int dma\n");
+		dma_cntl |= TRAP_ENABLE;
+	}
+
+	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+		DRM_DEBUG("si_irq_set: sw int dma1\n");
+		dma_cntl1 |= TRAP_ENABLE;
+	}
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("si_irq_set: vblank 0\n");
@@ -3259,6 +3421,9 @@
 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
 
+	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
+	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
+
 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3684,6 +3849,16 @@
 				break;
 			}
 			break;
+		case 146:
+		case 147:
+			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
+			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+			/* reset addr and status */
+			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
+			break;
 		case 176: /* RINGID0 CP_INT */
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
@@ -3707,9 +3882,17 @@
 				break;
 			}
 			break;
+		case 224: /* DMA trap event */
+			DRM_DEBUG("IH: DMA trap\n");
+			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
+		case 244: /* DMA1 trap event */
+			DRM_DEBUG("IH: DMA1 trap\n");
+			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+			break;
 		default:
 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
 			break;
@@ -3733,6 +3916,80 @@
 	return IRQ_HANDLED;
 }
 
+/**
+ * si_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU pages using the DMA engine (SI).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int si_copy_dma(struct radeon_device *rdev,
+		uint64_t src_offset, uint64_t dst_offset,
+		unsigned num_gpu_pages,
+		struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_bytes, cur_size_in_bytes;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_bytes = size_in_bytes;
+		if (cur_size_in_bytes > 0xFFFFF)
+			cur_size_in_bytes = 0xFFFFF;
+		size_in_bytes -= cur_size_in_bytes;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
+		radeon_ring_write(ring, dst_offset & 0xffffffff);
+		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_bytes;
+		dst_offset += cur_size_in_bytes;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
 /*
  * startup/shutdown callbacks
  */
@@ -3804,6 +4061,18 @@
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = si_irq_init(rdev);
 	if (r) {
@@ -3834,6 +4103,22 @@
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+	if (r)
+		return r;
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = si_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -3841,6 +4126,10 @@
 	if (r)
 		return r;
 
+	r = cayman_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3882,9 +4171,7 @@
 int si_suspend(struct radeon_device *rdev)
 {
 	si_cp_enable(rdev, false);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
-	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
-	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+	cayman_dma_stop(rdev);
 	si_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	si_pcie_gart_disable(rdev);
@@ -3962,6 +4249,14 @@
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 1024 * 1024);
 
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -3974,6 +4269,7 @@
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		si_cp_fini(rdev);
+		cayman_dma_fini(rdev);
 		si_irq_fini(rdev);
 		si_rlc_fini(rdev);
 		radeon_wb_fini(rdev);
@@ -4002,6 +4298,7 @@
 	r600_blit_fini(rdev);
 #endif
 	si_cp_fini(rdev);
+	cayman_dma_fini(rdev);
 	si_irq_fini(rdev);
 	si_rlc_fini(rdev);
 	radeon_wb_fini(rdev);
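
si_copy_dma() above splits a transfer into DMA_PACKET_COPY commands of at most 0xFFFFF bytes each and reserves num_loops * 5 + 11 ring dwords (five dwords per copy packet plus fence and semaphore overhead). The chunking arithmetic in isolation, assuming the usual 4 KiB GPU page size:

#include <stdio.h>

#define RADEON_GPU_PAGE_SIZE 4096
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned num_gpu_pages = 2048;			/* 8 MiB example transfer */
	unsigned size = num_gpu_pages * RADEON_GPU_PAGE_SIZE;
	unsigned loops = DIV_ROUND_UP(size, 0xfffff);

	printf("%u bytes -> %u copy packets -> %u ring dwords reserved\n",
	       size, loops, loops * 5 + 11);
	return 0;
}
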
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index a8871af..62b4621 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -91,7 +91,18 @@
 #define VM_CONTEXT0_CNTL				0x1410
 #define		ENABLE_CONTEXT					(1 << 0)
 #define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
+#define		RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 3)
 #define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
+#define		DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT	(1 << 6)
+#define		DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT	(1 << 7)
+#define		PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 9)
+#define		PDE0_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 10)
+#define		VALID_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 12)
+#define		VALID_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 13)
+#define		READ_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 15)
+#define		READ_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 16)
+#define		WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT		(1 << 18)
+#define		WRITE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 19)
 #define VM_CONTEXT1_CNTL				0x1414
 #define VM_CONTEXT0_CNTL2				0x1430
 #define VM_CONTEXT1_CNTL2				0x1434
@@ -104,6 +115,9 @@
 #define	VM_CONTEXT14_PAGE_TABLE_BASE_ADDR		0x1450
 #define	VM_CONTEXT15_PAGE_TABLE_BASE_ADDR		0x1454
 
+#define	VM_CONTEXT1_PROTECTION_FAULT_ADDR		0x14FC
+#define	VM_CONTEXT1_PROTECTION_FAULT_STATUS		0x14DC
+
 #define VM_INVALIDATE_REQUEST				0x1478
 #define VM_INVALIDATE_RESPONSE				0x147c
 
@@ -835,6 +849,54 @@
 #define	PACKET3_WAIT_REG_MEM				0x3C
 #define	PACKET3_MEM_WRITE				0x3D
 #define	PACKET3_COPY_DATA				0x40
+#define	PACKET3_CP_DMA					0x41
+/* 1. header
+ * 2. SRC_ADDR_LO or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
+ *    SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+#              define PACKET3_CP_DMA_DST_SEL(x)    ((x) << 20)
+                /* 0 - SRC_ADDR
+		 * 1 - GDS
+		 */
+#              define PACKET3_CP_DMA_ENGINE(x)     ((x) << 27)
+                /* 0 - ME
+		 * 1 - PFP
+		 */
+#              define PACKET3_CP_DMA_SRC_SEL(x)    ((x) << 29)
+                /* 0 - SRC_ADDR
+		 * 1 - GDS
+		 * 2 - DATA
+		 */
+#              define PACKET3_CP_DMA_CP_SYNC       (1 << 31)
+/* COMMAND */
+#              define PACKET3_CP_DMA_DIS_WC        (1 << 21)
+#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+                /* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+                /* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_SAS       (1 << 26)
+                /* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_DAS       (1 << 27)
+                /* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_SAIC      (1 << 28)
+#              define PACKET3_CP_DMA_CMD_DAIC      (1 << 29)
+#              define PACKET3_CP_DMA_CMD_RAW_WAIT  (1 << 30)
 #define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_SURFACE_SYNC				0x43
 #              define PACKET3_DEST_BASE_0_ENA      (1 << 0)
@@ -922,4 +984,61 @@
 #define	PACKET3_WAIT_ON_AVAIL_BUFFER			0x8A
 #define	PACKET3_SWITCH_BUFFER				0x8B
 
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET                              0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET                              0x800 /* not a register */
+
+#define DMA_RB_CNTL                                       0xd000
+#       define DMA_RB_ENABLE                              (1 << 0)
+#       define DMA_RB_SIZE(x)                             ((x) << 1) /* log2 */
+#       define DMA_RB_SWAP_ENABLE                         (1 << 9) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_ENABLE                  (1 << 12)
+#       define DMA_RPTR_WRITEBACK_SWAP_ENABLE             (1 << 13)  /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_TIMER(x)                ((x) << 16) /* log2 */
+#define DMA_RB_BASE                                       0xd004
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI                               0xd01c
+#define DMA_RB_RPTR_ADDR_LO                               0xd020
+
+#define DMA_IB_CNTL                                       0xd024
+#       define DMA_IB_ENABLE                              (1 << 0)
+#       define DMA_IB_SWAP_ENABLE                         (1 << 4)
+#define DMA_IB_RPTR                                       0xd028
+#define DMA_CNTL                                          0xd02c
+#       define TRAP_ENABLE                                (1 << 0)
+#       define SEM_INCOMPLETE_INT_ENABLE                  (1 << 1)
+#       define SEM_WAIT_INT_ENABLE                        (1 << 2)
+#       define DATA_SWAP_ENABLE                           (1 << 3)
+#       define FENCE_SWAP_ENABLE                          (1 << 4)
+#       define CTXEMPTY_INT_ENABLE                        (1 << 28)
+#define DMA_TILING_CONFIG  				  0xd0b8
+
+#define DMA_PACKET(cmd, b, t, s, n)	((((cmd) & 0xF) << 28) |	\
+					 (((b) & 0x1) << 26) |		\
+					 (((t) & 0x1) << 23) |		\
+					 (((s) & 0x1) << 22) |		\
+					 (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n)	((((cmd) & 0xF) << 28) |	\
+					 (((vmid) & 0xF) << 20) |	\
+					 (((n) & 0xFFFFF) << 0))
+
+#define DMA_PTE_PDE_PACKET(n)		((2 << 28) |			\
+					 (1 << 26) |			\
+					 (1 << 21) |			\
+					 (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define	DMA_PACKET_WRITE				  0x2
+#define	DMA_PACKET_COPY					  0x3
+#define	DMA_PACKET_INDIRECT_BUFFER			  0x4
+#define	DMA_PACKET_SEMAPHORE				  0x5
+#define	DMA_PACKET_FENCE				  0x6
+#define	DMA_PACKET_TRAP					  0x7
+#define	DMA_PACKET_SRBM_WRITE				  0x9
+#define	DMA_PACKET_CONSTANT_FILL			  0xd
+#define	DMA_PACKET_NOP					  0xf
+
 #endif
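
In the contiguous (VRAM) path of si_vm_set_page() earlier in this patch, a single DMA_PTE_PDE_PACKET with count ndw updates ndw / 2 page-table entries, since each 64-bit PTE occupies two dwords, and ndw is capped at 0xFFFFE. A small worked check of how many packets a large mapping needs under that cap, mirroring the driver's loop:

#include <stdio.h>

int main(void)
{
	unsigned count = 1u << 20;	/* PTEs to write: a 4 GiB mapping of 4 KiB pages */
	unsigned packets = 0;

	while (count) {
		unsigned ndw = count * 2;	/* two dwords per 64-bit PTE */
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;
		count -= ndw / 2;
		packets++;
	}
	printf("packets needed: %u\n", packets);	/* 3 for 1 Mi PTEs */
	return 0;
}
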
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index 0e7a930..d917a41 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -748,7 +748,7 @@
 	connector->encoder = encoder;
 
 	drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-	drm_connector_property_set_value(connector,
+	drm_object_property_set_value(&connector->base,
 		sdev->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF);
 
 	return 0;
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
new file mode 100644
index 0000000..be1daf7
--- /dev/null
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -0,0 +1,23 @@
+config DRM_TEGRA
+	tristate "NVIDIA Tegra DRM"
+	depends on DRM && OF && ARCH_TEGRA
+	select DRM_KMS_HELPER
+	select DRM_GEM_CMA_HELPER
+	select DRM_KMS_CMA_HELPER
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	help
+	  Choose this option if you have an NVIDIA Tegra SoC.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called tegra-drm.
+
+if DRM_TEGRA
+
+config DRM_TEGRA_DEBUG
+	bool "NVIDIA Tegra DRM debug support"
+	help
+	  Say yes here to enable debugging support.
+
+endif
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
new file mode 100644
index 0000000..80f73d1
--- /dev/null
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -0,0 +1,7 @@
+ccflags-y := -Iinclude/drm
+ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
+
+tegra-drm-y := drm.o fb.o dc.o host1x.o
+tegra-drm-y += output.o rgb.o hdmi.o
+
+obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
new file mode 100644
index 0000000..0744103
--- /dev/null
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <mach/clk.h>
+
+#include "drm.h"
+#include "dc.h"
+
+struct tegra_dc_window {
+	fixed20_12 x;
+	fixed20_12 y;
+	fixed20_12 w;
+	fixed20_12 h;
+	unsigned int outx;
+	unsigned int outy;
+	unsigned int outw;
+	unsigned int outh;
+	unsigned int stride;
+	unsigned int fmt;
+};
+
+static const struct drm_crtc_funcs tegra_crtc_funcs = {
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = drm_crtc_cleanup,
+};
+
+static void tegra_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+}
+
+static bool tegra_crtc_mode_fixup(struct drm_crtc *crtc,
+				  const struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted)
+{
+	return true;
+}
+
+static inline u32 compute_dda_inc(fixed20_12 inf, unsigned int out, bool v,
+				  unsigned int bpp)
+{
+	fixed20_12 outf = dfixed_init(out);
+	u32 dda_inc;
+	int max;
+
+	if (v)
+		max = 15;
+	else {
+		switch (bpp) {
+		case 2:
+			max = 8;
+			break;
+
+		default:
+			WARN_ON_ONCE(1);
+			/* fallthrough */
+		case 4:
+			max = 4;
+			break;
+		}
+	}
+
+	outf.full = max_t(u32, outf.full - dfixed_const(1), dfixed_const(1));
+	inf.full -= dfixed_const(1);
+
+	dda_inc = dfixed_div(inf, outf);
+	dda_inc = min_t(u32, dda_inc, dfixed_const(max));
+
+	return dda_inc;
+}
+
+static inline u32 compute_initial_dda(fixed20_12 in)
+{
+	return dfixed_frac(in);
+}
+
+static int tegra_dc_set_timings(struct tegra_dc *dc,
+				struct drm_display_mode *mode)
+{
+	/* TODO: For HDMI compliance, h & v ref_to_sync should be set to 1 */
+	unsigned int h_ref_to_sync = 0;
+	unsigned int v_ref_to_sync = 0;
+	unsigned long value;
+
+	tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
+
+	value = (v_ref_to_sync << 16) | h_ref_to_sync;
+	tegra_dc_writel(dc, value, DC_DISP_REF_TO_SYNC);
+
+	value = ((mode->vsync_end - mode->vsync_start) << 16) |
+		((mode->hsync_end - mode->hsync_start) <<  0);
+	tegra_dc_writel(dc, value, DC_DISP_SYNC_WIDTH);
+
+	value = ((mode->vsync_start - mode->vdisplay) << 16) |
+		((mode->hsync_start - mode->hdisplay) <<  0);
+	tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH);
+
+	value = ((mode->vtotal - mode->vsync_end) << 16) |
+		((mode->htotal - mode->hsync_end) <<  0);
+	tegra_dc_writel(dc, value, DC_DISP_FRONT_PORCH);
+
+	value = (mode->vdisplay << 16) | mode->hdisplay;
+	tegra_dc_writel(dc, value, DC_DISP_ACTIVE);
+
+	return 0;
+}
+
+static int tegra_crtc_setup_clk(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				unsigned long *div)
+{
+	unsigned long pclk = mode->clock * 1000, rate;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct tegra_output *output = NULL;
+	struct drm_encoder *encoder;
+	long err;
+
+	list_for_each_entry(encoder, &crtc->dev->mode_config.encoder_list, head)
+		if (encoder->crtc == crtc) {
+			output = encoder_to_output(encoder);
+			break;
+		}
+
+	if (!output)
+		return -ENODEV;
+
+	/*
+	 * This assumes that the display controller will divide its parent
+	 * clock by 2 to generate the pixel clock.
+	 */
+	err = tegra_output_setup_clock(output, dc->clk, pclk * 2);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to setup clock: %ld\n", err);
+		return err;
+	}
+
+	rate = clk_get_rate(dc->clk);
+	*div = (rate * 2 / pclk) - 2;
+
+	DRM_DEBUG_KMS("rate: %lu, div: %lu\n", rate, *div);
+
+	return 0;
+}
+
+static int tegra_crtc_mode_set(struct drm_crtc *crtc,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted,
+			       int x, int y, struct drm_framebuffer *old_fb)
+{
+	struct tegra_framebuffer *fb = to_tegra_fb(crtc->fb);
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	unsigned int h_dda, v_dda, bpp;
+	struct tegra_dc_window win;
+	unsigned long div, value;
+	int err;
+
+	err = tegra_crtc_setup_clk(crtc, mode, &div);
+	if (err) {
+		dev_err(dc->dev, "failed to setup clock for CRTC: %d\n", err);
+		return err;
+	}
+
+	/* program display mode */
+	tegra_dc_set_timings(dc, mode);
+
+	value = DE_SELECT_ACTIVE | DE_CONTROL_NORMAL;
+	tegra_dc_writel(dc, value, DC_DISP_DATA_ENABLE_OPTIONS);
+
+	value = tegra_dc_readl(dc, DC_COM_PIN_OUTPUT_POLARITY(1));
+	value &= ~LVS_OUTPUT_POLARITY_LOW;
+	value &= ~LHS_OUTPUT_POLARITY_LOW;
+	tegra_dc_writel(dc, value, DC_COM_PIN_OUTPUT_POLARITY(1));
+
+	value = DISP_DATA_FORMAT_DF1P1C | DISP_ALIGNMENT_MSB |
+		DISP_ORDER_RED_BLUE;
+	tegra_dc_writel(dc, value, DC_DISP_DISP_INTERFACE_CONTROL);
+
+	tegra_dc_writel(dc, 0x00010001, DC_DISP_SHIFT_CLOCK_OPTIONS);
+
+	value = SHIFT_CLK_DIVIDER(div) | PIXEL_CLK_DIVIDER_PCD1;
+	tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+
+	/* setup window parameters */
+	memset(&win, 0, sizeof(win));
+	win.x.full = dfixed_const(0);
+	win.y.full = dfixed_const(0);
+	win.w.full = dfixed_const(mode->hdisplay);
+	win.h.full = dfixed_const(mode->vdisplay);
+	win.outx = 0;
+	win.outy = 0;
+	win.outw = mode->hdisplay;
+	win.outh = mode->vdisplay;
+
+	switch (crtc->fb->pixel_format) {
+	case DRM_FORMAT_XRGB8888:
+		win.fmt = WIN_COLOR_DEPTH_B8G8R8A8;
+		break;
+
+	case DRM_FORMAT_RGB565:
+		win.fmt = WIN_COLOR_DEPTH_B5G6R5;
+		break;
+
+	default:
+		win.fmt = WIN_COLOR_DEPTH_B8G8R8A8;
+		WARN_ON(1);
+		break;
+	}
+
+	bpp = crtc->fb->bits_per_pixel / 8;
+	win.stride = crtc->fb->pitches[0];
+
+	/* program window registers */
+	value = tegra_dc_readl(dc, DC_CMD_DISPLAY_WINDOW_HEADER);
+	value |= WINDOW_A_SELECT;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
+
+	tegra_dc_writel(dc, win.fmt, DC_WIN_COLOR_DEPTH);
+	tegra_dc_writel(dc, 0, DC_WIN_BYTE_SWAP);
+
+	value = V_POSITION(win.outy) | H_POSITION(win.outx);
+	tegra_dc_writel(dc, value, DC_WIN_POSITION);
+
+	value = V_SIZE(win.outh) | H_SIZE(win.outw);
+	tegra_dc_writel(dc, value, DC_WIN_SIZE);
+
+	value = V_PRESCALED_SIZE(dfixed_trunc(win.h)) |
+		H_PRESCALED_SIZE(dfixed_trunc(win.w) * bpp);
+	tegra_dc_writel(dc, value, DC_WIN_PRESCALED_SIZE);
+
+	h_dda = compute_dda_inc(win.w, win.outw, false, bpp);
+	v_dda = compute_dda_inc(win.h, win.outh, true, bpp);
+
+	value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda);
+	tegra_dc_writel(dc, value, DC_WIN_DDA_INC);
+
+	h_dda = compute_initial_dda(win.x);
+	v_dda = compute_initial_dda(win.y);
+
+	tegra_dc_writel(dc, h_dda, DC_WIN_H_INITIAL_DDA);
+	tegra_dc_writel(dc, v_dda, DC_WIN_V_INITIAL_DDA);
+
+	tegra_dc_writel(dc, 0, DC_WIN_UV_BUF_STRIDE);
+	tegra_dc_writel(dc, 0, DC_WIN_BUF_STRIDE);
+
+	tegra_dc_writel(dc, fb->obj->paddr, DC_WINBUF_START_ADDR);
+	tegra_dc_writel(dc, win.stride, DC_WIN_LINE_STRIDE);
+	tegra_dc_writel(dc, dfixed_trunc(win.x) * bpp,
+			DC_WINBUF_ADDR_H_OFFSET);
+	tegra_dc_writel(dc, dfixed_trunc(win.y), DC_WINBUF_ADDR_V_OFFSET);
+
+	value = WIN_ENABLE;
+
+	if (bpp < 24)
+		value |= COLOR_EXPAND;
+
+	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
+
+	tegra_dc_writel(dc, 0xff00, DC_WIN_BLEND_NOKEY);
+	tegra_dc_writel(dc, 0xff00, DC_WIN_BLEND_1WIN);
+
+	return 0;
+}
+
+static void tegra_crtc_prepare(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	unsigned int syncpt;
+	unsigned long value;
+
+	/* hardware initialization */
+	tegra_periph_reset_deassert(dc->clk);
+	usleep_range(10000, 20000);
+
+	if (dc->pipe)
+		syncpt = SYNCPT_VBLANK1;
+	else
+		syncpt = SYNCPT_VBLANK0;
+
+	/* initialize display controller */
+	tegra_dc_writel(dc, 0x00000100, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
+	tegra_dc_writel(dc, 0x100 | syncpt, DC_CMD_CONT_SYNCPT_VSYNC);
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | WIN_A_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
+
+	value = PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
+		PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
+
+	value = tegra_dc_readl(dc, DC_CMD_DISPLAY_COMMAND);
+	value |= DISP_CTRL_MODE_C_DISPLAY;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND);
+
+	/* initialize timer */
+	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
+		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
+
+	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
+		WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
+
+	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+
+	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
+}
+
+static void tegra_crtc_commit(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	unsigned long update_mask;
+	unsigned long value;
+
+	update_mask = GENERAL_ACT_REQ | WIN_A_ACT_REQ;
+
+	tegra_dc_writel(dc, update_mask << 8, DC_CMD_STATE_CONTROL);
+
+	value = tegra_dc_readl(dc, DC_CMD_INT_ENABLE);
+	value |= FRAME_END_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
+
+	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
+	value |= FRAME_END_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+
+	tegra_dc_writel(dc, update_mask, DC_CMD_STATE_CONTROL);
+}
+
+static void tegra_crtc_load_lut(struct drm_crtc *crtc)
+{
+}
+
+static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
+	.dpms = tegra_crtc_dpms,
+	.mode_fixup = tegra_crtc_mode_fixup,
+	.mode_set = tegra_crtc_mode_set,
+	.prepare = tegra_crtc_prepare,
+	.commit = tegra_crtc_commit,
+	.load_lut = tegra_crtc_load_lut,
+};
+
+static irqreturn_t tegra_drm_irq(int irq, void *data)
+{
+	struct tegra_dc *dc = data;
+	unsigned long status;
+
+	status = tegra_dc_readl(dc, DC_CMD_INT_STATUS);
+	tegra_dc_writel(dc, status, DC_CMD_INT_STATUS);
+
+	if (status & FRAME_END_INT) {
+		/*
+		dev_dbg(dc->dev, "%s(): frame end\n", __func__);
+		*/
+	}
+
+	if (status & VBLANK_INT) {
+		/*
+		dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
+		*/
+		drm_handle_vblank(dc->base.dev, dc->pipe);
+	}
+
+	if (status & (WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT)) {
+		/*
+		dev_dbg(dc->dev, "%s(): underflow\n", __func__);
+		*/
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int tegra_dc_show_regs(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct tegra_dc *dc = node->info_ent->data;
+
+#define DUMP_REG(name)						\
+	seq_printf(s, "%-40s %#05x %08lx\n", #name, name,	\
+		   tegra_dc_readl(dc, name))
+
+	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT);
+	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
+	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT_ERROR);
+	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT);
+	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT_CNTRL);
+	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT_ERROR);
+	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT);
+	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT_CNTRL);
+	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT_ERROR);
+	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT);
+	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT_CNTRL);
+	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT_ERROR);
+	DUMP_REG(DC_CMD_CONT_SYNCPT_VSYNC);
+	DUMP_REG(DC_CMD_DISPLAY_COMMAND_OPTION0);
+	DUMP_REG(DC_CMD_DISPLAY_COMMAND);
+	DUMP_REG(DC_CMD_SIGNAL_RAISE);
+	DUMP_REG(DC_CMD_DISPLAY_POWER_CONTROL);
+	DUMP_REG(DC_CMD_INT_STATUS);
+	DUMP_REG(DC_CMD_INT_MASK);
+	DUMP_REG(DC_CMD_INT_ENABLE);
+	DUMP_REG(DC_CMD_INT_TYPE);
+	DUMP_REG(DC_CMD_INT_POLARITY);
+	DUMP_REG(DC_CMD_SIGNAL_RAISE1);
+	DUMP_REG(DC_CMD_SIGNAL_RAISE2);
+	DUMP_REG(DC_CMD_SIGNAL_RAISE3);
+	DUMP_REG(DC_CMD_STATE_ACCESS);
+	DUMP_REG(DC_CMD_STATE_CONTROL);
+	DUMP_REG(DC_CMD_DISPLAY_WINDOW_HEADER);
+	DUMP_REG(DC_CMD_REG_ACT_CONTROL);
+	DUMP_REG(DC_COM_CRC_CONTROL);
+	DUMP_REG(DC_COM_CRC_CHECKSUM);
+	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(0));
+	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(1));
+	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(2));
+	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(3));
+	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(0));
+	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(1));
+	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(2));
+	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(3));
+	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(0));
+	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(1));
+	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(2));
+	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(3));
+	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(0));
+	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(1));
+	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(2));
+	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(3));
+	DUMP_REG(DC_COM_PIN_INPUT_DATA(0));
+	DUMP_REG(DC_COM_PIN_INPUT_DATA(1));
+	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(0));
+	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(1));
+	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(2));
+	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(3));
+	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(4));
+	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(5));
+	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(6));
+	DUMP_REG(DC_COM_PIN_MISC_CONTROL);
+	DUMP_REG(DC_COM_PIN_PM0_CONTROL);
+	DUMP_REG(DC_COM_PIN_PM0_DUTY_CYCLE);
+	DUMP_REG(DC_COM_PIN_PM1_CONTROL);
+	DUMP_REG(DC_COM_PIN_PM1_DUTY_CYCLE);
+	DUMP_REG(DC_COM_SPI_CONTROL);
+	DUMP_REG(DC_COM_SPI_START_BYTE);
+	DUMP_REG(DC_COM_HSPI_WRITE_DATA_AB);
+	DUMP_REG(DC_COM_HSPI_WRITE_DATA_CD);
+	DUMP_REG(DC_COM_HSPI_CS_DC);
+	DUMP_REG(DC_COM_SCRATCH_REGISTER_A);
+	DUMP_REG(DC_COM_SCRATCH_REGISTER_B);
+	DUMP_REG(DC_COM_GPIO_CTRL);
+	DUMP_REG(DC_COM_GPIO_DEBOUNCE_COUNTER);
+	DUMP_REG(DC_COM_CRC_CHECKSUM_LATCHED);
+	DUMP_REG(DC_DISP_DISP_SIGNAL_OPTIONS0);
+	DUMP_REG(DC_DISP_DISP_SIGNAL_OPTIONS1);
+	DUMP_REG(DC_DISP_DISP_WIN_OPTIONS);
+	DUMP_REG(DC_DISP_DISP_MEM_HIGH_PRIORITY);
+	DUMP_REG(DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
+	DUMP_REG(DC_DISP_DISP_TIMING_OPTIONS);
+	DUMP_REG(DC_DISP_REF_TO_SYNC);
+	DUMP_REG(DC_DISP_SYNC_WIDTH);
+	DUMP_REG(DC_DISP_BACK_PORCH);
+	DUMP_REG(DC_DISP_ACTIVE);
+	DUMP_REG(DC_DISP_FRONT_PORCH);
+	DUMP_REG(DC_DISP_H_PULSE0_CONTROL);
+	DUMP_REG(DC_DISP_H_PULSE0_POSITION_A);
+	DUMP_REG(DC_DISP_H_PULSE0_POSITION_B);
+	DUMP_REG(DC_DISP_H_PULSE0_POSITION_C);
+	DUMP_REG(DC_DISP_H_PULSE0_POSITION_D);
+	DUMP_REG(DC_DISP_H_PULSE1_CONTROL);
+	DUMP_REG(DC_DISP_H_PULSE1_POSITION_A);
+	DUMP_REG(DC_DISP_H_PULSE1_POSITION_B);
+	DUMP_REG(DC_DISP_H_PULSE1_POSITION_C);
+	DUMP_REG(DC_DISP_H_PULSE1_POSITION_D);
+	DUMP_REG(DC_DISP_H_PULSE2_CONTROL);
+	DUMP_REG(DC_DISP_H_PULSE2_POSITION_A);
+	DUMP_REG(DC_DISP_H_PULSE2_POSITION_B);
+	DUMP_REG(DC_DISP_H_PULSE2_POSITION_C);
+	DUMP_REG(DC_DISP_H_PULSE2_POSITION_D);
+	DUMP_REG(DC_DISP_V_PULSE0_CONTROL);
+	DUMP_REG(DC_DISP_V_PULSE0_POSITION_A);
+	DUMP_REG(DC_DISP_V_PULSE0_POSITION_B);
+	DUMP_REG(DC_DISP_V_PULSE0_POSITION_C);
+	DUMP_REG(DC_DISP_V_PULSE1_CONTROL);
+	DUMP_REG(DC_DISP_V_PULSE1_POSITION_A);
+	DUMP_REG(DC_DISP_V_PULSE1_POSITION_B);
+	DUMP_REG(DC_DISP_V_PULSE1_POSITION_C);
+	DUMP_REG(DC_DISP_V_PULSE2_CONTROL);
+	DUMP_REG(DC_DISP_V_PULSE2_POSITION_A);
+	DUMP_REG(DC_DISP_V_PULSE3_CONTROL);
+	DUMP_REG(DC_DISP_V_PULSE3_POSITION_A);
+	DUMP_REG(DC_DISP_M0_CONTROL);
+	DUMP_REG(DC_DISP_M1_CONTROL);
+	DUMP_REG(DC_DISP_DI_CONTROL);
+	DUMP_REG(DC_DISP_PP_CONTROL);
+	DUMP_REG(DC_DISP_PP_SELECT_A);
+	DUMP_REG(DC_DISP_PP_SELECT_B);
+	DUMP_REG(DC_DISP_PP_SELECT_C);
+	DUMP_REG(DC_DISP_PP_SELECT_D);
+	DUMP_REG(DC_DISP_DISP_CLOCK_CONTROL);
+	DUMP_REG(DC_DISP_DISP_INTERFACE_CONTROL);
+	DUMP_REG(DC_DISP_DISP_COLOR_CONTROL);
+	DUMP_REG(DC_DISP_SHIFT_CLOCK_OPTIONS);
+	DUMP_REG(DC_DISP_DATA_ENABLE_OPTIONS);
+	DUMP_REG(DC_DISP_SERIAL_INTERFACE_OPTIONS);
+	DUMP_REG(DC_DISP_LCD_SPI_OPTIONS);
+	DUMP_REG(DC_DISP_BORDER_COLOR);
+	DUMP_REG(DC_DISP_COLOR_KEY0_LOWER);
+	DUMP_REG(DC_DISP_COLOR_KEY0_UPPER);
+	DUMP_REG(DC_DISP_COLOR_KEY1_LOWER);
+	DUMP_REG(DC_DISP_COLOR_KEY1_UPPER);
+	DUMP_REG(DC_DISP_CURSOR_FOREGROUND);
+	DUMP_REG(DC_DISP_CURSOR_BACKGROUND);
+	DUMP_REG(DC_DISP_CURSOR_START_ADDR);
+	DUMP_REG(DC_DISP_CURSOR_START_ADDR_NS);
+	DUMP_REG(DC_DISP_CURSOR_POSITION);
+	DUMP_REG(DC_DISP_CURSOR_POSITION_NS);
+	DUMP_REG(DC_DISP_INIT_SEQ_CONTROL);
+	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_A);
+	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_B);
+	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_C);
+	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_D);
+	DUMP_REG(DC_DISP_DC_MCCIF_FIFOCTRL);
+	DUMP_REG(DC_DISP_MCCIF_DISPLAY0A_HYST);
+	DUMP_REG(DC_DISP_MCCIF_DISPLAY0B_HYST);
+	DUMP_REG(DC_DISP_MCCIF_DISPLAY1A_HYST);
+	DUMP_REG(DC_DISP_MCCIF_DISPLAY1B_HYST);
+	DUMP_REG(DC_DISP_DAC_CRT_CTRL);
+	DUMP_REG(DC_DISP_DISP_MISC_CONTROL);
+	DUMP_REG(DC_DISP_SD_CONTROL);
+	DUMP_REG(DC_DISP_SD_CSC_COEFF);
+	DUMP_REG(DC_DISP_SD_LUT(0));
+	DUMP_REG(DC_DISP_SD_LUT(1));
+	DUMP_REG(DC_DISP_SD_LUT(2));
+	DUMP_REG(DC_DISP_SD_LUT(3));
+	DUMP_REG(DC_DISP_SD_LUT(4));
+	DUMP_REG(DC_DISP_SD_LUT(5));
+	DUMP_REG(DC_DISP_SD_LUT(6));
+	DUMP_REG(DC_DISP_SD_LUT(7));
+	DUMP_REG(DC_DISP_SD_LUT(8));
+	DUMP_REG(DC_DISP_SD_FLICKER_CONTROL);
+	DUMP_REG(DC_DISP_DC_PIXEL_COUNT);
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(0));
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(1));
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(2));
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(3));
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(4));
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(5));
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(6));
+	DUMP_REG(DC_DISP_SD_HISTOGRAM(7));
+	DUMP_REG(DC_DISP_SD_BL_TF(0));
+	DUMP_REG(DC_DISP_SD_BL_TF(1));
+	DUMP_REG(DC_DISP_SD_BL_TF(2));
+	DUMP_REG(DC_DISP_SD_BL_TF(3));
+	DUMP_REG(DC_DISP_SD_BL_CONTROL);
+	DUMP_REG(DC_DISP_SD_HW_K_VALUES);
+	DUMP_REG(DC_DISP_SD_MAN_K_VALUES);
+	DUMP_REG(DC_WIN_WIN_OPTIONS);
+	DUMP_REG(DC_WIN_BYTE_SWAP);
+	DUMP_REG(DC_WIN_BUFFER_CONTROL);
+	DUMP_REG(DC_WIN_COLOR_DEPTH);
+	DUMP_REG(DC_WIN_POSITION);
+	DUMP_REG(DC_WIN_SIZE);
+	DUMP_REG(DC_WIN_PRESCALED_SIZE);
+	DUMP_REG(DC_WIN_H_INITIAL_DDA);
+	DUMP_REG(DC_WIN_V_INITIAL_DDA);
+	DUMP_REG(DC_WIN_DDA_INC);
+	DUMP_REG(DC_WIN_LINE_STRIDE);
+	DUMP_REG(DC_WIN_BUF_STRIDE);
+	DUMP_REG(DC_WIN_UV_BUF_STRIDE);
+	DUMP_REG(DC_WIN_BUFFER_ADDR_MODE);
+	DUMP_REG(DC_WIN_DV_CONTROL);
+	DUMP_REG(DC_WIN_BLEND_NOKEY);
+	DUMP_REG(DC_WIN_BLEND_1WIN);
+	DUMP_REG(DC_WIN_BLEND_2WIN_X);
+	DUMP_REG(DC_WIN_BLEND_2WIN_Y);
+	DUMP_REG(DC_WIN_BLEND_3WIN_XY);
+	DUMP_REG(DC_WIN_HP_FETCH_CONTROL);
+	DUMP_REG(DC_WINBUF_START_ADDR);
+	DUMP_REG(DC_WINBUF_START_ADDR_NS);
+	DUMP_REG(DC_WINBUF_START_ADDR_U);
+	DUMP_REG(DC_WINBUF_START_ADDR_U_NS);
+	DUMP_REG(DC_WINBUF_START_ADDR_V);
+	DUMP_REG(DC_WINBUF_START_ADDR_V_NS);
+	DUMP_REG(DC_WINBUF_ADDR_H_OFFSET);
+	DUMP_REG(DC_WINBUF_ADDR_H_OFFSET_NS);
+	DUMP_REG(DC_WINBUF_ADDR_V_OFFSET);
+	DUMP_REG(DC_WINBUF_ADDR_V_OFFSET_NS);
+	DUMP_REG(DC_WINBUF_UFLOW_STATUS);
+	DUMP_REG(DC_WINBUF_AD_UFLOW_STATUS);
+	DUMP_REG(DC_WINBUF_BD_UFLOW_STATUS);
+	DUMP_REG(DC_WINBUF_CD_UFLOW_STATUS);
+
+#undef DUMP_REG
+
+	return 0;
+}
+
+static struct drm_info_list debugfs_files[] = {
+	{ "regs", tegra_dc_show_regs, 0, NULL },
+};
+
+static int tegra_dc_debugfs_init(struct tegra_dc *dc, struct drm_minor *minor)
+{
+	unsigned int i;
+	char *name;
+	int err;
+
+	name = kasprintf(GFP_KERNEL, "dc.%d", dc->pipe);
+	dc->debugfs = debugfs_create_dir(name, minor->debugfs_root);
+	kfree(name);
+
+	if (!dc->debugfs)
+		return -ENOMEM;
+
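+	/* copy the template so each DC instance carries its own data pointer */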
+	dc->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
+				    GFP_KERNEL);
+	if (!dc->debugfs_files) {
+		err = -ENOMEM;
+		goto remove;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
+		dc->debugfs_files[i].data = dc;
+
+	err = drm_debugfs_create_files(dc->debugfs_files,
+				       ARRAY_SIZE(debugfs_files),
+				       dc->debugfs, minor);
+	if (err < 0)
+		goto free;
+
+	dc->minor = minor;
+
+	return 0;
+
+free:
+	kfree(dc->debugfs_files);
+	dc->debugfs_files = NULL;
+remove:
+	debugfs_remove(dc->debugfs);
+	dc->debugfs = NULL;
+
+	return err;
+}
+
+static int tegra_dc_debugfs_exit(struct tegra_dc *dc)
+{
+	drm_debugfs_remove_files(dc->debugfs_files, ARRAY_SIZE(debugfs_files),
+				 dc->minor);
+	dc->minor = NULL;
+
+	kfree(dc->debugfs_files);
+	dc->debugfs_files = NULL;
+
+	debugfs_remove(dc->debugfs);
+	dc->debugfs = NULL;
+
+	return 0;
+}
+
+static int tegra_dc_drm_init(struct host1x_client *client,
+			     struct drm_device *drm)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	int err;
+
+	dc->pipe = drm->mode_config.num_crtc;
+
+	drm_crtc_init(drm, &dc->base, &tegra_crtc_funcs);
+	drm_mode_crtc_set_gamma_size(&dc->base, 256);
+	drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs);
+
+	err = tegra_dc_rgb_init(drm, dc);
+	if (err < 0 && err != -ENODEV) {
+		dev_err(dc->dev, "failed to initialize RGB output: %d\n", err);
+		return err;
+	}
+
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_dc_debugfs_init(dc, drm->primary);
+		if (err < 0)
+			dev_err(dc->dev, "debugfs setup failed: %d\n", err);
+	}
+
+	err = devm_request_irq(dc->dev, dc->irq, tegra_drm_irq, 0,
+			       dev_name(dc->dev), dc);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to request IRQ#%u: %d\n", dc->irq,
+			err);
+		return err;
+	}
+
+	return 0;
+}
+
+static int tegra_dc_drm_exit(struct host1x_client *client)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	int err;
+
+	devm_free_irq(dc->dev, dc->irq, dc);
+
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_dc_debugfs_exit(dc);
+		if (err < 0)
+			dev_err(dc->dev, "debugfs cleanup failed: %d\n", err);
+	}
+
+	err = tegra_dc_rgb_exit(dc);
+	if (err) {
+		dev_err(dc->dev, "failed to shutdown RGB output: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct host1x_client_ops dc_client_ops = {
+	.drm_init = tegra_dc_drm_init,
+	.drm_exit = tegra_dc_drm_exit,
+};
+
+static int tegra_dc_probe(struct platform_device *pdev)
+{
+	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct resource *regs;
+	struct tegra_dc *dc;
+	int err;
+
+	dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL);
+	if (!dc)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&dc->list);
+	dc->dev = &pdev->dev;
+
+	dc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dc->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(dc->clk);
+	}
+
+	err = clk_prepare_enable(dc->clk);
+	if (err < 0)
+		return err;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		dev_err(&pdev->dev, "failed to get registers\n");
+		return -ENXIO;
+	}
+
+	dc->regs = devm_request_and_ioremap(&pdev->dev, regs);
+	if (!dc->regs) {
+		dev_err(&pdev->dev, "failed to remap registers\n");
+		return -ENXIO;
+	}
+
+	dc->irq = platform_get_irq(pdev, 0);
+	if (dc->irq < 0) {
+		dev_err(&pdev->dev, "failed to get IRQ\n");
+		return -ENXIO;
+	}
+
+	INIT_LIST_HEAD(&dc->client.list);
+	dc->client.ops = &dc_client_ops;
+	dc->client.dev = &pdev->dev;
+
+	err = tegra_dc_rgb_probe(dc);
+	if (err < 0 && err != -ENODEV) {
+		dev_err(&pdev->dev, "failed to probe RGB output: %d\n", err);
+		return err;
+	}
+
+	err = host1x_register_client(host1x, &dc->client);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
+			err);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, dc);
+
+	return 0;
+}
+
+static int tegra_dc_remove(struct platform_device *pdev)
+{
+	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct tegra_dc *dc = platform_get_drvdata(pdev);
+	int err;
+
+	err = host1x_unregister_client(host1x, &dc->client);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
+			err);
+		return err;
+	}
+
+	clk_disable_unprepare(dc->clk);
+
+	return 0;
+}
+
+static struct of_device_id tegra_dc_of_match[] = {
+	{ .compatible = "nvidia,tegra30-dc", },
+	{ .compatible = "nvidia,tegra20-dc", },
+	{ },
+};
+
+struct platform_driver tegra_dc_driver = {
+	.driver = {
+		.name = "tegra-dc",
+		.owner = THIS_MODULE,
+		.of_match_table = tegra_dc_of_match,
+	},
+	.probe = tegra_dc_probe,
+	.remove = tegra_dc_remove,
+};
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
new file mode 100644
index 0000000..99977b5
--- /dev/null
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -0,0 +1,388 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TEGRA_DC_H
+#define TEGRA_DC_H 1
+
+#define DC_CMD_GENERAL_INCR_SYNCPT		0x000
+#define DC_CMD_GENERAL_INCR_SYNCPT_CNTRL	0x001
+#define DC_CMD_GENERAL_INCR_SYNCPT_ERROR	0x002
+#define DC_CMD_WIN_A_INCR_SYNCPT		0x008
+#define DC_CMD_WIN_A_INCR_SYNCPT_CNTRL		0x009
+#define DC_CMD_WIN_A_INCR_SYNCPT_ERROR		0x00a
+#define DC_CMD_WIN_B_INCR_SYNCPT		0x010
+#define DC_CMD_WIN_B_INCR_SYNCPT_CNTRL		0x011
+#define DC_CMD_WIN_B_INCR_SYNCPT_ERROR		0x012
+#define DC_CMD_WIN_C_INCR_SYNCPT		0x018
+#define DC_CMD_WIN_C_INCR_SYNCPT_CNTRL		0x019
+#define DC_CMD_WIN_C_INCR_SYNCPT_ERROR		0x01a
+#define DC_CMD_CONT_SYNCPT_VSYNC		0x028
+#define DC_CMD_DISPLAY_COMMAND_OPTION0		0x031
+#define DC_CMD_DISPLAY_COMMAND			0x032
+#define DISP_CTRL_MODE_STOP (0 << 5)
+#define DISP_CTRL_MODE_C_DISPLAY (1 << 5)
+#define DISP_CTRL_MODE_NC_DISPLAY (2 << 5)
+#define DC_CMD_SIGNAL_RAISE			0x033
+#define DC_CMD_DISPLAY_POWER_CONTROL		0x036
+#define PW0_ENABLE (1 <<  0)
+#define PW1_ENABLE (1 <<  2)
+#define PW2_ENABLE (1 <<  4)
+#define PW3_ENABLE (1 <<  6)
+#define PW4_ENABLE (1 <<  8)
+#define PM0_ENABLE (1 << 16)
+#define PM1_ENABLE (1 << 18)
+
+#define DC_CMD_INT_STATUS			0x037
+#define DC_CMD_INT_MASK				0x038
+#define DC_CMD_INT_ENABLE			0x039
+#define DC_CMD_INT_TYPE				0x03a
+#define DC_CMD_INT_POLARITY			0x03b
+#define CTXSW_INT     (1 << 0)
+#define FRAME_END_INT (1 << 1)
+#define VBLANK_INT    (1 << 2)
+#define WIN_A_UF_INT  (1 << 8)
+#define WIN_B_UF_INT  (1 << 9)
+#define WIN_C_UF_INT  (1 << 10)
+#define WIN_A_OF_INT  (1 << 14)
+#define WIN_B_OF_INT  (1 << 15)
+#define WIN_C_OF_INT  (1 << 16)
+
+#define DC_CMD_SIGNAL_RAISE1			0x03c
+#define DC_CMD_SIGNAL_RAISE2			0x03d
+#define DC_CMD_SIGNAL_RAISE3			0x03e
+
+#define DC_CMD_STATE_ACCESS			0x040
+
+#define DC_CMD_STATE_CONTROL			0x041
+#define GENERAL_ACT_REQ (1 <<  0)
+#define WIN_A_ACT_REQ   (1 <<  1)
+#define WIN_B_ACT_REQ   (1 <<  2)
+#define WIN_C_ACT_REQ   (1 <<  3)
+#define GENERAL_UPDATE  (1 <<  8)
+#define WIN_A_UPDATE    (1 <<  9)
+#define WIN_B_UPDATE    (1 << 10)
+#define WIN_C_UPDATE    (1 << 11)
+#define NC_HOST_TRIG    (1 << 24)
+
+#define DC_CMD_DISPLAY_WINDOW_HEADER		0x042
+#define WINDOW_A_SELECT (1 << 4)
+#define WINDOW_B_SELECT (1 << 5)
+#define WINDOW_C_SELECT (1 << 6)
+
+#define DC_CMD_REG_ACT_CONTROL			0x043
+
+#define DC_COM_CRC_CONTROL			0x300
+#define DC_COM_CRC_CHECKSUM			0x301
+#define DC_COM_PIN_OUTPUT_ENABLE(x) (0x302 + (x))
+#define DC_COM_PIN_OUTPUT_POLARITY(x) (0x306 + (x))
+#define LVS_OUTPUT_POLARITY_LOW (1 << 28)
+#define LHS_OUTPUT_POLARITY_LOW (1 << 30)
+#define DC_COM_PIN_OUTPUT_DATA(x) (0x30a + (x))
+#define DC_COM_PIN_INPUT_ENABLE(x) (0x30e + (x))
+#define DC_COM_PIN_INPUT_DATA(x) (0x312 + (x))
+#define DC_COM_PIN_OUTPUT_SELECT(x) (0x314 + (x))
+
+#define DC_COM_PIN_MISC_CONTROL			0x31b
+#define DC_COM_PIN_PM0_CONTROL			0x31c
+#define DC_COM_PIN_PM0_DUTY_CYCLE		0x31d
+#define DC_COM_PIN_PM1_CONTROL			0x31e
+#define DC_COM_PIN_PM1_DUTY_CYCLE		0x31f
+
+#define DC_COM_SPI_CONTROL			0x320
+#define DC_COM_SPI_START_BYTE			0x321
+#define DC_COM_HSPI_WRITE_DATA_AB		0x322
+#define DC_COM_HSPI_WRITE_DATA_CD		0x323
+#define DC_COM_HSPI_CS_DC			0x324
+#define DC_COM_SCRATCH_REGISTER_A		0x325
+#define DC_COM_SCRATCH_REGISTER_B		0x326
+#define DC_COM_GPIO_CTRL			0x327
+#define DC_COM_GPIO_DEBOUNCE_COUNTER		0x328
+#define DC_COM_CRC_CHECKSUM_LATCHED		0x329
+
+#define DC_DISP_DISP_SIGNAL_OPTIONS0		0x400
+#define H_PULSE_0_ENABLE (1 <<  8)
+#define H_PULSE_1_ENABLE (1 << 10)
+#define H_PULSE_2_ENABLE (1 << 12)
+
+#define DC_DISP_DISP_SIGNAL_OPTIONS1		0x401
+
+#define DC_DISP_DISP_WIN_OPTIONS		0x402
+#define HDMI_ENABLE (1 << 30)
+
+#define DC_DISP_DISP_MEM_HIGH_PRIORITY		0x403
+#define CURSOR_THRESHOLD(x)   (((x) & 0x03) << 24)
+#define WINDOW_A_THRESHOLD(x) (((x) & 0x7f) << 16)
+#define WINDOW_B_THRESHOLD(x) (((x) & 0x7f) <<  8)
+#define WINDOW_C_THRESHOLD(x) (((x) & 0xff) <<  0)
+
+#define DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER	0x404
+#define CURSOR_DELAY(x)   (((x) & 0x3f) << 24)
+#define WINDOW_A_DELAY(x) (((x) & 0x3f) << 16)
+#define WINDOW_B_DELAY(x) (((x) & 0x3f) <<  8)
+#define WINDOW_C_DELAY(x) (((x) & 0x3f) <<  0)
+
+#define DC_DISP_DISP_TIMING_OPTIONS		0x405
+#define VSYNC_H_POSITION(x) ((x) & 0xfff)
+
+#define DC_DISP_REF_TO_SYNC			0x406
+#define DC_DISP_SYNC_WIDTH			0x407
+#define DC_DISP_BACK_PORCH			0x408
+#define DC_DISP_ACTIVE				0x409
+#define DC_DISP_FRONT_PORCH			0x40a
+#define DC_DISP_H_PULSE0_CONTROL		0x40b
+#define DC_DISP_H_PULSE0_POSITION_A		0x40c
+#define DC_DISP_H_PULSE0_POSITION_B		0x40d
+#define DC_DISP_H_PULSE0_POSITION_C		0x40e
+#define DC_DISP_H_PULSE0_POSITION_D		0x40f
+#define DC_DISP_H_PULSE1_CONTROL		0x410
+#define DC_DISP_H_PULSE1_POSITION_A		0x411
+#define DC_DISP_H_PULSE1_POSITION_B		0x412
+#define DC_DISP_H_PULSE1_POSITION_C		0x413
+#define DC_DISP_H_PULSE1_POSITION_D		0x414
+#define DC_DISP_H_PULSE2_CONTROL		0x415
+#define DC_DISP_H_PULSE2_POSITION_A		0x416
+#define DC_DISP_H_PULSE2_POSITION_B		0x417
+#define DC_DISP_H_PULSE2_POSITION_C		0x418
+#define DC_DISP_H_PULSE2_POSITION_D		0x419
+#define DC_DISP_V_PULSE0_CONTROL		0x41a
+#define DC_DISP_V_PULSE0_POSITION_A		0x41b
+#define DC_DISP_V_PULSE0_POSITION_B		0x41c
+#define DC_DISP_V_PULSE0_POSITION_C		0x41d
+#define DC_DISP_V_PULSE1_CONTROL		0x41e
+#define DC_DISP_V_PULSE1_POSITION_A		0x41f
+#define DC_DISP_V_PULSE1_POSITION_B		0x420
+#define DC_DISP_V_PULSE1_POSITION_C		0x421
+#define DC_DISP_V_PULSE2_CONTROL		0x422
+#define DC_DISP_V_PULSE2_POSITION_A		0x423
+#define DC_DISP_V_PULSE3_CONTROL		0x424
+#define DC_DISP_V_PULSE3_POSITION_A		0x425
+#define DC_DISP_M0_CONTROL			0x426
+#define DC_DISP_M1_CONTROL			0x427
+#define DC_DISP_DI_CONTROL			0x428
+#define DC_DISP_PP_CONTROL			0x429
+#define DC_DISP_PP_SELECT_A			0x42a
+#define DC_DISP_PP_SELECT_B			0x42b
+#define DC_DISP_PP_SELECT_C			0x42c
+#define DC_DISP_PP_SELECT_D			0x42d
+
+#define PULSE_MODE_NORMAL    (0 << 3)
+#define PULSE_MODE_ONE_CLOCK (1 << 3)
+#define PULSE_POLARITY_HIGH  (0 << 4)
+#define PULSE_POLARITY_LOW   (1 << 4)
+#define PULSE_QUAL_ALWAYS    (0 << 6)
+#define PULSE_QUAL_VACTIVE   (2 << 6)
+#define PULSE_QUAL_VACTIVE1  (3 << 6)
+#define PULSE_LAST_START_A   (0 << 8)
+#define PULSE_LAST_END_A     (1 << 8)
+#define PULSE_LAST_START_B   (2 << 8)
+#define PULSE_LAST_END_B     (3 << 8)
+#define PULSE_LAST_START_C   (4 << 8)
+#define PULSE_LAST_END_C     (5 << 8)
+#define PULSE_LAST_START_D   (6 << 8)
+#define PULSE_LAST_END_D     (7 << 8)
+
+#define PULSE_START(x) (((x) & 0xfff) <<  0)
+#define PULSE_END(x)   (((x) & 0xfff) << 16)
+
+#define DC_DISP_DISP_CLOCK_CONTROL		0x42e
+#define PIXEL_CLK_DIVIDER_PCD1  (0 << 8)
+#define PIXEL_CLK_DIVIDER_PCD1H (1 << 8)
+#define PIXEL_CLK_DIVIDER_PCD2  (2 << 8)
+#define PIXEL_CLK_DIVIDER_PCD3  (3 << 8)
+#define PIXEL_CLK_DIVIDER_PCD4  (4 << 8)
+#define PIXEL_CLK_DIVIDER_PCD6  (5 << 8)
+#define PIXEL_CLK_DIVIDER_PCD8  (6 << 8)
+#define PIXEL_CLK_DIVIDER_PCD9  (7 << 8)
+#define PIXEL_CLK_DIVIDER_PCD12 (8 << 8)
+#define PIXEL_CLK_DIVIDER_PCD16 (9 << 8)
+#define PIXEL_CLK_DIVIDER_PCD18 (10 << 8)
+#define PIXEL_CLK_DIVIDER_PCD24 (11 << 8)
+#define PIXEL_CLK_DIVIDER_PCD13 (12 << 8)
+#define SHIFT_CLK_DIVIDER(x)    ((x) & 0xff)
+
+#define DC_DISP_DISP_INTERFACE_CONTROL		0x42f
+#define DISP_DATA_FORMAT_DF1P1C    (0 << 0)
+#define DISP_DATA_FORMAT_DF1P2C24B (1 << 0)
+#define DISP_DATA_FORMAT_DF1P2C18B (2 << 0)
+#define DISP_DATA_FORMAT_DF1P2C16B (3 << 0)
+#define DISP_DATA_FORMAT_DF2S      (4 << 0)
+#define DISP_DATA_FORMAT_DF3S      (5 << 0)
+#define DISP_DATA_FORMAT_DFSPI     (6 << 0)
+#define DISP_DATA_FORMAT_DF1P3C24B (7 << 0)
+#define DISP_DATA_FORMAT_DF1P3C18B (8 << 0)
+#define DISP_ALIGNMENT_MSB         (0 << 8)
+#define DISP_ALIGNMENT_LSB         (1 << 8)
+#define DISP_ORDER_RED_BLUE        (0 << 9)
+#define DISP_ORDER_BLUE_RED        (1 << 9)
+
+#define DC_DISP_DISP_COLOR_CONTROL		0x430
+#define BASE_COLOR_SIZE666     (0 << 0)
+#define BASE_COLOR_SIZE111     (1 << 0)
+#define BASE_COLOR_SIZE222     (2 << 0)
+#define BASE_COLOR_SIZE333     (3 << 0)
+#define BASE_COLOR_SIZE444     (4 << 0)
+#define BASE_COLOR_SIZE555     (5 << 0)
+#define BASE_COLOR_SIZE565     (6 << 0)
+#define BASE_COLOR_SIZE332     (7 << 0)
+#define BASE_COLOR_SIZE888     (8 << 0)
+#define DITHER_CONTROL_DISABLE (0 << 8)
+#define DITHER_CONTROL_ORDERED (2 << 8)
+#define DITHER_CONTROL_ERRDIFF (3 << 8)
+
+#define DC_DISP_SHIFT_CLOCK_OPTIONS		0x431
+
+#define DC_DISP_DATA_ENABLE_OPTIONS		0x432
+#define DE_SELECT_ACTIVE_BLANK  (0 << 0)
+#define DE_SELECT_ACTIVE        (1 << 0)
+#define DE_SELECT_ACTIVE_IS     (2 << 0)
+#define DE_CONTROL_ONECLK       (0 << 2)
+#define DE_CONTROL_NORMAL       (1 << 2)
+#define DE_CONTROL_EARLY_EXT    (2 << 2)
+#define DE_CONTROL_EARLY        (3 << 2)
+#define DE_CONTROL_ACTIVE_BLANK (4 << 2)
+
+#define DC_DISP_SERIAL_INTERFACE_OPTIONS	0x433
+#define DC_DISP_LCD_SPI_OPTIONS			0x434
+#define DC_DISP_BORDER_COLOR			0x435
+#define DC_DISP_COLOR_KEY0_LOWER		0x436
+#define DC_DISP_COLOR_KEY0_UPPER		0x437
+#define DC_DISP_COLOR_KEY1_LOWER		0x438
+#define DC_DISP_COLOR_KEY1_UPPER		0x439
+
+#define DC_DISP_CURSOR_FOREGROUND		0x43c
+#define DC_DISP_CURSOR_BACKGROUND		0x43d
+
+#define DC_DISP_CURSOR_START_ADDR		0x43e
+#define DC_DISP_CURSOR_START_ADDR_NS		0x43f
+
+#define DC_DISP_CURSOR_POSITION			0x440
+#define DC_DISP_CURSOR_POSITION_NS		0x441
+
+#define DC_DISP_INIT_SEQ_CONTROL		0x442
+#define DC_DISP_SPI_INIT_SEQ_DATA_A		0x443
+#define DC_DISP_SPI_INIT_SEQ_DATA_B		0x444
+#define DC_DISP_SPI_INIT_SEQ_DATA_C		0x445
+#define DC_DISP_SPI_INIT_SEQ_DATA_D		0x446
+
+#define DC_DISP_DC_MCCIF_FIFOCTRL		0x480
+#define DC_DISP_MCCIF_DISPLAY0A_HYST		0x481
+#define DC_DISP_MCCIF_DISPLAY0B_HYST		0x482
+#define DC_DISP_MCCIF_DISPLAY1A_HYST		0x483
+#define DC_DISP_MCCIF_DISPLAY1B_HYST		0x484
+
+#define DC_DISP_DAC_CRT_CTRL			0x4c0
+#define DC_DISP_DISP_MISC_CONTROL		0x4c1
+#define DC_DISP_SD_CONTROL			0x4c2
+#define DC_DISP_SD_CSC_COEFF			0x4c3
+#define DC_DISP_SD_LUT(x)			(0x4c4 + (x))
+#define DC_DISP_SD_FLICKER_CONTROL		0x4cd
+#define DC_DISP_DC_PIXEL_COUNT			0x4ce
+#define DC_DISP_SD_HISTOGRAM(x)			(0x4cf + (x))
+#define DC_DISP_SD_BL_PARAMETERS		0x4d7
+#define DC_DISP_SD_BL_TF(x)			(0x4d8 + (x))
+#define DC_DISP_SD_BL_CONTROL			0x4dc
+#define DC_DISP_SD_HW_K_VALUES			0x4dd
+#define DC_DISP_SD_MAN_K_VALUES			0x4de
+
+#define DC_WIN_WIN_OPTIONS			0x700
+#define COLOR_EXPAND (1 <<  6)
+#define WIN_ENABLE   (1 << 30)
+
+#define DC_WIN_BYTE_SWAP			0x701
+#define BYTE_SWAP_NOSWAP  (0 << 0)
+#define BYTE_SWAP_SWAP2   (1 << 0)
+#define BYTE_SWAP_SWAP4   (2 << 0)
+#define BYTE_SWAP_SWAP4HW (3 << 0)
+
+#define DC_WIN_BUFFER_CONTROL			0x702
+#define BUFFER_CONTROL_HOST  (0 << 0)
+#define BUFFER_CONTROL_VI    (1 << 0)
+#define BUFFER_CONTROL_EPP   (2 << 0)
+#define BUFFER_CONTROL_MPEGE (3 << 0)
+#define BUFFER_CONTROL_SB2D  (4 << 0)
+
+#define DC_WIN_COLOR_DEPTH			0x703
+#define WIN_COLOR_DEPTH_P1              0
+#define WIN_COLOR_DEPTH_P2              1
+#define WIN_COLOR_DEPTH_P4              2
+#define WIN_COLOR_DEPTH_P8              3
+#define WIN_COLOR_DEPTH_B4G4R4A4        4
+#define WIN_COLOR_DEPTH_B5G5R5A         5
+#define WIN_COLOR_DEPTH_B5G6R5          6
+#define WIN_COLOR_DEPTH_AB5G5R5         7
+#define WIN_COLOR_DEPTH_B8G8R8A8       12
+#define WIN_COLOR_DEPTH_R8G8B8A8       13
+#define WIN_COLOR_DEPTH_B6x2G6x2R6x2A8 14
+#define WIN_COLOR_DEPTH_R6x2G6x2B6x2A8 15
+#define WIN_COLOR_DEPTH_YCbCr422       16
+#define WIN_COLOR_DEPTH_YUV422         17
+#define WIN_COLOR_DEPTH_YCbCr420P      18
+#define WIN_COLOR_DEPTH_YUV420P        19
+#define WIN_COLOR_DEPTH_YCbCr422P      20
+#define WIN_COLOR_DEPTH_YUV422P        21
+#define WIN_COLOR_DEPTH_YCbCr422R      22
+#define WIN_COLOR_DEPTH_YUV422R        23
+#define WIN_COLOR_DEPTH_YCbCr422RA     24
+#define WIN_COLOR_DEPTH_YUV422RA       25
+
+#define DC_WIN_POSITION				0x704
+#define H_POSITION(x) (((x) & 0x1fff) <<  0)
+#define V_POSITION(x) (((x) & 0x1fff) << 16)
+
+#define DC_WIN_SIZE				0x705
+#define H_SIZE(x) (((x) & 0x1fff) <<  0)
+#define V_SIZE(x) (((x) & 0x1fff) << 16)
+
+#define DC_WIN_PRESCALED_SIZE			0x706
+#define H_PRESCALED_SIZE(x) (((x) & 0x7fff) <<  0)
+#define V_PRESCALED_SIZE(x) (((x) & 0x1fff) << 16)
+
+#define DC_WIN_H_INITIAL_DDA			0x707
+#define DC_WIN_V_INITIAL_DDA			0x708
+#define DC_WIN_DDA_INC				0x709
+#define H_DDA_INC(x) (((x) & 0xffff) <<  0)
+#define V_DDA_INC(x) (((x) & 0xffff) << 16)
+
+#define DC_WIN_LINE_STRIDE			0x70a
+#define DC_WIN_BUF_STRIDE			0x70b
+#define DC_WIN_UV_BUF_STRIDE			0x70c
+#define DC_WIN_BUFFER_ADDR_MODE			0x70d
+#define DC_WIN_DV_CONTROL			0x70e
+
+#define DC_WIN_BLEND_NOKEY			0x70f
+#define DC_WIN_BLEND_1WIN			0x710
+#define DC_WIN_BLEND_2WIN_X			0x711
+#define DC_WIN_BLEND_2WIN_Y			0x712
+#define DC_WIN_BLEND_3WIN_XY			0x713
+
+#define DC_WIN_HP_FETCH_CONTROL			0x714
+
+#define DC_WINBUF_START_ADDR			0x800
+#define DC_WINBUF_START_ADDR_NS			0x801
+#define DC_WINBUF_START_ADDR_U			0x802
+#define DC_WINBUF_START_ADDR_U_NS		0x803
+#define DC_WINBUF_START_ADDR_V			0x804
+#define DC_WINBUF_START_ADDR_V_NS		0x805
+
+#define DC_WINBUF_ADDR_H_OFFSET			0x806
+#define DC_WINBUF_ADDR_H_OFFSET_NS		0x807
+#define DC_WINBUF_ADDR_V_OFFSET			0x808
+#define DC_WINBUF_ADDR_V_OFFSET_NS		0x809
+
+#define DC_WINBUF_UFLOW_STATUS			0x80a
+
+#define DC_WINBUF_AD_UFLOW_STATUS		0xbca
+#define DC_WINBUF_BD_UFLOW_STATUS		0xdca
+#define DC_WINBUF_CD_UFLOW_STATUS		0xfca
+
+/* synchronization points */
+#define SYNCPT_VBLANK0 26
+#define SYNCPT_VBLANK1 27
+
+#endif /* TEGRA_DC_H */
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
new file mode 100644
index 0000000..3a503c9
--- /dev/null
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include <mach/clk.h>
+#include <linux/dma-mapping.h>
+#include <asm/dma-iommu.h>
+
+#include "drm.h"
+
+#define DRIVER_NAME "tegra"
+#define DRIVER_DESC "NVIDIA Tegra graphics"
+#define DRIVER_DATE "20120330"
+#define DRIVER_MAJOR 0
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 0
+
+static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
+{
+	struct device *dev = drm->dev;
+	struct host1x *host1x;
+	int err;
+
+	host1x = dev_get_drvdata(dev);
+	drm->dev_private = host1x;
+	host1x->drm = drm;
+
+	drm_mode_config_init(drm);
+
+	err = host1x_drm_init(host1x, drm);
+	if (err < 0)
+		return err;
+
+	err = tegra_drm_fb_init(drm);
+	if (err < 0)
+		return err;
+
+	drm_kms_helper_poll_init(drm);
+
+	return 0;
+}
+
+static int tegra_drm_unload(struct drm_device *drm)
+{
+	drm_kms_helper_poll_fini(drm);
+	tegra_drm_fb_exit(drm);
+
+	drm_mode_config_cleanup(drm);
+
+	return 0;
+}
+
+static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
+{
+	return 0;
+}
+
+static void tegra_drm_lastclose(struct drm_device *drm)
+{
+	struct host1x *host1x = drm->dev_private;
+
+	drm_fbdev_cma_restore_mode(host1x->fbdev);
+}
+
+static struct drm_ioctl_desc tegra_drm_ioctls[] = {
+};
+
+static const struct file_operations tegra_drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = drm_gem_cma_mmap,
+	.poll = drm_poll,
+	.fasync = drm_fasync,
+	.read = drm_read,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = drm_compat_ioctl,
+#endif
+	.llseek = noop_llseek,
+};
+
+struct drm_driver tegra_drm_driver = {
+	.driver_features = DRIVER_BUS_PLATFORM | DRIVER_MODESET | DRIVER_GEM,
+	.load = tegra_drm_load,
+	.unload = tegra_drm_unload,
+	.open = tegra_drm_open,
+	.lastclose = tegra_drm_lastclose,
+
+	.gem_free_object = drm_gem_cma_free_object,
+	.gem_vm_ops = &drm_gem_cma_vm_ops,
+	.dumb_create = drm_gem_cma_dumb_create,
+	.dumb_map_offset = drm_gem_cma_dumb_map_offset,
+	.dumb_destroy = drm_gem_cma_dumb_destroy,
+
+	.ioctls = tegra_drm_ioctls,
+	.num_ioctls = ARRAY_SIZE(tegra_drm_ioctls),
+	.fops = &tegra_drm_fops,
+
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
new file mode 100644
index 0000000..3a843a7
--- /dev/null
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TEGRA_DRM_H
+#define TEGRA_DRM_H 1
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fixed.h>
+
+struct tegra_framebuffer {
+	struct drm_framebuffer base;
+	struct drm_gem_cma_object *obj;
+};
+
+static inline struct tegra_framebuffer *to_tegra_fb(struct drm_framebuffer *fb)
+{
+	return container_of(fb, struct tegra_framebuffer, base);
+}
+
+struct host1x {
+	struct drm_device *drm;
+	struct device *dev;
+	void __iomem *regs;
+	struct clk *clk;
+	int syncpt;
+	int irq;
+
+	struct mutex drm_clients_lock;
+	struct list_head drm_clients;
+	struct list_head drm_active;
+
+	struct mutex clients_lock;
+	struct list_head clients;
+
+	struct drm_fbdev_cma *fbdev;
+	struct tegra_framebuffer fb;
+};
+
+struct host1x_client;
+
+struct host1x_client_ops {
+	int (*drm_init)(struct host1x_client *client, struct drm_device *drm);
+	int (*drm_exit)(struct host1x_client *client);
+};
+
+struct host1x_client {
+	struct host1x *host1x;
+	struct device *dev;
+
+	const struct host1x_client_ops *ops;
+
+	struct list_head list;
+};
+
+extern int host1x_drm_init(struct host1x *host1x, struct drm_device *drm);
+extern int host1x_drm_exit(struct host1x *host1x);
+
+extern int host1x_register_client(struct host1x *host1x,
+				  struct host1x_client *client);
+extern int host1x_unregister_client(struct host1x *host1x,
+				    struct host1x_client *client);
+
+struct tegra_output;
+
+struct tegra_dc {
+	struct host1x_client client;
+
+	struct host1x *host1x;
+	struct device *dev;
+
+	struct drm_crtc base;
+	int pipe;
+
+	struct clk *clk;
+
+	void __iomem *regs;
+	int irq;
+
+	struct tegra_output *rgb;
+
+	struct list_head list;
+
+	struct drm_info_list *debugfs_files;
+	struct drm_minor *minor;
+	struct dentry *debugfs;
+};
+
+static inline struct tegra_dc *host1x_client_to_dc(struct host1x_client *client)
+{
+	return container_of(client, struct tegra_dc, client);
+}
+
+static inline struct tegra_dc *to_tegra_dc(struct drm_crtc *crtc)
+{
+	return container_of(crtc, struct tegra_dc, base);
+}
+
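+/*
+ * Display controller registers are word-addressed, so offsets are shifted
+ * left by two to form the byte offset into the register space.
+ */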
+static inline void tegra_dc_writel(struct tegra_dc *dc, unsigned long value,
+				   unsigned long reg)
+{
+	writel(value, dc->regs + (reg << 2));
+}
+
+static inline unsigned long tegra_dc_readl(struct tegra_dc *dc,
+					   unsigned long reg)
+{
+	return readl(dc->regs + (reg << 2));
+}
+
+struct tegra_output_ops {
+	int (*enable)(struct tegra_output *output);
+	int (*disable)(struct tegra_output *output);
+	int (*setup_clock)(struct tegra_output *output, struct clk *clk,
+			   unsigned long pclk);
+	int (*check_mode)(struct tegra_output *output,
+			  struct drm_display_mode *mode,
+			  enum drm_mode_status *status);
+};
+
+enum tegra_output_type {
+	TEGRA_OUTPUT_RGB,
+	TEGRA_OUTPUT_HDMI,
+};
+
+struct tegra_output {
+	struct device_node *of_node;
+	struct device *dev;
+
+	const struct tegra_output_ops *ops;
+	enum tegra_output_type type;
+
+	struct i2c_adapter *ddc;
+	const struct edid *edid;
+	unsigned int hpd_irq;
+	int hpd_gpio;
+
+	struct drm_encoder encoder;
+	struct drm_connector connector;
+};
+
+static inline struct tegra_output *encoder_to_output(struct drm_encoder *e)
+{
+	return container_of(e, struct tegra_output, encoder);
+}
+
+static inline struct tegra_output *connector_to_output(struct drm_connector *c)
+{
+	return container_of(c, struct tegra_output, connector);
+}
+
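+/*
+ * These wrappers return -EINVAL when no output is given and -ENOSYS when
+ * the output does not implement the requested operation.
+ */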
+static inline int tegra_output_enable(struct tegra_output *output)
+{
+	if (output && output->ops && output->ops->enable)
+		return output->ops->enable(output);
+
+	return output ? -ENOSYS : -EINVAL;
+}
+
+static inline int tegra_output_disable(struct tegra_output *output)
+{
+	if (output && output->ops && output->ops->disable)
+		return output->ops->disable(output);
+
+	return output ? -ENOSYS : -EINVAL;
+}
+
+static inline int tegra_output_setup_clock(struct tegra_output *output,
+					   struct clk *clk, unsigned long pclk)
+{
+	if (output && output->ops && output->ops->setup_clock)
+		return output->ops->setup_clock(output, clk, pclk);
+
+	return output ? -ENOSYS : -EINVAL;
+}
+
+static inline int tegra_output_check_mode(struct tegra_output *output,
+					  struct drm_display_mode *mode,
+					  enum drm_mode_status *status)
+{
+	if (output && output->ops && output->ops->check_mode)
+		return output->ops->check_mode(output, mode, status);
+
+	return output ? -ENOSYS : -EINVAL;
+}
+
+/* from rgb.c */
+extern int tegra_dc_rgb_probe(struct tegra_dc *dc);
+extern int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
+extern int tegra_dc_rgb_exit(struct tegra_dc *dc);
+
+/* from output.c */
+extern int tegra_output_parse_dt(struct tegra_output *output);
+extern int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
+extern int tegra_output_exit(struct tegra_output *output);
+
+/* from gem.c */
+extern struct tegra_gem_object *tegra_gem_alloc(struct drm_device *drm,
+						size_t size);
+extern int tegra_gem_handle_create(struct drm_device *drm,
+				   struct drm_file *file, size_t size,
+				   unsigned long flags, uint32_t *handle);
+extern int tegra_gem_dumb_create(struct drm_file *file, struct drm_device *drm,
+				 struct drm_mode_create_dumb *args);
+extern int tegra_gem_dumb_map_offset(struct drm_file *file,
+				     struct drm_device *drm, uint32_t handle,
+				     uint64_t *offset);
+extern int tegra_gem_dumb_destroy(struct drm_file *file,
+				  struct drm_device *drm, uint32_t handle);
+extern int tegra_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+extern int tegra_gem_init_object(struct drm_gem_object *obj);
+extern void tegra_gem_free_object(struct drm_gem_object *obj);
+extern struct vm_operations_struct tegra_gem_vm_ops;
+
+/* from fb.c */
+extern int tegra_drm_fb_init(struct drm_device *drm);
+extern void tegra_drm_fb_exit(struct drm_device *drm);
+
+extern struct platform_driver tegra_host1x_driver;
+extern struct platform_driver tegra_hdmi_driver;
+extern struct platform_driver tegra_dc_driver;
+extern struct drm_driver tegra_drm_driver;
+
+#endif /* TEGRA_DRM_H */
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
new file mode 100644
index 0000000..97993c6
--- /dev/null
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "drm.h"
+
+static void tegra_drm_fb_output_poll_changed(struct drm_device *drm)
+{
+	struct host1x *host1x = drm->dev_private;
+
+	drm_fbdev_cma_hotplug_event(host1x->fbdev);
+}
+
+static const struct drm_mode_config_funcs tegra_drm_mode_funcs = {
+	.fb_create = drm_fb_cma_create,
+	.output_poll_changed = tegra_drm_fb_output_poll_changed,
+};
+
+int tegra_drm_fb_init(struct drm_device *drm)
+{
+	struct host1x *host1x = drm->dev_private;
+	struct drm_fbdev_cma *fbdev;
+
+	drm->mode_config.min_width = 0;
+	drm->mode_config.min_height = 0;
+
+	drm->mode_config.max_width = 4096;
+	drm->mode_config.max_height = 4096;
+
+	drm->mode_config.funcs = &tegra_drm_mode_funcs;
+
+	fbdev = drm_fbdev_cma_init(drm, 32, drm->mode_config.num_crtc,
+				   drm->mode_config.num_connector);
+	if (IS_ERR(fbdev))
+		return PTR_ERR(fbdev);
+
+#ifndef CONFIG_FRAMEBUFFER_CONSOLE
+	drm_fbdev_cma_restore_mode(fbdev);
+#endif
+
+	host1x->fbdev = fbdev;
+
+	return 0;
+}
+
+void tegra_drm_fb_exit(struct drm_device *drm)
+{
+	struct host1x *host1x = drm->dev_private;
+
+	drm_fbdev_cma_fini(host1x->fbdev);
+}
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
new file mode 100644
index 0000000..ab40164
--- /dev/null
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -0,0 +1,1334 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/gpio.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+
+#include <mach/clk.h>
+
+#include "hdmi.h"
+#include "drm.h"
+#include "dc.h"
+
+struct tegra_hdmi {
+	struct host1x_client client;
+	struct tegra_output output;
+	struct device *dev;
+
+	struct regulator *vdd;
+	struct regulator *pll;
+
+	void __iomem *regs;
+	unsigned int irq;
+
+	struct clk *clk_parent;
+	struct clk *clk;
+
+	unsigned int audio_source;
+	unsigned int audio_freq;
+	bool stereo;
+	bool dvi;
+
+	struct drm_info_list *debugfs_files;
+	struct drm_minor *minor;
+	struct dentry *debugfs;
+};
+
+static inline struct tegra_hdmi *
+host1x_client_to_hdmi(struct host1x_client *client)
+{
+	return container_of(client, struct tegra_hdmi, client);
+}
+
+static inline struct tegra_hdmi *to_hdmi(struct tegra_output *output)
+{
+	return container_of(output, struct tegra_hdmi, output);
+}
+
+#define HDMI_AUDIOCLK_FREQ 216000000
+#define HDMI_REKEY_DEFAULT 56
+
+enum {
+	AUTO = 0,
+	SPDIF,
+	HDA,
+};
+
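+/* like the display controller, the HDMI block is word-addressed */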
+static inline unsigned long tegra_hdmi_readl(struct tegra_hdmi *hdmi,
+					     unsigned long reg)
+{
+	return readl(hdmi->regs + (reg << 2));
+}
+
+static inline void tegra_hdmi_writel(struct tegra_hdmi *hdmi, unsigned long val,
+				     unsigned long reg)
+{
+	writel(val, hdmi->regs + (reg << 2));
+}
+
+struct tegra_hdmi_audio_config {
+	unsigned int pclk;
+	unsigned int n;
+	unsigned int cts;
+	unsigned int aval;
+};
+
+static const struct tegra_hdmi_audio_config tegra_hdmi_audio_32k[] = {
+	{  25200000, 4096,  25200, 24000 },
+	{  27000000, 4096,  27000, 24000 },
+	{  74250000, 4096,  74250, 24000 },
+	{ 148500000, 4096, 148500, 24000 },
+	{         0,    0,      0,     0 },
+};
+
+static const struct tegra_hdmi_audio_config tegra_hdmi_audio_44_1k[] = {
+	{  25200000, 5880,  26250, 25000 },
+	{  27000000, 5880,  28125, 25000 },
+	{  74250000, 4704,  61875, 20000 },
+	{ 148500000, 4704, 123750, 20000 },
+	{         0,    0,      0,     0 },
+};
+
+static const struct tegra_hdmi_audio_config tegra_hdmi_audio_48k[] = {
+	{  25200000, 6144,  25200, 24000 },
+	{  27000000, 6144,  27000, 24000 },
+	{  74250000, 6144,  74250, 24000 },
+	{ 148500000, 6144, 148500, 24000 },
+	{         0,    0,      0,     0 },
+};
+
+static const struct tegra_hdmi_audio_config tegra_hdmi_audio_88_2k[] = {
+	{  25200000, 11760,  26250, 25000 },
+	{  27000000, 11760,  28125, 25000 },
+	{  74250000,  9408,  61875, 20000 },
+	{ 148500000,  9408, 123750, 20000 },
+	{         0,     0,      0,     0 },
+};
+
+static const struct tegra_hdmi_audio_config tegra_hdmi_audio_96k[] = {
+	{  25200000, 12288,  25200, 24000 },
+	{  27000000, 12288,  27000, 24000 },
+	{  74250000, 12288,  74250, 24000 },
+	{ 148500000, 12288, 148500, 24000 },
+	{         0,     0,      0,     0 },
+};
+
+static const struct tegra_hdmi_audio_config tegra_hdmi_audio_176_4k[] = {
+	{  25200000, 23520,  26250, 25000 },
+	{  27000000, 23520,  28125, 25000 },
+	{  74250000, 18816,  61875, 20000 },
+	{ 148500000, 18816, 123750, 20000 },
+	{         0,     0,      0,     0 },
+};
+
+static const struct tegra_hdmi_audio_config tegra_hdmi_audio_192k[] = {
+	{  25200000, 24576,  25200, 24000 },
+	{  27000000, 24576,  27000, 24000 },
+	{  74250000, 24576,  74250, 24000 },
+	{ 148500000, 24576, 148500, 24000 },
+	{         0,     0,      0,     0 },
+};
+
+struct tmds_config {
+	unsigned int pclk;
+	u32 pll0;
+	u32 pll1;
+	u32 pe_current;
+	u32 drive_current;
+};
+
+static const struct tmds_config tegra2_tmds_config[] = {
+	{ /* 480p modes */
+		.pclk = 27000000,
+		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
+			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(0) |
+			SOR_PLL_TX_REG_LOAD(3),
+		.pll1 = SOR_PLL_TMDS_TERM_ENABLE,
+		.pe_current = PE_CURRENT0(PE_CURRENT_0_0_mA) |
+			PE_CURRENT1(PE_CURRENT_0_0_mA) |
+			PE_CURRENT2(PE_CURRENT_0_0_mA) |
+			PE_CURRENT3(PE_CURRENT_0_0_mA),
+		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA),
+	}, { /* 720p modes */
+		.pclk = 74250000,
+		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
+			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) |
+			SOR_PLL_TX_REG_LOAD(3),
+		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
+		.pe_current = PE_CURRENT0(PE_CURRENT_6_0_mA) |
+			PE_CURRENT1(PE_CURRENT_6_0_mA) |
+			PE_CURRENT2(PE_CURRENT_6_0_mA) |
+			PE_CURRENT3(PE_CURRENT_6_0_mA),
+		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA),
+	}, { /* 1080p modes */
+		.pclk = UINT_MAX,
+		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
+			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) |
+			SOR_PLL_TX_REG_LOAD(3),
+		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
+		.pe_current = PE_CURRENT0(PE_CURRENT_6_0_mA) |
+			PE_CURRENT1(PE_CURRENT_6_0_mA) |
+			PE_CURRENT2(PE_CURRENT_6_0_mA) |
+			PE_CURRENT3(PE_CURRENT_6_0_mA),
+		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) |
+			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA),
+	},
+};
+
+static const struct tmds_config tegra3_tmds_config[] = {
+	{ /* 480p modes */
+		.pclk = 27000000,
+		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
+			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(0) |
+			SOR_PLL_TX_REG_LOAD(0),
+		.pll1 = SOR_PLL_TMDS_TERM_ENABLE,
+		.pe_current = PE_CURRENT0(PE_CURRENT_0_0_mA) |
+			PE_CURRENT1(PE_CURRENT_0_0_mA) |
+			PE_CURRENT2(PE_CURRENT_0_0_mA) |
+			PE_CURRENT3(PE_CURRENT_0_0_mA),
+		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_5_250_mA),
+	}, { /* 720p modes */
+		.pclk = 74250000,
+		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
+			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) |
+			SOR_PLL_TX_REG_LOAD(0),
+		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
+		.pe_current = PE_CURRENT0(PE_CURRENT_5_0_mA) |
+			PE_CURRENT1(PE_CURRENT_5_0_mA) |
+			PE_CURRENT2(PE_CURRENT_5_0_mA) |
+			PE_CURRENT3(PE_CURRENT_5_0_mA),
+		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_5_250_mA),
+	}, { /* 1080p modes */
+		.pclk = UINT_MAX,
+		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
+			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(3) |
+			SOR_PLL_TX_REG_LOAD(0),
+		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
+		.pe_current = PE_CURRENT0(PE_CURRENT_5_0_mA) |
+			PE_CURRENT1(PE_CURRENT_5_0_mA) |
+			PE_CURRENT2(PE_CURRENT_5_0_mA) |
+			PE_CURRENT3(PE_CURRENT_5_0_mA),
+		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_5_250_mA) |
+			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_5_250_mA),
+	},
+};
+
+static const struct tegra_hdmi_audio_config *
+tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pclk)
+{
+	const struct tegra_hdmi_audio_config *table;
+
+	switch (audio_freq) {
+	case 32000:
+		table = tegra_hdmi_audio_32k;
+		break;
+
+	case 44100:
+		table = tegra_hdmi_audio_44_1k;
+		break;
+
+	case 48000:
+		table = tegra_hdmi_audio_48k;
+		break;
+
+	case 88200:
+		table = tegra_hdmi_audio_88_2k;
+		break;
+
+	case 96000:
+		table = tegra_hdmi_audio_96k;
+		break;
+
+	case 176400:
+		table = tegra_hdmi_audio_176_4k;
+		break;
+
+	case 192000:
+		table = tegra_hdmi_audio_192k;
+		break;
+
+	default:
+		return NULL;
+	}
+
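+	/* the per-rate tables are terminated by an all-zero entry */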
+	while (table->pclk) {
+		if (table->pclk == pclk)
+			return table;
+
+		table++;
+	}
+
+	return NULL;
+}
+
+static void tegra_hdmi_setup_audio_fs_tables(struct tegra_hdmi *hdmi)
+{
+	const unsigned int freqs[] = {
+		32000, 44100, 48000, 88200, 96000, 176400, 192000
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(freqs); i++) {
+		unsigned int f = freqs[i];
+		unsigned int eight_half;
+		unsigned long value;
+		unsigned int delta;
+
+		if (f > 96000)
+			delta = 2;
+		else if (f > 48000)
+			delta = 6;
+		else
+			delta = 9;
+
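+		/*
+		 * Program a match window of +/- delta around the expected
+		 * ratio of the 216 MHz audio clock to 128 * fs (scaled by
+		 * eight); higher sample rates get a tighter window.
+		 */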
+		eight_half = (8 * HDMI_AUDIOCLK_FREQ) / (f * 128);
+		value = AUDIO_FS_LOW(eight_half - delta) |
+			AUDIO_FS_HIGH(eight_half + delta);
+		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_FS(i));
+	}
+}
+
+static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
+{
+	struct device_node *node = hdmi->dev->of_node;
+	const struct tegra_hdmi_audio_config *config;
+	unsigned int offset = 0;
+	unsigned long value;
+
+	switch (hdmi->audio_source) {
+	case HDA:
+		value = AUDIO_CNTRL0_SOURCE_SELECT_HDAL;
+		break;
+
+	case SPDIF:
+		value = AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
+		break;
+
+	default:
+		value = AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
+		break;
+	}
+
+	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
+		value |= AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
+			 AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
+		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
+	} else {
+		value |= AUDIO_CNTRL0_INJECT_NULLSMPL;
+		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
+
+		value = AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
+			AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
+		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
+	}
+
+	config = tegra_hdmi_get_audio_config(hdmi->audio_freq, pclk);
+	if (!config) {
+		dev_err(hdmi->dev, "cannot set audio to %u at %u pclk\n",
+			hdmi->audio_freq, pclk);
+		return -EINVAL;
+	}
+
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_HDMI_ACR_CTRL);
+
+	value = AUDIO_N_RESETF | AUDIO_N_GENERATE_ALTERNATE |
+		AUDIO_N_VALUE(config->n - 1);
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
+
+	tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config->n) | ACR_ENABLE,
+			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
+
+	value = ACR_SUBPACK_CTS(config->cts);
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
+
+	value = SPARE_HW_CTS | SPARE_FORCE_SW_CTS | SPARE_CTS_RESET_VAL(1);
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_SPARE);
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_AUDIO_N);
+	value &= ~AUDIO_N_RESETF;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
+
+	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
+		switch (hdmi->audio_freq) {
+		case 32000:
+			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320;
+			break;
+
+		case 44100:
+			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441;
+			break;
+
+		case 48000:
+			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480;
+			break;
+
+		case 88200:
+			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882;
+			break;
+
+		case 96000:
+			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960;
+			break;
+
+		case 176400:
+			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764;
+			break;
+
+		case 192000:
+			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920;
+			break;
+		}
+
+		tegra_hdmi_writel(hdmi, config->aval, offset);
+	}
+
+	tegra_hdmi_setup_audio_fs_tables(hdmi);
+
+	return 0;
+}
+
+static void tegra_hdmi_write_infopack(struct tegra_hdmi *hdmi,
+				      unsigned int offset, u8 type,
+				      u8 version, void *data, size_t size)
+{
+	unsigned long value;
+	u8 *ptr = data;
+	u32 subpack[2];
+	size_t i;
+	u8 csum;
+
+	/*
+	 * The first data byte carries the checksum, computed so that all
+	 * bytes of the infoframe sum to zero modulo 256.
+	 */
+	csum = type + version + size - 1;
+
+	for (i = 1; i < size; i++)
+		csum += ptr[i];
+
+	ptr[0] = 0x100 - csum;
+
+	value = INFOFRAME_HEADER_TYPE(type) |
+		INFOFRAME_HEADER_VERSION(version) |
+		INFOFRAME_HEADER_LEN(size - 1);
+	tegra_hdmi_writel(hdmi, value, offset);
+
+	/* The audio infoframe only has one set of subpack registers.  The HDMI
+	 * block pads the rest of the data as per the spec so we have to fixup
+	 * the length before filling in the subpacks.
+	 */
+	if (offset == HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER)
+		size = 6;
+
+	/* each subpack is 7 bytes, divided into:
+	 *   subpack_low - bytes 0 - 3
+	 *   subpack_high - bytes 4 - 6 (with byte 7 padded to 0x00)
+	 */
+	for (i = 0; i < size; i++) {
+		size_t index = i % 7;
+
+		if (index == 0)
+			memset(subpack, 0x0, sizeof(subpack));
+
+		((u8 *)subpack)[index] = ptr[i];
+
+		if (index == 6 || (i + 1 == size)) {
+			unsigned int reg = offset + 1 + (i / 7) * 2;
+
+			tegra_hdmi_writel(hdmi, subpack[0], reg);
+			tegra_hdmi_writel(hdmi, subpack[1], reg + 1);
+		}
+	}
+}
+
+static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
+					   struct drm_display_mode *mode)
+{
+	struct hdmi_avi_infoframe frame;
+	unsigned int h_front_porch;
+	unsigned int hsize = 16;
+	unsigned int vsize = 9;
+
+	if (hdmi->dvi) {
+		tegra_hdmi_writel(hdmi, 0,
+				  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+		return;
+	}
+
+	h_front_porch = mode->hsync_start - mode->hdisplay;
+	memset(&frame, 0, sizeof(frame));
+	frame.r = HDMI_AVI_R_SAME;
+
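+	/*
+	 * Derive the CEA-861 video identification code from the vertical
+	 * resolution; where several modes share it, the horizontal front
+	 * porch disambiguates between them.
+	 */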
+	switch (mode->vdisplay) {
+	case 480:
+		if (mode->hdisplay == 640) {
+			frame.m = HDMI_AVI_M_4_3;
+			frame.vic = 1;
+		} else {
+			frame.m = HDMI_AVI_M_16_9;
+			frame.vic = 3;
+		}
+		break;
+
+	case 576:
+		if (((hsize * 10) / vsize) > 14) {
+			frame.m = HDMI_AVI_M_16_9;
+			frame.vic = 18;
+		} else {
+			frame.m = HDMI_AVI_M_4_3;
+			frame.vic = 17;
+		}
+		break;
+
+	case 720:
+	case 1470: /* stereo mode */
+		frame.m = HDMI_AVI_M_16_9;
+
+		if (h_front_porch == 110)
+			frame.vic = 4;
+		else
+			frame.vic = 19;
+		break;
+
+	case 1080:
+	case 2205: /* stereo mode */
+		frame.m = HDMI_AVI_M_16_9;
+
+		switch (h_front_porch) {
+		case 88:
+			frame.vic = 16;
+			break;
+
+		case 528:
+			frame.vic = 31;
+			break;
+
+		default:
+			frame.vic = 32;
+			break;
+		}
+		break;
+
+	default:
+		frame.m = HDMI_AVI_M_16_9;
+		frame.vic = 0;
+		break;
+	}
+
+	tegra_hdmi_write_infopack(hdmi, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER,
+				  HDMI_INFOFRAME_TYPE_AVI, HDMI_AVI_VERSION,
+				  &frame, sizeof(frame));
+
+	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
+			  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+}
+
+static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
+{
+	struct hdmi_audio_infoframe frame;
+
+	if (hdmi->dvi) {
+		tegra_hdmi_writel(hdmi, 0,
+				  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+		return;
+	}
+
+	memset(&frame, 0, sizeof(frame));
+	frame.cc = HDMI_AUDIO_CC_2;
+
+	tegra_hdmi_write_infopack(hdmi,
+				  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER,
+				  HDMI_INFOFRAME_TYPE_AUDIO,
+				  HDMI_AUDIO_VERSION,
+				  &frame, sizeof(frame));
+
+	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
+			  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+}
+
+static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
+{
+	struct hdmi_stereo_infoframe frame;
+	unsigned long value;
+
+	if (!hdmi->stereo) {
+		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+		value &= ~GENERIC_CTRL_ENABLE;
+		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+		return;
+	}
+
+	memset(&frame, 0, sizeof(frame));
+	frame.regid0 = 0x03;
+	frame.regid1 = 0x0c;
+	frame.regid2 = 0x00;
+	frame.hdmi_video_format = 2;
+
+	/* TODO: 74 MHz limit? */
+	if (1) {
+		frame._3d_structure = 0;
+	} else {
+		frame._3d_structure = 8;
+		frame._3d_ext_data = 0;
+	}
+
+	tegra_hdmi_write_infopack(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_HEADER,
+				  HDMI_INFOFRAME_TYPE_VENDOR,
+				  HDMI_VENDOR_VERSION, &frame, 6);
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	value |= GENERIC_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+}
+
+static void tegra_hdmi_setup_tmds(struct tegra_hdmi *hdmi,
+				  const struct tmds_config *tmds)
+{
+	unsigned long value;
+
+	tegra_hdmi_writel(hdmi, tmds->pll0, HDMI_NV_PDISP_SOR_PLL0);
+	tegra_hdmi_writel(hdmi, tmds->pll1, HDMI_NV_PDISP_SOR_PLL1);
+	tegra_hdmi_writel(hdmi, tmds->pe_current, HDMI_NV_PDISP_PE_CURRENT);
+
+	value = tmds->drive_current | DRIVE_CURRENT_FUSE_OVERRIDE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT);
+}
+
+static int tegra_output_hdmi_enable(struct tegra_output *output)
+{
+	unsigned int h_sync_width, h_front_porch, h_back_porch, i, rekey;
+	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
+	struct drm_display_mode *mode = &dc->base.mode;
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+	struct device_node *node = hdmi->dev->of_node;
+	unsigned int pulse_start, div82, pclk;
+	const struct tmds_config *tmds;
+	unsigned int num_tmds;
+	unsigned long value;
+	int retries = 1000;
+	int err;
+
+	pclk = mode->clock * 1000;
+	h_sync_width = mode->hsync_end - mode->hsync_start;
+	h_back_porch = mode->htotal - mode->hsync_end;
+	h_front_porch = mode->hsync_start - mode->hdisplay;
+
+	err = regulator_enable(hdmi->vdd);
+	if (err < 0) {
+		dev_err(hdmi->dev, "failed to enable VDD regulator: %d\n", err);
+		return err;
+	}
+
+	err = regulator_enable(hdmi->pll);
+	if (err < 0) {
+		dev_err(hdmi->dev, "failed to enable PLL regulator: %d\n", err);
+		return err;
+	}
+
+	/*
+	 * This assumes that the display controller will divide its parent
+	 * clock by 2 to generate the pixel clock.
+	 */
+	err = tegra_output_setup_clock(output, hdmi->clk, pclk * 2);
+	if (err < 0) {
+		dev_err(hdmi->dev, "failed to setup clock: %d\n", err);
+		return err;
+	}
+
+	err = clk_set_rate(hdmi->clk, pclk);
+	if (err < 0)
+		return err;
+
+	err = clk_enable(hdmi->clk);
+	if (err < 0) {
+		dev_err(hdmi->dev, "failed to enable clock: %d\n", err);
+		return err;
+	}
+
+	tegra_periph_reset_assert(hdmi->clk);
+	usleep_range(1000, 2000);
+	tegra_periph_reset_deassert(hdmi->clk);
+
+	tegra_dc_writel(dc, VSYNC_H_POSITION(1),
+			DC_DISP_DISP_TIMING_OPTIONS);
+	tegra_dc_writel(dc, DITHER_CONTROL_DISABLE | BASE_COLOR_SIZE888,
+			DC_DISP_DISP_COLOR_CONTROL);
+
+	/* video_preamble uses h_pulse2 */
+	pulse_start = 1 + h_sync_width + h_back_porch - 10;
+
+	tegra_dc_writel(dc, H_PULSE_2_ENABLE, DC_DISP_DISP_SIGNAL_OPTIONS0);
+
+	value = PULSE_MODE_NORMAL | PULSE_POLARITY_HIGH | PULSE_QUAL_VACTIVE |
+		PULSE_LAST_END_A;
+	tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_CONTROL);
+
+	value = PULSE_START(pulse_start) | PULSE_END(pulse_start + 8);
+	tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_POSITION_A);
+
+	value = VSYNC_WINDOW_END(0x210) | VSYNC_WINDOW_START(0x200) |
+		VSYNC_WINDOW_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_VSYNC_WINDOW);
+
+	if (dc->pipe)
+		value = HDMI_SRC_DISPLAYB;
+	else
+		value = HDMI_SRC_DISPLAYA;
+
+	if ((mode->hdisplay == 720) && ((mode->vdisplay == 480) ||
+					(mode->vdisplay == 576)))
+		tegra_hdmi_writel(hdmi,
+				  value | ARM_VIDEO_RANGE_FULL,
+				  HDMI_NV_PDISP_INPUT_CONTROL);
+	else
+		tegra_hdmi_writel(hdmi,
+				  value | ARM_VIDEO_RANGE_LIMITED,
+				  HDMI_NV_PDISP_INPUT_CONTROL);
+
+	div82 = clk_get_rate(hdmi->clk) / 1000000 * 4;
+	value = SOR_REFCLK_DIV_INT(div82 >> 2) | SOR_REFCLK_DIV_FRAC(div82);
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_REFCLK);
+
+	if (!hdmi->dvi) {
+		err = tegra_hdmi_setup_audio(hdmi, pclk);
+		if (err < 0)
+			hdmi->dvi = true;
+	}
+
+	if (of_device_is_compatible(node, "nvidia,tegra20-hdmi")) {
+		/*
+		 * TODO: add ELD support
+		 */
+	}
+
+	rekey = HDMI_REKEY_DEFAULT;
+	value = HDMI_CTRL_REKEY(rekey);
+	value |= HDMI_CTRL_MAX_AC_PACKET((h_sync_width + h_back_porch +
+					  h_front_porch - rekey - 18) / 32);
+
+	if (!hdmi->dvi)
+		value |= HDMI_CTRL_ENABLE;
+
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_CTRL);
+
+	if (hdmi->dvi)
+		tegra_hdmi_writel(hdmi, 0x0,
+				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	else
+		tegra_hdmi_writel(hdmi, GENERIC_CTRL_AUDIO,
+				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+
+	tegra_hdmi_setup_avi_infoframe(hdmi, mode);
+	tegra_hdmi_setup_audio_infoframe(hdmi);
+	tegra_hdmi_setup_stereo_infoframe(hdmi);
+
+	/* TMDS CONFIG */
+	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
+		num_tmds = ARRAY_SIZE(tegra3_tmds_config);
+		tmds = tegra3_tmds_config;
+	} else {
+		num_tmds = ARRAY_SIZE(tegra2_tmds_config);
+		tmds = tegra2_tmds_config;
+	}
+
+	for (i = 0; i < num_tmds; i++) {
+		if (pclk <= tmds[i].pclk) {
+			tegra_hdmi_setup_tmds(hdmi, &tmds[i]);
+			break;
+		}
+	}
+
+	tegra_hdmi_writel(hdmi,
+			  SOR_SEQ_CTL_PU_PC(0) |
+			  SOR_SEQ_PU_PC_ALT(0) |
+			  SOR_SEQ_PD_PC(8) |
+			  SOR_SEQ_PD_PC_ALT(8),
+			  HDMI_NV_PDISP_SOR_SEQ_CTL);
+
+	value = SOR_SEQ_INST_WAIT_TIME(1) |
+		SOR_SEQ_INST_WAIT_UNITS_VSYNC |
+		SOR_SEQ_INST_HALT |
+		SOR_SEQ_INST_PIN_A_LOW |
+		SOR_SEQ_INST_PIN_B_LOW |
+		SOR_SEQ_INST_DRIVE_PWM_OUT_LO;
+
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_SEQ_INST(0));
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_SEQ_INST(8));
+
+	value = 0x1c800;
+	value &= ~SOR_CSTM_ROTCLK(~0);
+	value |= SOR_CSTM_ROTCLK(2);
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_CSTM);
+
+	tegra_dc_writel(dc, DISP_CTRL_MODE_STOP, DC_CMD_DISPLAY_COMMAND);
+	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
+
+	/* start SOR */
+	tegra_hdmi_writel(hdmi,
+			  SOR_PWR_NORMAL_STATE_PU |
+			  SOR_PWR_NORMAL_START_NORMAL |
+			  SOR_PWR_SAFE_STATE_PD |
+			  SOR_PWR_SETTING_NEW_TRIGGER,
+			  HDMI_NV_PDISP_SOR_PWR);
+	tegra_hdmi_writel(hdmi,
+			  SOR_PWR_NORMAL_STATE_PU |
+			  SOR_PWR_NORMAL_START_NORMAL |
+			  SOR_PWR_SAFE_STATE_PD |
+			  SOR_PWR_SETTING_NEW_DONE,
+			  HDMI_NV_PDISP_SOR_PWR);
+
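+	/* wait for the hardware to latch the new power-state settings */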
+	do {
+		BUG_ON(--retries < 0);
+		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_PWR);
+	} while (value & SOR_PWR_SETTING_NEW_PENDING);
+
+	value = SOR_STATE_ASY_CRCMODE_COMPLETE |
+		SOR_STATE_ASY_OWNER_HEAD0 |
+		SOR_STATE_ASY_SUBOWNER_BOTH |
+		SOR_STATE_ASY_PROTOCOL_SINGLE_TMDS_A |
+		SOR_STATE_ASY_DEPOL_POS;
+
+	/* setup sync polarities */
+	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+		value |= SOR_STATE_ASY_HSYNCPOL_POS;
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		value |= SOR_STATE_ASY_HSYNCPOL_NEG;
+
+	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+		value |= SOR_STATE_ASY_VSYNCPOL_POS;
+
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		value |= SOR_STATE_ASY_VSYNCPOL_NEG;
+
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_STATE2);
+
+	value = SOR_STATE_ASY_HEAD_OPMODE_AWAKE | SOR_STATE_ASY_ORMODE_NORMAL;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_STATE1);
+
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_SOR_STATE0);
+	tegra_hdmi_writel(hdmi, SOR_STATE_UPDATE, HDMI_NV_PDISP_SOR_STATE0);
+	tegra_hdmi_writel(hdmi, value | SOR_STATE_ATTACHED,
+			  HDMI_NV_PDISP_SOR_STATE1);
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_SOR_STATE0);
+
+	tegra_dc_writel(dc, HDMI_ENABLE, DC_DISP_DISP_WIN_OPTIONS);
+
+	value = PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
+		PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
+
+	value = DISP_CTRL_MODE_C_DISPLAY;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND);
+
+	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
+
+	/* TODO: add HDCP support */
+
+	return 0;
+}
+
+static int tegra_output_hdmi_disable(struct tegra_output *output)
+{
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+
+	tegra_periph_reset_assert(hdmi->clk);
+	clk_disable(hdmi->clk);
+	regulator_disable(hdmi->pll);
+	regulator_disable(hdmi->vdd);
+
+	return 0;
+}
+
+static int tegra_output_hdmi_setup_clock(struct tegra_output *output,
+					 struct clk *clk, unsigned long pclk)
+{
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+	struct clk *base;
+	int err;
+
+	err = clk_set_parent(clk, hdmi->clk_parent);
+	if (err < 0) {
+		dev_err(output->dev, "failed to set parent: %d\n", err);
+		return err;
+	}
+
+	base = clk_get_parent(hdmi->clk_parent);
+
+	/*
+	 * This assumes that the parent clock is pll_d_out0 or pll_d2_out
+	 * respectively, each of which divides the base pll_d by 2.
+	 */
+	err = clk_set_rate(base, pclk * 2);
+	if (err < 0)
+		dev_err(output->dev,
+			"failed to set base clock rate to %lu Hz: %d\n",
+			pclk * 2, err);
+
+	/* a failed rate change is logged but not treated as fatal here */
+	return 0;
+}
+
+static int tegra_output_hdmi_check_mode(struct tegra_output *output,
+					struct drm_display_mode *mode,
+					enum drm_mode_status *status)
+{
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+	unsigned long pclk = mode->clock * 1000;
+	struct clk *parent;
+	long err;
+
+	parent = clk_get_parent(hdmi->clk_parent);
+
+	err = clk_round_rate(parent, pclk * 4);
+	if (err < 0)
+		*status = MODE_NOCLOCK;
+	else
+		*status = MODE_OK;
+
+	return 0;
+}
+
+static const struct tegra_output_ops hdmi_ops = {
+	.enable = tegra_output_hdmi_enable,
+	.disable = tegra_output_hdmi_disable,
+	.setup_clock = tegra_output_hdmi_setup_clock,
+	.check_mode = tegra_output_hdmi_check_mode,
+};
+
+static int tegra_hdmi_show_regs(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct tegra_hdmi *hdmi = node->info_ent->data;
+
+#define DUMP_REG(name)						\
+	seq_printf(s, "%-56s %#05x %08lx\n", #name, name,	\
+		tegra_hdmi_readl(hdmi, name))
+
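+	/*
+	 * For example, DUMP_REG(HDMI_NV_PDISP_SOR_PWR) expands to
+	 *
+	 *	seq_printf(s, "%-56s %#05x %08lx\n", "HDMI_NV_PDISP_SOR_PWR",
+	 *		   HDMI_NV_PDISP_SOR_PWR,
+	 *		   tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_PWR));
+	 *
+	 * listing the register name, its offset and its current value.
+	 */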
+	DUMP_REG(HDMI_CTXSW);
+	DUMP_REG(HDMI_NV_PDISP_SOR_STATE0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_STATE1);
+	DUMP_REG(HDMI_NV_PDISP_SOR_STATE2);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AN_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AN_LSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CN_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CN_LSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AKSV_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AKSV_LSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_BKSV_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_BKSV_LSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CKSV_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CKSV_LSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_DKSV_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_DKSV_LSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CMODE);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_MPRIME_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_MPRIME_LSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB2);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB1);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_RI);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CS_MSB);
+	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CS_LSB);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU0);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU_RDATA0);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU1);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU2);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_STATUS);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_STATUS);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_STATUS);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_HEADER);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_LOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_HIGH);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_VSYNC_KEEPOUT);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_VSYNC_WINDOW);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_STATUS);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_SUBPACK);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS1);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS2);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU0);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU1);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU1_RDATA);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_SPARE);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS1);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS2);
+	DUMP_REG(HDMI_NV_PDISP_HDMI_HDCPRIF_ROM_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_SOR_CAP);
+	DUMP_REG(HDMI_NV_PDISP_SOR_PWR);
+	DUMP_REG(HDMI_NV_PDISP_SOR_TEST);
+	DUMP_REG(HDMI_NV_PDISP_SOR_PLL0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_PLL1);
+	DUMP_REG(HDMI_NV_PDISP_SOR_PLL2);
+	DUMP_REG(HDMI_NV_PDISP_SOR_CSTM);
+	DUMP_REG(HDMI_NV_PDISP_SOR_LVDS);
+	DUMP_REG(HDMI_NV_PDISP_SOR_CRCA);
+	DUMP_REG(HDMI_NV_PDISP_SOR_CRCB);
+	DUMP_REG(HDMI_NV_PDISP_SOR_BLANK);
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_CTL);
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(0));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(1));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(2));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(3));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(4));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(5));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(6));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(7));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(8));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(9));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(10));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(11));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(12));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(13));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(14));
+	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(15));
+	DUMP_REG(HDMI_NV_PDISP_SOR_VCRCA0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_VCRCA1);
+	DUMP_REG(HDMI_NV_PDISP_SOR_CCRCA0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_CCRCA1);
+	DUMP_REG(HDMI_NV_PDISP_SOR_EDATAA0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_EDATAA1);
+	DUMP_REG(HDMI_NV_PDISP_SOR_COUNTA0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_COUNTA1);
+	DUMP_REG(HDMI_NV_PDISP_SOR_DEBUGA0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_DEBUGA1);
+	DUMP_REG(HDMI_NV_PDISP_SOR_TRIG);
+	DUMP_REG(HDMI_NV_PDISP_SOR_MSCHECK);
+	DUMP_REG(HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT);
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG0);
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG1);
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG2);
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(0));
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(1));
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(2));
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(3));
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(4));
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(5));
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(6));
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_PULSE_WIDTH);
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_THRESHOLD);
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_CNTRL0);
+	DUMP_REG(HDMI_NV_PDISP_AUDIO_N);
+	DUMP_REG(HDMI_NV_PDISP_HDCPRIF_ROM_TIMING);
+	DUMP_REG(HDMI_NV_PDISP_SOR_REFCLK);
+	DUMP_REG(HDMI_NV_PDISP_CRC_CONTROL);
+	DUMP_REG(HDMI_NV_PDISP_INPUT_CONTROL);
+	DUMP_REG(HDMI_NV_PDISP_SCRATCH);
+	DUMP_REG(HDMI_NV_PDISP_PE_CURRENT);
+	DUMP_REG(HDMI_NV_PDISP_KEY_CTRL);
+	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG0);
+	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG1);
+	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG2);
+	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_0);
+	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_1);
+	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_2);
+	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_3);
+	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG);
+	DUMP_REG(HDMI_NV_PDISP_KEY_SKEY_INDEX);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
+
+#undef DUMP_REG
+
+	return 0;
+}
+
+static struct drm_info_list debugfs_files[] = {
+	{ "regs", tegra_hdmi_show_regs, 0, NULL },
+};
+
+static int tegra_hdmi_debugfs_init(struct tegra_hdmi *hdmi,
+				   struct drm_minor *minor)
+{
+	unsigned int i;
+	int err;
+
+	hdmi->debugfs = debugfs_create_dir("hdmi", minor->debugfs_root);
+	if (!hdmi->debugfs)
+		return -ENOMEM;
+
+	hdmi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
+				      GFP_KERNEL);
+	if (!hdmi->debugfs_files) {
+		err = -ENOMEM;
+		goto remove;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
+		hdmi->debugfs_files[i].data = hdmi;
+
+	err = drm_debugfs_create_files(hdmi->debugfs_files,
+				       ARRAY_SIZE(debugfs_files),
+				       hdmi->debugfs, minor);
+	if (err < 0)
+		goto free;
+
+	hdmi->minor = minor;
+
+	return 0;
+
+free:
+	kfree(hdmi->debugfs_files);
+	hdmi->debugfs_files = NULL;
+remove:
+	debugfs_remove(hdmi->debugfs);
+	hdmi->debugfs = NULL;
+
+	return err;
+}
+
+static int tegra_hdmi_debugfs_exit(struct tegra_hdmi *hdmi)
+{
+	drm_debugfs_remove_files(hdmi->debugfs_files, ARRAY_SIZE(debugfs_files),
+				 hdmi->minor);
+	hdmi->minor = NULL;
+
+	kfree(hdmi->debugfs_files);
+	hdmi->debugfs_files = NULL;
+
+	debugfs_remove(hdmi->debugfs);
+	hdmi->debugfs = NULL;
+
+	return 0;
+}
+
+static int tegra_hdmi_drm_init(struct host1x_client *client,
+			       struct drm_device *drm)
+{
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	int err;
+
+	hdmi->output.type = TEGRA_OUTPUT_HDMI;
+	hdmi->output.dev = client->dev;
+	hdmi->output.ops = &hdmi_ops;
+
+	err = tegra_output_init(drm, &hdmi->output);
+	if (err < 0) {
+		dev_err(client->dev, "output setup failed: %d\n", err);
+		return err;
+	}
+
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_hdmi_debugfs_init(hdmi, drm->primary);
+		if (err < 0)
+			dev_err(client->dev, "debugfs setup failed: %d\n", err);
+	}
+
+	return 0;
+}
+
+static int tegra_hdmi_drm_exit(struct host1x_client *client)
+{
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	int err;
+
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_hdmi_debugfs_exit(hdmi);
+		if (err < 0)
+			dev_err(client->dev, "debugfs cleanup failed: %d\n",
+				err);
+	}
+
+	err = tegra_output_disable(&hdmi->output);
+	if (err < 0) {
+		dev_err(client->dev, "output failed to disable: %d\n", err);
+		return err;
+	}
+
+	err = tegra_output_exit(&hdmi->output);
+	if (err < 0) {
+		dev_err(client->dev, "output cleanup failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct host1x_client_ops hdmi_client_ops = {
+	.drm_init = tegra_hdmi_drm_init,
+	.drm_exit = tegra_hdmi_drm_exit,
+};
+
+static int tegra_hdmi_probe(struct platform_device *pdev)
+{
+	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct tegra_hdmi *hdmi;
+	struct resource *regs;
+	int err;
+
+	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
+	if (!hdmi)
+		return -ENOMEM;
+
+	hdmi->dev = &pdev->dev;
+	hdmi->audio_source = AUTO;
+	hdmi->audio_freq = 44100;
+	hdmi->stereo = false;
+	hdmi->dvi = false;
+
+	hdmi->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(hdmi->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(hdmi->clk);
+	}
+
+	err = clk_prepare(hdmi->clk);
+	if (err < 0)
+		return err;
+
+	hdmi->clk_parent = devm_clk_get(&pdev->dev, "parent");
+	if (IS_ERR(hdmi->clk_parent)) {
+		err = PTR_ERR(hdmi->clk_parent);
+		goto unprepare_clk;
+	}
+
+	err = clk_prepare(hdmi->clk_parent);
+	if (err < 0)
+		goto unprepare_clk;
+
+	err = clk_set_parent(hdmi->clk, hdmi->clk_parent);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to setup clocks: %d\n", err);
+		goto unprepare_parent;
+	}
+
+	hdmi->vdd = devm_regulator_get(&pdev->dev, "vdd");
+	if (IS_ERR(hdmi->vdd)) {
+		dev_err(&pdev->dev, "failed to get VDD regulator\n");
+		err = PTR_ERR(hdmi->vdd);
+		goto unprepare_parent;
+	}
+
+	hdmi->pll = devm_regulator_get(&pdev->dev, "pll");
+	if (IS_ERR(hdmi->pll)) {
+		dev_err(&pdev->dev, "failed to get PLL regulator\n");
+		err = PTR_ERR(hdmi->pll);
+		goto unprepare_parent;
+	}
+
+	hdmi->output.dev = &pdev->dev;
+
+	err = tegra_output_parse_dt(&hdmi->output);
+	if (err < 0)
+		goto unprepare_parent;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		err = -ENXIO;
+		goto unprepare_parent;
+	}
+
+	hdmi->regs = devm_request_and_ioremap(&pdev->dev, regs);
+	if (!hdmi->regs) {
+		err = -EADDRNOTAVAIL;
+		goto unprepare_parent;
+	}
+
+	err = platform_get_irq(pdev, 0);
+	if (err < 0)
+		goto unprepare_parent;
+
+	hdmi->irq = err;
+
+	hdmi->client.ops = &hdmi_client_ops;
+	INIT_LIST_HEAD(&hdmi->client.list);
+	hdmi->client.dev = &pdev->dev;
+
+	err = host1x_register_client(host1x, &hdmi->client);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
+			err);
+		goto unprepare_parent;
+	}
+
+	platform_set_drvdata(pdev, hdmi);
+
+	return 0;
+
+unprepare_parent:
+	clk_unprepare(hdmi->clk_parent);
+unprepare_clk:
+	clk_unprepare(hdmi->clk);
+	return err;
+}
+
+static int tegra_hdmi_remove(struct platform_device *pdev)
+{
+	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
+	int err;
+
+	err = host1x_unregister_client(host1x, &hdmi->client);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
+			err);
+		return err;
+	}
+
+	clk_unprepare(hdmi->clk_parent);
+	clk_unprepare(hdmi->clk);
+
+	return 0;
+}
+
+static const struct of_device_id tegra_hdmi_of_match[] = {
+	{ .compatible = "nvidia,tegra30-hdmi", },
+	{ .compatible = "nvidia,tegra20-hdmi", },
+	{ },
+};
+
+struct platform_driver tegra_hdmi_driver = {
+	.driver = {
+		.name = "tegra-hdmi",
+		.owner = THIS_MODULE,
+		.of_match_table = tegra_hdmi_of_match,
+	},
+	.probe = tegra_hdmi_probe,
+	.remove = tegra_hdmi_remove,
+};
diff --git a/drivers/gpu/drm/tegra/hdmi.h b/drivers/gpu/drm/tegra/hdmi.h
new file mode 100644
index 0000000..1477f36
--- /dev/null
+++ b/drivers/gpu/drm/tegra/hdmi.h
@@ -0,0 +1,575 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TEGRA_HDMI_H
+#define TEGRA_HDMI_H 1
+
+#define HDMI_INFOFRAME_TYPE_VENDOR   0x81
+#define HDMI_INFOFRAME_TYPE_AVI      0x82
+#define HDMI_INFOFRAME_TYPE_SPD      0x83
+#define HDMI_INFOFRAME_TYPE_AUDIO    0x84
+#define HDMI_INFOFRAME_TYPE_MPEG_SRC 0x85
+#define HDMI_INFOFRAME_TYPE_NTSC_VBI 0x86
+
+/* all fields little endian */
+struct hdmi_avi_infoframe {
+	/* PB0 */
+	u8 csum;
+
+	/* PB1 */
+	unsigned s:2; /* scan information */
+	unsigned b:2; /* bar info data valid */
+	unsigned a:1; /* active info present */
+	unsigned y:2; /* RGB or YCbCr */
+	unsigned res1:1;
+
+	/* PB2 */
+	unsigned r:4; /* active format aspect ratio */
+	unsigned m:2; /* picture aspect ratio */
+	unsigned c:2; /* colorimetry */
+
+	/* PB3 */
+	unsigned sc:2;  /* non-uniform picture scaling */
+	unsigned q:2;   /* quantization range */
+	unsigned ec:3;  /* extended colorimetry */
+	unsigned itc:1; /* IT content */
+
+	/* PB4 */
+	unsigned vic:7; /* video format id code */
+	unsigned res4:1;
+
+	/* PB5 */
+	unsigned pr:4; /* pixel repetition factor */
+	unsigned cn:2; /* IT content type */
+	unsigned yq:2; /* ycc quantization range */
+
+	/* PB6-7 */
+	u16 top_bar_end_line;
+
+	/* PB8-9 */
+	u16 bot_bar_start_line;
+
+	/* PB10-11 */
+	u16 left_bar_end_pixel;
+
+	/* PB12-13 */
+	u16 right_bar_start_pixel;
+} __packed;
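+
+/*
+ * With five one-byte bitfield groups (PB1-PB5) and four two-byte bar fields
+ * (PB6-PB13), this packed layout is expected to occupy the 14 payload bytes
+ * of an AVI infoframe; a compile-time check along the lines of
+ *
+ *	BUILD_BUG_ON(sizeof(struct hdmi_avi_infoframe) != 14);
+ *
+ * could assert that assumption.
+ */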
+
+#define HDMI_AVI_VERSION 0x02
+
+#define HDMI_AVI_Y_RGB       0x0
+#define HDMI_AVI_Y_YCBCR_422 0x1
+#define HDMI_AVI_Y_YCBCR_444 0x2
+
+#define HDMI_AVI_B_VERT  0x1
+#define HDMI_AVI_B_HORIZ 0x2
+
+#define HDMI_AVI_S_NONE      0x0
+#define HDMI_AVI_S_OVERSCAN  0x1
+#define HDMI_AVI_S_UNDERSCAN 0x2
+
+#define HDMI_AVI_C_NONE     0x0
+#define HDMI_AVI_C_SMPTE    0x1
+#define HDMI_AVI_C_ITU_R    0x2
+#define HDMI_AVI_C_EXTENDED 0x4
+
+#define HDMI_AVI_M_4_3  0x1
+#define HDMI_AVI_M_16_9 0x2
+
+#define HDMI_AVI_R_SAME        0x8
+#define HDMI_AVI_R_4_3_CENTER  0x9
+#define HDMI_AVI_R_16_9_CENTER 0xa
+#define HDMI_AVI_R_14_9_CENTER 0xb
+
+/* all fields little endian */
+struct hdmi_audio_infoframe {
+	/* PB0 */
+	u8 csum;
+
+	/* PB1 */
+	unsigned cc:3; /* channel count */
+	unsigned res1:1;
+	unsigned ct:4; /* coding type */
+
+	/* PB2 */
+	unsigned ss:2; /* sample size */
+	unsigned sf:3; /* sample frequency */
+	unsigned res2:3;
+
+	/* PB3 */
+	unsigned cxt:5; /* coding extension type */
+	unsigned res3:3;
+
+	/* PB4 */
+	u8 ca; /* channel/speaker allocation */
+
+	/* PB5 */
+	unsigned res5:3;
+	unsigned lsv:4; /* level shift value */
+	unsigned dm_inh:1; /* downmix inhibit */
+
+	/* PB6-10 reserved */
+	u8 res6;
+	u8 res7;
+	u8 res8;
+	u8 res9;
+	u8 res10;
+} __packed;
+
+#define HDMI_AUDIO_VERSION 0x01
+
+#define HDMI_AUDIO_CC_STREAM 0x0 /* specified by audio stream */
+#define HDMI_AUDIO_CC_2      0x1
+#define HDMI_AUDIO_CC_3      0x2
+#define HDMI_AUDIO_CC_4      0x3
+#define HDMI_AUDIO_CC_5      0x4
+#define HDMI_AUDIO_CC_6      0x5
+#define HDMI_AUDIO_CC_7      0x6
+#define HDMI_AUDIO_CC_8      0x7
+
+#define HDMI_AUDIO_CT_STREAM  0x0 /* specified by audio stream */
+#define HDMI_AUDIO_CT_PCM     0x1
+#define HDMI_AUDIO_CT_AC3     0x2
+#define HDMI_AUDIO_CT_MPEG1   0x3
+#define HDMI_AUDIO_CT_MP3     0x4
+#define HDMI_AUDIO_CT_MPEG2   0x5
+#define HDMI_AUDIO_CT_AAC_LC  0x6
+#define HDMI_AUDIO_CT_DTS     0x7
+#define HDMI_AUDIO_CT_ATRAC   0x8
+#define HDMI_AUDIO_CT_DSD     0x9
+#define HDMI_AUDIO_CT_E_AC3   0xa
+#define HDMI_AUDIO_CT_DTS_HD  0xb
+#define HDMI_AUDIO_CT_MLP     0xc
+#define HDMI_AUDIO_CT_DST     0xd
+#define HDMI_AUDIO_CT_WMA_PRO 0xe
+#define HDMI_AUDIO_CT_CXT     0xf
+
+#define HDMI_AUDIO_SF_STREAM 0x0 /* specified by audio stream */
+#define HDMI_AUIDO_SF_32K    0x1
+#define HDMI_AUDIO_SF_44_1K  0x2
+#define HDMI_AUDIO_SF_48K    0x3
+#define HDMI_AUDIO_SF_88_2K  0x4
+#define HDMI_AUDIO_SF_96K    0x5
+#define HDMI_AUDIO_SF_176_4K 0x6
+#define HDMI_AUDIO_SF_192K   0x7
+
+#define HDMI_AUDIO_SS_STREAM 0x0 /* specified by audio stream */
+#define HDMI_AUDIO_SS_16BIT  0x1
+#define HDMI_AUDIO_SS_20BIT  0x2
+#define HDMI_AUDIO_SS_24BIT  0x3
+
+#define HDMI_AUDIO_CXT_CT            0x0 /* refer to coding in CT */
+#define HDMI_AUDIO_CXT_HE_AAC        0x1
+#define HDMI_AUDIO_CXT_HE_AAC_V2     0x2
+#define HDMI_AUDIO_CXT_MPEG_SURROUND 0x3
+
+/* all fields little endian */
+struct hdmi_stereo_infoframe {
+	/* PB0 */
+	u8 csum;
+
+	/* PB1 */
+	u8 regid0;
+
+	/* PB2 */
+	u8 regid1;
+
+	/* PB3 */
+	u8 regid2;
+
+	/* PB4 */
+	unsigned res1:5;
+	unsigned hdmi_video_format:3;
+
+	/* PB5 */
+	unsigned res2:4;
+	unsigned _3d_structure:4;
+
+	/* PB6 */
+	unsigned res3:4;
+	unsigned _3d_ext_data:4;
+} __packed;
+
+#define HDMI_VENDOR_VERSION 0x01
+
+/* register definitions */
+#define HDMI_CTXSW						0x00
+
+#define HDMI_NV_PDISP_SOR_STATE0				0x01
+#define SOR_STATE_UPDATE (1 << 0)
+
+#define HDMI_NV_PDISP_SOR_STATE1				0x02
+#define SOR_STATE_ASY_HEAD_OPMODE_AWAKE (2 << 0)
+#define SOR_STATE_ASY_ORMODE_NORMAL     (1 << 2)
+#define SOR_STATE_ATTACHED              (1 << 3)
+
+#define HDMI_NV_PDISP_SOR_STATE2				0x03
+#define SOR_STATE_ASY_OWNER_NONE         (0 <<  0)
+#define SOR_STATE_ASY_OWNER_HEAD0        (1 <<  0)
+#define SOR_STATE_ASY_SUBOWNER_NONE      (0 <<  4)
+#define SOR_STATE_ASY_SUBOWNER_SUBHEAD0  (1 <<  4)
+#define SOR_STATE_ASY_SUBOWNER_SUBHEAD1  (2 <<  4)
+#define SOR_STATE_ASY_SUBOWNER_BOTH      (3 <<  4)
+#define SOR_STATE_ASY_CRCMODE_ACTIVE     (0 <<  6)
+#define SOR_STATE_ASY_CRCMODE_COMPLETE   (1 <<  6)
+#define SOR_STATE_ASY_CRCMODE_NON_ACTIVE (2 <<  6)
+#define SOR_STATE_ASY_PROTOCOL_SINGLE_TMDS_A (1 << 8)
+#define SOR_STATE_ASY_PROTOCOL_CUSTOM        (15 << 8)
+#define SOR_STATE_ASY_HSYNCPOL_POS       (0 << 12)
+#define SOR_STATE_ASY_HSYNCPOL_NEG       (1 << 12)
+#define SOR_STATE_ASY_VSYNCPOL_POS       (0 << 13)
+#define SOR_STATE_ASY_VSYNCPOL_NEG       (1 << 13)
+#define SOR_STATE_ASY_DEPOL_POS          (0 << 14)
+#define SOR_STATE_ASY_DEPOL_NEG          (1 << 14)
+
+#define HDMI_NV_PDISP_RG_HDCP_AN_MSB				0x04
+#define HDMI_NV_PDISP_RG_HDCP_AN_LSB				0x05
+#define HDMI_NV_PDISP_RG_HDCP_CN_MSB				0x06
+#define HDMI_NV_PDISP_RG_HDCP_CN_LSB				0x07
+#define HDMI_NV_PDISP_RG_HDCP_AKSV_MSB				0x08
+#define HDMI_NV_PDISP_RG_HDCP_AKSV_LSB				0x09
+#define HDMI_NV_PDISP_RG_HDCP_BKSV_MSB				0x0a
+#define HDMI_NV_PDISP_RG_HDCP_BKSV_LSB				0x0b
+#define HDMI_NV_PDISP_RG_HDCP_CKSV_MSB				0x0c
+#define HDMI_NV_PDISP_RG_HDCP_CKSV_LSB				0x0d
+#define HDMI_NV_PDISP_RG_HDCP_DKSV_MSB				0x0e
+#define HDMI_NV_PDISP_RG_HDCP_DKSV_LSB				0x0f
+#define HDMI_NV_PDISP_RG_HDCP_CTRL				0x10
+#define HDMI_NV_PDISP_RG_HDCP_CMODE				0x11
+#define HDMI_NV_PDISP_RG_HDCP_MPRIME_MSB			0x12
+#define HDMI_NV_PDISP_RG_HDCP_MPRIME_LSB			0x13
+#define HDMI_NV_PDISP_RG_HDCP_SPRIME_MSB			0x14
+#define HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB2			0x15
+#define HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB1			0x16
+#define HDMI_NV_PDISP_RG_HDCP_RI				0x17
+#define HDMI_NV_PDISP_RG_HDCP_CS_MSB				0x18
+#define HDMI_NV_PDISP_RG_HDCP_CS_LSB				0x19
+#define HDMI_NV_PDISP_HDMI_AUDIO_EMU0				0x1a
+#define HDMI_NV_PDISP_HDMI_AUDIO_EMU_RDATA0			0x1b
+#define HDMI_NV_PDISP_HDMI_AUDIO_EMU1				0x1c
+#define HDMI_NV_PDISP_HDMI_AUDIO_EMU2				0x1d
+
+#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL			0x1e
+#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_STATUS		0x1f
+#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER		0x20
+#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_LOW		0x21
+#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_HIGH	0x22
+#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL			0x23
+#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_STATUS			0x24
+#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER			0x25
+#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_LOW		0x26
+#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_HIGH		0x27
+#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_LOW		0x28
+#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_HIGH		0x29
+
+#define INFOFRAME_CTRL_ENABLE (1 << 0)
+
+#define INFOFRAME_HEADER_TYPE(x)    (((x) & 0xff) <<  0)
+#define INFOFRAME_HEADER_VERSION(x) (((x) & 0xff) <<  8)
+#define INFOFRAME_HEADER_LEN(x)     (((x) & 0x0f) << 16)
+
+#define HDMI_NV_PDISP_HDMI_GENERIC_CTRL				0x2a
+#define GENERIC_CTRL_ENABLE (1 <<  0)
+#define GENERIC_CTRL_OTHER  (1 <<  4)
+#define GENERIC_CTRL_SINGLE (1 <<  8)
+#define GENERIC_CTRL_HBLANK (1 << 12)
+#define GENERIC_CTRL_AUDIO  (1 << 16)
+
+#define HDMI_NV_PDISP_HDMI_GENERIC_STATUS			0x2b
+#define HDMI_NV_PDISP_HDMI_GENERIC_HEADER			0x2c
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_LOW			0x2d
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_HIGH		0x2e
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_LOW			0x2f
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_HIGH		0x30
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_LOW			0x31
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_HIGH		0x32
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_LOW			0x33
+#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_HIGH		0x34
+
+#define HDMI_NV_PDISP_HDMI_ACR_CTRL				0x35
+#define HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_LOW			0x36
+#define HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_HIGH		0x37
+#define HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW			0x38
+#define HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH		0x39
+#define HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_LOW			0x3a
+#define HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_HIGH		0x3b
+#define HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_LOW			0x3c
+#define HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_HIGH		0x3d
+#define HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_LOW			0x3e
+#define HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_HIGH		0x3f
+#define HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_LOW			0x40
+#define HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_HIGH		0x41
+#define HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_LOW			0x42
+#define HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_HIGH		0x43
+
+#define ACR_SUBPACK_CTS(x) (((x) & 0xffffff) << 8)
+#define ACR_SUBPACK_N(x)   (((x) & 0xffffff) << 0)
+#define ACR_ENABLE         (1 << 31)
+
+#define HDMI_NV_PDISP_HDMI_CTRL					0x44
+#define HDMI_CTRL_REKEY(x)         (((x) & 0x7f) <<  0)
+#define HDMI_CTRL_MAX_AC_PACKET(x) (((x) & 0x1f) << 16)
+#define HDMI_CTRL_ENABLE           (1 << 30)
+
+#define HDMI_NV_PDISP_HDMI_VSYNC_KEEPOUT			0x45
+#define HDMI_NV_PDISP_HDMI_VSYNC_WINDOW				0x46
+#define VSYNC_WINDOW_END(x)   (((x) & 0x3ff) <<  0)
+#define VSYNC_WINDOW_START(x) (((x) & 0x3ff) << 16)
+#define VSYNC_WINDOW_ENABLE   (1 << 31)
+
+#define HDMI_NV_PDISP_HDMI_GCP_CTRL				0x47
+#define HDMI_NV_PDISP_HDMI_GCP_STATUS				0x48
+#define HDMI_NV_PDISP_HDMI_GCP_SUBPACK				0x49
+#define HDMI_NV_PDISP_HDMI_CHANNEL_STATUS1			0x4a
+#define HDMI_NV_PDISP_HDMI_CHANNEL_STATUS2			0x4b
+#define HDMI_NV_PDISP_HDMI_EMU0					0x4c
+#define HDMI_NV_PDISP_HDMI_EMU1					0x4d
+#define HDMI_NV_PDISP_HDMI_EMU1_RDATA				0x4e
+
+#define HDMI_NV_PDISP_HDMI_SPARE				0x4f
+#define SPARE_HW_CTS           (1 << 0)
+#define SPARE_FORCE_SW_CTS     (1 << 1)
+#define SPARE_CTS_RESET_VAL(x) (((x) & 0x7) << 16)
+
+#define HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS1			0x50
+#define HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS2			0x51
+#define HDMI_NV_PDISP_HDMI_HDCPRIF_ROM_CTRL			0x53
+#define HDMI_NV_PDISP_SOR_CAP					0x54
+#define HDMI_NV_PDISP_SOR_PWR					0x55
+#define SOR_PWR_NORMAL_STATE_PD     (0 <<  0)
+#define SOR_PWR_NORMAL_STATE_PU     (1 <<  0)
+#define SOR_PWR_NORMAL_START_NORMAL (0 <<  1)
+#define SOR_PWR_NORMAL_START_ALT    (1 <<  1)
+#define SOR_PWR_SAFE_STATE_PD       (0 << 16)
+#define SOR_PWR_SAFE_STATE_PU       (1 << 16)
+#define SOR_PWR_SETTING_NEW_DONE    (0 << 31)
+#define SOR_PWR_SETTING_NEW_PENDING (1 << 31)
+#define SOR_PWR_SETTING_NEW_TRIGGER (1 << 31)
+
+#define HDMI_NV_PDISP_SOR_TEST					0x56
+#define HDMI_NV_PDISP_SOR_PLL0					0x57
+#define SOR_PLL_PWR            (1 << 0)
+#define SOR_PLL_PDBG           (1 << 1)
+#define SOR_PLL_VCAPD          (1 << 2)
+#define SOR_PLL_PDPORT         (1 << 3)
+#define SOR_PLL_RESISTORSEL    (1 << 4)
+#define SOR_PLL_PULLDOWN       (1 << 5)
+#define SOR_PLL_VCOCAP(x)      (((x) & 0xf) <<  8)
+#define SOR_PLL_BG_V17_S(x)    (((x) & 0xf) << 12)
+#define SOR_PLL_FILTER(x)      (((x) & 0xf) << 16)
+#define SOR_PLL_ICHPMP(x)      (((x) & 0xf) << 24)
+#define SOR_PLL_TX_REG_LOAD(x) (((x) & 0xf) << 28)
+
+#define HDMI_NV_PDISP_SOR_PLL1					0x58
+#define SOR_PLL_TMDS_TERM_ENABLE (1 << 8)
+#define SOR_PLL_TMDS_TERMADJ(x)  (((x) & 0xf) <<  9)
+#define SOR_PLL_LOADADJ(x)       (((x) & 0xf) << 20)
+#define SOR_PLL_PE_EN            (1 << 28)
+#define SOR_PLL_HALF_FULL_PE     (1 << 29)
+#define SOR_PLL_S_D_PIN_PE       (1 << 30)
+
+#define HDMI_NV_PDISP_SOR_PLL2					0x59
+
+#define HDMI_NV_PDISP_SOR_CSTM					0x5a
+#define SOR_CSTM_ROTCLK(x) (((x) & 0xf) << 24)
+
+#define HDMI_NV_PDISP_SOR_LVDS					0x5b
+#define HDMI_NV_PDISP_SOR_CRCA					0x5c
+#define HDMI_NV_PDISP_SOR_CRCB					0x5d
+#define HDMI_NV_PDISP_SOR_BLANK					0x5e
+#define HDMI_NV_PDISP_SOR_SEQ_CTL				0x5f
+#define SOR_SEQ_CTL_PU_PC(x) (((x) & 0xf) <<  0)
+#define SOR_SEQ_PU_PC_ALT(x) (((x) & 0xf) <<  4)
+#define SOR_SEQ_PD_PC(x)     (((x) & 0xf) <<  8)
+#define SOR_SEQ_PD_PC_ALT(x) (((x) & 0xf) << 12)
+#define SOR_SEQ_PC(x)        (((x) & 0xf) << 16)
+#define SOR_SEQ_STATUS       (1 << 28)
+#define SOR_SEQ_SWITCH       (1 << 30)
+
+#define HDMI_NV_PDISP_SOR_SEQ_INST(x)				(0x60 + (x))
+
+#define SOR_SEQ_INST_WAIT_TIME(x)     (((x) & 0x3ff) << 0)
+#define SOR_SEQ_INST_WAIT_UNITS_VSYNC (2 << 12)
+#define SOR_SEQ_INST_HALT             (1 << 15)
+#define SOR_SEQ_INST_PIN_A_LOW        (0 << 21)
+#define SOR_SEQ_INST_PIN_A_HIGH       (1 << 21)
+#define SOR_SEQ_INST_PIN_B_LOW        (0 << 22)
+#define SOR_SEQ_INST_PIN_B_HIGH       (1 << 22)
+#define SOR_SEQ_INST_DRIVE_PWM_OUT_LO (1 << 23)
+
+#define HDMI_NV_PDISP_SOR_VCRCA0				0x72
+#define HDMI_NV_PDISP_SOR_VCRCA1				0x73
+#define HDMI_NV_PDISP_SOR_CCRCA0				0x74
+#define HDMI_NV_PDISP_SOR_CCRCA1				0x75
+#define HDMI_NV_PDISP_SOR_EDATAA0				0x76
+#define HDMI_NV_PDISP_SOR_EDATAA1				0x77
+#define HDMI_NV_PDISP_SOR_COUNTA0				0x78
+#define HDMI_NV_PDISP_SOR_COUNTA1				0x79
+#define HDMI_NV_PDISP_SOR_DEBUGA0				0x7a
+#define HDMI_NV_PDISP_SOR_DEBUGA1				0x7b
+#define HDMI_NV_PDISP_SOR_TRIG					0x7c
+#define HDMI_NV_PDISP_SOR_MSCHECK				0x7d
+
+#define HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT			0x7e
+#define DRIVE_CURRENT_LANE0(x)      (((x) & 0x3f) <<  0)
+#define DRIVE_CURRENT_LANE1(x)      (((x) & 0x3f) <<  8)
+#define DRIVE_CURRENT_LANE2(x)      (((x) & 0x3f) << 16)
+#define DRIVE_CURRENT_LANE3(x)      (((x) & 0x3f) << 24)
+#define DRIVE_CURRENT_FUSE_OVERRIDE (1 << 31)
+
+#define DRIVE_CURRENT_1_500_mA  0x00
+#define DRIVE_CURRENT_1_875_mA  0x01
+#define DRIVE_CURRENT_2_250_mA  0x02
+#define DRIVE_CURRENT_2_625_mA  0x03
+#define DRIVE_CURRENT_3_000_mA  0x04
+#define DRIVE_CURRENT_3_375_mA  0x05
+#define DRIVE_CURRENT_3_750_mA  0x06
+#define DRIVE_CURRENT_4_125_mA  0x07
+#define DRIVE_CURRENT_4_500_mA  0x08
+#define DRIVE_CURRENT_4_875_mA  0x09
+#define DRIVE_CURRENT_5_250_mA  0x0a
+#define DRIVE_CURRENT_5_625_mA  0x0b
+#define DRIVE_CURRENT_6_000_mA  0x0c
+#define DRIVE_CURRENT_6_375_mA  0x0d
+#define DRIVE_CURRENT_6_750_mA  0x0e
+#define DRIVE_CURRENT_7_125_mA  0x0f
+#define DRIVE_CURRENT_7_500_mA  0x10
+#define DRIVE_CURRENT_7_875_mA  0x11
+#define DRIVE_CURRENT_8_250_mA  0x12
+#define DRIVE_CURRENT_8_625_mA  0x13
+#define DRIVE_CURRENT_9_000_mA  0x14
+#define DRIVE_CURRENT_9_375_mA  0x15
+#define DRIVE_CURRENT_9_750_mA  0x16
+#define DRIVE_CURRENT_10_125_mA 0x17
+#define DRIVE_CURRENT_10_500_mA 0x18
+#define DRIVE_CURRENT_10_875_mA 0x19
+#define DRIVE_CURRENT_11_250_mA 0x1a
+#define DRIVE_CURRENT_11_625_mA 0x1b
+#define DRIVE_CURRENT_12_000_mA 0x1c
+#define DRIVE_CURRENT_12_375_mA 0x1d
+#define DRIVE_CURRENT_12_750_mA 0x1e
+#define DRIVE_CURRENT_13_125_mA 0x1f
+#define DRIVE_CURRENT_13_500_mA 0x20
+#define DRIVE_CURRENT_13_875_mA 0x21
+#define DRIVE_CURRENT_14_250_mA 0x22
+#define DRIVE_CURRENT_14_625_mA 0x23
+#define DRIVE_CURRENT_15_000_mA 0x24
+#define DRIVE_CURRENT_15_375_mA 0x25
+#define DRIVE_CURRENT_15_750_mA 0x26
+#define DRIVE_CURRENT_16_125_mA 0x27
+#define DRIVE_CURRENT_16_500_mA 0x28
+#define DRIVE_CURRENT_16_875_mA 0x29
+#define DRIVE_CURRENT_17_250_mA 0x2a
+#define DRIVE_CURRENT_17_625_mA 0x2b
+#define DRIVE_CURRENT_18_000_mA 0x2c
+#define DRIVE_CURRENT_18_375_mA 0x2d
+#define DRIVE_CURRENT_18_750_mA 0x2e
+#define DRIVE_CURRENT_19_125_mA 0x2f
+#define DRIVE_CURRENT_19_500_mA 0x30
+#define DRIVE_CURRENT_19_875_mA 0x31
+#define DRIVE_CURRENT_20_250_mA 0x32
+#define DRIVE_CURRENT_20_625_mA 0x33
+#define DRIVE_CURRENT_21_000_mA 0x34
+#define DRIVE_CURRENT_21_375_mA 0x35
+#define DRIVE_CURRENT_21_750_mA 0x36
+#define DRIVE_CURRENT_22_125_mA 0x37
+#define DRIVE_CURRENT_22_500_mA 0x38
+#define DRIVE_CURRENT_22_875_mA 0x39
+#define DRIVE_CURRENT_23_250_mA 0x3a
+#define DRIVE_CURRENT_23_625_mA 0x3b
+#define DRIVE_CURRENT_24_000_mA 0x3c
+#define DRIVE_CURRENT_24_375_mA 0x3d
+#define DRIVE_CURRENT_24_750_mA 0x3e
+
+#define HDMI_NV_PDISP_AUDIO_DEBUG0				0x7f
+#define HDMI_NV_PDISP_AUDIO_DEBUG1				0x80
+#define HDMI_NV_PDISP_AUDIO_DEBUG2				0x81
+
+#define HDMI_NV_PDISP_AUDIO_FS(x)				(0x82 + (x))
+#define AUDIO_FS_LOW(x)  (((x) & 0xfff) <<  0)
+#define AUDIO_FS_HIGH(x) (((x) & 0xfff) << 16)
+
+#define HDMI_NV_PDISP_AUDIO_PULSE_WIDTH				0x89
+#define HDMI_NV_PDISP_AUDIO_THRESHOLD				0x8a
+#define HDMI_NV_PDISP_AUDIO_CNTRL0				0x8b
+#define AUDIO_CNTRL0_ERROR_TOLERANCE(x)  (((x) & 0xff) << 0)
+#define AUDIO_CNTRL0_SOURCE_SELECT_AUTO  (0 << 20)
+#define AUDIO_CNTRL0_SOURCE_SELECT_SPDIF (1 << 20)
+#define AUDIO_CNTRL0_SOURCE_SELECT_HDAL  (2 << 20)
+#define AUDIO_CNTRL0_FRAMES_PER_BLOCK(x) (((x) & 0xff) << 24)
+
+#define HDMI_NV_PDISP_AUDIO_N					0x8c
+#define AUDIO_N_VALUE(x)           (((x) & 0xfffff) << 0)
+#define AUDIO_N_RESETF             (1 << 20)
+#define AUDIO_N_GENERATE_NORMAL    (0 << 24)
+#define AUDIO_N_GENERATE_ALTERNATE (1 << 24)
+
+#define HDMI_NV_PDISP_HDCPRIF_ROM_TIMING			0x94
+#define HDMI_NV_PDISP_SOR_REFCLK				0x95
+#define SOR_REFCLK_DIV_INT(x)  (((x) & 0xff) << 8)
+#define SOR_REFCLK_DIV_FRAC(x) (((x) & 0x03) << 6)
+
+#define HDMI_NV_PDISP_CRC_CONTROL				0x96
+#define HDMI_NV_PDISP_INPUT_CONTROL				0x97
+#define HDMI_SRC_DISPLAYA       (0 << 0)
+#define HDMI_SRC_DISPLAYB       (1 << 0)
+#define ARM_VIDEO_RANGE_FULL    (0 << 1)
+#define ARM_VIDEO_RANGE_LIMITED (1 << 1)
+
+#define HDMI_NV_PDISP_SCRATCH					0x98
+#define HDMI_NV_PDISP_PE_CURRENT				0x99
+#define PE_CURRENT0(x) (((x) & 0xf) << 0)
+#define PE_CURRENT1(x) (((x) & 0xf) << 8)
+#define PE_CURRENT2(x) (((x) & 0xf) << 16)
+#define PE_CURRENT3(x) (((x) & 0xf) << 24)
+
+#define PE_CURRENT_0_0_mA 0x0
+#define PE_CURRENT_0_5_mA 0x1
+#define PE_CURRENT_1_0_mA 0x2
+#define PE_CURRENT_1_5_mA 0x3
+#define PE_CURRENT_2_0_mA 0x4
+#define PE_CURRENT_2_5_mA 0x5
+#define PE_CURRENT_3_0_mA 0x6
+#define PE_CURRENT_3_5_mA 0x7
+#define PE_CURRENT_4_0_mA 0x8
+#define PE_CURRENT_4_5_mA 0x9
+#define PE_CURRENT_5_0_mA 0xa
+#define PE_CURRENT_5_5_mA 0xb
+#define PE_CURRENT_6_0_mA 0xc
+#define PE_CURRENT_6_5_mA 0xd
+#define PE_CURRENT_7_0_mA 0xe
+#define PE_CURRENT_7_5_mA 0xf
+
+#define HDMI_NV_PDISP_KEY_CTRL					0x9a
+#define HDMI_NV_PDISP_KEY_DEBUG0				0x9b
+#define HDMI_NV_PDISP_KEY_DEBUG1				0x9c
+#define HDMI_NV_PDISP_KEY_DEBUG2				0x9d
+#define HDMI_NV_PDISP_KEY_HDCP_KEY_0				0x9e
+#define HDMI_NV_PDISP_KEY_HDCP_KEY_1				0x9f
+#define HDMI_NV_PDISP_KEY_HDCP_KEY_2				0xa0
+#define HDMI_NV_PDISP_KEY_HDCP_KEY_3				0xa1
+#define HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG				0xa2
+#define HDMI_NV_PDISP_KEY_SKEY_INDEX				0xa3
+
+#define HDMI_NV_PDISP_SOR_AUDIO_CNTRL0				0xac
+#define AUDIO_CNTRL0_INJECT_NULLSMPL (1 << 29)
+#define HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR			0xbc
+#define HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE			0xbd
+
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320    0xbf
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441    0xc0
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882    0xc1
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764    0xc2
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480    0xc3
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960    0xc4
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920    0xc5
+#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_DEFAULT 0xc5
+
+#endif /* TEGRA_HDMI_H */
diff --git a/drivers/gpu/drm/tegra/host1x.c b/drivers/gpu/drm/tegra/host1x.c
new file mode 100644
index 0000000..bdb97a5
--- /dev/null
+++ b/drivers/gpu/drm/tegra/host1x.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "drm.h"
+
+struct host1x_drm_client {
+	struct host1x_client *client;
+	struct device_node *np;
+	struct list_head list;
+};
+
+static int host1x_add_drm_client(struct host1x *host1x, struct device_node *np)
+{
+	struct host1x_drm_client *client;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&client->list);
+	client->np = of_node_get(np);
+
+	list_add_tail(&client->list, &host1x->drm_clients);
+
+	return 0;
+}
+
+static int host1x_activate_drm_client(struct host1x *host1x,
+				      struct host1x_drm_client *drm,
+				      struct host1x_client *client)
+{
+	mutex_lock(&host1x->drm_clients_lock);
+	list_del_init(&drm->list);
+	list_add_tail(&drm->list, &host1x->drm_active);
+	drm->client = client;
+	mutex_unlock(&host1x->drm_clients_lock);
+
+	return 0;
+}
+
+static int host1x_remove_drm_client(struct host1x *host1x,
+				    struct host1x_drm_client *client)
+{
+	mutex_lock(&host1x->drm_clients_lock);
+	list_del_init(&client->list);
+	mutex_unlock(&host1x->drm_clients_lock);
+
+	of_node_put(client->np);
+	kfree(client);
+
+	return 0;
+}
+
+static int host1x_parse_dt(struct host1x *host1x)
+{
+	static const char * const compat[] = {
+		"nvidia,tegra20-dc",
+		"nvidia,tegra20-hdmi",
+		"nvidia,tegra30-dc",
+		"nvidia,tegra30-hdmi",
+	};
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(compat); i++) {
+		struct device_node *np;
+
+		for_each_child_of_node(host1x->dev->of_node, np) {
+			if (of_device_is_compatible(np, compat[i]) &&
+			    of_device_is_available(np)) {
+				err = host1x_add_drm_client(host1x, np);
+				if (err < 0) {
+					of_node_put(np);
+					return err;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_host1x_probe(struct platform_device *pdev)
+{
+	struct host1x *host1x;
+	struct resource *regs;
+	int err;
+
+	host1x = devm_kzalloc(&pdev->dev, sizeof(*host1x), GFP_KERNEL);
+	if (!host1x)
+		return -ENOMEM;
+
+	mutex_init(&host1x->drm_clients_lock);
+	INIT_LIST_HEAD(&host1x->drm_clients);
+	INIT_LIST_HEAD(&host1x->drm_active);
+	mutex_init(&host1x->clients_lock);
+	INIT_LIST_HEAD(&host1x->clients);
+	host1x->dev = &pdev->dev;
+
+	err = host1x_parse_dt(host1x);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to parse DT: %d\n", err);
+		return err;
+	}
+
+	host1x->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(host1x->clk))
+		return PTR_ERR(host1x->clk);
+
+	err = clk_prepare_enable(host1x->clk);
+	if (err < 0)
+		return err;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		err = -ENXIO;
+		goto err;
+	}
+
+	err = platform_get_irq(pdev, 0);
+	if (err < 0)
+		goto err;
+
+	host1x->syncpt = err;
+
+	err = platform_get_irq(pdev, 1);
+	if (err < 0)
+		goto err;
+
+	host1x->irq = err;
+
+	host1x->regs = devm_request_and_ioremap(&pdev->dev, regs);
+	if (!host1x->regs) {
+		err = -EADDRNOTAVAIL;
+		goto err;
+	}
+
+	platform_set_drvdata(pdev, host1x);
+
+	return 0;
+
+err:
+	clk_disable_unprepare(host1x->clk);
+	return err;
+}
+
+static int tegra_host1x_remove(struct platform_device *pdev)
+{
+	struct host1x *host1x = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(host1x->clk);
+
+	return 0;
+}
+
+int host1x_drm_init(struct host1x *host1x, struct drm_device *drm)
+{
+	struct host1x_client *client;
+
+	mutex_lock(&host1x->clients_lock);
+
+	list_for_each_entry(client, &host1x->clients, list) {
+		if (client->ops && client->ops->drm_init) {
+			int err = client->ops->drm_init(client, drm);
+			if (err < 0) {
+				dev_err(host1x->dev,
+					"DRM setup failed for %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&host1x->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->clients_lock);
+
+	return 0;
+}
+
+int host1x_drm_exit(struct host1x *host1x)
+{
+	struct platform_device *pdev = to_platform_device(host1x->dev);
+	struct host1x_client *client;
+
+	if (!host1x->drm)
+		return 0;
+
+	mutex_lock(&host1x->clients_lock);
+
+	list_for_each_entry_reverse(client, &host1x->clients, list) {
+		if (client->ops && client->ops->drm_exit) {
+			int err = client->ops->drm_exit(client);
+			if (err < 0) {
+				dev_err(host1x->dev,
+					"DRM cleanup failed for %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&host1x->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->clients_lock);
+
+	drm_platform_exit(&tegra_drm_driver, pdev);
+	host1x->drm = NULL;
+
+	return 0;
+}
+
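+/*
+ * Add the client to the host1x client list and move any matching device
+ * tree node from drm_clients to drm_active. Once the last expected client
+ * has registered (drm_clients is empty), the DRM device itself is brought
+ * up.
+ */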
+int host1x_register_client(struct host1x *host1x, struct host1x_client *client)
+{
+	struct host1x_drm_client *drm, *tmp;
+	int err;
+
+	mutex_lock(&host1x->clients_lock);
+	list_add_tail(&client->list, &host1x->clients);
+	mutex_unlock(&host1x->clients_lock);
+
+	list_for_each_entry_safe(drm, tmp, &host1x->drm_clients, list)
+		if (drm->np == client->dev->of_node)
+			host1x_activate_drm_client(host1x, drm, client);
+
+	if (list_empty(&host1x->drm_clients)) {
+		struct platform_device *pdev = to_platform_device(host1x->dev);
+
+		err = drm_platform_init(&tegra_drm_driver, pdev);
+		if (err < 0) {
+			dev_err(host1x->dev, "drm_platform_init(): %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int host1x_unregister_client(struct host1x *host1x,
+			     struct host1x_client *client)
+{
+	struct host1x_drm_client *drm, *tmp;
+	int err;
+
+	list_for_each_entry_safe(drm, tmp, &host1x->drm_active, list) {
+		if (drm->client == client) {
+			err = host1x_drm_exit(host1x);
+			if (err < 0) {
+				dev_err(host1x->dev, "host1x_drm_exit(): %d\n",
+					err);
+				return err;
+			}
+
+			host1x_remove_drm_client(host1x, drm);
+			break;
+		}
+	}
+
+	mutex_lock(&host1x->clients_lock);
+	list_del_init(&client->list);
+	mutex_unlock(&host1x->clients_lock);
+
+	return 0;
+}
+
+static const struct of_device_id tegra_host1x_of_match[] = {
+	{ .compatible = "nvidia,tegra30-host1x", },
+	{ .compatible = "nvidia,tegra20-host1x", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_host1x_of_match);
+
+struct platform_driver tegra_host1x_driver = {
+	.driver = {
+		.name = "tegra-host1x",
+		.owner = THIS_MODULE,
+		.of_match_table = tegra_host1x_of_match,
+	},
+	.probe = tegra_host1x_probe,
+	.remove = tegra_host1x_remove,
+};
+
+static int __init tegra_host1x_init(void)
+{
+	int err;
+
+	err = platform_driver_register(&tegra_host1x_driver);
+	if (err < 0)
+		return err;
+
+	err = platform_driver_register(&tegra_dc_driver);
+	if (err < 0)
+		goto unregister_host1x;
+
+	err = platform_driver_register(&tegra_hdmi_driver);
+	if (err < 0)
+		goto unregister_dc;
+
+	return 0;
+
+unregister_dc:
+	platform_driver_unregister(&tegra_dc_driver);
+unregister_host1x:
+	platform_driver_unregister(&tegra_host1x_driver);
+	return err;
+}
+module_init(tegra_host1x_init);
+
+static void __exit tegra_host1x_exit(void)
+{
+	platform_driver_unregister(&tegra_hdmi_driver);
+	platform_driver_unregister(&tegra_dc_driver);
+	platform_driver_unregister(&tegra_host1x_driver);
+}
+module_exit(tegra_host1x_exit);
+
+MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
+MODULE_DESCRIPTION("NVIDIA Tegra DRM driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
new file mode 100644
index 0000000..8140fc6
--- /dev/null
+++ b/drivers/gpu/drm/tegra/output.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/of_i2c.h>
+
+#include "drm.h"
+
+static int tegra_connector_get_modes(struct drm_connector *connector)
+{
+	struct tegra_output *output = connector_to_output(connector);
+	struct edid *edid = NULL;
+	int err = 0;
+
+	if (output->edid)
+		edid = kmemdup(output->edid, sizeof(*edid), GFP_KERNEL);
+	else if (output->ddc)
+		edid = drm_get_edid(connector, output->ddc);
+
+	drm_mode_connector_update_edid_property(connector, edid);
+
+	if (edid) {
+		err = drm_add_edid_modes(connector, edid);
+		kfree(edid);
+	}
+
+	return err;
+}
+
+static int tegra_connector_mode_valid(struct drm_connector *connector,
+				      struct drm_display_mode *mode)
+{
+	struct tegra_output *output = connector_to_output(connector);
+	enum drm_mode_status status = MODE_OK;
+	int err;
+
+	err = tegra_output_check_mode(output, mode, &status);
+	if (err < 0)
+		return MODE_ERROR;
+
+	return status;
+}
+
+static struct drm_encoder *
+tegra_connector_best_encoder(struct drm_connector *connector)
+{
+	struct tegra_output *output = connector_to_output(connector);
+
+	return &output->encoder;
+}
+
+static const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = tegra_connector_get_modes,
+	.mode_valid = tegra_connector_mode_valid,
+	.best_encoder = tegra_connector_best_encoder,
+};
+
+static enum drm_connector_status
+tegra_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct tegra_output *output = connector_to_output(connector);
+	enum drm_connector_status status = connector_status_unknown;
+
+	if (gpio_is_valid(output->hpd_gpio)) {
+		if (gpio_get_value(output->hpd_gpio) == 0)
+			status = connector_status_disconnected;
+		else
+			status = connector_status_connected;
+	} else {
+		if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+			status = connector_status_connected;
+	}
+
+	return status;
+}
+
+static void tegra_connector_destroy(struct drm_connector *connector)
+{
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = tegra_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = tegra_connector_destroy,
+};
+
+static void tegra_encoder_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = tegra_encoder_destroy,
+};
+
+static void tegra_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+}
+
+static bool tegra_encoder_mode_fixup(struct drm_encoder *encoder,
+				     const struct drm_display_mode *mode,
+				     struct drm_display_mode *adjusted)
+{
+	return true;
+}
+
+static void tegra_encoder_prepare(struct drm_encoder *encoder)
+{
+}
+
+static void tegra_encoder_commit(struct drm_encoder *encoder)
+{
+}
+
+static void tegra_encoder_mode_set(struct drm_encoder *encoder,
+				   struct drm_display_mode *mode,
+				   struct drm_display_mode *adjusted)
+{
+	struct tegra_output *output = encoder_to_output(encoder);
+	int err;
+
+	err = tegra_output_enable(output);
+	if (err < 0)
+		dev_err(encoder->dev->dev, "tegra_output_enable(): %d\n", err);
+}
+
+static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.dpms = tegra_encoder_dpms,
+	.mode_fixup = tegra_encoder_mode_fixup,
+	.prepare = tegra_encoder_prepare,
+	.commit = tegra_encoder_commit,
+	.mode_set = tegra_encoder_mode_set,
+};
+
+static irqreturn_t hpd_irq(int irq, void *data)
+{
+	struct tegra_output *output = data;
+
+	drm_helper_hpd_irq_event(output->connector.dev);
+
+	return IRQ_HANDLED;
+}
+
+int tegra_output_parse_dt(struct tegra_output *output)
+{
+	enum of_gpio_flags flags;
+	struct device_node *ddc;
+	size_t size;
+	int err;
+
+	if (!output->of_node)
+		output->of_node = output->dev->of_node;
+
+	output->edid = of_get_property(output->of_node, "nvidia,edid", &size);
+
+	ddc = of_parse_phandle(output->of_node, "nvidia,ddc-i2c-bus", 0);
+	if (ddc) {
+		output->ddc = of_find_i2c_adapter_by_node(ddc);
+		if (!output->ddc) {
+			err = -EPROBE_DEFER;
+			of_node_put(ddc);
+			return err;
+		}
+
+		of_node_put(ddc);
+	}
+
+	if (!output->edid && !output->ddc)
+		return -ENODEV;
+
+	output->hpd_gpio = of_get_named_gpio_flags(output->of_node,
+						   "nvidia,hpd-gpio", 0,
+						   &flags);
+
+	return 0;
+}
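+/*
+ * A hypothetical device tree fragment matching the properties parsed above
+ * (node name, unit address and phandles are illustrative only):
+ *
+ *	hdmi@54280000 {
+ *		nvidia,edid = [00 ff ff ff ff ff ff 00 ...];
+ *		nvidia,ddc-i2c-bus = <&hdmi_ddc>;
+ *		nvidia,hpd-gpio = <&gpio 111 0>;
+ *	};
+ */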
+
+int tegra_output_init(struct drm_device *drm, struct tegra_output *output)
+{
+	int connector, encoder, err;
+
+	if (gpio_is_valid(output->hpd_gpio)) {
+		unsigned long flags;
+
+		err = gpio_request_one(output->hpd_gpio, GPIOF_DIR_IN,
+				       "HDMI hotplug detect");
+		if (err < 0) {
+			dev_err(output->dev, "gpio_request_one(): %d\n", err);
+			return err;
+		}
+
+		err = gpio_to_irq(output->hpd_gpio);
+		if (err < 0) {
+			dev_err(output->dev, "gpio_to_irq(): %d\n", err);
+			goto free_hpd;
+		}
+
+		output->hpd_irq = err;
+
+		flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING |
+			IRQF_ONESHOT;
+
+		err = request_threaded_irq(output->hpd_irq, NULL, hpd_irq,
+					   flags, "hpd", output);
+		if (err < 0) {
+			dev_err(output->dev, "failed to request IRQ#%u: %d\n",
+				output->hpd_irq, err);
+			goto free_hpd;
+		}
+
+		output->connector.polled = DRM_CONNECTOR_POLL_HPD;
+	}
+
+	switch (output->type) {
+	case TEGRA_OUTPUT_RGB:
+		connector = DRM_MODE_CONNECTOR_LVDS;
+		encoder = DRM_MODE_ENCODER_LVDS;
+		break;
+
+	case TEGRA_OUTPUT_HDMI:
+		connector = DRM_MODE_CONNECTOR_HDMIA;
+		encoder = DRM_MODE_ENCODER_TMDS;
+		break;
+
+	default:
+		connector = DRM_MODE_CONNECTOR_Unknown;
+		encoder = DRM_MODE_ENCODER_NONE;
+		break;
+	}
+
+	drm_connector_init(drm, &output->connector, &connector_funcs,
+			   connector);
+	drm_connector_helper_add(&output->connector, &connector_helper_funcs);
+
+	drm_encoder_init(drm, &output->encoder, &encoder_funcs, encoder);
+	drm_encoder_helper_add(&output->encoder, &encoder_helper_funcs);
+
+	drm_mode_connector_attach_encoder(&output->connector, &output->encoder);
+	drm_sysfs_connector_add(&output->connector);
+
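+	/* by default an output may be driven by either display controller */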
+	output->encoder.possible_crtcs = 0x3;
+
+	return 0;
+
+free_hpd:
+	gpio_free(output->hpd_gpio);
+
+	return err;
+}
+
+int tegra_output_exit(struct tegra_output *output)
+{
+	if (gpio_is_valid(output->hpd_gpio)) {
+		free_irq(output->hpd_irq, output);
+		gpio_free(output->hpd_gpio);
+	}
+
+	if (output->ddc)
+		put_device(&output->ddc->dev);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c
new file mode 100644
index 0000000..ed4416f
--- /dev/null
+++ b/drivers/gpu/drm/tegra/rgb.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "drm.h"
+#include "dc.h"
+
+struct tegra_rgb {
+	struct tegra_output output;
+	struct clk *clk_parent;
+	struct clk *clk;
+};
+
+static inline struct tegra_rgb *to_rgb(struct tegra_output *output)
+{
+	return container_of(output, struct tegra_rgb, output);
+}
+
+struct reg_entry {
+	unsigned long offset;
+	unsigned long value;
+};
+
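+/*
+ * Pin configurations written when the RGB output is enabled or disabled;
+ * the disable table restores the pins in reverse register order.
+ */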
+static const struct reg_entry rgb_enable[] = {
+	{ DC_COM_PIN_OUTPUT_ENABLE(0),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_ENABLE(1),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_ENABLE(2),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_ENABLE(3),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_POLARITY(0), 0x00000000 },
+	{ DC_COM_PIN_OUTPUT_POLARITY(1), 0x01000000 },
+	{ DC_COM_PIN_OUTPUT_POLARITY(2), 0x00000000 },
+	{ DC_COM_PIN_OUTPUT_POLARITY(3), 0x00000000 },
+	{ DC_COM_PIN_OUTPUT_DATA(0),     0x00000000 },
+	{ DC_COM_PIN_OUTPUT_DATA(1),     0x00000000 },
+	{ DC_COM_PIN_OUTPUT_DATA(2),     0x00000000 },
+	{ DC_COM_PIN_OUTPUT_DATA(3),     0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(0),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(1),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(2),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(3),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(4),   0x00210222 },
+	{ DC_COM_PIN_OUTPUT_SELECT(5),   0x00002200 },
+	{ DC_COM_PIN_OUTPUT_SELECT(6),   0x00020000 },
+};
+
+static const struct reg_entry rgb_disable[] = {
+	{ DC_COM_PIN_OUTPUT_SELECT(6),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(5),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(4),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(3),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(2),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(1),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_SELECT(0),   0x00000000 },
+	{ DC_COM_PIN_OUTPUT_DATA(3),     0xaaaaaaaa },
+	{ DC_COM_PIN_OUTPUT_DATA(2),     0xaaaaaaaa },
+	{ DC_COM_PIN_OUTPUT_DATA(1),     0xaaaaaaaa },
+	{ DC_COM_PIN_OUTPUT_DATA(0),     0xaaaaaaaa },
+	{ DC_COM_PIN_OUTPUT_POLARITY(3), 0x00000000 },
+	{ DC_COM_PIN_OUTPUT_POLARITY(2), 0x00000000 },
+	{ DC_COM_PIN_OUTPUT_POLARITY(1), 0x00000000 },
+	{ DC_COM_PIN_OUTPUT_POLARITY(0), 0x00000000 },
+	{ DC_COM_PIN_OUTPUT_ENABLE(3),   0x55555555 },
+	{ DC_COM_PIN_OUTPUT_ENABLE(2),   0x55555555 },
+	{ DC_COM_PIN_OUTPUT_ENABLE(1),   0x55150005 },
+	{ DC_COM_PIN_OUTPUT_ENABLE(0),   0x55555555 },
+};
+
+static void tegra_dc_write_regs(struct tegra_dc *dc,
+				const struct reg_entry *table,
+				unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++)
+		tegra_dc_writel(dc, table[i].value, table[i].offset);
+}
+
+static int tegra_output_rgb_enable(struct tegra_output *output)
+{
+	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
+
+	tegra_dc_write_regs(dc, rgb_enable, ARRAY_SIZE(rgb_enable));
+
+	return 0;
+}
+
+static int tegra_output_rgb_disable(struct tegra_output *output)
+{
+	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
+
+	tegra_dc_write_regs(dc, rgb_disable, ARRAY_SIZE(rgb_disable));
+
+	return 0;
+}
+
+static int tegra_output_rgb_setup_clock(struct tegra_output *output,
+					struct clk *clk, unsigned long pclk)
+{
+	struct tegra_rgb *rgb = to_rgb(output);
+
+	return clk_set_parent(clk, rgb->clk_parent);
+}
+
+static int tegra_output_rgb_check_mode(struct tegra_output *output,
+				       struct drm_display_mode *mode,
+				       enum drm_mode_status *status)
+{
+	/*
+	 * FIXME: For now, always assume that the mode is okay. There are
+	 * unresolved issues with clk_round_rate(), which doesn't always
+	 * reliably report whether a frequency can be set or not.
+	 */
+
+	*status = MODE_OK;
+
+	return 0;
+}
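
(A minimal sketch, not part of the patch: what the check above might look
like once clk_round_rate() can be trusted. The 1 kHz tolerance and the use
of abs() against the requested pixel clock are assumptions.)

	static int tegra_output_rgb_check_mode(struct tegra_output *output,
					       struct drm_display_mode *mode,
					       enum drm_mode_status *status)
	{
		struct tegra_rgb *rgb = to_rgb(output);
		long pclk = mode->clock * 1000;	/* mode->clock is in kHz */
		long rate = clk_round_rate(rgb->clk, pclk);

		/* Reject modes whose pixel clock the clock can't approach. */
		*status = (rate < 0 || abs(rate - pclk) > 1000) ?
			  MODE_NOCLOCK : MODE_OK;

		return 0;
	}
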
+
+static const struct tegra_output_ops rgb_ops = {
+	.enable = tegra_output_rgb_enable,
+	.disable = tegra_output_rgb_disable,
+	.setup_clock = tegra_output_rgb_setup_clock,
+	.check_mode = tegra_output_rgb_check_mode,
+};
+
+int tegra_dc_rgb_probe(struct tegra_dc *dc)
+{
+	struct device_node *np;
+	struct tegra_rgb *rgb;
+	int err;
+
+	np = of_get_child_by_name(dc->dev->of_node, "rgb");
+	if (!np || !of_device_is_available(np))
+		return -ENODEV;
+
+	rgb = devm_kzalloc(dc->dev, sizeof(*rgb), GFP_KERNEL);
+	if (!rgb)
+		return -ENOMEM;
+
+	rgb->clk = devm_clk_get(dc->dev, NULL);
+	if (IS_ERR(rgb->clk)) {
+		dev_err(dc->dev, "failed to get clock\n");
+		return PTR_ERR(rgb->clk);
+	}
+
+	rgb->clk_parent = devm_clk_get(dc->dev, "parent");
+	if (IS_ERR(rgb->clk_parent)) {
+		dev_err(dc->dev, "failed to get parent clock\n");
+		return PTR_ERR(rgb->clk_parent);
+	}
+
+	err = clk_set_parent(rgb->clk, rgb->clk_parent);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to set parent clock: %d\n", err);
+		return err;
+	}
+
+	rgb->output.dev = dc->dev;
+	rgb->output.of_node = np;
+
+	err = tegra_output_parse_dt(&rgb->output);
+	if (err < 0)
+		return err;
+
+	dc->rgb = &rgb->output;
+
+	return 0;
+}
+
+int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc)
+{
+	struct tegra_rgb *rgb = to_rgb(dc->rgb);
+	int err;
+
+	if (!dc->rgb)
+		return -ENODEV;
+
+	rgb->output.type = TEGRA_OUTPUT_RGB;
+	rgb->output.ops = &rgb_ops;
+
+	err = tegra_output_init(dc->base.dev, &rgb->output);
+	if (err < 0) {
+		dev_err(dc->dev, "output setup failed: %d\n", err);
+		return err;
+	}
+
+	/*
+	 * By default, outputs can be associated with any display controller.
+	 * RGB outputs are an exception, so we make sure they can only be
+	 * attached to their parent display controller.
+	 */
+	rgb->output.encoder.possible_crtcs = 1 << dc->pipe;
+
+	return 0;
+}
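
(For concreteness: possible_crtcs is a per-CRTC bitmask, so pipe 0 yields
0x1 and pipe 1 yields 0x2 here, while the generic 0x3 assigned in
tegra_output_init() lets either display controller drive the encoder.)
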
+
+int tegra_dc_rgb_exit(struct tegra_dc *dc)
+{
+	if (dc->rgb) {
+		int err;
+
+		err = tegra_output_disable(dc->rgb);
+		if (err < 0) {
+			dev_err(dc->dev, "output failed to disable: %d\n", err);
+			return err;
+		}
+
+		err = tegra_output_exit(dc->rgb);
+		if (err < 0) {
+			dev_err(dc->dev, "output cleanup failed: %d\n", err);
+			return err;
+		}
+
+		dc->rgb = NULL;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index bf6e4b5..33d20be 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -162,9 +162,9 @@
 {
 	if (interruptible) {
 		return wait_event_interruptible(bo->event_queue,
-					       atomic_read(&bo->reserved) == 0);
+					       !ttm_bo_is_reserved(bo));
 	} else {
-		wait_event(bo->event_queue, atomic_read(&bo->reserved) == 0);
+		wait_event(bo->event_queue, !ttm_bo_is_reserved(bo));
 		return 0;
 	}
 }
@@ -175,7 +175,7 @@
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man;
 
-	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(!ttm_bo_is_reserved(bo));
 
 	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
 
@@ -220,7 +220,7 @@
 	struct ttm_bo_global *glob = bo->glob;
 	int ret;
 
-	while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) {
+	while (unlikely(atomic_read(&bo->reserved) != 0)) {
 		/**
 		 * Deadlock avoidance for multi-bo reserving.
 		 */
@@ -249,6 +249,7 @@
 			return ret;
 	}
 
+	atomic_set(&bo->reserved, 1);
 	if (use_sequence) {
 		/**
 		 * Wake up waiters that may need to recheck for deadlock,
@@ -365,7 +366,7 @@
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 				  struct ttm_mem_reg *mem,
 				  bool evict, bool interruptible,
-				  bool no_wait_reserve, bool no_wait_gpu)
+				  bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	bool old_is_pci = ttm_mem_reg_is_pci(bdev, &bo->mem);
@@ -419,12 +420,12 @@
 
 	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
-		ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, mem);
+		ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem);
 	else if (bdev->driver->move)
 		ret = bdev->driver->move(bo, evict, interruptible,
-					 no_wait_reserve, no_wait_gpu, mem);
+					 no_wait_gpu, mem);
 	else
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, mem);
+		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, mem);
 
 	if (ret) {
 		if (bdev->driver->move_notify) {
@@ -487,40 +488,33 @@
 	ttm_bo_mem_put(bo, &bo->mem);
 
 	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
 
 	/*
-	 * Make processes trying to reserve really pick it up.
+	 * Since the final reference to this bo may not be dropped by
+	 * the current task we have to put a memory barrier here to make
+	 * sure the changes done in this function are always visible.
+	 *
+	 * This function only needs protection against the final kref_put.
 	 */
-	smp_mb__after_atomic_dec();
-	wake_up_all(&bo->event_queue);
+	smp_mb__before_atomic_dec();
 }
 
 static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_global *glob = bo->glob;
-	struct ttm_bo_driver *driver;
+	struct ttm_bo_driver *driver = bdev->driver;
 	void *sync_obj = NULL;
-	void *sync_obj_arg;
 	int put_count;
 	int ret;
 
+	spin_lock(&glob->lru_lock);
+	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+
 	spin_lock(&bdev->fence_lock);
 	(void) ttm_bo_wait(bo, false, false, true);
-	if (!bo->sync_obj) {
-
-		spin_lock(&glob->lru_lock);
-
-		/**
-		 * Lock inversion between bo:reserve and bdev::fence_lock here,
-		 * but that's OK, since we're only trylocking.
-		 */
-
-		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-
-		if (unlikely(ret == -EBUSY))
-			goto queue;
-
+	if (!ret && !bo->sync_obj) {
 		spin_unlock(&bdev->fence_lock);
 		put_count = ttm_bo_del_from_lru(bo);
 
@@ -530,22 +524,22 @@
 		ttm_bo_list_ref_sub(bo, put_count, true);
 
 		return;
-	} else {
-		spin_lock(&glob->lru_lock);
 	}
-queue:
-	driver = bdev->driver;
 	if (bo->sync_obj)
 		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-	sync_obj_arg = bo->sync_obj_arg;
+	spin_unlock(&bdev->fence_lock);
+
+	if (!ret) {
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+	}
 
 	kref_get(&bo->list_kref);
 	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
 	spin_unlock(&glob->lru_lock);
-	spin_unlock(&bdev->fence_lock);
 
 	if (sync_obj) {
-		driver->sync_obj_flush(sync_obj, sync_obj_arg);
+		driver->sync_obj_flush(sync_obj);
 		driver->sync_obj_unref(&sync_obj);
 	}
 	schedule_delayed_work(&bdev->wq,
@@ -553,68 +547,84 @@
 }
 
 /**
- * function ttm_bo_cleanup_refs
+ * function ttm_bo_cleanup_refs_and_unlock
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, do nothing.
  *
+ * Must be called with lru_lock and reservation held; this function
+ * will drop both before returning.
+ *
  * @interruptible         Any sleeps should occur interruptibly.
- * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
  * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
  */
 
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-			       bool interruptible,
-			       bool no_wait_reserve,
-			       bool no_wait_gpu)
+static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
+					  bool interruptible,
+					  bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
-	int ret = 0;
+	int ret;
 
-retry:
 	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, true);
 
-	if (unlikely(ret != 0))
-		return ret;
+	if (ret && !no_wait_gpu) {
+		void *sync_obj;
 
-retry_reserve:
-	spin_lock(&glob->lru_lock);
+		/*
+		 * Take a reference to the fence and unreserve; at this
+		 * point the buffer should be dead, so no new sync objects
+		 * can be attached.
+		 */
+		sync_obj = driver->sync_obj_ref(bo->sync_obj);
+		spin_unlock(&bdev->fence_lock);
 
-	if (unlikely(list_empty(&bo->ddestroy))) {
-		spin_unlock(&glob->lru_lock);
-		return 0;
-	}
-
-	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-
-	if (unlikely(ret == -EBUSY)) {
-		spin_unlock(&glob->lru_lock);
-		if (likely(!no_wait_reserve))
-			ret = ttm_bo_wait_unreserved(bo, interruptible);
-		if (unlikely(ret != 0))
-			return ret;
-
-		goto retry_reserve;
-	}
-
-	BUG_ON(ret != 0);
-
-	/**
-	 * We can re-check for sync object without taking
-	 * the bo::lock since setting the sync object requires
-	 * also bo::reserved. A busy object at this point may
-	 * be caused by another thread recently starting an accelerated
-	 * eviction.
-	 */
-
-	if (unlikely(bo->sync_obj)) {
 		atomic_set(&bo->reserved, 0);
 		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
-		goto retry;
+
+		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
+		driver->sync_obj_unref(&sync_obj);
+		if (ret)
+			return ret;
+
+		/*
+		 * Remove sync_obj with ttm_bo_wait; the wait should be
+		 * finished, and no new wait object should have been added.
+		 */
+		spin_lock(&bdev->fence_lock);
+		ret = ttm_bo_wait(bo, false, false, true);
+		WARN_ON(ret);
+		spin_unlock(&bdev->fence_lock);
+		if (ret)
+			return ret;
+
+		spin_lock(&glob->lru_lock);
+		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+
+		/*
+		 * We raced and lost: someone else holds the reservation now
+		 * and is probably busy in ttm_bo_cleanup_memtype_use.
+		 *
+		 * Even if that's not the case, any delayed destruction would
+		 * succeed because we finished waiting, so just return success
+		 * here.
+		 */
+		if (ret) {
+			spin_unlock(&glob->lru_lock);
+			return 0;
+		}
+	} else
+		spin_unlock(&bdev->fence_lock);
+
+	if (ret || unlikely(list_empty(&bo->ddestroy))) {
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+		spin_unlock(&glob->lru_lock);
+		return ret;
 	}
 
 	put_count = ttm_bo_del_from_lru(bo);
@@ -657,9 +667,13 @@
 			kref_get(&nentry->list_kref);
 		}
 
-		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
-					  !remove_all);
+		ret = ttm_bo_reserve_locked(entry, false, !remove_all, false, 0);
+		if (!ret)
+			ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
+							     !remove_all);
+		else
+			spin_unlock(&glob->lru_lock);
+
 		kref_put(&entry->list_kref, ttm_bo_release_list);
 		entry = nentry;
 
@@ -697,6 +711,7 @@
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
 
+	write_lock(&bdev->vm_lock);
 	if (likely(bo->vm_node != NULL)) {
 		rb_erase(&bo->vm_rb, &bdev->addr_space_rb);
 		drm_mm_put_block(bo->vm_node);
@@ -708,18 +723,14 @@
 	ttm_mem_io_unlock(man);
 	ttm_bo_cleanup_refs_or_queue(bo);
 	kref_put(&bo->list_kref, ttm_bo_release_list);
-	write_lock(&bdev->vm_lock);
 }
 
 void ttm_bo_unref(struct ttm_buffer_object **p_bo)
 {
 	struct ttm_buffer_object *bo = *p_bo;
-	struct ttm_bo_device *bdev = bo->bdev;
 
 	*p_bo = NULL;
-	write_lock(&bdev->vm_lock);
 	kref_put(&bo->kref, ttm_bo_release);
-	write_unlock(&bdev->vm_lock);
 }
 EXPORT_SYMBOL(ttm_bo_unref);
 
@@ -738,7 +749,7 @@
 EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue);
 
 static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
-			bool no_wait_reserve, bool no_wait_gpu)
+			bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_reg evict_mem;
@@ -756,7 +767,7 @@
 		goto out;
 	}
 
-	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(!ttm_bo_is_reserved(bo));
 
 	evict_mem = bo->mem;
 	evict_mem.mm_node = NULL;
@@ -769,7 +780,7 @@
 	placement.num_busy_placement = 0;
 	bdev->driver->evict_flags(bo, &placement);
 	ret = ttm_bo_mem_space(bo, &placement, &evict_mem, interruptible,
-				no_wait_reserve, no_wait_gpu);
+				no_wait_gpu);
 	if (ret) {
 		if (ret != -ERESTARTSYS) {
 			pr_err("Failed to find memory space for buffer 0x%p eviction\n",
@@ -780,7 +791,7 @@
 	}
 
 	ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, interruptible,
-				     no_wait_reserve, no_wait_gpu);
+				     no_wait_gpu);
 	if (ret) {
 		if (ret != -ERESTARTSYS)
 			pr_err("Buffer eviction failed\n");
@@ -794,49 +805,33 @@
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 				uint32_t mem_type,
-				bool interruptible, bool no_wait_reserve,
+				bool interruptible,
 				bool no_wait_gpu)
 {
 	struct ttm_bo_global *glob = bdev->glob;
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_buffer_object *bo;
-	int ret, put_count = 0;
+	int ret = -EBUSY, put_count;
 
-retry:
 	spin_lock(&glob->lru_lock);
-	if (list_empty(&man->lru)) {
-		spin_unlock(&glob->lru_lock);
-		return -EBUSY;
+	list_for_each_entry(bo, &man->lru, lru) {
+		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+		if (!ret)
+			break;
 	}
 
-	bo = list_first_entry(&man->lru, struct ttm_buffer_object, lru);
-	kref_get(&bo->list_kref);
-
-	if (!list_empty(&bo->ddestroy)) {
+	if (ret) {
 		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(bo, interruptible,
-					  no_wait_reserve, no_wait_gpu);
-		kref_put(&bo->list_kref, ttm_bo_release_list);
-
 		return ret;
 	}
 
-	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+	kref_get(&bo->list_kref);
 
-	if (unlikely(ret == -EBUSY)) {
-		spin_unlock(&glob->lru_lock);
-		if (likely(!no_wait_reserve))
-			ret = ttm_bo_wait_unreserved(bo, interruptible);
-
+	if (!list_empty(&bo->ddestroy)) {
+		ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
+						     no_wait_gpu);
 		kref_put(&bo->list_kref, ttm_bo_release_list);
-
-		/**
-		 * We *need* to retry after releasing the lru lock.
-		 */
-
-		if (unlikely(ret != 0))
-			return ret;
-		goto retry;
+		return ret;
 	}
 
 	put_count = ttm_bo_del_from_lru(bo);
@@ -846,7 +841,7 @@
 
 	ttm_bo_list_ref_sub(bo, put_count, true);
 
-	ret = ttm_bo_evict(bo, interruptible, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
 	ttm_bo_unreserve(bo);
 
 	kref_put(&bo->list_kref, ttm_bo_release_list);
@@ -871,7 +866,6 @@
 					struct ttm_placement *placement,
 					struct ttm_mem_reg *mem,
 					bool interruptible,
-					bool no_wait_reserve,
 					bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -884,8 +878,8 @@
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, interruptible,
-						no_wait_reserve, no_wait_gpu);
+		ret = ttm_mem_evict_first(bdev, mem_type,
+					  interruptible, no_wait_gpu);
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
@@ -950,7 +944,7 @@
 int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
 			struct ttm_mem_reg *mem,
-			bool interruptible, bool no_wait_reserve,
+			bool interruptible,
 			bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -1041,7 +1035,7 @@
 		}
 
 		ret = ttm_bo_mem_force_space(bo, mem_type, placement, mem,
-						interruptible, no_wait_reserve, no_wait_gpu);
+						interruptible, no_wait_gpu);
 		if (ret == 0 && mem->mm_node) {
 			mem->placement = cur_flags;
 			return 0;
@@ -1054,26 +1048,16 @@
 }
 EXPORT_SYMBOL(ttm_bo_mem_space);
 
-int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait)
-{
-	if ((atomic_read(&bo->cpu_writers) > 0) && no_wait)
-		return -EBUSY;
-
-	return wait_event_interruptible(bo->event_queue,
-					atomic_read(&bo->cpu_writers) == 0);
-}
-EXPORT_SYMBOL(ttm_bo_wait_cpu);
-
 int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
-			bool interruptible, bool no_wait_reserve,
+			bool interruptible,
 			bool no_wait_gpu)
 {
 	int ret = 0;
 	struct ttm_mem_reg mem;
 	struct ttm_bo_device *bdev = bo->bdev;
 
-	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(!ttm_bo_is_reserved(bo));
 
 	/*
 	 * FIXME: It's possible to pipeline buffer moves.
@@ -1093,10 +1077,12 @@
 	/*
 	 * Determine where to move the buffer.
 	 */
-	ret = ttm_bo_mem_space(bo, placement, &mem, interruptible, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_mem_space(bo, placement, &mem,
+			       interruptible, no_wait_gpu);
 	if (ret)
 		goto out_unlock;
-	ret = ttm_bo_handle_move_mem(bo, &mem, false, interruptible, no_wait_reserve, no_wait_gpu);
+	ret = ttm_bo_handle_move_mem(bo, &mem, false,
+				     interruptible, no_wait_gpu);
 out_unlock:
 	if (ret && mem.mm_node)
 		ttm_bo_mem_put(bo, &mem);
@@ -1125,12 +1111,12 @@
 
 int ttm_bo_validate(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
-			bool interruptible, bool no_wait_reserve,
+			bool interruptible,
 			bool no_wait_gpu)
 {
 	int ret;
 
-	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(!ttm_bo_is_reserved(bo));
 	/* Check that range is valid */
 	if (placement->lpfn || placement->fpfn)
 		if (placement->fpfn > placement->lpfn ||
@@ -1141,7 +1127,8 @@
 	 */
 	ret = ttm_bo_mem_compat(placement, &bo->mem);
 	if (ret < 0) {
-		ret = ttm_bo_move_buffer(bo, placement, interruptible, no_wait_reserve, no_wait_gpu);
+		ret = ttm_bo_move_buffer(bo, placement, interruptible,
+					 no_wait_gpu);
 		if (ret)
 			return ret;
 	} else {
@@ -1179,7 +1166,6 @@
 		enum ttm_bo_type type,
 		struct ttm_placement *placement,
 		uint32_t page_alignment,
-		unsigned long buffer_start,
 		bool interruptible,
 		struct file *persistent_swap_storage,
 		size_t acc_size,
@@ -1200,7 +1186,6 @@
 		return -ENOMEM;
 	}
 
-	size += buffer_start & ~PAGE_MASK;
 	num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (num_pages == 0) {
 		pr_err("Illegal buffer object size\n");
@@ -1233,7 +1218,6 @@
 	bo->mem.page_alignment = page_alignment;
 	bo->mem.bus.io_reserved_vm = false;
 	bo->mem.bus.io_reserved_count = 0;
-	bo->buffer_start = buffer_start & PAGE_MASK;
 	bo->priv_flags = 0;
 	bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
 	bo->seq_valid = false;
@@ -1257,7 +1241,7 @@
 			goto out_err;
 	}
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 	if (ret)
 		goto out_err;
 
@@ -1306,7 +1290,6 @@
 			enum ttm_bo_type type,
 			struct ttm_placement *placement,
 			uint32_t page_alignment,
-			unsigned long buffer_start,
 			bool interruptible,
 			struct file *persistent_swap_storage,
 			struct ttm_buffer_object **p_bo)
@@ -1321,8 +1304,8 @@
 
 	acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object));
 	ret = ttm_bo_init(bdev, bo, size, type, placement, page_alignment,
-				buffer_start, interruptible,
-			  persistent_swap_storage, acc_size, NULL, NULL);
+			  interruptible, persistent_swap_storage, acc_size,
+			  NULL, NULL);
 	if (likely(ret == 0))
 		*p_bo = bo;
 
@@ -1344,7 +1327,7 @@
 	spin_lock(&glob->lru_lock);
 	while (!list_empty(&man->lru)) {
 		spin_unlock(&glob->lru_lock);
-		ret = ttm_mem_evict_first(bdev, mem_type, false, false, false);
+		ret = ttm_mem_evict_first(bdev, mem_type, false, false);
 		if (ret) {
 			if (allow_errors) {
 				return ret;
@@ -1577,7 +1560,6 @@
 		goto out_no_addr_mm;
 
 	INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue);
-	bdev->nice_mode = true;
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = NULL;
 	bdev->glob = glob;
@@ -1721,7 +1703,6 @@
 	struct ttm_bo_driver *driver = bo->bdev->driver;
 	struct ttm_bo_device *bdev = bo->bdev;
 	void *sync_obj;
-	void *sync_obj_arg;
 	int ret = 0;
 
 	if (likely(bo->sync_obj == NULL))
@@ -1729,7 +1710,7 @@
 
 	while (bo->sync_obj) {
 
-		if (driver->sync_obj_signaled(bo->sync_obj, bo->sync_obj_arg)) {
+		if (driver->sync_obj_signaled(bo->sync_obj)) {
 			void *tmp_obj = bo->sync_obj;
 			bo->sync_obj = NULL;
 			clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
@@ -1743,9 +1724,8 @@
 			return -EBUSY;
 
 		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-		sync_obj_arg = bo->sync_obj_arg;
 		spin_unlock(&bdev->fence_lock);
-		ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
+		ret = driver->sync_obj_wait(sync_obj,
 					    lazy, interruptible);
 		if (unlikely(ret != 0)) {
 			driver->sync_obj_unref(&sync_obj);
@@ -1753,8 +1733,7 @@
 			return ret;
 		}
 		spin_lock(&bdev->fence_lock);
-		if (likely(bo->sync_obj == sync_obj &&
-			   bo->sync_obj_arg == sync_obj_arg)) {
+		if (likely(bo->sync_obj == sync_obj)) {
 			void *tmp_obj = bo->sync_obj;
 			bo->sync_obj = NULL;
 			clear_bit(TTM_BO_PRIV_FLAG_MOVING,
@@ -1797,8 +1776,7 @@
 
 void ttm_bo_synccpu_write_release(struct ttm_buffer_object *bo)
 {
-	if (atomic_dec_and_test(&bo->cpu_writers))
-		wake_up_all(&bo->event_queue);
+	atomic_dec(&bo->cpu_writers);
 }
 EXPORT_SYMBOL(ttm_bo_synccpu_write_release);
 
@@ -1817,40 +1795,25 @@
 	uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM);
 
 	spin_lock(&glob->lru_lock);
-	while (ret == -EBUSY) {
-		if (unlikely(list_empty(&glob->swap_lru))) {
-			spin_unlock(&glob->lru_lock);
-			return -EBUSY;
-		}
-
-		bo = list_first_entry(&glob->swap_lru,
-				      struct ttm_buffer_object, swap);
-		kref_get(&bo->list_kref);
-
-		if (!list_empty(&bo->ddestroy)) {
-			spin_unlock(&glob->lru_lock);
-			(void) ttm_bo_cleanup_refs(bo, false, false, false);
-			kref_put(&bo->list_kref, ttm_bo_release_list);
-			spin_lock(&glob->lru_lock);
-			continue;
-		}
-
-		/**
-		 * Reserve buffer. Since we unlock while sleeping, we need
-		 * to re-check that nobody removed us from the swap-list while
-		 * we slept.
-		 */
-
+	list_for_each_entry(bo, &glob->swap_lru, swap) {
 		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-		if (unlikely(ret == -EBUSY)) {
-			spin_unlock(&glob->lru_lock);
-			ttm_bo_wait_unreserved(bo, false);
-			kref_put(&bo->list_kref, ttm_bo_release_list);
-			spin_lock(&glob->lru_lock);
-		}
+		if (!ret)
+			break;
 	}
 
-	BUG_ON(ret != 0);
+	if (ret) {
+		spin_unlock(&glob->lru_lock);
+		return ret;
+	}
+
+	kref_get(&bo->list_kref);
+
+	if (!list_empty(&bo->ddestroy)) {
+		ret = ttm_bo_cleanup_refs_and_unlock(bo, false, false);
+		kref_put(&bo->list_kref, ttm_bo_release_list);
+		return ret;
+	}
+
 	put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
@@ -1876,7 +1839,7 @@
 		evict_mem.mem_type = TTM_PL_SYSTEM;
 
 		ret = ttm_bo_handle_move_mem(bo, &evict_mem, true,
-					     false, false, false);
+					     false, false);
 		if (unlikely(ret != 0))
 			goto out;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2026060..9e9c5d2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -43,7 +43,7 @@
 }
 
 int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-		    bool evict, bool no_wait_reserve,
+		    bool evict,
 		    bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct ttm_tt *ttm = bo->ttm;
@@ -314,7 +314,7 @@
 }
 
 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-		       bool evict, bool no_wait_reserve, bool no_wait_gpu,
+		       bool evict, bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -611,8 +611,7 @@
 
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 			      void *sync_obj,
-			      void *sync_obj_arg,
-			      bool evict, bool no_wait_reserve,
+			      bool evict,
 			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
@@ -630,7 +629,6 @@
 		bo->sync_obj = NULL;
 	}
 	bo->sync_obj = driver->sync_obj_ref(sync_obj);
-	bo->sync_obj_arg = sync_obj_arg;
 	if (evict) {
 		ret = ttm_bo_wait(bo, false, false, false);
 		spin_unlock(&bdev->fence_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 3ba72db..74705f32 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -259,8 +259,8 @@
 	read_lock(&bdev->vm_lock);
 	bo = ttm_bo_vm_lookup_rb(bdev, vma->vm_pgoff,
 				 (vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
-	if (likely(bo != NULL))
-		ttm_bo_reference(bo);
+	if (likely(bo != NULL) && !kref_get_unless_zero(&bo->kref))
+		bo = NULL;
 	read_unlock(&bdev->vm_lock);
 
 	if (unlikely(bo == NULL)) {
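
(A hedged sketch of the general lookup pattern this hunk adopts; "obj",
its kref field and the find helper are illustrative names only.)

	/*
	 * Find the object under the lock protecting the lookup structure,
	 * but refuse to take a reference once the refcount has hit zero:
	 * the object is mid-teardown and must not be resurrected.
	 */
	obj = lookup_structure_find(key);	/* hypothetical helper */
	if (obj && !kref_get_unless_zero(&obj->kref))
		obj = NULL;
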
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 1937069..cd9e452 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -185,10 +185,7 @@
 			ttm_eu_backoff_reservation_locked(list);
 			spin_unlock(&glob->lru_lock);
 			ttm_eu_list_ref_sub(list);
-			ret = ttm_bo_wait_cpu(bo, false);
-			if (ret)
-				return ret;
-			goto retry;
+			return -EBUSY;
 		}
 	}
 
@@ -216,19 +213,18 @@
 	driver = bdev->driver;
 	glob = bo->glob;
 
-	spin_lock(&bdev->fence_lock);
 	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->fence_lock);
 
 	list_for_each_entry(entry, list, head) {
 		bo = entry->bo;
 		entry->old_sync_obj = bo->sync_obj;
 		bo->sync_obj = driver->sync_obj_ref(sync_obj);
-		bo->sync_obj_arg = entry->new_sync_obj_arg;
 		ttm_bo_unreserve_locked(bo);
 		entry->reserved = false;
 	}
-	spin_unlock(&glob->lru_lock);
 	spin_unlock(&bdev->fence_lock);
+	spin_unlock(&glob->lru_lock);
 
 	list_for_each_entry(entry, list, head) {
 		if (entry->old_sync_obj)
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index 479c6b0..dbc2def 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -367,7 +367,6 @@
 	spin_lock_init(&glob->lock);
 	glob->swap_queue = create_singlethread_workqueue("ttm_swap");
 	INIT_WORK(&glob->work, ttm_shrink_work);
-	init_waitqueue_head(&glob->queue);
 	ret = kobject_init_and_add(
 		&glob->kobj, &ttm_mem_glob_kobj_type, ttm_get_kobj(), "memory_accounting");
 	if (unlikely(ret != 0)) {
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index c785787..58a5f32 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -80,7 +80,7 @@
  */
 
 struct ttm_object_device {
-	rwlock_t object_lock;
+	spinlock_t object_lock;
 	struct drm_open_hash object_hash;
 	atomic_t object_count;
 	struct ttm_mem_global *mem_glob;
@@ -157,12 +157,12 @@
 	base->refcount_release = refcount_release;
 	base->ref_obj_release = ref_obj_release;
 	base->object_type = object_type;
-	write_lock(&tdev->object_lock);
 	kref_init(&base->refcount);
-	ret = drm_ht_just_insert_please(&tdev->object_hash,
-					&base->hash,
-					(unsigned long)base, 31, 0, 0);
-	write_unlock(&tdev->object_lock);
+	spin_lock(&tdev->object_lock);
+	ret = drm_ht_just_insert_please_rcu(&tdev->object_hash,
+					    &base->hash,
+					    (unsigned long)base, 31, 0, 0);
+	spin_unlock(&tdev->object_lock);
 	if (unlikely(ret != 0))
 		goto out_err0;
 
@@ -174,7 +174,9 @@
 
 	return 0;
 out_err1:
-	(void)drm_ht_remove_item(&tdev->object_hash, &base->hash);
+	spin_lock(&tdev->object_lock);
+	(void)drm_ht_remove_item_rcu(&tdev->object_hash, &base->hash);
+	spin_unlock(&tdev->object_lock);
 out_err0:
 	return ret;
 }
@@ -186,30 +188,29 @@
 	    container_of(kref, struct ttm_base_object, refcount);
 	struct ttm_object_device *tdev = base->tfile->tdev;
 
-	(void)drm_ht_remove_item(&tdev->object_hash, &base->hash);
-	write_unlock(&tdev->object_lock);
+	spin_lock(&tdev->object_lock);
+	(void)drm_ht_remove_item_rcu(&tdev->object_hash, &base->hash);
+	spin_unlock(&tdev->object_lock);
+
+	/*
+	 * Note: We don't use synchronize_rcu() here because it's far
+	 * too slow. It's up to the user to free the object using
+	 * call_rcu() or ttm_base_object_kfree().
+	 */
+
 	if (base->refcount_release) {
 		ttm_object_file_unref(&base->tfile);
 		base->refcount_release(&base);
 	}
-	write_lock(&tdev->object_lock);
 }
 
 void ttm_base_object_unref(struct ttm_base_object **p_base)
 {
 	struct ttm_base_object *base = *p_base;
-	struct ttm_object_device *tdev = base->tfile->tdev;
 
 	*p_base = NULL;
 
-	/*
-	 * Need to take the lock here to avoid racing with
-	 * users trying to look up the object.
-	 */
-
-	write_lock(&tdev->object_lock);
 	kref_put(&base->refcount, ttm_release_base);
-	write_unlock(&tdev->object_lock);
 }
 EXPORT_SYMBOL(ttm_base_object_unref);
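
(A sketch of the RCU-deferred free that the note in ttm_release_base()
asks of users; the structure and callback names are illustrative, and a
plain call_rcu() is shown rather than any wrapper.)

	struct my_object {
		struct ttm_base_object base;
		struct rcu_head rcu;
	};

	static void my_object_free(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct my_object, rcu));
	}

	static void my_object_refcount_release(struct ttm_base_object **p_base)
	{
		struct my_object *obj =
			container_of(*p_base, struct my_object, base);

		*p_base = NULL;
		/* Defer the kfree() until all RCU readers have finished. */
		call_rcu(&obj->rcu, my_object_free);
	}
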
 
@@ -221,14 +222,14 @@
 	struct drm_hash_item *hash;
 	int ret;
 
-	read_lock(&tdev->object_lock);
-	ret = drm_ht_find_item(&tdev->object_hash, key, &hash);
+	rcu_read_lock();
+	ret = drm_ht_find_item_rcu(&tdev->object_hash, key, &hash);
 
 	if (likely(ret == 0)) {
 		base = drm_hash_entry(hash, struct ttm_base_object, hash);
-		kref_get(&base->refcount);
+		ret = kref_get_unless_zero(&base->refcount) ? 0 : -EINVAL;
 	}
-	read_unlock(&tdev->object_lock);
+	rcu_read_unlock();
 
 	if (unlikely(ret != 0))
 		return NULL;
@@ -426,7 +427,7 @@
 		return NULL;
 
 	tdev->mem_glob = mem_glob;
-	rwlock_init(&tdev->object_lock);
+	spin_lock_init(&tdev->object_lock);
 	atomic_set(&tdev->object_count, 0);
 	ret = drm_ht_create(&tdev->object_hash, hash_order);
 
@@ -444,9 +445,9 @@
 
 	*p_tdev = NULL;
 
-	write_lock(&tdev->object_lock);
+	spin_lock(&tdev->object_lock);
 	drm_ht_remove(&tdev->object_hash);
-	write_unlock(&tdev->object_lock);
+	spin_unlock(&tdev->object_lock);
 
 	kfree(tdev);
 }
diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c
index b3b2ced..512f44a 100644
--- a/drivers/gpu/drm/udl/udl_connector.c
+++ b/drivers/gpu/drm/udl/udl_connector.c
@@ -84,7 +84,8 @@
 	return connector_status_connected;
 }
 
-struct drm_encoder *udl_best_single_encoder(struct drm_connector *connector)
+static struct drm_encoder*
+udl_best_single_encoder(struct drm_connector *connector)
 {
 	int enc_id = connector->encoder_ids[0];
 	struct drm_mode_object *obj;
@@ -97,8 +98,9 @@
 	return encoder;
 }
 
-int udl_connector_set_property(struct drm_connector *connector, struct drm_property *property,
-			       uint64_t val)
+static int udl_connector_set_property(struct drm_connector *connector,
+				      struct drm_property *property,
+				      uint64_t val)
 {
 	return 0;
 }
@@ -110,13 +112,13 @@
 	kfree(connector);
 }
 
-struct drm_connector_helper_funcs udl_connector_helper_funcs = {
+static struct drm_connector_helper_funcs udl_connector_helper_funcs = {
 	.get_modes = udl_get_modes,
 	.mode_valid = udl_mode_valid,
 	.best_encoder = udl_best_single_encoder,
 };
 
-struct drm_connector_funcs udl_connector_funcs = {
+static struct drm_connector_funcs udl_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
 	.detect = udl_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
@@ -138,7 +140,7 @@
 	drm_sysfs_connector_add(connector);
 	drm_mode_connector_attach_encoder(connector, encoder);
 
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.dirty_info_property,
 				      1);
 	return 0;
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 586869c..2cc6cd9 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -5,6 +5,7 @@
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
 	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
-	    vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o
+	    vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o vmwgfx_context.o \
+	    vmwgfx_surface.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/svga3d_surfacedefs.h
new file mode 100644
index 0000000..8369c3b
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/svga3d_surfacedefs.h
@@ -0,0 +1,909 @@
+/**************************************************************************
+ *
+ * Copyright © 2008-2012 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifdef __KERNEL__
+
+#include <drm/vmwgfx_drm.h>
+#define surf_size_struct struct drm_vmw_size
+
+#else /* __KERNEL__ */
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(_A) (sizeof(_A) / sizeof((_A)[0]))
+#endif /* ARRAY_SIZE */
+
+#define DIV_ROUND_UP(x, y)  (((x) + (y) - 1) / (y))
+#define max_t(type, x, y)  ((x) > (y) ? (x) : (y))
+#define surf_size_struct SVGA3dSize
+#define u32 uint32
+
+#endif /* __KERNEL__ */
+
+#include "svga3d_reg.h"
+
+/*
+ * enum svga3d_block_desc describes the active data channels in a block.
+ *
+ * There can be at most four active channels in a block:
+ *    1. Blue and bump U are stored in the first channel.
+ *    2. Green, bump V and stencil are stored in the second channel.
+ *    3. Red, bump W, luminance and depth are stored in the third channel.
+ *    4. Alpha and bump Q are stored in the fourth channel.
+ *
+ * Block channels can be used to store compressed and buffer data:
+ *    1. For compressed formats, only the data channel is used and its size
+ *       is equal to that of a singular block in the compression scheme.
+ *    2. For buffer formats, only the data channel is used and its size is
+ *       exactly one byte in length.
+ *    3. In each case the bit depth represents the size of a singular block.
+ *
+ * Note: Compressed and IEEE formats do not use the bitMask structure.
+ */
+
+enum svga3d_block_desc {
+	SVGA3DBLOCKDESC_NONE        = 0,         /* No channels are active */
+	SVGA3DBLOCKDESC_BLUE        = 1 << 0,    /* Block with blue channel
+						    data */
+	SVGA3DBLOCKDESC_U           = 1 << 0,    /* Block with bump U channel
+						    data */
+	SVGA3DBLOCKDESC_UV_VIDEO    = 1 << 7,    /* Block with alternating video
+						    U and V */
+	SVGA3DBLOCKDESC_GREEN       = 1 << 1,    /* Block with green channel
+						    data */
+	SVGA3DBLOCKDESC_V           = 1 << 1,    /* Block with bump V channel
+						    data */
+	SVGA3DBLOCKDESC_STENCIL     = 1 << 1,    /* Block with a stencil
+						    channel */
+	SVGA3DBLOCKDESC_RED         = 1 << 2,    /* Block with red channel
+						    data */
+	SVGA3DBLOCKDESC_W           = 1 << 2,    /* Block with bump W channel
+						    data */
+	SVGA3DBLOCKDESC_LUMINANCE   = 1 << 2,    /* Block with luminance channel
+						    data */
+	SVGA3DBLOCKDESC_Y           = 1 << 2,    /* Block with video luminance
+						    data */
+	SVGA3DBLOCKDESC_DEPTH       = 1 << 2,    /* Block with depth channel */
+	SVGA3DBLOCKDESC_ALPHA       = 1 << 3,    /* Block with an alpha
+						    channel */
+	SVGA3DBLOCKDESC_Q           = 1 << 3,    /* Block with bump Q channel
+						    data */
+	SVGA3DBLOCKDESC_BUFFER      = 1 << 4,    /* Block stores 1 byte of
+						    data */
+	SVGA3DBLOCKDESC_COMPRESSED  = 1 << 5,    /* Block stores n bytes of
+						    data depending on the
+						    compression method used */
+	SVGA3DBLOCKDESC_IEEE_FP     = 1 << 6,    /* Block stores data in an IEEE
+						    floating point
+						    representation in
+						    all channels */
+	SVGA3DBLOCKDESC_PLANAR_YUV  = 1 << 8,    /* Three separate blocks store
+						    data. */
+	SVGA3DBLOCKDESC_U_VIDEO     = 1 << 9,    /* Block with U video data */
+	SVGA3DBLOCKDESC_V_VIDEO     = 1 << 10,   /* Block with V video data */
+	SVGA3DBLOCKDESC_EXP         = 1 << 11,   /* Shared exponent */
+	SVGA3DBLOCKDESC_SRGB        = 1 << 12,   /* Data is in sRGB format */
+	SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13,   /* 2 planes of Y, UV,
+						    e.g., NV12. */
+	SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14,   /* 3 planes of separate
+						    Y, U, V, e.g., YV12. */
+
+	SVGA3DBLOCKDESC_RG         = SVGA3DBLOCKDESC_RED |
+	SVGA3DBLOCKDESC_GREEN,
+	SVGA3DBLOCKDESC_RGB        = SVGA3DBLOCKDESC_RG |
+	SVGA3DBLOCKDESC_BLUE,
+	SVGA3DBLOCKDESC_RGB_SRGB   = SVGA3DBLOCKDESC_RGB |
+	SVGA3DBLOCKDESC_SRGB,
+	SVGA3DBLOCKDESC_RGBA       = SVGA3DBLOCKDESC_RGB |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_RGBA_SRGB  = SVGA3DBLOCKDESC_RGBA |
+	SVGA3DBLOCKDESC_SRGB,
+	SVGA3DBLOCKDESC_UV         = SVGA3DBLOCKDESC_U |
+	SVGA3DBLOCKDESC_V,
+	SVGA3DBLOCKDESC_UVL        = SVGA3DBLOCKDESC_UV |
+	SVGA3DBLOCKDESC_LUMINANCE,
+	SVGA3DBLOCKDESC_UVW        = SVGA3DBLOCKDESC_UV |
+	SVGA3DBLOCKDESC_W,
+	SVGA3DBLOCKDESC_UVWA       = SVGA3DBLOCKDESC_UVW |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_UVWQ       = SVGA3DBLOCKDESC_U |
+	SVGA3DBLOCKDESC_V |
+	SVGA3DBLOCKDESC_W |
+	SVGA3DBLOCKDESC_Q,
+	SVGA3DBLOCKDESC_LA         = SVGA3DBLOCKDESC_LUMINANCE |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_R_FP       = SVGA3DBLOCKDESC_RED |
+	SVGA3DBLOCKDESC_IEEE_FP,
+	SVGA3DBLOCKDESC_RG_FP      = SVGA3DBLOCKDESC_R_FP |
+	SVGA3DBLOCKDESC_GREEN,
+	SVGA3DBLOCKDESC_RGB_FP     = SVGA3DBLOCKDESC_RG_FP |
+	SVGA3DBLOCKDESC_BLUE,
+	SVGA3DBLOCKDESC_RGBA_FP    = SVGA3DBLOCKDESC_RGB_FP |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_DS         = SVGA3DBLOCKDESC_DEPTH |
+	SVGA3DBLOCKDESC_STENCIL,
+	SVGA3DBLOCKDESC_YUV        = SVGA3DBLOCKDESC_UV_VIDEO |
+	SVGA3DBLOCKDESC_Y,
+	SVGA3DBLOCKDESC_AYUV       = SVGA3DBLOCKDESC_ALPHA |
+	SVGA3DBLOCKDESC_Y |
+	SVGA3DBLOCKDESC_U_VIDEO |
+	SVGA3DBLOCKDESC_V_VIDEO,
+	SVGA3DBLOCKDESC_RGBE       = SVGA3DBLOCKDESC_RGB |
+	SVGA3DBLOCKDESC_EXP,
+	SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
+	SVGA3DBLOCKDESC_SRGB,
+	SVGA3DBLOCKDESC_NV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+	SVGA3DBLOCKDESC_2PLANAR_YUV,
+	SVGA3DBLOCKDESC_YV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+	SVGA3DBLOCKDESC_3PLANAR_YUV,
+};
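
(Because the composite values are plain ORs of single bits, predicates on
a descriptor reduce to mask tests, in the style of
svga3dsurface_is_planar_surface() further down; e.g. a hypothetical
compression test would be:)

	static inline bool
	svga3dsurface_is_compressed(const struct svga3d_surface_desc *desc)
	{
		return (desc->block_desc & SVGA3DBLOCKDESC_COMPRESSED) != 0;
	}
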
+
+/*
+ * struct svga3d_surface_desc describes the actual pixel data.
+ *
+ * This structure provides the following information:
+ *    1. Block description.
+ *    2. Dimensions of a block in the surface.
+ *    3. Size of block in bytes.
+ *    4. Bit depth of the pixel data.
+ *    5. Channel bit depths and masks (if applicable).
+ */
+#define SVGA3D_CHANNEL_DEF(type)		\
+	struct {				\
+		union {				\
+			type blue;              \
+			type u;                 \
+			type uv_video;          \
+			type u_video;           \
+		};				\
+		union {				\
+			type green;             \
+			type v;                 \
+			type stencil;           \
+			type v_video;           \
+		};				\
+		union {				\
+			type red;               \
+			type w;                 \
+			type luminance;         \
+			type y;                 \
+			type depth;             \
+			type data;              \
+		};				\
+		union {				\
+			type alpha;             \
+			type q;                 \
+			type exp;               \
+		};				\
+	}
+
+struct svga3d_surface_desc {
+	enum svga3d_block_desc block_desc;
+	surf_size_struct block_size;
+	u32 bytes_per_block;
+	u32 pitch_bytes_per_block;
+
+	struct {
+		u32 total;
+		SVGA3D_CHANNEL_DEF(uint8);
+	} bit_depth;
+
+	struct {
+		SVGA3D_CHANNEL_DEF(uint8);
+	} bit_offset;
+};
+
+static const struct svga3d_surface_desc svga3d_surface_descs[] = {
+	{SVGA3DBLOCKDESC_NONE,
+	 {1, 1, 1},  0, 0, {0, {{0}, {0}, {0}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_FORMAT_INVALID */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_X8R8G8B8 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_A8R8G8B8 */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  2, 2, {16, {{5}, {6}, {5}, {0} } },
+	 {{{0}, {5}, {11}, {0} } } },    /* SVGA3D_R5G6B5 */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  2, 2, {15, {{5}, {5}, {5}, {0} } },
+	 {{{0}, {5}, {10}, {0} } } },    /* SVGA3D_X1R5G5B5 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  2, 2, {16, {{5}, {5}, {5}, {1} } },
+	 {{{0}, {5}, {10}, {15} } } },   /* SVGA3D_A1R5G5B5 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  2, 2, {16, {{4}, {4}, {4}, {4} } },
+	 {{{0}, {4}, {8}, {12} } } },    /* SVGA3D_A4R4G4B4 */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D32 */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D16 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  2, 2, {16, {{0}, {1}, {15}, {0} } },
+	 {{{0}, {15}, {0}, {0} } } },    /* SVGA3D_Z_D15S1 */
+
+	{SVGA3DBLOCKDESC_LUMINANCE,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE8 */
+
+	{SVGA3DBLOCKDESC_LA,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {4}, {4} } },
+	 {{{0}, {0}, {0}, {4} } } },     /* SVGA3D_LUMINANCE4_ALPHA4 */
+
+	{SVGA3DBLOCKDESC_LUMINANCE,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE16 */
+
+	{SVGA3DBLOCKDESC_LA,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
+	 {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_LUMINANCE8_ALPHA8 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT1 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT2 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT3 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT4 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT5 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
+	 {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_BUMPU8V8 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  2, 2, {16, {{5}, {5}, {6}, {0} } },
+	 {{{11}, {6}, {0}, {0} } } },    /* SVGA3D_BUMPL6V5U5 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {0} } },
+	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPX8L8V8U8 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  3, 3, {24, {{8}, {8}, {8}, {0} } },
+	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPL8V8U8 */
+
+	{SVGA3DBLOCKDESC_RGBA_FP,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_ARGB_S10E5 */
+
+	{SVGA3DBLOCKDESC_RGBA_FP,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_ARGB_S23E8 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2R10G10B10 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
+	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_V8U8 */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{24}, {16}, {8}, {0} } } },   /* SVGA3D_Q8W8V8U8 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
+	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_CxV8U8 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_X8L8V8U8 */
+
+	{SVGA3DBLOCKDESC_UVWA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2W10V10U10 */
+
+	{SVGA3DBLOCKDESC_ALPHA,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {0}, {8} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_ALPHA8 */
+
+	{SVGA3DBLOCKDESC_R_FP,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S10E5 */
+
+	{SVGA3DBLOCKDESC_R_FP,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S23E8 */
+
+	{SVGA3DBLOCKDESC_RG_FP,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_RG_S10E5 */
+
+	{SVGA3DBLOCKDESC_RG_FP,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_RG_S23E8 */
+
+	{SVGA3DBLOCKDESC_BUFFER,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BUFFER */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24X8 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  4, 4, {32, {{16}, {16}, {0}, {0} } },
+	 {{{16}, {0}, {0}, {0} } } },    /* SVGA3D_V16U16 */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {0}, {16}, {0} } } },    /* SVGA3D_G16R16 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_A16B16G16R16 */
+
+	{SVGA3DBLOCKDESC_YUV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {8}, {0} } } },     /* SVGA3D_UYVY */
+
+	{SVGA3DBLOCKDESC_YUV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
+	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_YUY2 */
+
+	{SVGA3DBLOCKDESC_NV12,
+	 {2, 2, 1},  6, 2, {48, {{0}, {0}, {48}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_NV12 */
+
+	{SVGA3DBLOCKDESC_AYUV,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_AYUV */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_UINT */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_SINT */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGB_FP,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_FLOAT */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_UINT */
+
+	{SVGA3DBLOCKDESC_UVW,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_SINT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_UINT */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SNORM */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_UINT */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G8X24_TYPELESS */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_D32_FLOAT_S8X24_UINT */
+
+	{SVGA3DBLOCKDESC_R_FP,
+	 {1, 1, 1},  8, 8, {64, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },    /* SVGA3D_R32_FLOAT_X8_X24_TYPELESS */
+
+	{SVGA3DBLOCKDESC_GREEN,
+	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {0}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_X32_TYPELESS_G8X24_UINT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_UINT */
+
+	{SVGA3DBLOCKDESC_RGB_FP,
+	 {1, 1, 1},  4, 4, {32, {{10}, {11}, {11}, {0} } },
+	 {{{0}, {10}, {21}, {0} } } },  /* SVGA3D_R11G11B10_FLOAT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM */
+
+	{SVGA3DBLOCKDESC_RGBA_SRGB,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UINT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RG_FP,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_UINT */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_D32_FLOAT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_UINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_R24G8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_D24_UNORM_S8_UINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R24_UNORM_X8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_GREEN,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {0}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_X24_TYPELESS_G8_UINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UNORM */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UINT */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UNORM */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UINT */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SNORM */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UNORM */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UINT */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SNORM */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {8, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R1_UNORM */
+
+	{SVGA3DBLOCKDESC_RGBE,
+	 {1, 1, 1},  4, 4, {32, {{9}, {9}, {9}, {5} } },
+	 {{{18}, {9}, {0}, {27} } } },   /* SVGA3D_R9G9B9E5_SHAREDEXP */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_B8G8_UNORM */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_G8R8_G8B8_UNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_UNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_SNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_UNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_SNORM */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10_XR_BIAS_A2_UNORM */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA_SRGB,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGB_SRGB,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_DF16 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_DF24 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8_INT */
+};
+
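+/*
+ * clamped_umul32 - Saturating 32-bit multiply.
+ *
+ * The product is computed in 64-bit precision and clamped to the largest
+ * u32 value on overflow; for example, clamped_umul32(0x10000, 0x10000)
+ * returns 0xffffffff instead of wrapping to zero.
+ */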
+static inline u32 clamped_umul32(u32 a, u32 b)
+{
+	uint64_t tmp = (uint64_t) a*b;
+	return (tmp > (uint64_t) ((u32) -1)) ? (u32) -1 : tmp;
+}
+
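+/*
+ * svga3dsurface_get_desc - Look up the block descriptor for a surface
+ * format. Out-of-range values fall back to the SVGA3D_FORMAT_INVALID
+ * descriptor, so callers always receive a valid pointer.
+ */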
+static inline const struct svga3d_surface_desc *
+svga3dsurface_get_desc(SVGA3dSurfaceFormat format)
+{
+	if (format < ARRAY_SIZE(svga3d_surface_descs))
+		return &svga3d_surface_descs[format];
+
+	return &svga3d_surface_descs[SVGA3D_FORMAT_INVALID];
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * svga3dsurface_get_mip_size --
+ *
+ *      Given a base level size and the mip level, compute the size of
+ *      the mip level.
+ *
+ * Results:
+ *      See above.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline surf_size_struct
+svga3dsurface_get_mip_size(surf_size_struct base_level, u32 mip_level)
+{
+	surf_size_struct size;
+
+	size.width = max_t(u32, base_level.width >> mip_level, 1);
+	size.height = max_t(u32, base_level.height >> mip_level, 1);
+	size.depth = max_t(u32, base_level.depth >> mip_level, 1);
+	return size;
+}
+
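+/*
+ * svga3dsurface_get_size_in_blocks - Convert a size in pixels to a size
+ * in (compression) blocks, rounding each dimension up so that partial
+ * blocks at the edges are counted.
+ */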
+static inline void
+svga3dsurface_get_size_in_blocks(const struct svga3d_surface_desc *desc,
+				 const surf_size_struct *pixel_size,
+				 surf_size_struct *block_size)
+{
+	block_size->width = DIV_ROUND_UP(pixel_size->width,
+					 desc->block_size.width);
+	block_size->height = DIV_ROUND_UP(pixel_size->height,
+					  desc->block_size.height);
+	block_size->depth = DIV_ROUND_UP(pixel_size->depth,
+					 desc->block_size.depth);
+}
+
+static inline bool
+svga3dsurface_is_planar_surface(const struct svga3d_surface_desc *desc)
+{
+	return (desc->block_desc & SVGA3DBLOCKDESC_PLANAR_YUV) != 0;
+}
+
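+/*
+ * svga3dsurface_calculate_pitch - Number of bytes per row of blocks in a
+ * tightly packed image of the given pixel size.
+ */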
+static inline u32
+svga3dsurface_calculate_pitch(const struct svga3d_surface_desc *desc,
+			      const surf_size_struct *size)
+{
+	u32 pitch;
+	surf_size_struct blocks;
+
+	svga3dsurface_get_size_in_blocks(desc, size, &blocks);
+
+	pitch = blocks.width * desc->pitch_bytes_per_block;
+
+	return pitch;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * svga3dsurface_get_image_buffer_size --
+ *
+ *      Return the number of bytes of buffer space required to store
+ *      one image of a surface, optionally using the specified pitch.
+ *
+ *      If pitch is zero, it is assumed that rows are tightly packed.
+ *
+ *      This function is overflow-safe. If the result would have
+ *      overflowed, instead we return MAX_UINT32.
+ *
+ * Results:
+ *      Byte count.
+ *
+ * Side effects:
+ *      None.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static inline u32
+svga3dsurface_get_image_buffer_size(const struct svga3d_surface_desc *desc,
+				    const surf_size_struct *size,
+				    u32 pitch)
+{
+	surf_size_struct image_blocks;
+	u32 slice_size, total_size;
+
+	svga3dsurface_get_size_in_blocks(desc, size, &image_blocks);
+
+	if (svga3dsurface_is_planar_surface(desc)) {
+		total_size = clamped_umul32(image_blocks.width,
+					    image_blocks.height);
+		total_size = clamped_umul32(total_size, image_blocks.depth);
+		total_size = clamped_umul32(total_size, desc->bytes_per_block);
+		return total_size;
+	}
+
+	if (pitch == 0)
+		pitch = svga3dsurface_calculate_pitch(desc, size);
+
+	slice_size = clamped_umul32(image_blocks.height, pitch);
+	total_size = clamped_umul32(slice_size, image_blocks.depth);
+
+	return total_size;
+}
+
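+/*
+ * svga3dsurface_get_serialized_size - Total number of bytes needed to
+ * hold all mip levels of a surface, each tightly packed (pitch == 0).
+ * For example, a 3-level 8x8 surface in a 32-bit 1x1-block format needs
+ * 256 + 64 + 16 bytes. For cubemaps the per-face total is multiplied by
+ * SVGA3D_MAX_SURFACE_FACES.
+ */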
+static inline u32
+svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format,
+				  surf_size_struct base_level_size,
+				  u32 num_mip_levels,
+				  bool cubemap)
+{
+	const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
+	u32 total_size = 0;
+	u32 mip;
+
+	for (mip = 0; mip < num_mip_levels; mip++) {
+		surf_size_struct size =
+			svga3dsurface_get_mip_size(base_level_size, mip);
+		total_size += svga3dsurface_get_image_buffer_size(desc,
+								  &size, 0);
+	}
+
+	if (cubemap)
+		total_size *= SVGA3D_MAX_SURFACE_FACES;
+
+	return total_size;
+}
+
+
+/**
+ * svga3dsurface_get_pixel_offset - Compute the offset (in bytes) to a pixel
+ * in an image (or volume).
+ *
+ * @format: The surface format.
+ * @width: The image width in pixels.
+ * @height: The image height in pixels.
+ * @x: X coordinate of the pixel.
+ * @y: Y coordinate of the pixel.
+ * @z: Z coordinate (slice) of the pixel.
+ */
+static inline u32
+svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
+			       u32 width, u32 height,
+			       u32 x, u32 y, u32 z)
+{
+	const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
+	const u32 bw = desc->block_size.width, bh = desc->block_size.height;
+	const u32 bd = desc->block_size.depth;
+	const u32 rowstride = DIV_ROUND_UP(width, bw) * desc->bytes_per_block;
+	const u32 imgstride = DIV_ROUND_UP(height, bh) * rowstride;
+	const u32 offset = (z / bd * imgstride +
+			    y / bh * rowstride +
+			    x / bw * desc->bytes_per_block);
+	return offset;
+}
+
+
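+/*
+ * svga3dsurface_get_image_offset - Byte offset of a given face/mip image
+ * within a serialized surface. The layout is one full mip chain per
+ * face, so the offset is the mip-chain size times @face plus the sizes
+ * of all mip levels preceding @mip.
+ */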
+static inline u32
+svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
+			       surf_size_struct baseLevelSize,
+			       u32 numMipLevels,
+			       u32 face,
+			       u32 mip)
+
+{
+	u32 offset;
+	u32 mipChainBytes;
+	u32 mipChainBytesToLevel;
+	u32 i;
+	const struct svga3d_surface_desc *desc;
+	surf_size_struct mipSize;
+	u32 bytes;
+
+	desc = svga3dsurface_get_desc(format);
+
+	mipChainBytes = 0;
+	mipChainBytesToLevel = 0;
+	for (i = 0; i < numMipLevels; i++) {
+		mipSize = svga3dsurface_get_mip_size(baseLevelSize, i);
+		bytes = svga3dsurface_get_image_buffer_size(desc, &mipSize, 0);
+		mipChainBytes += bytes;
+		if (i < mip)
+			mipChainBytesToLevel += bytes;
+	}
+
+	offset = mipChainBytes * face + mipChainBytesToLevel;
+
+	return offset;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 9826fbc..96dc84d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -248,13 +248,12 @@
 	*placement = vmw_sys_placement;
 }
 
-/**
- * FIXME: Proper access checks on buffers.
- */
-
 static int vmw_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	return 0;
+	struct ttm_object_file *tfile =
+		vmw_fpriv((struct drm_file *)filp->private_data)->tfile;
+
+	return vmw_user_dmabuf_verify_access(bo, tfile);
 }
 
 static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
@@ -310,27 +309,23 @@
 	vmw_fence_obj_unreference((struct vmw_fence_obj **) sync_obj);
 }
 
-static int vmw_sync_obj_flush(void *sync_obj, void *sync_arg)
+static int vmw_sync_obj_flush(void *sync_obj)
 {
 	vmw_fence_obj_flush((struct vmw_fence_obj *) sync_obj);
 	return 0;
 }
 
-static bool vmw_sync_obj_signaled(void *sync_obj, void *sync_arg)
+static bool vmw_sync_obj_signaled(void *sync_obj)
 {
-	unsigned long flags = (unsigned long) sync_arg;
 	return	vmw_fence_obj_signaled((struct vmw_fence_obj *) sync_obj,
-				       (uint32_t) flags);
+				       DRM_VMW_FENCE_FLAG_EXEC);
 
 }
 
-static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg,
-			     bool lazy, bool interruptible)
+static int vmw_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
 {
-	unsigned long flags = (unsigned long) sync_arg;
-
 	return vmw_fence_obj_wait((struct vmw_fence_obj *) sync_obj,
-				  (uint32_t) flags,
+				  DRM_VMW_FENCE_FLAG_EXEC,
 				  lazy, interruptible,
 				  VMW_FENCE_WAIT_TIMEOUT);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
new file mode 100644
index 0000000..00ae092
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
@@ -0,0 +1,274 @@
+/**************************************************************************
+ *
+ * Copyright © 2009-2012 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vmwgfx_drv.h"
+#include "vmwgfx_resource_priv.h"
+#include "ttm/ttm_placement.h"
+
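+/**
+ * struct vmw_user_context - User-space visible context resource.
+ *
+ * @base: TTM base object backing the user-space handle.
+ * @res: The embedded context resource.
+ */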
+struct vmw_user_context {
+	struct ttm_base_object base;
+	struct vmw_resource res;
+};
+
+static void vmw_user_context_free(struct vmw_resource *res);
+static struct vmw_resource *
+vmw_user_context_base_to_res(struct ttm_base_object *base);
+
+static uint64_t vmw_user_context_size;
+
+static const struct vmw_user_resource_conv user_context_conv = {
+	.object_type = VMW_RES_CONTEXT,
+	.base_obj_to_res = vmw_user_context_base_to_res,
+	.res_free = vmw_user_context_free
+};
+
+const struct vmw_user_resource_conv *user_context_converter =
+	&user_context_conv;
+
+
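+/*
+ * Resource-type callbacks for legacy contexts: they need no backup
+ * buffer and are never evicted, so the create/destroy/bind/unbind
+ * operations are all left NULL.
+ */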
+static const struct vmw_res_func vmw_legacy_context_func = {
+	.res_type = vmw_res_context,
+	.needs_backup = false,
+	.may_evict = false,
+	.type_name = "legacy contexts",
+	.backup_placement = NULL,
+	.create = NULL,
+	.destroy = NULL,
+	.bind = NULL,
+	.unbind = NULL
+};
+
+/**
+ * Context management:
+ */
+
+static void vmw_hw_context_destroy(struct vmw_resource *res)
+{
+
+	struct vmw_private *dev_priv = res->dev_priv;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdDestroyContext body;
+	} *cmd;
+
+
+	vmw_execbuf_release_pinned_bo(dev_priv);
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "destruction.\n");
+		return;
+	}
+
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_CONTEXT_DESTROY);
+	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
+	cmd->body.cid = cpu_to_le32(res->id);
+
+	vmw_fifo_commit(dev_priv, sizeof(*cmd));
+	vmw_3d_resource_dec(dev_priv, false);
+}
+
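+/*
+ * vmw_context_init - Initialize a context resource and define the
+ * corresponding hardware context in the device FIFO. On failure the
+ * resource is freed, via @res_free if given or kfree() otherwise.
+ */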
+static int vmw_context_init(struct vmw_private *dev_priv,
+			    struct vmw_resource *res,
+			    void (*res_free) (struct vmw_resource *res))
+{
+	int ret;
+
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdDefineContext body;
+	} *cmd;
+
+	ret = vmw_resource_init(dev_priv, res, false,
+				res_free, &vmw_legacy_context_func);
+
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Failed to allocate a resource id.\n");
+		goto out_early;
+	}
+
+	if (unlikely(res->id >= SVGA3D_MAX_CONTEXT_IDS)) {
+		DRM_ERROR("Out of hw context ids.\n");
+		vmw_resource_unreference(&res);
+		return -ENOMEM;
+	}
+
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Fifo reserve failed.\n");
+		vmw_resource_unreference(&res);
+		return -ENOMEM;
+	}
+
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_CONTEXT_DEFINE);
+	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
+	cmd->body.cid = cpu_to_le32(res->id);
+
+	vmw_fifo_commit(dev_priv, sizeof(*cmd));
+	(void) vmw_3d_resource_inc(dev_priv, false);
+	vmw_resource_activate(res, vmw_hw_context_destroy);
+	return 0;
+
+out_early:
+	if (res_free == NULL)
+		kfree(res);
+	else
+		res_free(res);
+	return ret;
+}
+
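+/*
+ * vmw_context_alloc - Allocate and initialize a kernel-side context
+ * resource. Returns NULL on failure; vmw_context_init() frees the
+ * resource in that case.
+ */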
+struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv)
+{
+	struct vmw_resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
+	int ret;
+
+	if (unlikely(res == NULL))
+		return NULL;
+
+	ret = vmw_context_init(dev_priv, res, NULL);
+
+	return (ret == 0) ? res : NULL;
+}
+
+/**
+ * User-space context management:
+ */
+
+static struct vmw_resource *
+vmw_user_context_base_to_res(struct ttm_base_object *base)
+{
+	return &(container_of(base, struct vmw_user_context, base)->res);
+}
+
+static void vmw_user_context_free(struct vmw_resource *res)
+{
+	struct vmw_user_context *ctx =
+	    container_of(res, struct vmw_user_context, res);
+	struct vmw_private *dev_priv = res->dev_priv;
+
+	ttm_base_object_kfree(ctx, base);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv),
+			    vmw_user_context_size);
+}
+
+/**
+ * This function is called when user space has no more references on the
+ * base object. It releases the base-object's reference on the resource object.
+ */
+
+static void vmw_user_context_base_release(struct ttm_base_object **p_base)
+{
+	struct ttm_base_object *base = *p_base;
+	struct vmw_user_context *ctx =
+	    container_of(base, struct vmw_user_context, base);
+	struct vmw_resource *res = &ctx->res;
+
+	*p_base = NULL;
+	vmw_resource_unreference(&res);
+}
+
+int vmw_context_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv)
+{
+	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+
+	return ttm_ref_object_base_unref(tfile, arg->cid, TTM_REF_USAGE);
+}
+
+int vmw_context_define_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct vmw_user_context *ctx;
+	struct vmw_resource *res;
+	struct vmw_resource *tmp;
+	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	int ret;
+
+
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by the maximum number of contexts anyway.
+	 */
+
+	if (unlikely(vmw_user_context_size == 0))
+		vmw_user_context_size = ttm_round_pot(sizeof(*ctx)) + 128;
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_context_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for context"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (unlikely(ctx == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_context_size);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	res = &ctx->res;
+	ctx->base.shareable = false;
+	ctx->base.tfile = NULL;
+
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
+	ret = vmw_context_init(dev_priv, res, vmw_user_context_free);
+	if (unlikely(ret != 0))
+		goto out_unlock;
+
+	tmp = vmw_resource_reference(&ctx->res);
+	ret = ttm_base_object_init(tfile, &ctx->base, false, VMW_RES_CONTEXT,
+				   &vmw_user_context_base_release, NULL);
+
+	if (unlikely(ret != 0)) {
+		vmw_resource_unreference(&tmp);
+		goto out_err;
+	}
+
+	arg->cid = ctx->base.hash.key;
+out_err:
+	vmw_resource_unreference(&res);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
+	return ret;
+
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
index d1498bf..5fae06a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -60,13 +60,13 @@
 	if (unlikely(ret != 0))
 		return ret;
 
-	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+	vmw_execbuf_release_pinned_bo(dev_priv);
 
 	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
 	if (unlikely(ret != 0))
 		goto err;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 
 	ttm_bo_unreserve(bo);
 
@@ -105,7 +105,7 @@
 		return ret;
 
 	if (pin)
-		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+		vmw_execbuf_release_pinned_bo(dev_priv);
 
 	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
 	if (unlikely(ret != 0))
@@ -123,7 +123,7 @@
 	else
 		placement = &vmw_vram_gmr_placement;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 	if (likely(ret == 0) || ret == -ERESTARTSYS)
 		goto err_unreserve;
 
@@ -138,7 +138,7 @@
 	else
 		placement = &vmw_vram_placement;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, placement, interruptible, false);
 
 err_unreserve:
 	ttm_bo_unreserve(bo);
@@ -214,8 +214,7 @@
 		return ret;
 
 	if (pin)
-		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
-
+		vmw_execbuf_release_pinned_bo(dev_priv);
 	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
 	if (unlikely(ret != 0))
 		goto err_unlock;
@@ -224,10 +223,9 @@
 	if (bo->mem.mem_type == TTM_PL_VRAM &&
 	    bo->mem.start < bo->num_pages &&
 	    bo->mem.start > 0)
-		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
-				       false, false);
+		(void) ttm_bo_validate(bo, &vmw_sys_placement, false, false);
 
-	ret = ttm_bo_validate(bo, &placement, interruptible, false, false);
+	ret = ttm_bo_validate(bo, &placement, interruptible, false);
 
 	/* For some reason we didn't end up at the start of vram */
 	WARN_ON(ret == 0 && bo->offset != 0);
@@ -304,7 +302,7 @@
 	uint32_t old_mem_type = bo->mem.mem_type;
 	int ret;
 
-	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(!ttm_bo_is_reserved(bo));
 	BUG_ON(old_mem_type != TTM_PL_VRAM &&
 	       old_mem_type != VMW_PL_GMR);
 
@@ -316,7 +314,7 @@
 	placement.num_placement = 1;
 	placement.placement = &pl_flags;
 
-	ret = ttm_bo_validate(bo, &placement, false, true, true);
+	ret = ttm_bo_validate(bo, &placement, false, true);
 
 	BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 2dd185e..161f8b2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -292,7 +292,7 @@
 			     PAGE_SIZE,
 			     ttm_bo_type_device,
 			     &vmw_vram_sys_placement,
-			     0, 0, false, NULL,
+			     0, false, NULL,
 			     &dev_priv->dummy_query_bo);
 }
 
@@ -432,6 +432,7 @@
 	struct vmw_private *dev_priv;
 	int ret;
 	uint32_t svga_id;
+	enum vmw_res_type i;
 
 	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
 	if (unlikely(dev_priv == NULL)) {
@@ -448,15 +449,18 @@
 	mutex_init(&dev_priv->cmdbuf_mutex);
 	mutex_init(&dev_priv->release_mutex);
 	rwlock_init(&dev_priv->resource_lock);
-	idr_init(&dev_priv->context_idr);
-	idr_init(&dev_priv->surface_idr);
-	idr_init(&dev_priv->stream_idr);
+
+	for (i = vmw_res_context; i < vmw_res_max; ++i) {
+		idr_init(&dev_priv->res_idr[i]);
+		INIT_LIST_HEAD(&dev_priv->res_lru[i]);
+	}
+
 	mutex_init(&dev_priv->init_mutex);
 	init_waitqueue_head(&dev_priv->fence_queue);
 	init_waitqueue_head(&dev_priv->fifo_queue);
 	dev_priv->fence_queue_waiters = 0;
 	atomic_set(&dev_priv->fifo_queue_waiters, 0);
-	INIT_LIST_HEAD(&dev_priv->surface_lru);
+
 	dev_priv->used_memory_size = 0;
 
 	dev_priv->io_start = pci_resource_start(dev->pdev, 0);
@@ -609,14 +613,18 @@
 		}
 	}
 
+	if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
+		ret = drm_irq_install(dev);
+		if (ret != 0) {
+			DRM_ERROR("Failed installing irq: %d\n", ret);
+			goto out_no_irq;
+		}
+	}
+
 	dev_priv->fman = vmw_fence_manager_init(dev_priv);
 	if (unlikely(dev_priv->fman == NULL))
 		goto out_no_fman;
 
-	/* Need to start the fifo to check if we can do screen objects */
-	ret = vmw_3d_resource_inc(dev_priv, true);
-	if (unlikely(ret != 0))
-		goto out_no_fifo;
 	vmw_kms_save_vga(dev_priv);
 
 	/* Start kms and overlay systems, needs fifo. */
@@ -625,25 +633,11 @@
 		goto out_no_kms;
 	vmw_overlay_init(dev_priv);
 
-	/* 3D Depends on Screen Objects being used. */
-	DRM_INFO("Detected %sdevice 3D availability.\n",
-		 vmw_fifo_have_3d(dev_priv) ?
-		 "" : "no ");
-
-	/* We might be done with the fifo now */
 	if (dev_priv->enable_fb) {
+		ret = vmw_3d_resource_inc(dev_priv, true);
+		if (unlikely(ret != 0))
+			goto out_no_fifo;
 		vmw_fb_init(dev_priv);
-	} else {
-		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv, true);
-	}
-
-	if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
-		ret = drm_irq_install(dev);
-		if (unlikely(ret != 0)) {
-			DRM_ERROR("Failed installing irq: %d\n", ret);
-			goto out_no_irq;
-		}
 	}
 
 	dev_priv->pm_nb.notifier_call = vmwgfx_pm_notifier;
@@ -651,20 +645,16 @@
 
 	return 0;
 
-out_no_irq:
-	if (dev_priv->enable_fb)
-		vmw_fb_close(dev_priv);
+out_no_fifo:
 	vmw_overlay_close(dev_priv);
 	vmw_kms_close(dev_priv);
 out_no_kms:
-	/* We still have a 3D resource reference held */
-	if (dev_priv->enable_fb) {
-		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv, false);
-	}
-out_no_fifo:
+	vmw_kms_restore_vga(dev_priv);
 	vmw_fence_manager_takedown(dev_priv->fman);
 out_no_fman:
+	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
+		drm_irq_uninstall(dev_priv->dev);
+out_no_irq:
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
@@ -684,9 +674,9 @@
 out_err1:
 	vmw_ttm_global_release(dev_priv);
 out_err0:
-	idr_destroy(&dev_priv->surface_idr);
-	idr_destroy(&dev_priv->context_idr);
-	idr_destroy(&dev_priv->stream_idr);
+	for (i = vmw_res_context; i < vmw_res_max; ++i)
+		idr_destroy(&dev_priv->res_idr[i]);
+
 	kfree(dev_priv);
 	return ret;
 }
@@ -694,13 +684,14 @@
 static int vmw_driver_unload(struct drm_device *dev)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
+	enum vmw_res_type i;
 
 	unregister_pm_notifier(&dev_priv->pm_nb);
 
+	if (dev_priv->ctx.res_ht_initialized)
+		drm_ht_remove(&dev_priv->ctx.res_ht);
 	if (dev_priv->ctx.cmd_bounce)
 		vfree(dev_priv->ctx.cmd_bounce);
-	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
-		drm_irq_uninstall(dev_priv->dev);
 	if (dev_priv->enable_fb) {
 		vmw_fb_close(dev_priv);
 		vmw_kms_restore_vga(dev_priv);
@@ -709,6 +700,8 @@
 	vmw_kms_close(dev_priv);
 	vmw_overlay_close(dev_priv);
 	vmw_fence_manager_takedown(dev_priv->fman);
+	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
+		drm_irq_uninstall(dev_priv->dev);
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
@@ -723,9 +716,9 @@
 	(void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM);
 	(void)ttm_bo_device_release(&dev_priv->bdev);
 	vmw_ttm_global_release(dev_priv);
-	idr_destroy(&dev_priv->surface_idr);
-	idr_destroy(&dev_priv->context_idr);
-	idr_destroy(&dev_priv->stream_idr);
+
+	for (i = vmw_res_context; i < vmw_res_max; ++i)
+		idr_destroy(&dev_priv->res_idr[i]);
 
 	kfree(dev_priv);
 
@@ -924,11 +917,11 @@
 
 out_no_active_lock:
 	if (!dev_priv->enable_fb) {
+		vmw_kms_restore_vga(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 		mutex_lock(&dev_priv->hw_mutex);
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
 		mutex_unlock(&dev_priv->hw_mutex);
-		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv, true);
 	}
 	return ret;
 }
@@ -949,7 +942,7 @@
 
 	vmw_fp->locked_master = drm_master_get(file_priv->master);
 	ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
-	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+	vmw_execbuf_release_pinned_bo(dev_priv);
 
 	if (unlikely((ret != 0))) {
 		DRM_ERROR("Unable to lock TTM at VT switch.\n");
@@ -962,11 +955,11 @@
 		ret = ttm_bo_evict_mm(&dev_priv->bdev, TTM_PL_VRAM);
 		if (unlikely(ret != 0))
 			DRM_ERROR("Unable to clean VRAM on master drop.\n");
+		vmw_kms_restore_vga(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 		mutex_lock(&dev_priv->hw_mutex);
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
 		mutex_unlock(&dev_priv->hw_mutex);
-		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv, true);
 	}
 
 	dev_priv->active_master = &dev_priv->fbdev_master;
@@ -1001,7 +994,8 @@
 		 * This empties VRAM and unbinds all GMR bindings.
 		 * Buffer contents is moved to swappable memory.
 		 */
-		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+		vmw_execbuf_release_pinned_bo(dev_priv);
+		vmw_resource_evict_all(dev_priv);
 		ttm_bo_swapout_all(&dev_priv->bdev);
 
 		break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 88a179e..13aeda7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -67,31 +67,46 @@
 
 struct vmw_dma_buffer {
 	struct ttm_buffer_object base;
-	struct list_head validate_list;
-	bool gmr_bound;
-	uint32_t cur_validate_node;
-	bool on_validate_list;
+	struct list_head res_list;
 };
 
+/**
+ * struct vmw_validate_buffer - Carries validation info about buffers.
+ *
+ * @base: Validation info for TTM.
+ * @hash: Hash entry for quick lookup of the TTM buffer object.
+ *
+ * This structure also carries driver-private validation info
+ * on top of the info needed by TTM.
+ */
+struct vmw_validate_buffer {
+	struct ttm_validate_buffer base;
+	struct drm_hash_item hash;
+};
+
+struct vmw_res_func;
 struct vmw_resource {
 	struct kref kref;
 	struct vmw_private *dev_priv;
-	struct idr *idr;
 	int id;
-	enum ttm_object_type res_type;
 	bool avail;
-	void (*remove_from_lists) (struct vmw_resource *res);
-	void (*hw_destroy) (struct vmw_resource *res);
+	unsigned long backup_size;
+	bool res_dirty; /* Protected by backup buffer reserved */
+	bool backup_dirty; /* Protected by backup buffer reserved */
+	struct vmw_dma_buffer *backup;
+	unsigned long backup_offset;
+	const struct vmw_res_func *func;
+	struct list_head lru_head; /* Protected by the resource lock */
+	struct list_head mob_head; /* Protected by @backup reserved */
 	void (*res_free) (struct vmw_resource *res);
-	struct list_head validate_head;
-	struct list_head query_head; /* Protected by the cmdbuf mutex */
-	/* TODO is a generic snooper needed? */
-#if 0
-	void (*snoop)(struct vmw_resource *res,
-		      struct ttm_object_file *tfile,
-		      SVGA3dCmdHeader *header);
-	void *snoop_priv;
-#endif
+	void (*hw_destroy) (struct vmw_resource *res);
+};
+
+enum vmw_res_type {
+	vmw_res_context,
+	vmw_res_surface,
+	vmw_res_stream,
+	vmw_res_max
 };
 
 struct vmw_cursor_snooper {
@@ -105,20 +120,18 @@
 
 struct vmw_surface {
 	struct vmw_resource res;
-	struct list_head lru_head; /* Protected by the resource lock */
 	uint32_t flags;
 	uint32_t format;
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
+	struct drm_vmw_size base_size;
 	struct drm_vmw_size *sizes;
 	uint32_t num_sizes;
-
 	bool scanout;
-
 	/* TODO so far just an extra pointer */
 	struct vmw_cursor_snooper snooper;
-	struct ttm_buffer_object *backup;
 	struct vmw_surface_offset *offsets;
-	uint32_t backup_size;
+	SVGA3dTextureFilter autogen_filter;
+	uint32_t multisample_count;
 };
 
 struct vmw_marker_queue {
@@ -145,29 +158,46 @@
 	uint32_t index;
 };
 
+/**
+ * struct vmw_res_cache_entry - resource information cache entry
+ *
+ * @valid: Whether the entry is valid, which also implies that the execbuf
+ * code holds a reference to the resource, and it's placed on the
+ * validation list.
+ * @handle: User-space handle of a resource.
+ * @res: Non-ref-counted pointer to the resource.
+ *
+ * Used to avoid frequent repeated user-space handle lookups of the
+ * same resource.
+ */
+struct vmw_res_cache_entry {
+	bool valid;
+	uint32_t handle;
+	struct vmw_resource *res;
+	struct vmw_resource_val_node *node;
+};
+
 struct vmw_sw_context{
-	struct ida bo_list;
-	uint32_t last_cid;
-	bool cid_valid;
+	struct drm_open_hash res_ht;
+	bool res_ht_initialized;
 	bool kernel; /**< is the called made from the kernel */
-	struct vmw_resource *cur_ctx;
-	uint32_t last_sid;
-	uint32_t sid_translation;
-	bool sid_valid;
 	struct ttm_object_file *tfile;
 	struct list_head validate_nodes;
 	struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS];
 	uint32_t cur_reloc;
-	struct ttm_validate_buffer val_bufs[VMWGFX_MAX_VALIDATIONS];
+	struct vmw_validate_buffer val_bufs[VMWGFX_MAX_VALIDATIONS];
 	uint32_t cur_val_buf;
 	uint32_t *cmd_bounce;
 	uint32_t cmd_bounce_size;
 	struct list_head resource_list;
 	uint32_t fence_flags;
-	struct list_head query_list;
 	struct ttm_buffer_object *cur_query_bo;
-	uint32_t cur_query_cid;
-	bool query_cid_valid;
+	struct list_head res_relocations;
+	uint32_t *buf_start;
+	struct vmw_res_cache_entry res_cache[vmw_res_max];
+	struct vmw_resource *last_query_ctx;
+	bool needs_post_query_barrier;
+	struct vmw_resource *error_resource;
 };
 
 struct vmw_legacy_display;
@@ -242,10 +272,7 @@
 	 */
 
 	rwlock_t resource_lock;
-	struct idr context_idr;
-	struct idr surface_idr;
-	struct idr stream_idr;
-
+	struct idr res_idr[vmw_res_max];
 	/*
 	 * Block lastclose from racing with firstopen.
 	 */
@@ -320,6 +347,7 @@
 	struct ttm_buffer_object *dummy_query_bo;
 	struct ttm_buffer_object *pinned_bo;
 	uint32_t query_cid;
+	uint32_t query_cid_valid;
 	bool dummy_query_bo_pinned;
 
 	/*
@@ -329,10 +357,15 @@
 	 * protected by the cmdbuf mutex for simplicity.
 	 */
 
-	struct list_head surface_lru;
+	struct list_head res_lru[vmw_res_max];
 	uint32_t used_memory_size;
 };
 
+static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
+{
+	return container_of(res, struct vmw_surface, res);
+}
+
 static inline struct vmw_private *vmw_priv(struct drm_device *dev)
 {
 	return (struct vmw_private *)dev->dev_private;
@@ -381,10 +414,16 @@
 /**
  * Resource utilities - vmwgfx_resource.c
  */
+struct vmw_user_resource_conv;
+extern const struct vmw_user_resource_conv *user_surface_converter;
+extern const struct vmw_user_resource_conv *user_context_converter;
 
 extern struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv);
 extern void vmw_resource_unreference(struct vmw_resource **p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
+extern int vmw_resource_validate(struct vmw_resource *res);
+extern int vmw_resource_reserve(struct vmw_resource *res, bool no_backup);
+extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
 extern int vmw_context_destroy_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file_priv);
 extern int vmw_context_define_ioctl(struct drm_device *dev, void *data,
@@ -398,14 +437,13 @@
 				  uint32_t handle,
 				  struct vmw_surface **out_surf,
 				  struct vmw_dma_buffer **out_buf);
+extern int vmw_user_resource_lookup_handle(
+	struct vmw_private *dev_priv,
+	struct ttm_object_file *tfile,
+	uint32_t handle,
+	const struct vmw_user_resource_conv *converter,
+	struct vmw_resource **p_res);
 extern void vmw_surface_res_free(struct vmw_resource *res);
-extern int vmw_surface_init(struct vmw_private *dev_priv,
-			    struct vmw_surface *srf,
-			    void (*res_free) (struct vmw_resource *res));
-extern int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
-					  struct ttm_object_file *tfile,
-					  uint32_t handle,
-					  struct vmw_surface **out);
 extern int vmw_surface_destroy_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file_priv);
 extern int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
@@ -423,6 +461,8 @@
 			   size_t size, struct ttm_placement *placement,
 			   bool interuptable,
 			   void (*bo_free) (struct ttm_buffer_object *bo));
+extern int vmw_user_dmabuf_verify_access(struct ttm_buffer_object *bo,
+				  struct ttm_object_file *tfile);
 extern int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 extern int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data,
@@ -440,7 +480,14 @@
 				  struct ttm_object_file *tfile,
 				  uint32_t *inout_id,
 				  struct vmw_resource **out);
-extern void vmw_resource_unreserve(struct list_head *list);
+extern void vmw_resource_unreserve(struct vmw_resource *res,
+				   struct vmw_dma_buffer *new_backup,
+				   unsigned long new_backup_offset);
+extern void vmw_resource_move_notify(struct ttm_buffer_object *bo,
+				     struct ttm_mem_reg *mem);
+extern void vmw_fence_single_bo(struct ttm_buffer_object *bo,
+				struct vmw_fence_obj *fence);
+extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
 
 /**
  * DMA buffer helper routines - vmwgfx_dmabuf.c
@@ -538,10 +585,9 @@
 			       struct drm_vmw_fence_rep __user
 			       *user_fence_rep,
 			       struct vmw_fence_obj **out_fence);
-
-extern void
-vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
-			      bool only_on_cid_match, uint32_t cid);
+extern void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+					    struct vmw_fence_obj *fence);
+extern void vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv);
 
 extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
 				      struct vmw_private *dev_priv,
@@ -699,10 +745,13 @@
 static inline void vmw_dmabuf_unreference(struct vmw_dma_buffer **buf)
 {
 	struct vmw_dma_buffer *tmp_buf = *buf;
-	struct ttm_buffer_object *bo = &tmp_buf->base;
-	*buf = NULL;
 
-	ttm_bo_unref(&bo);
+	*buf = NULL;
+	if (tmp_buf != NULL) {
+		struct ttm_buffer_object *bo = &tmp_buf->base;
+
+		ttm_bo_unref(&bo);
+	}
 }
 
 static inline struct vmw_dma_buffer *vmw_dmabuf_reference(struct vmw_dma_buffer *buf)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 30654b4..394e647 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -30,6 +30,181 @@
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_placement.h>
 
+#define VMW_RES_HT_ORDER 12
+
+/**
+ * struct vmw_resource_relocation - Relocation info for resources
+ *
+ * @head: List head for the software context's relocation list.
+ * @res: Non-ref-counted pointer to the resource.
+ * @offset: Offset, in units of 4-byte entries, into the command buffer
+ * where the id that needs fixup is located.
+ */
+struct vmw_resource_relocation {
+	struct list_head head;
+	const struct vmw_resource *res;
+	unsigned long offset;
+};
+
+/**
+ * struct vmw_resource_val_node - Validation info for resources
+ *
+ * @head: List head for the software context's resource list.
+ * @hash: Hash entry for quick resource to val_node lookup.
+ * @res: Ref-counted pointer to the resource.
+ * @new_backup: Refcounted pointer to the new backup buffer.
+ * @new_backup_offset: New backup buffer offset if @new_backup is non-NULL.
+ * @first_usage: Set to true the first time the resource is referenced in
+ * the command stream.
+ * @no_buffer_needed: The resource does not need a backup buffer at
+ * reservation time; the command stream will provide one.
+ */
+struct vmw_resource_val_node {
+	struct list_head head;
+	struct drm_hash_item hash;
+	struct vmw_resource *res;
+	struct vmw_dma_buffer *new_backup;
+	unsigned long new_backup_offset;
+	bool first_usage;
+	bool no_buffer_needed;
+};
+
+/**
+ * vmw_resource_list_unreserve - unreserve resources previously reserved for
+ * command submission.
+ *
+ * @list: List of resources to unreserve.
+ * @backoff: Whether command submission failed.
+ */
+static void vmw_resource_list_unreserve(struct list_head *list,
+					bool backoff)
+{
+	struct vmw_resource_val_node *val;
+
+	list_for_each_entry(val, list, head) {
+		struct vmw_resource *res = val->res;
+		struct vmw_dma_buffer *new_backup =
+			backoff ? NULL : val->new_backup;
+
+		vmw_resource_unreserve(res, new_backup,
+			val->new_backup_offset);
+		vmw_dmabuf_unreference(&val->new_backup);
+	}
+}
+
+
+/**
+ * vmw_resource_val_add - Add a resource to the software context's
+ * resource list if it's not already on it.
+ *
+ * @sw_context: Pointer to the software context.
+ * @res: Pointer to the resource.
+ * @p_node: If non-NULL on entry, set on successful return to point to
+ * the resource's struct vmw_resource_val_node.
+ */
+static int vmw_resource_val_add(struct vmw_sw_context *sw_context,
+				struct vmw_resource *res,
+				struct vmw_resource_val_node **p_node)
+{
+	struct vmw_resource_val_node *node;
+	struct drm_hash_item *hash;
+	int ret;
+
+	if (likely(drm_ht_find_item(&sw_context->res_ht, (unsigned long) res,
+				    &hash) == 0)) {
+		node = container_of(hash, struct vmw_resource_val_node, hash);
+		node->first_usage = false;
+		if (unlikely(p_node != NULL))
+			*p_node = node;
+		return 0;
+	}
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (unlikely(node == NULL)) {
+		DRM_ERROR("Failed to allocate a resource validation "
+			  "entry.\n");
+		return -ENOMEM;
+	}
+
+	node->hash.key = (unsigned long) res;
+	ret = drm_ht_insert_item(&sw_context->res_ht, &node->hash);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Failed to initialize a resource validation "
+			  "entry.\n");
+		kfree(node);
+		return ret;
+	}
+	list_add_tail(&node->head, &sw_context->resource_list);
+	node->res = vmw_resource_reference(res);
+	node->first_usage = true;
+
+	if (unlikely(p_node != NULL))
+		*p_node = node;
+
+	return 0;
+}
+
+/**
+ * vmw_resource_relocation_add - Add a relocation to the relocation list
+ *
+ * @list: Pointer to head of relocation list.
+ * @res: The resource.
+ * @offset: Offset into the command buffer currently being parsed where the
+ * id that needs fixup is located. Granularity is 4 bytes.
+ */
+static int vmw_resource_relocation_add(struct list_head *list,
+				       const struct vmw_resource *res,
+				       unsigned long offset)
+{
+	struct vmw_resource_relocation *rel;
+
+	rel = kmalloc(sizeof(*rel), GFP_KERNEL);
+	if (unlikely(rel == NULL)) {
+		DRM_ERROR("Failed to allocate a resource relocation.\n");
+		return -ENOMEM;
+	}
+
+	rel->res = res;
+	rel->offset = offset;
+	list_add_tail(&rel->head, list);
+
+	return 0;
+}
+
+/**
+ * vmw_resource_relocations_free - Free all relocations on a list
+ *
+ * @list: Pointer to the head of the relocation list.
+ */
+static void vmw_resource_relocations_free(struct list_head *list)
+{
+	struct vmw_resource_relocation *rel, *n;
+
+	list_for_each_entry_safe(rel, n, list, head) {
+		list_del(&rel->head);
+		kfree(rel);
+	}
+}
+
+/**
+ * vmw_resource_relocations_apply - Apply all relocations on a list
+ *
+ * @cb: Pointer to the start of the command buffer being patched. This need
+ * not be the same buffer as the one being parsed when the relocation
+ * list was built, but the contents must be the same modulo the
+ * resource ids.
+ * @list: Pointer to the head of the relocation list.
+ */
+static void vmw_resource_relocations_apply(uint32_t *cb,
+					   struct list_head *list)
+{
+	struct vmw_resource_relocation *rel;
+
+	list_for_each_entry(rel, list, head)
+		cb[rel->offset] = rel->res->id;
+}
+
 static int vmw_cmd_invalid(struct vmw_private *dev_priv,
 			   struct vmw_sw_context *sw_context,
 			   SVGA3dCmdHeader *header)
@@ -44,25 +219,11 @@
 	return 0;
 }
 
-static void vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
-					  struct vmw_resource **p_res)
-{
-	struct vmw_resource *res = *p_res;
-
-	if (list_empty(&res->validate_head)) {
-		list_add_tail(&res->validate_head, &sw_context->resource_list);
-		*p_res = NULL;
-	} else
-		vmw_resource_unreference(p_res);
-}
-
 /**
  * vmw_bo_to_validate_list - add a bo to a validate list
  *
  * @sw_context: The software context used for this command submission batch.
  * @bo: The buffer object to add.
- * @fence_flags: Fence flags to be or'ed with any other fence flags for
- * this buffer on this submission batch.
  * @p_val_node: If non-NULL, will be updated with the validate node number
  * on return.
  *
@@ -71,31 +232,43 @@
  */
 static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
 				   struct ttm_buffer_object *bo,
-				   uint32_t fence_flags,
 				   uint32_t *p_val_node)
 {
 	uint32_t val_node;
+	struct vmw_validate_buffer *vval_buf;
 	struct ttm_validate_buffer *val_buf;
+	struct drm_hash_item *hash;
+	int ret;
 
-	val_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
-
-	if (unlikely(val_node >= VMWGFX_MAX_VALIDATIONS)) {
-		DRM_ERROR("Max number of DMA buffers per submission"
-			  " exceeded.\n");
-		return -EINVAL;
-	}
-
-	val_buf = &sw_context->val_bufs[val_node];
-	if (unlikely(val_node == sw_context->cur_val_buf)) {
-		val_buf->new_sync_obj_arg = NULL;
-		val_buf->bo = ttm_bo_reference(bo);
-		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
+	if (likely(drm_ht_find_item(&sw_context->res_ht, (unsigned long) bo,
+				    &hash) == 0)) {
+		vval_buf = container_of(hash, struct vmw_validate_buffer,
+					hash);
+		val_buf = &vval_buf->base;
+		val_node = vval_buf - sw_context->val_bufs;
+	} else {
+		val_node = sw_context->cur_val_buf;
+		if (unlikely(val_node >= VMWGFX_MAX_VALIDATIONS)) {
+			DRM_ERROR("Max number of DMA buffers per submission "
+				  "exceeded.\n");
+			return -EINVAL;
+		}
+		vval_buf = &sw_context->val_bufs[val_node];
+		vval_buf->hash.key = (unsigned long) bo;
+		ret = drm_ht_insert_item(&sw_context->res_ht, &vval_buf->hash);
+		if (unlikely(ret != 0)) {
+			DRM_ERROR("Failed to initialize a buffer validation "
+				  "entry.\n");
+			return ret;
+		}
 		++sw_context->cur_val_buf;
+		val_buf = &vval_buf->base;
+		val_buf->bo = ttm_bo_reference(bo);
+		val_buf->reserved = false;
+		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
 	}
 
-	val_buf->new_sync_obj_arg = (void *)
-		((unsigned long) val_buf->new_sync_obj_arg | fence_flags);
-	sw_context->fence_flags |= fence_flags;
+	sw_context->fence_flags |= DRM_VMW_FENCE_FLAG_EXEC;
 
 	if (p_val_node)
 		*p_val_node = val_node;
@@ -103,86 +276,175 @@
 	return 0;
 }
 
+/**
+ * vmw_resources_reserve - Reserve all resources on the sw_context's
+ * resource list.
+ *
+ * @sw_context: Pointer to the software context.
+ *
+ * Note that since VMware's command submission is currently protected by
+ * the cmdbuf mutex, no fancy deadlock avoidance is required for resources;
+ * only a single thread at a time will attempt this.
+ */
+static int vmw_resources_reserve(struct vmw_sw_context *sw_context)
+{
+	struct vmw_resource_val_node *val;
+	int ret;
+
+	list_for_each_entry(val, &sw_context->resource_list, head) {
+		struct vmw_resource *res = val->res;
+
+		ret = vmw_resource_reserve(res, val->no_buffer_needed);
+		if (unlikely(ret != 0))
+			return ret;
+
+		if (res->backup) {
+			struct ttm_buffer_object *bo = &res->backup->base;
+
+			ret = vmw_bo_to_validate_list(sw_context, bo, NULL);
+
+			if (unlikely(ret != 0))
+				return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * vmw_resources_validate - Validate all resources on the sw_context's
+ * resource list.
+ *
+ * @sw_context: Pointer to the software context.
+ *
+ * Before this function is called, all resource backup buffers must have
+ * been validated.
+ */
+static int vmw_resources_validate(struct vmw_sw_context *sw_context)
+{
+	struct vmw_resource_val_node *val;
+	int ret;
+
+	list_for_each_entry(val, &sw_context->resource_list, head) {
+		struct vmw_resource *res = val->res;
+
+		ret = vmw_resource_validate(res);
+		if (unlikely(ret != 0)) {
+			if (ret != -ERESTARTSYS)
+				DRM_ERROR("Failed to validate resource.\n");
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * vmw_cmd_res_check - Check that a resource is present and if so, put it
+ * on the resource validate list unless it's already there.
+ *
+ * @dev_priv: Pointer to a device private structure.
+ * @sw_context: Pointer to the software context.
+ * @res_type: Resource type.
+ * @converter: User-space visible type specific information.
+ * @id: Pointer to the location in the command buffer currently being
+ * parsed from where the user-space resource id handle is located.
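+ * @p_val: If non-NULL, points to the resource's validation node on
+ * successful return.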
+ */
+static int vmw_cmd_res_check(struct vmw_private *dev_priv,
+			     struct vmw_sw_context *sw_context,
+			     enum vmw_res_type res_type,
+			     const struct vmw_user_resource_conv *converter,
+			     uint32_t *id,
+			     struct vmw_resource_val_node **p_val)
+{
+	struct vmw_res_cache_entry *rcache =
+		&sw_context->res_cache[res_type];
+	struct vmw_resource *res;
+	struct vmw_resource_val_node *node;
+	int ret;
+
+	if (*id == SVGA3D_INVALID_ID)
+		return 0;
+
+	/*
+	 * Fastpath in case of repeated commands referencing the same
+	 * resource
+	 */
+
+	if (likely(rcache->valid && *id == rcache->handle)) {
+		const struct vmw_resource *res = rcache->res;
+
+		rcache->node->first_usage = false;
+		if (p_val)
+			*p_val = rcache->node;
+
+		return vmw_resource_relocation_add
+			(&sw_context->res_relocations, res,
+			 id - sw_context->buf_start);
+	}
+
+	ret = vmw_user_resource_lookup_handle(dev_priv,
+					      sw_context->tfile,
+					      *id,
+					      converter,
+					      &res);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Could not find or use resource 0x%08x.\n",
+			  (unsigned) *id);
+		dump_stack();
+		return ret;
+	}
+
+	rcache->valid = true;
+	rcache->res = res;
+	rcache->handle = *id;
+
+	ret = vmw_resource_relocation_add(&sw_context->res_relocations,
+					  res,
+					  id - sw_context->buf_start);
+	if (unlikely(ret != 0))
+		goto out_no_reloc;
+
+	ret = vmw_resource_val_add(sw_context, res, &node);
+	if (unlikely(ret != 0))
+		goto out_no_reloc;
+
+	rcache->node = node;
+	if (p_val)
+		*p_val = node;
+	vmw_resource_unreference(&res);
+	return 0;
+
+out_no_reloc:
+	BUG_ON(sw_context->error_resource != NULL);
+	sw_context->error_resource = res;
+
+	return ret;
+}
+
+/**
+ * vmw_cmd_cid_check - Check a command header for valid context information.
+ *
+ * @dev_priv: Pointer to a device private structure.
+ * @sw_context: Pointer to the software context.
+ * @header: A command header with an embedded user-space context handle.
+ *
+ * Convenience function: Call vmw_cmd_res_check with the user-space context
+ * handle embedded in @header.
+ */
 static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     SVGA3dCmdHeader *header)
 {
-	struct vmw_resource *ctx;
-
 	struct vmw_cid_cmd {
 		SVGA3dCmdHeader header;
 		__le32 cid;
 	} *cmd;
-	int ret;
 
 	cmd = container_of(header, struct vmw_cid_cmd, header);
-	if (likely(sw_context->cid_valid && cmd->cid == sw_context->last_cid))
-		return 0;
-
-	ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid,
-				&ctx);
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Could not find or use context %u\n",
-			  (unsigned) cmd->cid);
-		return ret;
-	}
-
-	sw_context->last_cid = cmd->cid;
-	sw_context->cid_valid = true;
-	sw_context->cur_ctx = ctx;
-	vmw_resource_to_validate_list(sw_context, &ctx);
-
-	return 0;
+	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context,
+				 user_context_converter, &cmd->cid, NULL);
 }
 
-static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
-			     struct vmw_sw_context *sw_context,
-			     uint32_t *sid)
-{
-	struct vmw_surface *srf;
-	int ret;
-	struct vmw_resource *res;
-
-	if (*sid == SVGA3D_INVALID_ID)
-		return 0;
-
-	if (likely((sw_context->sid_valid  &&
-		      *sid == sw_context->last_sid))) {
-		*sid = sw_context->sid_translation;
-		return 0;
-	}
-
-	ret = vmw_user_surface_lookup_handle(dev_priv,
-					     sw_context->tfile,
-					     *sid, &srf);
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Could ot find or use surface 0x%08x "
-			  "address 0x%08lx\n",
-			  (unsigned int) *sid,
-			  (unsigned long) sid);
-		return ret;
-	}
-
-	ret = vmw_surface_validate(dev_priv, srf);
-	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("Could not validate surface.\n");
-		vmw_surface_unreference(&srf);
-		return ret;
-	}
-
-	sw_context->last_sid = *sid;
-	sw_context->sid_valid = true;
-	sw_context->sid_translation = srf->res.id;
-	*sid = sw_context->sid_translation;
-
-	res = &srf->res;
-	vmw_resource_to_validate_list(sw_context, &res);
-
-	return 0;
-}
-
-
 static int vmw_cmd_set_render_target_check(struct vmw_private *dev_priv,
 					   struct vmw_sw_context *sw_context,
 					   SVGA3dCmdHeader *header)
@@ -198,7 +460,9 @@
 		return ret;
 
 	cmd = container_of(header, struct vmw_sid_cmd, header);
-	ret = vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.target.sid);
+	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				user_surface_converter,
+				&cmd->body.target.sid, NULL);
 	return ret;
 }
 
@@ -213,10 +477,14 @@
 	int ret;
 
 	cmd = container_of(header, struct vmw_sid_cmd, header);
-	ret = vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.src.sid);
+	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				user_surface_converter,
+				&cmd->body.src.sid, NULL);
 	if (unlikely(ret != 0))
 		return ret;
-	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.dest.sid);
+	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				 user_surface_converter,
+				 &cmd->body.dest.sid, NULL);
 }
 
 static int vmw_cmd_stretch_blt_check(struct vmw_private *dev_priv,
@@ -230,10 +498,14 @@
 	int ret;
 
 	cmd = container_of(header, struct vmw_sid_cmd, header);
-	ret = vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.src.sid);
+	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				user_surface_converter,
+				&cmd->body.src.sid, NULL);
 	if (unlikely(ret != 0))
 		return ret;
-	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.dest.sid);
+	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				 user_surface_converter,
+				 &cmd->body.dest.sid, NULL);
 }
 
 static int vmw_cmd_blt_surf_screen_check(struct vmw_private *dev_priv,
@@ -252,7 +524,9 @@
 		return -EPERM;
 	}
 
-	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.srcImage.sid);
+	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				 user_surface_converter,
+				 &cmd->body.srcImage.sid, NULL);
 }
 
 static int vmw_cmd_present_check(struct vmw_private *dev_priv,
@@ -272,14 +546,15 @@
 		return -EPERM;
 	}
 
-	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
+	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				 user_surface_converter, &cmd->body.sid,
+				 NULL);
 }
 
 /**
  * vmw_query_bo_switch_prepare - Prepare to switch pinned buffer for queries.
  *
  * @dev_priv: The device private structure.
- * @cid: The hardware context for the next query.
  * @new_query_bo: The new buffer holding query results.
  * @sw_context: The software context used for this command submission.
  *
@@ -287,18 +562,18 @@
  * query results, and if another buffer currently is pinned for query
  * results. If so, the function prepares the state of @sw_context for
  * switching pinned buffers after successful submission of the current
- * command batch. It also checks whether we're using a new query context.
- * In that case, it makes sure we emit a query barrier for the old
- * context before the current query buffer is fenced.
+ * command batch.
  */
 static int vmw_query_bo_switch_prepare(struct vmw_private *dev_priv,
-				       uint32_t cid,
 				       struct ttm_buffer_object *new_query_bo,
 				       struct vmw_sw_context *sw_context)
 {
+	struct vmw_res_cache_entry *ctx_entry =
+		&sw_context->res_cache[vmw_res_context];
 	int ret;
-	bool add_cid = false;
-	uint32_t cid_to_add;
+
+	BUG_ON(!ctx_entry->valid);
+	sw_context->last_query_ctx = ctx_entry->res;
 
 	if (unlikely(new_query_bo != sw_context->cur_query_bo)) {
 
@@ -308,12 +583,9 @@
 		}
 
 		if (unlikely(sw_context->cur_query_bo != NULL)) {
-			BUG_ON(!sw_context->query_cid_valid);
-			add_cid = true;
-			cid_to_add = sw_context->cur_query_cid;
+			sw_context->needs_post_query_barrier = true;
 			ret = vmw_bo_to_validate_list(sw_context,
 						      sw_context->cur_query_bo,
-						      DRM_VMW_FENCE_FLAG_EXEC,
 						      NULL);
 			if (unlikely(ret != 0))
 				return ret;
@@ -322,35 +594,12 @@
 
 		ret = vmw_bo_to_validate_list(sw_context,
 					      dev_priv->dummy_query_bo,
-					      DRM_VMW_FENCE_FLAG_EXEC,
 					      NULL);
 		if (unlikely(ret != 0))
 			return ret;
 
 	}
 
-	if (unlikely(cid != sw_context->cur_query_cid &&
-		     sw_context->query_cid_valid)) {
-		add_cid = true;
-		cid_to_add = sw_context->cur_query_cid;
-	}
-
-	sw_context->cur_query_cid = cid;
-	sw_context->query_cid_valid = true;
-
-	if (add_cid) {
-		struct vmw_resource *ctx = sw_context->cur_ctx;
-
-		if (list_empty(&ctx->query_head))
-			list_add_tail(&ctx->query_head,
-				      &sw_context->query_list);
-		ret = vmw_bo_to_validate_list(sw_context,
-					      dev_priv->dummy_query_bo,
-					      DRM_VMW_FENCE_FLAG_EXEC,
-					      NULL);
-		if (unlikely(ret != 0))
-			return ret;
-	}
 	return 0;
 }
 
@@ -362,10 +611,9 @@
  * @sw_context: The software context used for this command submission batch.
  *
  * This function will check if we're switching query buffers, and will then,
- * if no other query waits are issued this command submission batch,
  * issue a dummy occlusion query wait used as a query barrier. When the fence
  * object following that query wait has signaled, we are sure that all
- * preseding queries have finished, and the old query buffer can be unpinned.
+ * preceding queries have finished, and the old query buffer can be unpinned.
  * However, since both the new query buffer and the old one are fenced with
  * that fence, we can do an asynchronous unpin now, and be sure that the
  * old query buffer won't be moved until the fence has signaled.
@@ -376,20 +624,19 @@
 static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
 				     struct vmw_sw_context *sw_context)
 {
-
-	struct vmw_resource *ctx, *next_ctx;
-	int ret;
-
 	/*
 	 * The validate list should still hold references to all
 	 * contexts here.
 	 */
 
-	list_for_each_entry_safe(ctx, next_ctx, &sw_context->query_list,
-				 query_head) {
-		list_del_init(&ctx->query_head);
+	if (sw_context->needs_post_query_barrier) {
+		struct vmw_res_cache_entry *ctx_entry =
+			&sw_context->res_cache[vmw_res_context];
+		struct vmw_resource *ctx;
+		int ret;
 
-		BUG_ON(list_empty(&ctx->validate_head));
+		BUG_ON(!ctx_entry->valid);
+		ctx = ctx_entry->res;
 
 		ret = vmw_fifo_emit_dummy_query(dev_priv, ctx->id);
 
@@ -403,40 +650,46 @@
 			ttm_bo_unref(&dev_priv->pinned_bo);
 		}
 
-		vmw_bo_pin(sw_context->cur_query_bo, true);
+		if (!sw_context->needs_post_query_barrier) {
+			vmw_bo_pin(sw_context->cur_query_bo, true);
 
-		/*
-		 * We pin also the dummy_query_bo buffer so that we
-		 * don't need to validate it when emitting
-		 * dummy queries in context destroy paths.
-		 */
+			/*
+			 * We pin also the dummy_query_bo buffer so that we
+			 * don't need to validate it when emitting
+			 * dummy queries in context destroy paths.
+			 */
 
-		vmw_bo_pin(dev_priv->dummy_query_bo, true);
-		dev_priv->dummy_query_bo_pinned = true;
+			vmw_bo_pin(dev_priv->dummy_query_bo, true);
+			dev_priv->dummy_query_bo_pinned = true;
 
-		dev_priv->query_cid = sw_context->cur_query_cid;
-		dev_priv->pinned_bo =
-			ttm_bo_reference(sw_context->cur_query_bo);
+			BUG_ON(sw_context->last_query_ctx == NULL);
+			dev_priv->query_cid = sw_context->last_query_ctx->id;
+			dev_priv->query_cid_valid = true;
+			dev_priv->pinned_bo =
+				ttm_bo_reference(sw_context->cur_query_bo);
+		}
 	}
 }
 
 /**
- * vmw_query_switch_backoff - clear query barrier list
- * @sw_context: The sw context used for this submission batch.
+ * vmw_translate_guest_ptr - Prepare to translate a user-space buffer
+ * handle to a valid SVGAGuestPtr
  *
- * This function is used as part of an error path, where a previously
- * set up list of query barriers needs to be cleared.
+ * @dev_priv: Pointer to a device private structure.
+ * @sw_context: The software context used for this command batch validation.
+ * @ptr: Pointer to the user-space handle to be translated.
+ * @vmw_bo_p: Points to a location that, on successful return, will carry
+ * a reference-counted pointer to the DMA buffer identified by the
+ * user-space handle in @ptr.
  *
+ * This function saves information needed to translate a user-space buffer
+ * handle to a valid SVGAGuestPtr. The translation does not take place
+ * immediately, but during a call to vmw_apply_relocations().
+ * This function builds a relocation list and a list of buffers to validate.
+ * The former needs to be freed using either vmw_apply_relocations() or
+ * vmw_free_relocations(). The latter needs to be freed using
+ * vmw_clear_validations().
  */
-static void vmw_query_switch_backoff(struct vmw_sw_context *sw_context)
-{
-	struct list_head *list, *next;
-
-	list_for_each_safe(list, next, &sw_context->query_list) {
-		list_del_init(list);
-	}
-}
-
 static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 				   struct vmw_sw_context *sw_context,
 				   SVGAGuestPtr *ptr,
@@ -465,8 +718,7 @@
 	reloc = &sw_context->relocs[sw_context->cur_reloc++];
 	reloc->location = ptr;
 
-	ret = vmw_bo_to_validate_list(sw_context, bo, DRM_VMW_FENCE_FLAG_EXEC,
-				      &reloc->index);
+	ret = vmw_bo_to_validate_list(sw_context, bo, &reloc->index);
 	if (unlikely(ret != 0))
 		goto out_no_reloc;
 
@@ -479,6 +731,37 @@
 	return ret;
 }
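As the comment above spells out, nothing is patched at this point; the code only records where in the command stream the final GMR id must land. A stripped-down sketch of that record-then-apply pattern, using invented toy types rather than the driver's (compare vmw_apply_relocations() further down):

#include <stddef.h>
#include <stdint.h>

struct toy_reloc { uint32_t *location; };	/* patch site in the stream */
struct toy_bo { uint32_t gmr_id; };		/* known only after validation */

/* Pass 1, while parsing commands: remember where to patch. */
static void toy_record(struct toy_reloc *r, uint32_t *loc)
{
	r->location = loc;
}

/* Pass 2, once placements are final: write the real ids. */
static void toy_apply(struct toy_reloc *relocs, size_t n,
		      const struct toy_bo *bo)
{
	for (size_t i = 0; i < n; ++i)
		*relocs[i].location = bo->gmr_id;
}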
 
+/**
+ * vmw_cmd_begin_query - validate an SVGA_3D_CMD_BEGIN_QUERY command.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context used for this command submission.
+ * @header: Pointer to the command header in the command stream.
+ */
+static int vmw_cmd_begin_query(struct vmw_private *dev_priv,
+			       struct vmw_sw_context *sw_context,
+			       SVGA3dCmdHeader *header)
+{
+	struct vmw_begin_query_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdBeginQuery q;
+	} *cmd;
+
+	cmd = container_of(header, struct vmw_begin_query_cmd,
+			   header);
+
+	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context,
+				 user_context_converter, &cmd->q.cid,
+				 NULL);
+}
+
+/**
+ * vmw_cmd_end_query - validate an SVGA_3D_CMD_END_QUERY command.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context used for this command submission.
+ * @header: Pointer to the command header in the command stream.
+ */
 static int vmw_cmd_end_query(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     SVGA3dCmdHeader *header)
@@ -501,13 +784,19 @@
 	if (unlikely(ret != 0))
 		return ret;
 
-	ret = vmw_query_bo_switch_prepare(dev_priv, cmd->q.cid,
-					  &vmw_bo->base, sw_context);
+	ret = vmw_query_bo_switch_prepare(dev_priv, &vmw_bo->base, sw_context);
 
 	vmw_dmabuf_unreference(&vmw_bo);
 	return ret;
 }
 
+/**
+ * vmw_cmd_wait_query - validate an SVGA_3D_CMD_WAIT_QUERY command.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context used for this command submission.
+ * @header: Pointer to the command header in the command stream.
+ */
 static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 			      struct vmw_sw_context *sw_context,
 			      SVGA3dCmdHeader *header)
@@ -518,7 +807,6 @@
 		SVGA3dCmdWaitForQuery q;
 	} *cmd;
 	int ret;
-	struct vmw_resource *ctx;
 
 	cmd = container_of(header, struct vmw_query_cmd, header);
 	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
@@ -532,16 +820,6 @@
 		return ret;
 
 	vmw_dmabuf_unreference(&vmw_bo);
-
-	/*
-	 * This wait will act as a barrier for previous waits for this
-	 * context.
-	 */
-
-	ctx = sw_context->cur_ctx;
-	if (!list_empty(&ctx->query_head))
-		list_del_init(&ctx->query_head);
-
 	return 0;
 }
 
@@ -550,14 +828,12 @@
 		       SVGA3dCmdHeader *header)
 {
 	struct vmw_dma_buffer *vmw_bo = NULL;
-	struct ttm_buffer_object *bo;
 	struct vmw_surface *srf = NULL;
 	struct vmw_dma_cmd {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdSurfaceDMA dma;
 	} *cmd;
 	int ret;
-	struct vmw_resource *res;
 
 	cmd = container_of(header, struct vmw_dma_cmd, header);
 	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
@@ -566,37 +842,20 @@
 	if (unlikely(ret != 0))
 		return ret;
 
-	bo = &vmw_bo->base;
-	ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
-					     cmd->dma.host.sid, &srf);
-	if (ret) {
-		DRM_ERROR("could not find surface\n");
-		goto out_no_reloc;
-	}
-
-	ret = vmw_surface_validate(dev_priv, srf);
+	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+				user_surface_converter, &cmd->dma.host.sid,
+				NULL);
 	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("Culd not validate surface.\n");
-		goto out_no_validate;
+		if (unlikely(ret != -ERESTARTSYS))
+			DRM_ERROR("could not find surface for DMA.\n");
+		goto out_no_surface;
 	}
 
-	/*
-	 * Patch command stream with device SID.
-	 */
-	cmd->dma.host.sid = srf->res.id;
-	vmw_kms_cursor_snoop(srf, sw_context->tfile, bo, header);
+	srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res);
 
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_kms_cursor_snoop(srf, sw_context->tfile, &vmw_bo->base, header);
 
-	res = &srf->res;
-	vmw_resource_to_validate_list(sw_context, &res);
-
-	return 0;
-
-out_no_validate:
-	vmw_surface_unreference(&srf);
-out_no_reloc:
+out_no_surface:
 	vmw_dmabuf_unreference(&vmw_bo);
 	return ret;
 }
@@ -629,8 +888,9 @@
 	}
 
 	for (i = 0; i < cmd->body.numVertexDecls; ++i, ++decl) {
-		ret = vmw_cmd_sid_check(dev_priv, sw_context,
-					&decl->array.surfaceId);
+		ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+					user_surface_converter,
+					&decl->array.surfaceId, NULL);
 		if (unlikely(ret != 0))
 			return ret;
 	}
@@ -644,8 +904,9 @@
 
 	range = (SVGA3dPrimitiveRange *) decl;
 	for (i = 0; i < cmd->body.numRanges; ++i, ++range) {
-		ret = vmw_cmd_sid_check(dev_priv, sw_context,
-					&range->indexArray.surfaceId);
+		ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+					user_surface_converter,
+					&range->indexArray.surfaceId, NULL);
 		if (unlikely(ret != 0))
 			return ret;
 	}
@@ -676,8 +937,9 @@
 		if (likely(cur_state->name != SVGA3D_TS_BIND_TEXTURE))
 			continue;
 
-		ret = vmw_cmd_sid_check(dev_priv, sw_context,
-					&cur_state->value);
+		ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+					user_surface_converter,
+					&cur_state->value, NULL);
 		if (unlikely(ret != 0))
 			return ret;
 	}
@@ -708,6 +970,34 @@
 	return ret;
 }
 
+/**
+ * vmw_cmd_set_shader - Validate an SVGA_3D_CMD_SET_SHADER command
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context being used for this batch.
+ * @header: Pointer to the command header in the command stream.
+ */
+static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
+			      struct vmw_sw_context *sw_context,
+			      SVGA3dCmdHeader *header)
+{
+	struct vmw_set_shader_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdSetShader body;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_set_shader_cmd,
+			   header);
+
+	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
+	if (unlikely(ret != 0))
+		return ret;
+
+	return 0;
+}
+
 static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv,
 				struct vmw_sw_context *sw_context,
 				void *buf, uint32_t *size)
@@ -781,16 +1071,20 @@
 	VMW_CMD_DEF(SVGA_3D_CMD_PRESENT, &vmw_cmd_present_check),
 	VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_cid_check),
 	VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_cid_check),
-	VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_cid_check),
+	VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_set_shader),
 	VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_cid_check),
 	VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw),
 	VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check),
-	VMW_CMD_DEF(SVGA_3D_CMD_BEGIN_QUERY, &vmw_cmd_cid_check),
+	VMW_CMD_DEF(SVGA_3D_CMD_BEGIN_QUERY, &vmw_cmd_begin_query),
 	VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_end_query),
 	VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_wait_query),
 	VMW_CMD_DEF(SVGA_3D_CMD_PRESENT_READBACK, &vmw_cmd_ok),
 	VMW_CMD_DEF(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN,
-		    &vmw_cmd_blt_surf_screen_check)
+		    &vmw_cmd_blt_surf_screen_check),
+	VMW_CMD_DEF(SVGA_3D_CMD_SURFACE_DEFINE_V2, &vmw_cmd_invalid),
+	VMW_CMD_DEF(SVGA_3D_CMD_GENERATE_MIPMAPS, &vmw_cmd_invalid),
+	VMW_CMD_DEF(SVGA_3D_CMD_ACTIVATE_SURFACE, &vmw_cmd_invalid),
+	VMW_CMD_DEF(SVGA_3D_CMD_DEACTIVATE_SURFACE, &vmw_cmd_invalid),
 };
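A table like the one above is typically consumed by rebasing the command id against the first 3D command id and indexing straight into the array; vmw_cmd_check() below does something of that shape, though its exact body is not part of this hunk. A hedged, self-contained sketch with invented names:

#include <stddef.h>
#include <stdint.h>

#define TOY_CMD_BASE	1040u	/* illustrative value for the first 3D cmd id */

struct toy_cmd_entry { int (*func)(const void *body); };

static int toy_cmd_dispatch(const struct toy_cmd_entry *table, size_t n,
			    uint32_t cmd_id, const void *body)
{
	uint32_t idx = cmd_id - TOY_CMD_BASE;

	if (idx >= n || table[idx].func == NULL)
		return -1;	/* unknown command: reject the whole batch */
	return table[idx].func(body);
}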
 
 static int vmw_cmd_check(struct vmw_private *dev_priv,
@@ -837,6 +1131,8 @@
 	int32_t cur_size = size;
 	int ret;
 
+	sw_context->buf_start = buf;
+
 	while (cur_size > 0) {
 		size = cur_size;
 		ret = vmw_cmd_check(dev_priv, sw_context, buf, &size);
@@ -868,43 +1164,63 @@
 
 	for (i = 0; i < sw_context->cur_reloc; ++i) {
 		reloc = &sw_context->relocs[i];
-		validate = &sw_context->val_bufs[reloc->index];
+		validate = &sw_context->val_bufs[reloc->index].base;
 		bo = validate->bo;
-		if (bo->mem.mem_type == TTM_PL_VRAM) {
+		switch (bo->mem.mem_type) {
+		case TTM_PL_VRAM:
 			reloc->location->offset += bo->offset;
 			reloc->location->gmrId = SVGA_GMR_FRAMEBUFFER;
-		} else
+			break;
+		case VMW_PL_GMR:
 			reloc->location->gmrId = bo->mem.start;
+			break;
+		default:
+			BUG();
+		}
 	}
 	vmw_free_relocations(sw_context);
 }
 
+/**
+ * vmw_resource_list_unreference - Free up a resource list and unreference
+ * all resources referenced by it.
+ *
+ * @list: The resource list.
+ */
+static void vmw_resource_list_unreference(struct list_head *list)
+{
+	struct vmw_resource_val_node *val, *val_next;
+
+	/*
+	 * Drop references to resources held during command submission.
+	 */
+
+	list_for_each_entry_safe(val, val_next, list, head) {
+		list_del_init(&val->head);
+		vmw_resource_unreference(&val->res);
+		kfree(val);
+	}
+}
+
 static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 {
-	struct ttm_validate_buffer *entry, *next;
-	struct vmw_resource *res, *res_next;
+	struct vmw_validate_buffer *entry, *next;
+	struct vmw_resource_val_node *val;
 
 	/*
 	 * Drop references to DMA buffers held during command submission.
 	 */
 	list_for_each_entry_safe(entry, next, &sw_context->validate_nodes,
-				 head) {
-		list_del(&entry->head);
-		vmw_dmabuf_validate_clear(entry->bo);
-		ttm_bo_unref(&entry->bo);
+				 base.head) {
+		list_del(&entry->base.head);
+		ttm_bo_unref(&entry->base.bo);
+		(void) drm_ht_remove_item(&sw_context->res_ht, &entry->hash);
 		sw_context->cur_val_buf--;
 	}
 	BUG_ON(sw_context->cur_val_buf != 0);
 
-	/*
-	 * Drop references to resources held during command submission.
-	 */
-	vmw_resource_unreserve(&sw_context->resource_list);
-	list_for_each_entry_safe(res, res_next, &sw_context->resource_list,
-				 validate_head) {
-		list_del_init(&res->validate_head);
-		vmw_resource_unreference(&res);
-	}
+	list_for_each_entry(val, &sw_context->resource_list, head)
+		(void) drm_ht_remove_item(&sw_context->res_ht, &val->hash);
 }
 
 static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
@@ -929,7 +1245,7 @@
 	 * used as a GMR, this will return -ENOMEM.
 	 */
 
-	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, true, false, false);
+	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, true, false);
 	if (likely(ret == 0 || ret == -ERESTARTSYS))
 		return ret;
 
@@ -939,7 +1255,7 @@
 	 */
 
 	DRM_INFO("Falling through to VRAM.\n");
-	ret = ttm_bo_validate(bo, &vmw_vram_placement, true, false, false);
+	ret = ttm_bo_validate(bo, &vmw_vram_placement, true, false);
 	return ret;
 }
 
@@ -947,11 +1263,11 @@
 static int vmw_validate_buffers(struct vmw_private *dev_priv,
 				struct vmw_sw_context *sw_context)
 {
-	struct ttm_validate_buffer *entry;
+	struct vmw_validate_buffer *entry;
 	int ret;
 
-	list_for_each_entry(entry, &sw_context->validate_nodes, head) {
-		ret = vmw_validate_single_buffer(dev_priv, entry->bo);
+	list_for_each_entry(entry, &sw_context->validate_nodes, base.head) {
+		ret = vmw_validate_single_buffer(dev_priv, entry->base.bo);
 		if (unlikely(ret != 0))
 			return ret;
 	}
@@ -1114,6 +1430,8 @@
 {
 	struct vmw_sw_context *sw_context = &dev_priv->ctx;
 	struct vmw_fence_obj *fence = NULL;
+	struct vmw_resource *error_resource;
+	struct list_head resource_list;
 	uint32_t handle;
 	void *cmd;
 	int ret;
@@ -1143,24 +1461,33 @@
 		sw_context->kernel = true;
 
 	sw_context->tfile = vmw_fpriv(file_priv)->tfile;
-	sw_context->cid_valid = false;
-	sw_context->sid_valid = false;
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
 	sw_context->fence_flags = 0;
-	INIT_LIST_HEAD(&sw_context->query_list);
 	INIT_LIST_HEAD(&sw_context->resource_list);
 	sw_context->cur_query_bo = dev_priv->pinned_bo;
-	sw_context->cur_query_cid = dev_priv->query_cid;
-	sw_context->query_cid_valid = (dev_priv->pinned_bo != NULL);
-
+	sw_context->last_query_ctx = NULL;
+	sw_context->needs_post_query_barrier = false;
+	memset(sw_context->res_cache, 0, sizeof(sw_context->res_cache));
 	INIT_LIST_HEAD(&sw_context->validate_nodes);
+	INIT_LIST_HEAD(&sw_context->res_relocations);
+	if (!sw_context->res_ht_initialized) {
+		ret = drm_ht_create(&sw_context->res_ht, VMW_RES_HT_ORDER);
+		if (unlikely(ret != 0))
+			goto out_unlock;
+		sw_context->res_ht_initialized = true;
+	}
 
+	INIT_LIST_HEAD(&resource_list);
 	ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands,
 				command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
 
+	ret = vmw_resources_reserve(sw_context);
+	if (unlikely(ret != 0))
+		goto out_err;
+
 	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes);
 	if (unlikely(ret != 0))
 		goto out_err;
@@ -1169,24 +1496,31 @@
 	if (unlikely(ret != 0))
 		goto out_err;
 
-	vmw_apply_relocations(sw_context);
+	ret = vmw_resources_validate(sw_context);
+	if (unlikely(ret != 0))
+		goto out_err;
 
 	if (throttle_us) {
 		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.marker_queue,
 				   throttle_us);
 
 		if (unlikely(ret != 0))
-			goto out_throttle;
+			goto out_err;
 	}
 
 	cmd = vmw_fifo_reserve(dev_priv, command_size);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving fifo space for commands.\n");
 		ret = -ENOMEM;
-		goto out_throttle;
+		goto out_err;
 	}
 
+	vmw_apply_relocations(sw_context);
 	memcpy(cmd, kernel_commands, command_size);
+
+	vmw_resource_relocations_apply(cmd, &sw_context->res_relocations);
+	vmw_resource_relocations_free(&sw_context->res_relocations);
+
 	vmw_fifo_commit(dev_priv, command_size);
 
 	vmw_query_bo_switch_commit(dev_priv, sw_context);
@@ -1202,9 +1536,14 @@
 	if (ret != 0)
 		DRM_ERROR("Fence submission error. Syncing.\n");
 
+	vmw_resource_list_unreserve(&sw_context->resource_list, false);
 	ttm_eu_fence_buffer_objects(&sw_context->validate_nodes,
 				    (void *) fence);
 
+	if (unlikely(dev_priv->pinned_bo != NULL &&
+		     !dev_priv->query_cid_valid))
+		__vmw_execbuf_release_pinned_bo(dev_priv, fence);
+
 	vmw_clear_validations(sw_context);
 	vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
 				    user_fence_rep, fence, handle);
@@ -1217,17 +1556,40 @@
 		vmw_fence_obj_unreference(&fence);
 	}
 
+	list_splice_init(&sw_context->resource_list, &resource_list);
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
+
+	/*
+	 * Unreference resources outside of the cmdbuf_mutex to
+	 * avoid deadlocks in resource destruction paths.
+	 */
+	vmw_resource_list_unreference(&resource_list);
+
 	return 0;
 
 out_err:
+	vmw_resource_relocations_free(&sw_context->res_relocations);
 	vmw_free_relocations(sw_context);
-out_throttle:
-	vmw_query_switch_backoff(sw_context);
 	ttm_eu_backoff_reservation(&sw_context->validate_nodes);
+	vmw_resource_list_unreserve(&sw_context->resource_list, true);
 	vmw_clear_validations(sw_context);
+	if (unlikely(dev_priv->pinned_bo != NULL &&
+		     !dev_priv->query_cid_valid))
+		__vmw_execbuf_release_pinned_bo(dev_priv, NULL);
 out_unlock:
+	list_splice_init(&sw_context->resource_list, &resource_list);
+	error_resource = sw_context->error_resource;
+	sw_context->error_resource = NULL;
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
+
+	/*
+	 * Unreference resources outside of the cmdbuf_mutex to
+	 * avoid deadlocks in resource destruction paths.
+	 */
+	vmw_resource_list_unreference(&resource_list);
+	if (unlikely(error_resource != NULL))
+		vmw_resource_unreference(&error_resource);
+
 	return ret;
 }
 
@@ -1252,13 +1614,100 @@
 
 
 /**
+ * __vmw_execbuf_release_pinned_bo - Flush queries and unpin the pinned
+ * query bo.
+ *
+ * @dev_priv: The device private structure.
+ * @fence: If non-NULL, should point to a struct vmw_fence_obj issued
+ * _after_ a query barrier that flushes all queries touching the current
+ * buffer pointed to by @dev_priv->pinned_bo.
+ *
+ * This function should be used to unpin the pinned query bo, or
+ * as a query barrier when we need to make sure that all queries have
+ * finished before the next fifo command. (For example on hardware
+ * context destruction, where the hardware may otherwise leak unfinished
+ * queries).
+ *
+ * This function does not return any failure codes, but makes attempts
+ * to do safe unpinning in case of errors.
+ *
+ * The function will synchronize on the previous query barrier, and will
+ * thus not finish until that barrier has executed.
+ *
+ * The @dev_priv->cmdbuf_mutex needs to be held by the current thread
+ * before calling this function.
+ */
+void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+				     struct vmw_fence_obj *fence)
+{
+	int ret = 0;
+	struct list_head validate_list;
+	struct ttm_validate_buffer pinned_val, query_val;
+	struct vmw_fence_obj *lfence = NULL;
+
+	if (dev_priv->pinned_bo == NULL)
+		goto out_unlock;
+
+	INIT_LIST_HEAD(&validate_list);
+
+	pinned_val.bo = ttm_bo_reference(dev_priv->pinned_bo);
+	list_add_tail(&pinned_val.head, &validate_list);
+
+	query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
+	list_add_tail(&query_val.head, &validate_list);
+
+	do {
+		ret = ttm_eu_reserve_buffers(&validate_list);
+	} while (ret == -ERESTARTSYS);
+
+	if (unlikely(ret != 0)) {
+		vmw_execbuf_unpin_panic(dev_priv);
+		goto out_no_reserve;
+	}
+
+	if (dev_priv->query_cid_valid) {
+		BUG_ON(fence != NULL);
+		ret = vmw_fifo_emit_dummy_query(dev_priv, dev_priv->query_cid);
+		if (unlikely(ret != 0)) {
+			vmw_execbuf_unpin_panic(dev_priv);
+			goto out_no_emit;
+		}
+		dev_priv->query_cid_valid = false;
+	}
+
+	vmw_bo_pin(dev_priv->pinned_bo, false);
+	vmw_bo_pin(dev_priv->dummy_query_bo, false);
+	dev_priv->dummy_query_bo_pinned = false;
+
+	if (fence == NULL) {
+		(void) vmw_execbuf_fence_commands(NULL, dev_priv, &lfence,
+						  NULL);
+		fence = lfence;
+	}
+	ttm_eu_fence_buffer_objects(&validate_list, (void *) fence);
+	if (lfence != NULL)
+		vmw_fence_obj_unreference(&lfence);
+
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+
+out_unlock:
+	return;
+
+out_no_emit:
+	ttm_eu_backoff_reservation(&validate_list);
+out_no_reserve:
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+}
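Note the reservation retry loop above: since this path must not fail, -ERESTARTSYS (the wait was interrupted by a signal) is simply absorbed by retrying. The same idiom in everyday user-space C, where EINTR plays the role of -ERESTARTSYS; this is an analogy, not driver code:

#include <errno.h>
#include <unistd.h>

/* Restart a read() that was interrupted by a signal. */
static ssize_t read_retry(int fd, void *buf, size_t count)
{
	ssize_t ret;

	do {
		ret = read(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}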
+
+/**
  * vmw_execbuf_release_pinned_bo - Flush queries and unpin the pinned
  * query bo.
  *
  * @dev_priv: The device private structure.
- * @only_on_cid_match: Only flush and unpin if the current active query cid
- * matches @cid.
- * @cid: Optional context id to match.
  *
  * This function should be used to unpin the pinned query bo, or
  * as a query barrier when we need to make sure that all queries have
@@ -1272,69 +1721,11 @@
  * The function will synchronize on the previous query barrier, and will
  * thus not finish until that barrier has executed.
  */
-void vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
-				   bool only_on_cid_match, uint32_t cid)
+void vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv)
 {
-	int ret = 0;
-	struct list_head validate_list;
-	struct ttm_validate_buffer pinned_val, query_val;
-	struct vmw_fence_obj *fence;
-
 	mutex_lock(&dev_priv->cmdbuf_mutex);
-
-	if (dev_priv->pinned_bo == NULL)
-		goto out_unlock;
-
-	if (only_on_cid_match && cid != dev_priv->query_cid)
-		goto out_unlock;
-
-	INIT_LIST_HEAD(&validate_list);
-
-	pinned_val.new_sync_obj_arg = (void *)(unsigned long)
-		DRM_VMW_FENCE_FLAG_EXEC;
-	pinned_val.bo = ttm_bo_reference(dev_priv->pinned_bo);
-	list_add_tail(&pinned_val.head, &validate_list);
-
-	query_val.new_sync_obj_arg = pinned_val.new_sync_obj_arg;
-	query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
-	list_add_tail(&query_val.head, &validate_list);
-
-	do {
-		ret = ttm_eu_reserve_buffers(&validate_list);
-	} while (ret == -ERESTARTSYS);
-
-	if (unlikely(ret != 0)) {
-		vmw_execbuf_unpin_panic(dev_priv);
-		goto out_no_reserve;
-	}
-
-	ret = vmw_fifo_emit_dummy_query(dev_priv, dev_priv->query_cid);
-	if (unlikely(ret != 0)) {
-		vmw_execbuf_unpin_panic(dev_priv);
-		goto out_no_emit;
-	}
-
-	vmw_bo_pin(dev_priv->pinned_bo, false);
-	vmw_bo_pin(dev_priv->dummy_query_bo, false);
-	dev_priv->dummy_query_bo_pinned = false;
-
-	(void) vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
-	ttm_eu_fence_buffer_objects(&validate_list, (void *) fence);
-
-	ttm_bo_unref(&query_val.bo);
-	ttm_bo_unref(&pinned_val.bo);
-	ttm_bo_unref(&dev_priv->pinned_bo);
-
-out_unlock:
-	mutex_unlock(&dev_priv->cmdbuf_mutex);
-	return;
-
-out_no_emit:
-	ttm_eu_backoff_reservation(&validate_list);
-out_no_reserve:
-	ttm_bo_unref(&query_val.bo);
-	ttm_bo_unref(&pinned_val.bo);
-	ttm_bo_unref(&dev_priv->pinned_bo);
+	if (dev_priv->query_cid_valid)
+		__vmw_execbuf_release_pinned_bo(dev_priv, NULL);
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index bc187fa..c62d20e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -537,7 +537,7 @@
 		container_of(fence, struct vmw_user_fence, fence);
 	struct vmw_fence_manager *fman = fence->fman;
 
-	kfree(ufence);
+	ttm_base_object_kfree(ufence, base);
 	/*
 	 * Free kernel space accounting.
 	 */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 7290811..d9fbbe1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -133,6 +133,7 @@
 	struct drm_vmw_rect *clips = NULL;
 	struct drm_mode_object *obj;
 	struct vmw_framebuffer *vfb;
+	struct vmw_resource *res;
 	uint32_t num_clips;
 	int ret;
 
@@ -180,11 +181,13 @@
 	if (unlikely(ret != 0))
 		goto out_no_ttm_lock;
 
-	ret = vmw_user_surface_lookup_handle(dev_priv, tfile, arg->sid,
-					     &surface);
+	ret = vmw_user_resource_lookup_handle(dev_priv, tfile, arg->sid,
+					      user_surface_converter,
+					      &res);
 	if (ret)
 		goto out_no_surface;
 
+	surface = vmw_res_to_srf(res);
 	ret = vmw_kms_present(dev_priv, file_priv,
 			      vfb, surface, arg->sid,
 			      arg->dest_x, arg->dest_y,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 070fb23..79f7e8e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -373,7 +373,7 @@
 
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.dirty_info_property,
 				      1);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index cb55b7b..87e39f6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -35,6 +35,7 @@
 #include "svga_escape.h"
 
 #define VMW_MAX_NUM_STREAMS 1
+#define VMW_OVERLAY_CAP_MASK (SVGA_FIFO_CAP_VIDEO | SVGA_FIFO_CAP_ESCAPE)
 
 struct vmw_stream {
 	struct vmw_dma_buffer *buf;
@@ -449,6 +450,14 @@
 	return 0;
 }
 
+
+static bool vmw_overlay_available(const struct vmw_private *dev_priv)
+{
+	return (dev_priv->overlay_priv != NULL &&
+		((dev_priv->fifo.capabilities & VMW_OVERLAY_CAP_MASK) ==
+		 VMW_OVERLAY_CAP_MASK));
+}
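The helper deliberately uses the "(caps & mask) == mask" form, which is true only when every bit in the mask is set; the open-coded capability test removed later in this patch got that logic wrong. A self-contained illustration with made-up bit values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative bits, not the real SVGA_FIFO_CAP_* values. */
	const uint32_t CAP_VIDEO = 1u << 0;
	const uint32_t CAP_ESCAPE = 1u << 1;
	const uint32_t mask = CAP_VIDEO | CAP_ESCAPE;
	uint32_t caps = CAP_VIDEO;

	assert((caps & mask) != 0);	/* "any bit set" passes ... */
	assert((caps & mask) != mask);	/* ... but "all bits set" fails */

	caps |= CAP_ESCAPE;
	assert((caps & mask) == mask);	/* both present: overlay available */
	return 0;
}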
+
 int vmw_overlay_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv)
 {
@@ -461,7 +470,7 @@
 	struct vmw_resource *res;
 	int ret;
 
-	if (!overlay)
+	if (!vmw_overlay_available(dev_priv))
 		return -ENOSYS;
 
 	ret = vmw_user_stream_lookup(dev_priv, tfile, &arg->stream_id, &res);
@@ -492,7 +501,7 @@
 
 int vmw_overlay_num_overlays(struct vmw_private *dev_priv)
 {
-	if (!dev_priv->overlay_priv)
+	if (!vmw_overlay_available(dev_priv))
 		return 0;
 
 	return VMW_MAX_NUM_STREAMS;
@@ -503,7 +512,7 @@
 	struct vmw_overlay *overlay = dev_priv->overlay_priv;
 	int i, k;
 
-	if (!overlay)
+	if (!vmw_overlay_available(dev_priv))
 		return 0;
 
 	mutex_lock(&overlay->mutex);
@@ -569,12 +578,6 @@
 	if (dev_priv->overlay_priv)
 		return -EINVAL;
 
-	if (!(dev_priv->fifo.capabilities & SVGA_FIFO_CAP_VIDEO) &&
-	     (dev_priv->fifo.capabilities & SVGA_FIFO_CAP_ESCAPE)) {
-		DRM_INFO("hardware doesn't support overlays\n");
-		return -ENOSYS;
-	}
-
 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
 	if (!overlay)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index da3c6b5..e01a17b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -30,17 +30,7 @@
 #include <drm/ttm/ttm_object.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/drmP.h>
-
-struct vmw_user_context {
-	struct ttm_base_object base;
-	struct vmw_resource res;
-};
-
-struct vmw_user_surface {
-	struct ttm_base_object base;
-	struct vmw_surface srf;
-	uint32_t size;
-};
+#include "vmwgfx_resource_priv.h"
 
 struct vmw_user_dma_buffer {
 	struct ttm_base_object base;
@@ -62,17 +52,21 @@
 	struct vmw_stream stream;
 };
 
-struct vmw_surface_offset {
-	uint32_t face;
-	uint32_t mip;
-	uint32_t bo_offset;
-};
 
-
-static uint64_t vmw_user_context_size;
-static uint64_t vmw_user_surface_size;
 static uint64_t vmw_user_stream_size;
 
+static const struct vmw_res_func vmw_stream_func = {
+	.res_type = vmw_res_stream,
+	.needs_backup = false,
+	.may_evict = false,
+	.type_name = "video streams",
+	.backup_placement = NULL,
+	.create = NULL,
+	.destroy = NULL,
+	.bind = NULL,
+	.unbind = NULL
+};
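Streams can leave most of these hooks NULL because callers of the function table are expected to test each optional callback before invoking it (the resource release path further down does exactly that for ->unbind). A minimal sketch of the convention, with toy types standing in for the real ones declared in vmwgfx_resource_priv.h:

struct toy_res;

struct toy_res_func {
	int (*create)(struct toy_res *res);
	void (*destroy)(struct toy_res *res);
};

struct toy_res {
	const struct toy_res_func *func;
};

/* Optional hook: types with no device-side state leave ->create NULL. */
static int toy_res_create(struct toy_res *res)
{
	if (res->func->create == NULL)
		return 0;
	return res->func->create(res);
}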
+
 static inline struct vmw_dma_buffer *
 vmw_dma_buffer(struct ttm_buffer_object *bo)
 {
@@ -100,13 +94,14 @@
  *
  * Release the resource id to the resource id manager and set it to -1
  */
-static void vmw_resource_release_id(struct vmw_resource *res)
+void vmw_resource_release_id(struct vmw_resource *res)
 {
 	struct vmw_private *dev_priv = res->dev_priv;
+	struct idr *idr = &dev_priv->res_idr[res->func->res_type];
 
 	write_lock(&dev_priv->resource_lock);
 	if (res->id != -1)
-		idr_remove(res->idr, res->id);
+		idr_remove(idr, res->id);
 	res->id = -1;
 	write_unlock(&dev_priv->resource_lock);
 }
@@ -116,17 +111,33 @@
 	struct vmw_resource *res =
 	    container_of(kref, struct vmw_resource, kref);
 	struct vmw_private *dev_priv = res->dev_priv;
-	int id = res->id;
-	struct idr *idr = res->idr;
+	int id;
+	struct idr *idr = &dev_priv->res_idr[res->func->res_type];
 
 	res->avail = false;
-	if (res->remove_from_lists != NULL)
-		res->remove_from_lists(res);
+	list_del_init(&res->lru_head);
 	write_unlock(&dev_priv->resource_lock);
+	if (res->backup) {
+		struct ttm_buffer_object *bo = &res->backup->base;
+
+		ttm_bo_reserve(bo, false, false, false, 0);
+		if (!list_empty(&res->mob_head) &&
+		    res->func->unbind != NULL) {
+			struct ttm_validate_buffer val_buf;
+
+			val_buf.bo = bo;
+			res->func->unbind(res, false, &val_buf);
+		}
+		res->backup_dirty = false;
+		list_del_init(&res->mob_head);
+		ttm_bo_unreserve(bo);
+		vmw_dmabuf_unreference(&res->backup);
+	}
 
 	if (likely(res->hw_destroy != NULL))
 		res->hw_destroy(res);
 
+	id = res->id;
 	if (res->res_free != NULL)
 		res->res_free(res);
 	else
@@ -153,25 +164,25 @@
 /**
 * vmw_resource_alloc_id - allocate a resource id from the id manager.
  *
- * @dev_priv: Pointer to the device private structure.
  * @res: Pointer to the resource.
  *
 * Allocate the lowest free resource id from the id manager, and set
  * @res->id to that id. Returns 0 on success and -ENOMEM on failure.
  */
-static int vmw_resource_alloc_id(struct vmw_private *dev_priv,
-				 struct vmw_resource *res)
+int vmw_resource_alloc_id(struct vmw_resource *res)
 {
+	struct vmw_private *dev_priv = res->dev_priv;
 	int ret;
+	struct idr *idr = &dev_priv->res_idr[res->func->res_type];
 
 	BUG_ON(res->id != -1);
 
 	do {
-		if (unlikely(idr_pre_get(res->idr, GFP_KERNEL) == 0))
+		if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
 			return -ENOMEM;
 
 		write_lock(&dev_priv->resource_lock);
-		ret = idr_get_new_above(res->idr, res, 1, &res->id);
+		ret = idr_get_new_above(idr, res, 1, &res->id);
 		write_unlock(&dev_priv->resource_lock);
 
 	} while (ret == -EAGAIN);
@@ -179,31 +190,39 @@
 	return ret;
 }
 
-
-static int vmw_resource_init(struct vmw_private *dev_priv,
-			     struct vmw_resource *res,
-			     struct idr *idr,
-			     enum ttm_object_type obj_type,
-			     bool delay_id,
-			     void (*res_free) (struct vmw_resource *res),
-			     void (*remove_from_lists)
-			     (struct vmw_resource *res))
+/**
+ * vmw_resource_init - initialize a struct vmw_resource
+ *
+ * @dev_priv:       Pointer to a device private struct.
+ * @res:            The struct vmw_resource to initialize.
+ * @delay_id:       Boolean whether to defer device id allocation until
+ *                  the first validation.
+ * @res_free:       Resource destructor.
+ * @func:           Resource function table.
+ */
+int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res,
+		      bool delay_id,
+		      void (*res_free) (struct vmw_resource *res),
+		      const struct vmw_res_func *func)
 {
 	kref_init(&res->kref);
 	res->hw_destroy = NULL;
 	res->res_free = res_free;
-	res->remove_from_lists = remove_from_lists;
-	res->res_type = obj_type;
-	res->idr = idr;
 	res->avail = false;
 	res->dev_priv = dev_priv;
-	INIT_LIST_HEAD(&res->query_head);
-	INIT_LIST_HEAD(&res->validate_head);
+	res->func = func;
+	INIT_LIST_HEAD(&res->lru_head);
+	INIT_LIST_HEAD(&res->mob_head);
 	res->id = -1;
+	res->backup = NULL;
+	res->backup_offset = 0;
+	res->backup_dirty = false;
+	res->res_dirty = false;
 	if (delay_id)
 		return 0;
 	else
-		return vmw_resource_alloc_id(dev_priv, res);
+		return vmw_resource_alloc_id(res);
 }
 
 /**
@@ -218,9 +237,8 @@
  * Activate basically means that the function vmw_resource_lookup will
  * find it.
  */
-
-static void vmw_resource_activate(struct vmw_resource *res,
-				  void (*hw_destroy) (struct vmw_resource *))
+void vmw_resource_activate(struct vmw_resource *res,
+			   void (*hw_destroy) (struct vmw_resource *))
 {
 	struct vmw_private *dev_priv = res->dev_priv;
 
@@ -250,946 +268,57 @@
 }
 
 /**
- * Context management:
+ * vmw_user_resource_lookup_handle - lookup a struct vmw_resource from a
+ * TTM user-space handle and perform basic type checks
+ *
+ * @dev_priv:     Pointer to a device private struct
+ * @tfile:        Pointer to a struct ttm_object_file identifying the caller
+ * @handle:       The TTM user-space handle
+ * @converter:    Pointer to an object describing the resource type
+ * @p_res:        On successful return, the location pointed to will contain
+ *                a pointer to a refcounted struct vmw_resource.
+ *
+ * If the handle can't be found or is associated with an incorrect resource
+ * type, -EINVAL will be returned.
  */
-
-static void vmw_hw_context_destroy(struct vmw_resource *res)
+int vmw_user_resource_lookup_handle(struct vmw_private *dev_priv,
+				    struct ttm_object_file *tfile,
+				    uint32_t handle,
+				    const struct vmw_user_resource_conv
+				    *converter,
+				    struct vmw_resource **p_res)
 {
-
-	struct vmw_private *dev_priv = res->dev_priv;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDestroyContext body;
-	} *cmd;
-
-
-	vmw_execbuf_release_pinned_bo(dev_priv, true, res->id);
-
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving FIFO space for surface "
-			  "destruction.\n");
-		return;
-	}
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_CONTEXT_DESTROY);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
-	cmd->body.cid = cpu_to_le32(res->id);
-
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	vmw_3d_resource_dec(dev_priv, false);
-}
-
-static int vmw_context_init(struct vmw_private *dev_priv,
-			    struct vmw_resource *res,
-			    void (*res_free) (struct vmw_resource *res))
-{
-	int ret;
-
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDefineContext body;
-	} *cmd;
-
-	ret = vmw_resource_init(dev_priv, res, &dev_priv->context_idr,
-				VMW_RES_CONTEXT, false, res_free, NULL);
-
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Failed to allocate a resource id.\n");
-		goto out_early;
-	}
-
-	if (unlikely(res->id >= SVGA3D_MAX_CONTEXT_IDS)) {
-		DRM_ERROR("Out of hw context ids.\n");
-		vmw_resource_unreference(&res);
-		return -ENOMEM;
-	}
-
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed.\n");
-		vmw_resource_unreference(&res);
-		return -ENOMEM;
-	}
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_CONTEXT_DEFINE);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
-	cmd->body.cid = cpu_to_le32(res->id);
-
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	(void) vmw_3d_resource_inc(dev_priv, false);
-	vmw_resource_activate(res, vmw_hw_context_destroy);
-	return 0;
-
-out_early:
-	if (res_free == NULL)
-		kfree(res);
-	else
-		res_free(res);
-	return ret;
-}
-
-struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv)
-{
-	struct vmw_resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
-	int ret;
-
-	if (unlikely(res == NULL))
-		return NULL;
-
-	ret = vmw_context_init(dev_priv, res, NULL);
-	return (ret == 0) ? res : NULL;
-}
-
-/**
- * User-space context management:
- */
-
-static void vmw_user_context_free(struct vmw_resource *res)
-{
-	struct vmw_user_context *ctx =
-	    container_of(res, struct vmw_user_context, res);
-	struct vmw_private *dev_priv = res->dev_priv;
-
-	kfree(ctx);
-	ttm_mem_global_free(vmw_mem_glob(dev_priv),
-			    vmw_user_context_size);
-}
-
-/**
- * This function is called when user space has no more references on the
- * base object. It releases the base-object's reference on the resource object.
- */
-
-static void vmw_user_context_base_release(struct ttm_base_object **p_base)
-{
-	struct ttm_base_object *base = *p_base;
-	struct vmw_user_context *ctx =
-	    container_of(base, struct vmw_user_context, base);
-	struct vmw_resource *res = &ctx->res;
-
-	*p_base = NULL;
-	vmw_resource_unreference(&res);
-}
-
-int vmw_context_destroy_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv)
-{
-	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct ttm_base_object *base;
 	struct vmw_resource *res;
-	struct vmw_user_context *ctx;
-	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
-	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	int ret = 0;
+	int ret = -EINVAL;
 
-	res = vmw_resource_lookup(dev_priv, &dev_priv->context_idr, arg->cid);
-	if (unlikely(res == NULL))
+	base = ttm_base_object_lookup(tfile, handle);
+	if (unlikely(base == NULL))
 		return -EINVAL;
 
-	if (res->res_free != &vmw_user_context_free) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (unlikely(base->object_type != converter->object_type))
+		goto out_bad_resource;
 
-	ctx = container_of(res, struct vmw_user_context, res);
-	if (ctx->base.tfile != tfile && !ctx->base.shareable) {
-		ret = -EPERM;
-		goto out;
-	}
-
-	ttm_ref_object_base_unref(tfile, ctx->base.hash.key, TTM_REF_USAGE);
-out:
-	vmw_resource_unreference(&res);
-	return ret;
-}
-
-int vmw_context_define_ioctl(struct drm_device *dev, void *data,
-			     struct drm_file *file_priv)
-{
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_context *ctx;
-	struct vmw_resource *res;
-	struct vmw_resource *tmp;
-	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
-	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	int ret;
-
-
-	/*
-	 * Approximate idr memory usage with 128 bytes. It will be limited
-	 * by maximum number_of contexts anyway.
-	 */
-
-	if (unlikely(vmw_user_context_size == 0))
-		vmw_user_context_size = ttm_round_pot(sizeof(*ctx)) + 128;
-
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
-				   vmw_user_context_size,
-				   false, true);
-	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("Out of graphics memory for context"
-				  " creation.\n");
-		goto out_unlock;
-	}
-
-	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-	if (unlikely(ctx == NULL)) {
-		ttm_mem_global_free(vmw_mem_glob(dev_priv),
-				    vmw_user_context_size);
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
-
-	res = &ctx->res;
-	ctx->base.shareable = false;
-	ctx->base.tfile = NULL;
-
-	/*
-	 * From here on, the destructor takes over resource freeing.
-	 */
-
-	ret = vmw_context_init(dev_priv, res, vmw_user_context_free);
-	if (unlikely(ret != 0))
-		goto out_unlock;
-
-	tmp = vmw_resource_reference(&ctx->res);
-	ret = ttm_base_object_init(tfile, &ctx->base, false, VMW_RES_CONTEXT,
-				   &vmw_user_context_base_release, NULL);
-
-	if (unlikely(ret != 0)) {
-		vmw_resource_unreference(&tmp);
-		goto out_err;
-	}
-
-	arg->cid = res->id;
-out_err:
-	vmw_resource_unreference(&res);
-out_unlock:
-	ttm_read_unlock(&vmaster->lock);
-	return ret;
-
-}
-
-int vmw_context_check(struct vmw_private *dev_priv,
-		      struct ttm_object_file *tfile,
-		      int id,
-		      struct vmw_resource **p_res)
-{
-	struct vmw_resource *res;
-	int ret = 0;
+	res = converter->base_obj_to_res(base);
 
 	read_lock(&dev_priv->resource_lock);
-	res = idr_find(&dev_priv->context_idr, id);
-	if (res && res->avail) {
-		struct vmw_user_context *ctx =
-			container_of(res, struct vmw_user_context, res);
-		if (ctx->base.tfile != tfile && !ctx->base.shareable)
-			ret = -EPERM;
-		if (p_res)
-			*p_res = vmw_resource_reference(res);
-	} else
-		ret = -EINVAL;
+	if (!res->avail || res->res_free != converter->res_free) {
+		read_unlock(&dev_priv->resource_lock);
+		goto out_bad_resource;
+	}
+
+	kref_get(&res->kref);
 	read_unlock(&dev_priv->resource_lock);
 
-	return ret;
-}
+	*p_res = res;
+	ret = 0;
 
-struct vmw_bpp {
-	uint8_t bpp;
-	uint8_t s_bpp;
-};
-
-/*
- * Size table for the supported SVGA3D surface formats. It consists of
- * two values. The bpp value and the s_bpp value which is short for
- * "stride bits per pixel" The values are given in such a way that the
- * minimum stride for the image is calculated using
- *
- * min_stride = w*s_bpp
- *
- * and the total memory requirement for the image is
- *
- * h*min_stride*bpp/s_bpp
- *
- */
-static const struct vmw_bpp vmw_sf_bpp[] = {
-	[SVGA3D_FORMAT_INVALID] = {0, 0},
-	[SVGA3D_X8R8G8B8] = {32, 32},
-	[SVGA3D_A8R8G8B8] = {32, 32},
-	[SVGA3D_R5G6B5] = {16, 16},
-	[SVGA3D_X1R5G5B5] = {16, 16},
-	[SVGA3D_A1R5G5B5] = {16, 16},
-	[SVGA3D_A4R4G4B4] = {16, 16},
-	[SVGA3D_Z_D32] = {32, 32},
-	[SVGA3D_Z_D16] = {16, 16},
-	[SVGA3D_Z_D24S8] = {32, 32},
-	[SVGA3D_Z_D15S1] = {16, 16},
-	[SVGA3D_LUMINANCE8] = {8, 8},
-	[SVGA3D_LUMINANCE4_ALPHA4] = {8, 8},
-	[SVGA3D_LUMINANCE16] = {16, 16},
-	[SVGA3D_LUMINANCE8_ALPHA8] = {16, 16},
-	[SVGA3D_DXT1] = {4, 16},
-	[SVGA3D_DXT2] = {8, 32},
-	[SVGA3D_DXT3] = {8, 32},
-	[SVGA3D_DXT4] = {8, 32},
-	[SVGA3D_DXT5] = {8, 32},
-	[SVGA3D_BUMPU8V8] = {16, 16},
-	[SVGA3D_BUMPL6V5U5] = {16, 16},
-	[SVGA3D_BUMPX8L8V8U8] = {32, 32},
-	[SVGA3D_ARGB_S10E5] = {16, 16},
-	[SVGA3D_ARGB_S23E8] = {32, 32},
-	[SVGA3D_A2R10G10B10] = {32, 32},
-	[SVGA3D_V8U8] = {16, 16},
-	[SVGA3D_Q8W8V8U8] = {32, 32},
-	[SVGA3D_CxV8U8] = {16, 16},
-	[SVGA3D_X8L8V8U8] = {32, 32},
-	[SVGA3D_A2W10V10U10] = {32, 32},
-	[SVGA3D_ALPHA8] = {8, 8},
-	[SVGA3D_R_S10E5] = {16, 16},
-	[SVGA3D_R_S23E8] = {32, 32},
-	[SVGA3D_RG_S10E5] = {16, 16},
-	[SVGA3D_RG_S23E8] = {32, 32},
-	[SVGA3D_BUFFER] = {8, 8},
-	[SVGA3D_Z_D24X8] = {32, 32},
-	[SVGA3D_V16U16] = {32, 32},
-	[SVGA3D_G16R16] = {32, 32},
-	[SVGA3D_A16B16G16R16] = {64,  64},
-	[SVGA3D_UYVY] = {12, 12},
-	[SVGA3D_YUY2] = {12, 12},
-	[SVGA3D_NV12] = {12, 8},
-	[SVGA3D_AYUV] = {32, 32},
-	[SVGA3D_BC4_UNORM] = {4,  16},
-	[SVGA3D_BC5_UNORM] = {8,  32},
-	[SVGA3D_Z_DF16] = {16,  16},
-	[SVGA3D_Z_DF24] = {24,  24},
-	[SVGA3D_Z_D24S8_INT] = {32,  32}
-};
-
-
-/**
- * Surface management.
- */
-
-struct vmw_surface_dma {
-	SVGA3dCmdHeader header;
-	SVGA3dCmdSurfaceDMA body;
-	SVGA3dCopyBox cb;
-	SVGA3dCmdSurfaceDMASuffix suffix;
-};
-
-struct vmw_surface_define {
-	SVGA3dCmdHeader header;
-	SVGA3dCmdDefineSurface body;
-};
-
-struct vmw_surface_destroy {
-	SVGA3dCmdHeader header;
-	SVGA3dCmdDestroySurface body;
-};
-
-
-/**
- * vmw_surface_dma_size - Compute fifo size for a dma command.
- *
- * @srf: Pointer to a struct vmw_surface
- *
- * Computes the required size for a surface dma command for backup or
- * restoration of the surface represented by @srf.
- */
-static inline uint32_t vmw_surface_dma_size(const struct vmw_surface *srf)
-{
-	return srf->num_sizes * sizeof(struct vmw_surface_dma);
-}
-
-
-/**
- * vmw_surface_define_size - Compute fifo size for a surface define command.
- *
- * @srf: Pointer to a struct vmw_surface
- *
- * Computes the required size for a surface define command for the definition
- * of the surface represented by @srf.
- */
-static inline uint32_t vmw_surface_define_size(const struct vmw_surface *srf)
-{
-	return sizeof(struct vmw_surface_define) + srf->num_sizes *
-		sizeof(SVGA3dSize);
-}
-
-
-/**
- * vmw_surface_destroy_size - Compute fifo size for a surface destroy command.
- *
- * Computes the required size for a surface destroy command for the destruction
- * of a hw surface.
- */
-static inline uint32_t vmw_surface_destroy_size(void)
-{
-	return sizeof(struct vmw_surface_destroy);
-}
-
-/**
- * vmw_surface_destroy_encode - Encode a surface_destroy command.
- *
- * @id: The surface id
- * @cmd_space: Pointer to memory area in which the commands should be encoded.
- */
-static void vmw_surface_destroy_encode(uint32_t id,
-				       void *cmd_space)
-{
-	struct vmw_surface_destroy *cmd = (struct vmw_surface_destroy *)
-		cmd_space;
-
-	cmd->header.id = SVGA_3D_CMD_SURFACE_DESTROY;
-	cmd->header.size = sizeof(cmd->body);
-	cmd->body.sid = id;
-}
-
-/**
- * vmw_surface_define_encode - Encode a surface_define command.
- *
- * @srf: Pointer to a struct vmw_surface object.
- * @cmd_space: Pointer to memory area in which the commands should be encoded.
- */
-static void vmw_surface_define_encode(const struct vmw_surface *srf,
-				      void *cmd_space)
-{
-	struct vmw_surface_define *cmd = (struct vmw_surface_define *)
-		cmd_space;
-	struct drm_vmw_size *src_size;
-	SVGA3dSize *cmd_size;
-	uint32_t cmd_len;
-	int i;
-
-	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
-
-	cmd->header.id = SVGA_3D_CMD_SURFACE_DEFINE;
-	cmd->header.size = cmd_len;
-	cmd->body.sid = srf->res.id;
-	cmd->body.surfaceFlags = srf->flags;
-	cmd->body.format = cpu_to_le32(srf->format);
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
-		cmd->body.face[i].numMipLevels = srf->mip_levels[i];
-
-	cmd += 1;
-	cmd_size = (SVGA3dSize *) cmd;
-	src_size = srf->sizes;
-
-	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
-		cmd_size->width = src_size->width;
-		cmd_size->height = src_size->height;
-		cmd_size->depth = src_size->depth;
-	}
-}
-
-
-/**
- * vmw_surface_dma_encode - Encode a surface_dma command.
- *
- * @srf: Pointer to a struct vmw_surface object.
- * @cmd_space: Pointer to memory area in which the commands should be encoded.
- * @ptr: Pointer to an SVGAGuestPtr indicating where the surface contents
- * should be placed or read from.
- * @to_surface: Boolean whether to DMA to the surface or from the surface.
- */
-static void vmw_surface_dma_encode(struct vmw_surface *srf,
-				   void *cmd_space,
-				   const SVGAGuestPtr *ptr,
-				   bool to_surface)
-{
-	uint32_t i;
-	uint32_t bpp = vmw_sf_bpp[srf->format].bpp;
-	uint32_t stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
-	struct vmw_surface_dma *cmd = (struct vmw_surface_dma *)cmd_space;
-
-	for (i = 0; i < srf->num_sizes; ++i) {
-		SVGA3dCmdHeader *header = &cmd->header;
-		SVGA3dCmdSurfaceDMA *body = &cmd->body;
-		SVGA3dCopyBox *cb = &cmd->cb;
-		SVGA3dCmdSurfaceDMASuffix *suffix = &cmd->suffix;
-		const struct vmw_surface_offset *cur_offset = &srf->offsets[i];
-		const struct drm_vmw_size *cur_size = &srf->sizes[i];
-
-		header->id = SVGA_3D_CMD_SURFACE_DMA;
-		header->size = sizeof(*body) + sizeof(*cb) + sizeof(*suffix);
-
-		body->guest.ptr = *ptr;
-		body->guest.ptr.offset += cur_offset->bo_offset;
-		body->guest.pitch = (cur_size->width * stride_bpp + 7) >> 3;
-		body->host.sid = srf->res.id;
-		body->host.face = cur_offset->face;
-		body->host.mipmap = cur_offset->mip;
-		body->transfer = ((to_surface) ?  SVGA3D_WRITE_HOST_VRAM :
-				  SVGA3D_READ_HOST_VRAM);
-		cb->x = 0;
-		cb->y = 0;
-		cb->z = 0;
-		cb->srcx = 0;
-		cb->srcy = 0;
-		cb->srcz = 0;
-		cb->w = cur_size->width;
-		cb->h = cur_size->height;
-		cb->d = cur_size->depth;
-
-		suffix->suffixSize = sizeof(*suffix);
-		suffix->maximumOffset = body->guest.pitch*cur_size->height*
-			cur_size->depth*bpp / stride_bpp;
-		suffix->flags.discard = 0;
-		suffix->flags.unsynchronized = 0;
-		suffix->flags.reserved = 0;
-		++cmd;
-	}
-};
-
-
-static void vmw_hw_surface_destroy(struct vmw_resource *res)
-{
-
-	struct vmw_private *dev_priv = res->dev_priv;
-	struct vmw_surface *srf;
-	void *cmd;
-
-	if (res->id != -1) {
-
-		cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size());
-		if (unlikely(cmd == NULL)) {
-			DRM_ERROR("Failed reserving FIFO space for surface "
-				  "destruction.\n");
-			return;
-		}
-
-		vmw_surface_destroy_encode(res->id, cmd);
-		vmw_fifo_commit(dev_priv, vmw_surface_destroy_size());
-
-		/*
-		 * used_memory_size_atomic, or separate lock
-		 * to avoid taking dev_priv::cmdbuf_mutex in
-		 * the destroy path.
-		 */
-
-		mutex_lock(&dev_priv->cmdbuf_mutex);
-		srf = container_of(res, struct vmw_surface, res);
-		dev_priv->used_memory_size -= srf->backup_size;
-		mutex_unlock(&dev_priv->cmdbuf_mutex);
-
-	}
-	vmw_3d_resource_dec(dev_priv, false);
-}
-
-void vmw_surface_res_free(struct vmw_resource *res)
-{
-	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
-
-	if (srf->backup)
-		ttm_bo_unref(&srf->backup);
-	kfree(srf->offsets);
-	kfree(srf->sizes);
-	kfree(srf->snooper.image);
-	kfree(srf);
-}
-
-
-/**
- * vmw_surface_do_validate - make a surface available to the device.
- *
- * @dev_priv: Pointer to a device private struct.
- * @srf: Pointer to a struct vmw_surface.
- *
- * If the surface doesn't have a hw id, allocate one, and optionally
- * DMA the backed up surface contents to the device.
- *
- * Returns -EBUSY if there wasn't sufficient device resources to
- * complete the validation. Retry after freeing up resources.
- *
- * May return other errors if the kernel is out of guest resources.
- */
-int vmw_surface_do_validate(struct vmw_private *dev_priv,
-			    struct vmw_surface *srf)
-{
-	struct vmw_resource *res = &srf->res;
-	struct list_head val_list;
-	struct ttm_validate_buffer val_buf;
-	uint32_t submit_size;
-	uint8_t *cmd;
-	int ret;
-
-	if (likely(res->id != -1))
-		return 0;
-
-	if (unlikely(dev_priv->used_memory_size + srf->backup_size >=
-		     dev_priv->memory_size))
-		return -EBUSY;
-
-	/*
-	 * Reserve- and validate the backup DMA bo.
-	 */
-
-	if (srf->backup) {
-		INIT_LIST_HEAD(&val_list);
-		val_buf.bo = ttm_bo_reference(srf->backup);
-		val_buf.new_sync_obj_arg = (void *)((unsigned long)
-						    DRM_VMW_FENCE_FLAG_EXEC);
-		list_add_tail(&val_buf.head, &val_list);
-		ret = ttm_eu_reserve_buffers(&val_list);
-		if (unlikely(ret != 0))
-			goto out_no_reserve;
-
-		ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
-				      true, false, false);
-		if (unlikely(ret != 0))
-			goto out_no_validate;
-	}
-
-	/*
-	 * Alloc id for the resource.
-	 */
-
-	ret = vmw_resource_alloc_id(dev_priv, res);
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Failed to allocate a surface id.\n");
-		goto out_no_id;
-	}
-	if (unlikely(res->id >= SVGA3D_MAX_SURFACE_IDS)) {
-		ret = -EBUSY;
-		goto out_no_fifo;
-	}
-
-
-	/*
-	 * Encode surface define- and dma commands.
-	 */
-
-	submit_size = vmw_surface_define_size(srf);
-	if (srf->backup)
-		submit_size += vmw_surface_dma_size(srf);
-
-	cmd = vmw_fifo_reserve(dev_priv, submit_size);
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving FIFO space for surface "
-			  "validation.\n");
-		ret = -ENOMEM;
-		goto out_no_fifo;
-	}
-
-	vmw_surface_define_encode(srf, cmd);
-	if (srf->backup) {
-		SVGAGuestPtr ptr;
-
-		cmd += vmw_surface_define_size(srf);
-		vmw_bo_get_guest_ptr(srf->backup, &ptr);
-		vmw_surface_dma_encode(srf, cmd, &ptr, true);
-	}
-
-	vmw_fifo_commit(dev_priv, submit_size);
-
-	/*
-	 * Create a fence object and fence the backup buffer.
-	 */
-
-	if (srf->backup) {
-		struct vmw_fence_obj *fence;
-
-		(void) vmw_execbuf_fence_commands(NULL, dev_priv,
-						  &fence, NULL);
-		ttm_eu_fence_buffer_objects(&val_list, fence);
-		if (likely(fence != NULL))
-			vmw_fence_obj_unreference(&fence);
-		ttm_bo_unref(&val_buf.bo);
-		ttm_bo_unref(&srf->backup);
-	}
-
-	/*
-	 * Surface memory usage accounting.
-	 */
-
-	dev_priv->used_memory_size += srf->backup_size;
-
-	return 0;
-
-out_no_fifo:
-	vmw_resource_release_id(res);
-out_no_id:
-out_no_validate:
-	if (srf->backup)
-		ttm_eu_backoff_reservation(&val_list);
-out_no_reserve:
-	if (srf->backup)
-		ttm_bo_unref(&val_buf.bo);
-	return ret;
-}
-
-/**
- * vmw_surface_evict - Evict a hw surface.
- *
- * @dev_priv: Pointer to a device private struct.
- * @srf: Pointer to a struct vmw_surface
- *
- * DMA the contents of a hw surface to a backup guest buffer object,
- * and destroy the hw surface, releasing its id.
- */
-int vmw_surface_evict(struct vmw_private *dev_priv,
-		      struct vmw_surface *srf)
-{
-	struct vmw_resource *res = &srf->res;
-	struct list_head val_list;
-	struct ttm_validate_buffer val_buf;
-	uint32_t submit_size;
-	uint8_t *cmd;
-	int ret;
-	struct vmw_fence_obj *fence;
-	SVGAGuestPtr ptr;
-
-	BUG_ON(res->id == -1);
-
-	/*
-	 * Create a surface backup buffer object.
-	 */
-
-	if (!srf->backup) {
-		ret = ttm_bo_create(&dev_priv->bdev, srf->backup_size,
-				    ttm_bo_type_device,
-				    &vmw_srf_placement, 0, 0, true,
-				    NULL, &srf->backup);
-		if (unlikely(ret != 0))
-			return ret;
-	}
-
-	/*
-	 * Reserve- and validate the backup DMA bo.
-	 */
-
-	INIT_LIST_HEAD(&val_list);
-	val_buf.bo = ttm_bo_reference(srf->backup);
-	val_buf.new_sync_obj_arg = (void *)(unsigned long)
-		DRM_VMW_FENCE_FLAG_EXEC;
-	list_add_tail(&val_buf.head, &val_list);
-	ret = ttm_eu_reserve_buffers(&val_list);
-	if (unlikely(ret != 0))
-		goto out_no_reserve;
-
-	ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
-			      true, false, false);
-	if (unlikely(ret != 0))
-		goto out_no_validate;
-
-
-	/*
-	 * Encode the dma- and surface destroy commands.
-	 */
-
-	submit_size = vmw_surface_dma_size(srf) + vmw_surface_destroy_size();
-	cmd = vmw_fifo_reserve(dev_priv, submit_size);
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving FIFO space for surface "
-			  "eviction.\n");
-		ret = -ENOMEM;
-		goto out_no_fifo;
-	}
-
-	vmw_bo_get_guest_ptr(srf->backup, &ptr);
-	vmw_surface_dma_encode(srf, cmd, &ptr, false);
-	cmd += vmw_surface_dma_size(srf);
-	vmw_surface_destroy_encode(res->id, cmd);
-	vmw_fifo_commit(dev_priv, submit_size);
-
-	/*
-	 * Surface memory usage accounting.
-	 */
-
-	dev_priv->used_memory_size -= srf->backup_size;
-
-	/*
-	 * Create a fence object and fence the DMA buffer.
-	 */
-
-	(void) vmw_execbuf_fence_commands(NULL, dev_priv,
-					  &fence, NULL);
-	ttm_eu_fence_buffer_objects(&val_list, fence);
-	if (likely(fence != NULL))
-		vmw_fence_obj_unreference(&fence);
-	ttm_bo_unref(&val_buf.bo);
-
-	/*
-	 * Release the surface ID.
-	 */
-
-	vmw_resource_release_id(res);
-
-	return 0;
-
-out_no_fifo:
-out_no_validate:
-	if (srf->backup)
-		ttm_eu_backoff_reservation(&val_list);
-out_no_reserve:
-	ttm_bo_unref(&val_buf.bo);
-	ttm_bo_unref(&srf->backup);
-	return ret;
-}
-
-
-/**
- * vmw_surface_validate - make a surface available to the device, evicting
- * other surfaces if needed.
- *
- * @dev_priv: Pointer to a device private struct.
- * @srf: Pointer to a struct vmw_surface.
- *
- * Try to validate a surface and if it fails due to limited device resources,
- * repeatedly try to evict other surfaces until the request can be
- * acommodated.
- *
- * May return errors if out of resources.
- */
-int vmw_surface_validate(struct vmw_private *dev_priv,
-			 struct vmw_surface *srf)
-{
-	int ret;
-	struct vmw_surface *evict_srf;
-
-	do {
-		write_lock(&dev_priv->resource_lock);
-		list_del_init(&srf->lru_head);
-		write_unlock(&dev_priv->resource_lock);
-
-		ret = vmw_surface_do_validate(dev_priv, srf);
-		if (likely(ret != -EBUSY))
-			break;
-
-		write_lock(&dev_priv->resource_lock);
-		if (list_empty(&dev_priv->surface_lru)) {
-			DRM_ERROR("Out of device memory for surfaces.\n");
-			ret = -EBUSY;
-			write_unlock(&dev_priv->resource_lock);
-			break;
-		}
-
-		evict_srf = vmw_surface_reference
-			(list_first_entry(&dev_priv->surface_lru,
-					  struct vmw_surface,
-					  lru_head));
-		list_del_init(&evict_srf->lru_head);
-
-		write_unlock(&dev_priv->resource_lock);
-		(void) vmw_surface_evict(dev_priv, evict_srf);
-
-		vmw_surface_unreference(&evict_srf);
-
-	} while (1);
-
-	if (unlikely(ret != 0 && srf->res.id != -1)) {
-		write_lock(&dev_priv->resource_lock);
-		list_add_tail(&srf->lru_head, &dev_priv->surface_lru);
-		write_unlock(&dev_priv->resource_lock);
-	}
+out_bad_resource:
+	ttm_base_object_unref(&base);
 
 	return ret;
 }
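Only three members of the converter are ever dereferenced above, which pins down roughly what such an object has to supply. A hypothetical shape reconstructed purely from that usage; the real struct vmw_user_resource_conv lives in the driver's private headers and may well contain more:

struct vmw_resource;
struct ttm_base_object;

struct toy_user_resource_conv {
	unsigned int object_type;	/* compared with base->object_type */
	struct vmw_resource *(*base_obj_to_res)(struct ttm_base_object *base);
	void (*res_free)(struct vmw_resource *res);	/* identity check only */
};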
 
-
-/**
- * vmw_surface_remove_from_lists - Remove surface resources from lookup lists
- *
- * @res: Pointer to a struct vmw_resource embedded in a struct vmw_surface
- *
- * As part of the resource destruction, remove the surface from any
- * lookup lists.
- */
-static void vmw_surface_remove_from_lists(struct vmw_resource *res)
-{
-	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
-
-	list_del_init(&srf->lru_head);
-}
-
-int vmw_surface_init(struct vmw_private *dev_priv,
-		     struct vmw_surface *srf,
-		     void (*res_free) (struct vmw_resource *res))
-{
-	int ret;
-	struct vmw_resource *res = &srf->res;
-
-	BUG_ON(res_free == NULL);
-	INIT_LIST_HEAD(&srf->lru_head);
-	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
-				VMW_RES_SURFACE, true, res_free,
-				vmw_surface_remove_from_lists);
-
-	if (unlikely(ret != 0))
-		res_free(res);
-
-	/*
-	 * The surface won't be visible to hardware until a
-	 * surface validate.
-	 */
-
-	(void) vmw_3d_resource_inc(dev_priv, false);
-	vmw_resource_activate(res, vmw_hw_surface_destroy);
-	return ret;
-}
-
-static void vmw_user_surface_free(struct vmw_resource *res)
-{
-	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
-	struct vmw_user_surface *user_srf =
-	    container_of(srf, struct vmw_user_surface, srf);
-	struct vmw_private *dev_priv = srf->res.dev_priv;
-	uint32_t size = user_srf->size;
-
-	if (srf->backup)
-		ttm_bo_unref(&srf->backup);
-	kfree(srf->offsets);
-	kfree(srf->sizes);
-	kfree(srf->snooper.image);
-	kfree(user_srf);
-	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
-}
-
-/**
- * vmw_resource_unreserve - unreserve resources previously reserved for
- * command submission.
- *
- * @list_head: list of resources to unreserve.
- *
- * Currently only surfaces are considered, and unreserving a surface
- * means putting it back on the device's surface lru list,
- * so that it can be evicted if necessary.
- * This function traverses the resource list and
- * checks whether resources are surfaces, and in that case puts them back
- * on the device's surface LRU list.
- */
-void vmw_resource_unreserve(struct list_head *list)
-{
-	struct vmw_resource *res;
-	struct vmw_surface *srf;
-	rwlock_t *lock = NULL;
-
-	list_for_each_entry(res, list, validate_head) {
-
-		if (res->res_free != &vmw_surface_res_free &&
-		    res->res_free != &vmw_user_surface_free)
-			continue;
-
-		if (unlikely(lock == NULL)) {
-			lock = &res->dev_priv->resource_lock;
-			write_lock(lock);
-		}
-
-		srf = container_of(res, struct vmw_surface, res);
-		list_del_init(&srf->lru_head);
-		list_add_tail(&srf->lru_head, &res->dev_priv->surface_lru);
-	}
-
-	if (lock != NULL)
-		write_unlock(lock);
-}
-
 /**
  * Helper function that looks up either a surface or a dmabuf.
  *
@@ -1201,342 +330,24 @@
 			   struct vmw_surface **out_surf,
 			   struct vmw_dma_buffer **out_buf)
 {
+	struct vmw_resource *res;
 	int ret;
 
 	BUG_ON(*out_surf || *out_buf);
 
-	ret = vmw_user_surface_lookup_handle(dev_priv, tfile, handle, out_surf);
-	if (!ret)
+	ret = vmw_user_resource_lookup_handle(dev_priv, tfile, handle,
+					      user_surface_converter,
+					      &res);
+	if (!ret) {
+		*out_surf = vmw_res_to_srf(res);
 		return 0;
+	}
 
+	*out_surf = NULL;
 	ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf);
 	return ret;
 }
 
-
-int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
-				   struct ttm_object_file *tfile,
-				   uint32_t handle, struct vmw_surface **out)
-{
-	struct vmw_resource *res;
-	struct vmw_surface *srf;
-	struct vmw_user_surface *user_srf;
-	struct ttm_base_object *base;
-	int ret = -EINVAL;
-
-	base = ttm_base_object_lookup(tfile, handle);
-	if (unlikely(base == NULL))
-		return -EINVAL;
-
-	if (unlikely(base->object_type != VMW_RES_SURFACE))
-		goto out_bad_resource;
-
-	user_srf = container_of(base, struct vmw_user_surface, base);
-	srf = &user_srf->srf;
-	res = &srf->res;
-
-	read_lock(&dev_priv->resource_lock);
-
-	if (!res->avail || res->res_free != &vmw_user_surface_free) {
-		read_unlock(&dev_priv->resource_lock);
-		goto out_bad_resource;
-	}
-
-	kref_get(&res->kref);
-	read_unlock(&dev_priv->resource_lock);
-
-	*out = srf;
-	ret = 0;
-
-out_bad_resource:
-	ttm_base_object_unref(&base);
-
-	return ret;
-}
-
-static void vmw_user_surface_base_release(struct ttm_base_object **p_base)
-{
-	struct ttm_base_object *base = *p_base;
-	struct vmw_user_surface *user_srf =
-	    container_of(base, struct vmw_user_surface, base);
-	struct vmw_resource *res = &user_srf->srf.res;
-
-	*p_base = NULL;
-	vmw_resource_unreference(&res);
-}
-
-int vmw_surface_destroy_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv)
-{
-	struct drm_vmw_surface_arg *arg = (struct drm_vmw_surface_arg *)data;
-	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-
-	return ttm_ref_object_base_unref(tfile, arg->sid, TTM_REF_USAGE);
-}
-
-int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
-			     struct drm_file *file_priv)
-{
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_surface *user_srf;
-	struct vmw_surface *srf;
-	struct vmw_resource *res;
-	struct vmw_resource *tmp;
-	union drm_vmw_surface_create_arg *arg =
-	    (union drm_vmw_surface_create_arg *)data;
-	struct drm_vmw_surface_create_req *req = &arg->req;
-	struct drm_vmw_surface_arg *rep = &arg->rep;
-	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	struct drm_vmw_size __user *user_sizes;
-	int ret;
-	int i, j;
-	uint32_t cur_bo_offset;
-	struct drm_vmw_size *cur_size;
-	struct vmw_surface_offset *cur_offset;
-	uint32_t stride_bpp;
-	uint32_t bpp;
-	uint32_t num_sizes;
-	uint32_t size;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-
-	if (unlikely(vmw_user_surface_size == 0))
-		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
-			128;
-
-	num_sizes = 0;
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
-		num_sizes += req->mip_levels[i];
-
-	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
-	    DRM_VMW_MAX_MIP_LEVELS)
-		return -EINVAL;
-
-	size = vmw_user_surface_size + 128 +
-		ttm_round_pot(num_sizes * sizeof(struct drm_vmw_size)) +
-		ttm_round_pot(num_sizes * sizeof(struct vmw_surface_offset));
-
-
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
-				   size, false, true);
-	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("Out of graphics memory for surface"
-				  " creation.\n");
-		goto out_unlock;
-	}
-
-	user_srf = kmalloc(sizeof(*user_srf), GFP_KERNEL);
-	if (unlikely(user_srf == NULL)) {
-		ret = -ENOMEM;
-		goto out_no_user_srf;
-	}
-
-	srf = &user_srf->srf;
-	res = &srf->res;
-
-	srf->flags = req->flags;
-	srf->format = req->format;
-	srf->scanout = req->scanout;
-	srf->backup = NULL;
-
-	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
-	srf->num_sizes = num_sizes;
-	user_srf->size = size;
-
-	srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL);
-	if (unlikely(srf->sizes == NULL)) {
-		ret = -ENOMEM;
-		goto out_no_sizes;
-	}
-	srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets),
-			       GFP_KERNEL);
-	if (unlikely(srf->sizes == NULL)) {
-		ret = -ENOMEM;
-		goto out_no_offsets;
-	}
-
-	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
-	    req->size_addr;
-
-	ret = copy_from_user(srf->sizes, user_sizes,
-			     srf->num_sizes * sizeof(*srf->sizes));
-	if (unlikely(ret != 0)) {
-		ret = -EFAULT;
-		goto out_no_copy;
-	}
-
-	cur_bo_offset = 0;
-	cur_offset = srf->offsets;
-	cur_size = srf->sizes;
-
-	bpp = vmw_sf_bpp[srf->format].bpp;
-	stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
-
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
-		for (j = 0; j < srf->mip_levels[i]; ++j) {
-			uint32_t stride =
-				(cur_size->width * stride_bpp + 7) >> 3;
-
-			cur_offset->face = i;
-			cur_offset->mip = j;
-			cur_offset->bo_offset = cur_bo_offset;
-			cur_bo_offset += stride * cur_size->height *
-				cur_size->depth * bpp / stride_bpp;
-			++cur_offset;
-			++cur_size;
-		}
-	}
-	srf->backup_size = cur_bo_offset;
-
-	if (srf->scanout &&
-	    srf->num_sizes == 1 &&
-	    srf->sizes[0].width == 64 &&
-	    srf->sizes[0].height == 64 &&
-	    srf->format == SVGA3D_A8R8G8B8) {
-
-		/* allocate image area and clear it */
-		srf->snooper.image = kzalloc(64 * 64 * 4, GFP_KERNEL);
-		if (!srf->snooper.image) {
-			DRM_ERROR("Failed to allocate cursor_image\n");
-			ret = -ENOMEM;
-			goto out_no_copy;
-		}
-	} else {
-		srf->snooper.image = NULL;
-	}
-	srf->snooper.crtc = NULL;
-
-	user_srf->base.shareable = false;
-	user_srf->base.tfile = NULL;
-
-	/**
-	 * From this point, the generic resource management functions
-	 * destroy the object on failure.
-	 */
-
-	ret = vmw_surface_init(dev_priv, srf, vmw_user_surface_free);
-	if (unlikely(ret != 0))
-		goto out_unlock;
-
-	tmp = vmw_resource_reference(&srf->res);
-	ret = ttm_base_object_init(tfile, &user_srf->base,
-				   req->shareable, VMW_RES_SURFACE,
-				   &vmw_user_surface_base_release, NULL);
-
-	if (unlikely(ret != 0)) {
-		vmw_resource_unreference(&tmp);
-		vmw_resource_unreference(&res);
-		goto out_unlock;
-	}
-
-	rep->sid = user_srf->base.hash.key;
-	if (rep->sid == SVGA3D_INVALID_ID)
-		DRM_ERROR("Created bad Surface ID.\n");
-
-	vmw_resource_unreference(&res);
-
-	ttm_read_unlock(&vmaster->lock);
-	return 0;
-out_no_copy:
-	kfree(srf->offsets);
-out_no_offsets:
-	kfree(srf->sizes);
-out_no_sizes:
-	kfree(user_srf);
-out_no_user_srf:
-	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
-out_unlock:
-	ttm_read_unlock(&vmaster->lock);
-	return ret;
-}
-
-int vmw_surface_reference_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv)
-{
-	union drm_vmw_surface_reference_arg *arg =
-	    (union drm_vmw_surface_reference_arg *)data;
-	struct drm_vmw_surface_arg *req = &arg->req;
-	struct drm_vmw_surface_create_req *rep = &arg->rep;
-	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	struct vmw_surface *srf;
-	struct vmw_user_surface *user_srf;
-	struct drm_vmw_size __user *user_sizes;
-	struct ttm_base_object *base;
-	int ret = -EINVAL;
-
-	base = ttm_base_object_lookup(tfile, req->sid);
-	if (unlikely(base == NULL)) {
-		DRM_ERROR("Could not find surface to reference.\n");
-		return -EINVAL;
-	}
-
-	if (unlikely(base->object_type != VMW_RES_SURFACE))
-		goto out_bad_resource;
-
-	user_srf = container_of(base, struct vmw_user_surface, base);
-	srf = &user_srf->srf;
-
-	ret = ttm_ref_object_add(tfile, &user_srf->base, TTM_REF_USAGE, NULL);
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Could not add a reference to a surface.\n");
-		goto out_no_reference;
-	}
-
-	rep->flags = srf->flags;
-	rep->format = srf->format;
-	memcpy(rep->mip_levels, srf->mip_levels, sizeof(srf->mip_levels));
-	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
-	    rep->size_addr;
-
-	if (user_sizes)
-		ret = copy_to_user(user_sizes, srf->sizes,
-				   srf->num_sizes * sizeof(*srf->sizes));
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("copy_to_user failed %p %u\n",
-			  user_sizes, srf->num_sizes);
-		ret = -EFAULT;
-	}
-out_bad_resource:
-out_no_reference:
-	ttm_base_object_unref(&base);
-
-	return ret;
-}
-
-int vmw_surface_check(struct vmw_private *dev_priv,
-		      struct ttm_object_file *tfile,
-		      uint32_t handle, int *id)
-{
-	struct ttm_base_object *base;
-	struct vmw_user_surface *user_srf;
-
-	int ret = -EPERM;
-
-	base = ttm_base_object_lookup(tfile, handle);
-	if (unlikely(base == NULL))
-		return -EINVAL;
-
-	if (unlikely(base->object_type != VMW_RES_SURFACE))
-		goto out_bad_surface;
-
-	user_srf = container_of(base, struct vmw_user_surface, base);
-	*id = user_srf->srf.res.id;
-	ret = 0;
-
-out_bad_surface:
-	/**
-	 * FIXME: May deadlock here when called from the
-	 * command parsing code.
-	 */
-
-	ttm_base_object_unref(&base);
-	return ret;
-}
-
 /**
  * Buffer management.
  */
@@ -1562,11 +373,11 @@
 	acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct vmw_dma_buffer));
 	memset(vmw_bo, 0, sizeof(*vmw_bo));
 
-	INIT_LIST_HEAD(&vmw_bo->validate_list);
+	INIT_LIST_HEAD(&vmw_bo->res_list);
 
 	ret = ttm_bo_init(bdev, &vmw_bo->base, size,
 			  ttm_bo_type_device, placement,
-			  0, 0, interruptible,
+			  0, interruptible,
 			  NULL, acc_size, NULL, bo_free);
 	return ret;
 }
@@ -1575,7 +386,7 @@
 {
 	struct vmw_user_dma_buffer *vmw_user_bo = vmw_user_dma_buffer(bo);
 
-	kfree(vmw_user_bo);
+	ttm_base_object_kfree(vmw_user_bo, base);
 }
 
 static void vmw_user_dmabuf_release(struct ttm_base_object **p_base)
@@ -1594,6 +405,79 @@
 	ttm_bo_unref(&bo);
 }
 
+/**
+ * vmw_user_dmabuf_alloc - Allocate a user dma buffer
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @tfile: Pointer to a struct ttm_object_file on which to register the user
+ * object.
+ * @size: Size of the dma buffer.
+ * @shareable: Boolean whether the buffer is shareable with other open files.
+ * @handle: Pointer to where the handle value should be assigned.
+ * @p_dma_buf: Pointer to where the refcounted struct vmw_dma_buffer pointer
+ * should be assigned.
+ */
+int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv,
+			  struct ttm_object_file *tfile,
+			  uint32_t size,
+			  bool shareable,
+			  uint32_t *handle,
+			  struct vmw_dma_buffer **p_dma_buf)
+{
+	struct vmw_user_dma_buffer *user_bo;
+	struct ttm_buffer_object *tmp;
+	int ret;
+
+	user_bo = kzalloc(sizeof(*user_bo), GFP_KERNEL);
+	if (unlikely(user_bo == NULL)) {
+		DRM_ERROR("Failed to allocate a buffer.\n");
+		return -ENOMEM;
+	}
+
+	ret = vmw_dmabuf_init(dev_priv, &user_bo->dma, size,
+			      &vmw_vram_sys_placement, true,
+			      &vmw_user_dmabuf_destroy);
+	if (unlikely(ret != 0))
+		return ret;
+
+	tmp = ttm_bo_reference(&user_bo->dma.base);
+	ret = ttm_base_object_init(tfile,
+				   &user_bo->base,
+				   shareable,
+				   ttm_buffer_type,
+				   &vmw_user_dmabuf_release, NULL);
+	if (unlikely(ret != 0)) {
+		ttm_bo_unref(&tmp);
+		goto out_no_base_object;
+	}
+
+	*p_dma_buf = &user_bo->dma;
+	*handle = user_bo->base.hash.key;
+
+out_no_base_object:
+	return ret;
+}
+
+/**
+ * vmw_user_dmabuf_verify_access - verify access permissions on this
+ * buffer object.
+ *
+ * @bo: Pointer to the buffer object being accessed
+ * @tfile: Identifying the caller.
+ */
+int vmw_user_dmabuf_verify_access(struct ttm_buffer_object *bo,
+				  struct ttm_object_file *tfile)
+{
+	struct vmw_user_dma_buffer *vmw_user_bo;
+
+	if (unlikely(bo->destroy != vmw_user_dmabuf_destroy))
+		return -EPERM;
+
+	vmw_user_bo = vmw_user_dma_buffer(bo);
+	return (vmw_user_bo->base.tfile == tfile ||
+		vmw_user_bo->base.shareable) ? 0 : -EPERM;
+}
+
 int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
@@ -1602,44 +486,27 @@
 	    (union drm_vmw_alloc_dmabuf_arg *)data;
 	struct drm_vmw_alloc_dmabuf_req *req = &arg->req;
 	struct drm_vmw_dmabuf_rep *rep = &arg->rep;
-	struct vmw_user_dma_buffer *vmw_user_bo;
-	struct ttm_buffer_object *tmp;
+	struct vmw_dma_buffer *dma_buf;
+	uint32_t handle;
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
-	vmw_user_bo = kzalloc(sizeof(*vmw_user_bo), GFP_KERNEL);
-	if (unlikely(vmw_user_bo == NULL))
-		return -ENOMEM;
-
 	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0)) {
-		kfree(vmw_user_bo);
+	if (unlikely(ret != 0))
 		return ret;
-	}
 
-	ret = vmw_dmabuf_init(dev_priv, &vmw_user_bo->dma, req->size,
-			      &vmw_vram_sys_placement, true,
-			      &vmw_user_dmabuf_destroy);
+	ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile,
+				    req->size, false, &handle, &dma_buf);
 	if (unlikely(ret != 0))
 		goto out_no_dmabuf;
 
-	tmp = ttm_bo_reference(&vmw_user_bo->dma.base);
-	ret = ttm_base_object_init(vmw_fpriv(file_priv)->tfile,
-				   &vmw_user_bo->base,
-				   false,
-				   ttm_buffer_type,
-				   &vmw_user_dmabuf_release, NULL);
-	if (unlikely(ret != 0))
-		goto out_no_base_object;
-	else {
-		rep->handle = vmw_user_bo->base.hash.key;
-		rep->map_handle = vmw_user_bo->dma.base.addr_space_offset;
-		rep->cur_gmr_id = vmw_user_bo->base.hash.key;
-		rep->cur_gmr_offset = 0;
-	}
+	rep->handle = handle;
+	rep->map_handle = dma_buf->base.addr_space_offset;
+	rep->cur_gmr_id = handle;
+	rep->cur_gmr_offset = 0;
 
-out_no_base_object:
-	ttm_bo_unref(&tmp);
+	vmw_dmabuf_unreference(&dma_buf);
+
 out_no_dmabuf:
 	ttm_read_unlock(&vmaster->lock);
 
@@ -1657,27 +524,6 @@
 					 TTM_REF_USAGE);
 }
 
-uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo,
-				  uint32_t cur_validate_node)
-{
-	struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo);
-
-	if (likely(vmw_bo->on_validate_list))
-		return vmw_bo->cur_validate_node;
-
-	vmw_bo->cur_validate_node = cur_validate_node;
-	vmw_bo->on_validate_list = true;
-
-	return cur_validate_node;
-}
-
-void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo)
-{
-	struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo);
-
-	vmw_bo->on_validate_list = false;
-}
-
 int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
 			   uint32_t handle, struct vmw_dma_buffer **out)
 {
@@ -1706,6 +552,18 @@
 	return 0;
 }
 
+int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
+			      struct vmw_dma_buffer *dma_buf)
+{
+	struct vmw_user_dma_buffer *user_bo;
+
+	if (dma_buf->base.destroy != vmw_user_dmabuf_destroy)
+		return -EINVAL;
+
+	user_bo = container_of(dma_buf, struct vmw_user_dma_buffer, dma);
+	return ttm_ref_object_add(tfile, &user_bo->base, TTM_REF_USAGE, NULL);
+}
+
 /*
  * Stream management
  */
@@ -1730,8 +588,8 @@
 	struct vmw_resource *res = &stream->res;
 	int ret;
 
-	ret = vmw_resource_init(dev_priv, res, &dev_priv->stream_idr,
-				VMW_RES_STREAM, false, res_free, NULL);
+	ret = vmw_resource_init(dev_priv, res, false, res_free,
+				&vmw_stream_func);
 
 	if (unlikely(ret != 0)) {
 		if (res_free == NULL)
@@ -1753,17 +611,13 @@
 	return 0;
 }
 
-/**
- * User-space context management:
- */
-
 static void vmw_user_stream_free(struct vmw_resource *res)
 {
 	struct vmw_user_stream *stream =
 	    container_of(res, struct vmw_user_stream, stream.res);
 	struct vmw_private *dev_priv = res->dev_priv;
 
-	kfree(stream);
+	ttm_base_object_kfree(stream, base);
 	ttm_mem_global_free(vmw_mem_glob(dev_priv),
 			    vmw_user_stream_size);
 }
@@ -1792,9 +646,11 @@
 	struct vmw_user_stream *stream;
 	struct drm_vmw_stream_arg *arg = (struct drm_vmw_stream_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct idr *idr = &dev_priv->res_idr[vmw_res_stream];
 	int ret = 0;
 
-	res = vmw_resource_lookup(dev_priv, &dev_priv->stream_idr, arg->stream_id);
+
+	res = vmw_resource_lookup(dev_priv, idr, arg->stream_id);
 	if (unlikely(res == NULL))
 		return -EINVAL;
 
@@ -1895,7 +751,8 @@
 	struct vmw_resource *res;
 	int ret;
 
-	res = vmw_resource_lookup(dev_priv, &dev_priv->stream_idr, *inout_id);
+	res = vmw_resource_lookup(dev_priv, &dev_priv->res_idr[vmw_res_stream],
+				  *inout_id);
 	if (unlikely(res == NULL))
 		return -EINVAL;
 
@@ -1990,3 +847,453 @@
 	return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
 					 handle, TTM_REF_USAGE);
 }
+
+/**
+ * vmw_resource_buf_alloc - Allocate a backup buffer for a resource.
+ *
+ * @res:            The resource for which to allocate a backup buffer.
+ * @interruptible:  Whether any sleeps during allocation should be
+ *                  performed while interruptible.
+ */
+static int vmw_resource_buf_alloc(struct vmw_resource *res,
+				  bool interruptible)
+{
+	unsigned long size =
+		(res->backup_size + PAGE_SIZE - 1) & PAGE_MASK;
+	struct vmw_dma_buffer *backup;
+	int ret;
+
+	if (likely(res->backup)) {
+		BUG_ON(res->backup->base.num_pages * PAGE_SIZE < size);
+		return 0;
+	}
+
+	backup = kzalloc(sizeof(*backup), GFP_KERNEL);
+	if (unlikely(backup == NULL))
+		return -ENOMEM;
+
+	ret = vmw_dmabuf_init(res->dev_priv, backup, res->backup_size,
+			      res->func->backup_placement,
+			      interruptible,
+			      &vmw_dmabuf_bo_free);
+	if (unlikely(ret != 0))
+		goto out_no_dmabuf;
+
+	res->backup = backup;
+
+out_no_dmabuf:
+	return ret;
+}
+
+/**
+ * vmw_resource_do_validate - Make a resource up-to-date and visible
+ *                            to the device.
+ *
+ * @res:            The resource to make visible to the device.
+ * @val_buf:        Information about a buffer possibly
+ *                  containing backup data if a bind operation is needed.
+ *
+ * On hardware resource shortage, this function returns -EBUSY and
+ * should be retried once resources have been freed up.
+ */
+static int vmw_resource_do_validate(struct vmw_resource *res,
+				    struct ttm_validate_buffer *val_buf)
+{
+	int ret = 0;
+	const struct vmw_res_func *func = res->func;
+
+	if (unlikely(res->id == -1)) {
+		ret = func->create(res);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	if (func->bind &&
+	    ((func->needs_backup && list_empty(&res->mob_head) &&
+	      val_buf->bo != NULL) ||
+	     (!func->needs_backup && val_buf->bo != NULL))) {
+		ret = func->bind(res, val_buf);
+		if (unlikely(ret != 0))
+			goto out_bind_failed;
+		if (func->needs_backup)
+			list_add_tail(&res->mob_head, &res->backup->res_list);
+	}
+
+	/*
+	 * Only do this on write operations, and move to
+	 * vmw_resource_unreserve if it can be called after
+	 * backup buffers have been unreserved. Otherwise
+	 * sort out locking.
+	 */
+	res->res_dirty = true;
+
+	return 0;
+
+out_bind_failed:
+	func->destroy(res);
+
+	return ret;
+}
+
+/**
+ * vmw_resource_unreserve - Unreserve a resource previously reserved for
+ * command submission.
+ *
+ * @res:               Pointer to the struct vmw_resource to unreserve.
+ * @new_backup:        Pointer to new backup buffer if command submission
+ *                     switched backup buffers.
+ * @new_backup_offset: New backup offset if @new_backup is !NULL.
+ *
+ * Currently unreserving a resource means putting it back on the device's
+ * resource lru list, so that it can be evicted if necessary.
+ */
+void vmw_resource_unreserve(struct vmw_resource *res,
+			    struct vmw_dma_buffer *new_backup,
+			    unsigned long new_backup_offset)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+
+	if (!list_empty(&res->lru_head))
+		return;
+
+	if (new_backup && new_backup != res->backup) {
+
+		if (res->backup) {
+			BUG_ON(atomic_read(&res->backup->base.reserved) == 0);
+			list_del_init(&res->mob_head);
+			vmw_dmabuf_unreference(&res->backup);
+		}
+
+		res->backup = vmw_dmabuf_reference(new_backup);
+		BUG_ON(atomic_read(&new_backup->base.reserved) == 0);
+		list_add_tail(&res->mob_head, &new_backup->res_list);
+	}
+	if (new_backup)
+		res->backup_offset = new_backup_offset;
+
+	if (!res->func->may_evict)
+		return;
+
+	write_lock(&dev_priv->resource_lock);
+	list_add_tail(&res->lru_head,
+		      &res->dev_priv->res_lru[res->func->res_type]);
+	write_unlock(&dev_priv->resource_lock);
+}
+
+/**
+ * vmw_resource_check_buffer - Check whether a backup buffer is needed
+ *                             for a resource and in that case, allocate
+ *                             one, reserve and validate it.
+ *
+ * @res:            The resource for which to allocate a backup buffer.
+ * @interruptible:  Whether any sleeps during allocation should be
+ *                  performed while interruptible.
+ * @val_buf:        On successful return contains data about the
+ *                  reserved and validated backup buffer.
+ */
+int vmw_resource_check_buffer(struct vmw_resource *res,
+			      bool interruptible,
+			      struct ttm_validate_buffer *val_buf)
+{
+	struct list_head val_list;
+	bool backup_dirty = false;
+	int ret;
+
+	if (unlikely(res->backup == NULL)) {
+		ret = vmw_resource_buf_alloc(res, interruptible);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	INIT_LIST_HEAD(&val_list);
+	val_buf->bo = ttm_bo_reference(&res->backup->base);
+	list_add_tail(&val_buf->head, &val_list);
+	ret = ttm_eu_reserve_buffers(&val_list);
+	if (unlikely(ret != 0))
+		goto out_no_reserve;
+
+	if (res->func->needs_backup && list_empty(&res->mob_head))
+		return 0;
+
+	backup_dirty = res->backup_dirty;
+	ret = ttm_bo_validate(&res->backup->base,
+			      res->func->backup_placement,
+			      true, false);
+
+	if (unlikely(ret != 0))
+		goto out_no_validate;
+
+	return 0;
+
+out_no_validate:
+	ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	ttm_bo_unref(&val_buf->bo);
+	if (backup_dirty)
+		vmw_dmabuf_unreference(&res->backup);
+
+	return ret;
+}
+
+/**
+ * vmw_resource_reserve - Reserve a resource for command submission
+ *
+ * @res:            The resource to reserve.
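+ * @no_backup:      If true, skip allocation of a backup buffer at this point.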
+ *
+ * This function takes the resource off the LRU list and makes sure
+ * a backup buffer is present for guest-backed resources. However,
+ * the buffer may not be bound to the resource at this point.
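+ * A successful reserve is expected to be paired with a later call to
+ * vmw_resource_unreserve().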
+ *
+ */
+int vmw_resource_reserve(struct vmw_resource *res, bool no_backup)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+	int ret;
+
+	write_lock(&dev_priv->resource_lock);
+	list_del_init(&res->lru_head);
+	write_unlock(&dev_priv->resource_lock);
+
+	if (res->func->needs_backup && res->backup == NULL &&
+	    !no_backup) {
+		ret = vmw_resource_buf_alloc(res, true);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * vmw_resource_backoff_reservation - Unreserve and unreference a
+ *                                    backup buffer
+ *
+ * @val_buf:        Backup buffer information.
+ */
+void vmw_resource_backoff_reservation(struct ttm_validate_buffer *val_buf)
+{
+	struct list_head val_list;
+
+	if (likely(val_buf->bo == NULL))
+		return;
+
+	INIT_LIST_HEAD(&val_list);
+	list_add_tail(&val_buf->head, &val_list);
+	ttm_eu_backoff_reservation(&val_list);
+	ttm_bo_unref(&val_buf->bo);
+}
+
+/**
+ * vmw_resource_do_evict - Evict a resource, and transfer its data
+ *                         to a backup buffer.
+ *
+ * @res:            The resource to evict.
+ */
+int vmw_resource_do_evict(struct vmw_resource *res)
+{
+	struct ttm_validate_buffer val_buf;
+	const struct vmw_res_func *func = res->func;
+	int ret;
+
+	BUG_ON(!func->may_evict);
+
+	val_buf.bo = NULL;
+	ret = vmw_resource_check_buffer(res, true, &val_buf);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (unlikely(func->unbind != NULL &&
+		     (!func->needs_backup || !list_empty(&res->mob_head)))) {
+		ret = func->unbind(res, res->res_dirty, &val_buf);
+		if (unlikely(ret != 0))
+			goto out_no_unbind;
+		list_del_init(&res->mob_head);
+	}
+	ret = func->destroy(res);
+	res->backup_dirty = true;
+	res->res_dirty = false;
+out_no_unbind:
+	vmw_resource_backoff_reservation(&val_buf);
+
+	return ret;
+}
+
+
+/**
+ * vmw_resource_validate - Make a resource up-to-date and visible
+ *                         to the device.
+ *
+ * @res:            The resource to make visible to the device.
+ *
+ * On successful return, any backup DMA buffer pointed to by @res->backup will
+ * be reserved and validated.
+ * On hardware resource shortage, this function will repeatedly evict
+ * resources of the same type until the validation succeeds.
+ */
+int vmw_resource_validate(struct vmw_resource *res)
+{
+	int ret;
+	struct vmw_resource *evict_res;
+	struct vmw_private *dev_priv = res->dev_priv;
+	struct list_head *lru_list = &dev_priv->res_lru[res->func->res_type];
+	struct ttm_validate_buffer val_buf;
+
+	if (likely(!res->func->may_evict))
+		return 0;
+
+	val_buf.bo = NULL;
+	if (res->backup)
+		val_buf.bo = &res->backup->base;
+	do {
+		ret = vmw_resource_do_validate(res, &val_buf);
+		if (likely(ret != -EBUSY))
+			break;
+
+		write_lock(&dev_priv->resource_lock);
+		if (list_empty(lru_list) || !res->func->may_evict) {
+			DRM_ERROR("Out of device device id entries "
+				  "for %s.\n", res->func->type_name);
+			ret = -EBUSY;
+			write_unlock(&dev_priv->resource_lock);
+			break;
+		}
+
+		evict_res = vmw_resource_reference
+			(list_first_entry(lru_list, struct vmw_resource,
+					  lru_head));
+		list_del_init(&evict_res->lru_head);
+
+		write_unlock(&dev_priv->resource_lock);
+		vmw_resource_do_evict(evict_res);
+		vmw_resource_unreference(&evict_res);
+	} while (1);
+
+	if (unlikely(ret != 0))
+		goto out_no_validate;
+	else if (!res->func->needs_backup && res->backup) {
+		list_del_init(&res->mob_head);
+		vmw_dmabuf_unreference(&res->backup);
+	}
+
+	return 0;
+
+out_no_validate:
+	return ret;
+}
+
+/**
+ * vmw_fence_single_bo - Utility function to fence a single TTM buffer
+ *                       object without unreserving it.
+ *
+ * @bo:             Pointer to the struct ttm_buffer_object to fence.
+ * @fence:          Pointer to the fence. If NULL, this function will
+ *                  insert a fence into the command stream.
+ *
+ * Contrary to the ttm_eu version of this function, it takes only
+ * a single buffer object instead of a list, and it also doesn't
+ * unreserve the buffer object, which needs to be done separately.
+ */
+void vmw_fence_single_bo(struct ttm_buffer_object *bo,
+			 struct vmw_fence_obj *fence)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
+	struct vmw_fence_obj *old_fence_obj;
+	struct vmw_private *dev_priv =
+		container_of(bdev, struct vmw_private, bdev);
+
+	if (fence == NULL)
+		vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
+	else
+		driver->sync_obj_ref(fence);
+
+	spin_lock(&bdev->fence_lock);
+
+	old_fence_obj = bo->sync_obj;
+	bo->sync_obj = fence;
+
+	spin_unlock(&bdev->fence_lock);
+
+	if (old_fence_obj)
+		vmw_fence_obj_unreference(&old_fence_obj);
+}
+
+/**
+ * vmw_resource_move_notify - TTM move_notify_callback
+ *
+ * @bo:             The TTM buffer object about to move.
+ * @mem:            The struct ttm_mem_reg indicating to what memory
+ *                  region the move is taking place.
+ *
+ * For now does nothing.
+ */
+void vmw_resource_move_notify(struct ttm_buffer_object *bo,
+			      struct ttm_mem_reg *mem)
+{
+}
+
+/**
+ * vmw_resource_needs_backup - Return whether a resource needs a backup buffer.
+ *
+ * @res:            The resource being queried.
+ */
+bool vmw_resource_needs_backup(const struct vmw_resource *res)
+{
+	return res->func->needs_backup;
+}
+
+/**
+ * vmw_resource_evict_type - Evict all resources of a specific type
+ *
+ * @dev_priv:       Pointer to a device private struct
+ * @type:           The resource type to evict
+ *
+ * To avoid thrashing or starvation, or as part of the hibernation sequence,
+ * evict all evictable resources of a specific type.
+ */
+static void vmw_resource_evict_type(struct vmw_private *dev_priv,
+				    enum vmw_res_type type)
+{
+	struct list_head *lru_list = &dev_priv->res_lru[type];
+	struct vmw_resource *evict_res;
+
+	do {
+		write_lock(&dev_priv->resource_lock);
+
+		if (list_empty(lru_list))
+			goto out_unlock;
+
+		evict_res = vmw_resource_reference(
+			list_first_entry(lru_list, struct vmw_resource,
+					 lru_head));
+		list_del_init(&evict_res->lru_head);
+		write_unlock(&dev_priv->resource_lock);
+		vmw_resource_do_evict(evict_res);
+		vmw_resource_unreference(&evict_res);
+	} while (1);
+
+out_unlock:
+	write_unlock(&dev_priv->resource_lock);
+}
+
+/**
+ * vmw_resource_evict_all - Evict all evictable resources
+ *
+ * @dev_priv:       Pointer to a device private struct
+ *
+ * To avoid thrashing or starvation, or as part of the hibernation sequence,
+ * evict all evictable resources. In particular this means that all
+ * guest-backed resources that are registered with the device are
+ * evicted and the OTable becomes clean.
+ */
+void vmw_resource_evict_all(struct vmw_private *dev_priv)
+{
+	enum vmw_res_type type;
+
+	mutex_lock(&dev_priv->cmdbuf_mutex);
+
+	for (type = 0; type < vmw_res_max; ++type)
+		vmw_resource_evict_type(dev_priv, type);
+
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
new file mode 100644
index 0000000..f3adeed
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
@@ -0,0 +1,84 @@
+/**************************************************************************
+ *
+ * Copyright © 2012 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _VMWGFX_RESOURCE_PRIV_H_
+#define _VMWGFX_RESOURCE_PRIV_H_
+
+#include "vmwgfx_drv.h"
+
+/**
+ * struct vmw_user_resource_conv - Identify a derived user-exported resource
+ * type and provide a function to convert its ttm_base_object pointer to
+ * a struct vmw_resource
+ */
+struct vmw_user_resource_conv {
+	enum ttm_object_type object_type;
+	struct vmw_resource *(*base_obj_to_res)(struct ttm_base_object *base);
+	void (*res_free) (struct vmw_resource *res);
+};
+
+/**
+ * struct vmw_res_func - members and functions common for a resource type
+ *
+ * @res_type:          Enum that identifies the lru list to use for eviction.
+ * @needs_backup:      Whether the resource is guest-backed and needs
+ *                     persistent buffer storage.
+ * @type_name:         String that identifies the resource type.
+ * @backup_placement:  TTM placement for backup buffers.
+ * @may_evict:         Whether the resource may be evicted.
+ * @create:            Create a hardware resource.
+ * @destroy:           Destroy a hardware resource.
+ * @bind:              Bind a hardware resource to persistent buffer storage.
+ * @unbind:            Unbind a hardware resource from persistent
+ *                     buffer storage.
+ */
+
+struct vmw_res_func {
+	enum vmw_res_type res_type;
+	bool needs_backup;
+	const char *type_name;
+	struct ttm_placement *backup_placement;
+	bool may_evict;
+
+	int (*create) (struct vmw_resource *res);
+	int (*destroy) (struct vmw_resource *res);
+	int (*bind) (struct vmw_resource *res,
+		     struct ttm_validate_buffer *val_buf);
+	int (*unbind) (struct vmw_resource *res,
+		       bool readback,
+		       struct ttm_validate_buffer *val_buf);
+};
+
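+/*
+ * Resource id and lifetime helpers, implemented in vmwgfx_resource.c.
+ */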
+int vmw_resource_alloc_id(struct vmw_resource *res);
+void vmw_resource_release_id(struct vmw_resource *res);
+int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res,
+		      bool delay_id,
+		      void (*res_free) (struct vmw_resource *res),
+		      const struct vmw_res_func *func);
+void vmw_resource_activate(struct vmw_resource *res,
+			   void (*hw_destroy) (struct vmw_resource *));
+#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index 6deaf2f..26387c3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -468,7 +468,7 @@
 
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
-	drm_connector_attach_property(connector,
+	drm_object_attach_property(&connector->base,
 				      dev->mode_config.dirty_info_property,
 				      1);
 
@@ -485,7 +485,7 @@
 		return -EINVAL;
 	}
 
-	if (!(dev_priv->fifo.capabilities & SVGA_FIFO_CAP_SCREEN_OBJECT_2)) {
+	if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) {
 		DRM_INFO("Not using screen objects,"
 			 " missing cap SCREEN_OBJECT_2\n");
 		return -ENOSYS;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
new file mode 100644
index 0000000..5828143
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -0,0 +1,893 @@
+/**************************************************************************
+ *
+ * Copyright © 2009-2012 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vmwgfx_drv.h"
+#include "vmwgfx_resource_priv.h"
+#include <ttm/ttm_placement.h>
+#include "svga3d_surfacedefs.h"
+
+/**
+ * struct vmw_user_surface - User-space visible surface resource
+ *
+ * @base:           The TTM base object handling user-space visibility.
+ * @srf:            The surface metadata.
+ * @size:           TTM accounting size for the surface.
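+ * @backup_handle:  User-space handle of the surface backup buffer, if any.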
+ */
+struct vmw_user_surface {
+	struct ttm_base_object base;
+	struct vmw_surface srf;
+	uint32_t size;
+	uint32_t backup_handle;
+};
+
+/**
+ * struct vmw_surface_offset - Backing store mip level offset info
+ *
+ * @face:           Surface face.
+ * @mip:            Mip level.
+ * @bo_offset:      Offset into backing store of this mip level.
+ *
+ */
+struct vmw_surface_offset {
+	uint32_t face;
+	uint32_t mip;
+	uint32_t bo_offset;
+};
+
+static void vmw_user_surface_free(struct vmw_resource *res);
+static struct vmw_resource *
+vmw_user_surface_base_to_res(struct ttm_base_object *base);
+static int vmw_legacy_srf_bind(struct vmw_resource *res,
+			       struct ttm_validate_buffer *val_buf);
+static int vmw_legacy_srf_unbind(struct vmw_resource *res,
+				 bool readback,
+				 struct ttm_validate_buffer *val_buf);
+static int vmw_legacy_srf_create(struct vmw_resource *res);
+static int vmw_legacy_srf_destroy(struct vmw_resource *res);
+
+static const struct vmw_user_resource_conv user_surface_conv = {
+	.object_type = VMW_RES_SURFACE,
+	.base_obj_to_res = vmw_user_surface_base_to_res,
+	.res_free = vmw_user_surface_free
+};
+
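+/*
+ * Converter used by the generic user resource lookup code,
+ * vmw_user_resource_lookup_handle(), to identify surface handles.
+ */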
+const struct vmw_user_resource_conv *user_surface_converter =
+	&user_surface_conv;
+
+
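+/* TTM memory accounting size of a struct vmw_user_surface, set up lazily. */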
+static uint64_t vmw_user_surface_size;
+
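+/*
+ * Resource type callbacks for legacy surfaces.
+ */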
+static const struct vmw_res_func vmw_legacy_surface_func = {
+	.res_type = vmw_res_surface,
+	.needs_backup = false,
+	.may_evict = true,
+	.type_name = "legacy surfaces",
+	.backup_placement = &vmw_srf_placement,
+	.create = &vmw_legacy_srf_create,
+	.destroy = &vmw_legacy_srf_destroy,
+	.bind = &vmw_legacy_srf_bind,
+	.unbind = &vmw_legacy_srf_unbind
+};
+
+/**
+ * struct vmw_surface_dma - SVGA3D DMA command
+ */
+struct vmw_surface_dma {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdSurfaceDMA body;
+	SVGA3dCopyBox cb;
+	SVGA3dCmdSurfaceDMASuffix suffix;
+};
+
+/**
+ * struct vmw_surface_define - SVGA3D Surface Define command
+ */
+struct vmw_surface_define {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDefineSurface body;
+};
+
+/**
+ * struct vmw_surface_destroy - SVGA3D Surface Destroy command
+ */
+struct vmw_surface_destroy {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDestroySurface body;
+};
+
+
+/**
+ * vmw_surface_dma_size - Compute fifo size for a dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface dma command for backup or
+ * restoration of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_dma_size(const struct vmw_surface *srf)
+{
+	return srf->num_sizes * sizeof(struct vmw_surface_dma);
+}
+
+
+/**
+ * vmw_surface_define_size - Compute fifo size for a surface define command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface define command for the definition
+ * of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_define_size(const struct vmw_surface *srf)
+{
+	return sizeof(struct vmw_surface_define) + srf->num_sizes *
+		sizeof(SVGA3dSize);
+}
+
+
+/**
+ * vmw_surface_destroy_size - Compute fifo size for a surface destroy command.
+ *
+ * Computes the required size for a surface destroy command for the destruction
+ * of a hw surface.
+ */
+static inline uint32_t vmw_surface_destroy_size(void)
+{
+	return sizeof(struct vmw_surface_destroy);
+}
+
+/**
+ * vmw_surface_destroy_encode - Encode a surface_destroy command.
+ *
+ * @id: The surface id
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_destroy_encode(uint32_t id,
+				       void *cmd_space)
+{
+	struct vmw_surface_destroy *cmd = (struct vmw_surface_destroy *)
+		cmd_space;
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DESTROY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.sid = id;
+}
+
+/**
+ * vmw_surface_define_encode - Encode a surface_define command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_define_encode(const struct vmw_surface *srf,
+				      void *cmd_space)
+{
+	struct vmw_surface_define *cmd = (struct vmw_surface_define *)
+		cmd_space;
+	struct drm_vmw_size *src_size;
+	SVGA3dSize *cmd_size;
+	uint32_t cmd_len;
+	int i;
+
+	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DEFINE;
+	cmd->header.size = cmd_len;
+	cmd->body.sid = srf->res.id;
+	cmd->body.surfaceFlags = srf->flags;
+	cmd->body.format = cpu_to_le32(srf->format);
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		cmd->body.face[i].numMipLevels = srf->mip_levels[i];
+
+	cmd += 1;
+	cmd_size = (SVGA3dSize *) cmd;
+	src_size = srf->sizes;
+
+	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
+		cmd_size->width = src_size->width;
+		cmd_size->height = src_size->height;
+		cmd_size->depth = src_size->depth;
+	}
+}
+
+/**
+ * vmw_surface_dma_encode - Encode a surface_dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ * @ptr: Pointer to an SVGAGuestPtr indicating where the surface contents
+ * should be placed or read from.
+ * @to_surface: Boolean whether to DMA to the surface or from the surface.
+ */
+static void vmw_surface_dma_encode(struct vmw_surface *srf,
+				   void *cmd_space,
+				   const SVGAGuestPtr *ptr,
+				   bool to_surface)
+{
+	uint32_t i;
+	struct vmw_surface_dma *cmd = (struct vmw_surface_dma *)cmd_space;
+	const struct svga3d_surface_desc *desc =
+		svga3dsurface_get_desc(srf->format);
+
+	for (i = 0; i < srf->num_sizes; ++i) {
+		SVGA3dCmdHeader *header = &cmd->header;
+		SVGA3dCmdSurfaceDMA *body = &cmd->body;
+		SVGA3dCopyBox *cb = &cmd->cb;
+		SVGA3dCmdSurfaceDMASuffix *suffix = &cmd->suffix;
+		const struct vmw_surface_offset *cur_offset = &srf->offsets[i];
+		const struct drm_vmw_size *cur_size = &srf->sizes[i];
+
+		header->id = SVGA_3D_CMD_SURFACE_DMA;
+		header->size = sizeof(*body) + sizeof(*cb) + sizeof(*suffix);
+
+		body->guest.ptr = *ptr;
+		body->guest.ptr.offset += cur_offset->bo_offset;
+		body->guest.pitch = svga3dsurface_calculate_pitch(desc,
+								  cur_size);
+		body->host.sid = srf->res.id;
+		body->host.face = cur_offset->face;
+		body->host.mipmap = cur_offset->mip;
+		body->transfer = ((to_surface) ?  SVGA3D_WRITE_HOST_VRAM :
+				  SVGA3D_READ_HOST_VRAM);
+		cb->x = 0;
+		cb->y = 0;
+		cb->z = 0;
+		cb->srcx = 0;
+		cb->srcy = 0;
+		cb->srcz = 0;
+		cb->w = cur_size->width;
+		cb->h = cur_size->height;
+		cb->d = cur_size->depth;
+
+		suffix->suffixSize = sizeof(*suffix);
+		suffix->maximumOffset =
+			svga3dsurface_get_image_buffer_size(desc, cur_size,
+							    body->guest.pitch);
+		suffix->flags.discard = 0;
+		suffix->flags.unsynchronized = 0;
+		suffix->flags.reserved = 0;
+		++cmd;
+	}
+}
+
+
+/**
+ * vmw_hw_surface_destroy - destroy a Device surface
+ *
+ * @res:        Pointer to a struct vmw_resource embedded in a struct
+ *              vmw_surface.
+ *
+ * Destroys the device surface associated with a struct vmw_surface, if
+ * any, and adjusts accounting and resource count accordingly.
+ */
+static void vmw_hw_surface_destroy(struct vmw_resource *res)
+{
+
+	struct vmw_private *dev_priv = res->dev_priv;
+	struct vmw_surface *srf;
+	void *cmd;
+
+	if (res->id != -1) {
+
+		cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size());
+		if (unlikely(cmd == NULL)) {
+			DRM_ERROR("Failed reserving FIFO space for surface "
+				  "destruction.\n");
+			return;
+		}
+
+		vmw_surface_destroy_encode(res->id, cmd);
+		vmw_fifo_commit(dev_priv, vmw_surface_destroy_size());
+
+		/*
+		 * used_memory_size_atomic, or separate lock
+		 * to avoid taking dev_priv::cmdbuf_mutex in
+		 * the destroy path.
+		 */
+
+		mutex_lock(&dev_priv->cmdbuf_mutex);
+		srf = vmw_res_to_srf(res);
+		dev_priv->used_memory_size -= res->backup_size;
+		mutex_unlock(&dev_priv->cmdbuf_mutex);
+	}
+	vmw_3d_resource_dec(dev_priv, false);
+}
+
+/**
+ * vmw_legacy_srf_create - Create a device surface as part of the
+ * resource validation process.
+ *
+ * @res: Pointer to a struct vmw_surface.
+ *
+ * If the surface doesn't have a hw id, allocate one and issue a surface
+ * define command to create the device surface.
+ *
+ * Returns -EBUSY if there wasn't sufficient device resources to
+ * complete the validation. Retry after freeing up resources.
+ *
+ * May return other errors if the kernel is out of guest resources.
+ */
+static int vmw_legacy_srf_create(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+	struct vmw_surface *srf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
+
+	if (likely(res->id != -1))
+		return 0;
+
+	srf = vmw_res_to_srf(res);
+	if (unlikely(dev_priv->used_memory_size + res->backup_size >=
+		     dev_priv->memory_size))
+		return -EBUSY;
+
+	/*
+	 * Alloc id for the resource.
+	 */
+
+	ret = vmw_resource_alloc_id(res);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Failed to allocate a surface id.\n");
+		goto out_no_id;
+	}
+
+	if (unlikely(res->id >= SVGA3D_MAX_SURFACE_IDS)) {
+		ret = -EBUSY;
+		goto out_no_fifo;
+	}
+
+	/*
+	 * Encode surface define- commands.
+	 */
+
+	submit_size = vmw_surface_define_size(srf);
+	cmd = vmw_fifo_reserve(dev_priv, submit_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "creation.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
+	}
+
+	vmw_surface_define_encode(srf, cmd);
+	vmw_fifo_commit(dev_priv, submit_size);
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size += res->backup_size;
+	return 0;
+
+out_no_fifo:
+	vmw_resource_release_id(res);
+out_no_id:
+	return ret;
+}
+
+/**
+ * vmw_legacy_srf_dma - Copy backup data to or from a legacy surface.
+ *
+ * @res:            Pointer to a struct vmw_res embedded in a struct
+ *                  vmw_surface.
+ * @val_buf:        Pointer to a struct ttm_validate_buffer containing
+ *                  information about the backup buffer.
+ * @bind:           Boolean whether to DMA to the surface.
+ *
+ * Transfer backup data to or from a legacy surface as part of the
+ * validation process.
+ * May return other errors if the kernel is out of guest resources.
+ * The backup buffer will be fenced or idle upon successful completion,
+ * and if the surface needs persistent backup storage, the backup buffer
+ * will also be returned reserved iff @bind is true.
+ */
+static int vmw_legacy_srf_dma(struct vmw_resource *res,
+			      struct ttm_validate_buffer *val_buf,
+			      bool bind)
+{
+	SVGAGuestPtr ptr;
+	struct vmw_fence_obj *fence;
+	uint32_t submit_size;
+	struct vmw_surface *srf = vmw_res_to_srf(res);
+	uint8_t *cmd;
+	struct vmw_private *dev_priv = res->dev_priv;
+
+	BUG_ON(val_buf->bo == NULL);
+
+	submit_size = vmw_surface_dma_size(srf);
+	cmd = vmw_fifo_reserve(dev_priv, submit_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "DMA.\n");
+		return -ENOMEM;
+	}
+	vmw_bo_get_guest_ptr(val_buf->bo, &ptr);
+	vmw_surface_dma_encode(srf, cmd, &ptr, bind);
+
+	vmw_fifo_commit(dev_priv, submit_size);
+
+	/*
+	 * Create a fence object and fence the backup buffer.
+	 */
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+					  &fence, NULL);
+
+	vmw_fence_single_bo(val_buf->bo, fence);
+
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
+
+	return 0;
+}
+
+/**
+ * vmw_legacy_srf_bind - Perform a legacy surface bind as part of the
+ *                       surface validation process.
+ *
+ * @res:            Pointer to a struct vmw_res embedded in a struct
+ *                  vmw_surface.
+ * @val_buf:        Pointer to a struct ttm_validate_buffer containing
+ *                  information about the backup buffer.
+ *
+ * This function will copy backup data to the surface if the
+ * backup buffer is dirty.
+ */
+static int vmw_legacy_srf_bind(struct vmw_resource *res,
+			       struct ttm_validate_buffer *val_buf)
+{
+	if (!res->backup_dirty)
+		return 0;
+
+	return vmw_legacy_srf_dma(res, val_buf, true);
+}
+
+
+/**
+ * vmw_legacy_srf_unbind - Perform a legacy surface unbind as part of the
+ *                         surface eviction process.
+ *
+ * @res:            Pointer to a struct vmw_res embedded in a struct
+ *                  vmw_surface.
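+ * @readback:       Whether the surface contents should be read back to the
+ *                  backup buffer.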
+ * @val_buf:        Pointer to a struct ttm_validate_buffer containing
+ *                  information about the backup buffer.
+ *
+ * This function will copy backup data from the surface.
+ */
+static int vmw_legacy_srf_unbind(struct vmw_resource *res,
+				 bool readback,
+				 struct ttm_validate_buffer *val_buf)
+{
+	if (unlikely(readback))
+		return vmw_legacy_srf_dma(res, val_buf, false);
+	return 0;
+}
+
+/**
+ * vmw_legacy_srf_destroy - Destroy a device surface as part of a
+ *                          resource eviction process.
+ *
+ * @res:            Pointer to a struct vmw_res embedded in a struct
+ *                  vmw_surface.
+ */
+static int vmw_legacy_srf_destroy(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+	uint32_t submit_size;
+	uint8_t *cmd;
+
+	BUG_ON(res->id == -1);
+
+	/*
+	 * Encode the dma- and surface destroy commands.
+	 */
+
+	submit_size = vmw_surface_destroy_size();
+	cmd = vmw_fifo_reserve(dev_priv, submit_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "eviction.\n");
+		return -ENOMEM;
+	}
+
+	vmw_surface_destroy_encode(res->id, cmd);
+	vmw_fifo_commit(dev_priv, submit_size);
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size -= res->backup_size;
+
+	/*
+	 * Release the surface ID.
+	 */
+
+	vmw_resource_release_id(res);
+
+	return 0;
+}
+
+
+/**
+ * vmw_surface_init - initialize a struct vmw_surface
+ *
+ * @dev_priv:       Pointer to a device private struct.
+ * @srf:            Pointer to the struct vmw_surface to initialize.
+ * @res_free:       Pointer to a resource destructor used to free
+ *                  the object.
+ */
+static int vmw_surface_init(struct vmw_private *dev_priv,
+			    struct vmw_surface *srf,
+			    void (*res_free) (struct vmw_resource *res))
+{
+	int ret;
+	struct vmw_resource *res = &srf->res;
+
+	BUG_ON(res_free == NULL);
+	(void) vmw_3d_resource_inc(dev_priv, false);
+	ret = vmw_resource_init(dev_priv, res, true, res_free,
+				&vmw_legacy_surface_func);
+
+	if (unlikely(ret != 0)) {
+		vmw_3d_resource_dec(dev_priv, false);
+		res_free(res);
+		return ret;
+	}
+
+	/*
+	 * The surface won't be visible to hardware until a
+	 * surface validate.
+	 */
+
+	vmw_resource_activate(res, vmw_hw_surface_destroy);
+	return ret;
+}
+
+/**
+ * vmw_user_surface_base_to_res - TTM base object to resource converter for
+ *                                user visible surfaces
+ *
+ * @base:           Pointer to a TTM base object
+ *
+ * Returns the struct vmw_resource embedded in a struct vmw_surface
+ * for the user-visible object identified by the TTM base object @base.
+ */
+static struct vmw_resource *
+vmw_user_surface_base_to_res(struct ttm_base_object *base)
+{
+	return &(container_of(base, struct vmw_user_surface, base)->srf.res);
+}
+
+/**
+ * vmw_user_surface_free - User visible surface resource destructor
+ *
+ * @res:            A struct vmw_resource embedded in a struct vmw_surface.
+ */
+static void vmw_user_surface_free(struct vmw_resource *res)
+{
+	struct vmw_surface *srf = vmw_res_to_srf(res);
+	struct vmw_user_surface *user_srf =
+	    container_of(srf, struct vmw_user_surface, srf);
+	struct vmw_private *dev_priv = srf->res.dev_priv;
+	uint32_t size = user_srf->size;
+
+	kfree(srf->offsets);
+	kfree(srf->sizes);
+	kfree(srf->snooper.image);
+	ttm_base_object_kfree(user_srf, base);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+}
+
+/**
+ * vmw_user_surface_base_release - User visible surface TTM base object destructor
+ *
+ * @p_base:         Pointer to a pointer to a TTM base object
+ *                  embedded in a struct vmw_user_surface.
+ *
+ * Drops the base object's reference on its resource, and the
+ * pointer pointed to by *p_base is set to NULL.
+ */
+static void vmw_user_surface_base_release(struct ttm_base_object **p_base)
+{
+	struct ttm_base_object *base = *p_base;
+	struct vmw_user_surface *user_srf =
+	    container_of(base, struct vmw_user_surface, base);
+	struct vmw_resource *res = &user_srf->srf.res;
+
+	*p_base = NULL;
+	vmw_resource_unreference(&res);
+}
+
+/**
+ * vmw_surface_destroy_ioctl - Ioctl function implementing
+ *                             the user surface destroy functionality.
+ *
+ * @dev:            Pointer to a struct drm_device.
+ * @data:           Pointer to data copied from / to user-space.
+ * @file_priv:      Pointer to a drm file private structure.
+ */
+int vmw_surface_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv)
+{
+	struct drm_vmw_surface_arg *arg = (struct drm_vmw_surface_arg *)data;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+
+	return ttm_ref_object_base_unref(tfile, arg->sid, TTM_REF_USAGE);
+}
+
+/**
+ * vmw_surface_define_ioctl - Ioctl function implementing
+ *                            the user surface define functionality.
+ *
+ * @dev:            Pointer to a struct drm_device.
+ * @data:           Pointer to data copied from / to user-space.
+ * @file_priv:      Pointer to a drm file private structure.
+ */
+int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct vmw_user_surface *user_srf;
+	struct vmw_surface *srf;
+	struct vmw_resource *res;
+	struct vmw_resource *tmp;
+	union drm_vmw_surface_create_arg *arg =
+	    (union drm_vmw_surface_create_arg *)data;
+	struct drm_vmw_surface_create_req *req = &arg->req;
+	struct drm_vmw_surface_arg *rep = &arg->rep;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct drm_vmw_size __user *user_sizes;
+	int ret;
+	int i, j;
+	uint32_t cur_bo_offset;
+	struct drm_vmw_size *cur_size;
+	struct vmw_surface_offset *cur_offset;
+	uint32_t num_sizes;
+	uint32_t size;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	const struct svga3d_surface_desc *desc;
+
+	if (unlikely(vmw_user_surface_size == 0))
+		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
+			128;
+
+	num_sizes = 0;
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		num_sizes += req->mip_levels[i];
+
+	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
+	    DRM_VMW_MAX_MIP_LEVELS)
+		return -EINVAL;
+
+	size = vmw_user_surface_size + 128 +
+		ttm_round_pot(num_sizes * sizeof(struct drm_vmw_size)) +
+		ttm_round_pot(num_sizes * sizeof(struct vmw_surface_offset));
+
+
+	desc = svga3dsurface_get_desc(req->format);
+	if (unlikely(desc->block_desc == SVGA3DBLOCKDESC_NONE)) {
+		DRM_ERROR("Invalid surface format for surface creation.\n");
+		return -EINVAL;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   size, false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for surface"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	user_srf = kzalloc(sizeof(*user_srf), GFP_KERNEL);
+	if (unlikely(user_srf == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_user_srf;
+	}
+
+	srf = &user_srf->srf;
+	res = &srf->res;
+
+	srf->flags = req->flags;
+	srf->format = req->format;
+	srf->scanout = req->scanout;
+
+	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
+	srf->num_sizes = num_sizes;
+	user_srf->size = size;
+
+	srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL);
+	if (unlikely(srf->sizes == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_sizes;
+	}
+	srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets),
+			       GFP_KERNEL);
+	if (unlikely(srf->offsets == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_offsets;
+	}
+
+	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
+	    req->size_addr;
+
+	ret = copy_from_user(srf->sizes, user_sizes,
+			     srf->num_sizes * sizeof(*srf->sizes));
+	if (unlikely(ret != 0)) {
+		ret = -EFAULT;
+		goto out_no_copy;
+	}
+
+	srf->base_size = *srf->sizes;
+	srf->autogen_filter = SVGA3D_TEX_FILTER_NONE;
+	srf->multisample_count = 1;
+
+	cur_bo_offset = 0;
+	cur_offset = srf->offsets;
+	cur_size = srf->sizes;
+
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+		for (j = 0; j < srf->mip_levels[i]; ++j) {
+			uint32_t stride = svga3dsurface_calculate_pitch
+				(desc, cur_size);
+
+			cur_offset->face = i;
+			cur_offset->mip = j;
+			cur_offset->bo_offset = cur_bo_offset;
+			cur_bo_offset += svga3dsurface_get_image_buffer_size
+				(desc, cur_size, stride);
+			++cur_offset;
+			++cur_size;
+		}
+	}
+	res->backup_size = cur_bo_offset;
+	if (srf->scanout &&
+	    srf->num_sizes == 1 &&
+	    srf->sizes[0].width == 64 &&
+	    srf->sizes[0].height == 64 &&
+	    srf->format == SVGA3D_A8R8G8B8) {
+
+		/* Allocate and clear the cursor image area. */
+		srf->snooper.image = kzalloc(64 * 64 * 4, GFP_KERNEL);
+		if (!srf->snooper.image) {
+			DRM_ERROR("Failed to allocate cursor_image\n");
+			ret = -ENOMEM;
+			goto out_no_copy;
+		}
+	} else {
+		srf->snooper.image = NULL;
+	}
+	srf->snooper.crtc = NULL;
+
+	user_srf->base.shareable = false;
+	user_srf->base.tfile = NULL;
+
+	/*
+	 * From this point, the generic resource management functions
+	 * destroy the object on failure.
+	 */
+
+	ret = vmw_surface_init(dev_priv, srf, vmw_user_surface_free);
+	if (unlikely(ret != 0))
+		goto out_unlock;
+
+	tmp = vmw_resource_reference(&srf->res);
+	ret = ttm_base_object_init(tfile, &user_srf->base,
+				   req->shareable, VMW_RES_SURFACE,
+				   &vmw_user_surface_base_release, NULL);
+
+	if (unlikely(ret != 0)) {
+		vmw_resource_unreference(&tmp);
+		vmw_resource_unreference(&res);
+		goto out_unlock;
+	}
+
+	rep->sid = user_srf->base.hash.key;
+	vmw_resource_unreference(&res);
+
+	ttm_read_unlock(&vmaster->lock);
+	return 0;
+out_no_copy:
+	kfree(srf->offsets);
+out_no_offsets:
+	kfree(srf->sizes);
+out_no_sizes:
+	ttm_base_object_kfree(user_srf, base);
+out_no_user_srf:
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_surface_reference_ioctl - Ioctl function implementing
+ *                               the user surface reference functionality.
+ *
+ * @dev:            Pointer to a struct drm_device.
+ * @data:           Pointer to data copied from / to user-space.
+ * @file_priv:      Pointer to a drm file private structure.
+ */
+int vmw_surface_reference_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv)
+{
+	union drm_vmw_surface_reference_arg *arg =
+	    (union drm_vmw_surface_reference_arg *)data;
+	struct drm_vmw_surface_arg *req = &arg->req;
+	struct drm_vmw_surface_create_req *rep = &arg->rep;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_surface *srf;
+	struct vmw_user_surface *user_srf;
+	struct drm_vmw_size __user *user_sizes;
+	struct ttm_base_object *base;
+	int ret = -EINVAL;
+
+	base = ttm_base_object_lookup(tfile, req->sid);
+	if (unlikely(base == NULL)) {
+		DRM_ERROR("Could not find surface to reference.\n");
+		return -EINVAL;
+	}
+
+	if (unlikely(base->object_type != VMW_RES_SURFACE))
+		goto out_bad_resource;
+
+	user_srf = container_of(base, struct vmw_user_surface, base);
+	srf = &user_srf->srf;
+
+	ret = ttm_ref_object_add(tfile, &user_srf->base, TTM_REF_USAGE, NULL);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Could not add a reference to a surface.\n");
+		goto out_no_reference;
+	}
+
+	rep->flags = srf->flags;
+	rep->format = srf->format;
+	memcpy(rep->mip_levels, srf->mip_levels, sizeof(srf->mip_levels));
+	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
+	    rep->size_addr;
+
+	if (user_sizes)
+		ret = copy_to_user(user_sizes, srf->sizes,
+				   srf->num_sizes * sizeof(*srf->sizes));
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("copy_to_user failed %p %u\n",
+			  user_sizes, srf->num_sizes);
+		ret = -EFAULT;
+	}
+out_bad_resource:
+out_no_reference:
+	ttm_base_object_unref(&base);
+
+	return ret;
+}
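
The define ioctl above sizes a single backing buffer by walking every
face and mip level, accumulating a per-image byte offset as it goes.  A
minimal standalone sketch of that layout logic follows; image_bytes() is
an illustrative 4-bytes-per-pixel stand-in for the driver's format-aware
svga3dsurface_get_image_buffer_size(), and the mip chain is made up.

#include <stdio.h>

struct size3d { unsigned int width, height, depth; };

/* Stand-in for svga3dsurface_get_image_buffer_size(): assumes an
 * uncompressed 32-bit format such as SVGA3D_A8R8G8B8. */
static unsigned int image_bytes(const struct size3d *s)
{
	return s->width * s->height * s->depth * 4;
}

int main(void)
{
	struct size3d mips[] = {	/* one face, three mip levels */
		{ 64, 64, 1 }, { 32, 32, 1 }, { 16, 16, 1 },
	};
	unsigned int off = 0, i;

	for (i = 0; i < sizeof(mips) / sizeof(mips[0]); i++) {
		printf("mip %u at offset %u\n", i, off);
		off += image_bytes(&mips[i]);	/* like cur_bo_offset */
	}
	printf("backup_size = %u bytes\n", off);
	return 0;
}
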
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index e25cf31..fa60add 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -18,7 +18,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/dmi.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/fs.h>
@@ -376,7 +375,6 @@
 			     size_t cnt, loff_t *ppos)
 {
 	char usercmd[64];
-	const char *pdev_name;
 	int ret;
 	bool delay = false, can_switch;
 	bool just_mux = false;
@@ -468,7 +466,6 @@
 		goto out;
 
 	if (can_switch) {
-		pdev_name = pci_name(client->pdev);
 		ret = vga_switchto_stage1(client);
 		if (ret)
 			printk(KERN_ERR "vga_switcheroo: switching failed stage 1 %d\n", ret);
@@ -540,7 +537,6 @@
 int vga_switcheroo_process_delayed_switch(void)
 {
 	struct vga_switcheroo_client *client;
-	const char *pdev_name;
 	int ret;
 	int err = -EINVAL;
 
@@ -555,7 +551,6 @@
 	if (!client || !check_can_switch())
 		goto err;
 
-	pdev_name = pci_name(client->pdev);
 	ret = vga_switchto_stage2(client);
 	if (ret)
 		printk(KERN_ERR "vga_switcheroo: delayed switching failed stage 2 %d\n", ret);
@@ -567,4 +562,3 @@
 	return err;
 }
 EXPORT_SYMBOL(vga_switcheroo_process_delayed_switch);
-
diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c
index 9f26400..89cfd64 100644
--- a/drivers/hwmon/hwmon-vid.c
+++ b/drivers/hwmon/hwmon-vid.c
@@ -115,6 +115,12 @@
 		return (val < 32) ? 1550 - 25 * val
 			: 775 - (25 * (val - 31)) / 2;
 
+	case 26:		/* AMD family 10h to 15h, serial VID */
+		val &= 0x7f;
+		if (val >= 0x7c)
+			return 0;
+		return DIV_ROUND_CLOSEST(15500 - 125 * val, 10);
+
 	case 91:		/* VRM 9.1 */
 	case 90:		/* VRM 9.0 */
 		val &= 0x1f;
@@ -195,6 +201,10 @@
 	{X86_VENDOR_AMD, 0xF, 0x40, 0x7F, ANY, 24},	/* NPT family 0Fh */
 	{X86_VENDOR_AMD, 0xF, 0x80, ANY, ANY, 25},	/* future fam. 0Fh */
 	{X86_VENDOR_AMD, 0x10, 0x0, ANY, ANY, 25},	/* NPT family 10h */
+	{X86_VENDOR_AMD, 0x11, 0x0, ANY, ANY, 26},	/* family 11h */
+	{X86_VENDOR_AMD, 0x12, 0x0, ANY, ANY, 26},	/* family 12h */
+	{X86_VENDOR_AMD, 0x14, 0x0, ANY, ANY, 26},	/* family 14h */
+	{X86_VENDOR_AMD, 0x15, 0x0, ANY, ANY, 26},	/* family 15h */
 
 	{X86_VENDOR_INTEL, 0x6, 0x0, 0x6, ANY, 82},	/* Pentium Pro,
 							 * Pentium II, Xeon,
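
The new case 26 decodes AMD's serial VID encoding: 1.55 V minus 12.5 mV
per step, with codes 0x7c and above meaning the output is off.  Below is
a minimal sketch of the same arithmetic that compiles outside the
kernel; DIV_ROUND_CLOSEST is re-implemented locally for that purpose and
the function name is made up.

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

static int svi_to_mv(int val)	/* returns millivolts */
{
	val &= 0x7f;
	if (val >= 0x7c)	/* 0x7c-0x7f: output off */
		return 0;
	return DIV_ROUND_CLOSEST(15500 - 125 * val, 10);
}

int main(void)
{
	/* prints 1550 1525 13: 1.55 V, 1.525 V, and 12.5 mV rounded up */
	printf("%d %d %d\n", svi_to_mv(0x00), svi_to_mv(0x02),
	       svi_to_mv(0x7b));
	return 0;
}
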
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index c3c471c..646314f 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -84,19 +84,21 @@
 
 	/* Open access to 0x295-0x296 on MSI MS-7031 */
 	sb = pci_get_device(PCI_VENDOR_ID_ATI, 0x436c, NULL);
-	if (sb &&
-	    (sb->subsystem_vendor == 0x1462 &&	/* MSI */
-	     sb->subsystem_device == 0x0031)) {	/* MS-7031 */
+	if (sb) {
+		if (sb->subsystem_vendor == 0x1462 &&	/* MSI */
+		    sb->subsystem_device == 0x0031) {	/* MS-7031 */
+			pci_read_config_byte(sb, 0x48, &enable);
+			pci_read_config_word(sb, 0x64, &base);
 
-		pci_read_config_byte(sb, 0x48, &enable);
-		pci_read_config_word(sb, 0x64, &base);
-
-		if (base == 0 && !(enable & BIT(2))) {
-			dev_info(&sb->dev,
-				 "Opening wide generic port at 0x295\n");
-			pci_write_config_word(sb, 0x64, 0x295);
-			pci_write_config_byte(sb, 0x48, enable | BIT(2));
+			if (base == 0 && !(enable & BIT(2))) {
+				dev_info(&sb->dev,
+					 "Opening wide generic port at 0x295\n");
+				pci_write_config_word(sb, 0x64, 0x295);
+				pci_write_config_byte(sb, 0x48,
+						      enable | BIT(2));
+			}
 		}
+		pci_dev_put(sb);
 	}
 #endif
 }
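
The hwmon.c change is a reference-count fix: pci_get_device() returns a
referenced device, so the reference must be dropped on every path, not
only when the subsystem IDs match.  A toy refcount model of the
corrected shape, with made-up get/put helpers standing in for the real
PCI API:

#include <stdio.h>

struct dev { int refs; int match; };

static struct dev *dev_get(struct dev *d) { d->refs++; return d; }
static void dev_put(struct dev *d) { d->refs--; }

static void quirk(struct dev *d)
{
	struct dev *sb = dev_get(d);	/* like pci_get_device() */

	if (sb) {
		if (sb->match) {
			/* ... poke the chip ... */
		}
		dev_put(sb);	/* always drop the reference */
	}
}

int main(void)
{
	struct dev d = { .refs = 1, .match = 0 };

	quirk(&d);
	printf("refs = %d\n", d.refs);	/* 1: no leak even on mismatch */
	return 0;
}
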
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index d32aa35..117d66f 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -203,6 +203,8 @@
 static const u8 IT87_REG_FAN_MIN[]	= { 0x10, 0x11, 0x12, 0x84, 0x86 };
 static const u8 IT87_REG_FANX[]		= { 0x18, 0x19, 0x1a, 0x81, 0x83 };
 static const u8 IT87_REG_FANX_MIN[]	= { 0x1b, 0x1c, 0x1d, 0x85, 0x87 };
+static const u8 IT87_REG_TEMP_OFFSET[]	= { 0x56, 0x57, 0x59 };
+
 #define IT87_REG_FAN_MAIN_CTRL 0x13
 #define IT87_REG_FAN_CTL       0x14
 #define IT87_REG_PWM(nr)       (0x15 + (nr))
@@ -226,6 +228,83 @@
 #define IT87_REG_AUTO_TEMP(nr, i) (0x60 + (nr) * 8 + (i))
 #define IT87_REG_AUTO_PWM(nr, i)  (0x65 + (nr) * 8 + (i))
 
+struct it87_devices {
+	const char *name;
+	u16 features;
+	u8 peci_mask;
+	u8 old_peci_mask;
+};
+
+#define FEAT_12MV_ADC		(1 << 0)
+#define FEAT_NEWER_AUTOPWM	(1 << 1)
+#define FEAT_OLD_AUTOPWM	(1 << 2)
+#define FEAT_16BIT_FANS		(1 << 3)
+#define FEAT_TEMP_OFFSET	(1 << 4)
+#define FEAT_TEMP_PECI		(1 << 5)
+#define FEAT_TEMP_OLD_PECI	(1 << 6)
+
+static const struct it87_devices it87_devices[] = {
+	[it87] = {
+		.name = "it87",
+		.features = FEAT_OLD_AUTOPWM,	/* may be overridden during probe */
+	},
+	[it8712] = {
+		.name = "it8712",
+		.features = FEAT_OLD_AUTOPWM,	/* may be overridden during probe */
+	},
+	[it8716] = {
+		.name = "it8716",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET,
+	},
+	[it8718] = {
+		.name = "it8718",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
+		  | FEAT_TEMP_OLD_PECI,
+		.old_peci_mask = 0x4,
+	},
+	[it8720] = {
+		.name = "it8720",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
+		  | FEAT_TEMP_OLD_PECI,
+		.old_peci_mask = 0x4,
+	},
+	[it8721] = {
+		.name = "it8721",
+		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI,
+		.peci_mask = 0x05,
+		.old_peci_mask = 0x02,	/* Actually reports PCH */
+	},
+	[it8728] = {
+		.name = "it8728",
+		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
+		.peci_mask = 0x07,
+	},
+	[it8782] = {
+		.name = "it8782",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
+		  | FEAT_TEMP_OLD_PECI,
+		.old_peci_mask = 0x4,
+	},
+	[it8783] = {
+		.name = "it8783",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
+		  | FEAT_TEMP_OLD_PECI,
+		.old_peci_mask = 0x4,
+	},
+};
+
+#define has_16bit_fans(data)	((data)->features & FEAT_16BIT_FANS)
+#define has_12mv_adc(data)	((data)->features & FEAT_12MV_ADC)
+#define has_newer_autopwm(data)	((data)->features & FEAT_NEWER_AUTOPWM)
+#define has_old_autopwm(data)	((data)->features & FEAT_OLD_AUTOPWM)
+#define has_temp_offset(data)	((data)->features & FEAT_TEMP_OFFSET)
+#define has_temp_peci(data, nr)	(((data)->features & FEAT_TEMP_PECI) && \
+				 ((data)->peci_mask & (1 << nr)))
+#define has_temp_old_peci(data, nr) \
+				(((data)->features & FEAT_TEMP_OLD_PECI) && \
+				 ((data)->old_peci_mask & (1 << nr)))
 
 struct it87_sio_data {
 	enum chips type;
@@ -249,7 +328,9 @@
 struct it87_data {
 	struct device *hwmon_dev;
 	enum chips type;
-	u8 revision;
+	u16 features;
+	u8 peci_mask;
+	u8 old_peci_mask;
 
 	unsigned short addr;
 	const char *name;
@@ -258,17 +339,13 @@
 	unsigned long last_updated;	/* In jiffies */
 
 	u16 in_scaled;		/* Internal voltage sensors are scaled */
-	u8 in[9];		/* Register value */
-	u8 in_max[8];		/* Register value */
-	u8 in_min[8];		/* Register value */
+	u8 in[9][3];		/* [nr][0]=in, [1]=min, [2]=max */
 	u8 has_fan;		/* Bitfield, fans enabled */
-	u16 fan[5];		/* Register values, possibly combined */
-	u16 fan_min[5];		/* Register values, possibly combined */
+	u16 fan[5][2];		/* Register values, [nr][0]=fan, [1]=min */
 	u8 has_temp;		/* Bitfield, temp sensors enabled */
-	s8 temp[3];		/* Register value */
-	s8 temp_high[3];	/* Register value */
-	s8 temp_low[3];		/* Register value */
-	u8 sensor;		/* Register value */
+	s8 temp[3][4];		/* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */
+	u8 sensor;		/* Register value (IT87_REG_TEMP_ENABLE) */
+	u8 extra;		/* Register value (IT87_REG_TEMP_EXTRA) */
 	u8 fan_div[3];		/* Register encoding, shifted right */
 	u8 vid;			/* Register encoding, combined */
 	u8 vrm;
@@ -296,26 +373,6 @@
 	s8 auto_temp[3][5];	/* [nr][0] is point1_temp_hyst */
 };
 
-static inline int has_12mv_adc(const struct it87_data *data)
-{
-	/*
-	 * IT8721F and later have a 12 mV ADC, also with internal scaling
-	 * on selected inputs.
-	 */
-	return data->type == it8721
-	    || data->type == it8728;
-}
-
-static inline int has_newer_autopwm(const struct it87_data *data)
-{
-	/*
-	 * IT8721F and later have separate registers for the temperature
-	 * mapping and the manual duty cycle.
-	 */
-	return data->type == it8721
-	    || data->type == it8728;
-}
-
 static int adc_lsb(const struct it87_data *data, int nr)
 {
 	int lsb = has_12mv_adc(data) ? 12 : 16;
@@ -398,35 +455,6 @@
 	750000 / 128,
 };
 
-static inline int has_16bit_fans(const struct it87_data *data)
-{
-	/*
-	 * IT8705F Datasheet 0.4.1, 3h == Version G.
-	 * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J.
-	 * These are the first revisions with 16-bit tachometer support.
-	 */
-	return (data->type == it87 && data->revision >= 0x03)
-	    || (data->type == it8712 && data->revision >= 0x08)
-	    || data->type == it8716
-	    || data->type == it8718
-	    || data->type == it8720
-	    || data->type == it8721
-	    || data->type == it8728
-	    || data->type == it8782
-	    || data->type == it8783;
-}
-
-static inline int has_old_autopwm(const struct it87_data *data)
-{
-	/*
-	 * The old automatic fan speed control interface is implemented
-	 * by IT8705F chips up to revision F and IT8712F chips up to
-	 * revision G.
-	 */
-	return (data->type == it87 && data->revision < 0x03)
-	    || (data->type == it8712 && data->revision < 0x08);
-}
-
 static int it87_probe(struct platform_device *pdev);
 static int it87_remove(struct platform_device *pdev);
 
@@ -447,40 +475,22 @@
 };
 
 static ssize_t show_in(struct device *dev, struct device_attribute *attr,
-		char *buf)
+		       char *buf)
 {
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	int nr = sattr->nr;
+	int index = sattr->index;
 
 	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr]));
+	return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr][index]));
 }
 
-static ssize_t show_in_min(struct device *dev, struct device_attribute *attr,
-		char *buf)
+static ssize_t set_in(struct device *dev, struct device_attribute *attr,
+		      const char *buf, size_t count)
 {
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in_min[nr]));
-}
-
-static ssize_t show_in_max(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in_max[nr]));
-}
-
-static ssize_t set_in_min(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	int nr = sattr->nr;
+	int index = sattr->index;
 
 	struct it87_data *data = dev_get_drvdata(dev);
 	unsigned long val;
@@ -489,159 +499,167 @@
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
-	data->in_min[nr] = in_to_reg(data, nr, val);
-	it87_write_value(data, IT87_REG_VIN_MIN(nr),
-			data->in_min[nr]);
-	mutex_unlock(&data->update_lock);
-	return count;
-}
-static ssize_t set_in_max(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
-	struct it87_data *data = dev_get_drvdata(dev);
-	unsigned long val;
-
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-	data->in_max[nr] = in_to_reg(data, nr, val);
-	it87_write_value(data, IT87_REG_VIN_MAX(nr),
-			data->in_max[nr]);
+	data->in[nr][index] = in_to_reg(data, nr, val);
+	it87_write_value(data,
+			 index == 1 ? IT87_REG_VIN_MIN(nr)
+				    : IT87_REG_VIN_MAX(nr),
+			 data->in[nr][index]);
 	mutex_unlock(&data->update_lock);
 	return count;
 }
 
-#define show_in_offset(offset)					\
-static SENSOR_DEVICE_ATTR(in##offset##_input, S_IRUGO,		\
-		show_in, NULL, offset);
+static SENSOR_DEVICE_ATTR_2(in0_input, S_IRUGO, show_in, NULL, 0, 0);
+static SENSOR_DEVICE_ATTR_2(in0_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    0, 1);
+static SENSOR_DEVICE_ATTR_2(in0_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    0, 2);
 
-#define limit_in_offset(offset)					\
-static SENSOR_DEVICE_ATTR(in##offset##_min, S_IRUGO | S_IWUSR,	\
-		show_in_min, set_in_min, offset);		\
-static SENSOR_DEVICE_ATTR(in##offset##_max, S_IRUGO | S_IWUSR,	\
-		show_in_max, set_in_max, offset);
+static SENSOR_DEVICE_ATTR_2(in1_input, S_IRUGO, show_in, NULL, 1, 0);
+static SENSOR_DEVICE_ATTR_2(in1_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    1, 1);
+static SENSOR_DEVICE_ATTR_2(in1_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    1, 2);
 
-show_in_offset(0);
-limit_in_offset(0);
-show_in_offset(1);
-limit_in_offset(1);
-show_in_offset(2);
-limit_in_offset(2);
-show_in_offset(3);
-limit_in_offset(3);
-show_in_offset(4);
-limit_in_offset(4);
-show_in_offset(5);
-limit_in_offset(5);
-show_in_offset(6);
-limit_in_offset(6);
-show_in_offset(7);
-limit_in_offset(7);
-show_in_offset(8);
+static SENSOR_DEVICE_ATTR_2(in2_input, S_IRUGO, show_in, NULL, 2, 0);
+static SENSOR_DEVICE_ATTR_2(in2_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    2, 1);
+static SENSOR_DEVICE_ATTR_2(in2_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    2, 2);
+
+static SENSOR_DEVICE_ATTR_2(in3_input, S_IRUGO, show_in, NULL, 3, 0);
+static SENSOR_DEVICE_ATTR_2(in3_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    3, 1);
+static SENSOR_DEVICE_ATTR_2(in3_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    3, 2);
+
+static SENSOR_DEVICE_ATTR_2(in4_input, S_IRUGO, show_in, NULL, 4, 0);
+static SENSOR_DEVICE_ATTR_2(in4_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    4, 1);
+static SENSOR_DEVICE_ATTR_2(in4_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    4, 2);
+
+static SENSOR_DEVICE_ATTR_2(in5_input, S_IRUGO, show_in, NULL, 5, 0);
+static SENSOR_DEVICE_ATTR_2(in5_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    5, 1);
+static SENSOR_DEVICE_ATTR_2(in5_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    5, 2);
+
+static SENSOR_DEVICE_ATTR_2(in6_input, S_IRUGO, show_in, NULL, 6, 0);
+static SENSOR_DEVICE_ATTR_2(in6_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    6, 1);
+static SENSOR_DEVICE_ATTR_2(in6_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    6, 2);
+
+static SENSOR_DEVICE_ATTR_2(in7_input, S_IRUGO, show_in, NULL, 7, 0);
+static SENSOR_DEVICE_ATTR_2(in7_min, S_IRUGO | S_IWUSR, show_in, set_in,
+			    7, 1);
+static SENSOR_DEVICE_ATTR_2(in7_max, S_IRUGO | S_IWUSR, show_in, set_in,
+			    7, 2);
+
+static SENSOR_DEVICE_ATTR_2(in8_input, S_IRUGO, show_in, NULL, 8, 0);
 
 /* 3 temperatures */
 static ssize_t show_temp(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			 char *buf)
 {
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	int nr = sattr->nr;
+	int index = sattr->index;
 	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[nr]));
-}
-static ssize_t show_temp_max(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
 
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_high[nr]));
+	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[nr][index]));
 }
-static ssize_t show_temp_min(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
 
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_low[nr]));
-}
-static ssize_t set_temp_max(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
+static ssize_t set_temp(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t count)
 {
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	int nr = sattr->nr;
+	int index = sattr->index;
 	struct it87_data *data = dev_get_drvdata(dev);
 	long val;
+	u8 reg, regval;
 
 	if (kstrtol(buf, 10, &val) < 0)
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
-	data->temp_high[nr] = TEMP_TO_REG(val);
-	it87_write_value(data, IT87_REG_TEMP_HIGH(nr), data->temp_high[nr]);
+
+	switch (index) {
+	default:
+	case 1:
+		reg = IT87_REG_TEMP_LOW(nr);
+		break;
+	case 2:
+		reg = IT87_REG_TEMP_HIGH(nr);
+		break;
+	case 3:
+		regval = it87_read_value(data, IT87_REG_BEEP_ENABLE);
+		if (!(regval & 0x80)) {
+			regval |= 0x80;
+			it87_write_value(data, IT87_REG_BEEP_ENABLE, regval);
+		}
+		data->valid = 0;
+		reg = IT87_REG_TEMP_OFFSET[nr];
+		break;
+	}
+
+	data->temp[nr][index] = TEMP_TO_REG(val);
+	it87_write_value(data, reg, data->temp[nr][index]);
 	mutex_unlock(&data->update_lock);
 	return count;
 }
-static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
 
-	struct it87_data *data = dev_get_drvdata(dev);
-	long val;
+static SENSOR_DEVICE_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, 0, 0);
+static SENSOR_DEVICE_ATTR_2(temp1_min, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    0, 1);
+static SENSOR_DEVICE_ATTR_2(temp1_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    0, 2);
+static SENSOR_DEVICE_ATTR_2(temp1_offset, S_IRUGO | S_IWUSR, show_temp,
+			    set_temp, 0, 3);
+static SENSOR_DEVICE_ATTR_2(temp2_input, S_IRUGO, show_temp, NULL, 1, 0);
+static SENSOR_DEVICE_ATTR_2(temp2_min, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    1, 1);
+static SENSOR_DEVICE_ATTR_2(temp2_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    1, 2);
+static SENSOR_DEVICE_ATTR_2(temp2_offset, S_IRUGO | S_IWUSR, show_temp,
+			    set_temp, 1, 3);
+static SENSOR_DEVICE_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, 2, 0);
+static SENSOR_DEVICE_ATTR_2(temp3_min, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    2, 1);
+static SENSOR_DEVICE_ATTR_2(temp3_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    2, 2);
+static SENSOR_DEVICE_ATTR_2(temp3_offset, S_IRUGO | S_IWUSR, show_temp,
+			    set_temp, 2, 3);
 
-	if (kstrtol(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-	data->temp_low[nr] = TEMP_TO_REG(val);
-	it87_write_value(data, IT87_REG_TEMP_LOW(nr), data->temp_low[nr]);
-	mutex_unlock(&data->update_lock);
-	return count;
-}
-#define show_temp_offset(offset)					\
-static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO,		\
-		show_temp, NULL, offset - 1);				\
-static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR,	\
-		show_temp_max, set_temp_max, offset - 1);		\
-static SENSOR_DEVICE_ATTR(temp##offset##_min, S_IRUGO | S_IWUSR,	\
-		show_temp_min, set_temp_min, offset - 1);
-
-show_temp_offset(1);
-show_temp_offset(2);
-show_temp_offset(3);
-
-static ssize_t show_sensor(struct device *dev, struct device_attribute *attr,
-		char *buf)
+static ssize_t show_temp_type(struct device *dev, struct device_attribute *attr,
+			      char *buf)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
 	int nr = sensor_attr->index;
 	struct it87_data *data = it87_update_device(dev);
 	u8 reg = data->sensor;	    /* In case value is updated while used */
+	u8 extra = data->extra;
 
+	if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1))
+	    || (has_temp_old_peci(data, nr) && (extra & 0x80)))
+		return sprintf(buf, "6\n");  /* Intel PECI */
 	if (reg & (1 << nr))
 		return sprintf(buf, "3\n");  /* thermal diode */
 	if (reg & (8 << nr))
 		return sprintf(buf, "4\n");  /* thermistor */
 	return sprintf(buf, "0\n");      /* disabled */
 }
-static ssize_t set_sensor(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
+
+static ssize_t set_temp_type(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
 	int nr = sensor_attr->index;
 
 	struct it87_data *data = dev_get_drvdata(dev);
 	long val;
-	u8 reg;
+	u8 reg, extra;
 
 	if (kstrtol(buf, 10, &val) < 0)
 		return -EINVAL;
@@ -649,33 +667,45 @@
 	reg = it87_read_value(data, IT87_REG_TEMP_ENABLE);
 	reg &= ~(1 << nr);
 	reg &= ~(8 << nr);
+	if (has_temp_peci(data, nr) && (reg >> 6 == nr + 1 || val == 6))
+		reg &= 0x3f;
+	extra = it87_read_value(data, IT87_REG_TEMP_EXTRA);
+	if (has_temp_old_peci(data, nr) && ((extra & 0x80) || val == 6))
+		extra &= 0x7f;
 	if (val == 2) {	/* backwards compatibility */
-		dev_warn(dev, "Sensor type 2 is deprecated, please use 4 "
-			 "instead\n");
+		dev_warn(dev,
+			 "Sensor type 2 is deprecated, please use 4 instead\n");
 		val = 4;
 	}
-	/* 3 = thermal diode; 4 = thermistor; 0 = disabled */
+	/* 3 = thermal diode; 4 = thermistor; 6 = Intel PECI; 0 = disabled */
 	if (val == 3)
 		reg |= 1 << nr;
 	else if (val == 4)
 		reg |= 8 << nr;
+	else if (has_temp_peci(data, nr) && val == 6)
+		reg |= (nr + 1) << 6;
+	else if (has_temp_old_peci(data, nr) && val == 6)
+		extra |= 0x80;
 	else if (val != 0)
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
 	data->sensor = reg;
+	data->extra = extra;
 	it87_write_value(data, IT87_REG_TEMP_ENABLE, data->sensor);
+	if (has_temp_old_peci(data, nr))
+		it87_write_value(data, IT87_REG_TEMP_EXTRA, data->extra);
 	data->valid = 0;	/* Force cache refresh */
 	mutex_unlock(&data->update_lock);
 	return count;
 }
-#define show_sensor_offset(offset)					\
-static SENSOR_DEVICE_ATTR(temp##offset##_type, S_IRUGO | S_IWUSR,	\
-		show_sensor, set_sensor, offset - 1);
 
-show_sensor_offset(1);
-show_sensor_offset(2);
-show_sensor_offset(3);
+static SENSOR_DEVICE_ATTR(temp1_type, S_IRUGO | S_IWUSR, show_temp_type,
+			  set_temp_type, 0);
+static SENSOR_DEVICE_ATTR(temp2_type, S_IRUGO | S_IWUSR, show_temp_type,
+			  set_temp_type, 1);
+static SENSOR_DEVICE_ATTR(temp3_type, S_IRUGO | S_IWUSR, show_temp_type,
+			  set_temp_type, 2);
 
 /* 3 Fans */
 
@@ -692,25 +722,21 @@
 }
 
 static ssize_t show_fan(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			char *buf)
 {
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	int nr = sattr->nr;
+	int index = sattr->index;
+	int speed;
 	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan[nr],
-				DIV_FROM_REG(data->fan_div[nr])));
-}
-static ssize_t show_fan_min(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
 
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan_min[nr],
-				DIV_FROM_REG(data->fan_div[nr])));
+	speed = has_16bit_fans(data) ?
+		FAN16_FROM_REG(data->fan[nr][index]) :
+		FAN_FROM_REG(data->fan[nr][index],
+			     DIV_FROM_REG(data->fan_div[nr]));
+	return sprintf(buf, "%d\n", speed);
 }
+
 static ssize_t show_fan_div(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
@@ -747,11 +773,13 @@
 
 	return sprintf(buf, "%u\n", pwm_freq[index]);
 }
-static ssize_t set_fan_min(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
+
+static ssize_t set_fan(struct device *dev, struct device_attribute *attr,
+		       const char *buf, size_t count)
 {
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	int nr = sattr->nr;
+	int index = sattr->index;
 
 	struct it87_data *data = dev_get_drvdata(dev);
 	long val;
@@ -761,24 +789,36 @@
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
-	reg = it87_read_value(data, IT87_REG_FAN_DIV);
-	switch (nr) {
-	case 0:
-		data->fan_div[nr] = reg & 0x07;
-		break;
-	case 1:
-		data->fan_div[nr] = (reg >> 3) & 0x07;
-		break;
-	case 2:
-		data->fan_div[nr] = (reg & 0x40) ? 3 : 1;
-		break;
+
+	if (has_16bit_fans(data)) {
+		data->fan[nr][index] = FAN16_TO_REG(val);
+		it87_write_value(data, IT87_REG_FAN_MIN[nr],
+				 data->fan[nr][index] & 0xff);
+		it87_write_value(data, IT87_REG_FANX_MIN[nr],
+				 data->fan[nr][index] >> 8);
+	} else {
+		reg = it87_read_value(data, IT87_REG_FAN_DIV);
+		switch (nr) {
+		case 0:
+			data->fan_div[nr] = reg & 0x07;
+			break;
+		case 1:
+			data->fan_div[nr] = (reg >> 3) & 0x07;
+			break;
+		case 2:
+			data->fan_div[nr] = (reg & 0x40) ? 3 : 1;
+			break;
+		}
+		data->fan[nr][index] =
+		  FAN_TO_REG(val, DIV_FROM_REG(data->fan_div[nr]));
+		it87_write_value(data, IT87_REG_FAN_MIN[nr],
+				 data->fan[nr][index]);
 	}
 
-	data->fan_min[nr] = FAN_TO_REG(val, DIV_FROM_REG(data->fan_div[nr]));
-	it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan_min[nr]);
 	mutex_unlock(&data->update_lock);
 	return count;
 }
+
 static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t count)
 {
@@ -797,7 +837,7 @@
 	old = it87_read_value(data, IT87_REG_FAN_DIV);
 
 	/* Save fan min limit */
-	min = FAN_FROM_REG(data->fan_min[nr], DIV_FROM_REG(data->fan_div[nr]));
+	min = FAN_FROM_REG(data->fan[nr][1], DIV_FROM_REG(data->fan_div[nr]));
 
 	switch (nr) {
 	case 0:
@@ -818,8 +858,8 @@
 	it87_write_value(data, IT87_REG_FAN_DIV, val);
 
 	/* Restore fan min limit */
-	data->fan_min[nr] = FAN_TO_REG(min, DIV_FROM_REG(data->fan_div[nr]));
-	it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan_min[nr]);
+	data->fan[nr][1] = FAN_TO_REG(min, DIV_FROM_REG(data->fan_div[nr]));
+	it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan[nr][1]);
 
 	mutex_unlock(&data->update_lock);
 	return count;
@@ -843,8 +883,8 @@
 	}
 
 	if (err) {
-		dev_err(dev, "Inconsistent trip points, not switching to "
-			"automatic mode\n");
+		dev_err(dev,
+			"Inconsistent trip points, not switching to automatic mode\n");
 		dev_err(dev, "Adjust the trip points and try again\n");
 	}
 	return err;
@@ -1092,118 +1132,106 @@
 	return count;
 }
 
-#define show_fan_offset(offset)					\
-static SENSOR_DEVICE_ATTR(fan##offset##_input, S_IRUGO,		\
-		show_fan, NULL, offset - 1);			\
-static SENSOR_DEVICE_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR, \
-		show_fan_min, set_fan_min, offset - 1);		\
-static SENSOR_DEVICE_ATTR(fan##offset##_div, S_IRUGO | S_IWUSR, \
-		show_fan_div, set_fan_div, offset - 1);
+static SENSOR_DEVICE_ATTR_2(fan1_input, S_IRUGO, show_fan, NULL, 0, 0);
+static SENSOR_DEVICE_ATTR_2(fan1_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
+			    0, 1);
+static SENSOR_DEVICE_ATTR(fan1_div, S_IRUGO | S_IWUSR, show_fan_div,
+			  set_fan_div, 0);
 
-show_fan_offset(1);
-show_fan_offset(2);
-show_fan_offset(3);
+static SENSOR_DEVICE_ATTR_2(fan2_input, S_IRUGO, show_fan, NULL, 1, 0);
+static SENSOR_DEVICE_ATTR_2(fan2_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
+			    1, 1);
+static SENSOR_DEVICE_ATTR(fan2_div, S_IRUGO | S_IWUSR, show_fan_div,
+			  set_fan_div, 1);
 
-#define show_pwm_offset(offset)						\
-static SENSOR_DEVICE_ATTR(pwm##offset##_enable, S_IRUGO | S_IWUSR,	\
-		show_pwm_enable, set_pwm_enable, offset - 1);		\
-static SENSOR_DEVICE_ATTR(pwm##offset, S_IRUGO | S_IWUSR,		\
-		show_pwm, set_pwm, offset - 1);				\
-static DEVICE_ATTR(pwm##offset##_freq,					\
-		(offset == 1 ? S_IRUGO | S_IWUSR : S_IRUGO),		\
-		show_pwm_freq, (offset == 1 ? set_pwm_freq : NULL));	\
-static SENSOR_DEVICE_ATTR(pwm##offset##_auto_channels_temp,		\
-		S_IRUGO | S_IWUSR, show_pwm_temp_map, set_pwm_temp_map,	\
-		offset - 1);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_pwm,		\
-		S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm,		\
-		offset - 1, 0);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point2_pwm,		\
-		S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm,		\
-		offset - 1, 1);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point3_pwm,		\
-		S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm,		\
-		offset - 1, 2);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point4_pwm,		\
-		S_IRUGO, show_auto_pwm, NULL, offset - 1, 3);		\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_temp,		\
-		S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp,	\
-		offset - 1, 1);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_temp_hyst,	\
-		S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp,	\
-		offset - 1, 0);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point2_temp,		\
-		S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp,	\
-		offset - 1, 2);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point3_temp,		\
-		S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp,	\
-		offset - 1, 3);						\
-static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point4_temp,		\
-		S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp,	\
-		offset - 1, 4);
+static SENSOR_DEVICE_ATTR_2(fan3_input, S_IRUGO, show_fan, NULL, 2, 0);
+static SENSOR_DEVICE_ATTR_2(fan3_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
+			    2, 1);
+static SENSOR_DEVICE_ATTR(fan3_div, S_IRUGO | S_IWUSR, show_fan_div,
+			  set_fan_div, 2);
 
-show_pwm_offset(1);
-show_pwm_offset(2);
-show_pwm_offset(3);
+static SENSOR_DEVICE_ATTR_2(fan4_input, S_IRUGO, show_fan, NULL, 3, 0);
+static SENSOR_DEVICE_ATTR_2(fan4_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
+			    3, 1);
 
-/* A different set of callbacks for 16-bit fans */
-static ssize_t show_fan16(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", FAN16_FROM_REG(data->fan[nr]));
-}
+static SENSOR_DEVICE_ATTR_2(fan5_input, S_IRUGO, show_fan, NULL, 4, 0);
+static SENSOR_DEVICE_ATTR_2(fan5_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
+			    4, 1);
 
-static ssize_t show_fan16_min(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", FAN16_FROM_REG(data->fan_min[nr]));
-}
+static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
+			  show_pwm_enable, set_pwm_enable, 0);
+static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 0);
+static DEVICE_ATTR(pwm1_freq, S_IRUGO | S_IWUSR, show_pwm_freq, set_pwm_freq);
+static SENSOR_DEVICE_ATTR(pwm1_auto_channels_temp, S_IRUGO | S_IWUSR,
+			  show_pwm_temp_map, set_pwm_temp_map, 0);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 0, 0);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 0, 1);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 0, 2);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_pwm, S_IRUGO,
+			    show_auto_pwm, NULL, 0, 3);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 0, 1);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 0, 0);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 0, 2);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 0, 3);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 0, 4);
 
-static ssize_t set_fan16_min(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-	struct it87_data *data = dev_get_drvdata(dev);
-	long val;
+static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR,
+			  show_pwm_enable, set_pwm_enable, 1);
+static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 1);
+static DEVICE_ATTR(pwm2_freq, S_IRUGO, show_pwm_freq, NULL);
+static SENSOR_DEVICE_ATTR(pwm2_auto_channels_temp, S_IRUGO | S_IWUSR,
+			  show_pwm_temp_map, set_pwm_temp_map, 1);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 1, 0);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 1, 1);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 1, 2);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_pwm, S_IRUGO,
+			    show_auto_pwm, NULL, 1, 3);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 1, 1);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 1, 0);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 1, 2);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 1, 3);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 1, 4);
 
-	if (kstrtol(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-	data->fan_min[nr] = FAN16_TO_REG(val);
-	it87_write_value(data, IT87_REG_FAN_MIN[nr],
-			 data->fan_min[nr] & 0xff);
-	it87_write_value(data, IT87_REG_FANX_MIN[nr],
-			 data->fan_min[nr] >> 8);
-	mutex_unlock(&data->update_lock);
-	return count;
-}
-
-/*
- * We want to use the same sysfs file names as 8-bit fans, but we need
- * different variable names, so we have to use SENSOR_ATTR instead of
- * SENSOR_DEVICE_ATTR.
- */
-#define show_fan16_offset(offset) \
-static struct sensor_device_attribute sensor_dev_attr_fan##offset##_input16 \
-	= SENSOR_ATTR(fan##offset##_input, S_IRUGO,		\
-		show_fan16, NULL, offset - 1);			\
-static struct sensor_device_attribute sensor_dev_attr_fan##offset##_min16 \
-	= SENSOR_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR,	\
-		show_fan16_min, set_fan16_min, offset - 1)
-
-show_fan16_offset(1);
-show_fan16_offset(2);
-show_fan16_offset(3);
-show_fan16_offset(4);
-show_fan16_offset(5);
+static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR,
+			  show_pwm_enable, set_pwm_enable, 2);
+static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 2);
+static DEVICE_ATTR(pwm3_freq, S_IRUGO, show_pwm_freq, NULL);
+static SENSOR_DEVICE_ATTR(pwm3_auto_channels_temp, S_IRUGO | S_IWUSR,
+			  show_pwm_temp_map, set_pwm_temp_map, 2);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 2, 0);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point2_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 2, 1);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point3_pwm, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 2, 2);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_pwm, S_IRUGO,
+			    show_auto_pwm, NULL, 2, 3);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 2, 1);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 2, 0);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 2, 2);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 2, 3);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 2, 4);
 
 /* Alarms */
 static ssize_t show_alarms(struct device *dev, struct device_attribute *attr,
@@ -1471,6 +1499,12 @@
 	{ .attrs = it87_attributes_temp[2] },
 };
 
+static struct attribute *it87_attributes_temp_offset[] = {
+	&sensor_dev_attr_temp1_offset.dev_attr.attr,
+	&sensor_dev_attr_temp2_offset.dev_attr.attr,
+	&sensor_dev_attr_temp3_offset.dev_attr.attr,
+};
+
 static struct attribute *it87_attributes[] = {
 	&dev_attr_alarms.attr,
 	&sensor_dev_attr_intrusion0_alarm.dev_attr.attr,
@@ -1500,72 +1534,46 @@
 	&sensor_dev_attr_temp3_beep.dev_attr.attr,
 };
 
-static struct attribute *it87_attributes_fan16[5][3+1] = { {
-	&sensor_dev_attr_fan1_input16.dev_attr.attr,
-	&sensor_dev_attr_fan1_min16.dev_attr.attr,
-	&sensor_dev_attr_fan1_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_fan2_input16.dev_attr.attr,
-	&sensor_dev_attr_fan2_min16.dev_attr.attr,
-	&sensor_dev_attr_fan2_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_fan3_input16.dev_attr.attr,
-	&sensor_dev_attr_fan3_min16.dev_attr.attr,
-	&sensor_dev_attr_fan3_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_fan4_input16.dev_attr.attr,
-	&sensor_dev_attr_fan4_min16.dev_attr.attr,
-	&sensor_dev_attr_fan4_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_fan5_input16.dev_attr.attr,
-	&sensor_dev_attr_fan5_min16.dev_attr.attr,
-	&sensor_dev_attr_fan5_alarm.dev_attr.attr,
-	NULL
-} };
-
-static const struct attribute_group it87_group_fan16[5] = {
-	{ .attrs = it87_attributes_fan16[0] },
-	{ .attrs = it87_attributes_fan16[1] },
-	{ .attrs = it87_attributes_fan16[2] },
-	{ .attrs = it87_attributes_fan16[3] },
-	{ .attrs = it87_attributes_fan16[4] },
-};
-
-static struct attribute *it87_attributes_fan[3][4+1] = { {
+static struct attribute *it87_attributes_fan[5][3+1] = { {
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	&sensor_dev_attr_fan1_min.dev_attr.attr,
-	&sensor_dev_attr_fan1_div.dev_attr.attr,
 	&sensor_dev_attr_fan1_alarm.dev_attr.attr,
 	NULL
 }, {
 	&sensor_dev_attr_fan2_input.dev_attr.attr,
 	&sensor_dev_attr_fan2_min.dev_attr.attr,
-	&sensor_dev_attr_fan2_div.dev_attr.attr,
 	&sensor_dev_attr_fan2_alarm.dev_attr.attr,
 	NULL
 }, {
 	&sensor_dev_attr_fan3_input.dev_attr.attr,
 	&sensor_dev_attr_fan3_min.dev_attr.attr,
-	&sensor_dev_attr_fan3_div.dev_attr.attr,
 	&sensor_dev_attr_fan3_alarm.dev_attr.attr,
 	NULL
+}, {
+	&sensor_dev_attr_fan4_input.dev_attr.attr,
+	&sensor_dev_attr_fan4_min.dev_attr.attr,
+	&sensor_dev_attr_fan4_alarm.dev_attr.attr,
+	NULL
+}, {
+	&sensor_dev_attr_fan5_input.dev_attr.attr,
+	&sensor_dev_attr_fan5_min.dev_attr.attr,
+	&sensor_dev_attr_fan5_alarm.dev_attr.attr,
+	NULL
 } };
 
-static const struct attribute_group it87_group_fan[3] = {
+static const struct attribute_group it87_group_fan[5] = {
 	{ .attrs = it87_attributes_fan[0] },
 	{ .attrs = it87_attributes_fan[1] },
 	{ .attrs = it87_attributes_fan[2] },
+	{ .attrs = it87_attributes_fan[3] },
+	{ .attrs = it87_attributes_fan[4] },
 };
 
-static const struct attribute_group *
-it87_get_fan_group(const struct it87_data *data)
-{
-	return has_16bit_fans(data) ? it87_group_fan16 : it87_group_fan;
-}
+static const struct attribute *it87_attributes_fan_div[] = {
+	&sensor_dev_attr_fan1_div.dev_attr.attr,
+	&sensor_dev_attr_fan2_div.dev_attr.attr,
+	&sensor_dev_attr_fan3_div.dev_attr.attr,
+};
 
 static struct attribute *it87_attributes_pwm[3][4+1] = { {
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
@@ -1925,7 +1933,6 @@
 {
 	struct it87_data *data = platform_get_drvdata(pdev);
 	struct it87_sio_data *sio_data = dev->platform_data;
-	const struct attribute_group *fan_group = it87_get_fan_group(data);
 	int i;
 
 	sysfs_remove_group(&dev->kobj, &it87_group);
@@ -1941,6 +1948,9 @@
 		if (!(data->has_temp & (1 << i)))
 			continue;
 		sysfs_remove_group(&dev->kobj, &it87_group_temp[i]);
+		if (has_temp_offset(data))
+			sysfs_remove_file(&dev->kobj,
+					  it87_attributes_temp_offset[i]);
 		if (sio_data->beep_pin)
 			sysfs_remove_file(&dev->kobj,
 					  it87_attributes_temp_beep[i]);
@@ -1948,10 +1958,13 @@
 	for (i = 0; i < 5; i++) {
 		if (!(data->has_fan & (1 << i)))
 			continue;
-		sysfs_remove_group(&dev->kobj, &fan_group[i]);
+		sysfs_remove_group(&dev->kobj, &it87_group_fan[i]);
 		if (sio_data->beep_pin)
 			sysfs_remove_file(&dev->kobj,
 					  it87_attributes_fan_beep[i]);
+		if (i < 3 && !has_16bit_fans(data))
+			sysfs_remove_file(&dev->kobj,
+					  it87_attributes_fan_div[i]);
 	}
 	for (i = 0; i < 3; i++) {
 		if (sio_data->skip_pwm & (1 << i))
@@ -1972,21 +1985,9 @@
 	struct resource *res;
 	struct device *dev = &pdev->dev;
 	struct it87_sio_data *sio_data = dev->platform_data;
-	const struct attribute_group *fan_group;
 	int err = 0, i;
 	int enable_pwm_interface;
 	int fan_beep_need_rw;
-	static const char * const names[] = {
-		"it87",
-		"it8712",
-		"it8716",
-		"it8718",
-		"it8720",
-		"it8721",
-		"it8728",
-		"it8782",
-		"it8783",
-	};
 
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
 	if (!devm_request_region(&pdev->dev, res->start, IT87_EC_EXTENT,
@@ -2003,8 +2004,31 @@
 
 	data->addr = res->start;
 	data->type = sio_data->type;
-	data->revision = sio_data->revision;
-	data->name = names[sio_data->type];
+	data->features = it87_devices[sio_data->type].features;
+	data->peci_mask = it87_devices[sio_data->type].peci_mask;
+	data->old_peci_mask = it87_devices[sio_data->type].old_peci_mask;
+	data->name = it87_devices[sio_data->type].name;
+	/*
+	 * IT8705F Datasheet 0.4.1, 3h == Version G.
+	 * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J.
+	 * These are the first revisions with 16-bit tachometer support.
+	 */
+	switch (data->type) {
+	case it87:
+		if (sio_data->revision >= 0x03) {
+			data->features &= ~FEAT_OLD_AUTOPWM;
+			data->features |= FEAT_16BIT_FANS;
+		}
+		break;
+	case it8712:
+		if (sio_data->revision >= 0x08) {
+			data->features &= ~FEAT_OLD_AUTOPWM;
+			data->features |= FEAT_16BIT_FANS;
+		}
+		break;
+	default:
+		break;
+	}
 
 	/* Now, we do the remaining detection. */
 	if ((it87_read_value(data, IT87_REG_CONFIG) & 0x80)
@@ -2068,6 +2092,12 @@
 		err = sysfs_create_group(&dev->kobj, &it87_group_temp[i]);
 		if (err)
 			goto error;
+		if (has_temp_offset(data)) {
+			err = sysfs_create_file(&dev->kobj,
+						it87_attributes_temp_offset[i]);
+			if (err)
+				goto error;
+		}
 		if (sio_data->beep_pin) {
 			err = sysfs_create_file(&dev->kobj,
 						it87_attributes_temp_beep[i]);
@@ -2077,15 +2107,21 @@
 	}
 
 	/* Do not create fan files for disabled fans */
-	fan_group = it87_get_fan_group(data);
 	fan_beep_need_rw = 1;
 	for (i = 0; i < 5; i++) {
 		if (!(data->has_fan & (1 << i)))
 			continue;
-		err = sysfs_create_group(&dev->kobj, &fan_group[i]);
+		err = sysfs_create_group(&dev->kobj, &it87_group_fan[i]);
 		if (err)
 			goto error;
 
+		if (i < 3 && !has_16bit_fans(data)) {
+			err = sysfs_create_file(&dev->kobj,
+						it87_attributes_fan_div[i]);
+			if (err)
+				goto error;
+		}
+
 		if (sio_data->beep_pin) {
 			err = sysfs_create_file(&dev->kobj,
 						it87_attributes_fan_beep[i]);
@@ -2221,8 +2257,8 @@
 			 * PWM interface).
 			 */
 			if (!((pwm[0] | pwm[1] | pwm[2]) & 0x80)) {
-				dev_info(dev, "Reconfiguring PWM to "
-					 "active high polarity\n");
+				dev_info(dev,
+					 "Reconfiguring PWM to active high polarity\n");
 				it87_write_value(data, IT87_REG_FAN_CTL,
 						 tmp | 0x87);
 				for (i = 0; i < 3; i++)
@@ -2232,16 +2268,16 @@
 				return 1;
 			}
 
-			dev_info(dev, "PWM configuration is "
-				 "too broken to be fixed\n");
+			dev_info(dev,
+				 "PWM configuration is too broken to be fixed\n");
 		}
 
-		dev_info(dev, "Detected broken BIOS "
-			 "defaults, disabling PWM interface\n");
+		dev_info(dev,
+			 "Detected broken BIOS defaults, disabling PWM interface\n");
 		return 0;
 	} else if (fix_pwm_polarity) {
-		dev_info(dev, "PWM configuration looks "
-			 "sane, won't touch\n");
+		dev_info(dev,
+			 "PWM configuration looks sane, won't touch\n");
 	}
 
 	return 1;
@@ -2389,42 +2425,46 @@
 				it87_read_value(data, IT87_REG_CONFIG) | 0x40);
 		}
 		for (i = 0; i <= 7; i++) {
-			data->in[i] =
+			data->in[i][0] =
 				it87_read_value(data, IT87_REG_VIN(i));
-			data->in_min[i] =
+			data->in[i][1] =
 				it87_read_value(data, IT87_REG_VIN_MIN(i));
-			data->in_max[i] =
+			data->in[i][2] =
 				it87_read_value(data, IT87_REG_VIN_MAX(i));
 		}
 		/* in8 (battery) has no limit registers */
-		data->in[8] = it87_read_value(data, IT87_REG_VIN(8));
+		data->in[8][0] = it87_read_value(data, IT87_REG_VIN(8));
 
 		for (i = 0; i < 5; i++) {
 			/* Skip disabled fans */
 			if (!(data->has_fan & (1 << i)))
 				continue;
 
-			data->fan_min[i] =
+			data->fan[i][1] =
 				it87_read_value(data, IT87_REG_FAN_MIN[i]);
-			data->fan[i] = it87_read_value(data,
+			data->fan[i][0] = it87_read_value(data,
 				       IT87_REG_FAN[i]);
 			/* Add high byte if in 16-bit mode */
 			if (has_16bit_fans(data)) {
-				data->fan[i] |= it87_read_value(data,
+				data->fan[i][0] |= it87_read_value(data,
 						IT87_REG_FANX[i]) << 8;
-				data->fan_min[i] |= it87_read_value(data,
+				data->fan[i][1] |= it87_read_value(data,
 						IT87_REG_FANX_MIN[i]) << 8;
 			}
 		}
 		for (i = 0; i < 3; i++) {
 			if (!(data->has_temp & (1 << i)))
 				continue;
-			data->temp[i] =
+			data->temp[i][0] =
 				it87_read_value(data, IT87_REG_TEMP(i));
-			data->temp_high[i] =
-				it87_read_value(data, IT87_REG_TEMP_HIGH(i));
-			data->temp_low[i] =
+			data->temp[i][1] =
 				it87_read_value(data, IT87_REG_TEMP_LOW(i));
+			data->temp[i][2] =
+				it87_read_value(data, IT87_REG_TEMP_HIGH(i));
+			if (has_temp_offset(data))
+				data->temp[i][3] =
+				  it87_read_value(data,
+						  IT87_REG_TEMP_OFFSET[i]);
 		}
 
 		/* Newer chips don't have clock dividers */
@@ -2448,6 +2488,7 @@
 			it87_update_pwm_ctrl(data, i);
 
 		data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE);
+		data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA);
 		/*
 		 * The IT8705F does not have VID capability.
 		 * The IT8718F and later don't use IT87_REG_VID for the
@@ -2549,8 +2590,7 @@
 }
 
 
-MODULE_AUTHOR("Chris Gauthron, "
-	      "Jean Delvare <khali@linux-fr.org>");
+MODULE_AUTHOR("Chris Gauthron, Jean Delvare <khali@linux-fr.org>");
 MODULE_DESCRIPTION("IT8705F/IT871xF/IT872xF hardware monitoring driver");
 module_param(update_vbat, bool, 0);
 MODULE_PARM_DESC(update_vbat, "Update vbat if set else return powerup value");
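
The bulk of the it87 diff replaces open-coded "type == X || type == Y"
helpers with a per-chip table of feature bits that probe can still
adjust by chip revision.  A cut-down, compilable sketch of that
technique, using two invented chips and two invented feature bits:

#include <stdio.h>

#define FEAT_16BIT_FANS		(1 << 0)
#define FEAT_TEMP_OFFSET	(1 << 1)

enum chips { chip_a, chip_b };

static const struct chip_info {
	const char *name;
	unsigned int features;
} chip_table[] = {
	[chip_a] = { "chip_a", FEAT_16BIT_FANS | FEAT_TEMP_OFFSET },
	[chip_b] = { "chip_b", FEAT_16BIT_FANS },
};

struct drv_data { unsigned int features; };

#define has_16bit_fans(d)	((d)->features & FEAT_16BIT_FANS)
#define has_temp_offset(d)	((d)->features & FEAT_TEMP_OFFSET)

int main(void)
{
	struct drv_data data = { .features = chip_table[chip_b].features };

	data.features &= ~FEAT_TEMP_OFFSET;	/* probe-time override */
	printf("16-bit fans: %d, temp offset: %d\n",
	       !!has_16bit_fans(&data), !!has_temp_offset(&data));
	return 0;
}
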
diff --git a/drivers/hwmon/twl4030-madc-hwmon.c b/drivers/hwmon/twl4030-madc-hwmon.c
index 149d44a..6c6d440 100644
--- a/drivers/hwmon/twl4030-madc-hwmon.c
+++ b/drivers/hwmon/twl4030-madc-hwmon.c
@@ -130,7 +130,7 @@
 
 static struct platform_driver twl4030_madc_hwmon_driver = {
 	.probe = twl4030_madc_hwmon_probe,
-	.remove = __exit_p(twl4030_madc_hwmon_remove),
+	.remove = twl4030_madc_hwmon_remove,
 	.driver = {
 		   .name = "twl4030_madc_hwmon",
 		   .owner = THIS_MODULE,
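
The twl4030 one-liner matters because __exit_p() evaluates to NULL when
the driver is built in, while the device can still be unbound through
sysfs; the .remove callback must therefore stay available
unconditionally.  A toy model of the hazard, mimicking the kernel's
__exit_p() with a local macro:

#include <stdio.h>

#ifdef MODULE			/* local stand-in for the kernel macro */
#define __exit_p(x)	x
#else
#define __exit_p(x)	NULL
#endif

static int my_remove(void) { return 0; }

struct drv { int (*remove)(void); };

int main(void)
{
	struct drv d = { .remove = __exit_p(my_remove) };

	(void)my_remove;	/* referenced only through the macro */
	if (d.remove)
		d.remove();
	else
		printf("remove is NULL: an unbind would leak (the bug)\n");
	return 0;
}
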
diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c
index 55ac41c..0e8ffd6 100644
--- a/drivers/hwmon/w83627ehf.c
+++ b/drivers/hwmon/w83627ehf.c
@@ -1,7 +1,7 @@
 /*
  *  w83627ehf - Driver for the hardware monitoring functionality of
  *		the Winbond W83627EHF Super-I/O chip
- *  Copyright (C) 2005-2011  Jean Delvare <khali@linux-fr.org>
+ *  Copyright (C) 2005-2012  Jean Delvare <khali@linux-fr.org>
  *  Copyright (C) 2006  Yuan Mu (Winbond),
  *			Rudolf Marek <r.marek@assembler.cz>
  *			David Hubbard <david.c.hubbard@gmail.com>
@@ -502,6 +502,13 @@
 	u16 have_temp_offset;
 	u8 in6_skip:1;
 	u8 temp3_val_only:1;
+
+#ifdef CONFIG_PM
+	/* Remember extra register values over suspend/resume */
+	u8 vbat;
+	u8 fandiv1;
+	u8 fandiv2;
+#endif
 };
 
 struct w83627ehf_sio_data {
@@ -898,6 +905,8 @@
 				data->temp_max_hyst[i]
 				  = w83627ehf_read_temp(data,
 						data->reg_temp_hyst[i]);
+			if (i > 2)
+				continue;
 			if (data->have_temp_offset & (1 << i))
 				data->temp_offset[i]
 				  = w83627ehf_read_value(data,
@@ -2608,10 +2617,98 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int w83627ehf_suspend(struct device *dev)
+{
+	struct w83627ehf_data *data = w83627ehf_update_device(dev);
+	struct w83627ehf_sio_data *sio_data = dev->platform_data;
+
+	mutex_lock(&data->update_lock);
+	data->vbat = w83627ehf_read_value(data, W83627EHF_REG_VBAT);
+	if (sio_data->kind == nct6775) {
+		data->fandiv1 = w83627ehf_read_value(data, NCT6775_REG_FANDIV1);
+		data->fandiv2 = w83627ehf_read_value(data, NCT6775_REG_FANDIV2);
+	}
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static int w83627ehf_resume(struct device *dev)
+{
+	struct w83627ehf_data *data = dev_get_drvdata(dev);
+	struct w83627ehf_sio_data *sio_data = dev->platform_data;
+	int i;
+
+	mutex_lock(&data->update_lock);
+	data->bank = 0xff;		/* Force initial bank selection */
+
+	/* Restore limits */
+	for (i = 0; i < data->in_num; i++) {
+		if ((i == 6) && data->in6_skip)
+			continue;
+
+		w83627ehf_write_value(data, W83627EHF_REG_IN_MIN(i),
+				      data->in_min[i]);
+		w83627ehf_write_value(data, W83627EHF_REG_IN_MAX(i),
+				      data->in_max[i]);
+	}
+
+	for (i = 0; i < 5; i++) {
+		if (!(data->has_fan_min & (1 << i)))
+			continue;
+
+		w83627ehf_write_value(data, data->REG_FAN_MIN[i],
+				      data->fan_min[i]);
+	}
+
+	for (i = 0; i < NUM_REG_TEMP; i++) {
+		if (!(data->have_temp & (1 << i)))
+			continue;
+
+		if (data->reg_temp_over[i])
+			w83627ehf_write_temp(data, data->reg_temp_over[i],
+					     data->temp_max[i]);
+		if (data->reg_temp_hyst[i])
+			w83627ehf_write_temp(data, data->reg_temp_hyst[i],
+					     data->temp_max_hyst[i]);
+		if (i > 2)
+			continue;
+		if (data->have_temp_offset & (1 << i))
+			w83627ehf_write_value(data,
+					      W83627EHF_REG_TEMP_OFFSET[i],
+					      data->temp_offset[i]);
+	}
+
+	/* Restore other settings */
+	w83627ehf_write_value(data, W83627EHF_REG_VBAT, data->vbat);
+	if (sio_data->kind == nct6775) {
+		w83627ehf_write_value(data, NCT6775_REG_FANDIV1, data->fandiv1);
+		w83627ehf_write_value(data, NCT6775_REG_FANDIV2, data->fandiv2);
+	}
+
+	/* Force re-reading all values */
+	data->valid = 0;
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static const struct dev_pm_ops w83627ehf_dev_pm_ops = {
+	.suspend = w83627ehf_suspend,
+	.resume = w83627ehf_resume,
+};
+
+#define W83627EHF_DEV_PM_OPS	(&w83627ehf_dev_pm_ops)
+#else
+#define W83627EHF_DEV_PM_OPS	NULL
+#endif /* CONFIG_PM */
+
 static struct platform_driver w83627ehf_driver = {
 	.driver = {
 		.owner	= THIS_MODULE,
 		.name	= DRVNAME,
+		.pm	= W83627EHF_DEV_PM_OPS,
 	},
 	.probe		= w83627ehf_probe,
 	.remove		= w83627ehf_remove,
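
Both Winbond drivers gain the same suspend/resume shape: snapshot the
volatile setup registers in .suspend, rewrite limits and force a cache
refresh in .resume, and compile the whole block away without CONFIG_PM.
A reduced sketch of the pattern; the single "limit" register is a
placeholder, not the real chip register map:

#include <stdio.h>

#define CONFIG_PM 1		/* stand-in for the Kconfig symbol */

struct chip_data { int limit; int saved_limit; int valid; };

#ifdef CONFIG_PM
static int chip_suspend(struct chip_data *d)
{
	d->saved_limit = d->limit;	/* the driver reads hardware here */
	return 0;
}

static int chip_resume(struct chip_data *d)
{
	d->limit = d->saved_limit;	/* written back to hardware */
	d->valid = 0;			/* force re-reading all values */
	return 0;
}
#endif

int main(void)
{
	struct chip_data d = { .limit = 42, .valid = 1 };

	chip_suspend(&d);
	d.limit = 0;			/* pretend the chip lost power */
	chip_resume(&d);
	printf("limit=%d valid=%d\n", d.limit, d.valid);	/* 42 0 */
	return 0;
}
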
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index 7f68b83..81f4865 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -5,7 +5,7 @@
  *			      Philip Edelbrock <phil@netroedge.com>,
  *			      and Mark Studebaker <mdsxyz123@yahoo.com>
  * Ported to 2.6 by Bernhard C. Schrenk <clemy@clemy.org>
- * Copyright (c) 2007  Jean Delvare <khali@linux-fr.org>
+ * Copyright (c) 2007 - 2012  Jean Delvare <khali@linux-fr.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -389,6 +389,12 @@
 				 */
 	u8 vrm;
 	u8 vrm_ovt;		/* Register value, 627THF/637HF/687THF only */
+
+#ifdef CONFIG_PM
+	/* Remember extra register values over suspend/resume */
+	u8 scfg1;
+	u8 scfg2;
+#endif
 };
 
 
@@ -401,10 +407,77 @@
 static struct w83627hf_data *w83627hf_update_device(struct device *dev);
 static void w83627hf_init_device(struct platform_device *pdev);
 
+#ifdef CONFIG_PM
+static int w83627hf_suspend(struct device *dev)
+{
+	struct w83627hf_data *data = w83627hf_update_device(dev);
+
+	mutex_lock(&data->update_lock);
+	data->scfg1 = w83627hf_read_value(data, W83781D_REG_SCFG1);
+	data->scfg2 = w83627hf_read_value(data, W83781D_REG_SCFG2);
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static int w83627hf_resume(struct device *dev)
+{
+	struct w83627hf_data *data = dev_get_drvdata(dev);
+	int i, num_temps = (data->type == w83697hf) ? 2 : 3;
+
+	/* Restore limits */
+	mutex_lock(&data->update_lock);
+	for (i = 0; i <= 8; i++) {
+		/* skip missing sensors */
+		if (((data->type == w83697hf) && (i == 1)) ||
+		    ((data->type != w83627hf && data->type != w83697hf)
+		    && (i == 5 || i == 6)))
+			continue;
+		w83627hf_write_value(data, W83781D_REG_IN_MAX(i),
+				     data->in_max[i]);
+		w83627hf_write_value(data, W83781D_REG_IN_MIN(i),
+				     data->in_min[i]);
+	}
+	for (i = 0; i <= 2; i++)
+		w83627hf_write_value(data, W83627HF_REG_FAN_MIN(i),
+				     data->fan_min[i]);
+	for (i = 0; i < num_temps; i++) {
+		w83627hf_write_value(data, w83627hf_reg_temp_over[i],
+				     data->temp_max[i]);
+		w83627hf_write_value(data, w83627hf_reg_temp_hyst[i],
+				     data->temp_max_hyst[i]);
+	}
+
+	/* Fixup BIOS bugs */
+	if (data->type == w83627thf || data->type == w83637hf ||
+	    data->type == w83687thf)
+		w83627hf_write_value(data, W83627THF_REG_VRM_OVT_CFG,
+				     data->vrm_ovt);
+	w83627hf_write_value(data, W83781D_REG_SCFG1, data->scfg1);
+	w83627hf_write_value(data, W83781D_REG_SCFG2, data->scfg2);
+
+	/* Force re-reading all values */
+	data->valid = 0;
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static const struct dev_pm_ops w83627hf_dev_pm_ops = {
+	.suspend = w83627hf_suspend,
+	.resume = w83627hf_resume,
+};
+
+#define W83627HF_DEV_PM_OPS	(&w83627hf_dev_pm_ops)
+#else
+#define W83627HF_DEV_PM_OPS	NULL
+#endif /* CONFIG_PM */
+
 static struct platform_driver w83627hf_driver = {
 	.driver = {
 		.owner	= THIS_MODULE,
 		.name	= DRVNAME,
+		.pm	= W83627HF_DEV_PM_OPS,
 	},
 	.probe		= w83627hf_probe,
 	.remove		= w83627hf_remove,
@@ -1659,8 +1732,10 @@
 	/* Minimize conflicts with other winbond i2c-only clients...  */
 	/* disable i2c subclients... how to disable main i2c client?? */
 	/* force i2c address to relatively uncommon address */
-	w83627hf_write_value(data, W83781D_REG_I2C_SUBADDR, 0x89);
-	w83627hf_write_value(data, W83781D_REG_I2C_ADDR, force_i2c);
+	if (type == w83627hf) {
+		w83627hf_write_value(data, W83781D_REG_I2C_SUBADDR, 0x89);
+		w83627hf_write_value(data, W83781D_REG_I2C_ADDR, force_i2c);
+	}
 
 	/* Read VID only once */
 	if (type == w83627hf || type == w83637hf) {
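
Spelled out, the skip condition in the resume loop above restores these voltage inputs per chip:

/*
 * in0..in8 restore matrix for w83627hf_resume(), derived from the
 * skip condition in the loop above:
 *
 *   w83627hf:                     in0..in8 (all nine inputs)
 *   w83697hf:                     in0, in2..in8 (in1 absent)
 *   w83627thf/w83637hf/w83687thf: in0..in4, in7, in8 (in5/in6 absent)
 */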
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e9df461..bdca511 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -337,6 +337,16 @@
 	help
 	  The unit of the TWI clock is kHz.
 
+config I2C_CBUS_GPIO
+	tristate "CBUS I2C driver"
+	depends on GENERIC_GPIO
+	help
+	  Support for CBUS access using the I2C API. Mostly relevant for
+	  Nokia Internet Tablets (770, N800 and N810).
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called i2c-cbus-gpio.
+
 config I2C_CPM
 	tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)"
 	depends on (CPM1 || CPM2) && OF_I2C
@@ -818,6 +828,16 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-tiny-usb.
 
+config I2C_VIPERBOARD
+	tristate "Viperboard I2C master support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the I2C part of the Nano River
+	  Technologies Viperboard as I2C master.
+	  See the viperboard API specification and Nano
+	  River Tech's viperboard.h for the detailed meaning
+	  of the module parameters.
+
 comment "Other I2C/SMBus bus drivers"
 
 config I2C_ACORN
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 395b516..6181f3f 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -31,6 +31,7 @@
 obj-$(CONFIG_I2C_AT91)		+= i2c-at91.o
 obj-$(CONFIG_I2C_AU1550)	+= i2c-au1550.o
 obj-$(CONFIG_I2C_BLACKFIN_TWI)	+= i2c-bfin-twi.o
+obj-$(CONFIG_I2C_CBUS_GPIO)	+= i2c-cbus-gpio.o
 obj-$(CONFIG_I2C_CPM)		+= i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)	+= i2c-davinci.o
 obj-$(CONFIG_I2C_DESIGNWARE_CORE)	+= i2c-designware-core.o
@@ -79,6 +80,7 @@
 obj-$(CONFIG_I2C_PARPORT_LIGHT)	+= i2c-parport-light.o
 obj-$(CONFIG_I2C_TAOS_EVM)	+= i2c-taos-evm.o
 obj-$(CONFIG_I2C_TINY_USB)	+= i2c-tiny-usb.o
+obj-$(CONFIG_I2C_VIPERBOARD)	+= i2c-viperboard.o
 
 # Other I2C/SMBus bus drivers
 obj-$(CONFIG_I2C_ACORN)		+= i2c-acorn.o
diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index c02bf20..b4575ee 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -19,6 +19,8 @@
 
 #include <linux/clk.h>
 #include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
@@ -29,9 +31,11 @@
 #include <linux/of_i2c.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/platform_data/dma-atmel.h>
 
 #define TWI_CLK_HZ		100000			/* max 400 Kbits/s */
 #define AT91_I2C_TIMEOUT	msecs_to_jiffies(100)	/* transfer timeout */
+#define AT91_I2C_DMA_THRESHOLD	8			/* enable DMA if transfer size is bigger than this threshold */
 
 /* AT91 TWI register definitions */
 #define	AT91_TWI_CR		0x0000	/* Control Register */
@@ -66,24 +70,39 @@
 #define	AT91_TWI_THR		0x0034	/* Transmit Holding Register */
 
 struct at91_twi_pdata {
-	unsigned	clk_max_div;
-	unsigned	clk_offset;
-	bool		has_unre_flag;
+	unsigned clk_max_div;
+	unsigned clk_offset;
+	bool has_unre_flag;
+	bool has_dma_support;
+	struct at_dma_slave dma_slave;
+};
+
+struct at91_twi_dma {
+	struct dma_chan *chan_rx;
+	struct dma_chan *chan_tx;
+	struct scatterlist sg;
+	struct dma_async_tx_descriptor *data_desc;
+	enum dma_data_direction direction;
+	bool buf_mapped;
+	bool xfer_in_progress;
 };
 
 struct at91_twi_dev {
-	struct device		*dev;
-	void __iomem		*base;
-	struct completion	cmd_complete;
-	struct clk		*clk;
-	u8			*buf;
-	size_t			buf_len;
-	struct i2c_msg		*msg;
-	int			irq;
-	unsigned		transfer_status;
-	struct i2c_adapter	adapter;
-	unsigned		twi_cwgr_reg;
-	struct at91_twi_pdata	*pdata;
+	struct device *dev;
+	void __iomem *base;
+	struct completion cmd_complete;
+	struct clk *clk;
+	u8 *buf;
+	size_t buf_len;
+	struct i2c_msg *msg;
+	int irq;
+	unsigned imr;
+	unsigned transfer_status;
+	struct i2c_adapter adapter;
+	unsigned twi_cwgr_reg;
+	struct at91_twi_pdata *pdata;
+	bool use_dma;
+	struct at91_twi_dma dma;
 };
 
 static unsigned at91_twi_read(struct at91_twi_dev *dev, unsigned reg)
@@ -102,6 +121,17 @@
 		       AT91_TWI_TXCOMP | AT91_TWI_RXRDY | AT91_TWI_TXRDY);
 }
 
+static void at91_twi_irq_save(struct at91_twi_dev *dev)
+{
+	dev->imr = at91_twi_read(dev, AT91_TWI_IMR) & 0x7;
+	at91_disable_twi_interrupts(dev);
+}
+
+static void at91_twi_irq_restore(struct at91_twi_dev *dev)
+{
+	at91_twi_write(dev, AT91_TWI_IER, dev->imr);
+}
+
 static void at91_init_twi_bus(struct at91_twi_dev *dev)
 {
 	at91_disable_twi_interrupts(dev);
@@ -138,6 +168,28 @@
 	dev_dbg(dev->dev, "cdiv %d ckdiv %d\n", cdiv, ckdiv);
 }
 
+static void at91_twi_dma_cleanup(struct at91_twi_dev *dev)
+{
+	struct at91_twi_dma *dma = &dev->dma;
+
+	at91_twi_irq_save(dev);
+
+	if (dma->xfer_in_progress) {
+		if (dma->direction == DMA_FROM_DEVICE)
+			dmaengine_terminate_all(dma->chan_rx);
+		else
+			dmaengine_terminate_all(dma->chan_tx);
+		dma->xfer_in_progress = false;
+	}
+	if (dma->buf_mapped) {
+		dma_unmap_single(dev->dev, sg_dma_address(&dma->sg),
+				 dev->buf_len, dma->direction);
+		dma->buf_mapped = false;
+	}
+
+	at91_twi_irq_restore(dev);
+}
+
 static void at91_twi_write_next_byte(struct at91_twi_dev *dev)
 {
 	if (dev->buf_len <= 0)
@@ -154,6 +206,60 @@
 	++dev->buf;
 }
 
+static void at91_twi_write_data_dma_callback(void *data)
+{
+	struct at91_twi_dev *dev = (struct at91_twi_dev *)data;
+
+	dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg),
+			 dev->buf_len, DMA_MEM_TO_DEV);
+
+	at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
+}
+
+static void at91_twi_write_data_dma(struct at91_twi_dev *dev)
+{
+	dma_addr_t dma_addr;
+	struct dma_async_tx_descriptor *txdesc;
+	struct at91_twi_dma *dma = &dev->dma;
+	struct dma_chan *chan_tx = dma->chan_tx;
+
+	if (dev->buf_len <= 0)
+		return;
+
+	dma->direction = DMA_TO_DEVICE;
+
+	at91_twi_irq_save(dev);
+	dma_addr = dma_map_single(dev->dev, dev->buf, dev->buf_len,
+				  DMA_TO_DEVICE);
+	if (dma_mapping_error(dev->dev, dma_addr)) {
+		dev_err(dev->dev, "dma map failed\n");
+		return;
+	}
+	dma->buf_mapped = true;
+	at91_twi_irq_restore(dev);
+	sg_dma_len(&dma->sg) = dev->buf_len;
+	sg_dma_address(&dma->sg) = dma_addr;
+
+	txdesc = dmaengine_prep_slave_sg(chan_tx, &dma->sg, 1, DMA_MEM_TO_DEV,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!txdesc) {
+		dev_err(dev->dev, "dma prep slave sg failed\n");
+		goto error;
+	}
+
+	txdesc->callback = at91_twi_write_data_dma_callback;
+	txdesc->callback_param = dev;
+
+	dma->xfer_in_progress = true;
+	dmaengine_submit(txdesc);
+	dma_async_issue_pending(chan_tx);
+
+	return;
+
+error:
+	at91_twi_dma_cleanup(dev);
+}
+
 static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
 {
 	if (dev->buf_len <= 0)
@@ -179,6 +285,61 @@
 	++dev->buf;
 }
 
+static void at91_twi_read_data_dma_callback(void *data)
+{
+	struct at91_twi_dev *dev = (struct at91_twi_dev *)data;
+
+	dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg),
+			 dev->buf_len, DMA_DEV_TO_MEM);
+
+	/* The last two bytes have to be read without using dma */
+	dev->buf += dev->buf_len - 2;
+	dev->buf_len = 2;
+	at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_RXRDY);
+}
+
+static void at91_twi_read_data_dma(struct at91_twi_dev *dev)
+{
+	dma_addr_t dma_addr;
+	struct dma_async_tx_descriptor *rxdesc;
+	struct at91_twi_dma *dma = &dev->dma;
+	struct dma_chan *chan_rx = dma->chan_rx;
+
+	dma->direction = DMA_FROM_DEVICE;
+
+	/* Keep in mind that we won't use dma to read the last two bytes */
+	at91_twi_irq_save(dev);
+	dma_addr = dma_map_single(dev->dev, dev->buf, dev->buf_len - 2,
+				  DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev->dev, dma_addr)) {
+		dev_err(dev->dev, "dma map failed\n");
+		return;
+	}
+	dma->buf_mapped = true;
+	at91_twi_irq_restore(dev);
+	dma->sg.dma_address = dma_addr;
+	sg_dma_len(&dma->sg) = dev->buf_len - 2;
+
+	rxdesc = dmaengine_prep_slave_sg(chan_rx, &dma->sg, 1, DMA_DEV_TO_MEM,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!rxdesc) {
+		dev_err(dev->dev, "dma prep slave sg failed\n");
+		goto error;
+	}
+
+	rxdesc->callback = at91_twi_read_data_dma_callback;
+	rxdesc->callback_param = dev;
+
+	dma->xfer_in_progress = true;
+	dmaengine_submit(rxdesc);
+	dma_async_issue_pending(dma->chan_rx);
+
+	return;
+
+error:
+	at91_twi_dma_cleanup(dev);
+}
+
 static irqreturn_t atmel_twi_interrupt(int irq, void *dev_id)
 {
 	struct at91_twi_dev *dev = dev_id;
@@ -229,12 +390,36 @@
 		if (dev->buf_len <= 1 && !(dev->msg->flags & I2C_M_RECV_LEN))
 			start_flags |= AT91_TWI_STOP;
 		at91_twi_write(dev, AT91_TWI_CR, start_flags);
-		at91_twi_write(dev, AT91_TWI_IER,
+		/*
+		 * When using dma, the last byte has to be read manually in
+		 * order to not send the stop command too late and then
+		 * to receive extra data. In practice, there are some issues
+		 * if you use the dma to read n-1 bytes because of latency.
+		 * Reading n-2 bytes with dma and the two last ones manually
+		 * seems to be the best solution.
+		 */
+		if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) {
+			at91_twi_read_data_dma(dev);
+			/*
+			 * It is important to enable TXCOMP irq here because
+			 * doing it only when transferring the last two bytes
+			 * will mask NACK errors since TXCOMP is set when a
+			 * NACK occurs.
+			 */
+			at91_twi_write(dev, AT91_TWI_IER,
+			       AT91_TWI_TXCOMP);
+		} else
+			at91_twi_write(dev, AT91_TWI_IER,
 			       AT91_TWI_TXCOMP | AT91_TWI_RXRDY);
 	} else {
-		at91_twi_write_next_byte(dev);
-		at91_twi_write(dev, AT91_TWI_IER,
-			       AT91_TWI_TXCOMP | AT91_TWI_TXRDY);
+		if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) {
+			at91_twi_write_data_dma(dev);
+			at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP);
+		} else {
+			at91_twi_write_next_byte(dev);
+			at91_twi_write(dev, AT91_TWI_IER,
+				AT91_TWI_TXCOMP | AT91_TWI_TXRDY);
+		}
 	}
 
 	ret = wait_for_completion_interruptible_timeout(&dev->cmd_complete,
@@ -242,23 +427,31 @@
 	if (ret == 0) {
 		dev_err(dev->dev, "controller timed out\n");
 		at91_init_twi_bus(dev);
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+		goto error;
 	}
 	if (dev->transfer_status & AT91_TWI_NACK) {
 		dev_dbg(dev->dev, "received nack\n");
-		return -EREMOTEIO;
+		ret = -EREMOTEIO;
+		goto error;
 	}
 	if (dev->transfer_status & AT91_TWI_OVRE) {
 		dev_err(dev->dev, "overrun while reading\n");
-		return -EIO;
+		ret = -EIO;
+		goto error;
 	}
 	if (has_unre_flag && dev->transfer_status & AT91_TWI_UNRE) {
 		dev_err(dev->dev, "underrun while writing\n");
-		return -EIO;
+		ret = -EIO;
+		goto error;
 	}
 	dev_dbg(dev->dev, "transfer complete\n");
 
 	return 0;
+
+error:
+	at91_twi_dma_cleanup(dev);
+	return ret;
 }
 
 static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
@@ -329,36 +522,42 @@
 	.clk_max_div = 5,
 	.clk_offset = 3,
 	.has_unre_flag = true,
+	.has_dma_support = false,
 };
 
 static struct at91_twi_pdata at91sam9261_config = {
 	.clk_max_div = 5,
 	.clk_offset = 4,
 	.has_unre_flag = false,
+	.has_dma_support = false,
 };
 
 static struct at91_twi_pdata at91sam9260_config = {
 	.clk_max_div = 7,
 	.clk_offset = 4,
 	.has_unre_flag = false,
+	.has_dma_support = false,
 };
 
 static struct at91_twi_pdata at91sam9g20_config = {
 	.clk_max_div = 7,
 	.clk_offset = 4,
 	.has_unre_flag = false,
+	.has_dma_support = false,
 };
 
 static struct at91_twi_pdata at91sam9g10_config = {
 	.clk_max_div = 7,
 	.clk_offset = 4,
 	.has_unre_flag = false,
+	.has_dma_support = false,
 };
 
 static struct at91_twi_pdata at91sam9x5_config = {
 	.clk_max_div = 7,
 	.clk_offset = 4,
 	.has_unre_flag = false,
+	.has_dma_support = true,
 };
 
 static const struct platform_device_id at91_twi_devtypes[] = {
@@ -405,6 +604,90 @@
 #define atmel_twi_dt_ids NULL
 #endif
 
+static bool __devinit filter(struct dma_chan *chan, void *slave)
+{
+	struct at_dma_slave *sl = slave;
+
+	if (sl->dma_dev == chan->device->dev) {
+		chan->private = sl;
+		return true;
+	} else {
+		return false;
+	}
+}
+
+static int __devinit at91_twi_configure_dma(struct at91_twi_dev *dev, u32 phy_addr)
+{
+	int ret = 0;
+	struct at_dma_slave *sdata;
+	struct dma_slave_config slave_config;
+	struct at91_twi_dma *dma = &dev->dma;
+
+	sdata = &dev->pdata->dma_slave;
+
+	memset(&slave_config, 0, sizeof(slave_config));
+	slave_config.src_addr = (dma_addr_t)phy_addr + AT91_TWI_RHR;
+	slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	slave_config.src_maxburst = 1;
+	slave_config.dst_addr = (dma_addr_t)phy_addr + AT91_TWI_THR;
+	slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	slave_config.dst_maxburst = 1;
+	slave_config.device_fc = false;
+
+	if (sdata && sdata->dma_dev) {
+		dma_cap_mask_t mask;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+		dma->chan_tx = dma_request_channel(mask, filter, sdata);
+		if (!dma->chan_tx) {
+			dev_err(dev->dev, "no DMA channel available for tx\n");
+			ret = -EBUSY;
+			goto error;
+		}
+		dma->chan_rx = dma_request_channel(mask, filter, sdata);
+		if (!dma->chan_rx) {
+			dev_err(dev->dev, "no DMA channel available for rx\n");
+			ret = -EBUSY;
+			goto error;
+		}
+	} else {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	slave_config.direction = DMA_MEM_TO_DEV;
+	if (dmaengine_slave_config(dma->chan_tx, &slave_config)) {
+		dev_err(dev->dev, "failed to configure tx channel\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	slave_config.direction = DMA_DEV_TO_MEM;
+	if (dmaengine_slave_config(dma->chan_rx, &slave_config)) {
+		dev_err(dev->dev, "failed to configure rx channel\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	sg_init_table(&dma->sg, 1);
+	dma->buf_mapped = false;
+	dma->xfer_in_progress = false;
+
+	dev_info(dev->dev, "using %s (tx) and %s (rx) for DMA transfers\n",
+		 dma_chan_name(dma->chan_tx), dma_chan_name(dma->chan_rx));
+
+	return ret;
+
+error:
+	dev_info(dev->dev, "can't use DMA\n");
+	if (dma->chan_rx)
+		dma_release_channel(dma->chan_rx);
+	if (dma->chan_tx)
+		dma_release_channel(dma->chan_tx);
+	return ret;
+}
+
 static struct at91_twi_pdata * __devinit at91_twi_get_driver_data(
 					struct platform_device *pdev)
 {
@@ -413,7 +696,7 @@
 		match = of_match_node(atmel_twi_dt_ids, pdev->dev.of_node);
 		if (!match)
 			return NULL;
-		return match->data;
+		return (struct at91_twi_pdata *)match->data;
 	}
 	return (struct at91_twi_pdata *) platform_get_device_id(pdev)->driver_data;
 }
@@ -423,6 +706,7 @@
 	struct at91_twi_dev *dev;
 	struct resource *mem;
 	int rc;
+	u32 phy_addr;
 
 	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
 	if (!dev)
@@ -433,6 +717,7 @@
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem)
 		return -ENODEV;
+	phy_addr = mem->start;
 
 	dev->pdata = at91_twi_get_driver_data(pdev);
 	if (!dev->pdata)
@@ -462,6 +747,11 @@
 	}
 	clk_prepare_enable(dev->clk);
 
+	if (dev->pdata->has_dma_support) {
+		if (at91_twi_configure_dma(dev, phy_addr) == 0)
+			dev->use_dma = true;
+	}
+
 	at91_calc_twi_clock(dev, TWI_CLK_HZ);
 	at91_init_twi_bus(dev);
 
diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c
new file mode 100644
index 0000000..98386d6
--- /dev/null
+++ b/drivers/i2c/busses/i2c-cbus-gpio.c
@@ -0,0 +1,300 @@
+/*
+ * CBUS I2C driver for Nokia Internet Tablets.
+ *
+ * Copyright (C) 2004-2010 Nokia Corporation
+ *
+ * Based on code written by Juha Yrjölä, David Weinehall, Mikko Ylinen and
+ * Felipe Balbi. Converted to I2C driver by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/i2c-cbus-gpio.h>
+
+/*
+ * Bit counts are derived from the Nokia implementation. These should be
+ * checked if other CBUS implementations appear.
+ */
+#define CBUS_ADDR_BITS	3
+#define CBUS_REG_BITS	5
+
+struct cbus_host {
+	spinlock_t	lock;		/* host lock */
+	struct device	*dev;
+	int		clk_gpio;
+	int		dat_gpio;
+	int		sel_gpio;
+};
+
+/**
+ * cbus_send_bit - sends one bit over the bus
+ * @host: the host we're using
+ * @bit: one bit of information to send
+ */
+static void cbus_send_bit(struct cbus_host *host, unsigned bit)
+{
+	gpio_set_value(host->dat_gpio, bit ? 1 : 0);
+	gpio_set_value(host->clk_gpio, 1);
+	gpio_set_value(host->clk_gpio, 0);
+}
+
+/**
+ * cbus_send_data - sends @len amount of data over the bus
+ * @host: the host we're using
+ * @data: the data to send
+ * @len: size of the transfer
+ */
+static void cbus_send_data(struct cbus_host *host, unsigned data, unsigned len)
+{
+	int i;
+
+	for (i = len; i > 0; i--)
+		cbus_send_bit(host, data & (1 << (i - 1)));
+}
+
+/**
+ * cbus_receive_bit - receives one bit from the bus
+ * @host: the host we're using
+ */
+static int cbus_receive_bit(struct cbus_host *host)
+{
+	int ret;
+
+	gpio_set_value(host->clk_gpio, 1);
+	ret = gpio_get_value(host->dat_gpio);
+	gpio_set_value(host->clk_gpio, 0);
+	return ret;
+}
+
+/**
+ * cbus_receive_word - receives 16-bit word from the bus
+ * @host: the host we're using
+ */
+static int cbus_receive_word(struct cbus_host *host)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 16; i > 0; i--) {
+		int bit = cbus_receive_bit(host);
+
+		if (bit < 0)
+			return bit;
+
+		if (bit)
+			ret |= 1 << (i - 1);
+	}
+	return ret;
+}
+
+/**
+ * cbus_transfer - transfers data over the bus
+ * @host: the host we're using
+ * @rw: read/write flag
+ * @dev: device address
+ * @reg: register address
+ * @data: if @rw == I2C_SMBUS_WRITE, data to send; otherwise 0
+ */
+static int cbus_transfer(struct cbus_host *host, char rw, unsigned dev,
+			 unsigned reg, unsigned data)
+{
+	unsigned long flags;
+	int ret;
+
+	/* We don't want interrupts disturbing our transfer */
+	spin_lock_irqsave(&host->lock, flags);
+
+	/* Reset state and start of transfer, SEL stays down during transfer */
+	gpio_set_value(host->sel_gpio, 0);
+
+	/* Set the DAT pin to output */
+	gpio_direction_output(host->dat_gpio, 1);
+
+	/* Send the device address */
+	cbus_send_data(host, dev, CBUS_ADDR_BITS);
+
+	/* Send the rw flag */
+	cbus_send_bit(host, rw == I2C_SMBUS_READ);
+
+	/* Send the register address */
+	cbus_send_data(host, reg, CBUS_REG_BITS);
+
+	if (rw == I2C_SMBUS_WRITE) {
+		cbus_send_data(host, data, 16);
+		ret = 0;
+	} else {
+		ret = gpio_direction_input(host->dat_gpio);
+		if (ret) {
+			dev_dbg(host->dev, "failed setting direction\n");
+			goto out;
+		}
+		gpio_set_value(host->clk_gpio, 1);
+
+		ret = cbus_receive_word(host);
+		if (ret < 0) {
+			dev_dbg(host->dev, "failed receiving data\n");
+			goto out;
+		}
+	}
+
+	/* Indicate end of transfer, SEL goes up until next transfer */
+	gpio_set_value(host->sel_gpio, 1);
+	gpio_set_value(host->clk_gpio, 1);
+	gpio_set_value(host->clk_gpio, 0);
+
+out:
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return ret;
+}
+
+static int cbus_i2c_smbus_xfer(struct i2c_adapter	*adapter,
+			       u16			addr,
+			       unsigned short		flags,
+			       char			read_write,
+			       u8			command,
+			       int			size,
+			       union i2c_smbus_data	*data)
+{
+	struct cbus_host *chost = i2c_get_adapdata(adapter);
+	int ret;
+
+	if (size != I2C_SMBUS_WORD_DATA)
+		return -EINVAL;
+
+	ret = cbus_transfer(chost, read_write == I2C_SMBUS_READ, addr,
+			    command, data->word);
+	if (ret < 0)
+		return ret;
+
+	if (read_write == I2C_SMBUS_READ)
+		data->word = ret;
+
+	return 0;
+}
+
+static u32 cbus_i2c_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_SMBUS_READ_WORD_DATA | I2C_FUNC_SMBUS_WRITE_WORD_DATA;
+}
+
+static const struct i2c_algorithm cbus_i2c_algo = {
+	.smbus_xfer	= cbus_i2c_smbus_xfer,
+	.functionality	= cbus_i2c_func,
+};
+
+static int cbus_i2c_remove(struct platform_device *pdev)
+{
+	struct i2c_adapter *adapter = platform_get_drvdata(pdev);
+
+	return i2c_del_adapter(adapter);
+}
+
+static int cbus_i2c_probe(struct platform_device *pdev)
+{
+	struct i2c_adapter *adapter;
+	struct cbus_host *chost;
+	int ret;
+
+	adapter = devm_kzalloc(&pdev->dev, sizeof(struct i2c_adapter),
+			       GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	chost = devm_kzalloc(&pdev->dev, sizeof(*chost), GFP_KERNEL);
+	if (!chost)
+		return -ENOMEM;
+
+	if (pdev->dev.of_node) {
+		struct device_node *dnode = pdev->dev.of_node;
+		if (of_gpio_count(dnode) != 3)
+			return -ENODEV;
+		chost->clk_gpio = of_get_gpio(dnode, 0);
+		chost->dat_gpio = of_get_gpio(dnode, 1);
+		chost->sel_gpio = of_get_gpio(dnode, 2);
+	} else if (pdev->dev.platform_data) {
+		struct i2c_cbus_platform_data *pdata = pdev->dev.platform_data;
+		chost->clk_gpio = pdata->clk_gpio;
+		chost->dat_gpio = pdata->dat_gpio;
+		chost->sel_gpio = pdata->sel_gpio;
+	} else {
+		return -ENODEV;
+	}
+
+	adapter->owner		= THIS_MODULE;
+	adapter->class		= I2C_CLASS_HWMON;
+	adapter->dev.parent	= &pdev->dev;
+	adapter->nr		= pdev->id;
+	adapter->timeout	= HZ;
+	adapter->algo		= &cbus_i2c_algo;
+	strlcpy(adapter->name, "CBUS I2C adapter", sizeof(adapter->name));
+
+	spin_lock_init(&chost->lock);
+	chost->dev = &pdev->dev;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->clk_gpio,
+				    GPIOF_OUT_INIT_LOW, "CBUS clk");
+	if (ret)
+		return ret;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->dat_gpio, GPIOF_IN,
+				    "CBUS data");
+	if (ret)
+		return ret;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->sel_gpio,
+				    GPIOF_OUT_INIT_HIGH, "CBUS sel");
+	if (ret)
+		return ret;
+
+	i2c_set_adapdata(adapter, chost);
+	platform_set_drvdata(pdev, adapter);
+
+	return i2c_add_numbered_adapter(adapter);
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id i2c_cbus_dt_ids[] = {
+	{ .compatible = "i2c-cbus-gpio", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, i2c_cbus_dt_ids);
+#endif
+
+static struct platform_driver cbus_i2c_driver = {
+	.probe	= cbus_i2c_probe,
+	.remove	= cbus_i2c_remove,
+	.driver	= {
+		.owner	= THIS_MODULE,
+		.name	= "i2c-cbus-gpio",
+	},
+};
+module_platform_driver(cbus_i2c_driver);
+
+MODULE_ALIAS("platform:i2c-cbus-gpio");
+MODULE_DESCRIPTION("CBUS I2C driver");
+MODULE_AUTHOR("Juha Yrjölä");
+MODULE_AUTHOR("David Weinehall");
+MODULE_AUTHOR("Mikko Ylinen");
+MODULE_AUTHOR("Felipe Balbi");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
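
Since the adapter above reports only I2C_FUNC_SMBUS_READ_WORD_DATA and I2C_FUNC_SMBUS_WRITE_WORD_DATA, kernel clients talk to CBUS devices through the SMBus word helpers. A minimal usage sketch, assuming a hypothetical client already bound to this adapter; the register number 0x02 is an arbitrary example:

#include <linux/i2c.h>

/* Read one 16-bit CBUS register through the SMBus word interface. */
static int cbus_demo_read_reg(struct i2c_client *client)
{
	s32 val = i2c_smbus_read_word_data(client, 0x02);

	if (val < 0)
		return val;		/* transfer failed */
	return val & 0xffff;		/* 16-bit register value */
}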
diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index e62d2d9..257299a 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -184,7 +184,11 @@
 	bit_data->data = pdata;
 
 	adap->owner = THIS_MODULE;
-	snprintf(adap->name, sizeof(adap->name), "i2c-gpio%d", pdev->id);
+	if (pdev->dev.of_node)
+		strlcpy(adap->name, dev_name(&pdev->dev), sizeof(adap->name));
+	else
+		snprintf(adap->name, sizeof(adap->name), "i2c-gpio%d", pdev->id);
+
 	adap->algo_data = bit_data;
 	adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
 	adap->dev.parent = &pdev->dev;
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 6abc00d..1e73638 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -81,6 +81,7 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/err.h>
+#include <linux/of_i2c.h>
 
 #if (defined CONFIG_I2C_MUX_GPIO || defined CONFIG_I2C_MUX_GPIO_MODULE) && \
 		defined CONFIG_DMI
@@ -1108,6 +1109,7 @@
 		/* fall through */
 	default:
 		priv->features |= FEATURE_I2C_BLOCK_READ;
+		priv->features |= FEATURE_IRQ;
 		/* fall through */
 	case PCI_DEVICE_ID_INTEL_82801DB_3:
 		priv->features |= FEATURE_SMBUS_PEC;
@@ -1120,16 +1122,6 @@
 		break;
 	}
 
-	/* IRQ processing tested on CougarPoint PCH, ICH5, ICH7-M and ICH10 */
-	if (dev->device == PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS ||
-	    dev->device == PCI_DEVICE_ID_INTEL_82801EB_3 ||
-	    dev->device == PCI_DEVICE_ID_INTEL_ICH7_17 ||
-	    dev->device == PCI_DEVICE_ID_INTEL_ICH8_5 ||
-	    dev->device == PCI_DEVICE_ID_INTEL_ICH9_6 ||
-	    dev->device == PCI_DEVICE_ID_INTEL_ICH10_4 ||
-	    dev->device == PCI_DEVICE_ID_INTEL_ICH10_5)
-		priv->features |= FEATURE_IRQ;
-
 	/* Disable features on user request */
 	for (i = 0; i < ARRAY_SIZE(i801_feature_names); i++) {
 		if (priv->features & disable_features & (1 << i))
@@ -1215,6 +1207,7 @@
 		goto exit_free_irq;
 	}
 
+	of_i2c_register_devices(&priv->adapter);
 	i801_probe_optional_slaves(priv);
 	/* We ignore errors - multiplexing is optional */
 	i801_add_mux(priv);
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index 0670da7..6ed53da 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -359,7 +359,7 @@
 
 static u32 mxs_i2c_func(struct i2c_adapter *adap)
 {
-	return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
 static irqreturn_t mxs_i2c_isr(int this_irq, void *dev_id)
diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c
index 02c3115..8b2ffcf 100644
--- a/drivers/i2c/busses/i2c-nomadik.c
+++ b/drivers/i2c/busses/i2c-nomadik.c
@@ -435,13 +435,6 @@
 	timeout = wait_for_completion_timeout(
 		&dev->xfer_complete, dev->adap.timeout);
 
-	if (timeout < 0) {
-		dev_err(&dev->adev->dev,
-			"wait_for_completion_timeout "
-			"returned %d waiting for event\n", timeout);
-		status = timeout;
-	}
-
 	if (timeout == 0) {
 		/* Controller timed out */
 		dev_err(&dev->adev->dev, "read from slave 0x%x timed out\n",
@@ -523,13 +516,6 @@
 	timeout = wait_for_completion_timeout(
 		&dev->xfer_complete, dev->adap.timeout);
 
-	if (timeout < 0) {
-		dev_err(&dev->adev->dev,
-			"wait_for_completion_timeout "
-			"returned %d waiting for event\n", timeout);
-		status = timeout;
-	}
-
 	if (timeout == 0) {
 		/* Controller timed out */
 		dev_err(&dev->adev->dev, "write to slave 0x%x timed out\n",
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 15da1ac..9b35c9f 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -4,6 +4,9 @@
  *
  * Peter Korsgaard <jacmet@sunsite.dk>
  *
+ * Support for the GRLIB port of the controller by
+ * Andreas Larsson <andreas@gaisler.com>
+ *
  * This file is licensed under the terms of the GNU General Public License
  * version 2.  This program is licensed "as is" without any warranty of any
  * kind, whether express or implied.
@@ -34,6 +37,8 @@
 	int nmsgs;
 	int state; /* see STATE_ */
 	int clock_khz;
+	void (*setreg)(struct ocores_i2c *i2c, int reg, u8 value);
+	u8 (*getreg)(struct ocores_i2c *i2c, int reg);
 };
 
 /* registers */
@@ -67,24 +72,47 @@
 #define STATE_READ		3
 #define STATE_ERROR		4
 
+#define TYPE_OCORES		0
+#define TYPE_GRLIB		1
+
+static void oc_setreg_8(struct ocores_i2c *i2c, int reg, u8 value)
+{
+	iowrite8(value, i2c->base + (reg << i2c->reg_shift));
+}
+
+static void oc_setreg_16(struct ocores_i2c *i2c, int reg, u8 value)
+{
+	iowrite16(value, i2c->base + (reg << i2c->reg_shift));
+}
+
+static void oc_setreg_32(struct ocores_i2c *i2c, int reg, u8 value)
+{
+	iowrite32(value, i2c->base + (reg << i2c->reg_shift));
+}
+
+static inline u8 oc_getreg_8(struct ocores_i2c *i2c, int reg)
+{
+	return ioread8(i2c->base + (reg << i2c->reg_shift));
+}
+
+static inline u8 oc_getreg_16(struct ocores_i2c *i2c, int reg)
+{
+	return ioread16(i2c->base + (reg << i2c->reg_shift));
+}
+
+static inline u8 oc_getreg_32(struct ocores_i2c *i2c, int reg)
+{
+	return ioread32(i2c->base + (reg << i2c->reg_shift));
+}
+
 static inline void oc_setreg(struct ocores_i2c *i2c, int reg, u8 value)
 {
-	if (i2c->reg_io_width == 4)
-		iowrite32(value, i2c->base + (reg << i2c->reg_shift));
-	else if (i2c->reg_io_width == 2)
-		iowrite16(value, i2c->base + (reg << i2c->reg_shift));
-	else
-		iowrite8(value, i2c->base + (reg << i2c->reg_shift));
+	i2c->setreg(i2c, reg, value);
 }
 
 static inline u8 oc_getreg(struct ocores_i2c *i2c, int reg)
 {
-	if (i2c->reg_io_width == 4)
-		return ioread32(i2c->base + (reg << i2c->reg_shift));
-	else if (i2c->reg_io_width == 2)
-		return ioread16(i2c->base + (reg << i2c->reg_shift));
-	else
-		return ioread8(i2c->base + (reg << i2c->reg_shift));
+	return i2c->getreg(i2c, reg);
 }
 
 static void ocores_process(struct ocores_i2c *i2c)
@@ -223,11 +251,59 @@
 	.algo		= &ocores_algorithm,
 };
 
+static struct of_device_id ocores_i2c_match[] = {
+	{
+		.compatible = "opencores,i2c-ocores",
+		.data = (void *)TYPE_OCORES,
+	},
+	{
+		.compatible = "aeroflexgaisler,i2cmst",
+		.data = (void *)TYPE_GRLIB,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, ocores_i2c_match);
+
 #ifdef CONFIG_OF
+/* Read and write functions for the GRLIB port of the controller. Registers are
+ * 32-bit big endian and the PRELOW and PREHIGH registers are merged into one
+ * register. The subsequent registers have their offsets decreased accordingly. */
+static u8 oc_getreg_grlib(struct ocores_i2c *i2c, int reg)
+{
+	u32 rd;
+	int rreg = reg;
+	if (reg != OCI2C_PRELOW)
+		rreg--;
+	rd = ioread32be(i2c->base + (rreg << i2c->reg_shift));
+	if (reg == OCI2C_PREHIGH)
+		return (u8)(rd >> 8);
+	else
+		return (u8)rd;
+}
+
+static void oc_setreg_grlib(struct ocores_i2c *i2c, int reg, u8 value)
+{
+	u32 curr, wr;
+	int rreg = reg;
+	if (reg != OCI2C_PRELOW)
+		rreg--;
+	if (reg == OCI2C_PRELOW || reg == OCI2C_PREHIGH) {
+		curr = ioread32be(i2c->base + (rreg << i2c->reg_shift));
+		if (reg == OCI2C_PRELOW)
+			wr = (curr & 0xff00) | value;
+		else
+			wr = (((u32)value) << 8) | (curr & 0xff);
+	} else {
+		wr = value;
+	}
+	iowrite32be(wr, i2c->base + (rreg << i2c->reg_shift));
+}
+
 static int ocores_i2c_of_probe(struct platform_device *pdev,
 				struct ocores_i2c *i2c)
 {
 	struct device_node *np = pdev->dev.of_node;
+	const struct of_device_id *match;
 	u32 val;
 
 	if (of_property_read_u32(np, "reg-shift", &i2c->reg_shift)) {
@@ -253,6 +329,14 @@
 
 	of_property_read_u32(pdev->dev.of_node, "reg-io-width",
 				&i2c->reg_io_width);
+
+	match = of_match_node(ocores_i2c_match, pdev->dev.of_node);
+	if (match && (int)match->data == TYPE_GRLIB) {
+		dev_dbg(&pdev->dev, "GRLIB variant of i2c-ocores\n");
+		i2c->setreg = oc_setreg_grlib;
+		i2c->getreg = oc_getreg_grlib;
+	}
+
 	return 0;
 }
 #else
@@ -263,7 +347,8 @@
 {
 	struct ocores_i2c *i2c;
 	struct ocores_i2c_platform_data *pdata;
-	struct resource *res, *res2;
+	struct resource *res;
+	int irq;
 	int ret;
 	int i;
 
@@ -271,26 +356,17 @@
 	if (!res)
 		return -ENODEV;
 
-	res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res2)
-		return -ENODEV;
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
 
 	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
 	if (!i2c)
 		return -ENOMEM;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				     resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Memory region busy\n");
-		return -EBUSY;
-	}
-
-	i2c->base = devm_ioremap_nocache(&pdev->dev, res->start,
-					 resource_size(res));
-	if (!i2c->base) {
-		dev_err(&pdev->dev, "Unable to map registers\n");
-		return -EIO;
-	}
+	i2c->base = devm_request_and_ioremap(&pdev->dev, res);
+	if (!i2c->base)
+		return -EADDRNOTAVAIL;
 
 	pdata = pdev->dev.platform_data;
 	if (pdata) {
@@ -306,10 +382,34 @@
 	if (i2c->reg_io_width == 0)
 		i2c->reg_io_width = 1; /* Set to default value */
 
+	if (!i2c->setreg || !i2c->getreg) {
+		switch (i2c->reg_io_width) {
+		case 1:
+			i2c->setreg = oc_setreg_8;
+			i2c->getreg = oc_getreg_8;
+			break;
+
+		case 2:
+			i2c->setreg = oc_setreg_16;
+			i2c->getreg = oc_getreg_16;
+			break;
+
+		case 4:
+			i2c->setreg = oc_setreg_32;
+			i2c->getreg = oc_getreg_32;
+			break;
+
+		default:
+			dev_err(&pdev->dev, "Unsupported I/O width (%d)\n",
+				i2c->reg_io_width);
+			return -EINVAL;
+		}
+	}
+
 	ocores_init(i2c);
 
 	init_waitqueue_head(&i2c->wait);
-	ret = devm_request_irq(&pdev->dev, res2->start, ocores_isr, 0,
+	ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
 			       pdev->name, i2c);
 	if (ret) {
 		dev_err(&pdev->dev, "Cannot claim IRQ\n");
@@ -383,12 +483,6 @@
 #define OCORES_I2C_PM	NULL
 #endif
 
-static struct of_device_id ocores_i2c_match[] = {
-	{ .compatible = "opencores,i2c-ocores", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, ocores_i2c_match);
-
 static struct platform_driver ocores_i2c_driver = {
 	.probe   = ocores_i2c_probe,
 	.remove  = __devexit_p(ocores_i2c_remove),
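
A worked example of the GRLIB register merge above, with illustrative register contents:

/*
 * Worked example for oc_getreg_grlib()/oc_setreg_grlib(), assuming
 * reg_shift == 2 and the merged prescale word currently 0x000012ff:
 *
 *   oc_getreg_grlib(i2c, OCI2C_PRELOW)  -> 0xff  (low byte)
 *   oc_getreg_grlib(i2c, OCI2C_PREHIGH) -> 0x12  (bits 15:8)
 *
 * Writing PREHIGH = 0x34 is a read-modify-write of the same word:
 *   wr = (0x34 << 8) | (0x12ff & 0xff) = 0x34ff
 *
 * Every register other than PRELOW maps to (reg - 1) << reg_shift,
 * because the two prescale bytes share one 32-bit big-endian word.
 */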
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 3525c9e..7a62acb 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -43,14 +43,16 @@
 #include <linux/slab.h>
 #include <linux/i2c-omap.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/consumer.h>
 
 /* I2C controller revisions */
 #define OMAP_I2C_OMAP1_REV_2		0x20
 
 /* I2C controller revisions present on specific hardware */
-#define OMAP_I2C_REV_ON_2430		0x36
-#define OMAP_I2C_REV_ON_3430_3530	0x3C
-#define OMAP_I2C_REV_ON_3630_4430	0x40
+#define OMAP_I2C_REV_ON_2430		0x00000036
+#define OMAP_I2C_REV_ON_3430_3530	0x0000003C
+#define OMAP_I2C_REV_ON_3630		0x00000040
+#define OMAP_I2C_REV_ON_4430_PLUS	0x50400002
 
 /* timeout waiting for the controller to respond */
 #define OMAP_I2C_TIMEOUT (msecs_to_jiffies(1000))
@@ -190,7 +192,6 @@
 	void			(*set_mpu_wkup_lat)(struct device *dev,
 						    long latency);
 	u32			speed;		/* Speed of bus in kHz */
-	u32			dtrev;		/* extra revision from DT */
 	u32			flags;
 	u16			cmd_err;
 	u8			*buf;
@@ -202,17 +203,18 @@
 						 * fifo_size==0 implies no fifo
 						 * if set, should be trsh+1
 						 */
-	u8			rev;
+	u32			rev;
 	unsigned		b_hw:1;		/* bad h/w fixes */
 	unsigned		receiver:1;	/* true when we're in receiver mode */
 	u16			iestate;	/* Saved interrupt register */
 	u16			pscstate;
 	u16			scllstate;
 	u16			sclhstate;
-	u16			bufstate;
 	u16			syscstate;
 	u16			westate;
 	u16			errata;
+
+	struct pinctrl		*pins;
 };
 
 static const u8 reg_map_ip_v1[] = {
@@ -275,16 +277,39 @@
 				(i2c_dev->regs[reg] << i2c_dev->reg_shift));
 }
 
-static int omap_i2c_init(struct omap_i2c_dev *dev)
+static void __omap_i2c_init(struct omap_i2c_dev *dev)
 {
-	u16 psc = 0, scll = 0, sclh = 0, buf = 0;
-	u16 fsscll = 0, fssclh = 0, hsscll = 0, hssclh = 0;
-	unsigned long fclk_rate = 12000000;
+	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
+
+	/* Setup clock prescaler to obtain approx 12MHz I2C module clock: */
+	omap_i2c_write_reg(dev, OMAP_I2C_PSC_REG, dev->pscstate);
+
+	/* SCL low and high time values */
+	omap_i2c_write_reg(dev, OMAP_I2C_SCLL_REG, dev->scllstate);
+	omap_i2c_write_reg(dev, OMAP_I2C_SCLH_REG, dev->sclhstate);
+	if (dev->rev >= OMAP_I2C_REV_ON_3430_3530)
+		omap_i2c_write_reg(dev, OMAP_I2C_WE_REG, dev->westate);
+
+	/* Take the I2C module out of reset: */
+	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, OMAP_I2C_CON_EN);
+
+	/*
+	 * Don't write to this register if the IE state is 0 as it can
+	 * cause deadlock.
+	 */
+	if (dev->iestate)
+		omap_i2c_write_reg(dev, OMAP_I2C_IE_REG, dev->iestate);
+}
+
+static int omap_i2c_reset(struct omap_i2c_dev *dev)
+{
 	unsigned long timeout;
-	unsigned long internal_clk = 0;
-	struct clk *fclk;
+	u16 sysc;
 
 	if (dev->rev >= OMAP_I2C_OMAP1_REV_2) {
+		sysc = omap_i2c_read_reg(dev, OMAP_I2C_SYSC_REG);
+
 		/* Disable I2C controller before soft reset */
 		omap_i2c_write_reg(dev, OMAP_I2C_CON_REG,
 			omap_i2c_read_reg(dev, OMAP_I2C_CON_REG) &
@@ -306,32 +331,28 @@
 		}
 
 		/* SYSC register is cleared by the reset; rewrite it */
-		if (dev->rev == OMAP_I2C_REV_ON_2430) {
+		omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG, sysc);
 
-			omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG,
-					   SYSC_AUTOIDLE_MASK);
-
-		} else if (dev->rev >= OMAP_I2C_REV_ON_3430_3530) {
-			dev->syscstate = SYSC_AUTOIDLE_MASK;
-			dev->syscstate |= SYSC_ENAWAKEUP_MASK;
-			dev->syscstate |= (SYSC_IDLEMODE_SMART <<
-			      __ffs(SYSC_SIDLEMODE_MASK));
-			dev->syscstate |= (SYSC_CLOCKACTIVITY_FCLK <<
-			      __ffs(SYSC_CLOCKACTIVITY_MASK));
-
-			omap_i2c_write_reg(dev, OMAP_I2C_SYSC_REG,
-							dev->syscstate);
-			/*
-			 * Enabling all wakup sources to stop I2C freezing on
-			 * WFI instruction.
-			 * REVISIT: Some wkup sources might not be needed.
-			 */
-			dev->westate = OMAP_I2C_WE_ALL;
-			omap_i2c_write_reg(dev, OMAP_I2C_WE_REG,
-							dev->westate);
-		}
 	}
-	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
+	return 0;
+}
+
+static int omap_i2c_init(struct omap_i2c_dev *dev)
+{
+	u16 psc = 0, scll = 0, sclh = 0;
+	u16 fsscll = 0, fssclh = 0, hsscll = 0, hssclh = 0;
+	unsigned long fclk_rate = 12000000;
+	unsigned long internal_clk = 0;
+	struct clk *fclk;
+
+	if (dev->rev >= OMAP_I2C_REV_ON_3430_3530) {
+		/*
+		 * Enabling all wakeup sources to stop I2C freezing on
+		 * WFI instruction.
+		 * REVISIT: Some wkup sources might not be needed.
+		 */
+		dev->westate = OMAP_I2C_WE_ALL;
+	}
 
 	if (dev->flags & OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK) {
 		/*
@@ -416,28 +437,17 @@
 		sclh = fclk_rate / (dev->speed * 2) - 7 + psc;
 	}
 
-	/* Setup clock prescaler to obtain approx 12MHz I2C module clock: */
-	omap_i2c_write_reg(dev, OMAP_I2C_PSC_REG, psc);
-
-	/* SCL low and high time values */
-	omap_i2c_write_reg(dev, OMAP_I2C_SCLL_REG, scll);
-	omap_i2c_write_reg(dev, OMAP_I2C_SCLH_REG, sclh);
-
-	/* Take the I2C module out of reset: */
-	omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, OMAP_I2C_CON_EN);
-
-	/* Enable interrupts */
 	dev->iestate = (OMAP_I2C_IE_XRDY | OMAP_I2C_IE_RRDY |
 			OMAP_I2C_IE_ARDY | OMAP_I2C_IE_NACK |
 			OMAP_I2C_IE_AL)  | ((dev->fifo_size) ?
 				(OMAP_I2C_IE_RDR | OMAP_I2C_IE_XDR) : 0);
-	omap_i2c_write_reg(dev, OMAP_I2C_IE_REG, dev->iestate);
-	if (dev->flags & OMAP_I2C_FLAG_RESET_REGS_POSTIDLE) {
-		dev->pscstate = psc;
-		dev->scllstate = scll;
-		dev->sclhstate = sclh;
-		dev->bufstate = buf;
-	}
+
+	dev->pscstate = psc;
+	dev->scllstate = scll;
+	dev->sclhstate = sclh;
+
+	__omap_i2c_init(dev);
+
 	return 0;
 }
 
@@ -490,7 +500,7 @@
 
 	omap_i2c_write_reg(dev, OMAP_I2C_BUF_REG, buf);
 
-	if (dev->rev < OMAP_I2C_REV_ON_3630_4430)
+	if (dev->rev < OMAP_I2C_REV_ON_3630)
 		dev->b_hw = 1; /* Enable hardware fixes */
 
 	/* calculate wakeup latency constraint for MPU */
@@ -586,7 +596,8 @@
 						OMAP_I2C_TIMEOUT);
 	if (timeout == 0) {
 		dev_err(dev->dev, "controller timed out\n");
-		omap_i2c_init(dev);
+		omap_i2c_reset(dev);
+		__omap_i2c_init(dev);
 		return -ETIMEDOUT;
 	}
 
@@ -596,7 +607,8 @@
 	/* We have an error */
 	if (dev->cmd_err & (OMAP_I2C_STAT_AL | OMAP_I2C_STAT_ROVR |
 			    OMAP_I2C_STAT_XUDF)) {
-		omap_i2c_init(dev);
+		omap_i2c_reset(dev);
+		__omap_i2c_init(dev);
 		return -EIO;
 	}
 
@@ -642,13 +654,14 @@
 			break;
 	}
 
-	if (dev->set_mpu_wkup_lat != NULL)
-		dev->set_mpu_wkup_lat(dev->dev, -1);
-
 	if (r == 0)
 		r = num;
 
 	omap_i2c_wait_for_bb(dev);
+
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->set_mpu_wkup_lat(dev->dev, -1);
+
 out:
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
@@ -1025,9 +1038,7 @@
 #ifdef CONFIG_OF
 static struct omap_i2c_bus_platform_data omap3_pdata = {
 	.rev = OMAP_I2C_IP_VERSION_1,
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_i2c_bus_platform_data omap4_pdata = {
@@ -1048,6 +1059,16 @@
 MODULE_DEVICE_TABLE(of, omap_i2c_of_match);
 #endif
 
+#define OMAP_I2C_SCHEME(rev)		((rev & 0xc000) >> 14)
+
+#define OMAP_I2C_REV_SCHEME_0_MAJOR(rev) (rev >> 4)
+#define OMAP_I2C_REV_SCHEME_0_MINOR(rev) (rev & 0xf)
+
+#define OMAP_I2C_REV_SCHEME_1_MAJOR(rev) ((rev & 0x0700) >> 7)
+#define OMAP_I2C_REV_SCHEME_1_MINOR(rev) (rev & 0x1f)
+#define OMAP_I2C_SCHEME_0		0
+#define OMAP_I2C_SCHEME_1		1
+
 static int __devinit
 omap_i2c_probe(struct platform_device *pdev)
 {
@@ -1060,6 +1081,8 @@
 	const struct of_device_id *match;
 	int irq;
 	int r;
+	u32 rev;
+	u16 minor, major, scheme;
 
 	/* NOTE: driver uses the static register mapping */
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1091,7 +1114,6 @@
 		u32 freq = 100000; /* default to 100000 Hz */
 
 		pdata = match->data;
-		dev->dtrev = pdata->rev;
 		dev->flags = pdata->flags;
 
 		of_property_read_u32(node, "clock-frequency", &freq);
@@ -1101,7 +1123,16 @@
 		dev->speed = pdata->clkrate;
 		dev->flags = pdata->flags;
 		dev->set_mpu_wkup_lat = pdata->set_mpu_wkup_lat;
-		dev->dtrev = pdata->rev;
+	}
+
+	dev->pins = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(dev->pins)) {
+		if (PTR_ERR(dev->pins) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		dev_warn(&pdev->dev, "did not get pins for i2c error: %li\n",
+			 PTR_ERR(dev->pins));
+		dev->pins = NULL;
 	}
 
 	dev->dev = &pdev->dev;
@@ -1114,11 +1145,6 @@
 
 	dev->reg_shift = (dev->flags >> OMAP_I2C_FLAG_BUS_SHIFT__SHIFT) & 3;
 
-	if (dev->dtrev == OMAP_I2C_IP_VERSION_2)
-		dev->regs = (u8 *)reg_map_ip_v2;
-	else
-		dev->regs = (u8 *)reg_map_ip_v1;
-
 	pm_runtime_enable(dev->dev);
 	pm_runtime_set_autosuspend_delay(dev->dev, OMAP_I2C_PM_TIMEOUT);
 	pm_runtime_use_autosuspend(dev->dev);
@@ -1127,11 +1153,37 @@
 	if (IS_ERR_VALUE(r))
 		goto err_free_mem;
 
-	dev->rev = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG) & 0xff;
+	/*
+	 * Read the revision scheme from bits [15:14] of the high rev word:
+	 * a value of 1 indicates an IP v2 controller. On OMAP1/2/3, the
+	 * register at offset 0x04 is the IE register, whose bits [15:14]
+	 * read as 0 after reset, so the test is safe there as well. A
+	 * raw_readw() is used because omap_i2c_read_reg() requires
+	 * reg_map_ip_* to be selected first.
+	 */
+	rev = __raw_readw(dev->base + 0x04);
+
+	scheme = OMAP_I2C_SCHEME(rev);
+	switch (scheme) {
+	case OMAP_I2C_SCHEME_0:
+		dev->regs = (u8 *)reg_map_ip_v1;
+		dev->rev = omap_i2c_read_reg(dev, OMAP_I2C_REV_REG);
+		minor = OMAP_I2C_REV_SCHEME_0_MINOR(dev->rev);
+		major = OMAP_I2C_REV_SCHEME_0_MAJOR(dev->rev);
+		break;
+	case OMAP_I2C_SCHEME_1:
+		/* FALLTHROUGH */
+	default:
+		dev->regs = (u8 *)reg_map_ip_v2;
+		rev = (rev << 16) |
+			omap_i2c_read_reg(dev, OMAP_I2C_IP_V2_REVNB_LO);
+		minor = OMAP_I2C_REV_SCHEME_1_MINOR(rev);
+		major = OMAP_I2C_REV_SCHEME_1_MAJOR(rev);
+		dev->rev = rev;
+	}
 
 	dev->errata = 0;
 
-	if (dev->flags & OMAP_I2C_FLAG_APPLY_ERRATA_I207)
+	if (dev->rev >= OMAP_I2C_REV_ON_2430 &&
+			dev->rev < OMAP_I2C_REV_ON_4430_PLUS)
 		dev->errata |= I2C_OMAP_ERRATA_I207;
 
 	if (dev->rev <= OMAP_I2C_REV_ON_3430_3530)
@@ -1152,7 +1204,7 @@
 
 		dev->fifo_size = (dev->fifo_size / 2);
 
-		if (dev->rev < OMAP_I2C_REV_ON_3630_4430)
+		if (dev->rev < OMAP_I2C_REV_ON_3630)
 			dev->b_hw = 1; /* Enable hardware fixes */
 
 		/* calculate wakeup latency constraint for MPU */
@@ -1195,8 +1247,8 @@
 		goto err_unuse_clocks;
 	}
 
-	dev_info(dev->dev, "bus %d rev%d.%d.%d at %d kHz\n", adap->nr,
-		 dev->dtrev, dev->rev >> 4, dev->rev & 0xf, dev->speed);
+	dev_info(dev->dev, "bus %d rev%d.%d at %d kHz\n", adap->nr,
+		 major, minor, dev->speed);
 
 	of_i2c_register_devices(adap);
 
@@ -1239,14 +1291,13 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_i2c_dev *_dev = platform_get_drvdata(pdev);
-	u16 iv;
 
 	_dev->iestate = omap_i2c_read_reg(_dev, OMAP_I2C_IE_REG);
 
 	omap_i2c_write_reg(_dev, OMAP_I2C_IE_REG, 0);
 
 	if (_dev->rev < OMAP_I2C_OMAP1_REV_2) {
-		iv = omap_i2c_read_reg(_dev, OMAP_I2C_IV_REG); /* Read clears */
+		omap_i2c_read_reg(_dev, OMAP_I2C_IV_REG); /* Read clears */
 	} else {
 		omap_i2c_write_reg(_dev, OMAP_I2C_STAT_REG, _dev->iestate);
 
@@ -1262,23 +1313,10 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_i2c_dev *_dev = platform_get_drvdata(pdev);
 
-	if (_dev->flags & OMAP_I2C_FLAG_RESET_REGS_POSTIDLE) {
-		omap_i2c_write_reg(_dev, OMAP_I2C_CON_REG, 0);
-		omap_i2c_write_reg(_dev, OMAP_I2C_PSC_REG, _dev->pscstate);
-		omap_i2c_write_reg(_dev, OMAP_I2C_SCLL_REG, _dev->scllstate);
-		omap_i2c_write_reg(_dev, OMAP_I2C_SCLH_REG, _dev->sclhstate);
-		omap_i2c_write_reg(_dev, OMAP_I2C_BUF_REG, _dev->bufstate);
-		omap_i2c_write_reg(_dev, OMAP_I2C_SYSC_REG, _dev->syscstate);
-		omap_i2c_write_reg(_dev, OMAP_I2C_WE_REG, _dev->westate);
-		omap_i2c_write_reg(_dev, OMAP_I2C_CON_REG, OMAP_I2C_CON_EN);
-	}
+	if (!_dev->regs)
+		return 0;
 
-	/*
-	 * Don't write to this register if the IE state is 0 as it can
-	 * cause deadlock.
-	 */
-	if (_dev->iestate)
-		omap_i2c_write_reg(_dev, OMAP_I2C_IE_REG, _dev->iestate);
+	__omap_i2c_init(_dev);
 
 	return 0;
 }
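
A worked decode of the new revision scheme, using representative raw values:

/*
 * Worked examples for the scheme decode added above (illustrative):
 *
 *   OMAP3: __raw_readw(base + 0x04) reads the IE register, so bits
 *          [15:14] are 0 -> scheme 0, legacy register map; the rev is
 *          then read from OMAP_I2C_REV_REG, e.g. 0x40 -> major 4,
 *          minor 0 ("rev4.0", i.e. OMAP_I2C_REV_ON_3630).
 *
 *   OMAP4: high word 0x5040 -> OMAP_I2C_SCHEME(0x5040) == 1
 *          -> v2 register map; full rev = (0x5040 << 16) | REVNB_LO
 *          = 0x50400002, i.e. OMAP_I2C_REV_ON_4430_PLUS.
 */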
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 8bbd6ec..f7216ed 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -204,9 +204,8 @@
 			 */
 			pci_write_config_byte(PIIX4_dev, SMBHSTCFG,
 					      temp | 1);
-			dev_printk(KERN_NOTICE, &PIIX4_dev->dev,
-				"WARNING: SMBus interface has been "
-				"FORCEFULLY ENABLED!\n");
+			dev_notice(&PIIX4_dev->dev,
+				   "WARNING: SMBus interface has been FORCEFULLY ENABLED!\n");
 		} else {
 			dev_err(&PIIX4_dev->dev,
 				"Host SMBus controller not enabled!\n");
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index f9399d1..72a8071 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -642,7 +642,7 @@
 	if (ret < 0)
 		return ret;
 
-	priv->io = devm_ioremap(dev, res->start, resource_size(res));
+	priv->io = devm_request_and_ioremap(dev, res);
 	if (!priv->io) {
 		dev_err(dev, "cannot ioremap\n");
 		return -ENODEV;
@@ -693,7 +693,7 @@
 	return 0;
 }
 
-static struct platform_driver rcar_i2c_drv = {
+static struct platform_driver rcar_i2c_driver = {
 	.driver	= {
 		.name	= "i2c-rcar",
 		.owner	= THIS_MODULE,
@@ -702,7 +702,7 @@
 	.remove		= __devexit_p(rcar_i2c_remove),
 };
 
-module_platform_driver(rcar_i2c_drv);
+module_platform_driver(rcar_i2c_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Renesas R-Car I2C bus driver");
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index b33d95e..a290d08 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -38,6 +38,7 @@
 #include <linux/io.h>
 #include <linux/of_i2c.h>
 #include <linux/of_gpio.h>
+#include <linux/pinctrl/consumer.h>
 
 #include <asm/irq.h>
 
@@ -49,6 +50,9 @@
 #define QUIRK_HDMIPHY		(1 << 1)
 #define QUIRK_NO_GPIO		(1 << 2)
 
+/* Max time to wait for bus to become idle after an xfer (in us) */
+#define S3C2410_IDLE_TIMEOUT	5000
+
 /* i2c controller state */
 enum s3c24xx_i2c_state {
 	STATE_IDLE,
@@ -59,7 +63,6 @@
 };
 
 struct s3c24xx_i2c {
-	spinlock_t		lock;
 	wait_queue_head_t	wait;
 	unsigned int            quirks;
 	unsigned int		suspended:1;
@@ -78,11 +81,11 @@
 	void __iomem		*regs;
 	struct clk		*clk;
 	struct device		*dev;
-	struct resource		*ioarea;
 	struct i2c_adapter	adap;
 
 	struct s3c2410_platform_i2c	*pdata;
 	int			gpios[2];
+	struct pinctrl          *pctrl;
 #ifdef CONFIG_CPU_FREQ
 	struct notifier_block	freq_transition;
 #endif
@@ -235,8 +238,47 @@
 
 	dev_dbg(i2c->dev, "STOP\n");
 
-	/* stop the transfer */
-	iicstat &= ~S3C2410_IICSTAT_START;
+	/*
+	 * The datasheet says that the STOP sequence should be:
+	 *  1) I2CSTAT.5 = 0	- Clear BUSY (or 'generate STOP')
+	 *  2) I2CCON.4 = 0	- Clear IRQPEND
+	 *  3) Wait until the stop condition takes effect.
+	 *  4*) I2CSTAT.4 = 0	- Clear TXRXEN
+	 *
+	 * Where, step "4*" is only for buses with the "HDMIPHY" quirk.
+	 *
+	 * However, after much experimentation, it appears that:
+	 * a) normal buses automatically clear BUSY and transition from
+	 *    Master->Slave when they complete generating a STOP condition.
+	 *    Therefore, step (3) can be done in doxfer() by polling I2CCON.4
+	 *    after starting the STOP generation here.
+	 * b) HDMIPHY bus does neither, so there is no way to do step 3.
+	 *    There is no indication when this bus has finished generating
+	 *    STOP.
+	 *
+	 * In fact, we have found that as soon as the IRQPEND bit is cleared in
+	 * step 2, the HDMIPHY bus generates the STOP condition, and then
+	 * immediately starts transferring another data byte, even though the
+	 * bus is supposedly stopped.  This is presumably because the bus is
+	 * still in "Master" mode, and its BUSY bit is still set.
+	 *
+	 * To avoid these extra post-STOP transactions on HDMI phy devices, we
+	 * just disable Serial Output on the bus (I2CSTAT.4 = 0) directly,
+	 * instead of first generating a proper STOP condition.  This should
+	 * float SDA & SCL, terminating the transfer.  Subsequent transfers
+	 * start with a proper START condition, and proceed normally.
+	 *
+	 * The HDMIPHY bus is an internal bus that always has exactly two
+	 * devices, the host as Master and the HDMIPHY device as the slave.
+	 * Skipping the STOP condition has been tested on this bus and works.
+	 */
+	if (i2c->quirks & QUIRK_HDMIPHY) {
+		/* Stop driving the I2C pins */
+		iicstat &= ~S3C2410_IICSTAT_TXRXEN;
+	} else {
+		/* stop the transfer */
+		iicstat &= ~S3C2410_IICSTAT_START;
+	}
 	writel(iicstat, i2c->regs + S3C2410_IICSTAT);
 
 	i2c->state = STATE_STOP;
@@ -490,13 +532,6 @@
 	unsigned long iicstat;
 	int timeout = 400;
 
-	/* the timeout for HDMIPHY is reduced to 10 ms because
-	 * the hangup is expected to happen, so waiting 400 ms
-	 * causes only unnecessary system hangup
-	 */
-	if (i2c->quirks & QUIRK_HDMIPHY)
-		timeout = 10;
-
 	while (timeout-- > 0) {
 		iicstat = readl(i2c->regs + S3C2410_IICSTAT);
 
@@ -506,16 +541,61 @@
 		msleep(1);
 	}
 
-	/* hang-up of bus dedicated for HDMIPHY occurred, resetting */
-	if (i2c->quirks & QUIRK_HDMIPHY) {
-		writel(0, i2c->regs + S3C2410_IICCON);
-		writel(0, i2c->regs + S3C2410_IICSTAT);
-		writel(0, i2c->regs + S3C2410_IICDS);
+	return -ETIMEDOUT;
+}
 
-		return 0;
+/* s3c24xx_i2c_wait_idle
+ *
+ * wait for the i2c bus to become idle.
+*/
+
+static void s3c24xx_i2c_wait_idle(struct s3c24xx_i2c *i2c)
+{
+	unsigned long iicstat;
+	ktime_t start, now;
+	unsigned long delay;
+	int spins;
+
+	/* ensure the stop has been through the bus */
+
+	dev_dbg(i2c->dev, "waiting for bus idle\n");
+
+	start = now = ktime_get();
+
+	/*
+	 * Most of the time, the bus is already idle within a few usec of the
+	 * end of a transaction.  However, really slow i2c devices can stretch
+	 * the clock, delaying STOP generation.
+	 *
+	 * On slower SoCs this typically happens within a very small number of
+	 * instructions so busy wait briefly to avoid scheduling overhead.
+	 */
+	spins = 3;
+	iicstat = readl(i2c->regs + S3C2410_IICSTAT);
+	while ((iicstat & S3C2410_IICSTAT_START) && --spins) {
+		cpu_relax();
+		iicstat = readl(i2c->regs + S3C2410_IICSTAT);
 	}
 
-	return -ETIMEDOUT;
+	/*
+	 * If we do get an appreciable delay, back off exponentially in the
+	 * polling loop as a compromise between idle-detection latency in
+	 * the normal, fast case and system load in the slow-device case:
+	 * the delay doubles up to 1/10th of the total timeout, after which
+	 * polling continues at that constant rate until the timeout.
+	 */
+	delay = 1;
+	while ((iicstat & S3C2410_IICSTAT_START) &&
+	       ktime_us_delta(now, start) < S3C2410_IDLE_TIMEOUT) {
+		usleep_range(delay, 2 * delay);
+		if (delay < S3C2410_IDLE_TIMEOUT / 10)
+			delay <<= 1;
+		now = ktime_get();
+		iicstat = readl(i2c->regs + S3C2410_IICSTAT);
+	}
+
+	if (iicstat & S3C2410_IICSTAT_START)
+		dev_warn(i2c->dev, "timeout waiting for bus idle\n");
 }
 
 /* s3c24xx_i2c_doxfer
@@ -526,8 +606,7 @@
 static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c,
 			      struct i2c_msg *msgs, int num)
 {
-	unsigned long iicstat, timeout;
-	int spins = 20;
+	unsigned long timeout;
 	int ret;
 
 	if (i2c->suspended)
@@ -540,8 +619,6 @@
 		goto out;
 	}
 
-	spin_lock_irq(&i2c->lock);
-
 	i2c->msg     = msgs;
 	i2c->msg_num = num;
 	i2c->msg_ptr = 0;
@@ -550,7 +627,6 @@
 
 	s3c24xx_i2c_enable_irq(i2c);
 	s3c24xx_i2c_message_start(i2c, msgs);
-	spin_unlock_irq(&i2c->lock);
 
 	timeout = wait_event_timeout(i2c->wait, i2c->msg_num == 0, HZ * 5);
 
@@ -564,24 +640,11 @@
 	else if (ret != num)
 		dev_dbg(i2c->dev, "incomplete xfer (%d)\n", ret);
 
-	/* ensure the stop has been through the bus */
+	/* For QUIRK_HDMIPHY, bus is already disabled */
+	if (i2c->quirks & QUIRK_HDMIPHY)
+		goto out;
 
-	dev_dbg(i2c->dev, "waiting for bus idle\n");
-
-	/* first, try busy waiting briefly */
-	do {
-		cpu_relax();
-		iicstat = readl(i2c->regs + S3C2410_IICSTAT);
-	} while ((iicstat & S3C2410_IICSTAT_START) && --spins);
-
-	/* if that timed out sleep */
-	if (!spins) {
-		msleep(1);
-		iicstat = readl(i2c->regs + S3C2410_IICSTAT);
-	}
-
-	if (iicstat & S3C2410_IICSTAT_START)
-		dev_warn(i2c->dev, "timeout waiting for bus idle\n");
+	s3c24xx_i2c_wait_idle(i2c);
 
  out:
 	return ret;
@@ -740,7 +803,6 @@
 					  unsigned long val, void *data)
 {
 	struct s3c24xx_i2c *i2c = freq_to_i2c(nb);
-	unsigned long flags;
 	unsigned int got;
 	int delta_f;
 	int ret;
@@ -754,9 +816,9 @@
 
 	if ((val == CPUFREQ_POSTCHANGE && delta_f < 0) ||
 	    (val == CPUFREQ_PRECHANGE && delta_f > 0)) {
-		spin_lock_irqsave(&i2c->lock, flags);
+		i2c_lock_adapter(&i2c->adap);
 		ret = s3c24xx_i2c_clockrate(i2c, &got);
-		spin_unlock_irqrestore(&i2c->lock, flags);
+		i2c_unlock_adapter(&i2c->adap);
 
 		if (ret < 0)
 			dev_err(i2c->dev, "cannot find frequency\n");
@@ -858,14 +920,6 @@
 
 	pdata = i2c->pdata;
 
-	/* inititalise the gpio */
-
-	if (pdata->cfg_gpio)
-		pdata->cfg_gpio(to_platform_device(i2c->dev));
-	else
-		if (s3c24xx_i2c_parse_dt_gpio(i2c))
-			return -EINVAL;
-
 	/* write slave address */
 
 	writeb(pdata->slave_addr, i2c->regs + S3C2410_IICADD);
@@ -963,7 +1017,6 @@
 	i2c->adap.class   = I2C_CLASS_HWMON | I2C_CLASS_SPD;
 	i2c->tx_setup     = 50;
 
-	spin_lock_init(&i2c->lock);
 	init_waitqueue_head(&i2c->wait);
 
 	/* find the clock and enable it */
@@ -989,36 +1042,38 @@
 		goto err_clk;
 	}
 
-	i2c->ioarea = request_mem_region(res->start, resource_size(res),
-					 pdev->name);
-
-	if (i2c->ioarea == NULL) {
-		dev_err(&pdev->dev, "cannot request IO\n");
-		ret = -ENXIO;
-		goto err_clk;
-	}
-
-	i2c->regs = ioremap(res->start, resource_size(res));
+	i2c->regs = devm_request_and_ioremap(&pdev->dev, res);
 
 	if (i2c->regs == NULL) {
 		dev_err(&pdev->dev, "cannot map IO\n");
 		ret = -ENXIO;
-		goto err_ioarea;
+		goto err_clk;
 	}
 
-	dev_dbg(&pdev->dev, "registers %p (%p, %p)\n",
-		i2c->regs, i2c->ioarea, res);
+	dev_dbg(&pdev->dev, "registers %p (%p)\n",
+		i2c->regs, res);
 
 	/* setup info block for the i2c core */
 
 	i2c->adap.algo_data = i2c;
 	i2c->adap.dev.parent = &pdev->dev;
 
+	i2c->pctrl = devm_pinctrl_get_select_default(i2c->dev);
+
+	/* initialise the i2c gpio lines */
+
+	if (i2c->pdata->cfg_gpio) {
+		i2c->pdata->cfg_gpio(to_platform_device(i2c->dev));
+	} else if (IS_ERR(i2c->pctrl) && s3c24xx_i2c_parse_dt_gpio(i2c)) {
+		ret = -EINVAL;
+		goto err_clk;
+	}
+
 	/* initialise the i2c controller */
 
 	ret = s3c24xx_i2c_init(i2c);
 	if (ret != 0)
-		goto err_iomap;
+		goto err_clk;
 
 	/* find the IRQ for this unit (note, this relies on the init call to
 	 * ensure no current IRQs pending
@@ -1027,7 +1082,7 @@
 	i2c->irq = ret = platform_get_irq(pdev, 0);
 	if (ret <= 0) {
 		dev_err(&pdev->dev, "cannot find IRQ\n");
-		goto err_iomap;
+		goto err_clk;
 	}
 
 	ret = request_irq(i2c->irq, s3c24xx_i2c_irq, 0,
@@ -1035,7 +1090,7 @@
 
 	if (ret != 0) {
 		dev_err(&pdev->dev, "cannot claim IRQ %d\n", i2c->irq);
-		goto err_iomap;
+		goto err_clk;
 	}
 
 	ret = s3c24xx_i2c_register_cpufreq(i2c);
@@ -1075,13 +1130,6 @@
  err_irq:
 	free_irq(i2c->irq, i2c);
 
- err_iomap:
-	iounmap(i2c->regs);
-
- err_ioarea:
-	release_resource(i2c->ioarea);
-	kfree(i2c->ioarea);
-
  err_clk:
 	clk_disable_unprepare(i2c->clk);
 	clk_put(i2c->clk);
@@ -1110,16 +1158,13 @@
 	clk_disable_unprepare(i2c->clk);
 	clk_put(i2c->clk);
 
-	iounmap(i2c->regs);
-
-	release_resource(i2c->ioarea);
-	s3c24xx_i2c_dt_gpio_free(i2c);
-	kfree(i2c->ioarea);
+	if (pdev->dev.of_node && IS_ERR(i2c->pctrl))
+		s3c24xx_i2c_dt_gpio_free(i2c);
 
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int s3c24xx_i2c_suspend_noirq(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -1142,10 +1187,14 @@
 
 	return 0;
 }
+#endif
 
+#ifdef CONFIG_PM
 static const struct dev_pm_ops s3c24xx_i2c_dev_pm_ops = {
+#ifdef CONFIG_PM_SLEEP
 	.suspend_noirq = s3c24xx_i2c_suspend_noirq,
 	.resume = s3c24xx_i2c_resume,
+#endif
 };
 
 #define S3C24XX_DEV_PM_OPS (&s3c24xx_i2c_dev_pm_ops)
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 8110ca4..9411c1b 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -120,11 +120,12 @@
 	void __iomem *reg;
 	struct i2c_adapter adap;
 	unsigned long bus_speed;
+	unsigned int clks_per_count;
 	struct clk *clk;
 	u_int8_t icic;
-	u_int8_t iccl;
-	u_int8_t icch;
 	u_int8_t flags;
+	u_int16_t iccl;
+	u_int16_t icch;
 
 	spinlock_t lock;
 	wait_queue_head_t wait;
@@ -135,7 +136,8 @@
 
 #define IIC_FLAG_HAS_ICIC67	(1 << 0)
 
-#define NORMAL_SPEED		100000 /* FAST_SPEED 400000 */
+#define STANDARD_MODE		100000
+#define FAST_MODE		400000
 
 /* Register offsets */
 #define ICDR			0x00
@@ -187,58 +189,91 @@
 	iic_wr(pd, offs, (iic_rd(pd, offs) | set) & ~clr);
 }
 
+static u32 sh_mobile_i2c_iccl(unsigned long count_khz, u32 tLOW, u32 tf, int offset)
+{
+	/*
+	 * Conditional expression:
+	 *   ICCL >= COUNT_CLK * (tLOW + tf)
+	 *
+	 * SH-Mobile IIC hardware starts counting the LOW period of
+	 * the SCL signal (tLOW) as soon as it pulls the SCL line.
+	 * In order to meet the tLOW timing spec, we need to take into
+	 * account the fall time of SCL signal (tf).  Default tf value
+	 * should be 0.3 us, for safety.
+	 */
+	return (((count_khz * (tLOW + tf)) + 5000) / 10000) + offset;
+}
+
+static u32 sh_mobile_i2c_icch(unsigned long count_khz, u32 tHIGH, u32 tf, int offset)
+{
+	/*
+	 * Conditional expression:
+	 *   ICCH >= COUNT_CLK * (tHIGH + tf)
+	 *
+	 * SH-Mobile IIC hardware is aware of SCL transition period 'tr',
+	 * and can ignore it.  SH-Mobile IIC controller starts counting
+	 * the HIGH period of the SCL signal (tHIGH) after the SCL input
+	 * voltage increases at VIH.
+	 *
+	 * It later turned out that calculating ICCH from the tHIGH spec
+	 * alone violates the tHD;STA timing spec.  We need to take into
+	 * account the fall time of the SDA signal (tf) at the START
+	 * condition, in order to meet both the tHIGH and tHD;STA specs.
+	 */
+	return (((count_khz * (tHIGH + tf)) + 5000) / 10000) + offset;
+}
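+
+/*
+ * Worked example (illustrative figures, not from a datasheet): assume
+ * the effective COUNT_CLK is 10 MHz, i.e. count_khz = 10000.  In
+ * standard mode (tLOW = 4.7 us -> 47, tf = 0.3 us -> 3, both in units
+ * of 0.1 us):
+ *
+ *   ICCL = ((10000 * (47 + 3)) + 5000) / 10000 + 0 = 50
+ *
+ * i.e. 50 clock counts for the 5.0 us low period; the "+ 5000" term
+ * rounds the integer division to the nearest count.
+ */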
+
+static void sh_mobile_i2c_init(struct sh_mobile_i2c_data *pd)
+{
+	unsigned long i2c_clk_khz;
+	u32 tHIGH, tLOW, tf;
+	int offset;
+
+	/* Get clock rate after clock is enabled */
+	clk_enable(pd->clk);
+	i2c_clk_khz = clk_get_rate(pd->clk) / 1000;
+	i2c_clk_khz /= pd->clks_per_count;
+
+	if (pd->bus_speed == STANDARD_MODE) {
+		tLOW	= 47;	/* tLOW = 4.7 us */
+		tHIGH	= 40;	/* tHD;STA = tHIGH = 4.0 us */
+		tf	= 3;	/* tf = 0.3 us */
+		offset	= 0;	/* No offset */
+	} else if (pd->bus_speed == FAST_MODE) {
+		tLOW	= 13;	/* tLOW = 1.3 us */
+		tHIGH	= 6;	/* tHD;STA = tHIGH = 0.6 us */
+		tf	= 3;	/* tf = 0.3 us */
+		offset	= 0;	/* No offset */
+	} else {
+		dev_err(pd->dev, "unrecognized bus speed %lu Hz\n",
+			pd->bus_speed);
+		goto out;
+	}
+
+	pd->iccl = sh_mobile_i2c_iccl(i2c_clk_khz, tLOW, tf, offset);
+	/* one more bit of ICCL in ICIC */
+	if ((pd->iccl > 0xff) && (pd->flags & IIC_FLAG_HAS_ICIC67))
+		pd->icic |= ICIC_ICCLB8;
+	else
+		pd->icic &= ~ICIC_ICCLB8;
+
+	pd->icch = sh_mobile_i2c_icch(i2c_clk_khz, tHIGH, tf, offset);
+	/* one more bit of ICCH in ICIC */
+	if ((pd->icch > 0xff) && (pd->flags & IIC_FLAG_HAS_ICIC67))
+		pd->icic |= ICIC_ICCHB8;
+	else
+		pd->icic &= ~ICIC_ICCHB8;
+
+out:
+	clk_disable(pd->clk);
+}
+
 static void activate_ch(struct sh_mobile_i2c_data *pd)
 {
-	unsigned long i2c_clk;
-	u_int32_t num;
-	u_int32_t denom;
-	u_int32_t tmp;
-
 	/* Wake up device and enable clock */
 	pm_runtime_get_sync(pd->dev);
 	clk_enable(pd->clk);
 
-	/* Get clock rate after clock is enabled */
-	i2c_clk = clk_get_rate(pd->clk);
-
-	/* Calculate the value for iccl. From the data sheet:
-	 * iccl = (p clock / transfer rate) * (L / (L + H))
-	 * where L and H are the SCL low/high ratio (5/4 in this case).
-	 * We also round off the result.
-	 */
-	num = i2c_clk * 5;
-	denom = pd->bus_speed * 9;
-	tmp = num * 10 / denom;
-	if (tmp % 10 >= 5)
-		pd->iccl = (u_int8_t)((num/denom) + 1);
-	else
-		pd->iccl = (u_int8_t)(num/denom);
-
-	/* one more bit of ICCL in ICIC */
-	if (pd->flags & IIC_FLAG_HAS_ICIC67) {
-		if ((num/denom) > 0xff)
-			pd->icic |= ICIC_ICCLB8;
-		else
-			pd->icic &= ~ICIC_ICCLB8;
-	}
-
-	/* Calculate the value for icch. From the data sheet:
-	   icch = (p clock / transfer rate) * (H / (L + H)) */
-	num = i2c_clk * 4;
-	tmp = num * 10 / denom;
-	if (tmp % 10 >= 5)
-		pd->icch = (u_int8_t)((num/denom) + 1);
-	else
-		pd->icch = (u_int8_t)(num/denom);
-
-	/* one more bit of ICCH in ICIC */
-	if (pd->flags & IIC_FLAG_HAS_ICIC67) {
-		if ((num/denom) > 0xff)
-			pd->icic |= ICIC_ICCHB8;
-		else
-			pd->icic &= ~ICIC_ICCHB8;
-	}
-
 	/* Enable channel and configure rx ack */
 	iic_set_clr(pd, ICCR, ICCR_ICE, 0);
 
@@ -246,8 +281,8 @@
 	iic_wr(pd, ICIC, 0);
 
 	/* Set the clock */
-	iic_wr(pd, ICCL, pd->iccl);
-	iic_wr(pd, ICCH, pd->icch);
+	iic_wr(pd, ICCL, pd->iccl & 0xff);
+	iic_wr(pd, ICCH, pd->icch & 0xff);
 }
 
 static void deactivate_ch(struct sh_mobile_i2c_data *pd)
@@ -434,6 +469,9 @@
 		wake_up(&pd->wait);
 	}
 
+	/* defeat write posting to avoid spurious WAIT interrupts */
+	iic_rd(pd, ICSR);
+
 	return IRQ_HANDLED;
 }
 
@@ -451,8 +489,8 @@
 	iic_set_clr(pd, ICCR, ICCR_ICE, 0);
 
 	/* Set the clock */
-	iic_wr(pd, ICCL, pd->iccl);
-	iic_wr(pd, ICCH, pd->icch);
+	iic_wr(pd, ICCL, pd->iccl & 0xff);
+	iic_wr(pd, ICCH, pd->icch & 0xff);
 
 	pd->msg = usr_msg;
 	pd->pos = -1;
@@ -621,10 +659,13 @@
 		goto err_irq;
 	}
 
-	/* Use platformd data bus speed or NORMAL_SPEED */
-	pd->bus_speed = NORMAL_SPEED;
+	/* Use platform data bus speed or STANDARD_MODE */
+	pd->bus_speed = STANDARD_MODE;
 	if (pdata && pdata->bus_speed)
 		pd->bus_speed = pdata->bus_speed;
+	pd->clks_per_count = 1;
+	if (pdata && pdata->clks_per_count)
+		pd->clks_per_count = pdata->clks_per_count;
 
 	/* The IIC blocks on SH-Mobile ARM processors
 	 * come with two new bits in ICIC.
@@ -632,6 +673,8 @@
 	if (size > 0x17)
 		pd->flags |= IIC_FLAG_HAS_ICIC67;
 
+	sh_mobile_i2c_init(pd);
+
 	/* Enable Runtime PM for this device.
 	 *
 	 * Also tell the Runtime PM core to ignore children
@@ -667,8 +710,9 @@
 		goto err_all;
 	}
 
-	dev_info(&dev->dev, "I2C adapter %d with bus speed %lu Hz\n",
-		 adap->nr, pd->bus_speed);
+	dev_info(&dev->dev,
+		 "I2C adapter %d with bus speed %lu Hz (L/H=%x/%x)\n",
+		 adap->nr, pd->bus_speed, pd->iccl, pd->icch);
 
 	of_i2c_register_devices(adap);
 	return 0;
diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c
new file mode 100644
index 0000000..f5fa20d
--- /dev/null
+++ b/drivers/i2c/busses/i2c-viperboard.c
@@ -0,0 +1,480 @@
+/*
+ *  Nano River Technologies viperboard i2c master driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/i2c.h>
+
+#include <linux/mfd/viperboard.h>
+
+struct vprbrd_i2c {
+	struct i2c_adapter i2c;
+	u8 bus_freq_param;
+};
+
+/* i2c bus frequency module parameter */
+static u8 i2c_bus_param;
+static unsigned int i2c_bus_freq = 100;
+module_param(i2c_bus_freq, uint, 0);
+MODULE_PARM_DESC(i2c_bus_freq,
+	"i2c bus frequency in kHz (default is 100); valid values: 10, 100, 200, 400, 1000, 3000, 6000");
+
+static int vprbrd_i2c_status(struct i2c_adapter *i2c,
+	struct vprbrd_i2c_status *status, bool prev_error)
+{
+	u16 bytes_xfer;
+	int ret;
+	struct vprbrd *vb = (struct vprbrd *)i2c->algo_data;
+
+	/* check for protocol error */
+	bytes_xfer = sizeof(struct vprbrd_i2c_status);
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_I2C, VPRBRD_USB_TYPE_IN, 0x0000, 0x0000,
+		status, bytes_xfer, VPRBRD_USB_TIMEOUT_MS);
+
+	if (ret != bytes_xfer)
+		prev_error = true;
+
+	if (prev_error) {
+		dev_err(&i2c->dev, "failure in usb communication\n");
+		return -EREMOTEIO;
+	}
+
+	dev_dbg(&i2c->dev, "  status = %d\n", status->status);
+	if (status->status != 0x00) {
+		dev_err(&i2c->dev, "failure: i2c protocol error\n");
+		return -EPROTO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_receive(struct usb_device *usb_dev,
+	struct vprbrd_i2c_read_msg *rmsg, int bytes_xfer)
+{
+	int ret, bytes_actual;
+	int error = 0;
+
+	/* send the read request */
+	ret = usb_bulk_msg(usb_dev,
+		usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), rmsg,
+		sizeof(struct vprbrd_i2c_read_hdr), &bytes_actual,
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0) ||
+	    (bytes_actual != sizeof(struct vprbrd_i2c_read_hdr))) {
+		dev_err(&usb_dev->dev, "failure transmitting usb\n");
+		error = -EREMOTEIO;
+	}
+
+	/* read the actual data */
+	ret = usb_bulk_msg(usb_dev,
+		usb_rcvbulkpipe(usb_dev, VPRBRD_EP_IN), rmsg,
+		bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0) || (bytes_xfer != bytes_actual)) {
+		dev_err(&usb_dev->dev, "failure receiving usb\n");
+		error = -EREMOTEIO;
+	}
+	return error;
+}
+
+static int vprbrd_i2c_addr(struct usb_device *usb_dev,
+	struct vprbrd_i2c_addr_msg *amsg)
+{
+	int ret, bytes_actual;
+
+	ret = usb_bulk_msg(usb_dev,
+		usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), amsg,
+		sizeof(struct vprbrd_i2c_addr_msg), &bytes_actual,
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0) ||
+			(sizeof(struct vprbrd_i2c_addr_msg) != bytes_actual)) {
+		dev_err(&usb_dev->dev, "failure transmitting usb\n");
+		return -EREMOTEIO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_read(struct vprbrd *vb, struct i2c_msg *msg)
+{
+	int ret;
+	u16 remain_len, bytes_xfer, len1, len2,
+		start = 0x0000;
+	struct vprbrd_i2c_read_msg *rmsg =
+		(struct vprbrd_i2c_read_msg *)vb->buf;
+
+	remain_len = msg->len;
+	rmsg->header.cmd = VPRBRD_I2C_CMD_READ;
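+	/*
+	 * The loop below splits a read into chunks of at most 1024 bytes.
+	 * Each request header describes up to two sub-transfers of at
+	 * most 512 bytes (tf1 with len0..len2, tf2 with len3..len5);
+	 * every lenN byte contributes up to 0xff to its sub-transfer
+	 * length, so e.g. 512 is encoded as 0x02 + 0xff + 0xff.
+	 */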
+	while (remain_len > 0) {
+		rmsg->header.addr = cpu_to_le16(start + 0x4000);
+		if (remain_len <= 255) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len;
+			rmsg->header.len1 = 0x00;
+			rmsg->header.len2 = 0x00;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 510) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len - 255;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0x00;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 512) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len - 510;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 767) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 512;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			bytes_xfer = remain_len;
+			remain_len = 0;
+		} else if (remain_len <= 1022) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 767;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 1024) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 1022;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0xff;
+			remain_len = 0;
+		} else {
+			len1 = 512;
+			len2 = 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = 0x02;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0xff;
+			remain_len -= 1024;
+			start += 1024;
+		}
+		rmsg->header.tf1 = cpu_to_le16(len1);
+		rmsg->header.tf2 = cpu_to_le16(len2);
+
+		/* first read transfer */
+		ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len1);
+		if (ret < 0)
+			return ret;
+		/* copy the received data */
+		memcpy(msg->buf + start, rmsg, len1);
+
+		/* second read transfer if necessary */
+		if (len2 > 0) {
+			ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len2);
+			if (ret < 0)
+				return ret;
+			/* copy the received data */
+			memcpy(msg->buf + start + 512, rmsg, len2);
+		}
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_write(struct vprbrd *vb, struct i2c_msg *msg)
+{
+	int ret, bytes_actual;
+	u16 remain_len, bytes_xfer,
+		start = 0x0000;
+	struct vprbrd_i2c_write_msg *wmsg =
+		(struct vprbrd_i2c_write_msg *)vb->buf;
+
+	remain_len = msg->len;
+	wmsg->header.cmd = VPRBRD_I2C_CMD_WRITE;
+	wmsg->header.last = 0x00;
+	wmsg->header.chan = 0x00;
+	wmsg->header.spi = 0x0000;
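+	/*
+	 * Writes are split into chunks of at most 503 payload bytes per
+	 * USB message; the chunk length is encoded as len1 + len2, with
+	 * len1 capped at 0xff (503 = 0xff + 0xf8).
+	 */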
+	while (remain_len > 0) {
+		wmsg->header.addr = cpu_to_le16(start + 0x4000);
+		if (remain_len > 503) {
+			wmsg->header.len1 = 0xff;
+			wmsg->header.len2 = 0xf8;
+			remain_len -= 503;
+			bytes_xfer = 503 + sizeof(struct vprbrd_i2c_write_hdr);
+			start += 503;
+		} else if (remain_len > 255) {
+			wmsg->header.len1 = 0xff;
+			wmsg->header.len2 = (remain_len - 255);
+			bytes_xfer = remain_len +
+				sizeof(struct vprbrd_i2c_write_hdr);
+			remain_len = 0;
+		} else {
+			wmsg->header.len1 = remain_len;
+			wmsg->header.len2 = 0x00;
+			bytes_xfer = remain_len +
+				sizeof(struct vprbrd_i2c_write_hdr);
+			remain_len = 0;
+		}
+		memcpy(wmsg->data, msg->buf + start,
+			bytes_xfer - sizeof(struct vprbrd_i2c_write_hdr));
+
+		ret = usb_bulk_msg(vb->usb_dev,
+			usb_sndbulkpipe(vb->usb_dev,
+			VPRBRD_EP_OUT), wmsg,
+			bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS);
+		if ((ret < 0) || (bytes_xfer != bytes_actual))
+			return -EREMOTEIO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs,
+		int num)
+{
+	struct i2c_msg *pmsg;
+	int i, ret,
+		error = 0;
+	struct vprbrd *vb = (struct vprbrd *)i2c->algo_data;
+	struct vprbrd_i2c_addr_msg *amsg =
+		(struct vprbrd_i2c_addr_msg *)vb->buf;
+	struct vprbrd_i2c_status *smsg = (struct vprbrd_i2c_status *)vb->buf;
+
+	dev_dbg(&i2c->dev, "master xfer %d messages:\n", num);
+
+	for (i = 0; i < num; i++) {
+		pmsg = &msgs[i];
+
+		dev_dbg(&i2c->dev,
+			"  %d: %s (flags %d) %d bytes to 0x%02x\n",
+			i, pmsg->flags & I2C_M_RD ? "read" : "write",
+			pmsg->flags, pmsg->len, pmsg->addr);
+
+		/* msgs longer than 2048 bytes are not supported by adapter */
+		if (pmsg->len > 2048)
+			return -EINVAL;
+
+		mutex_lock(&vb->lock);
+		/* directly send the message */
+		if (pmsg->flags & I2C_M_RD) {
+			/* read data */
+			amsg->cmd = VPRBRD_I2C_CMD_ADDR;
+			amsg->unknown2 = 0x00;
+			amsg->unknown3 = 0x00;
+			amsg->addr = pmsg->addr;
+			amsg->unknown1 = 0x01;
+			amsg->len = cpu_to_le16(pmsg->len);
+			/* send the addr and the len we are interested in to the board */
+			ret = vprbrd_i2c_addr(vb->usb_dev, amsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_read(vb, pmsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_status(i2c, smsg, error);
+			if (ret < 0)
+				error = ret;
+			/* in case of protocol error, return the error */
+			if (error < 0)
+				goto error;
+		} else {
+			/* write data */
+			ret = vprbrd_i2c_write(vb, pmsg);
+
+			amsg->cmd = VPRBRD_I2C_CMD_ADDR;
+			amsg->unknown2 = 0x00;
+			amsg->unknown3 = 0x00;
+			amsg->addr = pmsg->addr;
+			amsg->unknown1 = 0x00;
+			amsg->len = cpu_to_le16(pmsg->len);
+			/* send the addr, the data goes to the board */
+			ret = vprbrd_i2c_addr(vb->usb_dev, amsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_status(i2c, smsg, error);
+			if (ret < 0)
+				error = ret;
+
+			if (error < 0)
+				goto error;
+		}
+		mutex_unlock(&vb->lock);
+	}
+	return 0;
+error:
+	mutex_unlock(&vb->lock);
+	return error;
+}
+
+static u32 vprbrd_i2c_func(struct i2c_adapter *i2c)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+/* This is the actual algorithm we define */
+static const struct i2c_algorithm vprbrd_algorithm = {
+	.master_xfer	= vprbrd_i2c_xfer,
+	.functionality	= vprbrd_i2c_func,
+};
+
+static int __devinit vprbrd_i2c_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_i2c *vb_i2c;
+	int ret;
+	int pipe;
+
+	vb_i2c = kzalloc(sizeof(*vb_i2c), GFP_KERNEL);
+	if (vb_i2c == NULL)
+		return -ENOMEM;
+
+	/* setup i2c adapter description */
+	vb_i2c->i2c.owner = THIS_MODULE;
+	vb_i2c->i2c.class = I2C_CLASS_HWMON;
+	vb_i2c->i2c.algo = &vprbrd_algorithm;
+	vb_i2c->i2c.algo_data = vb;
+	/* save the param in usb capable memory */
+	vb_i2c->bus_freq_param = i2c_bus_param;
+
+	snprintf(vb_i2c->i2c.name, sizeof(vb_i2c->i2c.name),
+		 "viperboard at bus %03d device %03d",
+		 vb->usb_dev->bus->busnum, vb->usb_dev->devnum);
+
+	/* setting the bus frequency */
+	if ((i2c_bus_param <= VPRBRD_I2C_FREQ_10KHZ) &&
+	    (i2c_bus_param >= VPRBRD_I2C_FREQ_6MHZ)) {
+		pipe = usb_sndctrlpipe(vb->usb_dev, 0);
+		ret = usb_control_msg(vb->usb_dev, pipe,
+			VPRBRD_USB_REQUEST_I2C_FREQ, VPRBRD_USB_TYPE_OUT,
+			0x0000, 0x0000, &vb_i2c->bus_freq_param, 1,
+			VPRBRD_USB_TIMEOUT_MS);
+		if (ret != 1) {
+			dev_err(&pdev->dev,
+				"failure setting i2c_bus_freq to %d\n",
+				i2c_bus_freq);
+			ret = -EIO;
+			goto error;
+		}
+	} else {
+		dev_err(&pdev->dev,
+			"invalid i2c_bus_freq setting:%d\n", i2c_bus_freq);
+		ret = -EIO;
+		goto error;
+	}
+
+	vb_i2c->i2c.dev.parent = &pdev->dev;
+
+	/* attach to i2c layer */
+	i2c_add_adapter(&vb_i2c->i2c);
+
+	platform_set_drvdata(pdev, vb_i2c);
+
+	return 0;
+
+error:
+	kfree(vb_i2c);
+	return ret;
+}
+
+static int __devexit vprbrd_i2c_remove(struct platform_device *pdev)
+{
+	struct vprbrd_i2c *vb_i2c = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = i2c_del_adapter(&vb_i2c->i2c);
+
+	return ret;
+}
+
+static struct platform_driver vprbrd_i2c_driver = {
+	.driver.name	= "viperboard-i2c",
+	.driver.owner	= THIS_MODULE,
+	.probe		= vprbrd_i2c_probe,
+	.remove		= __devexit_p(vprbrd_i2c_remove),
+};
+
+static int __init vprbrd_i2c_init(void)
+{
+	switch (i2c_bus_freq) {
+	case 6000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_6MHZ;
+		break;
+	case 3000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_3MHZ;
+		break;
+	case 1000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_1MHZ;
+		break;
+	case 400:
+		i2c_bus_param = VPRBRD_I2C_FREQ_400KHZ;
+		break;
+	case 200:
+		i2c_bus_param = VPRBRD_I2C_FREQ_200KHZ;
+		break;
+	case 100:
+		i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ;
+		break;
+	case 10:
+		i2c_bus_param = VPRBRD_I2C_FREQ_10KHZ;
+		break;
+	default:
+		pr_warn("invalid i2c_bus_freq (%d)\n", i2c_bus_freq);
+		i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ;
+	}
+
+	return platform_driver_register(&vprbrd_i2c_driver);
+}
+subsys_initcall(vprbrd_i2c_init);
+
+static void __exit vprbrd_i2c_exit(void)
+{
+	platform_driver_unregister(&vprbrd_i2c_driver);
+}
+module_exit(vprbrd_i2c_exit);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("I2C master driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-i2c");
diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c
index 566a675..3b7bc06 100644
--- a/drivers/i2c/muxes/i2c-mux-gpio.c
+++ b/drivers/i2c/muxes/i2c-mux-gpio.c
@@ -16,6 +16,8 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/gpio.h>
+#include <linux/of_i2c.h>
+#include <linux/of_gpio.h>
 
 struct gpiomux {
 	struct i2c_adapter *parent;
@@ -57,29 +59,110 @@
 	return !strcmp(chip->label, data);
 }
 
+#ifdef CONFIG_OF
+static int __devinit i2c_mux_gpio_probe_dt(struct gpiomux *mux,
+					struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *adapter_np, *child;
+	struct i2c_adapter *adapter;
+	unsigned *values, *gpios;
+	int i = 0;
+
+	if (!np)
+		return -ENODEV;
+
+	adapter_np = of_parse_phandle(np, "i2c-parent", 0);
+	if (!adapter_np) {
+		dev_err(&pdev->dev, "Cannot parse i2c-parent\n");
+		return -ENODEV;
+	}
+	adapter = of_find_i2c_adapter_by_node(adapter_np);
+	if (!adapter) {
+		dev_err(&pdev->dev, "Cannot find parent bus\n");
+		return -ENODEV;
+	}
+	mux->data.parent = i2c_adapter_id(adapter);
+	put_device(&adapter->dev);
+
+	mux->data.n_values = of_get_child_count(np);
+
+	values = devm_kzalloc(&pdev->dev,
+			      sizeof(*mux->data.values) * mux->data.n_values,
+			      GFP_KERNEL);
+	if (!values) {
+		dev_err(&pdev->dev, "Cannot allocate values array");
+		return -ENOMEM;
+	}
+
+	for_each_child_of_node(np, child) {
+		of_property_read_u32(child, "reg", values + i);
+		i++;
+	}
+	mux->data.values = values;
+
+	if (of_property_read_u32(np, "idle-state", &mux->data.idle))
+		mux->data.idle = I2C_MUX_GPIO_NO_IDLE;
+
+	mux->data.n_gpios = of_gpio_named_count(np, "mux-gpios");
+	if (mux->data.n_gpios < 0) {
+		dev_err(&pdev->dev, "Missing mux-gpios property in the DT.\n");
+		return -EINVAL;
+	}
+
+	gpios = devm_kzalloc(&pdev->dev,
+			     sizeof(*mux->data.gpios) * mux->data.n_gpios, GFP_KERNEL);
+	if (!gpios) {
+		dev_err(&pdev->dev, "Cannot allocate gpios array");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < mux->data.n_gpios; i++)
+		gpios[i] = of_get_named_gpio(np, "mux-gpios", i);
+
+	mux->data.gpios = gpios;
+
+	return 0;
+}
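+
+/*
+ * Illustrative device tree fragment matching what this probe parses
+ * (node name and GPIO specifiers are hypothetical):
+ *
+ *	mux {
+ *		compatible = "i2c-mux-gpio";
+ *		i2c-parent = <&i2c1>;
+ *		mux-gpios = <&gpio1 22 0>, <&gpio1 23 0>;
+ *		idle-state = <0>;
+ *
+ *		i2c@0 { reg = <0>; };
+ *		i2c@1 { reg = <1>; };
+ *	};
+ */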
+#else
+static int __devinit i2c_mux_gpio_probe_dt(struct gpiomux *mux,
+					struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
 static int __devinit i2c_mux_gpio_probe(struct platform_device *pdev)
 {
 	struct gpiomux *mux;
-	struct i2c_mux_gpio_platform_data *pdata;
 	struct i2c_adapter *parent;
 	int (*deselect) (struct i2c_adapter *, void *, u32);
 	unsigned initial_state, gpio_base;
 	int i, ret;
 
-	pdata = pdev->dev.platform_data;
-	if (!pdata) {
-		dev_err(&pdev->dev, "Missing platform data\n");
-		return -ENODEV;
+	mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
+	if (!mux) {
+		dev_err(&pdev->dev, "Cannot allocate gpiomux structure");
+		return -ENOMEM;
 	}
 
+	platform_set_drvdata(pdev, mux);
+
+	if (!pdev->dev.platform_data) {
+		ret = i2c_mux_gpio_probe_dt(mux, pdev);
+		if (ret < 0)
+			return ret;
+	} else
+		memcpy(&mux->data, pdev->dev.platform_data, sizeof(mux->data));
+
 	/*
 	 * If a GPIO chip name is provided, the GPIO pin numbers provided are
 	 * relative to its base GPIO number. Otherwise they are absolute.
 	 */
-	if (pdata->gpio_chip) {
+	if (mux->data.gpio_chip) {
 		struct gpio_chip *gpio;
 
-		gpio = gpiochip_find(pdata->gpio_chip,
+		gpio = gpiochip_find(mux->data.gpio_chip,
 				     match_gpio_chip_by_label);
 		if (!gpio)
 			return -EPROBE_DEFER;
@@ -89,49 +172,44 @@
 		gpio_base = 0;
 	}
 
-	parent = i2c_get_adapter(pdata->parent);
+	parent = i2c_get_adapter(mux->data.parent);
 	if (!parent) {
 		dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
-			pdata->parent);
+			mux->data.parent);
 		return -ENODEV;
 	}
 
-	mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
-	if (!mux) {
-		ret = -ENOMEM;
-		goto alloc_failed;
-	}
-
 	mux->parent = parent;
-	mux->data = *pdata;
 	mux->gpio_base = gpio_base;
+
 	mux->adap = devm_kzalloc(&pdev->dev,
-				 sizeof(*mux->adap) * pdata->n_values,
+				 sizeof(*mux->adap) * mux->data.n_values,
 				 GFP_KERNEL);
 	if (!mux->adap) {
+		dev_err(&pdev->dev, "Cannot allocate i2c_adapter structure");
 		ret = -ENOMEM;
 		goto alloc_failed;
 	}
 
-	if (pdata->idle != I2C_MUX_GPIO_NO_IDLE) {
-		initial_state = pdata->idle;
+	if (mux->data.idle != I2C_MUX_GPIO_NO_IDLE) {
+		initial_state = mux->data.idle;
 		deselect = i2c_mux_gpio_deselect;
 	} else {
-		initial_state = pdata->values[0];
+		initial_state = mux->data.values[0];
 		deselect = NULL;
 	}
 
-	for (i = 0; i < pdata->n_gpios; i++) {
-		ret = gpio_request(gpio_base + pdata->gpios[i], "i2c-mux-gpio");
+	for (i = 0; i < mux->data.n_gpios; i++) {
+		ret = gpio_request(gpio_base + mux->data.gpios[i], "i2c-mux-gpio");
 		if (ret)
 			goto err_request_gpio;
-		gpio_direction_output(gpio_base + pdata->gpios[i],
+		gpio_direction_output(gpio_base + mux->data.gpios[i],
 				      initial_state & (1 << i));
 	}
 
-	for (i = 0; i < pdata->n_values; i++) {
-		u32 nr = pdata->base_nr ? (pdata->base_nr + i) : 0;
-		unsigned int class = pdata->classes ? pdata->classes[i] : 0;
+	for (i = 0; i < mux->data.n_values; i++) {
+		u32 nr = mux->data.base_nr ? (mux->data.base_nr + i) : 0;
+		unsigned int class = mux->data.classes ? mux->data.classes[i] : 0;
 
 		mux->adap[i] = i2c_add_mux_adapter(parent, &pdev->dev, mux, nr,
 						   i, class,
@@ -144,19 +222,17 @@
 	}
 
 	dev_info(&pdev->dev, "%d port mux on %s adapter\n",
-		 pdata->n_values, parent->name);
-
-	platform_set_drvdata(pdev, mux);
+		 mux->data.n_values, parent->name);
 
 	return 0;
 
 add_adapter_failed:
 	for (; i > 0; i--)
 		i2c_del_mux_adapter(mux->adap[i - 1]);
-	i = pdata->n_gpios;
+	i = mux->data.n_gpios;
 err_request_gpio:
 	for (; i > 0; i--)
-		gpio_free(gpio_base + pdata->gpios[i - 1]);
+		gpio_free(gpio_base + mux->data.gpios[i - 1]);
 alloc_failed:
 	i2c_put_adapter(parent);
 
@@ -180,12 +256,19 @@
 	return 0;
 }
 
+static const struct of_device_id i2c_mux_gpio_of_match[] __devinitconst = {
+	{ .compatible = "i2c-mux-gpio", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, i2c_mux_gpio_of_match);
+
 static struct platform_driver i2c_mux_gpio_driver = {
 	.probe	= i2c_mux_gpio_probe,
 	.remove	= __devexit_p(i2c_mux_gpio_remove),
 	.driver	= {
 		.owner	= THIS_MODULE,
 		.name	= "i2c-mux-gpio",
+		.of_match_table = of_match_ptr(i2c_mux_gpio_of_match),
 	},
 };
 
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 961b8d0..fe822a1 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -125,4 +125,18 @@
 	  This driver can also be built as a module. If so, the module will be
 	  called ti-adc081c.
 
+config TI_AM335X_ADC
+	tristate "TI's ADC driver"
+	depends on MFD_TI_AM335X_TSCADC
+	help
+	  Say yes here to build support for Texas Instruments ADC
+	  driver which is also a MFD client.
+
+config VIPERBOARD_ADC
+	tristate "Viperboard ADC support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the ADC part of the Nano River
+	  Technologies Viperboard.
+
 endmenu
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 472fd7c..2d5f100 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -13,4 +13,5 @@
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
 obj-$(CONFIG_MAX1363) += max1363.o
 obj-$(CONFIG_TI_ADC081C) += ti-adc081c.o
-
+obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o
+obj-$(CONFIG_VIPERBOARD_ADC) += viperboard_adc.o
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
new file mode 100644
index 0000000..02a43c8
--- /dev/null
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -0,0 +1,260 @@
+/*
+ * TI ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/iio/iio.h>
+
+#include <linux/mfd/ti_am335x_tscadc.h>
+#include <linux/platform_data/ti_am335x_adc.h>
+
+struct tiadc_device {
+	struct ti_tscadc_dev *mfd_tscadc;
+	int channels;
+};
+
+static unsigned int tiadc_readl(struct tiadc_device *adc, unsigned int reg)
+{
+	return readl(adc->mfd_tscadc->tscadc_base + reg);
+}
+
+static void tiadc_writel(struct tiadc_device *adc, unsigned int reg,
+					unsigned int val)
+{
+	writel(val, adc->mfd_tscadc->tscadc_base + reg);
+}
+
+static void tiadc_step_config(struct tiadc_device *adc_dev)
+{
+	unsigned int stepconfig;
+	int i, channels = 0, steps;
+
+	/*
+	 * There are 16 configurable steps and 8 analog input
+	 * lines available, shared between the touchscreen and the ADC.
+	 *
+	 * Steps are assigned to the ADC from the top down, i.e. from
+	 * step 16 towards step 1, depending on the number of input
+	 * lines needed.  The channel number selects which analog input
+	 * is fed to the ADC for digitization.
+	 */
+
+	steps = TOTAL_STEPS - adc_dev->channels;
+	channels = TOTAL_CHANNELS - adc_dev->channels;
+
+	stepconfig = STEPCONFIG_AVG_16 | STEPCONFIG_FIFO1;
+
+	for (i = (steps + 1); i <= TOTAL_STEPS; i++) {
+		tiadc_writel(adc_dev, REG_STEPCONFIG(i),
+				stepconfig | STEPCONFIG_INP(channels));
+		tiadc_writel(adc_dev, REG_STEPDELAY(i),
+				STEPCONFIG_OPENDLY);
+		channels++;
+	}
+	tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB);
+}
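+
+/*
+ * Example (assuming TOTAL_STEPS = 16 and TOTAL_CHANNELS = 8): with
+ * adc_dev->channels = 4, steps 13..16 are programmed to sample analog
+ * inputs 4..7 with 16-sample averaging, leaving the lower steps free
+ * for the touchscreen.
+ */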
+
+static int tiadc_channel_init(struct iio_dev *indio_dev, int channels)
+{
+	struct iio_chan_spec *chan_array;
+	int i;
+
+	indio_dev->num_channels = channels;
+	chan_array = kcalloc(indio_dev->num_channels,
+			sizeof(struct iio_chan_spec), GFP_KERNEL);
+
+	if (chan_array == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < (indio_dev->num_channels); i++) {
+		struct iio_chan_spec *chan = chan_array + i;
+		chan->type = IIO_VOLTAGE;
+		chan->indexed = 1;
+		chan->channel = i;
+		chan->info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT;
+	}
+
+	indio_dev->channels = chan_array;
+
+	return indio_dev->num_channels;
+}
+
+static void tiadc_channels_remove(struct iio_dev *indio_dev)
+{
+	kfree(indio_dev->channels);
+}
+
+static int tiadc_read_raw(struct iio_dev *indio_dev,
+		struct iio_chan_spec const *chan,
+		int *val, int *val2, long mask)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	int i;
+	unsigned int fifo1count, readx1;
+
+	/*
+	 * When the sub-system is first enabled, the sequencer always
+	 * starts with the lowest step (1) and continues up to step (16).
+	 * For example, if 4 ADC channels are enabled but only 1 of them
+	 * is currently used, the sequencer still runs all 4 steps,
+	 * producing 3 unwanted samples.
+	 * Hence we need to flush out this data.
+	 */
+
+	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
+	for (i = 0; i < fifo1count; i++) {
+		readx1 = tiadc_readl(adc_dev, REG_FIFO1);
+		if (i == chan->channel)
+			*val = readx1 & 0xfff;
+	}
+	tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB);
+
+	return IIO_VAL_INT;
+}
+
+static const struct iio_info tiadc_info = {
+	.read_raw = &tiadc_read_raw,
+};
+
+static int __devinit tiadc_probe(struct platform_device *pdev)
+{
+	struct iio_dev		*indio_dev;
+	struct tiadc_device	*adc_dev;
+	struct ti_tscadc_dev	*tscadc_dev = pdev->dev.platform_data;
+	struct mfd_tscadc_board	*pdata;
+	int			err;
+
+	pdata = tscadc_dev->dev->platform_data;
+	if (!pdata || !pdata->adc_init) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
+		return -EINVAL;
+	}
+
+	indio_dev = iio_device_alloc(sizeof(struct tiadc_device));
+	if (indio_dev == NULL) {
+		dev_err(&pdev->dev, "failed to allocate iio device\n");
+		err = -ENOMEM;
+		goto err_ret;
+	}
+	adc_dev = iio_priv(indio_dev);
+
+	adc_dev->mfd_tscadc = tscadc_dev;
+	adc_dev->channels = pdata->adc_init->adc_channels;
+
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->name = dev_name(&pdev->dev);
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->info = &tiadc_info;
+
+	tiadc_step_config(adc_dev);
+
+	err = tiadc_channel_init(indio_dev, adc_dev->channels);
+	if (err < 0)
+		goto err_free_device;
+
+	err = iio_device_register(indio_dev);
+	if (err)
+		goto err_free_channels;
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	return 0;
+
+err_free_channels:
+	tiadc_channels_remove(indio_dev);
+err_free_device:
+	iio_device_free(indio_dev);
+err_ret:
+	return err;
+}
+
+static int __devexit tiadc_remove(struct platform_device *pdev)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+
+	iio_device_unregister(indio_dev);
+	tiadc_channels_remove(indio_dev);
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tiadc_suspend(struct device *dev)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	unsigned int idle;
+
+	if (!device_may_wakeup(tscadc_dev->dev)) {
+		idle = tiadc_readl(adc_dev, REG_CTRL);
+		idle &= ~(CNTRLREG_TSCSSENB);
+		tiadc_writel(adc_dev, REG_CTRL, (idle |
+				CNTRLREG_POWERDOWN));
+	}
+
+	return 0;
+}
+
+static int tiadc_resume(struct device *dev)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	unsigned int restore;
+
+	/* Make sure ADC is powered up */
+	restore = tiadc_readl(adc_dev, REG_CTRL);
+	restore &= ~(CNTRLREG_POWERDOWN);
+	tiadc_writel(adc_dev, REG_CTRL, restore);
+
+	tiadc_step_config(adc_dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops tiadc_pm_ops = {
+	.suspend = tiadc_suspend,
+	.resume = tiadc_resume,
+};
+#define TIADC_PM_OPS (&tiadc_pm_ops)
+#else
+#define TIADC_PM_OPS NULL
+#endif
+
+static struct platform_driver tiadc_driver = {
+	.driver = {
+		.name   = "tiadc",
+		.owner	= THIS_MODULE,
+		.pm	= TIADC_PM_OPS,
+	},
+	.probe	= tiadc_probe,
+	.remove	= __devexit_p(tiadc_remove),
+};
+
+module_platform_driver(tiadc_driver);
+
+MODULE_DESCRIPTION("TI ADC controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/iio/adc/viperboard_adc.c b/drivers/iio/adc/viperboard_adc.c
new file mode 100644
index 0000000..10136a8
--- /dev/null
+++ b/drivers/iio/adc/viperboard_adc.c
@@ -0,0 +1,181 @@
+/*
+ *  Nano River Technologies viperboard IIO ADC driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/iio/iio.h>
+
+#include <linux/mfd/viperboard.h>
+
+#define VPRBRD_ADC_CMD_GET		0x00
+
+struct vprbrd_adc_msg {
+	u8 cmd;
+	u8 chan;
+	u8 val;
+} __packed;
+
+struct vprbrd_adc {
+	struct vprbrd *vb;
+};
+
+#define VPRBRD_ADC_CHANNEL(_index) {			\
+	.type = IIO_VOLTAGE,				\
+	.indexed = 1,					\
+	.channel = _index,				\
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT,	\
+	.scan_index = _index,				\
+	.scan_type = {					\
+		.sign = 'u',				\
+		.realbits = 8,				\
+		.storagebits = 8,			\
+	},						\
+}
+
+static struct iio_chan_spec const vprbrd_adc_iio_channels[] = {
+	VPRBRD_ADC_CHANNEL(0),
+	VPRBRD_ADC_CHANNEL(1),
+	VPRBRD_ADC_CHANNEL(2),
+	VPRBRD_ADC_CHANNEL(3),
+};
+
+static int vprbrd_iio_read_raw(struct iio_dev *iio_dev,
+				struct iio_chan_spec const *chan,
+				int *val,
+				int *val2,
+				long info)
+{
+	int ret, error = 0;
+	struct vprbrd_adc *adc = iio_priv(iio_dev);
+	struct vprbrd *vb = adc->vb;
+	struct vprbrd_adc_msg *admsg = (struct vprbrd_adc_msg *)vb->buf;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		mutex_lock(&vb->lock);
+
+		admsg->cmd = VPRBRD_ADC_CMD_GET;
+		admsg->chan = chan->scan_index;
+		admsg->val = 0x00;
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC,
+			VPRBRD_USB_TYPE_OUT, 0x0000, 0x0000, admsg,
+			sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS);
+		if (ret != sizeof(struct vprbrd_adc_msg)) {
+			dev_err(&iio_dev->dev, "usb send error on adc read\n");
+			error = -EREMOTEIO;
+		}
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_rcvctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC,
+			VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, admsg,
+			sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		*val = admsg->val;
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_adc_msg)) {
+			dev_err(&iio_dev->dev, "usb recv error on adc read\n");
+			error = -EREMOTEIO;
+		}
+
+		if (error)
+			goto error;
+
+		return IIO_VAL_INT;
+	default:
+		error = -EINVAL;
+		break;
+	}
+error:
+	return error;
+}
+
+static const struct iio_info vprbrd_adc_iio_info = {
+	.read_raw = &vprbrd_iio_read_raw,
+	.driver_module = THIS_MODULE,
+};
+
+static int __devinit vprbrd_adc_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_adc *adc;
+	struct iio_dev *indio_dev;
+	int ret;
+
+	/* registering iio */
+	indio_dev = iio_device_alloc(sizeof(*adc));
+	if (!indio_dev) {
+		dev_err(&pdev->dev, "failed allocating iio device\n");
+		return -ENOMEM;
+	}
+
+	adc = iio_priv(indio_dev);
+	adc->vb = vb;
+	indio_dev->name = "viperboard adc";
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->info = &vprbrd_adc_iio_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = vprbrd_adc_iio_channels;
+	indio_dev->num_channels = ARRAY_SIZE(vprbrd_adc_iio_channels);
+
+	ret = iio_device_register(indio_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "could not register iio (adc)");
+		goto error;
+	}
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	return 0;
+
+error:
+	iio_device_free(indio_dev);
+	return ret;
+}
+
+static int __devexit vprbrd_adc_remove(struct platform_device *pdev)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+
+	iio_device_unregister(indio_dev);
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static struct platform_driver vprbrd_adc_driver = {
+	.driver = {
+		.name	= "viperboard-adc",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= vprbrd_adc_probe,
+	.remove		= __devexit_p(vprbrd_adc_remove),
+};
+
+module_platform_driver(vprbrd_adc_driver);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("IIO ADC driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-adc");
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 2d41d04..89517ff 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -90,26 +90,6 @@
 
 static DEFINE_SPINLOCK(hcall_lock);
 
-static u32 get_longbusy_msecs(int longbusy_rc)
-{
-	switch (longbusy_rc) {
-	case H_LONG_BUSY_ORDER_1_MSEC:
-		return 1;
-	case H_LONG_BUSY_ORDER_10_MSEC:
-		return 10;
-	case H_LONG_BUSY_ORDER_100_MSEC:
-		return 100;
-	case H_LONG_BUSY_ORDER_1_SEC:
-		return 1000;
-	case H_LONG_BUSY_ORDER_10_SEC:
-		return 10000;
-	case H_LONG_BUSY_ORDER_100_SEC:
-		return 100000;
-	default:
-		return 1;
-	}
-}
-
 static long ehca_plpar_hcall_norets(unsigned long opcode,
 				    unsigned long arg1,
 				    unsigned long arg2,
diff --git a/drivers/input/gameport/emu10k1-gp.c b/drivers/input/gameport/emu10k1-gp.c
index daceafe..fa7a95c 100644
--- a/drivers/input/gameport/emu10k1-gp.c
+++ b/drivers/input/gameport/emu10k1-gp.c
@@ -57,7 +57,7 @@
 
 MODULE_DEVICE_TABLE(pci, emu_tbl);
 
-static int __devinit emu_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int emu_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct emu *emu;
 	struct gameport *port;
@@ -107,7 +107,7 @@
 	return error;
 }
 
-static void __devexit emu_remove(struct pci_dev *pdev)
+static void emu_remove(struct pci_dev *pdev)
 {
 	struct emu *emu = pci_get_drvdata(pdev);
 
@@ -122,7 +122,7 @@
         .name =         "Emu10k1_gameport",
         .id_table =     emu_tbl,
         .probe =        emu_probe,
-        .remove =       __devexit_p(emu_remove),
+        .remove =       emu_remove,
 };
 
 module_pci_driver(emu_driver);
diff --git a/drivers/input/gameport/fm801-gp.c b/drivers/input/gameport/fm801-gp.c
index 48ad382..ae912d3 100644
--- a/drivers/input/gameport/fm801-gp.c
+++ b/drivers/input/gameport/fm801-gp.c
@@ -78,7 +78,7 @@
 	return 0;
 }
 
-static int __devinit fm801_gp_probe(struct pci_dev *pci, const struct pci_device_id *id)
+static int fm801_gp_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
 	struct fm801_gp *gp;
 	struct gameport *port;
@@ -129,7 +129,7 @@
 	return error;
 }
 
-static void __devexit fm801_gp_remove(struct pci_dev *pci)
+static void fm801_gp_remove(struct pci_dev *pci)
 {
 	struct fm801_gp *gp = pci_get_drvdata(pci);
 
@@ -150,7 +150,7 @@
 	.name =		"FM801_gameport",
 	.id_table =	fm801_gp_id_table,
 	.probe =	fm801_gp_probe,
-	.remove =	__devexit_p(fm801_gp_remove),
+	.remove =	fm801_gp_remove,
 };
 
 module_pci_driver(fm801_gp_driver);
diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
index 8c4b50f..47a6009 100644
--- a/drivers/input/input-mt.c
+++ b/drivers/input/input-mt.c
@@ -194,7 +194,7 @@
 	if (!mt)
 		return;
 
-	oldest = 0;
+	oldest = NULL;
 	oldid = mt->trkid;
 	count = 0;
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 53a0dde..ce01332f 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -534,8 +534,11 @@
 static void __input_release_device(struct input_handle *handle)
 {
 	struct input_dev *dev = handle->dev;
+	struct input_handle *grabber;
 
-	if (dev->grab == handle) {
+	grabber = rcu_dereference_protected(dev->grab,
+					    lockdep_is_held(&dev->mutex));
+	if (grabber == handle) {
 		rcu_assign_pointer(dev->grab, NULL);
 		/* Make sure input_pass_event() notices that grab is gone */
 		synchronize_rcu();
@@ -1723,7 +1726,7 @@
 /**
  * input_allocate_device - allocate memory for new input device
  *
- * Returns prepared struct input_dev or NULL.
+ * Returns prepared struct input_dev or %NULL.
  *
  * NOTE: Use input_free_device() to free devices that have not been
  * registered; input_unregister_device() should be used for already
@@ -1750,6 +1753,70 @@
 }
 EXPORT_SYMBOL(input_allocate_device);
 
+struct input_devres {
+	struct input_dev *input;
+};
+
+static int devm_input_device_match(struct device *dev, void *res, void *data)
+{
+	struct input_devres *devres = res;
+
+	return devres->input == data;
+}
+
+static void devm_input_device_release(struct device *dev, void *res)
+{
+	struct input_devres *devres = res;
+	struct input_dev *input = devres->input;
+
+	dev_dbg(dev, "%s: dropping reference to %s\n",
+		__func__, dev_name(&input->dev));
+	input_put_device(input);
+}
+
+/**
+ * devm_input_allocate_device - allocate managed input device
+ * @dev: device owning the input device being created
+ *
+ * Returns prepared struct input_dev or %NULL.
+ *
+ * Managed input devices do not need to be explicitly unregistered or
+ * freed as this is done automatically when the owner device unbinds from
+ * its driver (or when binding fails). Once a managed input device is
+ * allocated, it is ready to be set up and registered in the same fashion
+ * as a regular input device. There are no special
+ * devm_input_device_[un]register() variants; the regular ones work with
+ * both managed and unmanaged devices.
+ *
+ * NOTE: the owner device is set up as the parent of the input device
+ * and users should not override it.
+ */
+struct input_dev *devm_input_allocate_device(struct device *dev)
+{
+	struct input_dev *input;
+	struct input_devres *devres;
+
+	devres = devres_alloc(devm_input_device_release,
+			      sizeof(struct input_devres), GFP_KERNEL);
+	if (!devres)
+		return NULL;
+
+	input = input_allocate_device();
+	if (!input) {
+		devres_free(devres);
+		return NULL;
+	}
+
+	input->dev.parent = dev;
+	input->devres_managed = true;
+
+	devres->input = input;
+	devres_add(dev, devres);
+
+	return input;
+}
+EXPORT_SYMBOL(devm_input_allocate_device);
+
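+/*
+ * Usage sketch in a driver's probe() (names are illustrative):
+ *
+ *	input = devm_input_allocate_device(&pdev->dev);
+ *	if (!input)
+ *		return -ENOMEM;
+ *	input->name = "example-keys";
+ *	error = input_register_device(input);
+ *	if (error)
+ *		return error;
+ *
+ * No explicit input_unregister_device() or input_free_device() call is
+ * needed on the error or unbind paths.
+ */
+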
 /**
  * input_free_device - free memory occupied by input_dev structure
  * @dev: input device to free
@@ -1766,8 +1833,14 @@
  */
 void input_free_device(struct input_dev *dev)
 {
-	if (dev)
+	if (dev) {
+		if (dev->devres_managed)
+			WARN_ON(devres_destroy(dev->dev.parent,
+						devm_input_device_release,
+						devm_input_device_match,
+						dev));
 		input_put_device(dev);
+	}
 }
 EXPORT_SYMBOL(input_free_device);
 
@@ -1888,6 +1961,38 @@
 	INPUT_CLEANSE_BITMASK(dev, SW, sw);
 }
 
+static void __input_unregister_device(struct input_dev *dev)
+{
+	struct input_handle *handle, *next;
+
+	input_disconnect_device(dev);
+
+	mutex_lock(&input_mutex);
+
+	list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
+		handle->handler->disconnect(handle);
+	WARN_ON(!list_empty(&dev->h_list));
+
+	del_timer_sync(&dev->timer);
+	list_del_init(&dev->node);
+
+	input_wakeup_procfs_readers();
+
+	mutex_unlock(&input_mutex);
+
+	device_del(&dev->dev);
+}
+
+static void devm_input_device_unregister(struct device *dev, void *res)
+{
+	struct input_devres *devres = res;
+	struct input_dev *input = devres->input;
+
+	dev_dbg(dev, "%s: unregistering device %s\n",
+		__func__, dev_name(&input->dev));
+	__input_unregister_device(input);
+}
+
 /**
  * input_register_device - register device with input core
  * @dev: device to be registered
@@ -1903,11 +2008,21 @@
 int input_register_device(struct input_dev *dev)
 {
 	static atomic_t input_no = ATOMIC_INIT(0);
+	struct input_devres *devres = NULL;
 	struct input_handler *handler;
 	unsigned int packet_size;
 	const char *path;
 	int error;
 
+	if (dev->devres_managed) {
+		devres = devres_alloc(devm_input_device_unregister,
+				      sizeof(struct input_devres), GFP_KERNEL);
+		if (!devres)
+			return -ENOMEM;
+
+		devres->input = dev;
+	}
+
 	/* Every input device generates EV_SYN/SYN_REPORT events. */
 	__set_bit(EV_SYN, dev->evbit);
 
@@ -1923,8 +2038,10 @@
 
 	dev->max_vals = max(dev->hint_events_per_packet, packet_size) + 2;
 	dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL);
-	if (!dev->vals)
-		return -ENOMEM;
+	if (!dev->vals) {
+		error = -ENOMEM;
+		goto err_devres_free;
+	}
 
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
@@ -1949,7 +2066,7 @@
 
 	error = device_add(&dev->dev);
 	if (error)
-		return error;
+		goto err_free_vals;
 
 	path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
 	pr_info("%s as %s\n",
@@ -1958,10 +2075,8 @@
 	kfree(path);
 
 	error = mutex_lock_interruptible(&input_mutex);
-	if (error) {
-		device_del(&dev->dev);
-		return error;
-	}
+	if (error)
+		goto err_device_del;
 
 	list_add_tail(&dev->node, &input_dev_list);
 
@@ -1972,7 +2087,21 @@
 
 	mutex_unlock(&input_mutex);
 
+	if (dev->devres_managed) {
+		dev_dbg(dev->dev.parent, "%s: registering %s with devres.\n",
+			__func__, dev_name(&dev->dev));
+		devres_add(dev->dev.parent, devres);
+	}
 	return 0;
+
+err_device_del:
+	device_del(&dev->dev);
+err_free_vals:
+	kfree(dev->vals);
+	dev->vals = NULL;
+err_devres_free:
+	devres_free(devres);
+	return error;
 }
 EXPORT_SYMBOL(input_register_device);
 
@@ -1985,24 +2114,20 @@
  */
 void input_unregister_device(struct input_dev *dev)
 {
-	struct input_handle *handle, *next;
-
-	input_disconnect_device(dev);
-
-	mutex_lock(&input_mutex);
-
-	list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
-		handle->handler->disconnect(handle);
-	WARN_ON(!list_empty(&dev->h_list));
-
-	del_timer_sync(&dev->timer);
-	list_del_init(&dev->node);
-
-	input_wakeup_procfs_readers();
-
-	mutex_unlock(&input_mutex);
-
-	device_unregister(&dev->dev);
+	if (dev->devres_managed) {
+		WARN_ON(devres_destroy(dev->dev.parent,
+					devm_input_device_unregister,
+					devm_input_device_match,
+					dev));
+		__input_unregister_device(dev);
+		/*
+		 * We do not do input_put_device() here because it will be
+		 * done when the second devres (the release handler) fires.
+		 */
+	} else {
+		__input_unregister_device(dev);
+		input_put_device(dev);
+	}
 }
 EXPORT_SYMBOL(input_unregister_device);
 
diff --git a/drivers/input/joystick/as5011.c b/drivers/input/joystick/as5011.c
index c96653b..121cd63 100644
--- a/drivers/input/joystick/as5011.c
+++ b/drivers/input/joystick/as5011.c
@@ -85,7 +85,10 @@
 {
 	uint8_t data[2] = { aregaddr, avalue };
 	struct i2c_msg msg = {
-		client->addr, I2C_M_IGNORE_NAK, 2, (uint8_t *)data
+		.addr = client->addr,
+		.flags = I2C_M_IGNORE_NAK,
+		.len = 2,
+		.buf = (uint8_t *)data
 	};
 	int error;
 
@@ -98,8 +101,18 @@
 {
 	uint8_t data[2] = { aregaddr };
 	struct i2c_msg msg_set[2] = {
-		{ client->addr, I2C_M_REV_DIR_ADDR, 1, (uint8_t *)data },
-		{ client->addr, I2C_M_RD | I2C_M_NOSTART, 1, (uint8_t *)data }
+		{
+			.addr = client->addr,
+			.flags = I2C_M_REV_DIR_ADDR,
+			.len = 1,
+			.buf = (uint8_t *)data
+		},
+		{
+			.addr = client->addr,
+			.flags = I2C_M_RD | I2C_M_NOSTART,
+			.len = 1,
+			.buf = (uint8_t *)data
+		}
 	};
 	int error;
 
@@ -144,7 +157,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit as5011_configure_chip(struct as5011_device *as5011,
+static int as5011_configure_chip(struct as5011_device *as5011,
 				const struct as5011_platform_data *plat_dat)
 {
 	struct i2c_client *client = as5011->i2c_client;
@@ -212,8 +225,8 @@
 	return 0;
 }
 
-static int __devinit as5011_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int as5011_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
 {
 	const struct as5011_platform_data *plat_data;
 	struct as5011_device *as5011;
@@ -328,7 +341,7 @@
 	return error;
 }
 
-static int __devexit as5011_remove(struct i2c_client *client)
+static int as5011_remove(struct i2c_client *client)
 {
 	struct as5011_device *as5011 = i2c_get_clientdata(client);
 
@@ -353,7 +366,7 @@
 		.name = "as5011",
 	},
 	.probe		= as5011_probe,
-	.remove		= __devexit_p(as5011_remove),
+	.remove		= as5011_remove,
 	.id_table	= as5011_id,
 };
 
diff --git a/drivers/input/joystick/maplecontrol.c b/drivers/input/joystick/maplecontrol.c
index 77cfde5..59c10ec 100644
--- a/drivers/input/joystick/maplecontrol.c
+++ b/drivers/input/joystick/maplecontrol.c
@@ -78,7 +78,7 @@
 }
 
 /* allow the controller to be used */
-static int __devinit probe_maple_controller(struct device *dev)
+static int probe_maple_controller(struct device *dev)
 {
 	static const short btn_bit[32] = {
 		BTN_C, BTN_B, BTN_A, BTN_START, -1, -1, -1, -1,
@@ -157,7 +157,7 @@
 	return error;
 }
 
-static int __devexit remove_maple_controller(struct device *dev)
+static int remove_maple_controller(struct device *dev)
 {
 	struct maple_device *mdev = to_maple_dev(dev);
 	struct dc_pad *pad = maple_get_drvdata(mdev);
@@ -175,7 +175,7 @@
 	.drv = {
 		.name	= "Dreamcast_controller",
 		.probe	= probe_maple_controller,
-		.remove	= __devexit_p(remove_maple_controller),
+		.remove	= remove_maple_controller,
 	},
 };
 
diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c
index 4dfa1ee..f8f892b 100644
--- a/drivers/input/joystick/walkera0701.c
+++ b/drivers/input/joystick/walkera0701.c
@@ -196,6 +196,7 @@
 	struct walkera_dev *w = input_get_drvdata(dev);
 
 	parport_disable_irq(w->parport);
+	hrtimer_cancel(&w->timer);
 }
 
 static int walkera0701_connect(struct walkera_dev *w, int parport)
@@ -224,6 +225,9 @@
 	if (parport_claim(w->pardevice))
 		goto init_err1;
 
+	hrtimer_init(&w->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	w->timer.function = timer_handler;
+
 	w->input_dev = input_allocate_device();
 	if (!w->input_dev)
 		goto init_err2;
@@ -254,8 +258,6 @@
 	if (err)
 		goto init_err3;
 
-	hrtimer_init(&w->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	w->timer.function = timer_handler;
 	return 0;
 
  init_err3:
@@ -271,7 +273,6 @@
 
 static void walkera0701_disconnect(struct walkera_dev *w)
 {
-	hrtimer_cancel(&w->timer);
 	input_unregister_device(w->input_dev);
 	parport_release(w->pardevice);
 	parport_unregister_device(w->pardevice);
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 83811e4..d6cbfe9 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -118,11 +118,12 @@
 	u8 xtype;
 } xpad_device[] = {
 	{ 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX },
-	{ 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX },
+	{ 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
+	{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
+	{ 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
 	{ 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
-	{ 0x0c12, 0x8809, "RedOctane Xbox Dance Pad", DANCEPAD_MAP_CONFIG, XTYPE_XBOX },
 	{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
 	{ 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX },
@@ -136,9 +137,12 @@
 	{ 0x0738, 0x4540, "Mad Catz Beat Pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
 	{ 0x0738, 0x4556, "Mad Catz Lynx Wireless Controller", 0, XTYPE_XBOX },
 	{ 0x0738, 0x4716, "Mad Catz Wired Xbox 360 Controller", 0, XTYPE_XBOX360 },
+	{ 0x0738, 0x4728, "Mad Catz Street Fighter IV FightPad", 0, XTYPE_XBOX360 },
 	{ 0x0738, 0x4738, "Mad Catz Wired Xbox 360 Controller (SFIV)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0738, 0x6040, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
+	{ 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", 0, XTYPE_XBOX360 },
 	{ 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX },
+	{ 0x0c12, 0x8809, "RedOctane Xbox Dance Pad", DANCEPAD_MAP_CONFIG, XTYPE_XBOX },
 	{ 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX },
 	{ 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX },
 	{ 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX },
@@ -148,24 +152,28 @@
 	{ 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX },
 	{ 0x0e6f, 0x0005, "Eclipse wireless Controller", 0, XTYPE_XBOX },
 	{ 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX },
-	{ 0x0e6f, 0x0006, "Pelican 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 },
+	{ 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 },
+	{ 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
 	{ 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX },
+	{ 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX },
 	{ 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
 	{ 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX },
-	{ 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
 	{ 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
-	{ 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
 	{ 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 },
 	{ 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
 	{ 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
-	{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
+	{ 0x1689, 0xfd00, "Razer Onza Tournament Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
-	{ 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
-	{ 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
-	{ 0x1689, 0xfd00, "Razer Onza Tournament Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 },
 	{ 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
 	{ 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
 };
@@ -235,7 +243,7 @@
 	{ XPAD_XBOX360_VENDOR_PROTOCOL(vend,1) }, \
 	{ XPAD_XBOX360_VENDOR_PROTOCOL(vend,129) }
 
-static struct usb_device_id xpad_table [] = {
+static struct usb_device_id xpad_table[] = {
 	{ USB_INTERFACE_INFO('X', 'B', 0) },	/* X-Box USB-IF not approved class */
 	XPAD_XBOX360_VENDOR(0x045e),		/* Microsoft X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x046d),		/* Logitech X-Box 360 style controllers */
@@ -248,10 +256,11 @@
 	XPAD_XBOX360_VENDOR(0x1bad),		/* Harmonix Rock Band Guitar and Drums */
 	XPAD_XBOX360_VENDOR(0x0f0d),		/* Hori Controllers */
 	XPAD_XBOX360_VENDOR(0x1689),		/* Razer Onza */
+	XPAD_XBOX360_VENDOR(0x24c6),		/* PowerA Controllers */
 	{ }
 };
 
-MODULE_DEVICE_TABLE (usb, xpad_table);
+MODULE_DEVICE_TABLE(usb, xpad_table);
 
 struct usb_xpad {
 	struct input_dev *dev;		/* input device interface */
@@ -783,7 +792,7 @@
 	struct usb_xpad *xpad = input_get_drvdata(dev);
 
 	/* URB was submitted in probe */
-	if(xpad->xtype == XTYPE_XBOX360W)
+	if (xpad->xtype == XTYPE_XBOX360W)
 		return 0;
 
 	xpad->irq_in->dev = xpad->udev;
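
The xpad hunks above do two things: they keep the quirk table sorted by vendor/product ID while folding in the new pads, and they extend the USB match table so 360-class devices from a listed vendor bind by interface protocol rather than needing an exact product match. At probe time the driver walks the quirk table for mapping flags, falling through to a generic entry. A minimal, compilable model of that lookup follows; the table contents and names here are illustrative, not the driver's full table.

#include <stdio.h>

/* Illustrative model of the quirk lookup, not the driver's actual table. */
struct pad_entry {
	unsigned short vid, pid;
	const char *name;
	unsigned char mapping;		/* quirk flags, e.g. dpad-to-buttons */
	unsigned char xtype;		/* controller family */
};

static const struct pad_entry pads[] = {
	{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, 1 },
	{ 0x0000, 0x0000, "Generic X-Box pad", 0, 3 },	/* catch-all */
};

static const struct pad_entry *pad_lookup(unsigned short vid, unsigned short pid)
{
	int i;

	/* Unknown VID/PID pairs fall through to the catch-all entry. */
	for (i = 0; pads[i].vid || pads[i].pid; i++)
		if (pads[i].vid == vid && pads[i].pid == pid)
			break;
	return &pads[i];
}

int main(void)
{
	printf("%s\n", pad_lookup(0x045e, 0x028e)->name);
	return 0;
}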
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 77629d3..5a240c6 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -134,7 +134,7 @@
 
 config KEYBOARD_QT2160
 	tristate "Atmel AT42QT2160 Touch Sensor Chip"
-	depends on I2C && EXPERIMENTAL
+	depends on I2C
 	help
 	  If you say yes here you get support for Atmel AT42QT2160 Touch
 	  Sensor chip as a keyboard input.
@@ -544,6 +544,7 @@
 
 config KEYBOARD_OMAP4
 	tristate "TI OMAP4+ keypad support"
+	depends on ARCH_OMAP2PLUS
 	select INPUT_MATRIXKMAP
 	help
 	  Say Y here if you want to use the OMAP4+ keypad.
diff --git a/drivers/input/keyboard/adp5520-keys.c b/drivers/input/keyboard/adp5520-keys.c
index e9e8674..ef26b17 100644
--- a/drivers/input/keyboard/adp5520-keys.c
+++ b/drivers/input/keyboard/adp5520-keys.c
@@ -69,7 +69,7 @@
 	return 0;
 }
 
-static int __devinit adp5520_keys_probe(struct platform_device *pdev)
+static int adp5520_keys_probe(struct platform_device *pdev)
 {
 	struct adp5520_keys_platform_data *pdata = pdev->dev.platform_data;
 	struct input_dev *input;
@@ -182,7 +182,7 @@
 	return ret;
 }
 
-static int __devexit adp5520_keys_remove(struct platform_device *pdev)
+static int adp5520_keys_remove(struct platform_device *pdev)
 {
 	struct adp5520_keys *dev = platform_get_drvdata(pdev);
 
@@ -200,7 +200,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= adp5520_keys_probe,
-	.remove		= __devexit_p(adp5520_keys_remove),
+	.remove		= adp5520_keys_remove,
 };
 module_platform_driver(adp5520_keys_driver);
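
The adp5520 hunks above, and the long run of keyboard-driver hunks below, are the mechanical removal of the __devinit/__devexit section annotations and the __devexit_p() wrapper, which became no-ops once hotplug support was unconditional: probe and remove callbacks now live in ordinary text sections and are referenced directly. A minimal sketch of the resulting driver shape, using hypothetical foo_* names:

#include <linux/module.h>
#include <linux/platform_device.h>

/* Hypothetical driver: no __devinit/__devexit markings, no __devexit_p(). */
static int foo_probe(struct platform_device *pdev)
{
	return 0;
}

static int foo_remove(struct platform_device *pdev)
{
	return 0;
}

static struct platform_driver foo_driver = {
	.driver	= {
		.name	= "foo",
		.owner	= THIS_MODULE,
	},
	.probe	= foo_probe,
	.remove	= foo_remove,		/* plain function pointer now */
};
module_platform_driver(foo_driver);

MODULE_LICENSE("GPL");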
 
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index b083bf1..dbd2047 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -145,7 +145,7 @@
 	return ret;
 }
 
-static int __devinit adp5588_build_gpiomap(struct adp5588_kpad *kpad,
+static int adp5588_build_gpiomap(struct adp5588_kpad *kpad,
 				const struct adp5588_kpad_platform_data *pdata)
 {
 	bool pin_used[ADP5588_MAXGPIO];
@@ -170,7 +170,7 @@
 	return n_unused;
 }
 
-static int __devinit adp5588_gpio_add(struct adp5588_kpad *kpad)
+static int adp5588_gpio_add(struct adp5588_kpad *kpad)
 {
 	struct device *dev = &kpad->client->dev;
 	const struct adp5588_kpad_platform_data *pdata = dev->platform_data;
@@ -224,7 +224,7 @@
 	return 0;
 }
 
-static void __devexit adp5588_gpio_remove(struct adp5588_kpad *kpad)
+static void adp5588_gpio_remove(struct adp5588_kpad *kpad)
 {
 	struct device *dev = &kpad->client->dev;
 	const struct adp5588_kpad_platform_data *pdata = dev->platform_data;
@@ -319,7 +319,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit adp5588_setup(struct i2c_client *client)
+static int adp5588_setup(struct i2c_client *client)
 {
 	const struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
 	const struct adp5588_gpio_platform_data *gpio_data = pdata->gpio_data;
@@ -382,7 +382,7 @@
 	return 0;
 }
 
-static void __devinit adp5588_report_switch_state(struct adp5588_kpad *kpad)
+static void adp5588_report_switch_state(struct adp5588_kpad *kpad)
 {
 	int gpi_stat1 = adp5588_read(kpad->client, GPIO_DAT_STAT1);
 	int gpi_stat2 = adp5588_read(kpad->client, GPIO_DAT_STAT2);
@@ -420,8 +420,8 @@
 }
 
 
-static int __devinit adp5588_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+static int adp5588_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
 {
 	struct adp5588_kpad *kpad;
 	const struct adp5588_kpad_platform_data *pdata = client->dev.platform_data;
@@ -587,7 +587,7 @@
 	return error;
 }
 
-static int __devexit adp5588_remove(struct i2c_client *client)
+static int adp5588_remove(struct i2c_client *client)
 {
 	struct adp5588_kpad *kpad = i2c_get_clientdata(client);
 
@@ -650,7 +650,7 @@
 #endif
 	},
 	.probe    = adp5588_probe,
-	.remove   = __devexit_p(adp5588_remove),
+	.remove   = adp5588_remove,
 	.id_table = adp5588_id,
 };
 
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 74e6032..67d12b3 100644
--- a/drivers/input/keyboard/adp5589-keys.c
+++ b/drivers/input/keyboard/adp5589-keys.c
@@ -464,7 +464,7 @@
 	return ret;
 }
 
-static int __devinit adp5589_build_gpiomap(struct adp5589_kpad *kpad,
+static int adp5589_build_gpiomap(struct adp5589_kpad *kpad,
 				const struct adp5589_kpad_platform_data *pdata)
 {
 	bool pin_used[ADP5589_MAXGPIO];
@@ -496,7 +496,7 @@
 	return n_unused;
 }
 
-static int __devinit adp5589_gpio_add(struct adp5589_kpad *kpad)
+static int adp5589_gpio_add(struct adp5589_kpad *kpad)
 {
 	struct device *dev = &kpad->client->dev;
 	const struct adp5589_kpad_platform_data *pdata = dev->platform_data;
@@ -550,7 +550,7 @@
 	return 0;
 }
 
-static void __devexit adp5589_gpio_remove(struct adp5589_kpad *kpad)
+static void adp5589_gpio_remove(struct adp5589_kpad *kpad)
 {
 	struct device *dev = &kpad->client->dev;
 	const struct adp5589_kpad_platform_data *pdata = dev->platform_data;
@@ -641,8 +641,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit adp5589_get_evcode(struct adp5589_kpad *kpad,
-					unsigned short key)
+static int adp5589_get_evcode(struct adp5589_kpad *kpad, unsigned short key)
 {
 	int i;
 
@@ -655,7 +654,7 @@
 	return -EINVAL;
 }
 
-static int __devinit adp5589_setup(struct adp5589_kpad *kpad)
+static int adp5589_setup(struct adp5589_kpad *kpad)
 {
 	struct i2c_client *client = kpad->client;
 	const struct adp5589_kpad_platform_data *pdata =
@@ -820,7 +819,7 @@
 	return 0;
 }
 
-static void __devinit adp5589_report_switch_state(struct adp5589_kpad *kpad)
+static void adp5589_report_switch_state(struct adp5589_kpad *kpad)
 {
 	int gpi_stat_tmp, pin_loc;
 	int i;
@@ -860,8 +859,8 @@
 	input_sync(kpad->input);
 }
 
-static int __devinit adp5589_probe(struct i2c_client *client,
-				   const struct i2c_device_id *id)
+static int adp5589_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
 {
 	struct adp5589_kpad *kpad;
 	const struct adp5589_kpad_platform_data *pdata =
@@ -1045,7 +1044,7 @@
 	return error;
 }
 
-static int __devexit adp5589_remove(struct i2c_client *client)
+static int adp5589_remove(struct i2c_client *client)
 {
 	struct adp5589_kpad *kpad = i2c_get_clientdata(client);
 
@@ -1104,7 +1103,7 @@
 		.pm = &adp5589_dev_pm_ops,
 	},
 	.probe = adp5589_probe,
-	.remove = __devexit_p(adp5589_remove),
+	.remove = adp5589_remove,
 	.id_table = adp5589_id,
 };
 
diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c
index 8eb9116..20b9fa9 100644
--- a/drivers/input/keyboard/bf54x-keys.c
+++ b/drivers/input/keyboard/bf54x-keys.c
@@ -177,7 +177,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit bfin_kpad_probe(struct platform_device *pdev)
+static int bfin_kpad_probe(struct platform_device *pdev)
 {
 	struct bf54x_kpad *bf54x_kpad;
 	struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data;
@@ -331,7 +331,7 @@
 	return error;
 }
 
-static int __devexit bfin_kpad_remove(struct platform_device *pdev)
+static int bfin_kpad_remove(struct platform_device *pdev)
 {
 	struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data;
 	struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev);
@@ -390,7 +390,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= bfin_kpad_probe,
-	.remove		= __devexit_p(bfin_kpad_remove),
+	.remove		= bfin_kpad_remove,
 	.suspend	= bfin_kpad_suspend,
 	.resume		= bfin_kpad_resume,
 };
diff --git a/drivers/input/keyboard/davinci_keyscan.c b/drivers/input/keyboard/davinci_keyscan.c
index d5bacbb..4e4e453 100644
--- a/drivers/input/keyboard/davinci_keyscan.c
+++ b/drivers/input/keyboard/davinci_keyscan.c
@@ -303,7 +303,7 @@
 	return error;
 }
 
-static int __devexit davinci_ks_remove(struct platform_device *pdev)
+static int davinci_ks_remove(struct platform_device *pdev)
 {
 	struct davinci_ks *davinci_ks = platform_get_drvdata(pdev);
 
@@ -326,7 +326,7 @@
 		.name = "davinci_keyscan",
 		.owner = THIS_MODULE,
 	},
-	.remove	= __devexit_p(davinci_ks_remove),
+	.remove	= davinci_ks_remove,
 };
 
 static int __init davinci_ks_init(void)
diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c
index 7363402..9857e8f 100644
--- a/drivers/input/keyboard/ep93xx_keypad.c
+++ b/drivers/input/keyboard/ep93xx_keypad.c
@@ -232,7 +232,7 @@
 static SIMPLE_DEV_PM_OPS(ep93xx_keypad_pm_ops,
 			 ep93xx_keypad_suspend, ep93xx_keypad_resume);
 
-static int __devinit ep93xx_keypad_probe(struct platform_device *pdev)
+static int ep93xx_keypad_probe(struct platform_device *pdev)
 {
 	struct ep93xx_keypad *keypad;
 	const struct matrix_keymap_data *keymap_data;
@@ -346,7 +346,7 @@
 	return err;
 }
 
-static int __devexit ep93xx_keypad_remove(struct platform_device *pdev)
+static int ep93xx_keypad_remove(struct platform_device *pdev)
 {
 	struct ep93xx_keypad *keypad = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -380,7 +380,7 @@
 		.pm	= &ep93xx_keypad_pm_ops,
 	},
 	.probe		= ep93xx_keypad_probe,
-	.remove		= __devexit_p(ep93xx_keypad_remove),
+	.remove		= ep93xx_keypad_remove,
 };
 module_platform_driver(ep93xx_keypad_driver);
 
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 6a68041..d327f5a 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -423,10 +423,10 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit gpio_keys_setup_key(struct platform_device *pdev,
-					 struct input_dev *input,
-					 struct gpio_button_data *bdata,
-					 const struct gpio_keys_button *button)
+static int gpio_keys_setup_key(struct platform_device *pdev,
+				struct input_dev *input,
+				struct gpio_button_data *bdata,
+				const struct gpio_keys_button *button)
 {
 	const char *desc = button->desc ? button->desc : "gpio_keys";
 	struct device *dev = &pdev->dev;
@@ -440,21 +440,13 @@
 
 	if (gpio_is_valid(button->gpio)) {
 
-		error = gpio_request(button->gpio, desc);
+		error = gpio_request_one(button->gpio, GPIOF_IN, desc);
 		if (error < 0) {
 			dev_err(dev, "Failed to request GPIO %d, error %d\n",
 				button->gpio, error);
 			return error;
 		}
 
-		error = gpio_direction_input(button->gpio);
-		if (error < 0) {
-			dev_err(dev,
-				"Failed to configure direction for GPIO %d, error %d\n",
-				button->gpio, error);
-			goto fail;
-		}
-
 		if (button->debounce_interval) {
 			error = gpio_set_debounce(button->gpio,
 					button->debounce_interval * 1000);
@@ -526,12 +518,35 @@
 	return error;
 }
 
+static void gpio_keys_report_state(struct gpio_keys_drvdata *ddata)
+{
+	struct input_dev *input = ddata->input;
+	int i;
+
+	for (i = 0; i < ddata->pdata->nbuttons; i++) {
+		struct gpio_button_data *bdata = &ddata->data[i];
+		if (gpio_is_valid(bdata->button->gpio))
+			gpio_keys_gpio_report_event(bdata);
+	}
+	input_sync(input);
+}
+
 static int gpio_keys_open(struct input_dev *input)
 {
 	struct gpio_keys_drvdata *ddata = input_get_drvdata(input);
 	const struct gpio_keys_platform_data *pdata = ddata->pdata;
+	int error;
 
-	return pdata->enable ? pdata->enable(input->dev.parent) : 0;
+	if (pdata->enable) {
+		error = pdata->enable(input->dev.parent);
+		if (error)
+			return error;
+	}
+
+	/* Report current state of buttons that are connected to GPIOs */
+	gpio_keys_report_state(ddata);
+
+	return 0;
 }
 
 static void gpio_keys_close(struct input_dev *input)
@@ -551,7 +566,7 @@
 /*
  * Translate OpenFirmware node properties into platform_data
  */
-static struct gpio_keys_platform_data * __devinit
+static struct gpio_keys_platform_data *
 gpio_keys_get_devtree_pdata(struct device *dev)
 {
 	struct device_node *node, *pp;
@@ -658,7 +673,7 @@
 		gpio_free(bdata->button->gpio);
 }
 
-static int __devinit gpio_keys_probe(struct platform_device *pdev)
+static int gpio_keys_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct gpio_keys_platform_data *pdata = dev_get_platdata(dev);
@@ -731,14 +746,6 @@
 		goto fail3;
 	}
 
-	/* get current state of buttons that are connected to GPIOs */
-	for (i = 0; i < pdata->nbuttons; i++) {
-		struct gpio_button_data *bdata = &ddata->data[i];
-		if (gpio_is_valid(bdata->button->gpio))
-			gpio_keys_gpio_report_event(bdata);
-	}
-	input_sync(input);
-
 	device_init_wakeup(&pdev->dev, wakeup);
 
 	return 0;
@@ -760,7 +767,7 @@
 	return error;
 }
 
-static int __devexit gpio_keys_remove(struct platform_device *pdev)
+static int gpio_keys_remove(struct platform_device *pdev)
 {
 	struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
 	struct input_dev *input = ddata->input;
@@ -788,6 +795,7 @@
 static int gpio_keys_suspend(struct device *dev)
 {
 	struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
+	struct input_dev *input = ddata->input;
 	int i;
 
 	if (device_may_wakeup(dev)) {
@@ -796,6 +804,11 @@
 			if (bdata->button->wakeup)
 				enable_irq_wake(bdata->irq);
 		}
+	} else {
+		mutex_lock(&input->mutex);
+		if (input->users)
+			gpio_keys_close(input);
+		mutex_unlock(&input->mutex);
 	}
 
 	return 0;
@@ -804,18 +817,27 @@
 static int gpio_keys_resume(struct device *dev)
 {
 	struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
+	struct input_dev *input = ddata->input;
+	int error = 0;
 	int i;
 
-	for (i = 0; i < ddata->pdata->nbuttons; i++) {
-		struct gpio_button_data *bdata = &ddata->data[i];
-		if (bdata->button->wakeup && device_may_wakeup(dev))
-			disable_irq_wake(bdata->irq);
-
-		if (gpio_is_valid(bdata->button->gpio))
-			gpio_keys_gpio_report_event(bdata);
+	if (device_may_wakeup(dev)) {
+		for (i = 0; i < ddata->pdata->nbuttons; i++) {
+			struct gpio_button_data *bdata = &ddata->data[i];
+			if (bdata->button->wakeup)
+				disable_irq_wake(bdata->irq);
+		}
+	} else {
+		mutex_lock(&input->mutex);
+		if (input->users)
+			error = gpio_keys_open(input);
+		mutex_unlock(&input->mutex);
 	}
-	input_sync(ddata->input);
 
+	if (error)
+		return error;
+
+	gpio_keys_report_state(ddata);
 	return 0;
 }
 #endif
@@ -824,7 +846,7 @@
 
 static struct platform_driver gpio_keys_device_driver = {
 	.probe		= gpio_keys_probe,
-	.remove		= __devexit_p(gpio_keys_remove),
+	.remove		= gpio_keys_remove,
 	.driver		= {
 		.name	= "gpio-keys",
 		.owner	= THIS_MODULE,
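
Two changes recur in the gpio-keys hunks above: the gpio_request() + gpio_direction_input() pair collapses into a single gpio_request_one(gpio, GPIOF_IN, label) call, and initial button state is now reported from open() rather than probe(), which lets the non-wakeup suspend/resume path simply close and reopen the device. A sketch of the request conversion, assuming the legacy integer-GPIO API:

#include <linux/gpio.h>

/* Before: two calls, two error paths. */
static int claim_button_old(unsigned int gpio, const char *label)
{
	int error;

	error = gpio_request(gpio, label);
	if (error)
		return error;

	error = gpio_direction_input(gpio);
	if (error)
		gpio_free(gpio);

	return error;
}

/* After: request the line and configure it as an input in one call. */
static int claim_button_new(unsigned int gpio, const char *label)
{
	return gpio_request_one(gpio, GPIOF_IN, label);
}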
diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c
index f2142de..f686fd9 100644
--- a/drivers/input/keyboard/gpio_keys_polled.c
+++ b/drivers/input/keyboard/gpio_keys_polled.c
@@ -103,8 +103,7 @@
 }
 
 #ifdef CONFIG_OF
-static struct gpio_keys_platform_data * __devinit
-gpio_keys_polled_get_devtree_pdata(struct device *dev)
+static struct gpio_keys_platform_data *gpio_keys_polled_get_devtree_pdata(struct device *dev)
 {
 	struct device_node *node, *pp;
 	struct gpio_keys_platform_data *pdata;
@@ -196,7 +195,7 @@
 }
 #endif
 
-static int __devinit gpio_keys_polled_probe(struct platform_device *pdev)
+static int gpio_keys_polled_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct gpio_keys_platform_data *pdata = dev_get_platdata(dev);
@@ -246,7 +245,6 @@
 
 	input = poll_dev->input;
 
-	input->evbit[0] = BIT(EV_KEY);
 	input->name = pdev->name;
 	input->phys = DRV_NAME"/input0";
 	input->dev.parent = &pdev->dev;
@@ -256,6 +254,10 @@
 	input->id.product = 0x0001;
 	input->id.version = 0x0100;
 
+	__set_bit(EV_KEY, input->evbit);
+	if (pdata->rep)
+		__set_bit(EV_REP, input->evbit);
+
 	for (i = 0; i < pdata->nbuttons; i++) {
 		struct gpio_keys_button *button = &pdata->buttons[i];
 		struct gpio_keys_button_data *bdata = &bdev->data[i];
@@ -268,22 +270,14 @@
 			goto err_free_gpio;
 		}
 
-		error = gpio_request(gpio,
-				     button->desc ? button->desc : DRV_NAME);
+		error = gpio_request_one(gpio, GPIOF_IN,
+					 button->desc ?: DRV_NAME);
 		if (error) {
 			dev_err(dev, "unable to claim gpio %u, err=%d\n",
 				gpio, error);
 			goto err_free_gpio;
 		}
 
-		error = gpio_direction_input(gpio);
-		if (error) {
-			dev_err(dev,
-				"unable to set direction on gpio %u, err=%d\n",
-				gpio, error);
-			goto err_free_gpio;
-		}
-
 		bdata->can_sleep = gpio_cansleep(gpio);
 		bdata->last_state = -1;
 		bdata->threshold = DIV_ROUND_UP(button->debounce_interval,
@@ -329,7 +323,7 @@
 	return error;
 }
 
-static int __devexit gpio_keys_polled_remove(struct platform_device *pdev)
+static int gpio_keys_polled_remove(struct platform_device *pdev)
 {
 	struct gpio_keys_polled_dev *bdev = platform_get_drvdata(pdev);
 	const struct gpio_keys_platform_data *pdata = bdev->pdata;
@@ -357,7 +351,7 @@
 
 static struct platform_driver gpio_keys_polled_driver = {
 	.probe	= gpio_keys_polled_probe,
-	.remove	= __devexit_p(gpio_keys_polled_remove),
+	.remove	= gpio_keys_polled_remove,
 	.driver	= {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,
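
The polled variant gains conditional autorepeat, which is why the evbit assignment becomes __set_bit(): setting individual bits composes, whereas the old input->evbit[0] = BIT(EV_KEY) assignment clobbered anything set before it. A sketch, assuming an already-allocated input device:

#include <linux/input.h>

/* Declare key events, and autorepeat only when the platform asks for it. */
static void foo_declare_events(struct input_dev *input, bool want_repeat)
{
	__set_bit(EV_KEY, input->evbit);
	if (want_repeat)
		__set_bit(EV_REP, input->evbit);
}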
diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c
index 5f72440..198dc07 100644
--- a/drivers/input/keyboard/hilkbd.c
+++ b/drivers/input/keyboard/hilkbd.c
@@ -200,7 +200,7 @@
 
 
 /* initialize HIL */
-static int __devinit hil_keyb_init(void)
+static int hil_keyb_init(void)
 {
 	unsigned char c;
 	unsigned int i, kbid;
@@ -286,7 +286,7 @@
 	return err;
 }
 
-static void __devexit hil_keyb_exit(void)
+static void hil_keyb_exit(void)
 {
 	if (HIL_IRQ)
 		free_irq(HIL_IRQ, hil_dev.dev_id);
@@ -299,7 +299,7 @@
 }
 
 #if defined(CONFIG_PARISC)
-static int __devinit hil_probe_chip(struct parisc_device *dev)
+static int hil_probe_chip(struct parisc_device *dev)
 {
 	/* Only allow one HIL keyboard */
 	if (hil_dev.dev)
@@ -320,7 +320,7 @@
 	return hil_keyb_init();
 }
 
-static int __devexit hil_remove_chip(struct parisc_device *dev)
+static int hil_remove_chip(struct parisc_device *dev)
 {
 	hil_keyb_exit();
 
@@ -341,7 +341,7 @@
 	.name		= "hil",
 	.id_table	= hil_tbl,
 	.probe		= hil_probe_chip,
-	.remove		= __devexit_p(hil_remove_chip),
+	.remove		= hil_remove_chip,
 };
 
 static int __init hil_init(void)
diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c
index cdc2526..6d150e3 100644
--- a/drivers/input/keyboard/imx_keypad.c
+++ b/drivers/input/keyboard/imx_keypad.c
@@ -362,7 +362,8 @@
 	writew(reg_val, keypad->mmio_base + KPSR);
 
 	/* Columns as open drain and disable all rows */
-	writew(0xff00, keypad->mmio_base + KPCR);
+	reg_val = (keypad->cols_en_mask & 0xff) << 8;
+	writew(reg_val, keypad->mmio_base + KPCR);
 }
 
 static void imx_keypad_close(struct input_dev *dev)
@@ -413,7 +414,7 @@
 	return -EIO;
 }
 
-static int __devinit imx_keypad_probe(struct platform_device *pdev)
+static int imx_keypad_probe(struct platform_device *pdev)
 {
 	const struct matrix_keymap_data *keymap_data = pdev->dev.platform_data;
 	struct imx_keypad *keypad;
@@ -554,7 +555,7 @@
 	return error;
 }
 
-static int __devexit imx_keypad_remove(struct platform_device *pdev)
+static int imx_keypad_remove(struct platform_device *pdev)
 {
 	struct imx_keypad *keypad = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -632,7 +633,7 @@
 		.pm	= &imx_kbd_pm_ops,
 	},
 	.probe		= imx_keypad_probe,
-	.remove		= __devexit_p(imx_keypad_remove),
+	.remove		= imx_keypad_remove,
 };
 module_platform_driver(imx_keypad_driver);
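
The KPCR fix above replaces a hard-coded 0xff00 with a value derived from the columns actually wired up: the low byte of cols_en_mask selects which column pins are switched to open-drain, shifted into KPCR's high byte, while the row-enable low byte stays zero. A compilable example of the arithmetic, for a hypothetical five-column keypad:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical keypad wired with columns 0..4 in use. */
	uint16_t cols_en_mask = 0x1f;
	uint16_t kpcr = (cols_en_mask & 0xff) << 8;

	/* Only the five used columns go open-drain: 0x1f00, not 0xff00. */
	printf("KPCR = 0x%04x\n", kpcr);
	return 0;
}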
 
diff --git a/drivers/input/keyboard/jornada680_kbd.c b/drivers/input/keyboard/jornada680_kbd.c
index 24f3ea0..74e75a6 100644
--- a/drivers/input/keyboard/jornada680_kbd.c
+++ b/drivers/input/keyboard/jornada680_kbd.c
@@ -179,7 +179,7 @@
 	memcpy(jornadakbd->old_scan, jornadakbd->new_scan, JORNADA_SCAN_SIZE);
 }
 
-static int __devinit jornada680kbd_probe(struct platform_device *pdev)
+static int jornada680kbd_probe(struct platform_device *pdev)
 {
 	struct jornadakbd *jornadakbd;
 	struct input_polled_dev *poll_dev;
@@ -240,7 +240,7 @@
 
 }
 
-static int __devexit jornada680kbd_remove(struct platform_device *pdev)
+static int jornada680kbd_remove(struct platform_device *pdev)
 {
 	struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
 
@@ -258,7 +258,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe	= jornada680kbd_probe,
-	.remove	= __devexit_p(jornada680kbd_remove),
+	.remove	= jornada680kbd_remove,
 };
 module_platform_driver(jornada680kbd_driver);
 
diff --git a/drivers/input/keyboard/jornada720_kbd.c b/drivers/input/keyboard/jornada720_kbd.c
index 9d639fa..5ceef63 100644
--- a/drivers/input/keyboard/jornada720_kbd.c
+++ b/drivers/input/keyboard/jornada720_kbd.c
@@ -94,7 +94,7 @@
 	return IRQ_HANDLED;
 };
 
-static int __devinit jornada720_kbd_probe(struct platform_device *pdev)
+static int jornada720_kbd_probe(struct platform_device *pdev)
 {
 	struct jornadakbd *jornadakbd;
 	struct input_dev *input_dev;
@@ -152,7 +152,7 @@
 	return err;
 };
 
-static int __devexit jornada720_kbd_remove(struct platform_device *pdev)
+static int jornada720_kbd_remove(struct platform_device *pdev)
 {
 	struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
 
@@ -173,6 +173,6 @@
 		.owner	= THIS_MODULE,
 	 },
 	.probe   = jornada720_kbd_probe,
-	.remove  = __devexit_p(jornada720_kbd_remove),
+	.remove  = jornada720_kbd_remove,
 };
 module_platform_driver(jornada720_kbd_driver);
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index 39ac278..93c8126 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -624,7 +624,7 @@
 }
 static DEVICE_ATTR(disable_kp, 0644, lm8323_show_disable, lm8323_set_disable);
 
-static int __devinit lm8323_probe(struct i2c_client *client,
+static int lm8323_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct lm8323_platform_data *pdata = client->dev.platform_data;
@@ -764,7 +764,7 @@
 	return err;
 }
 
-static int __devexit lm8323_remove(struct i2c_client *client)
+static int lm8323_remove(struct i2c_client *client)
 {
 	struct lm8323_chip *lm = i2c_get_clientdata(client);
 	int i;
@@ -846,7 +846,7 @@
 		.pm	= &lm8323_pm_ops,
 	},
 	.probe		= lm8323_probe,
-	.remove		= __devexit_p(lm8323_remove),
+	.remove		= lm8323_remove,
 	.id_table	= lm8323_id,
 };
 MODULE_DEVICE_TABLE(i2c, lm8323_id);
diff --git a/drivers/input/keyboard/lm8333.c b/drivers/input/keyboard/lm8333.c
index 081fd9e..5a8ca35 100644
--- a/drivers/input/keyboard/lm8333.c
+++ b/drivers/input/keyboard/lm8333.c
@@ -128,7 +128,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit lm8333_probe(struct i2c_client *client,
+static int lm8333_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	const struct lm8333_platform_data *pdata = client->dev.platform_data;
@@ -202,7 +202,7 @@
 	return err;
 }
 
-static int __devexit lm8333_remove(struct i2c_client *client)
+static int lm8333_remove(struct i2c_client *client)
 {
 	struct lm8333 *lm8333 = i2c_get_clientdata(client);
 
@@ -225,7 +225,7 @@
 		.owner		= THIS_MODULE,
 	},
 	.probe		= lm8333_probe,
-	.remove		= __devexit_p(lm8333_remove),
+	.remove		= lm8333_remove,
 	.id_table	= lm8333_id,
 };
 module_i2c_driver(lm8333_driver);
diff --git a/drivers/input/keyboard/locomokbd.c b/drivers/input/keyboard/locomokbd.c
index b1ab298..c94d610 100644
--- a/drivers/input/keyboard/locomokbd.c
+++ b/drivers/input/keyboard/locomokbd.c
@@ -46,7 +46,7 @@
 #define KEY_CENTER		KEY_F15
 
 static const unsigned char
-locomokbd_keycode[LOCOMOKBD_NUMKEYS] __devinitconst = {
+locomokbd_keycode[LOCOMOKBD_NUMKEYS] = {
 	0, KEY_ESC, KEY_ACTIVITY, 0, 0, 0, 0, 0, 0, 0,				/* 0 - 9 */
 	0, 0, 0, 0, 0, 0, 0, KEY_MENU, KEY_HOME, KEY_CONTACT,			/* 10 - 19 */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,						/* 20 - 29 */
@@ -236,7 +236,7 @@
 	locomo_writel(r, locomokbd->base + LOCOMO_KIC);
 }
 
-static int __devinit locomokbd_probe(struct locomo_dev *dev)
+static int locomokbd_probe(struct locomo_dev *dev)
 {
 	struct locomokbd *locomokbd;
 	struct input_dev *input_dev;
@@ -321,7 +321,7 @@
 	return err;
 }
 
-static int __devexit locomokbd_remove(struct locomo_dev *dev)
+static int locomokbd_remove(struct locomo_dev *dev)
 {
 	struct locomokbd *locomokbd = locomo_get_drvdata(dev);
 
@@ -345,7 +345,7 @@
 	},
 	.devid	= LOCOMO_DEVID_KEYBOARD,
 	.probe	= locomokbd_probe,
-	.remove	= __devexit_p(locomokbd_remove),
+	.remove	= locomokbd_remove,
 };
 
 static int __init locomokbd_init(void)
diff --git a/drivers/input/keyboard/lpc32xx-keys.c b/drivers/input/keyboard/lpc32xx-keys.c
index dd786c8..1b8add6 100644
--- a/drivers/input/keyboard/lpc32xx-keys.c
+++ b/drivers/input/keyboard/lpc32xx-keys.c
@@ -139,7 +139,7 @@
 	clk_disable_unprepare(kscandat->clk);
 }
 
-static int __devinit lpc32xx_parse_dt(struct device *dev,
+static int lpc32xx_parse_dt(struct device *dev,
 				      struct lpc32xx_kscan_drv *kscandat)
 {
 	struct device_node *np = dev->of_node;
@@ -166,7 +166,7 @@
 	return 0;
 }
 
-static int __devinit lpc32xx_kscan_probe(struct platform_device *pdev)
+static int lpc32xx_kscan_probe(struct platform_device *pdev)
 {
 	struct lpc32xx_kscan_drv *kscandat;
 	struct input_dev *input;
@@ -310,7 +310,7 @@
 	return error;
 }
 
-static int __devexit lpc32xx_kscan_remove(struct platform_device *pdev)
+static int lpc32xx_kscan_remove(struct platform_device *pdev)
 {
 	struct lpc32xx_kscan_drv *kscandat = platform_get_drvdata(pdev);
 
@@ -377,7 +377,7 @@
 
 static struct platform_driver lpc32xx_kscan_driver = {
 	.probe		= lpc32xx_kscan_probe,
-	.remove		= __devexit_p(lpc32xx_kscan_remove),
+	.remove		= lpc32xx_kscan_remove,
 	.driver		= {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c
index 18b7237..f4ff0dd 100644
--- a/drivers/input/keyboard/matrix_keypad.c
+++ b/drivers/input/keyboard/matrix_keypad.c
@@ -23,6 +23,9 @@
 #include <linux/gpio.h>
 #include <linux/input/matrix_keypad.h>
 #include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_platform.h>
 
 struct matrix_keypad {
 	const struct matrix_keypad_platform_data *pdata;
@@ -37,8 +40,6 @@
 	bool scan_pending;
 	bool stopped;
 	bool gpio_all_disabled;
-
-	unsigned short keycodes[];
 };
 
 /*
@@ -118,6 +119,7 @@
 	struct matrix_keypad *keypad =
 		container_of(work, struct matrix_keypad, work.work);
 	struct input_dev *input_dev = keypad->input_dev;
+	const unsigned short *keycodes = input_dev->keycode;
 	const struct matrix_keypad_platform_data *pdata = keypad->pdata;
 	uint32_t new_state[MATRIX_MAX_COLS];
 	int row, col, code;
@@ -153,7 +155,7 @@
 			code = MATRIX_SCAN_CODE(row, col, keypad->row_shift);
 			input_event(input_dev, EV_MSC, MSC_SCAN, code);
 			input_report_key(input_dev,
-					 keypad->keycodes[code],
+					 keycodes[code],
 					 new_state[col] & (1 << row));
 		}
 	}
@@ -299,8 +301,8 @@
 static SIMPLE_DEV_PM_OPS(matrix_keypad_pm_ops,
 			 matrix_keypad_suspend, matrix_keypad_resume);
 
-static int __devinit matrix_keypad_init_gpio(struct platform_device *pdev,
-					     struct matrix_keypad *keypad)
+static int matrix_keypad_init_gpio(struct platform_device *pdev,
+				   struct matrix_keypad *keypad)
 {
 	const struct matrix_keypad_platform_data *pdata = keypad->pdata;
 	int i, err;
@@ -394,33 +396,95 @@
 		gpio_free(pdata->col_gpios[i]);
 }
 
-static int __devinit matrix_keypad_probe(struct platform_device *pdev)
+#ifdef CONFIG_OF
+static struct matrix_keypad_platform_data *
+matrix_keypad_parse_dt(struct device *dev)
 {
-	const struct matrix_keypad_platform_data *pdata;
-	const struct matrix_keymap_data *keymap_data;
-	struct matrix_keypad *keypad;
-	struct input_dev *input_dev;
-	unsigned int row_shift;
-	size_t keymap_size;
-	int err;
+	struct matrix_keypad_platform_data *pdata;
+	struct device_node *np = dev->of_node;
+	unsigned int *gpios;
+	int i;
 
-	pdata = pdev->dev.platform_data;
-	if (!pdata) {
-		dev_err(&pdev->dev, "no platform data defined\n");
-		return -EINVAL;
+	if (!np) {
+		dev_err(dev, "device lacks DT data\n");
+		return ERR_PTR(-ENODEV);
 	}
 
-	keymap_data = pdata->keymap_data;
-	if (!keymap_data) {
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		dev_err(dev, "could not allocate memory for platform data\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	pdata->num_row_gpios = of_gpio_named_count(np, "row-gpios");
+	pdata->num_col_gpios = of_gpio_named_count(np, "col-gpios");
+	if (!pdata->num_row_gpios || !pdata->num_col_gpios) {
+		dev_err(dev, "number of keypad rows/columns not specified\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (of_get_property(np, "linux,no-autorepeat", NULL))
+		pdata->no_autorepeat = true;
+	if (of_get_property(np, "linux,wakeup", NULL))
+		pdata->wakeup = true;
+	if (of_get_property(np, "gpio-activelow", NULL))
+		pdata->active_low = true;
+
+	of_property_read_u32(np, "debounce-delay-ms", &pdata->debounce_ms);
+	of_property_read_u32(np, "col-scan-delay-us",
+						&pdata->col_scan_delay_us);
+
+	gpios = devm_kzalloc(dev,
+			     sizeof(unsigned int) *
+				(pdata->num_row_gpios + pdata->num_col_gpios),
+			     GFP_KERNEL);
+	if (!gpios) {
+		dev_err(dev, "could not allocate memory for gpios\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < pdata->num_row_gpios; i++)
+		gpios[i] = of_get_named_gpio(np, "row-gpios", i);
+
+	for (i = 0; i < pdata->num_col_gpios; i++)
+		gpios[pdata->num_row_gpios + i] =
+			of_get_named_gpio(np, "col-gpios", i);
+
+	pdata->row_gpios = gpios;
+	pdata->col_gpios = &gpios[pdata->num_row_gpios];
+
+	return pdata;
+}
+#else
+static inline struct matrix_keypad_platform_data *
+matrix_keypad_parse_dt(struct device *dev)
+{
+	dev_err(dev, "no platform data defined\n");
+
+	return ERR_PTR(-EINVAL);
+}
+#endif
+
+static int matrix_keypad_probe(struct platform_device *pdev)
+{
+	const struct matrix_keypad_platform_data *pdata;
+	struct matrix_keypad *keypad;
+	struct input_dev *input_dev;
+	int err;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		pdata = matrix_keypad_parse_dt(&pdev->dev);
+		if (IS_ERR(pdata)) {
+			dev_err(&pdev->dev, "no platform data defined\n");
+			return PTR_ERR(pdata);
+		}
+	} else if (!pdata->keymap_data) {
 		dev_err(&pdev->dev, "no keymap data defined\n");
 		return -EINVAL;
 	}
 
-	row_shift = get_count_order(pdata->num_col_gpios);
-	keymap_size = (pdata->num_row_gpios << row_shift) *
-			sizeof(keypad->keycodes[0]);
-	keypad = kzalloc(sizeof(struct matrix_keypad) + keymap_size,
-			 GFP_KERNEL);
+	keypad = kzalloc(sizeof(struct matrix_keypad), GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!keypad || !input_dev) {
 		err = -ENOMEM;
@@ -429,7 +493,7 @@
 
 	keypad->input_dev = input_dev;
 	keypad->pdata = pdata;
-	keypad->row_shift = row_shift;
+	keypad->row_shift = get_count_order(pdata->num_col_gpios);
 	keypad->stopped = true;
 	INIT_DELAYED_WORK(&keypad->work, matrix_keypad_scan);
 	spin_lock_init(&keypad->lock);
@@ -440,12 +504,14 @@
 	input_dev->open		= matrix_keypad_start;
 	input_dev->close	= matrix_keypad_stop;
 
-	err = matrix_keypad_build_keymap(keymap_data, NULL,
+	err = matrix_keypad_build_keymap(pdata->keymap_data, NULL,
 					 pdata->num_row_gpios,
 					 pdata->num_col_gpios,
-					 keypad->keycodes, input_dev);
-	if (err)
+					 NULL, input_dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to build keymap\n");
 		goto err_free_mem;
+	}
 
 	if (!pdata->no_autorepeat)
 		__set_bit(EV_REP, input_dev->evbit);
@@ -473,7 +539,7 @@
 	return err;
 }
 
-static int __devexit matrix_keypad_remove(struct platform_device *pdev)
+static int matrix_keypad_remove(struct platform_device *pdev)
 {
 	struct matrix_keypad *keypad = platform_get_drvdata(pdev);
 
@@ -488,13 +554,22 @@
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id matrix_keypad_dt_match[] = {
+	{ .compatible = "gpio-matrix-keypad" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, matrix_keypad_dt_match);
+#endif
+
 static struct platform_driver matrix_keypad_driver = {
 	.probe		= matrix_keypad_probe,
-	.remove		= __devexit_p(matrix_keypad_remove),
+	.remove		= matrix_keypad_remove,
 	.driver		= {
 		.name	= "matrix-keypad",
 		.owner	= THIS_MODULE,
 		.pm	= &matrix_keypad_pm_ops,
+		.of_match_table = of_match_ptr(matrix_keypad_dt_match),
 	},
 };
 module_platform_driver(matrix_keypad_driver);
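
With the keymap now held in input_dev->keycode (matrix_keypad_build_keymap() is passed NULL instead of a caller-owned array), the scan routine only needs the shared MATRIX_SCAN_CODE() indexing. A compilable model of that index math, using the same formula as <linux/input/matrix_keypad.h>:

#include <stdio.h>

#define MATRIX_SCAN_CODE(row, col, row_shift) (((row) << (row_shift)) + (col))

/* Smallest s with (1 << s) >= n, mirroring the kernel's get_count_order(). */
static int count_order(unsigned int n)
{
	int s = 0;

	while ((1u << s) < n)
		s++;
	return s;
}

int main(void)
{
	unsigned int num_cols = 5;		/* illustrative geometry */
	int row_shift = count_order(num_cols);	/* 3: each row strides by 8 */

	/* Key at row 2, column 4: (2 << 3) + 4 = 20. */
	printf("code = %d\n", MATRIX_SCAN_CODE(2, 4, row_shift));
	return 0;
}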
diff --git a/drivers/input/keyboard/max7359_keypad.c b/drivers/input/keyboard/max7359_keypad.c
index 8edada8..7c7af2b 100644
--- a/drivers/input/keyboard/max7359_keypad.c
+++ b/drivers/input/keyboard/max7359_keypad.c
@@ -179,7 +179,7 @@
 	max7359_fall_deepsleep(client);
 }
 
-static int __devinit max7359_probe(struct i2c_client *client,
+static int max7359_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	const struct matrix_keymap_data *keymap_data = client->dev.platform_data;
@@ -260,7 +260,7 @@
 	return error;
 }
 
-static int __devexit max7359_remove(struct i2c_client *client)
+static int max7359_remove(struct i2c_client *client)
 {
 	struct max7359_keypad *keypad = i2c_get_clientdata(client);
 
@@ -312,7 +312,7 @@
 		.pm   = &max7359_pm,
 	},
 	.probe		= max7359_probe,
-	.remove		= __devexit_p(max7359_remove),
+	.remove		= max7359_remove,
 	.id_table	= max7359_ids,
 };
 
diff --git a/drivers/input/keyboard/mcs_touchkey.c b/drivers/input/keyboard/mcs_touchkey.c
index 0d77f6c..7c236f9 100644
--- a/drivers/input/keyboard/mcs_touchkey.c
+++ b/drivers/input/keyboard/mcs_touchkey.c
@@ -97,7 +97,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit mcs_touchkey_probe(struct i2c_client *client,
+static int mcs_touchkey_probe(struct i2c_client *client,
 		const struct i2c_device_id *id)
 {
 	const struct mcs_platform_data *pdata;
@@ -200,7 +200,7 @@
 	return error;
 }
 
-static int __devexit mcs_touchkey_remove(struct i2c_client *client)
+static int mcs_touchkey_remove(struct i2c_client *client)
 {
 	struct mcs_touchkey_data *data = i2c_get_clientdata(client);
 
@@ -270,7 +270,7 @@
 		.pm	= &mcs_touchkey_pm_ops,
 	},
 	.probe		= mcs_touchkey_probe,
-	.remove		= __devexit_p(mcs_touchkey_remove),
+	.remove		= mcs_touchkey_remove,
 	.shutdown       = mcs_touchkey_shutdown,
 	.id_table	= mcs_touchkey_id,
 };
diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c
index 7613f1c..f7f3e9a 100644
--- a/drivers/input/keyboard/mpr121_touchkey.c
+++ b/drivers/input/keyboard/mpr121_touchkey.c
@@ -71,7 +71,7 @@
 	u8 val;
 };
 
-static const struct mpr121_init_register init_reg_table[] __devinitconst = {
+static const struct mpr121_init_register init_reg_table[] = {
 	{ MHD_RISING_ADDR,	0x1 },
 	{ NHD_RISING_ADDR,	0x1 },
 	{ MHD_FALLING_ADDR,	0x1 },
@@ -123,7 +123,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit mpr121_phys_init(const struct mpr121_platform_data *pdata,
+static int mpr121_phys_init(const struct mpr121_platform_data *pdata,
 				      struct mpr121_touchkey *mpr121,
 				      struct i2c_client *client)
 {
@@ -185,8 +185,8 @@
 	return ret;
 }
 
-static int __devinit mpr_touchkey_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+static int mpr_touchkey_probe(struct i2c_client *client,
+			      const struct i2c_device_id *id)
 {
 	const struct mpr121_platform_data *pdata = client->dev.platform_data;
 	struct mpr121_touchkey *mpr121;
@@ -272,7 +272,7 @@
 	return error;
 }
 
-static int __devexit mpr_touchkey_remove(struct i2c_client *client)
+static int mpr_touchkey_remove(struct i2c_client *client)
 {
 	struct mpr121_touchkey *mpr121 = i2c_get_clientdata(client);
 
@@ -327,7 +327,7 @@
 	},
 	.id_table	= mpr121_id,
 	.probe		= mpr_touchkey_probe,
-	.remove		= __devexit_p(mpr_touchkey_remove),
+	.remove		= mpr_touchkey_remove,
 };
 
 module_i2c_driver(mpr_touchkey_driver);
diff --git a/drivers/input/keyboard/nomadik-ske-keypad.c b/drivers/input/keyboard/nomadik-ske-keypad.c
index 49f5fa6..0e6a815 100644
--- a/drivers/input/keyboard/nomadik-ske-keypad.c
+++ b/drivers/input/keyboard/nomadik-ske-keypad.c
@@ -67,6 +67,7 @@
 	const struct ske_keypad_platform_data *board;
 	unsigned short keymap[SKE_KPD_NUM_ROWS * SKE_KPD_NUM_COLS];
 	struct clk *clk;
+	struct clk *pclk;
 	spinlock_t ske_keypad_lock;
 };
 
@@ -271,11 +272,18 @@
 		goto err_free_mem_region;
 	}
 
+	keypad->pclk = clk_get(&pdev->dev, "apb_pclk");
+	if (IS_ERR(keypad->pclk)) {
+		dev_err(&pdev->dev, "failed to get pclk\n");
+		error = PTR_ERR(keypad->pclk);
+		goto err_iounmap;
+	}
+
 	keypad->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(keypad->clk)) {
 		dev_err(&pdev->dev, "failed to get clk\n");
 		error = PTR_ERR(keypad->clk);
-		goto err_iounmap;
+		goto err_pclk;
 	}
 
 	input->id.bustype = BUS_HOST;
@@ -287,14 +295,24 @@
 					   keypad->keymap, input);
 	if (error) {
 		dev_err(&pdev->dev, "Failed to build keymap\n");
-		goto err_iounmap;
+		goto err_clk;
 	}
 
 	input_set_capability(input, EV_MSC, MSC_SCAN);
 	if (!plat->no_autorepeat)
 		__set_bit(EV_REP, input->evbit);
 
-	clk_enable(keypad->clk);
+	error = clk_prepare_enable(keypad->pclk);
+	if (error) {
+		dev_err(&pdev->dev, "Failed to prepare/enable pclk\n");
+		goto err_clk;
+	}
+
+	error = clk_prepare_enable(keypad->clk);
+	if (error) {
+		dev_err(&pdev->dev, "Failed to prepare/enable clk\n");
+		goto err_pclk_disable;
+	}
 
 	/* go through board initialization helpers */
 	if (keypad->board->init)
@@ -330,8 +349,13 @@
 err_free_irq:
 	free_irq(keypad->irq, keypad);
 err_clk_disable:
-	clk_disable(keypad->clk);
+	clk_disable_unprepare(keypad->clk);
+err_pclk_disable:
+	clk_disable_unprepare(keypad->pclk);
+err_clk:
 	clk_put(keypad->clk);
+err_pclk:
+	clk_put(keypad->pclk);
 err_iounmap:
 	iounmap(keypad->reg_base);
 err_free_mem_region:
@@ -342,7 +366,7 @@
 	return error;
 }
 
-static int __devexit ske_keypad_remove(struct platform_device *pdev)
+static int ske_keypad_remove(struct platform_device *pdev)
 {
 	struct ske_keypad *keypad = platform_get_drvdata(pdev);
 	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -351,7 +375,7 @@
 
 	input_unregister_device(keypad->input);
 
-	clk_disable(keypad->clk);
+	clk_disable_unprepare(keypad->clk);
 	clk_put(keypad->clk);
 
 	if (keypad->board->exit)
@@ -403,7 +427,7 @@
 		.owner  = THIS_MODULE,
 		.pm = &ske_keypad_dev_pm_ops,
 	},
-	.remove = __devexit_p(ske_keypad_remove),
+	.remove = ske_keypad_remove,
 };
 
 static int __init ske_keypad_init(void)
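
The Nomadik hunks above acquire the bus clock ("apb_pclk") alongside the functional clock and move both to the prepare-plus-enable pairing the common clock framework requires, with each new acquisition getting its own unwind label. A sketch of the pairing, assuming two clocks obtained as in the hunk:

#include <linux/clk.h>

/* Enable a bus clock, then a functional clock, unwinding on failure. */
static int keypad_enable_clocks(struct clk *pclk, struct clk *clk)
{
	int error;

	error = clk_prepare_enable(pclk);	/* prepare + enable in one call */
	if (error)
		return error;

	error = clk_prepare_enable(clk);
	if (error) {
		clk_disable_unprepare(pclk);	/* unwind in reverse order */
		return error;
	}

	return 0;
}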
diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c
index 4a5fcc8..d0d5226 100644
--- a/drivers/input/keyboard/omap-keypad.c
+++ b/drivers/input/keyboard/omap-keypad.c
@@ -244,7 +244,7 @@
 #define omap_kp_resume	NULL
 #endif
 
-static int __devinit omap_kp_probe(struct platform_device *pdev)
+static int omap_kp_probe(struct platform_device *pdev)
 {
 	struct omap_kp *omap_kp;
 	struct input_dev *input_dev;
@@ -357,7 +357,7 @@
 	return -EINVAL;
 }
 
-static int __devexit omap_kp_remove(struct platform_device *pdev)
+static int omap_kp_remove(struct platform_device *pdev)
 {
 	struct omap_kp *omap_kp = platform_get_drvdata(pdev);
 
@@ -379,7 +379,7 @@
 
 static struct platform_driver omap_kp_driver = {
 	.probe		= omap_kp_probe,
-	.remove		= __devexit_p(omap_kp_remove),
+	.remove		= omap_kp_remove,
 	.suspend	= omap_kp_suspend,
 	.resume		= omap_kp_resume,
 	.driver		= {
diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index c05f98c..e25b022 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c
@@ -211,8 +211,8 @@
 }
 
 #ifdef CONFIG_OF
-static int __devinit omap4_keypad_parse_dt(struct device *dev,
-					   struct omap4_keypad *keypad_data)
+static int omap4_keypad_parse_dt(struct device *dev,
+				 struct omap4_keypad *keypad_data)
 {
 	struct device_node *np = dev->of_node;
 
@@ -241,7 +241,7 @@
 }
 #endif
 
-static int __devinit omap4_keypad_probe(struct platform_device *pdev)
+static int omap4_keypad_probe(struct platform_device *pdev)
 {
 	const struct omap4_keypad_platform_data *pdata =
 				dev_get_platdata(&pdev->dev);
@@ -406,7 +406,7 @@
 	return error;
 }
 
-static int __devexit omap4_keypad_remove(struct platform_device *pdev)
+static int omap4_keypad_remove(struct platform_device *pdev)
 {
 	struct omap4_keypad *keypad_data = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -440,7 +440,7 @@
 
 static struct platform_driver omap4_keypad_driver = {
 	.probe		= omap4_keypad_probe,
-	.remove		= __devexit_p(omap4_keypad_remove),
+	.remove		= omap4_keypad_remove,
 	.driver		= {
 		.name	= "omap4-keypad",
 		.owner	= THIS_MODULE,
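
omap4-keypad above and matrix-keypad earlier follow the same optional-DT pattern: the parser and match table are compiled only under CONFIG_OF, and of_match_ptr() degrades to NULL on non-DT builds so the reference compiles away with the table. A sketch with a hypothetical compatible string and acme_kp_* names:

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int acme_kp_probe(struct platform_device *pdev)
{
	return 0;
}

#ifdef CONFIG_OF
static const struct of_device_id acme_kp_dt_match[] = {
	{ .compatible = "acme,keypad" },	/* hypothetical binding */
	{ }
};
MODULE_DEVICE_TABLE(of, acme_kp_dt_match);
#endif

static struct platform_driver acme_kp_driver = {
	.probe	= acme_kp_probe,
	.driver	= {
		.name		= "acme-keypad",
		.owner		= THIS_MODULE,
		/* NULL when !CONFIG_OF, so the table can be #ifdef'd away */
		.of_match_table	= of_match_ptr(acme_kp_dt_match),
	},
};
module_platform_driver(acme_kp_driver);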
diff --git a/drivers/input/keyboard/opencores-kbd.c b/drivers/input/keyboard/opencores-kbd.c
index abe728c..7ac5f17 100644
--- a/drivers/input/keyboard/opencores-kbd.c
+++ b/drivers/input/keyboard/opencores-kbd.c
@@ -37,7 +37,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit opencores_kbd_probe(struct platform_device *pdev)
+static int opencores_kbd_probe(struct platform_device *pdev)
 {
 	struct input_dev *input;
 	struct opencores_kbd *opencores_kbd;
@@ -139,7 +139,7 @@
 	return error;
 }
 
-static int __devexit opencores_kbd_remove(struct platform_device *pdev)
+static int opencores_kbd_remove(struct platform_device *pdev)
 {
 	struct opencores_kbd *opencores_kbd = platform_get_drvdata(pdev);
 
@@ -158,7 +158,7 @@
 
 static struct platform_driver opencores_kbd_device_driver = {
 	.probe    = opencores_kbd_probe,
-	.remove   = __devexit_p(opencores_kbd_remove),
+	.remove   = opencores_kbd_remove,
 	.driver   = {
 		.name = "opencores-kbd",
 	},
diff --git a/drivers/input/keyboard/pmic8xxx-keypad.c b/drivers/input/keyboard/pmic8xxx-keypad.c
index 52c3465..74339e1 100644
--- a/drivers/input/keyboard/pmic8xxx-keypad.c
+++ b/drivers/input/keyboard/pmic8xxx-keypad.c
@@ -397,7 +397,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit pmic8xxx_kpd_init(struct pmic8xxx_kp *kp)
+static int pmic8xxx_kpd_init(struct pmic8xxx_kp *kp)
 {
 	int bits, rc, cycles;
 	u8 scan_val = 0, ctrl_val = 0;
@@ -447,7 +447,7 @@
 
 }
 
-static int  __devinit pmic8xxx_kp_config_gpio(int gpio_start, int num_gpios,
+static int pmic8xxx_kp_config_gpio(int gpio_start, int num_gpios,
 			struct pmic8xxx_kp *kp, struct pm_gpio *gpio_config)
 {
 	int	rc, i;
@@ -518,7 +518,7 @@
  * - set irq edge type.
  * - enable the keypad controller.
  */
-static int __devinit pmic8xxx_kp_probe(struct platform_device *pdev)
+static int pmic8xxx_kp_probe(struct platform_device *pdev)
 {
 	const struct pm8xxx_keypad_platform_data *pdata =
 					dev_get_platdata(&pdev->dev);
@@ -712,7 +712,7 @@
 	return rc;
 }
 
-static int __devexit pmic8xxx_kp_remove(struct platform_device *pdev)
+static int pmic8xxx_kp_remove(struct platform_device *pdev)
 {
 	struct pmic8xxx_kp *kp = platform_get_drvdata(pdev);
 
@@ -773,7 +773,7 @@
 
 static struct platform_driver pmic8xxx_kp_driver = {
 	.probe		= pmic8xxx_kp_probe,
-	.remove		= __devexit_p(pmic8xxx_kp_remove),
+	.remove		= pmic8xxx_kp_remove,
 	.driver		= {
 		.name = PM8XXX_KEYPAD_DEV_NAME,
 		.owner = THIS_MODULE,
diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c
index cad9d5d..5330d8f 100644
--- a/drivers/input/keyboard/pxa27x_keypad.c
+++ b/drivers/input/keyboard/pxa27x_keypad.c
@@ -482,7 +482,7 @@
 };
 #endif
 
-static int __devinit pxa27x_keypad_probe(struct platform_device *pdev)
+static int pxa27x_keypad_probe(struct platform_device *pdev)
 {
 	struct pxa27x_keypad_platform_data *pdata = pdev->dev.platform_data;
 	struct pxa27x_keypad *keypad;
@@ -595,7 +595,7 @@
 	return error;
 }
 
-static int __devexit pxa27x_keypad_remove(struct platform_device *pdev)
+static int pxa27x_keypad_remove(struct platform_device *pdev)
 {
 	struct pxa27x_keypad *keypad = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -620,7 +620,7 @@
 
 static struct platform_driver pxa27x_keypad_driver = {
 	.probe		= pxa27x_keypad_probe,
-	.remove		= __devexit_p(pxa27x_keypad_remove),
+	.remove		= pxa27x_keypad_remove,
 	.driver		= {
 		.name	= "pxa27x-keypad",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/keyboard/pxa930_rotary.c b/drivers/input/keyboard/pxa930_rotary.c
index 41488f9..bcad95b 100644
--- a/drivers/input/keyboard/pxa930_rotary.c
+++ b/drivers/input/keyboard/pxa930_rotary.c
@@ -82,7 +82,7 @@
 	clear_sbcr(r);
 }
 
-static int __devinit pxa930_rotary_probe(struct platform_device *pdev)
+static int pxa930_rotary_probe(struct platform_device *pdev)
 {
 	struct pxa930_rotary_platform_data *pdata = pdev->dev.platform_data;
 	struct pxa930_rotary *r;
@@ -174,7 +174,7 @@
 	return err;
 }
 
-static int __devexit pxa930_rotary_remove(struct platform_device *pdev)
+static int pxa930_rotary_remove(struct platform_device *pdev)
 {
 	struct pxa930_rotary *r = platform_get_drvdata(pdev);
 
@@ -193,7 +193,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= pxa930_rotary_probe,
-	.remove		= __devexit_p(pxa930_rotary_remove),
+	.remove		= pxa930_rotary_remove,
 };
 module_platform_driver(pxa930_rotary_driver);
 
diff --git a/drivers/input/keyboard/qt1070.c b/drivers/input/keyboard/qt1070.c
index ca68f29..42b773b 100644
--- a/drivers/input/keyboard/qt1070.c
+++ b/drivers/input/keyboard/qt1070.c
@@ -91,7 +91,7 @@
 	return ret;
 }
 
-static bool __devinit qt1070_identify(struct i2c_client *client)
+static bool qt1070_identify(struct i2c_client *client)
 {
 	int id, ver;
 
@@ -140,7 +140,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit qt1070_probe(struct i2c_client *client,
+static int qt1070_probe(struct i2c_client *client,
 				const struct i2c_device_id *id)
 {
 	struct qt1070_data *data;
@@ -230,7 +230,7 @@
 	return err;
 }
 
-static int __devexit qt1070_remove(struct i2c_client *client)
+static int qt1070_remove(struct i2c_client *client)
 {
 	struct qt1070_data *data = i2c_get_clientdata(client);
 
@@ -256,7 +256,7 @@
 	},
 	.id_table	= qt1070_id,
 	.probe		= qt1070_probe,
-	.remove		= __devexit_p(qt1070_remove),
+	.remove		= qt1070_remove,
 };
 
 module_i2c_driver(qt1070_driver);
diff --git a/drivers/input/keyboard/qt2160.c b/drivers/input/keyboard/qt2160.c
index 76b7d43..3dc2b0f 100644
--- a/drivers/input/keyboard/qt2160.c
+++ b/drivers/input/keyboard/qt2160.c
@@ -183,7 +183,7 @@
 	qt2160_schedule_read(qt2160);
 }
 
-static int __devinit qt2160_read(struct i2c_client *client, u8 reg)
+static int qt2160_read(struct i2c_client *client, u8 reg)
 {
 	int ret;
 
@@ -204,29 +204,20 @@
 	return ret;
 }
 
-static int __devinit qt2160_write(struct i2c_client *client, u8 reg, u8 data)
+static int qt2160_write(struct i2c_client *client, u8 reg, u8 data)
 {
-	int error;
+	int ret;
 
-	error = i2c_smbus_write_byte(client, reg);
-	if (error) {
+	ret = i2c_smbus_write_byte_data(client, reg, data);
+	if (ret < 0)
 		dev_err(&client->dev,
-			"couldn't send request. Returned %d\n", error);
-		return error;
-	}
+			"couldn't write data. Returned %d\n", ret);
 
-	error = i2c_smbus_write_byte(client, data);
-	if (error) {
-		dev_err(&client->dev,
-			"couldn't write data. Returned %d\n", error);
-		return error;
-	}
-
-	return error;
+	return ret;
 }
 
 
-static bool __devinit qt2160_identify(struct i2c_client *client)
+static bool qt2160_identify(struct i2c_client *client)
 {
 	int id, ver, rev;
 
@@ -257,7 +248,7 @@
 	return true;
 }
 
-static int __devinit qt2160_probe(struct i2c_client *client,
+static int qt2160_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct qt2160_data *qt2160;
@@ -344,7 +335,7 @@
 	return error;
 }
 
-static int __devexit qt2160_remove(struct i2c_client *client)
+static int qt2160_remove(struct i2c_client *client)
 {
 	struct qt2160_data *qt2160 = i2c_get_clientdata(client);
 
@@ -375,7 +366,7 @@
 
 	.id_table	= qt2160_idtable,
 	.probe		= qt2160_probe,
-	.remove		= __devexit_p(qt2160_remove),
+	.remove		= qt2160_remove,
 };
 
 module_i2c_driver(qt2160_driver);
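
The qt2160_write() rewrite above is a behavior fix as much as a cleanup: two back-to-back i2c_smbus_write_byte() calls are two separate bus transactions that another master (or a device timeout) can split, while i2c_smbus_write_byte_data() emits register and value as one SMBus write-byte-data transaction. A sketch of the combined call, with a hypothetical register address:

#include <linux/i2c.h>

#define QT_EXAMPLE_REG	0x0a	/* hypothetical register address */

/* One transaction on the wire: S addr W [reg] [data] P. */
static int qt_write_reg(struct i2c_client *client, u8 data)
{
	int ret;

	ret = i2c_smbus_write_byte_data(client, QT_EXAMPLE_REG, data);
	if (ret < 0)
		dev_err(&client->dev, "couldn't write data, error %d\n", ret);

	return ret;
}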
diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c
index 9d7a111..22e357b 100644
--- a/drivers/input/keyboard/samsung-keypad.c
+++ b/drivers/input/keyboard/samsung-keypad.c
@@ -309,7 +309,7 @@
 				struct samsung_keypad *keypad)
 {
 	struct device_node *np = dev->of_node;
-	int gpio, ret, row, col;
+	int gpio, error, row, col;
 
 	for (row = 0; row < keypad->rows; row++) {
 		gpio = of_get_named_gpio(np, "row-gpios", row);
@@ -320,10 +320,11 @@
 			continue;
 		}
 
-		ret = gpio_request(gpio, "keypad-row");
-		if (ret)
-			dev_err(dev, "keypad row[%d] gpio request failed\n",
-					row);
+		error = devm_gpio_request(dev, gpio, "keypad-row");
+		if (error)
+			dev_err(dev,
+				"keypad row[%d] gpio request failed: %d\n",
+				row, error);
 	}
 
 	for (col = 0; col < keypad->cols; col++) {
@@ -335,38 +336,22 @@
 			continue;
 		}
 
-		ret = gpio_request(gpio, "keypad-col");
-		if (ret)
-			dev_err(dev, "keypad column[%d] gpio request failed\n",
-					col);
+		error = devm_gpio_request(dev, gpio, "keypad-col");
+		if (error)
+			dev_err(dev,
+				"keypad column[%d] gpio request failed: %d\n",
+				col, error);
 	}
 }
-
-static void samsung_keypad_dt_gpio_free(struct samsung_keypad *keypad)
-{
-	int cnt;
-
-	for (cnt = 0; cnt < keypad->rows; cnt++)
-		if (gpio_is_valid(keypad->row_gpios[cnt]))
-			gpio_free(keypad->row_gpios[cnt]);
-
-	for (cnt = 0; cnt < keypad->cols; cnt++)
-		if (gpio_is_valid(keypad->col_gpios[cnt]))
-			gpio_free(keypad->col_gpios[cnt]);
-}
 #else
 static
 struct samsung_keypad_platdata *samsung_keypad_parse_dt(struct device *dev)
 {
 	return NULL;
 }
-
-static void samsung_keypad_dt_gpio_free(struct samsung_keypad *keypad)
-{
-}
 #endif
 
-static int __devinit samsung_keypad_probe(struct platform_device *pdev)
+static int samsung_keypad_probe(struct platform_device *pdev)
 {
 	const struct samsung_keypad_platdata *pdata;
 	const struct matrix_keymap_data *keymap_data;
@@ -405,36 +390,30 @@
 	row_shift = get_count_order(pdata->cols);
 	keymap_size = (pdata->rows << row_shift) * sizeof(keypad->keycodes[0]);
 
-	keypad = kzalloc(sizeof(*keypad) + keymap_size, GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!keypad || !input_dev) {
-		error = -ENOMEM;
-		goto err_free_mem;
-	}
+	keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad) + keymap_size,
+			      GFP_KERNEL);
+	input_dev = devm_input_allocate_device(&pdev->dev);
+	if (!keypad || !input_dev)
+		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		error = -ENODEV;
-		goto err_free_mem;
-	}
+	if (!res)
+		return -ENODEV;
 
-	keypad->base = ioremap(res->start, resource_size(res));
-	if (!keypad->base) {
-		error = -EBUSY;
-		goto err_free_mem;
-	}
+	keypad->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	if (!keypad->base)
+		return -EBUSY;
 
-	keypad->clk = clk_get(&pdev->dev, "keypad");
+	keypad->clk = devm_clk_get(&pdev->dev, "keypad");
 	if (IS_ERR(keypad->clk)) {
 		dev_err(&pdev->dev, "failed to get keypad clk\n");
-		error = PTR_ERR(keypad->clk);
-		goto err_unmap_base;
+		return PTR_ERR(keypad->clk);
 	}
 
 	error = clk_prepare(keypad->clk);
 	if (error) {
 		dev_err(&pdev->dev, "keypad clock prepare failed\n");
-		goto err_put_clk;
+		return error;
 	}
 
 	keypad->input_dev = input_dev;
@@ -479,14 +458,15 @@
 	keypad->irq = platform_get_irq(pdev, 0);
 	if (keypad->irq < 0) {
 		error = keypad->irq;
-		goto err_put_clk;
+		goto err_unprepare_clk;
 	}
 
-	error = request_threaded_irq(keypad->irq, NULL, samsung_keypad_irq,
-			IRQF_ONESHOT, dev_name(&pdev->dev), keypad);
+	error = devm_request_threaded_irq(&pdev->dev, keypad->irq, NULL,
+					  samsung_keypad_irq, IRQF_ONESHOT,
+					  dev_name(&pdev->dev), keypad);
 	if (error) {
 		dev_err(&pdev->dev, "failed to register keypad interrupt\n");
-		goto err_put_clk;
+		goto err_unprepare_clk;
 	}
 
 	device_init_wakeup(&pdev->dev, pdata->wakeup);
@@ -495,7 +475,7 @@
 
 	error = input_register_device(keypad->input_dev);
 	if (error)
-		goto err_free_irq;
+		goto err_disable_runtime_pm;
 
 	if (pdev->dev.of_node) {
 		devm_kfree(&pdev->dev, (void *)pdata->keymap_data->keymap);
@@ -504,26 +484,16 @@
 	}
 	return 0;
 
-err_free_irq:
-	free_irq(keypad->irq, keypad);
+err_disable_runtime_pm:
 	pm_runtime_disable(&pdev->dev);
 	device_init_wakeup(&pdev->dev, 0);
 	platform_set_drvdata(pdev, NULL);
 err_unprepare_clk:
 	clk_unprepare(keypad->clk);
-err_put_clk:
-	clk_put(keypad->clk);
-	samsung_keypad_dt_gpio_free(keypad);
-err_unmap_base:
-	iounmap(keypad->base);
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(keypad);
-
 	return error;
 }
 
-static int __devexit samsung_keypad_remove(struct platform_device *pdev)
+static int samsung_keypad_remove(struct platform_device *pdev)
 {
 	struct samsung_keypad *keypad = platform_get_drvdata(pdev);
 
@@ -533,18 +503,7 @@
 
 	input_unregister_device(keypad->input_dev);
 
-	/*
-	 * It is safe to free IRQ after unregistering device because
-	 * samsung_keypad_close will shut off interrupts.
-	 */
-	free_irq(keypad->irq, keypad);
-
 	clk_unprepare(keypad->clk);
-	clk_put(keypad->clk);
-	samsung_keypad_dt_gpio_free(keypad);
-
-	iounmap(keypad->base);
-	kfree(keypad);
 
 	return 0;
 }
@@ -685,7 +644,7 @@
 
 static struct platform_driver samsung_keypad_driver = {
 	.probe		= samsung_keypad_probe,
-	.remove		= __devexit_p(samsung_keypad_remove),
+	.remove		= samsung_keypad_remove,
 	.driver		= {
 		.name	= "samsung-keypad",
 		.owner	= THIS_MODULE,
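
The samsung-keypad conversion above shows the payoff of device-managed resources: devm_* memory, the MMIO mapping, the clock reference, and the threaded IRQ are all released automatically on probe failure or unbind, so most of the error ladder and remove() disappear; only clk_prepare() still needs a matching clk_unprepare(). A sketch of the resulting probe shape, with hypothetical acme_* names:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct acme_keypad {
	void __iomem *base;
	struct clk *clk;
};

/* Every devm_* acquisition is released automatically on failure/unbind. */
static int acme_keypad_probe(struct platform_device *pdev)
{
	struct acme_keypad *kp;
	struct resource *res;

	kp = devm_kzalloc(&pdev->dev, sizeof(*kp), GFP_KERNEL);
	if (!kp)
		return -ENOMEM;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	kp->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
	if (!kp->base)
		return -EBUSY;

	kp->clk = devm_clk_get(&pdev->dev, "keypad");
	if (IS_ERR(kp->clk))
		return PTR_ERR(kp->clk);

	platform_set_drvdata(pdev, kp);
	return 0;			/* no unwind labels needed */
}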
diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index da54ad5..fdb9eb2 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -162,7 +162,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit sh_keysc_probe(struct platform_device *pdev)
+static int sh_keysc_probe(struct platform_device *pdev)
 {
 	struct sh_keysc_priv *priv;
 	struct sh_keysc_info *pdata;
@@ -272,7 +272,7 @@
 	return error;
 }
 
-static int __devexit sh_keysc_remove(struct platform_device *pdev)
+static int sh_keysc_remove(struct platform_device *pdev)
 {
 	struct sh_keysc_priv *priv = platform_get_drvdata(pdev);
 
@@ -331,7 +331,7 @@
 
 static struct platform_driver sh_keysc_device_driver = {
 	.probe		= sh_keysc_probe,
-	.remove		= __devexit_p(sh_keysc_remove),
+	.remove		= sh_keysc_remove,
 	.driver		= {
 		.name	= "sh_keysc",
 		.pm	= &sh_keysc_dev_pm_ops,
diff --git a/drivers/input/keyboard/spear-keyboard.c b/drivers/input/keyboard/spear-keyboard.c
index c7ca97f..695d237 100644
--- a/drivers/input/keyboard/spear-keyboard.c
+++ b/drivers/input/keyboard/spear-keyboard.c
@@ -55,15 +55,15 @@
 
 struct spear_kbd {
 	struct input_dev *input;
-	struct resource *res;
 	void __iomem *io_base;
 	struct clk *clk;
 	unsigned int irq;
 	unsigned int mode;
+	unsigned int suspended_rate;
 	unsigned short last_key;
 	unsigned short keycodes[NUM_ROWS * NUM_COLS];
 	bool rep;
-	unsigned int suspended_rate;
+	bool irq_wake_enabled;
 	u32 mode_ctl_reg;
 };
 
@@ -146,7 +146,7 @@
 }
 
 #ifdef CONFIG_OF
-static int __devinit spear_kbd_parse_dt(struct platform_device *pdev,
+static int spear_kbd_parse_dt(struct platform_device *pdev,
                                         struct spear_kbd *kbd)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -181,7 +181,7 @@
 }
 #endif
 
-static int __devinit spear_kbd_probe(struct platform_device *pdev)
+static int spear_kbd_probe(struct platform_device *pdev)
 {
 	struct kbd_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	const struct matrix_keymap_data *keymap = pdata ? pdata->keymap : NULL;
@@ -203,12 +203,16 @@
 		return irq;
 	}
 
-	kbd = kzalloc(sizeof(*kbd), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!kbd || !input_dev) {
-		dev_err(&pdev->dev, "out of memory\n");
-		error = -ENOMEM;
-		goto err_free_mem;
+	kbd = devm_kzalloc(&pdev->dev, sizeof(*kbd), GFP_KERNEL);
+	if (!kbd) {
+		dev_err(&pdev->dev, "not enough memory for driver data\n");
+		return -ENOMEM;
+	}
+
+	input_dev = devm_input_allocate_device(&pdev->dev);
+	if (!input_dev) {
+		dev_err(&pdev->dev, "unable to allocate input device\n");
+		return -ENOMEM;
 	}
 
 	kbd->input = input_dev;
@@ -217,37 +221,25 @@
 	if (!pdata) {
 		error = spear_kbd_parse_dt(pdev, kbd);
 		if (error)
-			goto err_free_mem;
+			return error;
 	} else {
 		kbd->mode = pdata->mode;
 		kbd->rep = pdata->rep;
 		kbd->suspended_rate = pdata->suspended_rate;
 	}
 
-	kbd->res = request_mem_region(res->start, resource_size(res),
-				      pdev->name);
-	if (!kbd->res) {
-		dev_err(&pdev->dev, "keyboard region already claimed\n");
-		error = -EBUSY;
-		goto err_free_mem;
-	}
-
-	kbd->io_base = ioremap(res->start, resource_size(res));
+	kbd->io_base = devm_request_and_ioremap(&pdev->dev, res);
 	if (!kbd->io_base) {
-		dev_err(&pdev->dev, "ioremap failed for kbd_region\n");
-		error = -ENOMEM;
-		goto err_release_mem_region;
+		dev_err(&pdev->dev, "request-ioremap failed for kbd_region\n");
+		return -ENOMEM;
 	}
 
-	kbd->clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(kbd->clk)) {
-		error = PTR_ERR(kbd->clk);
-		goto err_iounmap;
-	}
+	kbd->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(kbd->clk))
+		return PTR_ERR(kbd->clk);
 
 	input_dev->name = "Spear Keyboard";
 	input_dev->phys = "keyboard/input0";
-	input_dev->dev.parent = &pdev->dev;
 	input_dev->id.bustype = BUS_HOST;
 	input_dev->id.vendor = 0x0001;
 	input_dev->id.product = 0x0001;
@@ -259,7 +251,7 @@
 					   kbd->keycodes, input_dev);
 	if (error) {
 		dev_err(&pdev->dev, "Failed to build keymap\n");
-		goto err_put_clk;
+		return error;
 	}
 
 	if (kbd->rep)
@@ -268,48 +260,36 @@
 
 	input_set_drvdata(input_dev, kbd);
 
-	error = request_irq(irq, spear_kbd_interrupt, 0, "keyboard", kbd);
+	error = devm_request_irq(&pdev->dev, irq, spear_kbd_interrupt, 0,
+			"keyboard", kbd);
 	if (error) {
-		dev_err(&pdev->dev, "request_irq fail\n");
-		goto err_put_clk;
+		dev_err(&pdev->dev, "request_irq failed\n");
+		return error;
 	}
 
+	error = clk_prepare(kbd->clk);
+	if (error)
+		return error;
+
 	error = input_register_device(input_dev);
 	if (error) {
 		dev_err(&pdev->dev, "Unable to register keyboard device\n");
-		goto err_free_irq;
+		clk_unprepare(kbd->clk);
+		return error;
 	}
 
 	device_init_wakeup(&pdev->dev, 1);
 	platform_set_drvdata(pdev, kbd);
 
 	return 0;
-
-err_free_irq:
-	free_irq(kbd->irq, kbd);
-err_put_clk:
-	clk_put(kbd->clk);
-err_iounmap:
-	iounmap(kbd->io_base);
-err_release_mem_region:
-	release_mem_region(res->start, resource_size(res));
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(kbd);
-
-	return error;
 }
 
-static int __devexit spear_kbd_remove(struct platform_device *pdev)
+static int spear_kbd_remove(struct platform_device *pdev)
 {
 	struct spear_kbd *kbd = platform_get_drvdata(pdev);
 
-	free_irq(kbd->irq, kbd);
 	input_unregister_device(kbd->input);
-	clk_put(kbd->clk);
-	iounmap(kbd->io_base);
-	release_mem_region(kbd->res->start, resource_size(kbd->res));
-	kfree(kbd);
+	clk_unprepare(kbd->clk);
 
 	device_init_wakeup(&pdev->dev, 0);
 	platform_set_drvdata(pdev, NULL);
@@ -333,7 +313,8 @@
 	mode_ctl_reg = readl_relaxed(kbd->io_base + MODE_CTL_REG);
 
 	if (device_may_wakeup(&pdev->dev)) {
-		enable_irq_wake(kbd->irq);
+		if (!enable_irq_wake(kbd->irq))
+			kbd->irq_wake_enabled = true;
 
 		/*
 		 * reprogram the keyboard operating frequency as on some
@@ -379,7 +360,10 @@
 	mutex_lock(&input_dev->mutex);
 
 	if (device_may_wakeup(&pdev->dev)) {
-		disable_irq_wake(kbd->irq);
+		if (kbd->irq_wake_enabled) {
+			kbd->irq_wake_enabled = false;
+			disable_irq_wake(kbd->irq);
+		}
 	} else {
 		if (input_dev->users)
 			clk_enable(kbd->clk);
@@ -407,7 +391,7 @@
 
 static struct platform_driver spear_kbd_driver = {
 	.probe		= spear_kbd_probe,
-	.remove		= __devexit_p(spear_kbd_remove),
+	.remove		= spear_kbd_remove,
 	.driver		= {
 		.name	= "keyboard",
 		.owner	= THIS_MODULE,
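
The spear-keyboard suspend/resume change above pairs disable_irq_wake() with enable_irq_wake() only when the enable actually succeeded; an unbalanced disable would underflow the wake-depth count and trigger a kernel warning. The pattern in isolation, as a minimal sketch (hypothetical driver data carrying an irq_wake_enabled flag, as spear_kbd does above):

static int foo_suspend(struct device *dev)
{
	struct foo *foo = dev_get_drvdata(dev);

	/* enable_irq_wake() returns 0 on success */
	if (device_may_wakeup(dev) && !enable_irq_wake(foo->irq))
		foo->irq_wake_enabled = true;

	return 0;
}

static int foo_resume(struct device *dev)
{
	struct foo *foo = dev_get_drvdata(dev);

	/* only undo a wake enable that actually happened */
	if (foo->irq_wake_enabled) {
		foo->irq_wake_enabled = false;
		disable_irq_wake(foo->irq);
	}

	return 0;
}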
diff --git a/drivers/input/keyboard/stmpe-keypad.c b/drivers/input/keyboard/stmpe-keypad.c
index 470a877..5cbec56 100644
--- a/drivers/input/keyboard/stmpe-keypad.c
+++ b/drivers/input/keyboard/stmpe-keypad.c
@@ -166,7 +166,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit stmpe_keypad_altfunc_init(struct stmpe_keypad *keypad)
+static int stmpe_keypad_altfunc_init(struct stmpe_keypad *keypad)
 {
 	const struct stmpe_keypad_variant *variant = keypad->variant;
 	unsigned int col_gpios = variant->col_gpios;
@@ -207,7 +207,7 @@
 	return stmpe_set_altfunc(stmpe, pins, STMPE_BLOCK_KEYPAD);
 }
 
-static int __devinit stmpe_keypad_chip_init(struct stmpe_keypad *keypad)
+static int stmpe_keypad_chip_init(struct stmpe_keypad *keypad)
 {
 	const struct stmpe_keypad_platform_data *plat = keypad->plat;
 	const struct stmpe_keypad_variant *variant = keypad->variant;
@@ -257,105 +257,131 @@
 			      (plat->debounce_ms << 1));
 }
 
-static int __devinit stmpe_keypad_probe(struct platform_device *pdev)
+static void stmpe_keypad_fill_used_pins(struct stmpe_keypad *keypad)
+{
+	int row, col;
+
+	for (row = 0; row < STMPE_KEYPAD_MAX_ROWS; row++) {
+		for (col = 0; col < STMPE_KEYPAD_MAX_COLS; col++) {
+			int code = MATRIX_SCAN_CODE(row, col,
+						STMPE_KEYPAD_ROW_SHIFT);
+			if (keypad->keymap[code] != KEY_RESERVED) {
+				keypad->rows |= 1 << row;
+				keypad->cols |= 1 << col;
+			}
+		}
+	}
+}
+
+#ifdef CONFIG_OF
+static const struct stmpe_keypad_platform_data *
+stmpe_keypad_of_probe(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct stmpe_keypad_platform_data *plat;
+
+	if (!np)
+		return ERR_PTR(-ENODEV);
+
+	plat = devm_kzalloc(dev, sizeof(*plat), GFP_KERNEL);
+	if (!plat)
+		return ERR_PTR(-ENOMEM);
+
+	of_property_read_u32(np, "debounce-interval", &plat->debounce_ms);
+	of_property_read_u32(np, "st,scan-count", &plat->scan_count);
+
+	plat->no_autorepeat = of_property_read_bool(np, "st,no-autorepeat");
+
+	return plat;
+}
+#else
+static inline const struct stmpe_keypad_platform_data *
+stmpe_keypad_of_probe(struct device *dev)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif
+
+static int stmpe_keypad_probe(struct platform_device *pdev)
 {
 	struct stmpe *stmpe = dev_get_drvdata(pdev->dev.parent);
-	struct stmpe_keypad_platform_data *plat;
+	const struct stmpe_keypad_platform_data *plat;
 	struct stmpe_keypad *keypad;
 	struct input_dev *input;
-	int ret;
+	int error;
 	int irq;
-	int i;
 
 	plat = stmpe->pdata->keypad;
-	if (!plat)
-		return -ENODEV;
+	if (!plat) {
+		plat = stmpe_keypad_of_probe(&pdev->dev);
+		if (IS_ERR(plat))
+			return PTR_ERR(plat);
+	}
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
 
-	keypad = kzalloc(sizeof(struct stmpe_keypad), GFP_KERNEL);
+	keypad = devm_kzalloc(&pdev->dev, sizeof(struct stmpe_keypad),
+			      GFP_KERNEL);
 	if (!keypad)
 		return -ENOMEM;
 
-	input = input_allocate_device();
-	if (!input) {
-		ret = -ENOMEM;
-		goto out_freekeypad;
-	}
+	input = devm_input_allocate_device(&pdev->dev);
+	if (!input)
+		return -ENOMEM;
 
 	input->name = "STMPE keypad";
 	input->id.bustype = BUS_I2C;
 	input->dev.parent = &pdev->dev;
 
-	ret = matrix_keypad_build_keymap(plat->keymap_data, NULL,
-					 STMPE_KEYPAD_MAX_ROWS,
-					 STMPE_KEYPAD_MAX_COLS,
-					 keypad->keymap, input);
-	if (ret)
-		goto out_freeinput;
+	error = matrix_keypad_build_keymap(plat->keymap_data, NULL,
+					   STMPE_KEYPAD_MAX_ROWS,
+					   STMPE_KEYPAD_MAX_COLS,
+					   keypad->keymap, input);
+	if (error)
+		return error;
 
 	input_set_capability(input, EV_MSC, MSC_SCAN);
 	if (!plat->no_autorepeat)
 		__set_bit(EV_REP, input->evbit);
 
-	for (i = 0; i < plat->keymap_data->keymap_size; i++) {
-		unsigned int key = plat->keymap_data->keymap[i];
-
-		keypad->cols |= 1 << KEY_COL(key);
-		keypad->rows |= 1 << KEY_ROW(key);
-	}
+	stmpe_keypad_fill_used_pins(keypad);
 
 	keypad->stmpe = stmpe;
 	keypad->plat = plat;
 	keypad->input = input;
 	keypad->variant = &stmpe_keypad_variants[stmpe->partnum];
 
-	ret = stmpe_keypad_chip_init(keypad);
-	if (ret < 0)
-		goto out_freeinput;
+	error = stmpe_keypad_chip_init(keypad);
+	if (error < 0)
+		return error;
 
-	ret = input_register_device(input);
-	if (ret) {
-		dev_err(&pdev->dev,
-			"unable to register input device: %d\n", ret);
-		goto out_freeinput;
+	error = devm_request_threaded_irq(&pdev->dev, irq,
+					  NULL, stmpe_keypad_irq,
+					  IRQF_ONESHOT, "stmpe-keypad", keypad);
+	if (error) {
+		dev_err(&pdev->dev, "unable to get irq: %d\n", error);
+		return error;
 	}
 
-	ret = request_threaded_irq(irq, NULL, stmpe_keypad_irq, IRQF_ONESHOT,
-				   "stmpe-keypad", keypad);
-	if (ret) {
-		dev_err(&pdev->dev, "unable to get irq: %d\n", ret);
-		goto out_unregisterinput;
+	error = input_register_device(input);
+	if (error) {
+		dev_err(&pdev->dev,
+			"unable to register input device: %d\n", error);
+		return error;
 	}
 
 	platform_set_drvdata(pdev, keypad);
 
 	return 0;
-
-out_unregisterinput:
-	input_unregister_device(input);
-	input = NULL;
-out_freeinput:
-	input_free_device(input);
-out_freekeypad:
-	kfree(keypad);
-	return ret;
 }
 
-static int __devexit stmpe_keypad_remove(struct platform_device *pdev)
+static int stmpe_keypad_remove(struct platform_device *pdev)
 {
 	struct stmpe_keypad *keypad = platform_get_drvdata(pdev);
-	struct stmpe *stmpe = keypad->stmpe;
-	int irq = platform_get_irq(pdev, 0);
 
-	stmpe_disable(stmpe, STMPE_BLOCK_KEYPAD);
-
-	free_irq(irq, keypad);
-	input_unregister_device(keypad->input);
-	platform_set_drvdata(pdev, NULL);
-	kfree(keypad);
+	stmpe_disable(keypad->stmpe, STMPE_BLOCK_KEYPAD);
 
 	return 0;
 }
@@ -364,7 +390,7 @@
 	.driver.name	= "stmpe-keypad",
 	.driver.owner	= THIS_MODULE,
 	.probe		= stmpe_keypad_probe,
-	.remove		= __devexit_p(stmpe_keypad_remove),
+	.remove		= stmpe_keypad_remove,
 };
 module_platform_driver(stmpe_keypad_driver);
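
The stmpe-keypad probe above now accepts either classic platform data or, failing that, a DT parser that reports failure through ERR_PTR() codes, so a single IS_ERR() check covers "no DT node", allocation failure, and non-DT builds alike. A sketch of that fallback shape (hypothetical foo_pdata and property name; the !CONFIG_OF stub keeps non-DT builds compiling):

#ifdef CONFIG_OF
static const struct foo_pdata *foo_of_probe(struct device *dev)
{
	struct foo_pdata *pdata;

	if (!dev->of_node)
		return ERR_PTR(-ENODEV);

	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
	if (!pdata)
		return ERR_PTR(-ENOMEM);

	of_property_read_u32(dev->of_node, "debounce-interval",
			     &pdata->debounce_ms);

	return pdata;
}
#else
static inline const struct foo_pdata *foo_of_probe(struct device *dev)
{
	return ERR_PTR(-EINVAL);
}
#endif

The caller then reduces to: if (!pdata) { pdata = foo_of_probe(dev); if (IS_ERR(pdata)) return PTR_ERR(pdata); }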
 
diff --git a/drivers/input/keyboard/tc3589x-keypad.c b/drivers/input/keyboard/tc3589x-keypad.c
index 7d498e6..2fb0d76 100644
--- a/drivers/input/keyboard/tc3589x-keypad.c
+++ b/drivers/input/keyboard/tc3589x-keypad.c
@@ -299,7 +299,7 @@
 	tc3589x_keypad_disable(keypad);
 }
 
-static int __devinit tc3589x_keypad_probe(struct platform_device *pdev)
+static int tc3589x_keypad_probe(struct platform_device *pdev)
 {
 	struct tc3589x *tc3589x = dev_get_drvdata(pdev->dev.parent);
 	struct tc_keypad *keypad;
@@ -382,7 +382,7 @@
 	return error;
 }
 
-static int __devexit tc3589x_keypad_remove(struct platform_device *pdev)
+static int tc3589x_keypad_remove(struct platform_device *pdev)
 {
 	struct tc_keypad *keypad = platform_get_drvdata(pdev);
 	int irq = platform_get_irq(pdev, 0);
@@ -448,7 +448,7 @@
 		.pm	= &tc3589x_keypad_dev_pm_ops,
 	},
 	.probe	= tc3589x_keypad_probe,
-	.remove	= __devexit_p(tc3589x_keypad_remove),
+	.remove	= tc3589x_keypad_remove,
 };
 module_platform_driver(tc3589x_keypad_driver);
 
diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c
index c355cdd..bfc832c 100644
--- a/drivers/input/keyboard/tca6416-keypad.c
+++ b/drivers/input/keyboard/tca6416-keypad.c
@@ -166,7 +166,7 @@
 		disable_irq(chip->irqnum);
 }
 
-static int __devinit tca6416_setup_registers(struct tca6416_keypad_chip *chip)
+static int tca6416_setup_registers(struct tca6416_keypad_chip *chip)
 {
 	int error;
 
@@ -197,7 +197,7 @@
 	return 0;
 }
 
-static int __devinit tca6416_keypad_probe(struct i2c_client *client,
+static int tca6416_keypad_probe(struct i2c_client *client,
 				   const struct i2c_device_id *id)
 {
 	struct tca6416_keys_platform_data *pdata;
@@ -313,7 +313,7 @@
 	return error;
 }
 
-static int __devexit tca6416_keypad_remove(struct i2c_client *client)
+static int tca6416_keypad_remove(struct i2c_client *client)
 {
 	struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
 
@@ -361,7 +361,7 @@
 		.pm	= &tca6416_keypad_dev_pm_ops,
 	},
 	.probe		= tca6416_keypad_probe,
-	.remove		= __devexit_p(tca6416_keypad_remove),
+	.remove		= tca6416_keypad_remove,
 	.id_table	= tca6416_id,
 };
 
diff --git a/drivers/input/keyboard/tca8418_keypad.c b/drivers/input/keyboard/tca8418_keypad.c
index 893869b..50e9c5e 100644
--- a/drivers/input/keyboard/tca8418_keypad.c
+++ b/drivers/input/keyboard/tca8418_keypad.c
@@ -35,6 +35,7 @@
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/input/tca8418_keypad.h>
+#include <linux/of.h>
 
 /* TCA8418 hardware limits */
 #define TCA8418_MAX_ROWS	8
@@ -109,25 +110,11 @@
 #define KEY_EVENT_CODE		0x7f
 #define KEY_EVENT_VALUE		0x80
 
-
-static const struct i2c_device_id tca8418_id[] = {
-	{ TCA8418_NAME, 8418, },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, tca8418_id);
-
 struct tca8418_keypad {
-	unsigned int rows;
-	unsigned int cols;
-	unsigned int keypad_mask; /* Mask for keypad col/rol regs */
-	unsigned int irq;
-	unsigned int row_shift;
-
 	struct i2c_client *client;
 	struct input_dev *input;
 
-	/* Flexible array member, must be at end of struct */
-	unsigned short keymap[];
+	unsigned int row_shift;
 };
 
 /*
@@ -172,6 +159,8 @@
 
 static void tca8418_read_keypad(struct tca8418_keypad *keypad_data)
 {
+	struct input_dev *input = keypad_data->input;
+	unsigned short *keymap = input->keycode;
 	int error, col, row;
 	u8 reg, state, code;
 
@@ -190,9 +179,8 @@
 		col = (col) ? col - 1 : TCA8418_MAX_COLS - 1;
 
 		code = MATRIX_SCAN_CODE(row, col, keypad_data->row_shift);
-		input_event(keypad_data->input, EV_MSC, MSC_SCAN, code);
-		input_report_key(keypad_data->input,
-				keypad_data->keymap[code], state);
+		input_event(input, EV_MSC, MSC_SCAN, code);
+		input_report_key(input, keymap[code], state);
 
 		/* Read for next loop */
 		error = tca8418_read_byte(keypad_data, REG_KEY_EVENT_A, &reg);
@@ -202,7 +190,7 @@
 		dev_err(&keypad_data->client->dev,
 			"unable to read REG_KEY_EVENT_A\n");
 
-	input_sync(keypad_data->input);
+	input_sync(input);
 }
 
 /*
@@ -218,16 +206,18 @@
 	if (error) {
 		dev_err(&keypad_data->client->dev,
 			"unable to read REG_INT_STAT\n");
-		goto exit;
+		return IRQ_NONE;
 	}
 
+	if (!reg)
+		return IRQ_NONE;
+
 	if (reg & INT_STAT_OVR_FLOW_INT)
 		dev_warn(&keypad_data->client->dev, "overflow occurred\n");
 
 	if (reg & INT_STAT_K_INT)
 		tca8418_read_keypad(keypad_data);
 
-exit:
 	/* Clear all interrupts, even IRQs we didn't check (GPI, CAD, LCK) */
 	reg = 0xff;
 	error = tca8418_write_byte(keypad_data, REG_INT_STAT, reg);
@@ -241,7 +231,8 @@
 /*
  * Configure the TCA8418 for keypad operation
  */
-static int __devinit tca8418_configure(struct tca8418_keypad *keypad_data)
+static int tca8418_configure(struct tca8418_keypad *keypad_data,
+			     u32 rows, u32 cols)
 {
 	int reg, error;
 
@@ -253,9 +244,8 @@
 
 
 	/* Assemble a mask for row and column registers */
-	reg  =  ~(~0 << keypad_data->rows);
-	reg += (~(~0 << keypad_data->cols)) << 8;
-	keypad_data->keypad_mask = reg;
+	reg  =  ~(~0 << rows);
+	reg += (~(~0 << cols)) << 8;
 
 	/* Set registers to keypad mode */
 	error |= tca8418_write_byte(keypad_data, REG_KP_GPIO1, reg);
@@ -270,145 +260,144 @@
 	return error;
 }
 
-static int __devinit tca8418_keypad_probe(struct i2c_client *client,
+static int tca8418_keypad_probe(struct i2c_client *client,
 					  const struct i2c_device_id *id)
 {
+	struct device *dev = &client->dev;
 	const struct tca8418_keypad_platform_data *pdata =
-						client->dev.platform_data;
+						dev_get_platdata(dev);
 	struct tca8418_keypad *keypad_data;
 	struct input_dev *input;
+	const struct matrix_keymap_data *keymap_data = NULL;
+	u32 rows = 0, cols = 0;
+	bool rep = false;
+	bool irq_is_gpio = false;
+	int irq;
 	int error, row_shift, max_keys;
 
 	/* Copy the platform data */
-	if (!pdata) {
-		dev_dbg(&client->dev, "no platform data\n");
+	if (pdata) {
+		if (!pdata->keymap_data) {
+			dev_err(dev, "no keymap data defined\n");
+			return -EINVAL;
+		}
+		keymap_data = pdata->keymap_data;
+		rows = pdata->rows;
+		cols = pdata->cols;
+		rep  = pdata->rep;
+		irq_is_gpio = pdata->irq_is_gpio;
+	} else {
+		struct device_node *np = dev->of_node;
+		of_property_read_u32(np, "keypad,num-rows", &rows);
+		of_property_read_u32(np, "keypad,num-columns", &cols);
+		rep = of_property_read_bool(np, "keypad,autorepeat");
+	}
+
+	if (!rows || rows > TCA8418_MAX_ROWS) {
+		dev_err(dev, "invalid rows\n");
 		return -EINVAL;
 	}
 
-	if (!pdata->keymap_data) {
-		dev_err(&client->dev, "no keymap data defined\n");
-		return -EINVAL;
-	}
-
-	if (!pdata->rows || pdata->rows > TCA8418_MAX_ROWS) {
-		dev_err(&client->dev, "invalid rows\n");
-		return -EINVAL;
-	}
-
-	if (!pdata->cols || pdata->cols > TCA8418_MAX_COLS) {
-		dev_err(&client->dev, "invalid columns\n");
+	if (!cols || cols > TCA8418_MAX_COLS) {
+		dev_err(dev, "invalid columns\n");
 		return -EINVAL;
 	}
 
 	/* Check i2c driver capabilities */
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) {
-		dev_err(&client->dev, "%s adapter not supported\n",
+		dev_err(dev, "%s adapter not supported\n",
 			dev_driver_string(&client->adapter->dev));
 		return -ENODEV;
 	}
 
-	row_shift = get_count_order(pdata->cols);
-	max_keys = pdata->rows << row_shift;
+	row_shift = get_count_order(cols);
+	max_keys = rows << row_shift;
 
-	/* Allocate memory for keypad_data, keymap and input device */
-	keypad_data = kzalloc(sizeof(*keypad_data) +
-			max_keys * sizeof(keypad_data->keymap[0]), GFP_KERNEL);
+	/* Allocate memory for keypad_data and input device */
+	keypad_data = devm_kzalloc(dev, sizeof(*keypad_data), GFP_KERNEL);
 	if (!keypad_data)
 		return -ENOMEM;
 
-	keypad_data->rows = pdata->rows;
-	keypad_data->cols = pdata->cols;
 	keypad_data->client = client;
 	keypad_data->row_shift = row_shift;
 
 	/* Initialize the chip or fail if chip isn't present */
-	error = tca8418_configure(keypad_data);
+	error = tca8418_configure(keypad_data, rows, cols);
 	if (error < 0)
-		goto fail1;
+		return error;
 
 	/* Configure input device */
-	input = input_allocate_device();
-	if (!input) {
-		error = -ENOMEM;
-		goto fail1;
-	}
+	input = devm_input_allocate_device(dev);
+	if (!input)
+		return -ENOMEM;
+
 	keypad_data->input = input;
 
 	input->name = client->name;
-	input->dev.parent = &client->dev;
-
 	input->id.bustype = BUS_I2C;
 	input->id.vendor  = 0x0001;
 	input->id.product = 0x001;
 	input->id.version = 0x0001;
 
-	error = matrix_keypad_build_keymap(pdata->keymap_data, NULL,
-					   pdata->rows, pdata->cols,
-					   keypad_data->keymap, input);
+	error = matrix_keypad_build_keymap(keymap_data, NULL, rows, cols,
+					   NULL, input);
 	if (error) {
-		dev_dbg(&client->dev, "Failed to build keymap\n");
-		goto fail2;
+		dev_err(dev, "Failed to build keymap\n");
+		return error;
 	}
 
-	if (pdata->rep)
+	if (rep)
 		__set_bit(EV_REP, input->evbit);
 	input_set_capability(input, EV_MSC, MSC_SCAN);
 
 	input_set_drvdata(input, keypad_data);
 
-	if (pdata->irq_is_gpio)
-		client->irq = gpio_to_irq(client->irq);
+	irq = client->irq;
+	if (irq_is_gpio)
+		irq = gpio_to_irq(irq);
 
-	error = request_threaded_irq(client->irq, NULL, tca8418_irq_handler,
-				     IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				     client->name, keypad_data);
+	error = devm_request_threaded_irq(dev, irq, NULL, tca8418_irq_handler,
+					  IRQF_TRIGGER_FALLING |
+						IRQF_SHARED |
+						IRQF_ONESHOT,
+					  client->name, keypad_data);
 	if (error) {
-		dev_dbg(&client->dev,
-			"Unable to claim irq %d; error %d\n",
+		dev_err(dev, "Unable to claim irq %d; error %d\n",
 			client->irq, error);
-		goto fail2;
+		return error;
 	}
 
 	error = input_register_device(input);
 	if (error) {
-		dev_dbg(&client->dev,
-			"Unable to register input device, error: %d\n", error);
-		goto fail3;
+		dev_err(dev, "Unable to register input device, error: %d\n",
+			error);
+		return error;
 	}
 
-	i2c_set_clientdata(client, keypad_data);
-	return 0;
-
-fail3:
-	free_irq(client->irq, keypad_data);
-fail2:
-	input_free_device(input);
-fail1:
-	kfree(keypad_data);
-	return error;
-}
-
-static int __devexit tca8418_keypad_remove(struct i2c_client *client)
-{
-	struct tca8418_keypad *keypad_data = i2c_get_clientdata(client);
-
-	free_irq(keypad_data->client->irq, keypad_data);
-
-	input_unregister_device(keypad_data->input);
-
-	kfree(keypad_data);
-
 	return 0;
 }
 
+static const struct i2c_device_id tca8418_id[] = {
+	{ TCA8418_NAME, 8418, },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tca8418_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id tca8418_dt_ids[] = {
+	{ .compatible = "ti,tca8418", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tca8418_dt_ids);
+#endif
 
 static struct i2c_driver tca8418_keypad_driver = {
 	.driver = {
 		.name	= TCA8418_NAME,
 		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(tca8418_dt_ids),
 	},
 	.probe		= tca8418_keypad_probe,
-	.remove		= __devexit_p(tca8418_keypad_remove),
 	.id_table	= tca8418_id,
 };
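
Note the new early "if (!reg) return IRQ_NONE;" in the tca8418 handler above: returning IRQ_NONE when the chip's own status register shows nothing pending is what makes adding IRQF_SHARED safe, since the kernel's spurious-interrupt accounting relies on handlers not claiming events that belong to another device on the line. The pattern in isolation (foo_read_status/foo_handle_events/foo_ack are hypothetical helpers):

static irqreturn_t foo_irq_handler(int irq, void *dev_id)
{
	struct foo *foo = dev_id;
	u8 stat;

	if (foo_read_status(foo, &stat))
		return IRQ_NONE;	/* read failed: not our interrupt */

	if (!stat)
		return IRQ_NONE;	/* nothing pending: not ours */

	foo_handle_events(foo, stat);
	foo_ack(foo, stat);		/* clear only what we handled */

	return IRQ_HANDLED;
}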
 
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index 5faaf25..c76f968 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -87,7 +87,7 @@
 	struct clk *clk;
 };
 
-static const u32 tegra_kbc_default_keymap[] __devinitdata = {
+static const u32 tegra_kbc_default_keymap[] = {
 	KEY(0, 2, KEY_W),
 	KEY(0, 3, KEY_S),
 	KEY(0, 4, KEY_A),
@@ -223,7 +223,7 @@
 };
 
 static const
-struct matrix_keymap_data tegra_kbc_default_keymap_data __devinitdata = {
+struct matrix_keymap_data tegra_kbc_default_keymap_data = {
 	.keymap		= tegra_kbc_default_keymap,
 	.keymap_size	= ARRAY_SIZE(tegra_kbc_default_keymap),
 };
@@ -573,7 +573,7 @@
 	return tegra_kbc_stop(kbc);
 }
 
-static bool __devinit
+static bool
 tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
 			struct device *dev, unsigned int *num_rows)
 {
@@ -619,7 +619,7 @@
 }
 
 #ifdef CONFIG_OF
-static struct tegra_kbc_platform_data * __devinit tegra_kbc_dt_parse_pdata(
+static struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata(
 	struct platform_device *pdev)
 {
 	struct tegra_kbc_platform_data *pdata;
@@ -670,7 +670,7 @@
 }
 #endif
 
-static int __devinit tegra_kbd_setup_keymap(struct tegra_kbc *kbc)
+static int tegra_kbd_setup_keymap(struct tegra_kbc *kbc)
 {
 	const struct tegra_kbc_platform_data *pdata = kbc->pdata;
 	const struct matrix_keymap_data *keymap_data = pdata->keymap_data;
@@ -697,7 +697,7 @@
 	return retval;
 }
 
-static int __devinit tegra_kbc_probe(struct platform_device *pdev)
+static int tegra_kbc_probe(struct platform_device *pdev)
 {
 	const struct tegra_kbc_platform_data *pdata = pdev->dev.platform_data;
 	struct tegra_kbc *kbc;
@@ -838,7 +838,7 @@
 	return err;
 }
 
-static int __devexit tegra_kbc_remove(struct platform_device *pdev)
+static int tegra_kbc_remove(struct platform_device *pdev)
 {
 	struct tegra_kbc *kbc = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -954,7 +954,7 @@
 
 static struct platform_driver tegra_kbc_driver = {
 	.probe		= tegra_kbc_probe,
-	.remove		= __devexit_p(tegra_kbc_remove),
+	.remove		= tegra_kbc_remove,
 	.driver	= {
 		.name	= "tegra-kbc",
 		.owner  = THIS_MODULE,
diff --git a/drivers/input/keyboard/tnetv107x-keypad.c b/drivers/input/keyboard/tnetv107x-keypad.c
index 4c34f21..ee16350 100644
--- a/drivers/input/keyboard/tnetv107x-keypad.c
+++ b/drivers/input/keyboard/tnetv107x-keypad.c
@@ -153,7 +153,7 @@
 	clk_disable(kp->clk);
 }
 
-static int __devinit keypad_probe(struct platform_device *pdev)
+static int keypad_probe(struct platform_device *pdev)
 {
 	const struct matrix_keypad_platform_data *pdata;
 	const struct matrix_keymap_data *keymap_data;
@@ -301,7 +301,7 @@
 	return error;
 }
 
-static int __devexit keypad_remove(struct platform_device *pdev)
+static int keypad_remove(struct platform_device *pdev)
 {
 	struct keypad_data *kp = platform_get_drvdata(pdev);
 
@@ -319,7 +319,7 @@
 
 static struct platform_driver keypad_driver = {
 	.probe		= keypad_probe,
-	.remove		= __devexit_p(keypad_remove),
+	.remove		= keypad_remove,
 	.driver.name	= "tnetv107x-keypad",
 	.driver.owner	= THIS_MODULE,
 };
diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c
index a2c6f79..04f84fd 100644
--- a/drivers/input/keyboard/twl4030_keypad.c
+++ b/drivers/input/keyboard/twl4030_keypad.c
@@ -271,7 +271,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit twl4030_kp_program(struct twl4030_keypad *kp)
+static int twl4030_kp_program(struct twl4030_keypad *kp)
 {
 	u8 reg;
 	int i;
@@ -328,7 +328,7 @@
  * Registers keypad device with input subsystem
  * and configures TWL4030 keypad registers
  */
-static int __devinit twl4030_kp_probe(struct platform_device *pdev)
+static int twl4030_kp_probe(struct platform_device *pdev)
 {
 	struct twl4030_keypad_data *pdata = pdev->dev.platform_data;
 	const struct matrix_keymap_data *keymap_data;
@@ -432,7 +432,7 @@
 	return error;
 }
 
-static int __devexit twl4030_kp_remove(struct platform_device *pdev)
+static int twl4030_kp_remove(struct platform_device *pdev)
 {
 	struct twl4030_keypad *kp = platform_get_drvdata(pdev);
 
@@ -452,7 +452,7 @@
 
 static struct platform_driver twl4030_kp_driver = {
 	.probe		= twl4030_kp_probe,
-	.remove		= __devexit_p(twl4030_kp_remove),
+	.remove		= twl4030_kp_remove,
 	.driver		= {
 		.name	= "twl4030_keypad",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/keyboard/w90p910_keypad.c b/drivers/input/keyboard/w90p910_keypad.c
index e0f6cd1..ee163be 100644
--- a/drivers/input/keyboard/w90p910_keypad.c
+++ b/drivers/input/keyboard/w90p910_keypad.c
@@ -118,7 +118,7 @@
 	clk_disable(keypad->clk);
 }
 
-static int __devinit w90p910_keypad_probe(struct platform_device *pdev)
+static int w90p910_keypad_probe(struct platform_device *pdev)
 {
 	const struct w90p910_keypad_platform_data *pdata =
 						pdev->dev.platform_data;
@@ -234,7 +234,7 @@
 	return error;
 }
 
-static int __devexit w90p910_keypad_remove(struct platform_device *pdev)
+static int w90p910_keypad_remove(struct platform_device *pdev)
 {
 	struct w90p910_keypad *keypad = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -257,7 +257,7 @@
 
 static struct platform_driver w90p910_keypad_driver = {
 	.probe		= w90p910_keypad_probe,
-	.remove		= __devexit_p(w90p910_keypad_remove),
+	.remove		= w90p910_keypad_remove,
 	.driver		= {
 		.name	= "nuc900-kpi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/matrix-keymap.c b/drivers/input/matrix-keymap.c
index d88d9be..3ae496e 100644
--- a/drivers/input/matrix-keymap.c
+++ b/drivers/input/matrix-keymap.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/device.h>
+#include <linux/gfp.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/input.h>
@@ -123,6 +124,11 @@
  * it will attempt to load the keymap from the property specified by the @keymap_name
  * argument (or "linux,keymap" if @keymap_name is %NULL).
  *
+ * If @keymap is %NULL the function will automatically allocate a managed
+ * block of memory to store the keymap. This memory will be associated
+ * with the parent device and automatically freed when the device unbinds
+ * from the driver.
+ *
  * Callers are expected to set up input_dev->dev.parent before calling this
  * function.
  */
@@ -133,12 +139,27 @@
 			       struct input_dev *input_dev)
 {
 	unsigned int row_shift = get_count_order(cols);
+	size_t max_keys = rows << row_shift;
 	int i;
 	int error;
 
+	if (WARN_ON(!input_dev->dev.parent))
+		return -EINVAL;
+
+	if (!keymap) {
+		keymap = devm_kzalloc(input_dev->dev.parent,
+				      max_keys * sizeof(*keymap),
+				      GFP_KERNEL);
+		if (!keymap) {
+			dev_err(input_dev->dev.parent,
+				"Unable to allocate memory for keymap\n");
+			return -ENOMEM;
+		}
+	}
+
 	input_dev->keycode = keymap;
 	input_dev->keycodesize = sizeof(*keymap);
-	input_dev->keycodemax = rows << row_shift;
+	input_dev->keycodemax = max_keys;
 
 	__set_bit(EV_KEY, input_dev->evbit);
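
With the matrix-keymap change above, a driver that has set input_dev->dev.parent (devm_input_allocate_device() does this) may pass a NULL @keymap and let the core devm-allocate the keycode table; the tca8418 conversion earlier in this series already relies on it. The caller side, sketched:

input_dev = devm_input_allocate_device(&pdev->dev);
if (!input_dev)
	return -ENOMEM;

error = matrix_keypad_build_keymap(keymap_data, NULL, rows, cols,
				   NULL /* keymap: devm-allocated */,
				   input_dev);
if (error)
	return error;
/* input_dev->keycode now points at the managed table */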
 
diff --git a/drivers/input/misc/88pm80x_onkey.c b/drivers/input/misc/88pm80x_onkey.c
index 7f26e7b..ee43e5b 100644
--- a/drivers/input/misc/88pm80x_onkey.c
+++ b/drivers/input/misc/88pm80x_onkey.c
@@ -62,7 +62,7 @@
 static SIMPLE_DEV_PM_OPS(pm80x_onkey_pm_ops, pm80x_dev_suspend,
 			 pm80x_dev_resume);
 
-static int __devinit pm80x_onkey_probe(struct platform_device *pdev)
+static int pm80x_onkey_probe(struct platform_device *pdev)
 {
 
 	struct pm80x_chip *chip = dev_get_drvdata(pdev->dev.parent);
@@ -139,7 +139,7 @@
 	return err;
 }
 
-static int __devexit pm80x_onkey_remove(struct platform_device *pdev)
+static int pm80x_onkey_remove(struct platform_device *pdev)
 {
 	struct pm80x_onkey_info *info = platform_get_drvdata(pdev);
 
@@ -157,7 +157,7 @@
 		   .pm = &pm80x_onkey_pm_ops,
 		   },
 	.probe = pm80x_onkey_probe,
-	.remove = __devexit_p(pm80x_onkey_remove),
+	.remove = pm80x_onkey_remove,
 };
 
 module_platform_driver(pm80x_onkey_driver);
diff --git a/drivers/input/misc/88pm860x_onkey.c b/drivers/input/misc/88pm860x_onkey.c
index f9ce183..abd8453 100644
--- a/drivers/input/misc/88pm860x_onkey.c
+++ b/drivers/input/misc/88pm860x_onkey.c
@@ -56,7 +56,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit pm860x_onkey_probe(struct platform_device *pdev)
+static int pm860x_onkey_probe(struct platform_device *pdev)
 {
 	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
 	struct pm860x_onkey_info *info;
@@ -121,7 +121,7 @@
 	return ret;
 }
 
-static int __devexit pm860x_onkey_remove(struct platform_device *pdev)
+static int pm860x_onkey_remove(struct platform_device *pdev)
 {
 	struct pm860x_onkey_info *info = platform_get_drvdata(pdev);
 
@@ -161,7 +161,7 @@
 		.pm	= &pm860x_onkey_pm_ops,
 	},
 	.probe		= pm860x_onkey_probe,
-	.remove		= __devexit_p(pm860x_onkey_remove),
+	.remove		= pm860x_onkey_remove,
 };
 module_platform_driver(pm860x_onkey_driver);
 
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 104a7c3..259ef31 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -300,8 +300,7 @@
 	  called ati_remote2.
 
 config INPUT_KEYSPAN_REMOTE
-	tristate "Keyspan DMR USB remote control (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	tristate "Keyspan DMR USB remote control"
 	depends on USB_ARCH_HAS_HCD
 	select USB
 	help
@@ -350,7 +349,6 @@
 
 config INPUT_YEALINK
 	tristate "Yealink usb-p1k voip phone"
-	depends on EXPERIMENTAL
 	depends on USB_ARCH_HAS_HCD
 	select USB
 	help
@@ -366,7 +364,6 @@
 
 config INPUT_CM109
 	tristate "C-Media CM109 USB I/O Controller"
-	depends on EXPERIMENTAL
 	depends on USB_ARCH_HAS_HCD
 	select USB
 	help
@@ -377,6 +374,16 @@
 	  To compile this driver as a module, choose M here: the module will be
 	  called cm109.
 
+config INPUT_RETU_PWRBUTTON
+	tristate "Retu Power button Driver"
+	depends on MFD_RETU
+	help
+	  Say Y here if you want to enable power key reporting via the
+	  Retu chips found in Nokia Internet Tablets (770, N800, N810).
+
+	  To compile this driver as a module, choose M here. The module will
+	  be called retu-pwrbutton.
+
 config INPUT_TWL4030_PWRBUTTON
 	tristate "TWL4030 Power button Driver"
 	depends on TWL4030_CORE
@@ -444,7 +451,7 @@
 
 config INPUT_PCF8574
 	tristate "PCF8574 Keypad input device"
-	depends on I2C && EXPERIMENTAL
+	depends on I2C
 	help
 	  Say Y here if you want to support a keypad connected via I2C
 	  with a PCF8574.
@@ -454,7 +461,7 @@
 
 config INPUT_PWM_BEEPER
 	tristate "PWM beeper support"
-	depends on HAVE_PWM
+	depends on HAVE_PWM || PWM
 	help
 	  Say Y here to get support for PWM based beeper devices.
 
@@ -496,6 +503,16 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called da9052_onkey.
 
+config INPUT_DA9055_ONKEY
+	tristate "Dialog Semiconductor DA9055 ONKEY"
+	depends on MFD_DA9055
+	help
+	  Support the ONKEY of DA9055 PMICs as an input device
+	  reporting power button status.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called da9055_onkey.
+
 config INPUT_DM355EVM
 	tristate "TI DaVinci DM355 EVM Keypad and IR Remote"
 	depends on MFD_DM355EVM_MSP
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 5ea769e..1f1e1b1 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -24,6 +24,7 @@
 obj-$(CONFIG_INPUT_CMA3000_I2C)		+= cma3000_d0x_i2c.o
 obj-$(CONFIG_INPUT_COBALT_BTNS)		+= cobalt_btns.o
 obj-$(CONFIG_INPUT_DA9052_ONKEY)	+= da9052_onkey.o
+obj-$(CONFIG_INPUT_DA9055_ONKEY)	+= da9055_onkey.o
 obj-$(CONFIG_INPUT_DM355EVM)		+= dm355evm_keys.o
 obj-$(CONFIG_INPUT_GP2A)		+= gp2ap002a00f.o
 obj-$(CONFIG_INPUT_GPIO_TILT_POLLED)	+= gpio_tilt_polled.o
@@ -46,6 +47,7 @@
 obj-$(CONFIG_INPUT_POWERMATE)		+= powermate.o
 obj-$(CONFIG_INPUT_PWM_BEEPER)		+= pwm-beeper.o
 obj-$(CONFIG_INPUT_RB532_BUTTON)	+= rb532_button.o
+obj-$(CONFIG_INPUT_RETU_PWRBUTTON)	+= retu-pwrbutton.o
 obj-$(CONFIG_INPUT_GPIO_ROTARY_ENCODER)	+= rotary_encoder.o
 obj-$(CONFIG_INPUT_SGI_BTNS)		+= sgi_btns.o
 obj-$(CONFIG_INPUT_SPARCSPKR)		+= sparcspkr.o
diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c
index 84ec691..2f090b4 100644
--- a/drivers/input/misc/ab8500-ponkey.c
+++ b/drivers/input/misc/ab8500-ponkey.c
@@ -45,7 +45,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit ab8500_ponkey_probe(struct platform_device *pdev)
+static int ab8500_ponkey_probe(struct platform_device *pdev)
 {
 	struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent);
 	struct ab8500_ponkey *ponkey;
@@ -118,7 +118,7 @@
 	return error;
 }
 
-static int __devexit ab8500_ponkey_remove(struct platform_device *pdev)
+static int ab8500_ponkey_remove(struct platform_device *pdev)
 {
 	struct ab8500_ponkey *ponkey = platform_get_drvdata(pdev);
 
@@ -146,7 +146,7 @@
 		.of_match_table = of_match_ptr(ab8500_ponkey_match),
 	},
 	.probe		= ab8500_ponkey_probe,
-	.remove		= __devexit_p(ab8500_ponkey_remove),
+	.remove		= ab8500_ponkey_remove,
 };
 module_platform_driver(ab8500_ponkey_driver);
 
diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c
index c8a7901..29d2064 100644
--- a/drivers/input/misc/ad714x-i2c.c
+++ b/drivers/input/misc/ad714x-i2c.c
@@ -72,7 +72,7 @@
 	return 0;
 }
 
-static int __devinit ad714x_i2c_probe(struct i2c_client *client,
+static int ad714x_i2c_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct ad714x_chip *chip;
@@ -87,7 +87,7 @@
 	return 0;
 }
 
-static int __devexit ad714x_i2c_remove(struct i2c_client *client)
+static int ad714x_i2c_remove(struct i2c_client *client)
 {
 	struct ad714x_chip *chip = i2c_get_clientdata(client);
 
@@ -112,7 +112,7 @@
 		.pm   = &ad714x_i2c_pm,
 	},
 	.probe    = ad714x_i2c_probe,
-	.remove   = __devexit_p(ad714x_i2c_remove),
+	.remove   = ad714x_i2c_remove,
 	.id_table = ad714x_id,
 };
 
diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c
index 75f6136..bdccca4 100644
--- a/drivers/input/misc/ad714x-spi.c
+++ b/drivers/input/misc/ad714x-spi.c
@@ -83,7 +83,7 @@
 	return 0;
 }
 
-static int __devinit ad714x_spi_probe(struct spi_device *spi)
+static int ad714x_spi_probe(struct spi_device *spi)
 {
 	struct ad714x_chip *chip;
 	int err;
@@ -103,7 +103,7 @@
 	return 0;
 }
 
-static int __devexit ad714x_spi_remove(struct spi_device *spi)
+static int ad714x_spi_remove(struct spi_device *spi)
 {
 	struct ad714x_chip *chip = spi_get_drvdata(spi);
 
@@ -120,7 +120,7 @@
 		.pm	= &ad714x_spi_pm,
 	},
 	.probe		= ad714x_spi_probe,
-	.remove		= __devexit_p(ad714x_spi_remove),
+	.remove		= ad714x_spi_remove,
 };
 
 module_spi_driver(ad714x_spi_driver);
diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c
index dd1d1c1..535dda4 100644
--- a/drivers/input/misc/adxl34x-i2c.c
+++ b/drivers/input/misc/adxl34x-i2c.c
@@ -73,7 +73,7 @@
 	.read_block	= adxl34x_i2c_read_block,
 };
 
-static int __devinit adxl34x_i2c_probe(struct i2c_client *client,
+static int adxl34x_i2c_probe(struct i2c_client *client,
 				       const struct i2c_device_id *id)
 {
 	struct adxl34x *ac;
@@ -98,7 +98,7 @@
 	return 0;
 }
 
-static int __devexit adxl34x_i2c_remove(struct i2c_client *client)
+static int adxl34x_i2c_remove(struct i2c_client *client)
 {
 	struct adxl34x *ac = i2c_get_clientdata(client);
 
@@ -144,7 +144,7 @@
 		.pm = &adxl34x_i2c_pm,
 	},
 	.probe    = adxl34x_i2c_probe,
-	.remove   = __devexit_p(adxl34x_i2c_remove),
+	.remove   = adxl34x_i2c_remove,
 	.id_table = adxl34x_id,
 };
 
diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c
index 820a802..ad5f40d 100644
--- a/drivers/input/misc/adxl34x-spi.c
+++ b/drivers/input/misc/adxl34x-spi.c
@@ -65,7 +65,7 @@
 	.read_block	= adxl34x_spi_read_block,
 };
 
-static int __devinit adxl34x_spi_probe(struct spi_device *spi)
+static int adxl34x_spi_probe(struct spi_device *spi)
 {
 	struct adxl34x *ac;
 
@@ -87,7 +87,7 @@
 	return 0;
 }
 
-static int __devexit adxl34x_spi_remove(struct spi_device *spi)
+static int adxl34x_spi_remove(struct spi_device *spi)
 {
 	struct adxl34x *ac = dev_get_drvdata(&spi->dev);
 
@@ -126,7 +126,7 @@
 		.pm = &adxl34x_spi_pm,
 	},
 	.probe   = adxl34x_spi_probe,
-	.remove  = __devexit_p(adxl34x_spi_remove),
+	.remove  = adxl34x_spi_remove,
 };
 
 module_spi_driver(adxl34x_driver);
diff --git a/drivers/input/misc/bfin_rotary.c b/drivers/input/misc/bfin_rotary.c
index 1c4146f..a6666e1 100644
--- a/drivers/input/misc/bfin_rotary.c
+++ b/drivers/input/misc/bfin_rotary.c
@@ -90,7 +90,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit bfin_rotary_probe(struct platform_device *pdev)
+static int bfin_rotary_probe(struct platform_device *pdev)
 {
 	struct bfin_rotary_platform_data *pdata = pdev->dev.platform_data;
 	struct bfin_rot *rotary;
@@ -196,7 +196,7 @@
 	return error;
 }
 
-static int __devexit bfin_rotary_remove(struct platform_device *pdev)
+static int bfin_rotary_remove(struct platform_device *pdev)
 {
 	struct bfin_rot *rotary = platform_get_drvdata(pdev);
 
@@ -255,7 +255,7 @@
 
 static struct platform_driver bfin_rotary_device_driver = {
 	.probe		= bfin_rotary_probe,
-	.remove		= __devexit_p(bfin_rotary_remove),
+	.remove		= bfin_rotary_remove,
 	.driver		= {
 		.name	= "bfin-rotary",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/bma150.c b/drivers/input/misc/bma150.c
index e2f1e9f..08ffcab 100644
--- a/drivers/input/misc/bma150.c
+++ b/drivers/input/misc/bma150.c
@@ -158,7 +158,7 @@
  * are stated and verified by Bosch Sensortec where they are configured
  * to provide a generic sensitivity performance.
  */
-static struct bma150_cfg default_cfg __devinitdata = {
+static struct bma150_cfg default_cfg = {
 	.any_motion_int = 1,
 	.hg_int = 1,
 	.lg_int = 1,
@@ -224,7 +224,7 @@
 	return 0;
 }
 
-static int __devinit bma150_soft_reset(struct bma150_data *bma150)
+static int bma150_soft_reset(struct bma150_data *bma150)
 {
 	int error;
 
@@ -237,19 +237,19 @@
 	return 0;
 }
 
-static int __devinit bma150_set_range(struct bma150_data *bma150, u8 range)
+static int bma150_set_range(struct bma150_data *bma150, u8 range)
 {
 	return bma150_set_reg_bits(bma150->client, range, BMA150_RANGE_POS,
 				BMA150_RANGE_MSK, BMA150_RANGE_REG);
 }
 
-static int __devinit bma150_set_bandwidth(struct bma150_data *bma150, u8 bw)
+static int bma150_set_bandwidth(struct bma150_data *bma150, u8 bw)
 {
 	return bma150_set_reg_bits(bma150->client, bw, BMA150_BANDWIDTH_POS,
 				BMA150_BANDWIDTH_MSK, BMA150_BANDWIDTH_REG);
 }
 
-static int __devinit bma150_set_low_g_interrupt(struct bma150_data *bma150,
+static int bma150_set_low_g_interrupt(struct bma150_data *bma150,
 					u8 enable, u8 hyst, u8 dur, u8 thres)
 {
 	int error;
@@ -273,7 +273,7 @@
 				BMA150_LOW_G_EN_REG);
 }
 
-static int __devinit bma150_set_high_g_interrupt(struct bma150_data *bma150,
+static int bma150_set_high_g_interrupt(struct bma150_data *bma150,
 					u8 enable, u8 hyst, u8 dur, u8 thres)
 {
 	int error;
@@ -300,7 +300,7 @@
 }
 
 
-static int __devinit bma150_set_any_motion_interrupt(struct bma150_data *bma150,
+static int bma150_set_any_motion_interrupt(struct bma150_data *bma150,
 						u8 enable, u8 dur, u8 thres)
 {
 	int error;
@@ -424,7 +424,7 @@
 	bma150_close(bma150);
 }
 
-static int __devinit bma150_initialize(struct bma150_data *bma150,
+static int bma150_initialize(struct bma150_data *bma150,
 				       const struct bma150_cfg *cfg)
 {
 	int error;
@@ -465,7 +465,7 @@
 	return bma150_set_mode(bma150, BMA150_MODE_SLEEP);
 }
 
-static void __devinit bma150_init_input_device(struct bma150_data *bma150,
+static void bma150_init_input_device(struct bma150_data *bma150,
 						struct input_dev *idev)
 {
 	idev->name = BMA150_DRIVER;
@@ -479,7 +479,7 @@
 	input_set_abs_params(idev, ABS_Z, ABSMIN_ACC_VAL, ABSMAX_ACC_VAL, 0, 0);
 }
 
-static int __devinit bma150_register_input_device(struct bma150_data *bma150)
+static int bma150_register_input_device(struct bma150_data *bma150)
 {
 	struct input_dev *idev;
 	int error;
@@ -504,7 +504,7 @@
 	return 0;
 }
 
-static int __devinit bma150_register_polled_device(struct bma150_data *bma150)
+static int bma150_register_polled_device(struct bma150_data *bma150)
 {
 	struct input_polled_dev *ipoll_dev;
 	int error;
@@ -535,7 +535,7 @@
 	return 0;
 }
 
-static int __devinit bma150_probe(struct i2c_client *client,
+static int bma150_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	const struct bma150_platform_data *pdata = client->dev.platform_data;
@@ -613,7 +613,7 @@
 	return error;
 }
 
-static int __devexit bma150_remove(struct i2c_client *client)
+static int bma150_remove(struct i2c_client *client)
 {
 	struct bma150_data *bma150 = i2c_get_clientdata(client);
 
@@ -670,7 +670,7 @@
 	.class		= I2C_CLASS_HWMON,
 	.id_table	= bma150_id,
 	.probe		= bma150_probe,
-	.remove		= __devexit_p(bma150_remove),
+	.remove		= bma150_remove,
 };
 
 module_i2c_driver(bma150_driver);
diff --git a/drivers/input/misc/cma3000_d0x_i2c.c b/drivers/input/misc/cma3000_d0x_i2c.c
index fe9b85f..4fdef98 100644
--- a/drivers/input/misc/cma3000_d0x_i2c.c
+++ b/drivers/input/misc/cma3000_d0x_i2c.c
@@ -55,7 +55,7 @@
 	.write		= cma3000_i2c_set,
 };
 
-static int __devinit cma3000_i2c_probe(struct i2c_client *client,
+static int cma3000_i2c_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct cma3000_accl_data *data;
@@ -69,7 +69,7 @@
 	return 0;
 }
 
-static int __devexit cma3000_i2c_remove(struct i2c_client *client)
+static int cma3000_i2c_remove(struct i2c_client *client)
 {
 	struct cma3000_accl_data *data = i2c_get_clientdata(client);
 
@@ -114,7 +114,7 @@
 
 static struct i2c_driver cma3000_i2c_driver = {
 	.probe		= cma3000_i2c_probe,
-	.remove		= __devexit_p(cma3000_i2c_remove),
+	.remove		= cma3000_i2c_remove,
 	.id_table	= cma3000_i2c_id,
 	.driver = {
 		.name	= "cma3000_i2c_accl",
diff --git a/drivers/input/misc/cobalt_btns.c b/drivers/input/misc/cobalt_btns.c
index 53e43d2..4f77f87 100644
--- a/drivers/input/misc/cobalt_btns.c
+++ b/drivers/input/misc/cobalt_btns.c
@@ -73,7 +73,7 @@
 	}
 }
 
-static int __devinit cobalt_buttons_probe(struct platform_device *pdev)
+static int cobalt_buttons_probe(struct platform_device *pdev)
 {
 	struct buttons_dev *bdev;
 	struct input_polled_dev *poll_dev;
@@ -135,7 +135,7 @@
 	return error;
 }
 
-static int __devexit cobalt_buttons_remove(struct platform_device *pdev)
+static int cobalt_buttons_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct buttons_dev *bdev = dev_get_drvdata(dev);
@@ -157,7 +157,7 @@
 
 static struct platform_driver cobalt_buttons_driver = {
 	.probe	= cobalt_buttons_probe,
-	.remove	= __devexit_p(cobalt_buttons_remove),
+	.remove	= cobalt_buttons_remove,
 	.driver	= {
 		.name	= "Cobalt buttons",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/da9052_onkey.c b/drivers/input/misc/da9052_onkey.c
index 3c843cd..020569a 100644
--- a/drivers/input/misc/da9052_onkey.c
+++ b/drivers/input/misc/da9052_onkey.c
@@ -24,7 +24,6 @@
 	struct da9052 *da9052;
 	struct input_dev *input;
 	struct delayed_work work;
-	unsigned int irq;
 };
 
 static void da9052_onkey_query(struct da9052_onkey *onkey)
@@ -71,12 +70,11 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit da9052_onkey_probe(struct platform_device *pdev)
+static int da9052_onkey_probe(struct platform_device *pdev)
 {
 	struct da9052 *da9052 = dev_get_drvdata(pdev->dev.parent);
 	struct da9052_onkey *onkey;
 	struct input_dev *input_dev;
-	int irq;
 	int error;
 
 	if (!da9052) {
@@ -84,13 +82,6 @@
 		return -EINVAL;
 	}
 
-	irq = platform_get_irq_byname(pdev, "ONKEY");
-	if (irq < 0) {
-		dev_err(&pdev->dev,
-			"Failed to get an IRQ for input device, %d\n", irq);
-		return -EINVAL;
-	}
-
 	onkey = kzalloc(sizeof(*onkey), GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!onkey || !input_dev) {
@@ -101,7 +92,6 @@
 
 	onkey->input = input_dev;
 	onkey->da9052 = da9052;
-	onkey->irq = irq;
 	INIT_DELAYED_WORK(&onkey->work, da9052_onkey_work);
 
 	input_dev->name = "da9052-onkey";
@@ -111,13 +101,11 @@
 	input_dev->evbit[0] = BIT_MASK(EV_KEY);
 	__set_bit(KEY_POWER, input_dev->keybit);
 
-	error = request_threaded_irq(onkey->irq, NULL, da9052_onkey_irq,
-				     IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				     "ONKEY", onkey);
+	error = da9052_request_irq(onkey->da9052, DA9052_IRQ_NONKEY, "ONKEY",
+			    da9052_onkey_irq, onkey);
 	if (error < 0) {
 		dev_err(onkey->da9052->dev,
-			"Failed to register ONKEY IRQ %d, error = %d\n",
-			onkey->irq, error);
+			"Failed to register ONKEY IRQ: %d\n", error);
 		goto err_free_mem;
 	}
 
@@ -132,7 +120,7 @@
 	return 0;
 
 err_free_irq:
-	free_irq(onkey->irq, onkey);
+	da9052_free_irq(onkey->da9052, DA9052_IRQ_NONKEY, onkey);
 	cancel_delayed_work_sync(&onkey->work);
 err_free_mem:
 	input_free_device(input_dev);
@@ -141,11 +129,11 @@
 	return error;
 }
 
-static int __devexit da9052_onkey_remove(struct platform_device *pdev)
+static int da9052_onkey_remove(struct platform_device *pdev)
 {
 	struct da9052_onkey *onkey = platform_get_drvdata(pdev);
 
-	free_irq(onkey->irq, onkey);
+	da9052_free_irq(onkey->da9052, DA9052_IRQ_NONKEY, onkey);
 	cancel_delayed_work_sync(&onkey->work);
 
 	input_unregister_device(onkey->input);
@@ -156,7 +144,7 @@
 
 static struct platform_driver da9052_onkey_driver = {
 	.probe	= da9052_onkey_probe,
-	.remove	= __devexit_p(da9052_onkey_remove),
+	.remove	= da9052_onkey_remove,
 	.driver = {
 		.name	= "da9052-onkey",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/da9055_onkey.c b/drivers/input/misc/da9055_onkey.c
new file mode 100644
index 0000000..ee6ae3a
--- /dev/null
+++ b/drivers/input/misc/da9055_onkey.c
@@ -0,0 +1,171 @@
+/*
+ * ON pin driver for Dialog DA9055 PMICs
+ *
+ * Copyright(c) 2012 Dialog Semiconductor Ltd.
+ *
+ * Author: David Dajun Chen <dchen@diasemi.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <linux/mfd/da9055/core.h>
+#include <linux/mfd/da9055/reg.h>
+
+struct da9055_onkey {
+	struct da9055 *da9055;
+	struct input_dev *input;
+	struct delayed_work work;
+};
+
+static void da9055_onkey_query(struct da9055_onkey *onkey)
+{
+	int key_stat;
+
+	key_stat = da9055_reg_read(onkey->da9055, DA9055_REG_STATUS_A);
+	if (key_stat < 0) {
+		dev_err(onkey->da9055->dev,
+			"Failed to read onkey event %d\n", key_stat);
+	} else {
+		key_stat &= DA9055_NOKEY_STS;
+		/*
+		 * Onkey status bit is cleared when the onkey button is released.
+		 */
+		if (!key_stat) {
+			input_report_key(onkey->input, KEY_POWER, 0);
+			input_sync(onkey->input);
+		}
+	}
+
+	/*
+	 * Interrupt is generated only when the ONKEY pin is asserted.
+	 * Hence the deassertion of the pin is simulated through a work queue.
+	 */
+	if (key_stat)
+		schedule_delayed_work(&onkey->work, msecs_to_jiffies(10));
+
+}
+
+static void da9055_onkey_work(struct work_struct *work)
+{
+	struct da9055_onkey *onkey = container_of(work, struct da9055_onkey,
+						  work.work);
+
+	da9055_onkey_query(onkey);
+}
+
+static irqreturn_t da9055_onkey_irq(int irq, void *data)
+{
+	struct da9055_onkey *onkey = data;
+
+	input_report_key(onkey->input, KEY_POWER, 1);
+	input_sync(onkey->input);
+
+	da9055_onkey_query(onkey);
+
+	return IRQ_HANDLED;
+}
+
+static int da9055_onkey_probe(struct platform_device *pdev)
+{
+	struct da9055 *da9055 = dev_get_drvdata(pdev->dev.parent);
+	struct da9055_onkey *onkey;
+	struct input_dev *input_dev;
+	int irq, err;
+
+	irq = platform_get_irq_byname(pdev, "ONKEY");
+	if (irq < 0) {
+		dev_err(&pdev->dev,
+			"Failed to get an IRQ for input device, %d\n", irq);
+		return -EINVAL;
+	}
+
+	onkey = devm_kzalloc(&pdev->dev, sizeof(*onkey), GFP_KERNEL);
+	if (!onkey) {
+		dev_err(&pdev->dev, "Failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	input_dev = input_allocate_device();
+	if (!input_dev) {
+		dev_err(&pdev->dev, "Failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	onkey->input = input_dev;
+	onkey->da9055 = da9055;
+	input_dev->name = "da9055-onkey";
+	input_dev->phys = "da9055-onkey/input0";
+	input_dev->dev.parent = &pdev->dev;
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
+	__set_bit(KEY_POWER, input_dev->keybit);
+
+	INIT_DELAYED_WORK(&onkey->work, da9055_onkey_work);
+
+	irq = regmap_irq_get_virq(da9055->irq_data, irq);
+	err = request_threaded_irq(irq, NULL, da9055_onkey_irq,
+				   IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+				   "ONKEY", onkey);
+	if (err < 0) {
+		dev_err(&pdev->dev,
+			"Failed to register ONKEY IRQ %d, error = %d\n",
+			irq, err);
+		goto err_free_input;
+	}
+
+	err = input_register_device(input_dev);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to register input device, %d\n",
+			err);
+		goto err_free_irq;
+	}
+
+	platform_set_drvdata(pdev, onkey);
+
+	return 0;
+
+err_free_irq:
+	free_irq(irq, onkey);
+	cancel_delayed_work_sync(&onkey->work);
+err_free_input:
+	input_free_device(input_dev);
+
+	return err;
+}
+
+static int da9055_onkey_remove(struct platform_device *pdev)
+{
+	struct da9055_onkey *onkey = platform_get_drvdata(pdev);
+	int irq = platform_get_irq_byname(pdev, "ONKEY");
+
+	irq = regmap_irq_get_virq(onkey->da9055->irq_data, irq);
+	free_irq(irq, onkey);
+	cancel_delayed_work_sync(&onkey->work);
+	input_unregister_device(onkey->input);
+
+	return 0;
+}
+
+static struct platform_driver da9055_onkey_driver = {
+	.probe	= da9055_onkey_probe,
+	.remove	= da9055_onkey_remove,
+	.driver = {
+		.name	= "da9055-onkey",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(da9055_onkey_driver);
+
+MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
+MODULE_DESCRIPTION("Onkey driver for DA9055");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:da9055-onkey");
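
The IRQ handling in da9055_onkey_probe() above runs the platform resource through regmap_irq_get_virq() first: the "ONKEY" resource carries the chip-local regmap-irq index, not a Linux interrupt number, so it must be mapped to a virq before request_threaded_irq(). Reduced to the essential steps (field names as in the driver above):

irq = platform_get_irq_byname(pdev, "ONKEY");
if (irq < 0)
	return -EINVAL;

/* translate the chip-local index to a Linux virq */
irq = regmap_irq_get_virq(da9055->irq_data, irq);

err = request_threaded_irq(irq, NULL, da9055_onkey_irq,
			   IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
			   "ONKEY", onkey);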
diff --git a/drivers/input/misc/dm355evm_keys.c b/drivers/input/misc/dm355evm_keys.c
index c1313d8..a309a5c 100644
--- a/drivers/input/misc/dm355evm_keys.c
+++ b/drivers/input/misc/dm355evm_keys.c
@@ -173,7 +173,7 @@
 
 /*----------------------------------------------------------------------*/
 
-static int __devinit dm355evm_keys_probe(struct platform_device *pdev)
+static int dm355evm_keys_probe(struct platform_device *pdev)
 {
 	struct dm355evm_keys	*keys;
 	struct input_dev	*input;
@@ -239,7 +239,7 @@
 	return status;
 }
 
-static int __devexit dm355evm_keys_remove(struct platform_device *pdev)
+static int dm355evm_keys_remove(struct platform_device *pdev)
 {
 	struct dm355evm_keys	*keys = platform_get_drvdata(pdev);
 
@@ -262,7 +262,7 @@
  */
 static struct platform_driver dm355evm_keys_driver = {
 	.probe		= dm355evm_keys_probe,
-	.remove		= __devexit_p(dm355evm_keys_remove),
+	.remove		= dm355evm_keys_remove,
 	.driver		= {
 		.owner	= THIS_MODULE,
 		.name	= "dm355evm_keys",
diff --git a/drivers/input/misc/gp2ap002a00f.c b/drivers/input/misc/gp2ap002a00f.c
index b6664cf..fe30bd0 100644
--- a/drivers/input/misc/gp2ap002a00f.c
+++ b/drivers/input/misc/gp2ap002a00f.c
@@ -98,7 +98,7 @@
 			"unable to deactivate, err %d\n", error);
 }
 
-static int __devinit gp2a_initialize(struct gp2a_data *dt)
+static int gp2a_initialize(struct gp2a_data *dt)
 {
 	int error;
 
@@ -122,7 +122,7 @@
 	return error;
 }
 
-static int __devinit gp2a_probe(struct i2c_client *client,
+static int gp2a_probe(struct i2c_client *client,
 				const struct i2c_device_id *id)
 {
 	const struct gp2a_platform_data *pdata = client->dev.platform_data;
@@ -205,7 +205,7 @@
 	return error;
 }
 
-static int __devexit gp2a_remove(struct i2c_client *client)
+static int gp2a_remove(struct i2c_client *client)
 {
 	struct gp2a_data *dt = i2c_get_clientdata(client);
 	const struct gp2a_platform_data *pdata = dt->pdata;
@@ -277,7 +277,7 @@
 		.pm	= &gp2a_pm,
 	},
 	.probe		= gp2a_probe,
-	.remove		= __devexit_p(gp2a_remove),
+	.remove		= gp2a_remove,
 	.id_table	= gp2a_i2c_id,
 };
 
diff --git a/drivers/input/misc/gpio_tilt_polled.c b/drivers/input/misc/gpio_tilt_polled.c
index 277a057..da05cca 100644
--- a/drivers/input/misc/gpio_tilt_polled.c
+++ b/drivers/input/misc/gpio_tilt_polled.c
@@ -96,7 +96,7 @@
 		pdata->disable(tdev->dev);
 }
 
-static int __devinit gpio_tilt_polled_probe(struct platform_device *pdev)
+static int gpio_tilt_polled_probe(struct platform_device *pdev)
 {
 	const struct gpio_tilt_platform_data *pdata = pdev->dev.platform_data;
 	struct device *dev = &pdev->dev;
@@ -179,7 +179,7 @@
 	return error;
 }
 
-static int __devexit gpio_tilt_polled_remove(struct platform_device *pdev)
+static int gpio_tilt_polled_remove(struct platform_device *pdev)
 {
 	struct gpio_tilt_polled_dev *tdev = platform_get_drvdata(pdev);
 	const struct gpio_tilt_platform_data *pdata = tdev->pdata;
@@ -198,7 +198,7 @@
 
 static struct platform_driver gpio_tilt_polled_driver = {
 	.probe	= gpio_tilt_polled_probe,
-	.remove	= __devexit_p(gpio_tilt_polled_remove),
+	.remove	= gpio_tilt_polled_remove,
 	.driver	= {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/ixp4xx-beeper.c b/drivers/input/misc/ixp4xx-beeper.c
index 50e2830..6ab3dec 100644
--- a/drivers/input/misc/ixp4xx-beeper.c
+++ b/drivers/input/misc/ixp4xx-beeper.c
@@ -87,7 +87,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit ixp4xx_spkr_probe(struct platform_device *dev)
+static int ixp4xx_spkr_probe(struct platform_device *dev)
 {
 	struct input_dev *input_dev;
 	int err;
@@ -132,7 +132,7 @@
 	return err;
 }
 
-static int __devexit ixp4xx_spkr_remove(struct platform_device *dev)
+static int ixp4xx_spkr_remove(struct platform_device *dev)
 {
 	struct input_dev *input_dev = platform_get_drvdata(dev);
 	unsigned int pin = (unsigned int) input_get_drvdata(input_dev);
@@ -165,7 +165,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= ixp4xx_spkr_probe,
-	.remove		= __devexit_p(ixp4xx_spkr_remove),
+	.remove		= ixp4xx_spkr_remove,
 	.shutdown	= ixp4xx_spkr_shutdown,
 };
 module_platform_driver(ixp4xx_spkr_platform_driver);
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
index f46139f..a993b67 100644
--- a/drivers/input/misc/kxtj9.c
+++ b/drivers/input/misc/kxtj9.c
@@ -295,7 +295,7 @@
 	kxtj9_disable(tj9);
 }
 
-static void __devinit kxtj9_init_input_device(struct kxtj9_data *tj9,
+static void kxtj9_init_input_device(struct kxtj9_data *tj9,
 					      struct input_dev *input_dev)
 {
 	__set_bit(EV_ABS, input_dev->evbit);
@@ -308,7 +308,7 @@
 	input_dev->dev.parent = &tj9->client->dev;
 }
 
-static int __devinit kxtj9_setup_input_device(struct kxtj9_data *tj9)
+static int kxtj9_setup_input_device(struct kxtj9_data *tj9)
 {
 	struct input_dev *input_dev;
 	int err;
@@ -433,7 +433,7 @@
 	kxtj9_disable(tj9);
 }
 
-static int __devinit kxtj9_setup_polled_device(struct kxtj9_data *tj9)
+static int kxtj9_setup_polled_device(struct kxtj9_data *tj9)
 {
 	int err;
 	struct input_polled_dev *poll_dev;
@@ -466,7 +466,7 @@
 	return 0;
 }
 
-static void __devexit kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
+static void kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
 {
 	input_unregister_polled_device(tj9->poll_dev);
 	input_free_polled_device(tj9->poll_dev);
@@ -485,7 +485,7 @@
 
 #endif
 
-static int __devinit kxtj9_verify(struct kxtj9_data *tj9)
+static int kxtj9_verify(struct kxtj9_data *tj9)
 {
 	int retval;
 
@@ -506,7 +506,7 @@
 	return retval;
 }
 
-static int __devinit kxtj9_probe(struct i2c_client *client,
+static int kxtj9_probe(struct i2c_client *client,
 				 const struct i2c_device_id *id)
 {
 	const struct kxtj9_platform_data *pdata = client->dev.platform_data;
@@ -594,7 +594,7 @@
 	return err;
 }
 
-static int __devexit kxtj9_remove(struct i2c_client *client)
+static int kxtj9_remove(struct i2c_client *client)
 {
 	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
 
@@ -663,7 +663,7 @@
 		.pm	= &kxtj9_pm_ops,
 	},
 	.probe		= kxtj9_probe,
-	.remove		= __devexit_p(kxtj9_remove),
+	.remove		= kxtj9_remove,
 	.id_table	= kxtj9_id,
 };
 
diff --git a/drivers/input/misc/m68kspkr.c b/drivers/input/misc/m68kspkr.c
index 0c64d9b..b40ee4b 100644
--- a/drivers/input/misc/m68kspkr.c
+++ b/drivers/input/misc/m68kspkr.c
@@ -48,7 +48,7 @@
 	return 0;
 }
 
-static int __devinit m68kspkr_probe(struct platform_device *dev)
+static int m68kspkr_probe(struct platform_device *dev)
 {
 	struct input_dev *input_dev;
 	int err;
@@ -80,7 +80,7 @@
 	return 0;
 }
 
-static int __devexit m68kspkr_remove(struct platform_device *dev)
+static int m68kspkr_remove(struct platform_device *dev)
 {
 	struct input_dev *input_dev = platform_get_drvdata(dev);
 
@@ -104,7 +104,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= m68kspkr_probe,
-	.remove		= __devexit_p(m68kspkr_remove),
+	.remove		= m68kspkr_remove,
 	.shutdown	= m68kspkr_shutdown,
 };
 
diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c
index 0a12b74..369a39d 100644
--- a/drivers/input/misc/max8925_onkey.c
+++ b/drivers/input/misc/max8925_onkey.c
@@ -62,7 +62,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit max8925_onkey_probe(struct platform_device *pdev)
+static int max8925_onkey_probe(struct platform_device *pdev)
 {
 	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
 	struct max8925_onkey_info *info;
@@ -141,7 +141,7 @@
 	return error;
 }
 
-static int __devexit max8925_onkey_remove(struct platform_device *pdev)
+static int max8925_onkey_remove(struct platform_device *pdev)
 {
 	struct max8925_onkey_info *info = platform_get_drvdata(pdev);
 	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
@@ -195,7 +195,7 @@
 		.pm	= &max8925_onkey_pm_ops,
 	},
 	.probe		= max8925_onkey_probe,
-	.remove		= __devexit_p(max8925_onkey_remove),
+	.remove		= max8925_onkey_remove,
 };
 module_platform_driver(max8925_onkey_driver);
 
diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c
index 05b7b8b..e973133 100644
--- a/drivers/input/misc/max8997_haptic.c
+++ b/drivers/input/misc/max8997_haptic.c
@@ -241,7 +241,7 @@
 	max8997_haptic_disable(chip);
 }
 
-static int __devinit max8997_haptic_probe(struct platform_device *pdev)
+static int max8997_haptic_probe(struct platform_device *pdev)
 {
 	struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
 	const struct max8997_platform_data *pdata =
@@ -354,7 +354,7 @@
 	return error;
 }
 
-static int __devexit max8997_haptic_remove(struct platform_device *pdev)
+static int max8997_haptic_remove(struct platform_device *pdev)
 {
 	struct max8997_haptic *chip = platform_get_drvdata(pdev);
 
@@ -396,7 +396,7 @@
 		.pm	= &max8997_haptic_pm_ops,
 	},
 	.probe		= max8997_haptic_probe,
-	.remove		= __devexit_p(max8997_haptic_remove),
+	.remove		= max8997_haptic_remove,
 	.id_table	= max8997_haptic_id,
 };
 module_platform_driver(max8997_haptic_driver);
diff --git a/drivers/input/misc/mc13783-pwrbutton.c b/drivers/input/misc/mc13783-pwrbutton.c
index 8428f1e..0906ca5 100644
--- a/drivers/input/misc/mc13783-pwrbutton.c
+++ b/drivers/input/misc/mc13783-pwrbutton.c
@@ -89,7 +89,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit mc13783_pwrbutton_probe(struct platform_device *pdev)
+static int mc13783_pwrbutton_probe(struct platform_device *pdev)
 {
 	const struct mc13xxx_buttons_platform_data *pdata;
 	struct mc13xxx *mc13783 = dev_get_drvdata(pdev->dev.parent);
@@ -230,7 +230,7 @@
 	return err;
 }
 
-static int __devexit mc13783_pwrbutton_remove(struct platform_device *pdev)
+static int mc13783_pwrbutton_remove(struct platform_device *pdev)
 {
 	struct mc13783_pwrb *priv = platform_get_drvdata(pdev);
 	const struct mc13xxx_buttons_platform_data *pdata;
@@ -257,7 +257,7 @@
 
 static struct platform_driver mc13783_pwrbutton_driver = {
 	.probe		= mc13783_pwrbutton_probe,
-	.remove		= __devexit_p(mc13783_pwrbutton_remove),
+	.remove		= mc13783_pwrbutton_remove,
 	.driver		= {
 		.name	= "mc13783-pwrbutton",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c
index 873ebce..480557f 100644
--- a/drivers/input/misc/mma8450.c
+++ b/drivers/input/misc/mma8450.c
@@ -167,7 +167,7 @@
 /*
  * I2C init/probing/exit functions
  */
-static int __devinit mma8450_probe(struct i2c_client *c,
+static int mma8450_probe(struct i2c_client *c,
 				   const struct i2c_device_id *id)
 {
 	struct input_polled_dev *idev;
@@ -212,7 +212,7 @@
 	return err;
 }
 
-static int __devexit mma8450_remove(struct i2c_client *c)
+static int mma8450_remove(struct i2c_client *c)
 {
 	struct mma8450 *m = i2c_get_clientdata(c);
 	struct input_polled_dev *idev = m->idev;
@@ -243,7 +243,7 @@
 		.of_match_table = mma8450_dt_ids,
 	},
 	.probe		= mma8450_probe,
-	.remove		= __devexit_p(mma8450_remove),
+	.remove		= mma8450_remove,
 	.id_table	= mma8450_id,
 };
 
diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c
index 306f84c..dce0d95 100644
--- a/drivers/input/misc/mpu3050.c
+++ b/drivers/input/misc/mpu3050.c
@@ -257,7 +257,7 @@
  *
  *	Called during device probe; configures the sampling method.
  */
-static int __devinit mpu3050_hw_init(struct mpu3050_sensor *sensor)
+static int mpu3050_hw_init(struct mpu3050_sensor *sensor)
 {
 	struct i2c_client *client = sensor->client;
 	int ret;
@@ -306,7 +306,7 @@
  *
  *	If present install the relevant sysfs interfaces and input device.
  */
-static int __devinit mpu3050_probe(struct i2c_client *client,
+static int mpu3050_probe(struct i2c_client *client,
 				   const struct i2c_device_id *id)
 {
 	struct mpu3050_sensor *sensor;
@@ -402,7 +402,7 @@
  *
  *	Our sensor is going away, clean up the resources.
  */
-static int __devexit mpu3050_remove(struct i2c_client *client)
+static int mpu3050_remove(struct i2c_client *client)
 {
 	struct mpu3050_sensor *sensor = i2c_get_clientdata(client);
 
@@ -471,7 +471,7 @@
 		.of_match_table = mpu3050_of_match,
 	},
 	.probe		= mpu3050_probe,
-	.remove		= __devexit_p(mpu3050_remove),
+	.remove		= mpu3050_remove,
 	.id_table	= mpu3050_ids,
 };
 
diff --git a/drivers/input/misc/pcap_keys.c b/drivers/input/misc/pcap_keys.c
index e09b4fe..40ac9a5 100644
--- a/drivers/input/misc/pcap_keys.c
+++ b/drivers/input/misc/pcap_keys.c
@@ -48,7 +48,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit pcap_keys_probe(struct platform_device *pdev)
+static int pcap_keys_probe(struct platform_device *pdev)
 {
 	int err = -ENOMEM;
 	struct pcap_keys *pcap_keys;
@@ -104,7 +104,7 @@
 	return err;
 }
 
-static int __devexit pcap_keys_remove(struct platform_device *pdev)
+static int pcap_keys_remove(struct platform_device *pdev)
 {
 	struct pcap_keys *pcap_keys = platform_get_drvdata(pdev);
 
@@ -119,7 +119,7 @@
 
 static struct platform_driver pcap_keys_device_driver = {
 	.probe		= pcap_keys_probe,
-	.remove		= __devexit_p(pcap_keys_remove),
+	.remove		= pcap_keys_remove,
 	.driver		= {
 		.name	= "pcap-keys",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/pcf50633-input.c b/drivers/input/misc/pcf50633-input.c
index 53891de8..73b13eb 100644
--- a/drivers/input/misc/pcf50633-input.c
+++ b/drivers/input/misc/pcf50633-input.c
@@ -53,7 +53,7 @@
 	input_sync(input->input_dev);
 }
 
-static int __devinit pcf50633_input_probe(struct platform_device *pdev)
+static int pcf50633_input_probe(struct platform_device *pdev)
 {
 	struct pcf50633_input *input;
 	struct input_dev *input_dev;
@@ -93,7 +93,7 @@
 	return 0;
 }
 
-static int __devexit pcf50633_input_remove(struct platform_device *pdev)
+static int pcf50633_input_remove(struct platform_device *pdev)
 {
 	struct pcf50633_input *input  = platform_get_drvdata(pdev);
 
@@ -111,7 +111,7 @@
 		.name = "pcf50633-input",
 	},
 	.probe = pcf50633_input_probe,
-	.remove = __devexit_p(pcf50633_input_remove),
+	.remove = pcf50633_input_remove,
 };
 module_platform_driver(pcf50633_input_driver);
 
diff --git a/drivers/input/misc/pcf8574_keypad.c b/drivers/input/misc/pcf8574_keypad.c
index 544c663..e373929 100644
--- a/drivers/input/misc/pcf8574_keypad.c
+++ b/drivers/input/misc/pcf8574_keypad.c
@@ -82,7 +82,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit pcf8574_kp_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int pcf8574_kp_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	int i, ret;
 	struct input_dev *idev;
@@ -156,7 +156,7 @@
 	return ret;
 }
 
-static int __devexit pcf8574_kp_remove(struct i2c_client *client)
+static int pcf8574_kp_remove(struct i2c_client *client)
 {
 	struct kp_data *lp = i2c_get_clientdata(client);
 
@@ -212,7 +212,7 @@
 #endif
 	},
 	.probe    = pcf8574_kp_probe,
-	.remove   = __devexit_p(pcf8574_kp_remove),
+	.remove   = pcf8574_kp_remove,
 	.id_table = pcf8574_kp_id,
 };
 
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index b2484aa..199db78 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -63,7 +63,7 @@
 	return 0;
 }
 
-static int __devinit pcspkr_probe(struct platform_device *dev)
+static int pcspkr_probe(struct platform_device *dev)
 {
 	struct input_dev *pcspkr_dev;
 	int err;
@@ -95,7 +95,7 @@
 	return 0;
 }
 
-static int __devexit pcspkr_remove(struct platform_device *dev)
+static int pcspkr_remove(struct platform_device *dev)
 {
 	struct input_dev *pcspkr_dev = platform_get_drvdata(dev);
 
@@ -131,7 +131,7 @@
 		.pm	= &pcspkr_pm_ops,
 	},
 	.probe		= pcspkr_probe,
-	.remove		= __devexit_p(pcspkr_remove),
+	.remove		= pcspkr_remove,
 	.shutdown	= pcspkr_shutdown,
 };
 module_platform_driver(pcspkr_platform_driver);
diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c
index dfbfb46..a9da65e 100644
--- a/drivers/input/misc/pm8xxx-vibrator.c
+++ b/drivers/input/misc/pm8xxx-vibrator.c
@@ -178,7 +178,7 @@
 	return 0;
 }
 
-static int __devinit pm8xxx_vib_probe(struct platform_device *pdev)
+static int pm8xxx_vib_probe(struct platform_device *pdev)
 
 {
 	struct pm8xxx_vib *vib;
@@ -242,7 +242,7 @@
 	return error;
 }
 
-static int __devexit pm8xxx_vib_remove(struct platform_device *pdev)
+static int pm8xxx_vib_remove(struct platform_device *pdev)
 {
 	struct pm8xxx_vib *vib = platform_get_drvdata(pdev);
 
@@ -270,7 +270,7 @@
 
 static struct platform_driver pm8xxx_vib_driver = {
 	.probe		= pm8xxx_vib_probe,
-	.remove		= __devexit_p(pm8xxx_vib_remove),
+	.remove		= pm8xxx_vib_remove,
 	.driver		= {
 		.name	= "pm8xxx-vib",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c
index 0f83d0f..4b811be 100644
--- a/drivers/input/misc/pmic8xxx-pwrkey.c
+++ b/drivers/input/misc/pmic8xxx-pwrkey.c
@@ -81,7 +81,7 @@
 static SIMPLE_DEV_PM_OPS(pm8xxx_pwr_key_pm_ops,
 		pmic8xxx_pwrkey_suspend, pmic8xxx_pwrkey_resume);
 
-static int __devinit pmic8xxx_pwrkey_probe(struct platform_device *pdev)
+static int pmic8xxx_pwrkey_probe(struct platform_device *pdev)
 {
 	struct input_dev *pwr;
 	int key_release_irq = platform_get_irq(pdev, 0);
@@ -187,7 +187,7 @@
 	return err;
 }
 
-static int __devexit pmic8xxx_pwrkey_remove(struct platform_device *pdev)
+static int pmic8xxx_pwrkey_remove(struct platform_device *pdev)
 {
 	struct pmic8xxx_pwrkey *pwrkey = platform_get_drvdata(pdev);
 	int key_release_irq = platform_get_irq(pdev, 0);
@@ -206,7 +206,7 @@
 
 static struct platform_driver pmic8xxx_pwrkey_driver = {
 	.probe		= pmic8xxx_pwrkey_probe,
-	.remove		= __devexit_p(pmic8xxx_pwrkey_remove),
+	.remove		= pmic8xxx_pwrkey_remove,
 	.driver		= {
 		.name	= PM8XXX_PWRKEY_DEV_NAME,
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c
index fc84c8a..0808868 100644
--- a/drivers/input/misc/pwm-beeper.c
+++ b/drivers/input/misc/pwm-beeper.c
@@ -65,7 +65,7 @@
 	return 0;
 }
 
-static int __devinit pwm_beeper_probe(struct platform_device *pdev)
+static int pwm_beeper_probe(struct platform_device *pdev)
 {
 	unsigned long pwm_id = (unsigned long)pdev->dev.platform_data;
 	struct pwm_beeper *beeper;
@@ -75,7 +75,11 @@
 	if (!beeper)
 		return -ENOMEM;
 
-	beeper->pwm = pwm_request(pwm_id, "pwm beeper");
+	beeper->pwm = pwm_get(&pdev->dev, NULL);
+	if (IS_ERR(beeper->pwm)) {
+		dev_dbg(&pdev->dev, "unable to request PWM, trying legacy API\n");
+		beeper->pwm = pwm_request(pwm_id, "pwm beeper");
+	}
 
 	if (IS_ERR(beeper->pwm)) {
 		error = PTR_ERR(beeper->pwm);
@@ -125,7 +129,7 @@
 	return error;
 }
 
-static int __devexit pwm_beeper_remove(struct platform_device *pdev)
+static int pwm_beeper_remove(struct platform_device *pdev)
 {
 	struct pwm_beeper *beeper = platform_get_drvdata(pdev);
 
@@ -171,13 +175,21 @@
 #define PWM_BEEPER_PM_OPS NULL
 #endif
 
+#ifdef CONFIG_OF
+static const struct of_device_id pwm_beeper_match[] = {
+	{ .compatible = "pwm-beeper", },
+	{ },
+};
+#endif
+
 static struct platform_driver pwm_beeper_driver = {
 	.probe	= pwm_beeper_probe,
-	.remove = __devexit_p(pwm_beeper_remove),
+	.remove = pwm_beeper_remove,
 	.driver = {
 		.name	= "pwm-beeper",
 		.owner	= THIS_MODULE,
 		.pm	= PWM_BEEPER_PM_OPS,
+		.of_match_table = of_match_ptr(pwm_beeper_match),
 	},
 };
 module_platform_driver(pwm_beeper_driver);
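
The pwm-beeper hunks above move the driver to the pwm_get() lookup API while keeping the legacy pwm_request() call as a fallback, so boards that still hand a numeric PWM id through platform data keep working. A minimal sketch of the same acquire-with-fallback pattern (the helper name beeper_acquire_pwm is illustrative, not part of the patch):

	/*
	 * Prefer the table/DT-based lookup; fall back to the legacy
	 * global PWM id only when the lookup fails.
	 */
	static struct pwm_device *beeper_acquire_pwm(struct platform_device *pdev,
						     unsigned long legacy_id)
	{
		struct pwm_device *pwm;

		pwm = pwm_get(&pdev->dev, NULL);	/* NULL: default consumer */
		if (!IS_ERR(pwm))
			return pwm;

		dev_dbg(&pdev->dev, "pwm_get() failed, trying pwm_request()\n");
		return pwm_request(legacy_id, "pwm beeper");
	}

Either path can still return an ERR_PTR(), which is why the probe above keeps a single IS_ERR() check after the fallback.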
diff --git a/drivers/input/misc/rb532_button.c b/drivers/input/misc/rb532_button.c
index aeb02bc..fb4f8ac 100644
--- a/drivers/input/misc/rb532_button.c
+++ b/drivers/input/misc/rb532_button.c
@@ -51,7 +51,7 @@
 	input_sync(poll_dev->input);
 }
 
-static int __devinit rb532_button_probe(struct platform_device *pdev)
+static int rb532_button_probe(struct platform_device *pdev)
 {
 	struct input_polled_dev *poll_dev;
 	int error;
@@ -81,7 +81,7 @@
 	return 0;
 }
 
-static int __devexit rb532_button_remove(struct platform_device *pdev)
+static int rb532_button_remove(struct platform_device *pdev)
 {
 	struct input_polled_dev *poll_dev = dev_get_drvdata(&pdev->dev);
 
@@ -94,7 +94,7 @@
 
 static struct platform_driver rb532_button_driver = {
 	.probe = rb532_button_probe,
-	.remove = __devexit_p(rb532_button_remove),
+	.remove = rb532_button_remove,
 	.driver = {
 		.name = DRV_NAME,
 		.owner = THIS_MODULE,
diff --git a/drivers/input/misc/retu-pwrbutton.c b/drivers/input/misc/retu-pwrbutton.c
new file mode 100644
index 0000000..7ca09ba
--- /dev/null
+++ b/drivers/input/misc/retu-pwrbutton.c
@@ -0,0 +1,99 @@
+/*
+ * Retu power button driver.
+ *
+ * Copyright (C) 2004-2010 Nokia Corporation
+ *
+ * Original code written by Ari Saastamoinen, Juha Yrjölä and Felipe Balbi.
+ * Rewritten by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mfd/retu.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+
+#define RETU_STATUS_PWRONX (1 << 5)
+
+static irqreturn_t retu_pwrbutton_irq(int irq, void *_pwr)
+{
+	struct input_dev *idev = _pwr;
+	struct retu_dev *rdev = input_get_drvdata(idev);
+	bool state;
+
+	state = !(retu_read(rdev, RETU_REG_STATUS) & RETU_STATUS_PWRONX);
+	input_report_key(idev, KEY_POWER, state);
+	input_sync(idev);
+
+	return IRQ_HANDLED;
+}
+
+static int retu_pwrbutton_probe(struct platform_device *pdev)
+{
+	struct retu_dev *rdev = dev_get_drvdata(pdev->dev.parent);
+	struct input_dev *idev;
+	int irq;
+	int error;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	idev = devm_input_allocate_device(&pdev->dev);
+	if (!idev)
+		return -ENOMEM;
+
+	idev->name = "retu-pwrbutton";
+	idev->dev.parent = &pdev->dev;
+
+	input_set_capability(idev, EV_KEY, KEY_POWER);
+	input_set_drvdata(idev, rdev);
+
+	error = devm_request_threaded_irq(&pdev->dev, irq,
+					  NULL, retu_pwrbutton_irq, 0,
+					  "retu-pwrbutton", idev);
+	if (error)
+		return error;
+
+	error = input_register_device(idev);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+static int retu_pwrbutton_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static struct platform_driver retu_pwrbutton_driver = {
+	.probe		= retu_pwrbutton_probe,
+	.remove		= retu_pwrbutton_remove,
+	.driver		= {
+		.name	= "retu-pwrbutton",
+		.owner	= THIS_MODULE,
+	},
+};
+module_platform_driver(retu_pwrbutton_driver);
+
+MODULE_ALIAS("platform:retu-pwrbutton");
+MODULE_DESCRIPTION("Retu Power Button");
+MODULE_AUTHOR("Ari Saastamoinen");
+MODULE_AUTHOR("Felipe Balbi");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
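
The new retu-pwrbutton driver allocates everything through devm_* helpers (devm_input_allocate_device(), devm_request_threaded_irq()), which is why retu_pwrbutton_remove() can be empty: devres releases managed resources in reverse allocation order after remove() returns, and an input device allocated with devm_input_allocate_device() is unregistered automatically as well. A hedged sketch of the resulting probe shape (example_probe is a made-up name):

	static int example_probe(struct platform_device *pdev)
	{
		struct input_dev *idev;

		idev = devm_input_allocate_device(&pdev->dev);	/* devres-owned */
		if (!idev)
			return -ENOMEM;

		input_set_capability(idev, EV_KEY, KEY_POWER);

		/* Registration is managed too: no matching cleanup needed. */
		return input_register_device(idev);
	}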
diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c
index 99a49e4..aff47b2 100644
--- a/drivers/input/misc/rotary_encoder.c
+++ b/drivers/input/misc/rotary_encoder.c
@@ -149,8 +149,7 @@
 };
 MODULE_DEVICE_TABLE(of, rotary_encoder_of_match);
 
-static struct rotary_encoder_platform_data * __devinit
-rotary_encoder_parse_dt(struct device *dev)
+static struct rotary_encoder_platform_data *rotary_encoder_parse_dt(struct device *dev)
 {
 	const struct of_device_id *of_id =
 				of_match_device(rotary_encoder_of_match, dev);
@@ -192,7 +191,7 @@
 }
 #endif
 
-static int __devinit rotary_encoder_probe(struct platform_device *pdev)
+static int rotary_encoder_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct rotary_encoder_platform_data *pdata = dev_get_platdata(dev);
@@ -302,7 +301,7 @@
 	return err;
 }
 
-static int __devexit rotary_encoder_remove(struct platform_device *pdev)
+static int rotary_encoder_remove(struct platform_device *pdev)
 {
 	struct rotary_encoder *encoder = platform_get_drvdata(pdev);
 	const struct rotary_encoder_platform_data *pdata = encoder->pdata;
@@ -325,7 +324,7 @@
 
 static struct platform_driver rotary_encoder_driver = {
 	.probe		= rotary_encoder_probe,
-	.remove		= __devexit_p(rotary_encoder_remove),
+	.remove		= rotary_encoder_remove,
 	.driver		= {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/sgi_btns.c b/drivers/input/misc/sgi_btns.c
index 5d9fd55..ad6415c 100644
--- a/drivers/input/misc/sgi_btns.c
+++ b/drivers/input/misc/sgi_btns.c
@@ -91,7 +91,7 @@
 	}
 }
 
-static int __devinit sgi_buttons_probe(struct platform_device *pdev)
+static int sgi_buttons_probe(struct platform_device *pdev)
 {
 	struct buttons_dev *bdev;
 	struct input_polled_dev *poll_dev;
@@ -143,7 +143,7 @@
 	return error;
 }
 
-static int __devexit sgi_buttons_remove(struct platform_device *pdev)
+static int sgi_buttons_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct buttons_dev *bdev = dev_get_drvdata(dev);
@@ -158,7 +158,7 @@
 
 static struct platform_driver sgi_buttons_driver = {
 	.probe	= sgi_buttons_probe,
-	.remove	= __devexit_p(sgi_buttons_remove),
+	.remove	= sgi_buttons_remove,
 	.driver	= {
 		.name	= "sgibtns",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index 0122f53..a53586a 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -139,7 +139,7 @@
 	return 0;
 }
 
-static int __devinit sparcspkr_probe(struct device *dev)
+static int sparcspkr_probe(struct device *dev)
 {
 	struct sparcspkr_state *state = dev_get_drvdata(dev);
 	struct input_dev *input_dev;
@@ -182,7 +182,7 @@
 	state->event(input_dev, EV_SND, SND_BELL, 0);
 }
 
-static int __devinit bbc_beep_probe(struct platform_device *op)
+static int bbc_beep_probe(struct platform_device *op)
 {
 	struct sparcspkr_state *state;
 	struct bbc_beep_info *info;
@@ -229,7 +229,7 @@
 	return err;
 }
 
-static int __devexit bbc_remove(struct platform_device *op)
+static int bbc_remove(struct platform_device *op)
 {
 	struct sparcspkr_state *state = dev_get_drvdata(&op->dev);
 	struct input_dev *input_dev = state->input_dev;
@@ -263,11 +263,11 @@
 		.of_match_table = bbc_beep_match,
 	},
 	.probe		= bbc_beep_probe,
-	.remove		= __devexit_p(bbc_remove),
+	.remove		= bbc_remove,
 	.shutdown	= sparcspkr_shutdown,
 };
 
-static int __devinit grover_beep_probe(struct platform_device *op)
+static int grover_beep_probe(struct platform_device *op)
 {
 	struct sparcspkr_state *state;
 	struct grover_beep_info *info;
@@ -310,7 +310,7 @@
 	return err;
 }
 
-static int __devexit grover_remove(struct platform_device *op)
+static int grover_remove(struct platform_device *op)
 {
 	struct sparcspkr_state *state = dev_get_drvdata(&op->dev);
 	struct grover_beep_info *info = &state->u.grover;
@@ -345,7 +345,7 @@
 		.of_match_table = grover_beep_match,
 	},
 	.probe		= grover_beep_probe,
-	.remove		= __devexit_p(grover_remove),
+	.remove		= grover_remove,
 	.shutdown	= sparcspkr_shutdown,
 };
 
diff --git a/drivers/input/misc/twl4030-pwrbutton.c b/drivers/input/misc/twl4030-pwrbutton.c
index b3dd96d..27c2bc8 100644
--- a/drivers/input/misc/twl4030-pwrbutton.c
+++ b/drivers/input/misc/twl4030-pwrbutton.c
@@ -39,8 +39,7 @@
 	int err;
 	u8 value;
 
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &value,
-				STS_HW_CONDITIONS);
+	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &value, STS_HW_CONDITIONS);
 	if (!err)  {
 		pm_wakeup_event(pwr->dev.parent, 0);
 		input_report_key(pwr, KEY_POWER, value & PWR_PWRON_IRQ);
diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c
index 2194a3c..78eb6b3 100644
--- a/drivers/input/misc/twl4030-vibra.c
+++ b/drivers/input/misc/twl4030-vibra.c
@@ -207,7 +207,7 @@
 	return false;
 }
 
-static int __devinit twl4030_vibra_probe(struct platform_device *pdev)
+static int twl4030_vibra_probe(struct platform_device *pdev)
 {
 	struct twl4030_vibra_data *pdata = pdev->dev.platform_data;
 	struct device_node *twl4030_core_node = pdev->dev.parent->of_node;
@@ -269,7 +269,7 @@
 	return ret;
 }
 
-static int __devexit twl4030_vibra_remove(struct platform_device *pdev)
+static int twl4030_vibra_remove(struct platform_device *pdev)
 {
 	struct vibra_info *info = platform_get_drvdata(pdev);
 
@@ -283,7 +283,7 @@
 
 static struct platform_driver twl4030_vibra_driver = {
 	.probe		= twl4030_vibra_probe,
-	.remove		= __devexit_p(twl4030_vibra_remove),
+	.remove		= twl4030_vibra_remove,
 	.driver		= {
 		.name	= "twl4030-vibra",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index c8a288a..71a28ee 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c
@@ -255,7 +255,7 @@
 
 static SIMPLE_DEV_PM_OPS(twl6040_vibra_pm_ops, twl6040_vibra_suspend, NULL);
 
-static int __devinit twl6040_vibra_probe(struct platform_device *pdev)
+static int twl6040_vibra_probe(struct platform_device *pdev)
 {
 	struct twl6040_vibra_data *pdata = pdev->dev.platform_data;
 	struct device *twl6040_core_dev = pdev->dev.parent;
@@ -418,7 +418,7 @@
 	return ret;
 }
 
-static int __devexit twl6040_vibra_remove(struct platform_device *pdev)
+static int twl6040_vibra_remove(struct platform_device *pdev)
 {
 	struct vibra_info *info = platform_get_drvdata(pdev);
 
@@ -433,7 +433,7 @@
 
 static struct platform_driver twl6040_vibra_driver = {
 	.probe		= twl6040_vibra_probe,
-	.remove		= __devexit_p(twl6040_vibra_remove),
+	.remove		= twl6040_vibra_remove,
 	.driver		= {
 		.name	= "twl6040-vibra",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c
index e2bdfd4..56536f4b9 100644
--- a/drivers/input/misc/wistron_btns.c
+++ b/drivers/input/misc/wistron_btns.c
@@ -170,7 +170,7 @@
 	return regs.eax;
 }
 
-static void __devinit bios_attach(void)
+static void bios_attach(void)
 {
 	struct regs regs;
 
@@ -190,7 +190,7 @@
 	call_bios(&regs);
 }
 
-static u8 __devinit bios_get_cmos_address(void)
+static u8 bios_get_cmos_address(void)
 {
 	struct regs regs;
 
@@ -202,7 +202,7 @@
 	return regs.ecx;
 }
 
-static u16 __devinit bios_get_default_setting(u8 subsys)
+static u16 bios_get_default_setting(u8 subsys)
 {
 	struct regs regs;
 
@@ -1052,7 +1052,7 @@
 	.brightness_set		= wistron_wifi_led_set,
 };
 
-static void __devinit wistron_led_init(struct device *parent)
+static void wistron_led_init(struct device *parent)
 {
 	if (leds_present & FE_WIFI_LED) {
 		u16 wifi = bios_get_default_setting(WIFI);
@@ -1077,7 +1077,7 @@
 	}
 }
 
-static void __devexit wistron_led_remove(void)
+static void wistron_led_remove(void)
 {
 	if (leds_present & FE_MAIL_LED)
 		led_classdev_unregister(&wistron_mail_led);
@@ -1168,7 +1168,7 @@
 		dev->poll_interval = POLL_INTERVAL_DEFAULT;
 }
 
-static int __devinit wistron_setup_keymap(struct input_dev *dev,
+static int wistron_setup_keymap(struct input_dev *dev,
 					  struct key_entry *entry)
 {
 	switch (entry->type) {
@@ -1199,7 +1199,7 @@
 	return 0;
 }
 
-static int __devinit setup_input_dev(void)
+static int setup_input_dev(void)
 {
 	struct input_dev *input_dev;
 	int error;
@@ -1237,7 +1237,7 @@
 
 /* Driver core */
 
-static int __devinit wistron_probe(struct platform_device *dev)
+static int wistron_probe(struct platform_device *dev)
 {
 	int err;
 
@@ -1277,7 +1277,7 @@
 	return 0;
 }
 
-static int __devexit wistron_remove(struct platform_device *dev)
+static int wistron_remove(struct platform_device *dev)
 {
 	wistron_led_remove();
 	input_unregister_polled_device(wistron_idev);
@@ -1334,7 +1334,7 @@
 #endif
 	},
 	.probe		= wistron_probe,
-	.remove		= __devexit_p(wistron_remove),
+	.remove		= wistron_remove,
 };
 
 static int __init wb_module_init(void)
diff --git a/drivers/input/misc/wm831x-on.c b/drivers/input/misc/wm831x-on.c
index 6790a81..558767d 100644
--- a/drivers/input/misc/wm831x-on.c
+++ b/drivers/input/misc/wm831x-on.c
@@ -69,14 +69,15 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit wm831x_on_probe(struct platform_device *pdev)
+static int wm831x_on_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_on *wm831x_on;
 	int irq = wm831x_irq(wm831x, platform_get_irq(pdev, 0));
 	int ret;
 
-	wm831x_on = kzalloc(sizeof(struct wm831x_on), GFP_KERNEL);
+	wm831x_on = devm_kzalloc(&pdev->dev, sizeof(struct wm831x_on),
+				 GFP_KERNEL);
 	if (!wm831x_on) {
 		dev_err(&pdev->dev, "Can't allocate data\n");
 		return -ENOMEM;
@@ -120,11 +121,10 @@
 err_input_dev:
 	input_free_device(wm831x_on->dev);
 err:
-	kfree(wm831x_on);
 	return ret;
 }
 
-static int __devexit wm831x_on_remove(struct platform_device *pdev)
+static int wm831x_on_remove(struct platform_device *pdev)
 {
 	struct wm831x_on *wm831x_on = platform_get_drvdata(pdev);
 	int irq = platform_get_irq(pdev, 0);
@@ -132,14 +132,13 @@
 	free_irq(irq, wm831x_on);
 	cancel_delayed_work_sync(&wm831x_on->work);
 	input_unregister_device(wm831x_on->dev);
-	kfree(wm831x_on);
 
 	return 0;
 }
 
 static struct platform_driver wm831x_on_driver = {
 	.probe		= wm831x_on_probe,
-	.remove		= __devexit_p(wm831x_on_remove),
+	.remove		= wm831x_on_remove,
 	.driver		= {
 		.name	= "wm831x-on",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 6f7d990..e21c181 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -104,7 +104,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit xenkbd_probe(struct xenbus_device *dev,
+static int xenkbd_probe(struct xenbus_device *dev,
 				  const struct xenbus_device_id *id)
 {
 	int ret, i, abs;
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index cf5af1f..e229fa3 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -767,9 +767,8 @@
 		      psmouse->packet[5]) & 0x80) ||
 		    (!alps_is_valid_first_byte(priv->i, psmouse->packet[6]))) {
 			psmouse_dbg(psmouse,
-				    "refusing packet %x %x %x %x (suspected interleaved ps/2)\n",
-				    psmouse->packet[3], psmouse->packet[4],
-				    psmouse->packet[5], psmouse->packet[6]);
+				    "refusing packet %4ph (suspected interleaved ps/2)\n",
+				    psmouse->packet + 3);
 			return PSMOUSE_BAD_DATA;
 		}
 
@@ -831,9 +830,8 @@
 		     psmouse->packet[4] |
 		     psmouse->packet[5]) & 0x80) {
 			psmouse_dbg(psmouse,
-				    "refusing packet %x %x %x (suspected interleaved ps/2)\n",
-				    psmouse->packet[3], psmouse->packet[4],
-				    psmouse->packet[5]);
+				    "refusing packet %3ph (suspected interleaved ps/2)\n",
+				    psmouse->packet + 3);
 		} else {
 			alps_process_packet(psmouse);
 		}
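
The alps.c hunks above replace open-coded byte-by-byte dumps with the kernel's %*ph printk extension: the field width gives the number of bytes to hex-dump from the pointed-to buffer, so "%4ph" with psmouse->packet + 3 prints packet[3] through packet[6]. For illustration:

	u8 buf[4] = { 0x12, 0x34, 0x56, 0x78 };

	/* prints: refusing packet 12 34 56 78 */
	printk(KERN_DEBUG "refusing packet %4ph\n", buf);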
diff --git a/drivers/input/mouse/gpio_mouse.c b/drivers/input/mouse/gpio_mouse.c
index 39fe9b7..532eaca4 100644
--- a/drivers/input/mouse/gpio_mouse.c
+++ b/drivers/input/mouse/gpio_mouse.c
@@ -46,7 +46,7 @@
 	input_sync(input);
 }
 
-static int __devinit gpio_mouse_probe(struct platform_device *pdev)
+static int gpio_mouse_probe(struct platform_device *pdev)
 {
 	struct gpio_mouse_platform_data *pdata = pdev->dev.platform_data;
 	struct input_polled_dev *input_poll;
@@ -150,7 +150,7 @@
 	return error;
 }
 
-static int __devexit gpio_mouse_remove(struct platform_device *pdev)
+static int gpio_mouse_remove(struct platform_device *pdev)
 {
 	struct input_polled_dev *input = platform_get_drvdata(pdev);
 	struct gpio_mouse_platform_data *pdata = input->private;
@@ -172,7 +172,7 @@
 
 static struct platform_driver gpio_mouse_device_driver = {
 	.probe		= gpio_mouse_probe,
-	.remove		= __devexit_p(gpio_mouse_remove),
+	.remove		= gpio_mouse_remove,
 	.driver		= {
 		.name	= "gpio_mouse",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/mouse/maplemouse.c b/drivers/input/mouse/maplemouse.c
index 5f27817..0a60717 100644
--- a/drivers/input/mouse/maplemouse.c
+++ b/drivers/input/mouse/maplemouse.c
@@ -64,7 +64,7 @@
 }
 
 /* allow the mouse to be used */
-static int __devinit probe_maple_mouse(struct device *dev)
+static int probe_maple_mouse(struct device *dev)
 {
 	struct maple_device *mdev = to_maple_dev(dev);
 	struct maple_driver *mdrv = to_maple_driver(dev->driver);
@@ -114,7 +114,7 @@
 	return error;
 }
 
-static int __devexit remove_maple_mouse(struct device *dev)
+static int remove_maple_mouse(struct device *dev)
 {
 	struct maple_device *mdev = to_maple_dev(dev);
 	struct dc_mouse *mse = maple_get_drvdata(mdev);
@@ -132,7 +132,7 @@
 	.drv = {
 		.name = "Dreamcast_mouse",
 		.probe = probe_maple_mouse,
-		.remove = __devexit_p(remove_maple_mouse),
+		.remove = remove_maple_mouse,
 	},
 };
 
diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c
index c29ae76..8e1b98e 100644
--- a/drivers/input/mouse/navpoint.c
+++ b/drivers/input/mouse/navpoint.c
@@ -206,7 +206,7 @@
 	navpoint_down(navpoint);
 }
 
-static int __devinit navpoint_probe(struct platform_device *pdev)
+static int navpoint_probe(struct platform_device *pdev)
 {
 	const struct navpoint_platform_data *pdata =
 					dev_get_platdata(&pdev->dev);
@@ -299,7 +299,7 @@
 	return error;
 }
 
-static int __devexit navpoint_remove(struct platform_device *pdev)
+static int navpoint_remove(struct platform_device *pdev)
 {
 	const struct navpoint_platform_data *pdata =
 					dev_get_platdata(&pdev->dev);
@@ -353,7 +353,7 @@
 
 static struct platform_driver navpoint_driver = {
 	.probe		= navpoint_probe,
-	.remove		= __devexit_p(navpoint_remove),
+	.remove		= navpoint_remove,
 	.driver = {
 		.name	= "navpoint",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/mouse/pxa930_trkball.c b/drivers/input/mouse/pxa930_trkball.c
index 4fe055f..0ecb9e7 100644
--- a/drivers/input/mouse/pxa930_trkball.c
+++ b/drivers/input/mouse/pxa930_trkball.c
@@ -143,7 +143,7 @@
 	pxa930_trkball_disable(trkball);
 }
 
-static int __devinit pxa930_trkball_probe(struct platform_device *pdev)
+static int pxa930_trkball_probe(struct platform_device *pdev)
 {
 	struct pxa930_trkball *trkball;
 	struct input_dev *input;
@@ -230,7 +230,7 @@
 	return error;
 }
 
-static int __devexit pxa930_trkball_remove(struct platform_device *pdev)
+static int pxa930_trkball_remove(struct platform_device *pdev)
 {
 	struct pxa930_trkball *trkball = platform_get_drvdata(pdev);
 	int irq = platform_get_irq(pdev, 0);
@@ -248,7 +248,7 @@
 		.name	= "pxa930-trkball",
 	},
 	.probe		= pxa930_trkball_probe,
-	.remove		= __devexit_p(pxa930_trkball_remove),
+	.remove		= pxa930_trkball_remove,
 };
 module_platform_driver(pxa930_trkball_driver);
 
diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index 063a174..ad82260 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -535,7 +535,7 @@
 	return touch;
 }
 
-static int __devinit synaptics_i2c_probe(struct i2c_client *client,
+static int synaptics_i2c_probe(struct i2c_client *client,
 			       const struct i2c_device_id *dev_id)
 {
 	int ret;
@@ -601,7 +601,7 @@
 	return ret;
 }
 
-static int __devexit synaptics_i2c_remove(struct i2c_client *client)
+static int synaptics_i2c_remove(struct i2c_client *client)
 {
 	struct synaptics_i2c *touch = i2c_get_clientdata(client);
 
@@ -662,7 +662,7 @@
 	},
 
 	.probe		= synaptics_i2c_probe,
-	.remove		= __devexit_p(synaptics_i2c_remove),
+	.remove		= synaptics_i2c_remove,
 
 	.id_table	= synaptics_i2c_id_table,
 };
diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig
index 55f2c22..4a4e182 100644
--- a/drivers/input/serio/Kconfig
+++ b/drivers/input/serio/Kconfig
@@ -234,4 +234,13 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called ps2mult.
 
+config SERIO_ARC_PS2
+	tristate "ARC PS/2 support"
+	help
+	  Say Y here if you have an ARC FPGA platform with a PS/2
+	  controller in it.
+
+	  To compile this driver as a module, choose M here; the module
+	  will be called arc_ps2.
+
 endif
diff --git a/drivers/input/serio/Makefile b/drivers/input/serio/Makefile
index dbbe376..4b0c8f8 100644
--- a/drivers/input/serio/Makefile
+++ b/drivers/input/serio/Makefile
@@ -25,3 +25,4 @@
 obj-$(CONFIG_SERIO_AMS_DELTA)	+= ams_delta_serio.o
 obj-$(CONFIG_SERIO_XILINX_XPS_PS2)	+= xilinx_ps2.o
 obj-$(CONFIG_SERIO_ALTERA_PS2)	+= altera_ps2.o
+obj-$(CONFIG_SERIO_ARC_PS2)	+= arc_ps2.o
diff --git a/drivers/input/serio/altera_ps2.c b/drivers/input/serio/altera_ps2.c
index cc11f4e..479ce5f 100644
--- a/drivers/input/serio/altera_ps2.c
+++ b/drivers/input/serio/altera_ps2.c
@@ -81,7 +81,7 @@
 /*
  * Add one device to this driver.
  */
-static int __devinit altera_ps2_probe(struct platform_device *pdev)
+static int altera_ps2_probe(struct platform_device *pdev)
 {
 	struct ps2if *ps2if;
 	struct serio *serio;
@@ -159,7 +159,7 @@
 /*
  * Remove one device from this driver.
  */
-static int __devexit altera_ps2_remove(struct platform_device *pdev)
+static int altera_ps2_remove(struct platform_device *pdev)
 {
 	struct ps2if *ps2if = platform_get_drvdata(pdev);
 
@@ -187,7 +187,7 @@
  */
 static struct platform_driver altera_ps2_driver = {
 	.probe		= altera_ps2_probe,
-	.remove		= __devexit_p(altera_ps2_remove),
+	.remove		= altera_ps2_remove,
 	.driver	= {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c
index 2e77246..4e2fd44 100644
--- a/drivers/input/serio/ambakmi.c
+++ b/drivers/input/serio/ambakmi.c
@@ -107,7 +107,7 @@
 	clk_disable_unprepare(kmi->clk);
 }
 
-static int __devinit amba_kmi_probe(struct amba_device *dev,
+static int amba_kmi_probe(struct amba_device *dev,
 	const struct amba_id *id)
 {
 	struct amba_kmi_port *kmi;
@@ -163,7 +163,7 @@
 	return ret;
 }
 
-static int __devexit amba_kmi_remove(struct amba_device *dev)
+static int amba_kmi_remove(struct amba_device *dev)
 {
 	struct amba_kmi_port *kmi = amba_get_drvdata(dev);
 
@@ -204,7 +204,7 @@
 	},
 	.id_table	= amba_kmi_idtable,
 	.probe		= amba_kmi_probe,
-	.remove		= __devexit_p(amba_kmi_remove),
+	.remove		= amba_kmi_remove,
 	.resume		= amba_kmi_resume,
 };
 
diff --git a/drivers/input/serio/arc_ps2.c b/drivers/input/serio/arc_ps2.c
new file mode 100644
index 0000000..b571eb3
--- /dev/null
+++ b/drivers/input/serio/arc_ps2.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This driver was originally developed by Pavel Sokolov <psokolov@synopsys.com>
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/serio.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#define ARC_PS2_PORTS                   2
+
+#define ARC_ARC_PS2_ID                  0x0001f609
+
+#define STAT_TIMEOUT                    128
+
+#define PS2_STAT_RX_FRM_ERR             (1)
+#define PS2_STAT_RX_BUF_OVER            (1 << 1)
+#define PS2_STAT_RX_INT_EN              (1 << 2)
+#define PS2_STAT_RX_VAL                 (1 << 3)
+#define PS2_STAT_TX_ISNOT_FUL           (1 << 4)
+#define PS2_STAT_TX_INT_EN              (1 << 5)
+
+struct arc_ps2_port {
+	void __iomem *data_addr;
+	void __iomem *status_addr;
+	struct serio *io;
+};
+
+struct arc_ps2_data {
+	struct arc_ps2_port port[ARC_PS2_PORTS];
+	void __iomem *addr;
+	unsigned int frame_error;
+	unsigned int buf_overflow;
+	unsigned int total_int;
+};
+
+static void arc_ps2_check_rx(struct arc_ps2_data *arc_ps2,
+			     struct arc_ps2_port *port)
+{
+	unsigned int timeout = 1000;
+	unsigned int flag, status;
+	unsigned char data;
+
+	do {
+		status = ioread32(port->status_addr);
+		if (!(status & PS2_STAT_RX_VAL))
+			return;
+
+		data = ioread32(port->data_addr) & 0xff;
+
+		flag = 0;
+		arc_ps2->total_int++;
+		if (status & PS2_STAT_RX_FRM_ERR) {
+			arc_ps2->frame_error++;
+			flag |= SERIO_PARITY;
+		} else if (status & PS2_STAT_RX_BUF_OVER) {
+			arc_ps2->buf_overflow++;
+			flag |= SERIO_FRAME;
+		}
+
+		serio_interrupt(port->io, data, flag);
+	} while (--timeout);
+
+	dev_err(&port->io->dev, "PS/2 hardware stuck\n");
+}
+
+static irqreturn_t arc_ps2_interrupt(int irq, void *dev)
+{
+	struct arc_ps2_data *arc_ps2 = dev;
+	int i;
+
+	for (i = 0; i < ARC_PS2_PORTS; i++)
+		arc_ps2_check_rx(arc_ps2, &arc_ps2->port[i]);
+
+	return IRQ_HANDLED;
+}
+
+static int arc_ps2_write(struct serio *io, unsigned char val)
+{
+	unsigned status;
+	struct arc_ps2_port *port = io->port_data;
+	int timeout = STAT_TIMEOUT;
+
+	do {
+		status = ioread32(port->status_addr);
+		cpu_relax();
+
+		if (status & PS2_STAT_TX_ISNOT_FUL) {
+			iowrite32(val & 0xff, port->data_addr);
+			return 0;
+		}
+
+	} while (--timeout);
+
+	dev_err(&io->dev, "write timeout\n");
+	return -ETIMEDOUT;
+}
+
+static int arc_ps2_open(struct serio *io)
+{
+	struct arc_ps2_port *port = io->port_data;
+
+	iowrite32(PS2_STAT_RX_INT_EN, port->status_addr);
+
+	return 0;
+}
+
+static void arc_ps2_close(struct serio *io)
+{
+	struct arc_ps2_port *port = io->port_data;
+
+	iowrite32(ioread32(port->status_addr) & ~PS2_STAT_RX_INT_EN,
+		  port->status_addr);
+}
+
+static void __iomem *arc_ps2_calc_addr(struct arc_ps2_data *arc_ps2,
+						  int index, bool status)
+{
+	void __iomem *addr;
+
+	addr = arc_ps2->addr + 4 + 4 * index;
+	if (status)
+		addr += ARC_PS2_PORTS * 4;
+
+	return addr;
+}
+
+static void arc_ps2_inhibit_ports(struct arc_ps2_data *arc_ps2)
+{
+	void __iomem *addr;
+	u32 val;
+	int i;
+
+	for (i = 0; i < ARC_PS2_PORTS; i++) {
+		addr = arc_ps2_calc_addr(arc_ps2, i, true);
+		val = ioread32(addr);
+		val &= ~(PS2_STAT_RX_INT_EN | PS2_STAT_TX_INT_EN);
+		iowrite32(val, addr);
+	}
+}
+
+static int arc_ps2_create_port(struct platform_device *pdev,
+					 struct arc_ps2_data *arc_ps2,
+					 int index)
+{
+	struct arc_ps2_port *port = &arc_ps2->port[index];
+	struct serio *io;
+
+	io = kzalloc(sizeof(struct serio), GFP_KERNEL);
+	if (!io)
+		return -ENOMEM;
+
+	io->id.type = SERIO_8042;
+	io->write = arc_ps2_write;
+	io->open = arc_ps2_open;
+	io->close = arc_ps2_close;
+	snprintf(io->name, sizeof(io->name), "ARC PS/2 port%d", index);
+	snprintf(io->phys, sizeof(io->phys), "arc/serio%d", index);
+	io->port_data = port;
+
+	port->io = io;
+
+	port->data_addr = arc_ps2_calc_addr(arc_ps2, index, false);
+	port->status_addr = arc_ps2_calc_addr(arc_ps2, index, true);
+
+	dev_dbg(&pdev->dev, "port%d is allocated (data = 0x%p, status = 0x%p)\n",
+		index, port->data_addr, port->status_addr);
+
+	serio_register_port(port->io);
+	return 0;
+}
+
+static int arc_ps2_probe(struct platform_device *pdev)
+{
+	struct arc_ps2_data *arc_ps2;
+	struct resource *res;
+	int irq;
+	int error, id, i;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no IO memory defined\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq_byname(pdev, "arc_ps2_irq");
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no IRQ defined\n");
+		return -EINVAL;
+	}
+
+	arc_ps2 = devm_kzalloc(&pdev->dev, sizeof(struct arc_ps2_data),
+				GFP_KERNEL);
+	if (!arc_ps2) {
+		dev_err(&pdev->dev, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	arc_ps2->addr = devm_request_and_ioremap(&pdev->dev, res);
+	if (!arc_ps2->addr)
+		return -EBUSY;
+
+	dev_info(&pdev->dev, "irq = %d, address = 0x%p, ports = %i\n",
+		 irq, arc_ps2->addr, ARC_PS2_PORTS);
+
+	id = ioread32(arc_ps2->addr);
+	if (id != ARC_ARC_PS2_ID) {
+		dev_err(&pdev->dev, "device id does not match\n");
+		return -ENXIO;
+	}
+
+	arc_ps2_inhibit_ports(arc_ps2);
+
+	error = devm_request_irq(&pdev->dev, irq, arc_ps2_interrupt,
+				 0, "arc_ps2", arc_ps2);
+	if (error) {
+		dev_err(&pdev->dev, "Could not allocate IRQ\n");
+		return error;
+	}
+
+	for (i = 0; i < ARC_PS2_PORTS; i++) {
+		error = arc_ps2_create_port(pdev, arc_ps2, i);
+		if (error) {
+			while (--i >= 0)
+				serio_unregister_port(arc_ps2->port[i].io);
+			return error;
+		}
+	}
+
+	platform_set_drvdata(pdev, arc_ps2);
+
+	return 0;
+}
+
+static int arc_ps2_remove(struct platform_device *pdev)
+{
+	struct arc_ps2_data *arc_ps2 = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < ARC_PS2_PORTS; i++)
+		serio_unregister_port(arc_ps2->port[i].io);
+
+	dev_dbg(&pdev->dev, "interrupt count = %i\n", arc_ps2->total_int);
+	dev_dbg(&pdev->dev, "frame error count = %i\n", arc_ps2->frame_error);
+	dev_dbg(&pdev->dev, "buffer overflow count = %i\n",
+		arc_ps2->buf_overflow);
+
+	return 0;
+}
+
+static struct platform_driver arc_ps2_driver = {
+	.driver	= {
+		.name	= "arc_ps2",
+		.owner	= THIS_MODULE,
+	},
+	.probe	= arc_ps2_probe,
+	.remove	= arc_ps2_remove,
+};
+
+module_platform_driver(arc_ps2_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pavel Sokolov <psokolov@synopsys.com>");
+MODULE_DESCRIPTION("ARC PS/2 Driver");
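
For reference, arc_ps2_calc_addr() above implies the following register map (derived only from the driver code, with ARC_PS2_PORTS == 2): word 0 holds the ID register, followed by one data register per port and then one status register per port:

	/* base + 0x00 : ID register (must read ARC_ARC_PS2_ID)           */
	/* base + 0x04 : port 0 data   = base + 4 + 4*0                   */
	/* base + 0x08 : port 1 data   = base + 4 + 4*1                   */
	/* base + 0x0c : port 0 status = base + 4 + 4*0 + 4*ARC_PS2_PORTS */
	/* base + 0x10 : port 1 status = base + 4 + 4*1 + 4*ARC_PS2_PORTS */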
diff --git a/drivers/input/serio/ct82c710.c b/drivers/input/serio/ct82c710.c
index 8528165..cfe549d 100644
--- a/drivers/input/serio/ct82c710.c
+++ b/drivers/input/serio/ct82c710.c
@@ -175,7 +175,7 @@
 	return 0;
 }
 
-static int __devinit ct82c710_probe(struct platform_device *dev)
+static int ct82c710_probe(struct platform_device *dev)
 {
 	ct82c710_port = kzalloc(sizeof(struct serio), GFP_KERNEL);
 	if (!ct82c710_port)
@@ -199,7 +199,7 @@
 	return 0;
 }
 
-static int __devexit ct82c710_remove(struct platform_device *dev)
+static int ct82c710_remove(struct platform_device *dev)
 {
 	serio_unregister_port(ct82c710_port);
 
@@ -212,7 +212,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= ct82c710_probe,
-	.remove		= __devexit_p(ct82c710_remove),
+	.remove		= ct82c710_remove,
 };
 
 
diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c
index 4225f5d..8d9ba0c 100644
--- a/drivers/input/serio/gscps2.c
+++ b/drivers/input/serio/gscps2.c
@@ -327,7 +327,7 @@
  * @return: success/error report
  */
 
-static int __devinit gscps2_probe(struct parisc_device *dev)
+static int gscps2_probe(struct parisc_device *dev)
 {
 	struct gscps2port *ps2port;
 	struct serio *serio;
@@ -414,7 +414,7 @@
  * @return: success/error report
  */
 
-static int __devexit gscps2_remove(struct parisc_device *dev)
+static int gscps2_remove(struct parisc_device *dev)
 {
 	struct gscps2port *ps2port = dev_get_drvdata(&dev->dev);
 
@@ -444,7 +444,7 @@
 	.name		= "gsc_ps2",
 	.id_table	= gscps2_device_tbl,
 	.probe		= gscps2_probe,
-	.remove		= __devexit_p(gscps2_remove),
+	.remove		= gscps2_remove,
 };
 
 static int __init gscps2_init(void)
diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c
index bfd3865..65605e4 100644
--- a/drivers/input/serio/hil_mlc.c
+++ b/drivers/input/serio/hil_mlc.c
@@ -686,13 +686,12 @@
 		write_lock_irqsave(&mlc->lock, flags);
 		pack = node->object.packet;
 	out:
-		if (mlc->istarted)
-			goto out2;
-		/* Prepare to receive input */
-		if ((node + 1)->act & HILSE_IN)
-			hilse_setup_input(mlc, node + 1);
+		if (!mlc->istarted) {
+			/* Prepare to receive input */
+			if ((node + 1)->act & HILSE_IN)
+				hilse_setup_input(mlc, node + 1);
+		}
 
-	out2:
 		write_unlock_irqrestore(&mlc->lock, flags);
 
 		if (down_trylock(&mlc->osem)) {
@@ -1010,8 +1009,6 @@
 static void __exit hil_mlc_exit(void)
 {
 	del_timer_sync(&hil_mlcs_kicker);
-
-	tasklet_disable(&hil_mlcs_tasklet);
 	tasklet_kill(&hil_mlcs_tasklet);
 }
 
diff --git a/drivers/input/serio/i8042-io.h b/drivers/input/serio/i8042-io.h
index 5d48bb6..a5eed2a 100644
--- a/drivers/input/serio/i8042-io.h
+++ b/drivers/input/serio/i8042-io.h
@@ -76,7 +76,7 @@
 	if (check_legacy_ioport(I8042_DATA_REG))
 		return -ENODEV;
 #endif
-#if !defined(__sh__) && !defined(__alpha__) && !defined(__mips__)
+#if !defined(__sh__) && !defined(__alpha__)
 	if (!request_region(I8042_DATA_REG, 16, "i8042"))
 		return -EBUSY;
 #endif
diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index 395a9af..d6aa4c6 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -49,7 +49,7 @@
 #define OBP_PS2MS_NAME1		"kdmouse"
 #define OBP_PS2MS_NAME2		"mouse"
 
-static int __devinit sparc_i8042_probe(struct platform_device *op)
+static int sparc_i8042_probe(struct platform_device *op)
 {
 	struct device_node *dp = op->dev.of_node;
 
@@ -80,7 +80,7 @@
 	return 0;
 }
 
-static int __devexit sparc_i8042_remove(struct platform_device *op)
+static int sparc_i8042_remove(struct platform_device *op)
 {
 	of_iounmap(kbd_res, kbd_iobase, 8);
 
@@ -102,7 +102,7 @@
 		.of_match_table = sparc_i8042_match,
 	},
 	.probe		= sparc_i8042_probe,
-	.remove		= __devexit_p(sparc_i8042_remove),
+	.remove		= sparc_i8042_remove,
 };
 
 static int __init i8042_platform_init(void)
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index d6cc77a..5f306f7 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -921,6 +921,7 @@
 	int retval;
 
 #ifdef CONFIG_X86
+	u8 a20_on = 0xdf;
 	/* Just return if pre-detection shows no i8042 controller exist */
 	if (!x86_platform.i8042_detect())
 		return -ENODEV;
@@ -960,6 +961,14 @@
 
 	if (dmi_check_system(i8042_dmi_dritek_table))
 		i8042_dritek = true;
+
+	/*
+	 * A20 was already enabled during early kernel init. But some buggy
+	 * BIOSes (in MSI laptops) require A20 to be enabled via the i8042 to
+	 * resume from S3. So we do it here and hope that nothing breaks.
+	 */
+	i8042_command(&a20_on, 0x10d1);
+	i8042_command(NULL, 0x00ff);	/* Null command for SMM firmware */
 #endif /* CONFIG_X86 */
 
 	return retval;
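
The two i8042_command() calls added above use the driver's packed command encoding: bits 12..15 of the command word give the number of parameter bytes written to the controller, bits 8..11 the number of result bytes read back, and the low byte is the controller opcode. So 0x10d1 writes one parameter (0xdf, A20 gate on) with opcode 0xd1 (write output port), and 0x00ff issues opcode 0xff with no parameters. A sketch of that decoding (the macro names here are hypothetical):

	#define CMD_NWRITE(cmd)	(((cmd) >> 12) & 0xf)	/* param bytes out */
	#define CMD_NREAD(cmd)	(((cmd) >>  8) & 0xf)	/* result bytes in */
	#define CMD_OPCODE(cmd)	((cmd) & 0xff)		/* controller opcode */

	/* 0x10d1: CMD_NWRITE=1, CMD_NREAD=0, opcode 0xd1 (write output port) */
	/* 0x00ff: no parameters, opcode 0xff (the null command noted above)  */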
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 8656441..78e4de4 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -1284,7 +1284,7 @@
 	}
 }
 
-static void __devexit i8042_unregister_ports(void)
+static void i8042_unregister_ports(void)
 {
 	int i;
 
@@ -1437,7 +1437,7 @@
 	return error;
 }
 
-static int __devexit i8042_remove(struct platform_device *dev)
+static int i8042_remove(struct platform_device *dev)
 {
 	i8042_unregister_ports();
 	i8042_free_irqs();
@@ -1455,7 +1455,7 @@
 		.pm	= &i8042_pm_ops,
 #endif
 	},
-	.remove		= __devexit_p(i8042_remove),
+	.remove		= i8042_remove,
 	.shutdown	= i8042_shutdown,
 };
 
diff --git a/drivers/input/serio/maceps2.c b/drivers/input/serio/maceps2.c
index 61da763..bc85e1c 100644
--- a/drivers/input/serio/maceps2.c
+++ b/drivers/input/serio/maceps2.c
@@ -116,7 +116,7 @@
 }
 
 
-static struct serio * __devinit maceps2_allocate_port(int idx)
+static struct serio *maceps2_allocate_port(int idx)
 {
 	struct serio *serio;
 
@@ -135,7 +135,7 @@
 	return serio;
 }
 
-static int __devinit maceps2_probe(struct platform_device *dev)
+static int maceps2_probe(struct platform_device *dev)
 {
 	maceps2_port[0] = maceps2_allocate_port(0);
 	maceps2_port[1] = maceps2_allocate_port(1);
@@ -151,7 +151,7 @@
 	return 0;
 }
 
-static int __devexit maceps2_remove(struct platform_device *dev)
+static int maceps2_remove(struct platform_device *dev)
 {
 	serio_unregister_port(maceps2_port[0]);
 	serio_unregister_port(maceps2_port[1]);
@@ -165,7 +165,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= maceps2_probe,
-	.remove		= __devexit_p(maceps2_remove),
+	.remove		= maceps2_remove,
 };
 
 static int __init maceps2_init(void)
diff --git a/drivers/input/serio/pcips2.c b/drivers/input/serio/pcips2.c
index 0c42497..76f8383 100644
--- a/drivers/input/serio/pcips2.c
+++ b/drivers/input/serio/pcips2.c
@@ -127,7 +127,7 @@
 	free_irq(ps2if->dev->irq, ps2if);
 }
 
-static int __devinit pcips2_probe(struct pci_dev *dev, const struct pci_device_id *id)
+static int pcips2_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct pcips2_data *ps2if;
 	struct serio *serio;
@@ -176,7 +176,7 @@
 	return ret;
 }
 
-static void __devexit pcips2_remove(struct pci_dev *dev)
+static void pcips2_remove(struct pci_dev *dev)
 {
 	struct pcips2_data *ps2if = pci_get_drvdata(dev);
 
@@ -212,7 +212,7 @@
 	.name			= "pcips2",
 	.id_table		= pcips2_ids,
 	.probe			= pcips2_probe,
-	.remove			= __devexit_p(pcips2_remove),
+	.remove			= pcips2_remove,
 };
 
 module_pci_driver(pcips2_driver);
diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c
index 0c0df7f..70fe542 100644
--- a/drivers/input/serio/q40kbd.c
+++ b/drivers/input/serio/q40kbd.c
@@ -122,7 +122,7 @@
 	q40kbd_flush(q40kbd);
 }
 
-static int __devinit q40kbd_probe(struct platform_device *pdev)
+static int q40kbd_probe(struct platform_device *pdev)
 {
 	struct q40kbd *q40kbd;
 	struct serio *port;
@@ -168,7 +168,7 @@
 	return error;
 }
 
-static int __devexit q40kbd_remove(struct platform_device *pdev)
+static int q40kbd_remove(struct platform_device *pdev)
 {
 	struct q40kbd *q40kbd = platform_get_drvdata(pdev);
 
@@ -190,7 +190,7 @@
 		.name	= "q40kbd",
 		.owner	= THIS_MODULE,
 	},
-	.remove		= __devexit_p(q40kbd_remove),
+	.remove		= q40kbd_remove,
 };
 
 static int __init q40kbd_init(void)
diff --git a/drivers/input/serio/rpckbd.c b/drivers/input/serio/rpckbd.c
index 2af5df6..567566a 100644
--- a/drivers/input/serio/rpckbd.c
+++ b/drivers/input/serio/rpckbd.c
@@ -114,7 +114,7 @@
  * Allocate and initialize serio structure for subsequent registration
  * with serio core.
  */
-static int __devinit rpckbd_probe(struct platform_device *dev)
+static int rpckbd_probe(struct platform_device *dev)
 {
 	struct rpckbd_data *rpckbd;
 	struct serio *serio;
@@ -153,7 +153,7 @@
 	return 0;
 }
 
-static int __devexit rpckbd_remove(struct platform_device *dev)
+static int rpckbd_remove(struct platform_device *dev)
 {
 	struct serio *serio = platform_get_drvdata(dev);
 	struct rpckbd_data *rpckbd = serio->port_data;
@@ -166,7 +166,7 @@
 
 static struct platform_driver rpckbd_driver = {
 	.probe		= rpckbd_probe,
-	.remove		= __devexit_p(rpckbd_remove),
+	.remove		= rpckbd_remove,
 	.driver		= {
 		.name	= "kart",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/serio/sa1111ps2.c b/drivers/input/serio/sa1111ps2.c
index 3897667..b3e6889 100644
--- a/drivers/input/serio/sa1111ps2.c
+++ b/drivers/input/serio/sa1111ps2.c
@@ -193,7 +193,7 @@
 /*
  * Clear the input buffer.
  */
-static void __devinit ps2_clear_input(struct ps2if *ps2if)
+static void ps2_clear_input(struct ps2if *ps2if)
 {
 	int maxread = 100;
 
@@ -203,7 +203,7 @@
 	}
 }
 
-static unsigned int __devinit ps2_test_one(struct ps2if *ps2if,
+static unsigned int ps2_test_one(struct ps2if *ps2if,
 					   unsigned int mask)
 {
 	unsigned int val;
@@ -220,7 +220,7 @@
  * Test the keyboard interface.  We basically check to make sure that
  * we can drive each line to the keyboard independently of each other.
  */
-static int __devinit ps2_test(struct ps2if *ps2if)
+static int ps2_test(struct ps2if *ps2if)
 {
 	unsigned int stat;
 	int ret = 0;
@@ -251,7 +251,7 @@
 /*
  * Add one device to this driver.
  */
-static int __devinit ps2_probe(struct sa1111_dev *dev)
+static int ps2_probe(struct sa1111_dev *dev)
 {
 	struct ps2if *ps2if;
 	struct serio *serio;
@@ -334,7 +334,7 @@
 /*
  * Remove one device from this driver.
  */
-static int __devexit ps2_remove(struct sa1111_dev *dev)
+static int ps2_remove(struct sa1111_dev *dev)
 {
 	struct ps2if *ps2if = sa1111_get_drvdata(dev);
 
@@ -357,7 +357,7 @@
 	},
 	.devid		= SA1111_DEVID_PS2,
 	.probe		= ps2_probe,
-	.remove		= __devexit_p(ps2_remove),
+	.remove		= ps2_remove,
 };
 
 static int __init ps2_init(void)
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index d0f7533..25fc597 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -891,8 +891,6 @@
 	return serio_match_port(serio_drv->id_table, serio);
 }
 
-#ifdef CONFIG_HOTPLUG
-
 #define SERIO_ADD_UEVENT_VAR(fmt, val...)				\
 	do {								\
 		int err = add_uevent_var(env, fmt, val);		\
@@ -920,15 +918,6 @@
 }
 #undef SERIO_ADD_UEVENT_VAR
 
-#else
-
-static int serio_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	return -ENODEV;
-}
-
-#endif /* CONFIG_HOTPLUG */
-
 #ifdef CONFIG_PM
 static int serio_suspend(struct device *dev)
 {
diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c
index 1e983be..17be859 100644
--- a/drivers/input/serio/xilinx_ps2.c
+++ b/drivers/input/serio/xilinx_ps2.c
@@ -233,7 +233,7 @@
 * It returns 0 if the driver is bound to the PS/2 device, or a negative
  * value if there is an error.
  */
-static int __devinit xps2_of_probe(struct platform_device *ofdev)
+static int xps2_of_probe(struct platform_device *ofdev)
 {
 	struct resource r_irq; /* Interrupt resources */
 	struct resource r_mem; /* IO mem resources */
@@ -333,7 +333,7 @@
  * if the driver module is being unloaded. It frees any resources allocated to
  * the device.
  */
-static int __devexit xps2_of_remove(struct platform_device *of_dev)
+static int xps2_of_remove(struct platform_device *of_dev)
 {
 	struct xps2data *drvdata = platform_get_drvdata(of_dev);
 	struct resource r_mem; /* IO mem resources */
@@ -355,7 +355,7 @@
 }
 
 /* Match table for of_platform binding */
-static const struct of_device_id xps2_of_match[] __devinitconst = {
+static const struct of_device_id xps2_of_match[] = {
 	{ .compatible = "xlnx,xps-ps2-1.00.a", },
 	{ /* end of list */ },
 };
@@ -368,7 +368,7 @@
 		.of_match_table = xps2_of_match,
 	},
 	.probe		= xps2_of_probe,
-	.remove		= __devexit_p(xps2_of_remove),
+	.remove		= xps2_of_remove,
 };
 module_platform_driver(xps2_of_driver);
 
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index 858ad44..f92d34f 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -386,23 +386,40 @@
 				if (usage == WCM_DESKTOP) {
 					if (finger) {
 						features->device_type = BTN_TOOL_FINGER;
-						if (features->type == TABLETPC2FG) {
-							/* need to reset back */
+
+						switch (features->type) {
+						case TABLETPC2FG:
 							features->pktlen = WACOM_PKGLEN_TPC2FG;
+							break;
+
+						case MTSCREEN:
+						case WACOM_24HDT:
+							features->pktlen = WACOM_PKGLEN_MTOUCH;
+							break;
+
+						case MTTPC:
+							features->pktlen = WACOM_PKGLEN_MTTPC;
+							break;
+
+						case BAMBOO_PT:
+							features->pktlen = WACOM_PKGLEN_BBTOUCH;
+							break;
+
+						default:
+							features->pktlen = WACOM_PKGLEN_GRAPHIRE;
+							break;
 						}
 
-						if (features->type == MTSCREEN || features->type == WACOM_24HDT)
-							features->pktlen = WACOM_PKGLEN_MTOUCH;
-
-						if (features->type == BAMBOO_PT) {
-							/* need to reset back */
-							features->pktlen = WACOM_PKGLEN_BBTOUCH;
+						switch (features->type) {
+						case BAMBOO_PT:
 							features->x_phy =
 								get_unaligned_le16(&report[i + 5]);
 							features->x_max =
 								get_unaligned_le16(&report[i + 8]);
 							i += 15;
-						} else if (features->type == WACOM_24HDT) {
+							break;
+
+						case WACOM_24HDT:
 							features->x_max =
 								get_unaligned_le16(&report[i + 3]);
 							features->x_phy =
@@ -410,7 +427,9 @@
 							features->unit = report[i - 1];
 							features->unitExpo = report[i - 3];
 							i += 12;
-						} else {
+							break;
+
+						default:
 							features->x_max =
 								get_unaligned_le16(&report[i + 3]);
 							features->x_phy =
@@ -418,10 +437,11 @@
 							features->unit = report[i + 9];
 							features->unitExpo = report[i + 11];
 							i += 12;
+							break;
 						}
 					} else if (pen) {
 						/* penabled only accepts exact bytes of data */
-						if (features->type == TABLETPC2FG)
+						if (features->type >= TABLETPC)
 							features->pktlen = WACOM_PKGLEN_GRAPHIRE;
 						features->device_type = BTN_TOOL_PEN;
 						features->x_max =
@@ -434,32 +454,40 @@
 			case HID_USAGE_Y:
 				if (usage == WCM_DESKTOP) {
 					if (finger) {
-						int type = features->type;
-
-						if (type == TABLETPC2FG || type == MTSCREEN) {
+						switch (features->type) {
+						case TABLETPC2FG:
+						case MTSCREEN:
+						case MTTPC:
 							features->y_max =
 								get_unaligned_le16(&report[i + 3]);
 							features->y_phy =
 								get_unaligned_le16(&report[i + 6]);
 							i += 7;
-						} else if (type == WACOM_24HDT) {
+							break;
+
+						case WACOM_24HDT:
 							features->y_max =
 								get_unaligned_le16(&report[i + 3]);
 							features->y_phy =
 								get_unaligned_le16(&report[i - 2]);
 							i += 7;
-						} else if (type == BAMBOO_PT) {
+							break;
+
+						case BAMBOO_PT:
 							features->y_phy =
 								get_unaligned_le16(&report[i + 3]);
 							features->y_max =
 								get_unaligned_le16(&report[i + 6]);
 							i += 12;
-						} else {
+							break;
+
+						default:
 							features->y_max =
 								features->x_max;
 							features->y_phy =
 								get_unaligned_le16(&report[i + 3]);
 							i += 4;
+							break;
 						}
 					} else if (pen) {
 						features->y_max =
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 0a67031..264138f 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -467,9 +467,7 @@
 	/* general pen packet */
 	if ((data[1] & 0xb8) == 0xa0) {
 		t = (data[6] << 2) | ((data[7] >> 6) & 3);
-		if ((features->type >= INTUOS4S && features->type <= INTUOS4L) ||
-                    (features->type >= INTUOS5S && features->type <= INTUOS5L) ||
-		    (features->type >= WACOM_21UX2 && features->type <= WACOM_24HD)) {
+		if (features->type >= INTUOS4S && features->type <= WACOM_24HD) {
 			t = (t << 1) | (data[1] & 1);
 		}
 		input_report_abs(input, ABS_PRESSURE, t);
@@ -877,6 +875,11 @@
 	int i;
 	int current_num_contacts = data[2];
 	int contacts_to_send = 0;
+	int x_offset = 0;
+
+	/* MTTPC does not support Height and Width */
+	if (wacom->features.type == MTTPC)
+		x_offset = -4;
 
 	/*
 	 * First packet resets the counter since only the first
@@ -889,7 +892,7 @@
 	contacts_to_send = min(5, wacom->num_contacts_left);
 
 	for (i = 0; i < contacts_to_send; i++) {
-		int offset = (WACOM_BYTES_PER_MT_PACKET * i) + 3;
+		int offset = (WACOM_BYTES_PER_MT_PACKET + x_offset) * i + 3;
 		bool touch = data[offset] & 0x1;
 		int id = le16_to_cpup((__le16 *)&data[offset + 1]);
 		int slot = find_slot_from_contactid(wacom, id);
@@ -900,8 +903,8 @@
 		input_mt_slot(input, slot);
 		input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
 		if (touch) {
-			int x = le16_to_cpup((__le16 *)&data[offset + 7]);
-			int y = le16_to_cpup((__le16 *)&data[offset + 9]);
+			int x = le16_to_cpup((__le16 *)&data[offset + x_offset + 7]);
+			int y = le16_to_cpup((__le16 *)&data[offset + x_offset + 9]);
 			input_report_abs(input, ABS_MT_POSITION_X, x);
 			input_report_abs(input, ABS_MT_POSITION_Y, y);
 		}
@@ -1336,6 +1339,7 @@
 	case TABLETPCE:
 	case TABLETPC2FG:
 	case MTSCREEN:
+	case MTTPC:
 		sync = wacom_tpc_irq(wacom_wac, len);
 		break;
 
@@ -1657,6 +1661,7 @@
 		/* fall through */
 
 	case MTSCREEN:
+	case MTTPC:
 		if (features->device_type == BTN_TOOL_FINGER) {
 			wacom_wac->slots = kmalloc(features->touch_max *
 							sizeof(int),
@@ -2018,6 +2023,15 @@
 static const struct wacom_features wacom_features_0xEF =
 	{ "Wacom ISDv4 EF",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  255,
 	  0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x100 =
+	{ "Wacom ISDv4 100",      WACOM_PKGLEN_MTTPC,     26202, 16325,  255,
+	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x101 =
+	{ "Wacom ISDv4 101",      WACOM_PKGLEN_MTTPC,     26202, 16325,  255,
+	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x4001 =
+	{ "Wacom ISDv4 4001",      WACOM_PKGLEN_MTTPC,     26202, 16325,  255,
+	  0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x47 =
 	{ "Wacom Intuos2 6x8",    WACOM_PKGLEN_INTUOS,    20320, 16240, 1023,
 	  31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2034,7 +2048,8 @@
 	  .touch_max = 2 };
 static const struct wacom_features wacom_features_0xD2 =
 	{ "Wacom Bamboo Craft",   WACOM_PKGLEN_BBFUN,     14720,  9200, 1023,
-	  31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+	  31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES,
+	  .touch_max = 2 };
 static const struct wacom_features wacom_features_0xD3 =
 	{ "Wacom Bamboo 2FG 6x8", WACOM_PKGLEN_BBFUN,     21648, 13700, 1023,
 	  31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES,
@@ -2194,6 +2209,9 @@
 	{ USB_DEVICE_WACOM(0xEC) },
 	{ USB_DEVICE_WACOM(0xED) },
 	{ USB_DEVICE_WACOM(0xEF) },
+	{ USB_DEVICE_WACOM(0x100) },
+	{ USB_DEVICE_WACOM(0x101) },
+	{ USB_DEVICE_WACOM(0x4001) },
 	{ USB_DEVICE_WACOM(0x47) },
 	{ USB_DEVICE_WACOM(0xF4) },
 	{ USB_DEVICE_WACOM(0xF8) },
diff --git a/drivers/input/tablet/wacom_wac.h b/drivers/input/tablet/wacom_wac.h
index 345f1e7..9396d77 100644
--- a/drivers/input/tablet/wacom_wac.h
+++ b/drivers/input/tablet/wacom_wac.h
@@ -26,6 +26,7 @@
 #define WACOM_PKGLEN_BBPEN	10
 #define WACOM_PKGLEN_WIRELESS	32
 #define WACOM_PKGLEN_MTOUCH	62
+#define WACOM_PKGLEN_MTTPC	40
 
 /* wacom data size per MT contact */
 #define WACOM_BYTES_PER_MT_PACKET	11
@@ -88,6 +89,7 @@
 	TABLETPCE,
 	TABLETPC2FG,
 	MTSCREEN,
+	MTTPC,
 	MAX_TYPE
 };
 
diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
index 326218d..c706894 100644
--- a/drivers/input/touchscreen/88pm860x-ts.c
+++ b/drivers/input/touchscreen/88pm860x-ts.c
@@ -115,7 +115,7 @@
 }
 
 #ifdef CONFIG_OF
-static int __devinit pm860x_touch_dt_init(struct platform_device *pdev,
+static int pm860x_touch_dt_init(struct platform_device *pdev,
 					  struct pm860x_chip *chip,
 					  int *res_x)
 {
@@ -169,7 +169,7 @@
 #define pm860x_touch_dt_init(x, y, z)	(-1)
 #endif
 
-static int __devinit pm860x_touch_probe(struct platform_device *pdev)
+static int pm860x_touch_probe(struct platform_device *pdev)
 {
 	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
 	struct pm860x_touch_pdata *pdata = pdev->dev.platform_data;
@@ -293,7 +293,7 @@
 	return ret;
 }
 
-static int __devexit pm860x_touch_remove(struct platform_device *pdev)
+static int pm860x_touch_remove(struct platform_device *pdev)
 {
 	struct pm860x_touch *touch = platform_get_drvdata(pdev);
 
@@ -310,7 +310,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe	= pm860x_touch_probe,
-	.remove	= __devexit_p(pm860x_touch_remove),
+	.remove	= pm860x_touch_remove,
 };
 module_platform_driver(pm860x_touch_driver);
 
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index f7668b2..515cfe7 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -111,18 +111,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called auo-pixcir-ts.
 
-config TOUCHSCREEN_BITSY
-	tristate "Compaq iPAQ H3600 (Bitsy) touchscreen"
-	depends on SA1100_BITSY
-	select SERIO
-	help
-	  Say Y here if you have the h3600 (Bitsy) touchscreen.
-
-	  If unsure, say N.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called h3600_ts_input.
-
 config TOUCHSCREEN_BU21013
 	tristate "BU21013 based touch panel controllers"
 	depends on I2C
@@ -529,9 +517,9 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called touchwin.
 
-config TOUCHSCREEN_TI_TSCADC
+config TOUCHSCREEN_TI_AM335X_TSC
 	tristate "TI Touchscreen Interface"
-	depends on ARCH_OMAP2PLUS
+	depends on MFD_TI_AM335X_TSCADC
 	help
 	  Say Y here if you have 4/5/8 wire touchscreen controller
 	  to be connected to the ADC controller on your TI AM335x SoC.
@@ -539,7 +527,7 @@
 	  If unsure, say N.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called ti_tscadc.
+	  module will be called ti_am335x_tsc.
 
 config TOUCHSCREEN_ATMEL_TSADCC
 	tristate "Atmel Touchscreen Interface"
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 178eb12..6bfbeab 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -15,7 +15,6 @@
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_MXT)	+= atmel_mxt_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC)	+= atmel_tsadcc.o
 obj-$(CONFIG_TOUCHSCREEN_AUO_PIXCIR)	+= auo-pixcir-ts.o
-obj-$(CONFIG_TOUCHSCREEN_BITSY)		+= h3600_ts_input.o
 obj-$(CONFIG_TOUCHSCREEN_BU21013)	+= bu21013_ts.o
 obj-$(CONFIG_TOUCHSCREEN_CY8CTMG110)	+= cy8ctmg110_ts.o
 obj-$(CONFIG_TOUCHSCREEN_CYTTSP_CORE)	+= cyttsp_core.o
@@ -52,7 +51,7 @@
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)	+= s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ST1232)	+= st1232.o
 obj-$(CONFIG_TOUCHSCREEN_STMPE)		+= stmpe-ts.o
-obj-$(CONFIG_TOUCHSCREEN_TI_TSCADC)	+= ti_tscadc.o
+obj-$(CONFIG_TOUCHSCREEN_TI_AM335X_TSC)	+= ti_am335x_tsc.o
 obj-$(CONFIG_TOUCHSCREEN_TNETV107X)	+= tnetv107x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213)	+= touchit213.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT)	+= touchright.o
diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index 2c76921..23fa829b8 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c
@@ -682,7 +682,7 @@
 	}
 }
 
-static int __devinit ad7877_probe(struct spi_device *spi)
+static int ad7877_probe(struct spi_device *spi)
 {
 	struct ad7877			*ts;
 	struct input_dev		*input_dev;
@@ -810,7 +810,7 @@
 	return err;
 }
 
-static int __devexit ad7877_remove(struct spi_device *spi)
+static int ad7877_remove(struct spi_device *spi)
 {
 	struct ad7877 *ts = dev_get_drvdata(&spi->dev);
 
@@ -857,7 +857,7 @@
 		.pm	= &ad7877_pm,
 	},
 	.probe		= ad7877_probe,
-	.remove		= __devexit_p(ad7877_remove),
+	.remove		= ad7877_remove,
 };
 
 module_spi_driver(ad7877_driver);
diff --git a/drivers/input/touchscreen/ad7879-i2c.c b/drivers/input/touchscreen/ad7879-i2c.c
index 3054354..dcf3907 100644
--- a/drivers/input/touchscreen/ad7879-i2c.c
+++ b/drivers/input/touchscreen/ad7879-i2c.c
@@ -54,7 +54,7 @@
 	.write		= ad7879_i2c_write,
 };
 
-static int __devinit ad7879_i2c_probe(struct i2c_client *client,
+static int ad7879_i2c_probe(struct i2c_client *client,
 				      const struct i2c_device_id *id)
 {
 	struct ad7879 *ts;
@@ -75,7 +75,7 @@
 	return 0;
 }
 
-static int __devexit ad7879_i2c_remove(struct i2c_client *client)
+static int ad7879_i2c_remove(struct i2c_client *client)
 {
 	struct ad7879 *ts = i2c_get_clientdata(client);
 
@@ -98,7 +98,7 @@
 		.pm	= &ad7879_pm_ops,
 	},
 	.probe		= ad7879_i2c_probe,
-	.remove		= __devexit_p(ad7879_i2c_remove),
+	.remove		= ad7879_i2c_remove,
 	.id_table	= ad7879_id,
 };
 
diff --git a/drivers/input/touchscreen/ad7879-spi.c b/drivers/input/touchscreen/ad7879-spi.c
index db49abf..606da5b 100644
--- a/drivers/input/touchscreen/ad7879-spi.c
+++ b/drivers/input/touchscreen/ad7879-spi.c
@@ -110,7 +110,7 @@
 	.write		= ad7879_spi_write,
 };
 
-static int __devinit ad7879_spi_probe(struct spi_device *spi)
+static int ad7879_spi_probe(struct spi_device *spi)
 {
 	struct ad7879 *ts;
 	int err;
@@ -137,7 +137,7 @@
 	return 0;
 }
 
-static int __devexit ad7879_spi_remove(struct spi_device *spi)
+static int ad7879_spi_remove(struct spi_device *spi)
 {
 	struct ad7879 *ts = spi_get_drvdata(spi);
 
@@ -154,7 +154,7 @@
 		.pm	= &ad7879_pm_ops,
 	},
 	.probe		= ad7879_spi_probe,
-	.remove		= __devexit_p(ad7879_spi_remove),
+	.remove		= ad7879_spi_remove,
 };
 
 module_spi_driver(ad7879_spi_driver);
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 78e5d9a..4f702b3 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -955,7 +955,7 @@
 
 static SIMPLE_DEV_PM_OPS(ads7846_pm, ads7846_suspend, ads7846_resume);
 
-static int __devinit ads7846_setup_pendown(struct spi_device *spi,
+static int ads7846_setup_pendown(struct spi_device *spi,
 					   struct ads7846 *ts)
 {
 	struct ads7846_platform_data *pdata = spi->dev.platform_data;
@@ -997,7 +997,7 @@
  * Set up the transfers to read touchscreen state; this assumes we
  * use formula #2 for pressure, not #3.
  */
-static void __devinit ads7846_setup_spi_msg(struct ads7846 *ts,
+static void ads7846_setup_spi_msg(struct ads7846 *ts,
 				const struct ads7846_platform_data *pdata)
 {
 	struct spi_message *m = &ts->msg[0];
@@ -1196,7 +1196,7 @@
 	spi_message_add_tail(x, m);
 }
 
-static int __devinit ads7846_probe(struct spi_device *spi)
+static int ads7846_probe(struct spi_device *spi)
 {
 	struct ads7846 *ts;
 	struct ads7846_packet *packet;
@@ -1390,7 +1390,7 @@
 	return err;
 }
 
-static int __devexit ads7846_remove(struct spi_device *spi)
+static int ads7846_remove(struct spi_device *spi)
 {
 	struct ads7846 *ts = dev_get_drvdata(&spi->dev);
 
@@ -1434,7 +1434,7 @@
 		.pm	= &ads7846_pm,
 	},
 	.probe		= ads7846_probe,
-	.remove		= __devexit_p(ads7846_remove),
+	.remove		= ads7846_remove,
 };
 
 module_spi_driver(ads7846_driver);
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 1df2396a..d04f810 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -1095,7 +1095,7 @@
 	mxt_stop(data);
 }
 
-static int __devinit mxt_probe(struct i2c_client *client,
+static int mxt_probe(struct i2c_client *client,
 		const struct i2c_device_id *id)
 {
 	const struct mxt_platform_data *pdata = client->dev.platform_data;
@@ -1200,7 +1200,7 @@
 	return error;
 }
 
-static int __devexit mxt_remove(struct i2c_client *client)
+static int mxt_remove(struct i2c_client *client)
 {
 	struct mxt_data *data = i2c_get_clientdata(client);
 
@@ -1270,7 +1270,7 @@
 		.pm	= &mxt_pm_ops,
 	},
 	.probe		= mxt_probe,
-	.remove		= __devexit_p(mxt_remove),
+	.remove		= mxt_remove,
 	.id_table	= mxt_id,
 };
 
diff --git a/drivers/input/touchscreen/atmel_tsadcc.c b/drivers/input/touchscreen/atmel_tsadcc.c
index ea392ee..95f6785 100644
--- a/drivers/input/touchscreen/atmel_tsadcc.c
+++ b/drivers/input/touchscreen/atmel_tsadcc.c
@@ -177,7 +177,7 @@
  * The functions for inserting/removing us as a module.
  */
 
-static int __devinit atmel_tsadcc_probe(struct platform_device *pdev)
+static int atmel_tsadcc_probe(struct platform_device *pdev)
 {
 	struct atmel_tsadcc	*ts_dev;
 	struct input_dev	*input_dev;
@@ -323,7 +323,7 @@
 	return err;
 }
 
-static int __devexit atmel_tsadcc_remove(struct platform_device *pdev)
+static int atmel_tsadcc_remove(struct platform_device *pdev)
 {
 	struct atmel_tsadcc *ts_dev = dev_get_drvdata(&pdev->dev);
 	struct resource *res;
@@ -346,7 +346,7 @@
 
 static struct platform_driver atmel_tsadcc_driver = {
 	.probe		= atmel_tsadcc_probe,
-	.remove		= __devexit_p(atmel_tsadcc_remove),
+	.remove		= atmel_tsadcc_remove,
 	.driver		= {
 		.name	= "atmel_tsadcc",
 	},
diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c
index c7047b6..c6e19a9 100644
--- a/drivers/input/touchscreen/auo-pixcir-ts.c
+++ b/drivers/input/touchscreen/auo-pixcir-ts.c
@@ -286,7 +286,7 @@
 	return 0;
 }
 
-static __devinit int auo_pixcir_int_config(struct auo_pixcir_ts *ts,
+static int auo_pixcir_int_config(struct auo_pixcir_ts *ts,
 					   int int_setting)
 {
 	struct i2c_client *client = ts->client;
@@ -482,7 +482,7 @@
 static SIMPLE_DEV_PM_OPS(auo_pixcir_pm_ops, auo_pixcir_suspend,
 			 auo_pixcir_resume);
 
-static int __devinit auo_pixcir_probe(struct i2c_client *client,
+static int auo_pixcir_probe(struct i2c_client *client,
 				      const struct i2c_device_id *id)
 {
 	const struct auo_pixcir_ts_platdata *pdata = client->dev.platform_data;
@@ -599,7 +599,7 @@
 	return ret;
 }
 
-static int __devexit auo_pixcir_remove(struct i2c_client *client)
+static int auo_pixcir_remove(struct i2c_client *client)
 {
 	struct auo_pixcir_ts *ts = i2c_get_clientdata(client);
 	const struct auo_pixcir_ts_platdata *pdata = client->dev.platform_data;
@@ -631,7 +631,7 @@
 		.pm	= &auo_pixcir_pm_ops,
 	},
 	.probe		= auo_pixcir_probe,
-	.remove		= __devexit_p(auo_pixcir_remove),
+	.remove		= auo_pixcir_remove,
 	.id_table	= auo_pixcir_idtable,
 };
 
diff --git a/drivers/input/touchscreen/bu21013_ts.c b/drivers/input/touchscreen/bu21013_ts.c
index 5c487d2..b9b5dda 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -14,6 +14,9 @@
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
 #include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
 
 #define PEN_DOWN_INTR	0
 #define MAX_FINGERS	2
@@ -148,11 +151,12 @@
 struct bu21013_ts_data {
 	struct i2c_client *client;
 	wait_queue_head_t wait;
-	bool touch_stopped;
 	const struct bu21013_platform_device *chip;
 	struct input_dev *in_dev;
-	unsigned int intr_pin;
 	struct regulator *regulator;
+	unsigned int irq;
+	unsigned int intr_pin;
+	bool touch_stopped;
 };
 
 /**
@@ -262,7 +266,7 @@
 			return IRQ_NONE;
 		}
 
-		data->intr_pin = data->chip->irq_read_val();
+		data->intr_pin = gpio_get_value(data->chip->touch_pin);
 		if (data->intr_pin == PEN_DOWN_INTR)
 			wait_event_timeout(data->wait, data->touch_stopped,
 					   msecs_to_jiffies(2));
@@ -418,10 +422,72 @@
 {
 	bu21013_data->touch_stopped = true;
 	wake_up(&bu21013_data->wait);
-	free_irq(bu21013_data->chip->irq, bu21013_data);
+	free_irq(bu21013_data->irq, bu21013_data);
 }
 
 /**
+ * bu21013_cs_disable() - deconfigures the touch panel controller
+ * @bu21013_data: device structure pointer
+ *
+ * This function is used to deconfigure the chip selection
+ * for touch panel controller.
+ */
+static void bu21013_cs_disable(struct bu21013_ts_data *bu21013_data)
+{
+	int error;
+
+	error = gpio_direction_output(bu21013_data->chip->cs_pin, 0);
+	if (error < 0)
+		dev_warn(&bu21013_data->client->dev,
+			 "%s: gpio direction failed, error: %d\n",
+			 __func__, error);
+	else
+		gpio_set_value(bu21013_data->chip->cs_pin, 0);
+
+	gpio_free(bu21013_data->chip->cs_pin);
+}
+
+#ifdef CONFIG_OF
+static const struct bu21013_platform_device *
+bu21013_parse_dt(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct bu21013_platform_device *pdata;
+
+	if (!np) {
+		dev_err(dev, "no device tree or platform data\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	pdata->y_flip = pdata->x_flip = false;
+
+	pdata->x_flip = of_property_read_bool(np, "rohm,flip-x");
+	pdata->y_flip = of_property_read_bool(np, "rohm,flip-y");
+
+	of_property_read_u32(np, "rohm,touch-max-x", &pdata->touch_x_max);
+	of_property_read_u32(np, "rohm,touch-max-y", &pdata->touch_y_max);
+
+	pdata->touch_pin = of_get_named_gpio(np, "touch-gpio", 0);
+	pdata->cs_pin = of_get_named_gpio(np, "reset-gpio", 0);
+
+	pdata->ext_clk = false;
+
+	return pdata;
+}
+#else
+static inline const struct bu21013_platform_device *
+bu21013_parse_dt(struct device *dev)
+{
+	dev_err(dev, "no platform data available\n");
+	return ERR_PTR(-EINVAL);
+}
+#endif
+
+/**
  * bu21013_probe() - initializes the i2c-client touchscreen driver
  * @client: i2c client structure pointer
  * @id: i2c device id pointer
@@ -429,13 +495,13 @@
  * This function used to initializes the i2c-client touchscreen
  * driver and returns integer.
  */
-static int __devinit bu21013_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+static int bu21013_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
 {
+	const struct bu21013_platform_device *pdata =
+					dev_get_platdata(&client->dev);
 	struct bu21013_ts_data *bu21013_data;
 	struct input_dev *in_dev;
-	const struct bu21013_platform_device *pdata =
-					client->dev.platform_data;
 	int error;
 
 	if (!i2c_check_functionality(client->adapter,
@@ -445,7 +511,13 @@
 	}
 
 	if (!pdata) {
-		dev_err(&client->dev, "platform data not defined\n");
+		pdata = bu21013_parse_dt(&client->dev);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+	}
+
+	if (!gpio_is_valid(pdata->touch_pin)) {
+		dev_err(&client->dev, "invalid touch_pin supplied\n");
 		return -EINVAL;
 	}
 
@@ -460,8 +532,9 @@
 	bu21013_data->in_dev = in_dev;
 	bu21013_data->chip = pdata;
 	bu21013_data->client = client;
+	bu21013_data->irq = gpio_to_irq(pdata->touch_pin);
 
-	bu21013_data->regulator = regulator_get(&client->dev, "V-TOUCH");
+	bu21013_data->regulator = regulator_get(&client->dev, "avdd");
 	if (IS_ERR(bu21013_data->regulator)) {
 		dev_err(&client->dev, "regulator_get failed\n");
 		error = PTR_ERR(bu21013_data->regulator);
@@ -478,12 +551,11 @@
 	init_waitqueue_head(&bu21013_data->wait);
 
 	/* configure the gpio pins */
-	if (pdata->cs_en) {
-		error = pdata->cs_en(pdata->cs_pin);
-		if (error < 0) {
-			dev_err(&client->dev, "chip init failed\n");
-			goto err_disable_regulator;
-		}
+	error = gpio_request_one(pdata->cs_pin, GPIOF_OUT_INIT_HIGH,
+				 "touchp_reset");
+	if (error < 0) {
+		dev_err(&client->dev, "Unable to request gpio reset_pin\n");
+		goto err_disable_regulator;
 	}
 
 	/* configure the touch panel controller */
@@ -508,12 +580,13 @@
 						pdata->touch_y_max, 0, 0);
 	input_set_drvdata(in_dev, bu21013_data);
 
-	error = request_threaded_irq(pdata->irq, NULL, bu21013_gpio_irq,
+	error = request_threaded_irq(bu21013_data->irq, NULL, bu21013_gpio_irq,
 				     IRQF_TRIGGER_FALLING | IRQF_SHARED |
 					IRQF_ONESHOT,
 				     DRIVER_TP, bu21013_data);
 	if (error) {
-		dev_err(&client->dev, "request irq %d failed\n", pdata->irq);
+		dev_err(&client->dev, "request irq %d failed\n",
+			bu21013_data->irq);
 		goto err_cs_disable;
 	}
 
@@ -531,7 +604,7 @@
 err_free_irq:
 	bu21013_free_irq(bu21013_data);
 err_cs_disable:
-	pdata->cs_dis(pdata->cs_pin);
+	bu21013_cs_disable(bu21013_data);
 err_disable_regulator:
 	regulator_disable(bu21013_data->regulator);
 err_put_regulator:
@@ -549,13 +622,13 @@
  * This function uses to remove the i2c-client
  * touchscreen driver and returns integer.
  */
-static int __devexit bu21013_remove(struct i2c_client *client)
+static int bu21013_remove(struct i2c_client *client)
 {
 	struct bu21013_ts_data *bu21013_data = i2c_get_clientdata(client);
 
 	bu21013_free_irq(bu21013_data);
 
-	bu21013_data->chip->cs_dis(bu21013_data->chip->cs_pin);
+	bu21013_cs_disable(bu21013_data);
 
 	input_unregister_device(bu21013_data->in_dev);
 
@@ -584,9 +657,9 @@
 
 	bu21013_data->touch_stopped = true;
 	if (device_may_wakeup(&client->dev))
-		enable_irq_wake(bu21013_data->chip->irq);
+		enable_irq_wake(bu21013_data->irq);
 	else
-		disable_irq(bu21013_data->chip->irq);
+		disable_irq(bu21013_data->irq);
 
 	regulator_disable(bu21013_data->regulator);
 
@@ -621,9 +694,9 @@
 	bu21013_data->touch_stopped = false;
 
 	if (device_may_wakeup(&client->dev))
-		disable_irq_wake(bu21013_data->chip->irq);
+		disable_irq_wake(bu21013_data->irq);
 	else
-		enable_irq(bu21013_data->chip->irq);
+		enable_irq(bu21013_data->irq);
 
 	return 0;
 }
@@ -649,7 +722,7 @@
 #endif
 	},
 	.probe		=	bu21013_probe,
-	.remove		=	__devexit_p(bu21013_remove),
+	.remove		=	bu21013_remove,
 	.id_table	=	bu21013_id,
 };
 
diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c
index 464f1bf..96e0eedc 100644
--- a/drivers/input/touchscreen/cy8ctmg110_ts.c
+++ b/drivers/input/touchscreen/cy8ctmg110_ts.c
@@ -99,9 +99,18 @@
 	int ret;
 	struct i2c_msg msg[2] = {
 		/* first write slave position to i2c devices */
-		{ client->addr, 0, 1, &cmd },
+		{
+			.addr = client->addr,
+			.len = 1,
+			.buf = &cmd
+		},
 		/* Second read data from position */
-		{ client->addr, I2C_M_RD, len, data }
+		{
+			.addr = client->addr,
+			.flags = I2C_M_RD,
+			.len = len,
+			.buf = data
+		}
 	};
 
 	ret = i2c_transfer(client->adapter, msg, 2);
@@ -166,7 +175,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit cy8ctmg110_probe(struct i2c_client *client,
+static int cy8ctmg110_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	const struct cy8ctmg110_pdata *pdata = client->dev.platform_data;
@@ -314,7 +323,7 @@
 static SIMPLE_DEV_PM_OPS(cy8ctmg110_pm, cy8ctmg110_suspend, cy8ctmg110_resume);
 #endif
 
-static int __devexit cy8ctmg110_remove(struct i2c_client *client)
+static int cy8ctmg110_remove(struct i2c_client *client)
 {
 	struct cy8ctmg110 *ts = i2c_get_clientdata(client);
 
@@ -348,7 +357,7 @@
 	},
 	.id_table	= cy8ctmg110_idtable,
 	.probe		= cy8ctmg110_probe,
-	.remove		= __devexit_p(cy8ctmg110_remove),
+	.remove		= cy8ctmg110_remove,
 };
 
 module_i2c_driver(cy8ctmg110_driver);
diff --git a/drivers/input/touchscreen/cyttsp_i2c.c b/drivers/input/touchscreen/cyttsp_i2c.c
index 2af1d0c..4dbdf44 100644
--- a/drivers/input/touchscreen/cyttsp_i2c.c
+++ b/drivers/input/touchscreen/cyttsp_i2c.c
@@ -81,7 +81,7 @@
 	.read           = cyttsp_i2c_read_block_data,
 };
 
-static int __devinit cyttsp_i2c_probe(struct i2c_client *client,
+static int cyttsp_i2c_probe(struct i2c_client *client,
 				      const struct i2c_device_id *id)
 {
 	struct cyttsp *ts;
@@ -102,7 +102,7 @@
 	return 0;
 }
 
-static int __devexit cyttsp_i2c_remove(struct i2c_client *client)
+static int cyttsp_i2c_remove(struct i2c_client *client)
 {
 	struct cyttsp *ts = i2c_get_clientdata(client);
 
@@ -124,7 +124,7 @@
 		.pm	= &cyttsp_pm_ops,
 	},
 	.probe		= cyttsp_i2c_probe,
-	.remove		= __devexit_p(cyttsp_i2c_remove),
+	.remove		= cyttsp_i2c_remove,
 	.id_table	= cyttsp_i2c_id,
 };
 
diff --git a/drivers/input/touchscreen/cyttsp_spi.c b/drivers/input/touchscreen/cyttsp_spi.c
index 9f26341..638e203 100644
--- a/drivers/input/touchscreen/cyttsp_spi.c
+++ b/drivers/input/touchscreen/cyttsp_spi.c
@@ -147,7 +147,7 @@
 	.read		= cyttsp_spi_read_block_data,
 };
 
-static int __devinit cyttsp_spi_probe(struct spi_device *spi)
+static int cyttsp_spi_probe(struct spi_device *spi)
 {
 	struct cyttsp *ts;
 	int error;
@@ -172,7 +172,7 @@
 	return 0;
 }
 
-static int __devexit cyttsp_spi_remove(struct spi_device *spi)
+static int cyttsp_spi_remove(struct spi_device *spi)
 {
 	struct cyttsp *ts = spi_get_drvdata(spi);
 
@@ -188,7 +188,7 @@
 		.pm	= &cyttsp_pm_ops,
 	},
 	.probe  = cyttsp_spi_probe,
-	.remove = __devexit_p(cyttsp_spi_remove),
+	.remove = cyttsp_spi_remove,
 };
 
 module_spi_driver(cyttsp_spi_driver);
diff --git a/drivers/input/touchscreen/da9034-ts.c b/drivers/input/touchscreen/da9034-ts.c
index 36b65cf..34ad841 100644
--- a/drivers/input/touchscreen/da9034-ts.c
+++ b/drivers/input/touchscreen/da9034-ts.c
@@ -297,7 +297,7 @@
 }
 
 
-static int __devinit da9034_touch_probe(struct platform_device *pdev)
+static int da9034_touch_probe(struct platform_device *pdev)
 {
 	struct da9034_touch_pdata *pdata = pdev->dev.platform_data;
 	struct da9034_touch *touch;
@@ -361,7 +361,7 @@
 	return ret;
 }
 
-static int __devexit da9034_touch_remove(struct platform_device *pdev)
+static int da9034_touch_remove(struct platform_device *pdev)
 {
 	struct da9034_touch *touch = platform_get_drvdata(pdev);
 
@@ -377,7 +377,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= da9034_touch_probe,
-	.remove		= __devexit_p(da9034_touch_remove),
+	.remove		= da9034_touch_remove,
 };
 module_platform_driver(da9034_touch_driver);
 
diff --git a/drivers/input/touchscreen/da9052_tsi.c b/drivers/input/touchscreen/da9052_tsi.c
index e8df341..8f561e2 100644
--- a/drivers/input/touchscreen/da9052_tsi.c
+++ b/drivers/input/touchscreen/da9052_tsi.c
@@ -27,8 +27,6 @@
 	struct input_dev *dev;
 	struct delayed_work ts_pen_work;
 	struct mutex mutex;
-	unsigned int irq_pendwn;
-	unsigned int irq_datardy;
 	bool stopped;
 	bool adc_on;
 };
@@ -45,8 +43,8 @@
 
 	if (!tsi->stopped) {
 		/* Mask PEN_DOWN event and unmask TSI_READY event */
-		disable_irq_nosync(tsi->irq_pendwn);
-		enable_irq(tsi->irq_datardy);
+		da9052_disable_irq_nosync(tsi->da9052, DA9052_IRQ_PENDOWN);
+		da9052_enable_irq(tsi->da9052, DA9052_IRQ_TSIREADY);
 
 		da9052_ts_adc_toggle(tsi, true);
 
@@ -137,13 +135,13 @@
 				return;
 
 			/* Mask TSI_READY event and unmask PEN_DOWN event */
-			disable_irq(tsi->irq_datardy);
-			enable_irq(tsi->irq_pendwn);
+			da9052_disable_irq(tsi->da9052, DA9052_IRQ_TSIREADY);
+			da9052_enable_irq(tsi->da9052, DA9052_IRQ_PENDOWN);
 		}
 	}
 }
 
-static int __devinit da9052_ts_configure_gpio(struct da9052 *da9052)
+static int da9052_ts_configure_gpio(struct da9052 *da9052)
 {
 	int error;
 
@@ -162,7 +160,7 @@
 	return 0;
 }
 
-static int __devinit da9052_configure_tsi(struct da9052_tsi *tsi)
+static int da9052_configure_tsi(struct da9052_tsi *tsi)
 {
 	int error;
 
@@ -197,7 +195,7 @@
 	mb();
 
 	/* Unmask PEN_DOWN event */
-	enable_irq(tsi->irq_pendwn);
+	da9052_enable_irq(tsi->da9052, DA9052_IRQ_PENDOWN);
 
 	/* Enable Pen Detect Circuit */
 	return da9052_reg_update(tsi->da9052, DA9052_TSI_CONT_A_REG,
@@ -210,11 +208,11 @@
 
 	tsi->stopped = true;
 	mb();
-	disable_irq(tsi->irq_pendwn);
+	da9052_disable_irq(tsi->da9052, DA9052_IRQ_PENDOWN);
 	cancel_delayed_work_sync(&tsi->ts_pen_work);
 
 	if (tsi->adc_on) {
-		disable_irq(tsi->irq_datardy);
+		da9052_disable_irq(tsi->da9052, DA9052_IRQ_TSIREADY);
 		da9052_ts_adc_toggle(tsi, false);
 
 		/*
@@ -222,33 +220,24 @@
 		 * twice and we need to enable it to keep enable/disable
 		 * counter balanced. IRQ is still off though.
 		 */
-		enable_irq(tsi->irq_pendwn);
+		da9052_enable_irq(tsi->da9052, DA9052_IRQ_PENDOWN);
 	}
 
 	/* Disable Pen Detect Circuit */
 	da9052_reg_update(tsi->da9052, DA9052_TSI_CONT_A_REG, 1 << 1, 0);
 }
 
-static int __devinit da9052_ts_probe(struct platform_device *pdev)
+static int da9052_ts_probe(struct platform_device *pdev)
 {
 	struct da9052 *da9052;
 	struct da9052_tsi *tsi;
 	struct input_dev *input_dev;
-	int irq_pendwn;
-	int irq_datardy;
 	int error;
 
 	da9052 = dev_get_drvdata(pdev->dev.parent);
 	if (!da9052)
 		return -EINVAL;
 
-	irq_pendwn = platform_get_irq_byname(pdev, "PENDWN");
-	irq_datardy = platform_get_irq_byname(pdev, "TSIRDY");
-	if (irq_pendwn < 0 || irq_datardy < 0) {
-		dev_err(da9052->dev, "Unable to determine device interrupts\n");
-		return -ENXIO;
-	}
-
 	tsi = kzalloc(sizeof(struct da9052_tsi), GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!tsi || !input_dev) {
@@ -258,8 +247,6 @@
 
 	tsi->da9052 = da9052;
 	tsi->dev = input_dev;
-	tsi->irq_pendwn = da9052->irq_base + irq_pendwn;
-	tsi->irq_datardy = da9052->irq_base + irq_datardy;
 	tsi->stopped = true;
 	INIT_DELAYED_WORK(&tsi->ts_pen_work, da9052_ts_pen_work);
 
@@ -287,31 +274,25 @@
 	/* Disable ADC */
 	da9052_ts_adc_toggle(tsi, false);
 
-	error = request_threaded_irq(tsi->irq_pendwn,
-				     NULL, da9052_ts_pendwn_irq,
-				     IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				     "PENDWN", tsi);
+	error = da9052_request_irq(tsi->da9052, DA9052_IRQ_PENDOWN,
+				"pendown-irq", da9052_ts_pendwn_irq, tsi);
 	if (error) {
 		dev_err(tsi->da9052->dev,
-			"Failed to register PENDWN IRQ %d, error = %d\n",
-			tsi->irq_pendwn, error);
+			"Failed to register PENDWN IRQ: %d\n", error);
 		goto err_free_mem;
 	}
 
-	error = request_threaded_irq(tsi->irq_datardy,
-				     NULL, da9052_ts_datardy_irq,
-				     IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				     "TSIRDY", tsi);
+	error = da9052_request_irq(tsi->da9052, DA9052_IRQ_TSIREADY,
+				"tsiready-irq", da9052_ts_datardy_irq, tsi);
 	if (error) {
 		dev_err(tsi->da9052->dev,
-			"Failed to register TSIRDY IRQ %d, error = %d\n",
-			tsi->irq_datardy, error);
+			"Failed to register TSIRDY IRQ :%d\n", error);
 		goto err_free_pendwn_irq;
 	}
 
 	/* Mask PEN_DOWN and TSI_READY events */
-	disable_irq(tsi->irq_pendwn);
-	disable_irq(tsi->irq_datardy);
+	da9052_disable_irq(tsi->da9052, DA9052_IRQ_PENDOWN);
+	da9052_disable_irq(tsi->da9052, DA9052_IRQ_TSIREADY);
 
 	error = da9052_configure_tsi(tsi);
 	if (error)
@@ -326,9 +307,9 @@
 	return 0;
 
 err_free_datardy_irq:
-	free_irq(tsi->irq_datardy, tsi);
+	da9052_free_irq(tsi->da9052, DA9052_IRQ_TSIREADY, tsi);
 err_free_pendwn_irq:
-	free_irq(tsi->irq_pendwn, tsi);
+	da9052_free_irq(tsi->da9052, DA9052_IRQ_PENDOWN, tsi);
 err_free_mem:
 	kfree(tsi);
 	input_free_device(input_dev);
@@ -336,14 +317,14 @@
 	return error;
 }
 
-static int  __devexit da9052_ts_remove(struct platform_device *pdev)
+static int  da9052_ts_remove(struct platform_device *pdev)
 {
 	struct da9052_tsi *tsi = platform_get_drvdata(pdev);
 
 	da9052_reg_write(tsi->da9052, DA9052_LDO9_REG, 0x19);
 
-	free_irq(tsi->irq_pendwn, tsi);
-	free_irq(tsi->irq_datardy, tsi);
+	da9052_free_irq(tsi->da9052, DA9052_IRQ_TSIREADY, tsi);
+	da9052_free_irq(tsi->da9052, DA9052_IRQ_PENDOWN, tsi);
 
 	input_unregister_device(tsi->dev);
 	kfree(tsi);
@@ -355,7 +336,7 @@
 
 static struct platform_driver da9052_tsi_driver = {
 	.probe	= da9052_ts_probe,
-	.remove	= __devexit_p(da9052_ts_remove),
+	.remove	= da9052_ts_remove,
 	.driver	= {
 		.name	= "da9052-tsi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 099d144a..a917015 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -491,14 +491,6 @@
 DEFINE_SIMPLE_ATTRIBUTE(debugfs_mode_fops, edt_ft5x06_debugfs_mode_get,
 			edt_ft5x06_debugfs_mode_set, "%llu\n");
 
-static int edt_ft5x06_debugfs_raw_data_open(struct inode *inode,
-					    struct file *file)
-{
-	file->private_data = inode->i_private;
-
-	return 0;
-}
-
 static ssize_t edt_ft5x06_debugfs_raw_data_read(struct file *file,
 				char __user *buf, size_t count, loff_t *off)
 {
@@ -579,11 +571,11 @@
 
 
 static const struct file_operations debugfs_raw_data_fops = {
-	.open = edt_ft5x06_debugfs_raw_data_open,
+	.open = simple_open,
 	.read = edt_ft5x06_debugfs_raw_data_read,
 };
 
-static void __devinit
+static void
 edt_ft5x06_ts_prepare_debugfs(struct edt_ft5x06_ts_data *tsdata,
 			      const char *debugfs_name)
 {
@@ -600,7 +592,7 @@
 			    tsdata->debug_dir, tsdata, &debugfs_raw_data_fops);
 }
 
-static void __devexit
+static void
 edt_ft5x06_ts_teardown_debugfs(struct edt_ft5x06_ts_data *tsdata)
 {
 	if (tsdata->debug_dir)
@@ -625,7 +617,7 @@
 
 
 
-static int __devinit edt_ft5x06_ts_reset(struct i2c_client *client,
+static int edt_ft5x06_ts_reset(struct i2c_client *client,
 					 int reset_pin)
 {
 	int error;
@@ -649,7 +641,7 @@
 	return 0;
 }
 
-static int __devinit edt_ft5x06_ts_identify(struct i2c_client *client,
+static int edt_ft5x06_ts_identify(struct i2c_client *client,
 					    char *model_name,
 					    char *fw_version)
 {
@@ -683,7 +675,7 @@
 	    pdata->name <= edt_ft5x06_attr_##name.limit_high)		\
 		edt_ft5x06_register_write(tsdata, reg, pdata->name)
 
-static void __devinit
+static void
 edt_ft5x06_ts_get_defaults(struct edt_ft5x06_ts_data *tsdata,
 			   const struct edt_ft5x06_platform_data *pdata)
 {
@@ -697,7 +689,7 @@
 	EDT_ATTR_CHECKSET(report_rate, WORK_REGISTER_REPORT_RATE);
 }
 
-static void __devinit
+static void
 edt_ft5x06_ts_get_parameters(struct edt_ft5x06_ts_data *tsdata)
 {
 	tsdata->threshold = edt_ft5x06_register_read(tsdata,
@@ -710,7 +702,7 @@
 	tsdata->num_y = edt_ft5x06_register_read(tsdata, WORK_REGISTER_NUM_Y);
 }
 
-static int __devinit edt_ft5x06_ts_probe(struct i2c_client *client,
+static int edt_ft5x06_ts_probe(struct i2c_client *client,
 					 const struct i2c_device_id *id)
 {
 	const struct edt_ft5x06_platform_data *pdata =
@@ -830,7 +822,7 @@
 	return error;
 }
 
-static int __devexit edt_ft5x06_ts_remove(struct i2c_client *client)
+static int edt_ft5x06_ts_remove(struct i2c_client *client)
 {
 	const struct edt_ft5x06_platform_data *pdata =
 						dev_get_platdata(&client->dev);
@@ -891,7 +883,7 @@
 	},
 	.id_table = edt_ft5x06_ts_id,
 	.probe    = edt_ft5x06_ts_probe,
-	.remove   = __devexit_p(edt_ft5x06_ts_remove),
+	.remove   = edt_ft5x06_ts_remove,
 };
 
 module_i2c_driver(edt_ft5x06_ts_driver);
diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c
index 908407e..55255a9 100644
--- a/drivers/input/touchscreen/eeti_ts.c
+++ b/drivers/input/touchscreen/eeti_ts.c
@@ -154,7 +154,7 @@
 	eeti_ts_stop(priv);
 }
 
-static int __devinit eeti_ts_probe(struct i2c_client *client,
+static int eeti_ts_probe(struct i2c_client *client,
 				   const struct i2c_device_id *idp)
 {
 	struct eeti_ts_platform_data *pdata = client->dev.platform_data;
@@ -248,7 +248,7 @@
 	return err;
 }
 
-static int __devexit eeti_ts_remove(struct i2c_client *client)
+static int eeti_ts_remove(struct i2c_client *client)
 {
 	struct eeti_ts_priv *priv = i2c_get_clientdata(client);
 
@@ -321,7 +321,7 @@
 #endif
 	},
 	.probe = eeti_ts_probe,
-	.remove = __devexit_p(eeti_ts_remove),
+	.remove = eeti_ts_remove,
 	.id_table = eeti_ts_id,
 };
 
diff --git a/drivers/input/touchscreen/egalax_ts.c b/drivers/input/touchscreen/egalax_ts.c
index 13fa62f..17c9097 100644
--- a/drivers/input/touchscreen/egalax_ts.c
+++ b/drivers/input/touchscreen/egalax_ts.c
@@ -153,7 +153,7 @@
 	return 0;
 }
 
-static int __devinit egalax_firmware_version(struct i2c_client *client)
+static int egalax_firmware_version(struct i2c_client *client)
 {
 	static const u8 cmd[MAX_I2C_DATA_LEN] = { 0x03, 0x03, 0xa, 0x01, 0x41 };
 	int ret;
@@ -165,7 +165,7 @@
 	return 0;
 }
 
-static int __devinit egalax_ts_probe(struct i2c_client *client,
+static int egalax_ts_probe(struct i2c_client *client,
 				       const struct i2c_device_id *id)
 {
 	struct egalax_ts *ts;
@@ -246,7 +246,7 @@
 	return error;
 }
 
-static __devexit int egalax_ts_remove(struct i2c_client *client)
+static int egalax_ts_remove(struct i2c_client *client)
 {
 	struct egalax_ts *ts = i2c_get_clientdata(client);
 
@@ -301,7 +301,7 @@
 	},
 	.id_table	= egalax_ts_id,
 	.probe		= egalax_ts_probe,
-	.remove		= __devexit_p(egalax_ts_remove),
+	.remove		= egalax_ts_remove,
 };
 
 module_i2c_driver(egalax_ts_driver);
diff --git a/drivers/input/touchscreen/h3600_ts_input.c b/drivers/input/touchscreen/h3600_ts_input.c
deleted file mode 100644
index b9e8686..0000000
--- a/drivers/input/touchscreen/h3600_ts_input.c
+++ /dev/null
@@ -1,479 +0,0 @@
-/*
- *  Copyright (c) 2001 "Crazy" James Simmons jsimmons@transvirtual.com
- *
- *  Sponsored by Transvirtual Technology.
- *
- *  Derived from the code in h3600_ts.[ch] by Charles Flynn
- */
-
-/*
- * Driver for the h3600 Touch Screen and other Atmel controlled devices.
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so by
- * e-mail - mail your message to <jsimmons@transvirtual.com>.
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/input.h>
-#include <linux/serio.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-
-/* SA1100 serial defines */
-#include <mach/hardware.h>
-#include <mach/irqs.h>
-
-#define DRIVER_DESC	"H3600 touchscreen driver"
-
-MODULE_AUTHOR("James Simmons <jsimmons@transvirtual.com>");
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-
-/*
- * Definitions & global arrays.
- */
-
-/* The start and end of frame characters SOF and EOF */
-#define CHAR_SOF                0x02
-#define CHAR_EOF                0x03
-#define FRAME_OVERHEAD          3       /* CHAR_SOF,CHAR_EOF,LENGTH = 3 */
-
-/*
-        Atmel events and response IDs contained in frame.
-        Programmer has no control over these numbers.
-        TODO there are holes - specifically  1,7,0x0a
-*/
-#define VERSION_ID              0       /* Get Version (request/response) */
-#define KEYBD_ID                2       /* Keyboard (event) */
-#define TOUCHS_ID               3       /* Touch Screen (event)*/
-#define EEPROM_READ_ID          4       /* (request/response) */
-#define EEPROM_WRITE_ID         5       /* (request/response) */
-#define THERMAL_ID              6       /* (request/response) */
-#define NOTIFY_LED_ID           8       /* (request/response) */
-#define BATTERY_ID              9       /* (request/response) */
-#define SPI_READ_ID             0x0b    /* ( request/response) */
-#define SPI_WRITE_ID            0x0c    /* ( request/response) */
-#define FLITE_ID                0x0d    /* backlight ( request/response) */
-#define STX_ID                  0xa1    /* extension pack status (req/resp) */
-
-#define MAX_ID                  14
-
-#define H3600_MAX_LENGTH 16
-#define H3600_KEY 0xf
-
-#define H3600_SCANCODE_RECORD	1	 /* 1 -> record button */
-#define H3600_SCANCODE_CALENDAR 2	 /* 2 -> calendar */
-#define H3600_SCANCODE_CONTACTS 3	 /* 3 -> contact */
-#define H3600_SCANCODE_Q	4	 /* 4 -> Q button */
-#define	H3600_SCANCODE_START	5	 /* 5 -> start menu */
-#define	H3600_SCANCODE_UP	6	 /* 6 -> up */
-#define H3600_SCANCODE_RIGHT	7	 /* 7 -> right */
-#define H3600_SCANCODE_LEFT	8	 /* 8 -> left */
-#define H3600_SCANCODE_DOWN	9	 /* 9 -> down */
-
-/*
- * Per-touchscreen data.
- */
-struct h3600_dev {
-	struct input_dev *dev;
-	struct serio *serio;
-	unsigned char event;	/* event ID from packet */
-	unsigned char chksum;
-	unsigned char len;
-	unsigned char idx;
-	unsigned char buf[H3600_MAX_LENGTH];
-	char phys[32];
-};
-
-static irqreturn_t action_button_handler(int irq, void *dev_id)
-{
-	int down = (GPLR & GPIO_BITSY_ACTION_BUTTON) ? 0 : 1;
-	struct input_dev *dev = dev_id;
-
-	input_report_key(dev, KEY_ENTER, down);
-	input_sync(dev);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t npower_button_handler(int irq, void *dev_id)
-{
-	int down = (GPLR & GPIO_BITSY_NPOWER_BUTTON) ? 0 : 1;
-	struct input_dev *dev = dev_id;
-
-	/*
-	 * This interrupt is only called when we release the key. So we have
-	 * to fake a key press.
-	 */
-	input_report_key(dev, KEY_SUSPEND, 1);
-	input_report_key(dev, KEY_SUSPEND, down);
-	input_sync(dev);
-
-	return IRQ_HANDLED;
-}
-
-#ifdef CONFIG_PM
-
-static int flite_brightness = 25;
-
-enum flite_pwr {
-	FLITE_PWR_OFF = 0,
-	FLITE_PWR_ON = 1
-};
-
-/*
- * h3600_flite_power: enables or disables power to frontlight, using last bright */
-unsigned int h3600_flite_power(struct input_dev *dev, enum flite_pwr pwr)
-{
-	unsigned char brightness = (pwr == FLITE_PWR_OFF) ? 0 : flite_brightness;
-	struct h3600_dev *ts = input_get_drvdata(dev);
-
-	/* Must be in this order */
-	serio_write(ts->serio, 1);
-	serio_write(ts->serio, pwr);
-	serio_write(ts->serio, brightness);
-
-	return 0;
-}
-
-#endif
-
-/*
- * This function translates the native event packets to linux input event
- * packets. Some packets coming from serial are not touchscreen related. In
- * this case we send them off to be processed elsewhere.
- */
-static void h3600ts_process_packet(struct h3600_dev *ts)
-{
-	struct input_dev *dev = ts->dev;
-	static int touched = 0;
-	int key, down = 0;
-
-	switch (ts->event) {
-		/*
-		   Buttons - returned as a single byte
-			7 6 5 4 3 2 1 0
-			S x x x N N N N
-
-		   S       switch state ( 0=pressed 1=released)
-		   x       Unused.
-		   NNNN    switch number 0-15
-
-		   Note: This is true for non interrupt generated key events.
-		*/
-		case KEYBD_ID:
-			down = (ts->buf[0] & 0x80) ? 0 : 1;
-
-			switch (ts->buf[0] & 0x7f) {
-				case H3600_SCANCODE_RECORD:
-					key = KEY_RECORD;
-					break;
-				case H3600_SCANCODE_CALENDAR:
-					key = KEY_PROG1;
-                                        break;
-				case H3600_SCANCODE_CONTACTS:
-					key = KEY_PROG2;
-					break;
-				case H3600_SCANCODE_Q:
-					key = KEY_Q;
-					break;
-				case H3600_SCANCODE_START:
-					key = KEY_PROG3;
-					break;
-				case H3600_SCANCODE_UP:
-					key = KEY_UP;
-					break;
-				case H3600_SCANCODE_RIGHT:
-					key = KEY_RIGHT;
-					break;
-				case H3600_SCANCODE_LEFT:
-					key = KEY_LEFT;
-					break;
-				case H3600_SCANCODE_DOWN:
-					key = KEY_DOWN;
-					break;
-				default:
-					key = 0;
-			}
-			if (key)
-				input_report_key(dev, key, down);
-			break;
-		/*
-		 * Native touchscreen event data is formatted as shown below:-
-		 *
-		 *      +-------+-------+-------+-------+
-		 *      | Xmsb  | Xlsb  | Ymsb  | Ylsb  |
-		 *      +-------+-------+-------+-------+
-		 *       byte 0    1       2       3
-		 */
-		case TOUCHS_ID:
-			if (!touched) {
-				input_report_key(dev, BTN_TOUCH, 1);
-				touched = 1;
-			}
-
-			if (ts->len) {
-				unsigned short x, y;
-
-				x = ts->buf[0]; x <<= 8; x += ts->buf[1];
-				y = ts->buf[2]; y <<= 8; y += ts->buf[3];
-
-				input_report_abs(dev, ABS_X, x);
-				input_report_abs(dev, ABS_Y, y);
-			} else {
-				input_report_key(dev, BTN_TOUCH, 0);
-				touched = 0;
-			}
-			break;
-		default:
-			/* Send a non input event elsewhere */
-			break;
-	}
-
-	input_sync(dev);
-}
-
-/*
- * h3600ts_event() handles events from the input module.
- */
-static int h3600ts_event(struct input_dev *dev, unsigned int type,
-			 unsigned int code, int value)
-{
-#if 0
-	struct h3600_dev *ts = input_get_drvdata(dev);
-
-	switch (type) {
-		case EV_LED: {
-		//	serio_write(ts->serio, SOME_CMD);
-			return 0;
-		}
-	}
-	return -1;
-#endif
-	return 0;
-}
-
-/*
-        Frame format
-  byte    1       2               3              len + 4
-        +-------+---------------+---------------+--=------------+
-        |SOF    |id     |len    | len bytes     | Chksum        |
-        +-------+---------------+---------------+--=------------+
-  bit   0     7  8    11 12   15 16
-
-        +-------+---------------+-------+
-        |SOF    |id     |0      |Chksum | - Note Chksum does not include SOF
-        +-------+---------------+-------+
-  bit   0     7  8    11 12   15 16
-
-*/
-
-static int state;
-
-/* decode States  */
-#define STATE_SOF       0       /* start of FRAME */
-#define STATE_ID        1       /* state where we decode the ID & len */
-#define STATE_DATA      2       /* state where we decode data */
-#define STATE_EOF       3       /* state where we decode checksum or EOF */
-
-static irqreturn_t h3600ts_interrupt(struct serio *serio, unsigned char data,
-                                     unsigned int flags)
-{
-	struct h3600_dev *ts = serio_get_drvdata(serio);
-
-	/*
-	 * We have a new frame coming in.
-	 */
-	switch (state) {
-		case STATE_SOF:
-			if (data == CHAR_SOF)
-				state = STATE_ID;
-			break;
-		case STATE_ID:
-			ts->event = (data & 0xf0) >> 4;
-			ts->len = (data & 0xf);
-			ts->idx = 0;
-			if (ts->event >= MAX_ID) {
-				state = STATE_SOF;
-				break;
-			}
-			ts->chksum = data;
-			state = (ts->len > 0) ? STATE_DATA : STATE_EOF;
-			break;
-		case STATE_DATA:
-			ts->chksum += data;
-			ts->buf[ts->idx]= data;
-			if (++ts->idx == ts->len)
-				state = STATE_EOF;
-			break;
-		case STATE_EOF:
-			state = STATE_SOF;
-			if (data == CHAR_EOF || data == ts->chksum)
-				h3600ts_process_packet(ts);
-			break;
-		default:
-			printk("Error3\n");
-			break;
-	}
-
-	return IRQ_HANDLED;
-}
-
-/*
- * h3600ts_connect() is the routine that is called when someone adds a
- * new serio device that supports H3600 protocol and registers it as
- * an input device.
- */
-static int h3600ts_connect(struct serio *serio, struct serio_driver *drv)
-{
-	struct h3600_dev *ts;
-	struct input_dev *input_dev;
-	int err;
-
-	ts = kzalloc(sizeof(struct h3600_dev), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!ts || !input_dev) {
-		err = -ENOMEM;
-		goto fail1;
-	}
-
-	ts->serio = serio;
-	ts->dev = input_dev;
-	snprintf(ts->phys, sizeof(ts->phys), "%s/input0", serio->phys);
-
-	input_dev->name = "H3600 TouchScreen";
-	input_dev->phys = ts->phys;
-	input_dev->id.bustype = BUS_RS232;
-	input_dev->id.vendor = SERIO_H3600;
-	input_dev->id.product = 0x0666;  /* FIXME !!! We can ask the hardware */
-	input_dev->id.version = 0x0100;
-	input_dev->dev.parent = &serio->dev;
-
-	input_set_drvdata(input_dev, ts);
-
-	input_dev->event = h3600ts_event;
-
-	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
-		BIT_MASK(EV_LED) | BIT_MASK(EV_PWR);
-	input_dev->ledbit[0] = BIT_MASK(LED_SLEEP);
-	input_set_abs_params(input_dev, ABS_X, 60, 985, 0, 0);
-	input_set_abs_params(input_dev, ABS_Y, 35, 1024, 0, 0);
-
-	set_bit(KEY_RECORD, input_dev->keybit);
-	set_bit(KEY_Q, input_dev->keybit);
-	set_bit(KEY_PROG1, input_dev->keybit);
-	set_bit(KEY_PROG2, input_dev->keybit);
-	set_bit(KEY_PROG3, input_dev->keybit);
-	set_bit(KEY_UP, input_dev->keybit);
-	set_bit(KEY_RIGHT, input_dev->keybit);
-	set_bit(KEY_LEFT, input_dev->keybit);
-	set_bit(KEY_DOWN, input_dev->keybit);
-	set_bit(KEY_ENTER, input_dev->keybit);
-	set_bit(KEY_SUSPEND, input_dev->keybit);
-	set_bit(BTN_TOUCH, input_dev->keybit);
-
-	/* Device specific stuff */
-	set_GPIO_IRQ_edge(GPIO_BITSY_ACTION_BUTTON, GPIO_BOTH_EDGES);
-	set_GPIO_IRQ_edge(GPIO_BITSY_NPOWER_BUTTON, GPIO_RISING_EDGE);
-
-	if (request_irq(IRQ_GPIO_BITSY_ACTION_BUTTON, action_button_handler,
-			IRQF_SHARED, "h3600_action", ts->dev)) {
-		printk(KERN_ERR "h3600ts.c: Could not allocate Action Button IRQ!\n");
-		err = -EBUSY;
-		goto fail1;
-	}
-
-	if (request_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, npower_button_handler,
-			IRQF_SHARED, "h3600_suspend", ts->dev)) {
-		printk(KERN_ERR "h3600ts.c: Could not allocate Power Button IRQ!\n");
-		err = -EBUSY;
-		goto fail2;
-	}
-
-	serio_set_drvdata(serio, ts);
-
-	err = serio_open(serio, drv);
-	if (err)
-		goto fail3;
-
-	//h3600_flite_control(1, 25);     /* default brightness */
-	err = input_register_device(ts->dev);
-	if (err)
-		goto fail4;
-
-	return 0;
-
-fail4:	serio_close(serio);
-fail3:	serio_set_drvdata(serio, NULL);
-	free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev);
-fail2:	free_irq(IRQ_GPIO_BITSY_ACTION_BUTTON, ts->dev);
-fail1:	input_free_device(input_dev);
-	kfree(ts);
-	return err;
-}
-
-/*
- * h3600ts_disconnect() is the opposite of h3600ts_connect()
- */
-
-static void h3600ts_disconnect(struct serio *serio)
-{
-	struct h3600_dev *ts = serio_get_drvdata(serio);
-
-	free_irq(IRQ_GPIO_BITSY_ACTION_BUTTON, ts->dev);
-	free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev);
-	input_get_device(ts->dev);
-	input_unregister_device(ts->dev);
-	serio_close(serio);
-	serio_set_drvdata(serio, NULL);
-	input_put_device(ts->dev);
-	kfree(ts);
-}
-
-/*
- * The serio driver structure.
- */
-
-static struct serio_device_id h3600ts_serio_ids[] = {
-	{
-		.type	= SERIO_RS232,
-		.proto	= SERIO_H3600,
-		.id	= SERIO_ANY,
-		.extra	= SERIO_ANY,
-	},
-	{ 0 }
-};
-
-MODULE_DEVICE_TABLE(serio, h3600ts_serio_ids);
-
-static struct serio_driver h3600ts_drv = {
-	.driver		= {
-		.name	= "h3600ts",
-	},
-	.description	= DRIVER_DESC,
-	.id_table	= h3600ts_serio_ids,
-	.interrupt	= h3600ts_interrupt,
-	.connect	= h3600ts_connect,
-	.disconnect	= h3600ts_disconnect,
-};
-
-module_serio_driver(h3600ts_drv);
diff --git a/drivers/input/touchscreen/htcpen.c b/drivers/input/touchscreen/htcpen.c
index d13143b..6c4fb84 100644
--- a/drivers/input/touchscreen/htcpen.c
+++ b/drivers/input/touchscreen/htcpen.c
@@ -102,7 +102,7 @@
 	synchronize_irq(HTCPEN_IRQ);
 }
 
-static int __devinit htcpen_isa_probe(struct device *dev, unsigned int id)
+static int htcpen_isa_probe(struct device *dev, unsigned int id)
 {
 	struct input_dev *htcpen_dev;
 	int err = -EBUSY;
@@ -174,7 +174,7 @@
 	return err;
 }
 
-static int __devexit htcpen_isa_remove(struct device *dev, unsigned int id)
+static int htcpen_isa_remove(struct device *dev, unsigned int id)
 {
 	struct input_dev *htcpen_dev = dev_get_drvdata(dev);
 
@@ -210,7 +210,7 @@
 
 static struct isa_driver htcpen_isa_driver = {
 	.probe		= htcpen_isa_probe,
-	.remove		= __devexit_p(htcpen_isa_remove),
+	.remove		= htcpen_isa_remove,
 #ifdef CONFIG_PM
 	.suspend	= htcpen_isa_suspend,
 	.resume		= htcpen_isa_resume,
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index 4ac6976..1418bdd 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -180,7 +180,7 @@
 	.attrs = ili210x_attributes,
 };
 
-static int __devinit ili210x_i2c_probe(struct i2c_client *client,
+static int ili210x_i2c_probe(struct i2c_client *client,
 				       const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
@@ -298,7 +298,7 @@
 	return error;
 }
 
-static int __devexit ili210x_i2c_remove(struct i2c_client *client)
+static int ili210x_i2c_remove(struct i2c_client *client)
 {
 	struct ili210x *priv = i2c_get_clientdata(client);
 
@@ -350,7 +350,7 @@
 	},
 	.id_table = ili210x_i2c_id,
 	.probe = ili210x_i2c_probe,
-	.remove = __devexit_p(ili210x_i2c_remove),
+	.remove = ili210x_i2c_remove,
 };
 
 module_i2c_driver(ili210x_ts_driver);
diff --git a/drivers/input/touchscreen/intel-mid-touch.c b/drivers/input/touchscreen/intel-mid-touch.c
index cf29937..465db5db 100644
--- a/drivers/input/touchscreen/intel-mid-touch.c
+++ b/drivers/input/touchscreen/intel-mid-touch.c
@@ -427,7 +427,7 @@
 }
 
 /* Utility to read PMIC ID */
-static int __devinit mrstouch_read_pmic_id(uint *vendor, uint *rev)
+static int mrstouch_read_pmic_id(uint *vendor, uint *rev)
 {
 	int err;
 	u8 r;
@@ -446,7 +446,7 @@
  * Parse ADC channels to find end of the channel configured by other ADC user
  * NEC and MAXIM requires 4 channels and FreeScale needs 18 channels
  */
-static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev)
+static int mrstouch_chan_parse(struct mrstouch_dev *tsdev)
 {
 	int found = 0;
 	int err, i;
@@ -478,7 +478,7 @@
 /*
  * Writes touch screen channels to ADC address selection registers
  */
-static int __devinit mrstouch_ts_chan_set(uint offset)
+static int mrstouch_ts_chan_set(uint offset)
 {
 	u16 chan;
 
@@ -494,7 +494,7 @@
 }
 
 /* Initialize ADC */
-static int __devinit mrstouch_adc_init(struct mrstouch_dev *tsdev)
+static int mrstouch_adc_init(struct mrstouch_dev *tsdev)
 {
 	int err, start;
 	u8 ra, rm;
@@ -568,7 +568,7 @@
 
 
 /* Probe function for touch screen driver */
-static int __devinit mrstouch_probe(struct platform_device *pdev)
+static int mrstouch_probe(struct platform_device *pdev)
 {
 	struct mrstouch_dev *tsdev;
 	struct input_dev *input;
@@ -643,7 +643,7 @@
 	return err;
 }
 
-static int __devexit mrstouch_remove(struct platform_device *pdev)
+static int mrstouch_remove(struct platform_device *pdev)
 {
 	struct mrstouch_dev *tsdev = platform_get_drvdata(pdev);
 
@@ -662,7 +662,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= mrstouch_probe,
-	.remove		= __devexit_p(mrstouch_remove),
+	.remove		= mrstouch_remove,
 };
 module_platform_driver(mrstouch_driver);
 
diff --git a/drivers/input/touchscreen/jornada720_ts.c b/drivers/input/touchscreen/jornada720_ts.c
index 7f03d1bd..282d7c7 100644
--- a/drivers/input/touchscreen/jornada720_ts.c
+++ b/drivers/input/touchscreen/jornada720_ts.c
@@ -99,7 +99,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit jornada720_ts_probe(struct platform_device *pdev)
+static int jornada720_ts_probe(struct platform_device *pdev)
 {
 	struct jornada_ts *jornada_ts;
 	struct input_dev *input_dev;
@@ -151,7 +151,7 @@
 	return error;
 }
 
-static int __devexit jornada720_ts_remove(struct platform_device *pdev)
+static int jornada720_ts_remove(struct platform_device *pdev)
 {
 	struct jornada_ts *jornada_ts = platform_get_drvdata(pdev);
 
@@ -168,7 +168,7 @@
 
 static struct platform_driver jornada720_ts_driver = {
 	.probe		= jornada720_ts_probe,
-	.remove		= __devexit_p(jornada720_ts_remove),
+	.remove		= jornada720_ts_remove,
 	.driver		= {
 		.name	= "jornada_ts",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/touchscreen/lpc32xx_ts.c b/drivers/input/touchscreen/lpc32xx_ts.c
index 4c2b8ed..9101ee5 100644
--- a/drivers/input/touchscreen/lpc32xx_ts.c
+++ b/drivers/input/touchscreen/lpc32xx_ts.c
@@ -203,7 +203,7 @@
 	lpc32xx_stop_tsc(tsc);
 }
 
-static int __devinit lpc32xx_ts_probe(struct platform_device *pdev)
+static int lpc32xx_ts_probe(struct platform_device *pdev)
 {
 	struct lpc32xx_tsc *tsc;
 	struct input_dev *input;
@@ -309,7 +309,7 @@
 	return error;
 }
 
-static int __devexit lpc32xx_ts_remove(struct platform_device *pdev)
+static int lpc32xx_ts_remove(struct platform_device *pdev)
 {
 	struct lpc32xx_tsc *tsc = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -394,7 +394,7 @@
 
 static struct platform_driver lpc32xx_ts_driver = {
 	.probe		= lpc32xx_ts_probe,
-	.remove		= __devexit_p(lpc32xx_ts_remove),
+	.remove		= lpc32xx_ts_remove,
 	.driver		= {
 		.name	= MOD_NAME,
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/touchscreen/max11801_ts.c b/drivers/input/touchscreen/max11801_ts.c
index 4eab50b..00bc6caa 100644
--- a/drivers/input/touchscreen/max11801_ts.c
+++ b/drivers/input/touchscreen/max11801_ts.c
@@ -156,7 +156,7 @@
 	return IRQ_HANDLED;
 }
 
-static void __devinit max11801_ts_phy_init(struct max11801_data *data)
+static void max11801_ts_phy_init(struct max11801_data *data)
 {
 	struct i2c_client *client = data->client;
 
@@ -174,7 +174,7 @@
 	max11801_write_reg(client, OP_MODE_CONF_REG, 0x36);
 }
 
-static int __devinit max11801_ts_probe(struct i2c_client *client,
+static int max11801_ts_probe(struct i2c_client *client,
 				       const struct i2c_device_id *id)
 {
 	struct max11801_data *data;
@@ -228,7 +228,7 @@
 	return error;
 }
 
-static __devexit int max11801_ts_remove(struct i2c_client *client)
+static int max11801_ts_remove(struct i2c_client *client)
 {
 	struct max11801_data *data = i2c_get_clientdata(client);
 
@@ -252,7 +252,7 @@
 	},
 	.id_table	= max11801_ts_id,
 	.probe		= max11801_ts_probe,
-	.remove		= __devexit_p(max11801_ts_remove),
+	.remove		= max11801_ts_remove,
 };
 
 module_i2c_driver(max11801_ts_driver);
diff --git a/drivers/input/touchscreen/mc13783_ts.c b/drivers/input/touchscreen/mc13783_ts.c
index 48dc5b0..02103b6 100644
--- a/drivers/input/touchscreen/mc13783_ts.c
+++ b/drivers/input/touchscreen/mc13783_ts.c
@@ -229,7 +229,7 @@
 	return ret;
 }
 
-static int __devexit mc13783_ts_remove(struct platform_device *pdev)
+static int mc13783_ts_remove(struct platform_device *pdev)
 {
 	struct mc13783_ts_priv *priv = platform_get_drvdata(pdev);
 
@@ -243,7 +243,7 @@
 }
 
 static struct platform_driver mc13783_ts_driver = {
-	.remove		= __devexit_p(mc13783_ts_remove),
+	.remove		= mc13783_ts_remove,
 	.driver		= {
 		.owner	= THIS_MODULE,
 		.name	= MC13783_TS_NAME,
diff --git a/drivers/input/touchscreen/mcs5000_ts.c b/drivers/input/touchscreen/mcs5000_ts.c
index b528511..f9f4e0c 100644
--- a/drivers/input/touchscreen/mcs5000_ts.c
+++ b/drivers/input/touchscreen/mcs5000_ts.c
@@ -187,7 +187,7 @@
 			OP_MODE_ACTIVE | REPORT_RATE_80);
 }
 
-static int __devinit mcs5000_ts_probe(struct i2c_client *client,
+static int mcs5000_ts_probe(struct i2c_client *client,
 		const struct i2c_device_id *id)
 {
 	struct mcs5000_ts_data *data;
@@ -249,7 +249,7 @@
 	return ret;
 }
 
-static int __devexit mcs5000_ts_remove(struct i2c_client *client)
+static int mcs5000_ts_remove(struct i2c_client *client)
 {
 	struct mcs5000_ts_data *data = i2c_get_clientdata(client);
 
@@ -292,7 +292,7 @@
 
 static struct i2c_driver mcs5000_ts_driver = {
 	.probe		= mcs5000_ts_probe,
-	.remove		= __devexit_p(mcs5000_ts_remove),
+	.remove		= mcs5000_ts_remove,
 	.driver = {
 		.name = "mcs5000_ts",
 #ifdef CONFIG_PM
diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c
index 560cf09..98841d8 100644
--- a/drivers/input/touchscreen/mms114.c
+++ b/drivers/input/touchscreen/mms114.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/of.h>
 #include <linux/i2c.h>
 #include <linux/i2c/mms114.h>
 #include <linux/input/mt.h>
@@ -360,14 +361,63 @@
 	mms114_stop(data);
 }
 
-static int __devinit mms114_probe(struct i2c_client *client,
+#ifdef CONFIG_OF
+static struct mms114_platform_data *mms114_parse_dt(struct device *dev)
+{
+	struct mms114_platform_data *pdata;
+	struct device_node *np = dev->of_node;
+
+	if (!np)
+		return NULL;
+
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		dev_err(dev, "failed to allocate platform data\n");
+		return NULL;
+	}
+
+	if (of_property_read_u32(np, "x-size", &pdata->x_size)) {
+		dev_err(dev, "failed to get x-size property\n");
+		return NULL;
+	}
+
+	if (of_property_read_u32(np, "y-size", &pdata->y_size)) {
+		dev_err(dev, "failed to get y-size property\n");
+		return NULL;
+	}
+
+	of_property_read_u32(np, "contact-threshold",
+				&pdata->contact_threshold);
+	of_property_read_u32(np, "moving-threshold",
+				&pdata->moving_threshold);
+
+	if (of_find_property(np, "x-invert", NULL))
+		pdata->x_invert = true;
+	if (of_find_property(np, "y-invert", NULL))
+		pdata->y_invert = true;
+
+	return pdata;
+}
+#else
+static inline struct mms114_platform_data *mms114_parse_dt(struct device *dev)
+{
+	return NULL;
+}
+#endif
+
+static int mms114_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
+	const struct mms114_platform_data *pdata;
 	struct mms114_data *data;
 	struct input_dev *input_dev;
 	int error;
 
-	if (!client->dev.platform_data) {
+	pdata = dev_get_platdata(&client->dev);
+	if (!pdata)
+		pdata = mms114_parse_dt(&client->dev);
+
+	if (!pdata) {
 		dev_err(&client->dev, "Need platform data\n");
 		return -EINVAL;
 	}
@@ -389,7 +439,7 @@
 
 	data->client = client;
 	data->input_dev = input_dev;
-	data->pdata = client->dev.platform_data;
+	data->pdata = pdata;
 
 	input_dev->name = "MELFAS MMS114 Touchscreen";
 	input_dev->id.bustype = BUS_I2C;
@@ -458,7 +508,7 @@
 	return error;
 }
 
-static int __devexit mms114_remove(struct i2c_client *client)
+static int mms114_remove(struct i2c_client *client)
 {
 	struct mms114_data *data = i2c_get_clientdata(client);
 
@@ -525,14 +575,22 @@
 };
 MODULE_DEVICE_TABLE(i2c, mms114_id);
 
+#ifdef CONFIG_OF
+static struct of_device_id mms114_dt_match[] = {
+	{ .compatible = "melfas,mms114" },
+	{ }
+};
+#endif
+
 static struct i2c_driver mms114_driver = {
 	.driver = {
 		.name	= "mms114",
 		.owner	= THIS_MODULE,
 		.pm	= &mms114_pm_ops,
+		.of_match_table = of_match_ptr(mms114_dt_match),
 	},
 	.probe		= mms114_probe,
-	.remove		= __devexit_p(mms114_remove),
+	.remove		= mms114_remove,
 	.id_table	= mms114_id,
 };
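The driver references its new OF match table through of_match_ptr(), so builds without CONFIG_OF never touch it; the helper in <linux/of.h> is essentially:

    #ifdef CONFIG_OF
    #define of_match_ptr(_ptr)  (_ptr)
    #else
    #define of_match_ptr(_ptr)  NULL        /* table compiled out */
    #endif

That is also why mms114_dt_match itself can live inside #ifdef CONFIG_OF without breaking non-DT builds.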
 
diff --git a/drivers/input/touchscreen/pcap_ts.c b/drivers/input/touchscreen/pcap_ts.c
index f57aeb8..f22e04d 100644
--- a/drivers/input/touchscreen/pcap_ts.c
+++ b/drivers/input/touchscreen/pcap_ts.c
@@ -137,7 +137,7 @@
 				pcap_ts->read_state << PCAP_ADC_TS_M_SHIFT);
 }
 
-static int __devinit pcap_ts_probe(struct platform_device *pdev)
+static int pcap_ts_probe(struct platform_device *pdev)
 {
 	struct input_dev *input_dev;
 	struct pcap_ts *pcap_ts;
@@ -202,7 +202,7 @@
 	return err;
 }
 
-static int __devexit pcap_ts_remove(struct platform_device *pdev)
+static int pcap_ts_remove(struct platform_device *pdev)
 {
 	struct pcap_ts *pcap_ts = platform_get_drvdata(pdev);
 
@@ -245,7 +245,7 @@
 
 static struct platform_driver pcap_ts_driver = {
 	.probe		= pcap_ts_probe,
-	.remove		= __devexit_p(pcap_ts_remove),
+	.remove		= pcap_ts_remove,
 	.driver		= {
 		.name	= "pcap-ts",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index 953b4c1..6cc6b36 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -125,7 +125,7 @@
 static SIMPLE_DEV_PM_OPS(pixcir_dev_pm_ops,
 			 pixcir_i2c_ts_suspend, pixcir_i2c_ts_resume);
 
-static int __devinit pixcir_i2c_ts_probe(struct i2c_client *client,
+static int pixcir_i2c_ts_probe(struct i2c_client *client,
 					 const struct i2c_device_id *id)
 {
 	const struct pixcir_ts_platform_data *pdata = client->dev.platform_data;
@@ -189,7 +189,7 @@
 	return error;
 }
 
-static int __devexit pixcir_i2c_ts_remove(struct i2c_client *client)
+static int pixcir_i2c_ts_remove(struct i2c_client *client)
 {
 	struct pixcir_i2c_ts_data *tsdata = i2c_get_clientdata(client);
 
@@ -218,7 +218,7 @@
 		.pm	= &pixcir_dev_pm_ops,
 	},
 	.probe		= pixcir_i2c_ts_probe,
-	.remove		= __devexit_p(pixcir_i2c_ts_remove),
+	.remove		= pixcir_i2c_ts_remove,
 	.id_table	= pixcir_i2c_ts_id,
 };
 
diff --git a/drivers/input/touchscreen/s3c2410_ts.c b/drivers/input/touchscreen/s3c2410_ts.c
index 549fa29..b061af2 100644
--- a/drivers/input/touchscreen/s3c2410_ts.c
+++ b/drivers/input/touchscreen/s3c2410_ts.c
@@ -238,7 +238,7 @@
  * Initialise, find and allocate any resources we need to run and then
  * register with the ADC and input systems.
  */
-static int __devinit s3c2410ts_probe(struct platform_device *pdev)
+static int s3c2410ts_probe(struct platform_device *pdev)
 {
 	struct s3c2410_ts_mach_info *info;
 	struct device *dev = &pdev->dev;
@@ -365,7 +365,7 @@
  *
  * Free up our state ready to be removed.
  */
-static int __devexit s3c2410ts_remove(struct platform_device *pdev)
+static int s3c2410ts_remove(struct platform_device *pdev)
 {
 	free_irq(ts.irq_tc, ts.input);
 	del_timer_sync(&touch_timer);
@@ -430,7 +430,7 @@
 	},
 	.id_table	= s3cts_driver_ids,
 	.probe		= s3c2410ts_probe,
-	.remove		= __devexit_p(s3c2410ts_remove),
+	.remove		= s3c2410ts_remove,
 };
 module_platform_driver(s3c_ts_driver);
 
diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c
index 6cb68a1..d9d05e2 100644
--- a/drivers/input/touchscreen/st1232.c
+++ b/drivers/input/touchscreen/st1232.c
@@ -139,7 +139,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit st1232_ts_probe(struct i2c_client *client,
+static int st1232_ts_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct st1232_ts_data *ts;
@@ -206,7 +206,7 @@
 	return error;
 }
 
-static int __devexit st1232_ts_remove(struct i2c_client *client)
+static int st1232_ts_remove(struct i2c_client *client)
 {
 	struct st1232_ts_data *ts = i2c_get_clientdata(client);
 
@@ -255,7 +255,7 @@
 MODULE_DEVICE_TABLE(i2c, st1232_ts_id);
 
 #ifdef CONFIG_OF
-static const struct of_device_id st1232_ts_dt_ids[] __devinitconst = {
+static const struct of_device_id st1232_ts_dt_ids[] = {
 	{ .compatible = "sitronix,st1232", },
 	{ }
 };
@@ -264,7 +264,7 @@
 
 static struct i2c_driver st1232_ts_driver = {
 	.probe		= st1232_ts_probe,
-	.remove		= __devexit_p(st1232_ts_remove),
+	.remove		= st1232_ts_remove,
 	.id_table	= st1232_ts_id,
 	.driver = {
 		.name	= ST1232_TS_NAME,
diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c
index 692b685..84d884b 100644
--- a/drivers/input/touchscreen/stmpe-ts.c
+++ b/drivers/input/touchscreen/stmpe-ts.c
@@ -1,4 +1,5 @@
-/* STMicroelectronics STMPE811 Touchscreen Driver
+/*
+ * STMicroelectronics STMPE811 Touchscreen Driver
  *
  * (C) 2010 Luotao Fu <l.fu@pengutronix.de>
  * All rights reserved.
@@ -16,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/slab.h>
@@ -166,7 +168,7 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit stmpe_init_hw(struct stmpe_touch *ts)
+static int stmpe_init_hw(struct stmpe_touch *ts)
 {
 	int ret;
 	u8 adc_ctrl1, adc_ctrl1_mask, tsc_cfg, tsc_cfg_mask;
@@ -261,41 +263,18 @@
 			STMPE_TSC_CTRL_TSC_EN, 0);
 }
 
-static int __devinit stmpe_input_probe(struct platform_device *pdev)
+static void stmpe_ts_get_platform_info(struct platform_device *pdev,
+					struct stmpe_touch *ts)
 {
 	struct stmpe *stmpe = dev_get_drvdata(pdev->dev.parent);
-	struct stmpe_platform_data *pdata = stmpe->pdata;
-	struct stmpe_touch *ts;
-	struct input_dev *idev;
+	struct device_node *np = pdev->dev.of_node;
 	struct stmpe_ts_platform_data *ts_pdata = NULL;
-	int ret;
-	int ts_irq;
 
-	ts_irq = platform_get_irq_byname(pdev, "FIFO_TH");
-	if (ts_irq < 0)
-		return ts_irq;
-
-	ts = kzalloc(sizeof(*ts), GFP_KERNEL);
-	if (!ts) {
-		ret = -ENOMEM;
-		goto err_out;
-	}
-
-	idev = input_allocate_device();
-	if (!idev) {
-		ret = -ENOMEM;
-		goto err_free_ts;
-	}
-
-	platform_set_drvdata(pdev, ts);
 	ts->stmpe = stmpe;
-	ts->idev = idev;
-	ts->dev = &pdev->dev;
 
-	if (pdata)
-		ts_pdata = pdata->ts;
+	if (stmpe->pdata && stmpe->pdata->ts) {
+		ts_pdata = stmpe->pdata->ts;
 
-	if (ts_pdata) {
 		ts->sample_time = ts_pdata->sample_time;
 		ts->mod_12b = ts_pdata->mod_12b;
 		ts->ref_sel = ts_pdata->ref_sel;
@@ -305,22 +284,71 @@
 		ts->settling = ts_pdata->settling;
 		ts->fraction_z = ts_pdata->fraction_z;
 		ts->i_drive = ts_pdata->i_drive;
+	} else if (np) {
+		u32 val;
+
+		if (!of_property_read_u32(np, "st,sample-time", &val))
+			ts->sample_time = val;
+		if (!of_property_read_u32(np, "st,mod-12b", &val))
+			ts->mod_12b = val;
+		if (!of_property_read_u32(np, "st,ref-sel", &val))
+			ts->ref_sel = val;
+		if (!of_property_read_u32(np, "st,adc-freq", &val))
+			ts->adc_freq = val;
+		if (!of_property_read_u32(np, "st,ave-ctrl", &val))
+			ts->ave_ctrl = val;
+		if (!of_property_read_u32(np, "st,touch-det-delay", &val))
+			ts->touch_det_delay = val;
+		if (!of_property_read_u32(np, "st,settling", &val))
+			ts->settling = val;
+		if (!of_property_read_u32(np, "st,fraction-z", &val))
+			ts->fraction_z = val;
+		if (!of_property_read_u32(np, "st,i-drive", &val))
+			ts->i_drive = val;
 	}
+}
+
+static int stmpe_input_probe(struct platform_device *pdev)
+{
+	struct stmpe_touch *ts;
+	struct input_dev *idev;
+	int error;
+	int ts_irq;
+
+	ts_irq = platform_get_irq_byname(pdev, "FIFO_TH");
+	if (ts_irq < 0)
+		return ts_irq;
+
+	ts = devm_kzalloc(&pdev->dev, sizeof(*ts), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	idev = devm_input_allocate_device(&pdev->dev);
+	if (!idev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ts);
+	ts->idev = idev;
+	ts->dev = &pdev->dev;
+
+	stmpe_ts_get_platform_info(pdev, ts);
 
 	INIT_DELAYED_WORK(&ts->work, stmpe_work);
 
-	ret = request_threaded_irq(ts_irq, NULL, stmpe_ts_handler,
-			IRQF_ONESHOT, STMPE_TS_NAME, ts);
-	if (ret) {
+	error = devm_request_threaded_irq(&pdev->dev, ts_irq,
+					  NULL, stmpe_ts_handler,
+					  IRQF_ONESHOT, STMPE_TS_NAME, ts);
+	if (error) {
 		dev_err(&pdev->dev, "Failed to request IRQ %d\n", ts_irq);
-		goto err_free_input;
+		return error;
 	}
 
-	ret = stmpe_init_hw(ts);
-	if (ret)
-		goto err_free_irq;
+	error = stmpe_init_hw(ts);
+	if (error)
+		return error;
 
 	idev->name = STMPE_TS_NAME;
+	idev->phys = STMPE_TS_NAME "/input0";
 	idev->id.bustype = BUS_I2C;
 	idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 	idev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
@@ -334,40 +362,21 @@
 	input_set_abs_params(idev, ABS_Y, 0, XY_MASK, 0, 0);
 	input_set_abs_params(idev, ABS_PRESSURE, 0x0, 0xff, 0, 0);
 
-	ret = input_register_device(idev);
-	if (ret) {
+	error = input_register_device(idev);
+	if (error) {
 		dev_err(&pdev->dev, "Could not register input device\n");
-		goto err_free_irq;
+		return error;
 	}
 
-	return ret;
-
-err_free_irq:
-	free_irq(ts_irq, ts);
-err_free_input:
-	input_free_device(idev);
-	platform_set_drvdata(pdev, NULL);
-err_free_ts:
-	kfree(ts);
-err_out:
-	return ret;
+	return 0;
 }
 
-static int __devexit stmpe_ts_remove(struct platform_device *pdev)
+static int stmpe_ts_remove(struct platform_device *pdev)
 {
 	struct stmpe_touch *ts = platform_get_drvdata(pdev);
-	unsigned int ts_irq = platform_get_irq_byname(pdev, "FIFO_TH");
 
 	stmpe_disable(ts->stmpe, STMPE_BLOCK_TOUCHSCREEN);
 
-	free_irq(ts_irq, ts);
-
-	platform_set_drvdata(pdev, NULL);
-
-	input_unregister_device(ts->idev);
-
-	kfree(ts);
-
 	return 0;
 }
 
@@ -377,7 +386,7 @@
 		   .owner = THIS_MODULE,
 		   },
 	.probe = stmpe_input_probe,
-	.remove = __devexit_p(stmpe_ts_remove),
+	.remove = stmpe_ts_remove,
 };
 module_platform_driver(stmpe_ts_driver);
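The conversion above to devm_* managed resources is what lets the error-unwinding labels and most of stmpe_ts_remove() disappear: everything allocated through devm is tied to the device and released automatically on probe failure or unbind. A minimal sketch of the pattern (example_probe() is hypothetical, not part of the patch):

    static int example_probe(struct platform_device *pdev)
    {
            void *buf;

            /* Tied to pdev->dev: freed automatically if probe()
             * fails later on, or when the device is unbound. */
            buf = devm_kzalloc(&pdev->dev, 128, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM; /* no goto-based unwinding needed */

            return 0;
    }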
 
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
new file mode 100644
index 0000000..51e7b87
--- /dev/null
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -0,0 +1,398 @@
+/*
+ * TI Touch Screen driver
+ *
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/input/ti_am335x_tsc.h>
+#include <linux/delay.h>
+
+#include <linux/mfd/ti_am335x_tscadc.h>
+
+#define ADCFSM_STEPID		0x10
+#define SEQ_SETTLE		275
+#define MAX_12BIT		((1 << 12) - 1)
+
+struct titsc {
+	struct input_dev	*input;
+	struct ti_tscadc_dev	*mfd_tscadc;
+	unsigned int		irq;
+	unsigned int		wires;
+	unsigned int		x_plate_resistance;
+	bool			pen_down;
+	int			steps_to_configure;
+};
+
+static unsigned int titsc_readl(struct titsc *ts, unsigned int reg)
+{
+	return readl(ts->mfd_tscadc->tscadc_base + reg);
+}
+
+static void titsc_writel(struct titsc *tsc, unsigned int reg,
+					unsigned int val)
+{
+	writel(val, tsc->mfd_tscadc->tscadc_base + reg);
+}
+
+static void titsc_step_config(struct titsc *ts_dev)
+{
+	unsigned int	config;
+	int i, total_steps;
+
+	/* Configure the Step registers */
+	total_steps = 2 * ts_dev->steps_to_configure;
+
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_XPP;
+	switch (ts_dev->wires) {
+	case 4:
+		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
+		break;
+	case 5:
+		config |= STEPCONFIG_YNN |
+				STEPCONFIG_INP_AN4 | STEPCONFIG_XNN |
+				STEPCONFIG_YPP;
+		break;
+	case 8:
+		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
+		break;
+	}
+
+	for (i = 1; i <= ts_dev->steps_to_configure; i++) {
+		titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
+		titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+	}
+
+	config = 0;
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_YNN |
+			STEPCONFIG_INM_ADCREFM | STEPCONFIG_FIFO1;
+	switch (ts_dev->wires) {
+	case 4:
+		config |= STEPCONFIG_YPP;
+		break;
+	case 5:
+		config |= STEPCONFIG_XPP | STEPCONFIG_INP_AN4 |
+				STEPCONFIG_XNP | STEPCONFIG_YPN;
+		break;
+	case 8:
+		config |= STEPCONFIG_YPP;
+		break;
+	}
+
+	for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) {
+		titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
+		titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+	}
+
+	config = 0;
+	/* Charge step configuration */
+	config = STEPCONFIG_XPP | STEPCONFIG_YNN |
+			STEPCHARGE_RFP_XPUL | STEPCHARGE_RFM_XNUR |
+			STEPCHARGE_INM_AN1 | STEPCHARGE_INP_AN1;
+
+	titsc_writel(ts_dev, REG_CHARGECONFIG, config);
+	titsc_writel(ts_dev, REG_CHARGEDELAY, CHARGEDLY_OPENDLY);
+
+	config = 0;
+	/* Configure to calculate pressure */
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_YPP |
+			STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM;
+	titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config);
+	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 1),
+			STEPCONFIG_OPENDLY);
+
+	config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1;
+	titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config);
+	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
+			STEPCONFIG_OPENDLY);
+
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC);
+}
+
+static void titsc_read_coordinates(struct titsc *ts_dev,
+				    unsigned int *x, unsigned int *y)
+{
+	unsigned int fifocount = titsc_readl(ts_dev, REG_FIFO0CNT);
+	unsigned int prev_val_x = ~0, prev_val_y = ~0;
+	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
+	unsigned int read, diff;
+	unsigned int i, channel;
+
+	/*
+	 * A delta filter is used to remove large variations in the
+	 * sampled ADC values. The filter tries to predict where the
+	 * next coordinate should be: each reading is compared with the
+	 * previous one, and the value whose difference from its
+	 * predecessor is smallest is kept and reported to the input
+	 * subsystem.
+	 */
+	for (i = 0; i < fifocount - 1; i++) {
+		read = titsc_readl(ts_dev, REG_FIFO0);
+		channel = read & 0xf0000;
+		channel = channel >> 0x10;
+		if ((channel >= 0) && (channel < ts_dev->steps_to_configure)) {
+			read &= 0xfff;
+			diff = abs(read - prev_val_x);
+			if (diff < prev_diff_x) {
+				prev_diff_x = diff;
+				*x = read;
+			}
+			prev_val_x = read;
+		}
+
+		read = titsc_readl(ts_dev, REG_FIFO1);
+		channel = read & 0xf0000;
+		channel = channel >> 0x10;
+		if ((channel >= ts_dev->steps_to_configure) &&
+			(channel < (2 * ts_dev->steps_to_configure - 1))) {
+			read &= 0xfff;
+			diff = abs(read - prev_val_y);
+			if (diff < prev_diff_y) {
+				prev_diff_y = diff;
+				*y = read;
+			}
+			prev_val_y = read;
+		}
+	}
+}
+
+static irqreturn_t titsc_irq(int irq, void *dev)
+{
+	struct titsc *ts_dev = dev;
+	struct input_dev *input_dev = ts_dev->input;
+	unsigned int status, irqclr = 0;
+	unsigned int x = 0, y = 0;
+	unsigned int z1, z2, z;
+	unsigned int fsm;
+	unsigned int fifo1count, fifo0count;
+	int i;
+
+	status = titsc_readl(ts_dev, REG_IRQSTATUS);
+	if (status & IRQENB_FIFO0THRES) {
+		titsc_read_coordinates(ts_dev, &x, &y);
+
+		z1 = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
+		z2 = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
+
+		fifo1count = titsc_readl(ts_dev, REG_FIFO1CNT);
+		for (i = 0; i < fifo1count; i++)
+			titsc_readl(ts_dev, REG_FIFO1);
+
+		fifo0count = titsc_readl(ts_dev, REG_FIFO0CNT);
+		for (i = 0; i < fifo0count; i++)
+			titsc_readl(ts_dev, REG_FIFO0);
+
+		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
+			/*
+			 * Calculate pressure using the formula
+			 * Resistance(touch) = x plate resistance *
+			 * x position/4096 * ((z2 / z1) - 1)
+			 */
+			z = z2 - z1;
+			z *= x;
+			z *= ts_dev->x_plate_resistance;
+			z /= z1;
+			z = (z + 2047) >> 12;
+
+			if (z <= MAX_12BIT) {
+				input_report_abs(input_dev, ABS_X, x);
+				input_report_abs(input_dev, ABS_Y, y);
+				input_report_abs(input_dev, ABS_PRESSURE, z);
+				input_report_key(input_dev, BTN_TOUCH, 1);
+				input_sync(input_dev);
+			}
+		}
+		irqclr |= IRQENB_FIFO0THRES;
+	}
+
+	/*
+	 * Give the sequencer time to settle so that its
+	 * state can be read back correctly.
+	 */
+	udelay(SEQ_SETTLE);
+
+	status = titsc_readl(ts_dev, REG_RAWIRQSTATUS);
+	if (status & IRQENB_PENUP) {
+		/* Pen up event */
+		fsm = titsc_readl(ts_dev, REG_ADCFSM);
+		if (fsm == ADCFSM_STEPID) {
+			ts_dev->pen_down = false;
+			input_report_key(input_dev, BTN_TOUCH, 0);
+			input_report_abs(input_dev, ABS_PRESSURE, 0);
+			input_sync(input_dev);
+		} else {
+			ts_dev->pen_down = true;
+		}
+		irqclr |= IRQENB_PENUP;
+	}
+
+	titsc_writel(ts_dev, REG_IRQSTATUS, irqclr);
+
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC);
+	return IRQ_HANDLED;
+}
+
+/*
+ * The functions for inserting/removing the driver as a module.
+ */
+
+static int titsc_probe(struct platform_device *pdev)
+{
+	struct titsc *ts_dev;
+	struct input_dev *input_dev;
+	struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data;
+	struct mfd_tscadc_board	*pdata;
+	int err;
+
+	pdata = tscadc_dev->dev->platform_data;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
+		return -EINVAL;
+	}
+
+	/* Allocate memory for device */
+	ts_dev = kzalloc(sizeof(struct titsc), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!ts_dev || !input_dev) {
+		dev_err(&pdev->dev, "failed to allocate memory.\n");
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	tscadc_dev->tsc = ts_dev;
+	ts_dev->mfd_tscadc = tscadc_dev;
+	ts_dev->input = input_dev;
+	ts_dev->irq = tscadc_dev->irq;
+	ts_dev->wires = pdata->tsc_init->wires;
+	ts_dev->x_plate_resistance = pdata->tsc_init->x_plate_resistance;
+	ts_dev->steps_to_configure = pdata->tsc_init->steps_to_configure;
+
+	err = request_irq(ts_dev->irq, titsc_irq,
+			  0, pdev->dev.driver->name, ts_dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to allocate irq.\n");
+		goto err_free_mem;
+	}
+
+	titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
+	titsc_step_config(ts_dev);
+	titsc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure);
+
+	input_dev->name = "ti-tsc";
+	input_dev->dev.parent = &pdev->dev;
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+
+	input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0);
+
+	/* register to the input system */
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_free_irq;
+
+	platform_set_drvdata(pdev, ts_dev);
+	return 0;
+
+err_free_irq:
+	free_irq(ts_dev->irq, ts_dev);
+err_free_mem:
+	input_free_device(input_dev);
+	kfree(ts_dev);
+	return err;
+}
+
+static int titsc_remove(struct platform_device *pdev)
+{
+	struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
+
+	free_irq(ts_dev->irq, ts_dev);
+
+	input_unregister_device(ts_dev->input);
+
+	platform_set_drvdata(pdev, NULL);
+	kfree(ts_dev);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int titsc_suspend(struct device *dev)
+{
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
+	unsigned int idle;
+
+	if (device_may_wakeup(tscadc_dev->dev)) {
+		idle = titsc_readl(ts_dev, REG_IRQENABLE);
+		titsc_writel(ts_dev, REG_IRQENABLE,
+				(idle | IRQENB_HW_PEN));
+		titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
+	}
+	return 0;
+}
+
+static int titsc_resume(struct device *dev)
+{
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
+
+	if (device_may_wakeup(tscadc_dev->dev)) {
+		titsc_writel(ts_dev, REG_IRQWAKEUP,
+				0x00);
+		titsc_writel(ts_dev, REG_IRQCLR, IRQENB_HW_PEN);
+	}
+	titsc_step_config(ts_dev);
+	titsc_writel(ts_dev, REG_FIFO0THR,
+			ts_dev->steps_to_configure);
+	return 0;
+}
+
+static const struct dev_pm_ops titsc_pm_ops = {
+	.suspend = titsc_suspend,
+	.resume  = titsc_resume,
+};
+#define TITSC_PM_OPS (&titsc_pm_ops)
+#else
+#define TITSC_PM_OPS NULL
+#endif
+
+static struct platform_driver ti_tsc_driver = {
+	.probe	= titsc_probe,
+	.remove	= titsc_remove,
+	.driver	= {
+		.name   = "tsc",
+		.owner	= THIS_MODULE,
+		.pm	= TITSC_PM_OPS,
+	},
+};
+module_platform_driver(ti_tsc_driver);
+
+MODULE_DESCRIPTION("TI touchscreen controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
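For reference, the pressure computation in titsc_irq() is the comment's formula carried out in scaled integer math. A worked example with assumed readings (x = 2048, x_plate_resistance = 600, z1 = 400, z2 = 500):

    /* z = (z2 - z1) * x * Rx / z1
     *   = 100 * 2048 * 600 / 400          = 307200
     * z = (z + 2047) >> 12                = 75
     *
     * matching Rtouch = 600 * (2048/4096) * (500/400 - 1) = 75 ohms;
     * the >> 12 is the /4096 of the formula, and the +2047 bias keeps
     * the shift from always truncating downward.
     */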
diff --git a/drivers/input/touchscreen/ti_tscadc.c b/drivers/input/touchscreen/ti_tscadc.c
deleted file mode 100644
index d229c74..0000000
--- a/drivers/input/touchscreen/ti_tscadc.c
+++ /dev/null
@@ -1,486 +0,0 @@
-/*
- * TI Touch Screen driver
- *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/input.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/input/ti_tscadc.h>
-#include <linux/delay.h>
-
-#define REG_IRQEOI		0x020
-#define REG_RAWIRQSTATUS	0x024
-#define REG_IRQSTATUS		0x028
-#define REG_IRQENABLE		0x02C
-#define REG_IRQWAKEUP		0x034
-#define REG_CTRL		0x040
-#define REG_ADCFSM		0x044
-#define REG_CLKDIV		0x04C
-#define REG_SE			0x054
-#define REG_IDLECONFIG		0x058
-#define REG_CHARGECONFIG	0x05C
-#define REG_CHARGEDELAY		0x060
-#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
-#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
-#define REG_STEPCONFIG13	0x0C4
-#define REG_STEPDELAY13		0x0C8
-#define REG_STEPCONFIG14	0x0CC
-#define REG_STEPDELAY14		0x0D0
-#define REG_FIFO0CNT		0xE4
-#define REG_FIFO1THR		0xF4
-#define REG_FIFO0		0x100
-#define REG_FIFO1		0x200
-
-/*	Register Bitfields	*/
-#define IRQWKUP_ENB		BIT(0)
-#define STPENB_STEPENB		0x7FFF
-#define IRQENB_FIFO1THRES	BIT(5)
-#define IRQENB_PENUP		BIT(9)
-#define STEPCONFIG_MODE_HWSYNC	0x2
-#define STEPCONFIG_SAMPLES_AVG	(1 << 4)
-#define STEPCONFIG_XPP		(1 << 5)
-#define STEPCONFIG_XNN		(1 << 6)
-#define STEPCONFIG_YPP		(1 << 7)
-#define STEPCONFIG_YNN		(1 << 8)
-#define STEPCONFIG_XNP		(1 << 9)
-#define STEPCONFIG_YPN		(1 << 10)
-#define STEPCONFIG_INM		(1 << 18)
-#define STEPCONFIG_INP		(1 << 20)
-#define STEPCONFIG_INP_5	(1 << 21)
-#define STEPCONFIG_FIFO1	(1 << 26)
-#define STEPCONFIG_OPENDLY	0xff
-#define STEPCONFIG_Z1		(3 << 19)
-#define STEPIDLE_INP		(1 << 22)
-#define STEPCHARGE_RFP		(1 << 12)
-#define STEPCHARGE_INM		(1 << 15)
-#define STEPCHARGE_INP		(1 << 19)
-#define STEPCHARGE_RFM		(1 << 23)
-#define STEPCHARGE_DELAY	0x1
-#define CNTRLREG_TSCSSENB	(1 << 0)
-#define CNTRLREG_STEPID		(1 << 1)
-#define CNTRLREG_STEPCONFIGWRT	(1 << 2)
-#define CNTRLREG_4WIRE		(1 << 5)
-#define CNTRLREG_5WIRE		(1 << 6)
-#define CNTRLREG_8WIRE		(3 << 5)
-#define CNTRLREG_TSCENB		(1 << 7)
-#define ADCFSM_STEPID		0x10
-
-#define SEQ_SETTLE		275
-#define ADC_CLK			3000000
-#define MAX_12BIT		((1 << 12) - 1)
-#define TSCADC_DELTA_X		15
-#define TSCADC_DELTA_Y		15
-
-struct tscadc {
-	struct input_dev	*input;
-	struct clk		*tsc_ick;
-	void __iomem		*tsc_base;
-	unsigned int		irq;
-	unsigned int		wires;
-	unsigned int		x_plate_resistance;
-	bool			pen_down;
-};
-
-static unsigned int tscadc_readl(struct tscadc *ts, unsigned int reg)
-{
-	return readl(ts->tsc_base + reg);
-}
-
-static void tscadc_writel(struct tscadc *tsc, unsigned int reg,
-					unsigned int val)
-{
-	writel(val, tsc->tsc_base + reg);
-}
-
-static void tscadc_step_config(struct tscadc *ts_dev)
-{
-	unsigned int	config;
-	int i;
-
-	/* Configure the Step registers */
-
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_SAMPLES_AVG | STEPCONFIG_XPP;
-	switch (ts_dev->wires) {
-	case 4:
-		config |= STEPCONFIG_INP | STEPCONFIG_XNN;
-		break;
-	case 5:
-		config |= STEPCONFIG_YNN |
-				STEPCONFIG_INP_5 | STEPCONFIG_XNN |
-				STEPCONFIG_YPP;
-		break;
-	case 8:
-		config |= STEPCONFIG_INP | STEPCONFIG_XNN;
-		break;
-	}
-
-	for (i = 1; i < 7; i++) {
-		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
-		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
-	}
-
-	config = 0;
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_SAMPLES_AVG | STEPCONFIG_YNN |
-			STEPCONFIG_INM | STEPCONFIG_FIFO1;
-	switch (ts_dev->wires) {
-	case 4:
-		config |= STEPCONFIG_YPP;
-		break;
-	case 5:
-		config |= STEPCONFIG_XPP | STEPCONFIG_INP_5 |
-				STEPCONFIG_XNP | STEPCONFIG_YPN;
-		break;
-	case 8:
-		config |= STEPCONFIG_YPP;
-		break;
-	}
-
-	for (i = 7; i < 13; i++) {
-		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
-		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
-	}
-
-	config = 0;
-	/* Charge step configuration */
-	config = STEPCONFIG_XPP | STEPCONFIG_YNN |
-			STEPCHARGE_RFP | STEPCHARGE_RFM |
-			STEPCHARGE_INM | STEPCHARGE_INP;
-
-	tscadc_writel(ts_dev, REG_CHARGECONFIG, config);
-	tscadc_writel(ts_dev, REG_CHARGEDELAY, STEPCHARGE_DELAY);
-
-	config = 0;
-	/* Configure to calculate pressure */
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_SAMPLES_AVG | STEPCONFIG_YPP |
-			STEPCONFIG_XNN | STEPCONFIG_INM;
-	tscadc_writel(ts_dev, REG_STEPCONFIG13, config);
-	tscadc_writel(ts_dev, REG_STEPDELAY13, STEPCONFIG_OPENDLY);
-
-	config |= STEPCONFIG_Z1 | STEPCONFIG_FIFO1;
-	tscadc_writel(ts_dev, REG_STEPCONFIG14, config);
-	tscadc_writel(ts_dev, REG_STEPDELAY14, STEPCONFIG_OPENDLY);
-
-	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-}
-
-static void tscadc_idle_config(struct tscadc *ts_config)
-{
-	unsigned int idleconfig;
-
-	idleconfig = STEPCONFIG_YNN |
-			STEPCONFIG_INM |
-			STEPCONFIG_YPN | STEPIDLE_INP;
-	tscadc_writel(ts_config, REG_IDLECONFIG, idleconfig);
-}
-
-static void tscadc_read_coordinates(struct tscadc *ts_dev,
-				    unsigned int *x, unsigned int *y)
-{
-	unsigned int fifocount = tscadc_readl(ts_dev, REG_FIFO0CNT);
-	unsigned int prev_val_x = ~0, prev_val_y = ~0;
-	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
-	unsigned int read, diff;
-	unsigned int i;
-
-	/*
-	 * Delta filter is used to remove large variations in sampled
-	 * values from ADC. The filter tries to predict where the next
-	 * coordinate could be. This is done by taking a previous
-	 * coordinate and subtracting it form current one. Further the
-	 * algorithm compares the difference with that of a present value,
-	 * if true the value is reported to the sub system.
-	 */
-	for (i = 0; i < fifocount - 1; i++) {
-		read = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		diff = abs(read - prev_val_x);
-		if (diff < prev_diff_x) {
-			prev_diff_x = diff;
-			*x = read;
-		}
-		prev_val_x = read;
-
-		read = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff;
-		diff = abs(read - prev_val_y);
-		if (diff < prev_diff_y) {
-			prev_diff_y = diff;
-			*y = read;
-		}
-		prev_val_y = read;
-	}
-}
-
-static irqreturn_t tscadc_irq(int irq, void *dev)
-{
-	struct tscadc *ts_dev = dev;
-	struct input_dev *input_dev = ts_dev->input;
-	unsigned int status, irqclr = 0;
-	unsigned int x = 0, y = 0;
-	unsigned int z1, z2, z;
-	unsigned int fsm;
-
-	status = tscadc_readl(ts_dev, REG_IRQSTATUS);
-	if (status & IRQENB_FIFO1THRES) {
-		tscadc_read_coordinates(ts_dev, &x, &y);
-
-		z1 = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		z2 = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff;
-
-		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
-			/*
-			 * Calculate pressure using formula
-			 * Resistance(touch) = x plate resistance *
-			 * x postion/4096 * ((z2 / z1) - 1)
-			 */
-			z = z2 - z1;
-			z *= x;
-			z *= ts_dev->x_plate_resistance;
-			z /= z1;
-			z = (z + 2047) >> 12;
-
-			if (z <= MAX_12BIT) {
-				input_report_abs(input_dev, ABS_X, x);
-				input_report_abs(input_dev, ABS_Y, y);
-				input_report_abs(input_dev, ABS_PRESSURE, z);
-				input_report_key(input_dev, BTN_TOUCH, 1);
-				input_sync(input_dev);
-			}
-		}
-		irqclr |= IRQENB_FIFO1THRES;
-	}
-
-	/*
-	 * Time for sequencer to settle, to read
-	 * correct state of the sequencer.
-	 */
-	udelay(SEQ_SETTLE);
-
-	status = tscadc_readl(ts_dev, REG_RAWIRQSTATUS);
-	if (status & IRQENB_PENUP) {
-		/* Pen up event */
-		fsm = tscadc_readl(ts_dev, REG_ADCFSM);
-		if (fsm == ADCFSM_STEPID) {
-			ts_dev->pen_down = false;
-			input_report_key(input_dev, BTN_TOUCH, 0);
-			input_report_abs(input_dev, ABS_PRESSURE, 0);
-			input_sync(input_dev);
-		} else {
-			ts_dev->pen_down = true;
-		}
-		irqclr |= IRQENB_PENUP;
-	}
-
-	tscadc_writel(ts_dev, REG_IRQSTATUS, irqclr);
-	/* check pending interrupts */
-	tscadc_writel(ts_dev, REG_IRQEOI, 0x0);
-
-	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-	return IRQ_HANDLED;
-}
-
-/*
- * The functions for inserting/removing driver as a module.
- */
-
-static int __devinit tscadc_probe(struct platform_device *pdev)
-{
-	const struct tsc_data *pdata = pdev->dev.platform_data;
-	struct resource *res;
-	struct tscadc *ts_dev;
-	struct input_dev *input_dev;
-	struct clk *clk;
-	int err;
-	int clk_value, ctrl, irq;
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "missing platform data.\n");
-		return -EINVAL;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no memory resource defined.\n");
-		return -EINVAL;
-	}
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no irq ID is specified.\n");
-		return -EINVAL;
-	}
-
-	/* Allocate memory for device */
-	ts_dev = kzalloc(sizeof(struct tscadc), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!ts_dev || !input_dev) {
-		dev_err(&pdev->dev, "failed to allocate memory.\n");
-		err = -ENOMEM;
-		goto err_free_mem;
-	}
-
-	ts_dev->input = input_dev;
-	ts_dev->irq = irq;
-	ts_dev->wires = pdata->wires;
-	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (!res) {
-		dev_err(&pdev->dev, "failed to reserve registers.\n");
-		err = -EBUSY;
-		goto err_free_mem;
-	}
-
-	ts_dev->tsc_base = ioremap(res->start, resource_size(res));
-	if (!ts_dev->tsc_base) {
-		dev_err(&pdev->dev, "failed to map registers.\n");
-		err = -ENOMEM;
-		goto err_release_mem_region;
-	}
-
-	err = request_irq(ts_dev->irq, tscadc_irq,
-			  0, pdev->dev.driver->name, ts_dev);
-	if (err) {
-		dev_err(&pdev->dev, "failed to allocate irq.\n");
-		goto err_unmap_regs;
-	}
-
-	ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick");
-	if (IS_ERR(ts_dev->tsc_ick)) {
-		dev_err(&pdev->dev, "failed to get TSC ick\n");
-		goto err_free_irq;
-	}
-	clk_enable(ts_dev->tsc_ick);
-
-	clk = clk_get(&pdev->dev, "adc_tsc_fck");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "failed to get TSC fck\n");
-		err = PTR_ERR(clk);
-		goto err_disable_clk;
-	}
-
-	clk_value = clk_get_rate(clk) / ADC_CLK;
-	clk_put(clk);
-
-	if (clk_value < 7) {
-		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
-		goto err_disable_clk;
-	}
-	/* CLKDIV needs to be configured to the value minus 1 */
-	tscadc_writel(ts_dev, REG_CLKDIV, clk_value - 1);
-
-	 /* Enable wake-up of the SoC using touchscreen */
-	tscadc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
-
-	ctrl = CNTRLREG_STEPCONFIGWRT |
-			CNTRLREG_TSCENB |
-			CNTRLREG_STEPID;
-	switch (ts_dev->wires) {
-	case 4:
-		ctrl |= CNTRLREG_4WIRE;
-		break;
-	case 5:
-		ctrl |= CNTRLREG_5WIRE;
-		break;
-	case 8:
-		ctrl |= CNTRLREG_8WIRE;
-		break;
-	}
-	tscadc_writel(ts_dev, REG_CTRL, ctrl);
-
-	tscadc_idle_config(ts_dev);
-	tscadc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO1THRES);
-	tscadc_step_config(ts_dev);
-	tscadc_writel(ts_dev, REG_FIFO1THR, 6);
-
-	ctrl |= CNTRLREG_TSCSSENB;
-	tscadc_writel(ts_dev, REG_CTRL, ctrl);
-
-	input_dev->name = "ti-tsc-adc";
-	input_dev->dev.parent = &pdev->dev;
-
-	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
-	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
-
-	input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0);
-	input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0);
-	input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0);
-
-	/* register to the input system */
-	err = input_register_device(input_dev);
-	if (err)
-		goto err_disable_clk;
-
-	platform_set_drvdata(pdev, ts_dev);
-	return 0;
-
-err_disable_clk:
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
-err_free_irq:
-	free_irq(ts_dev->irq, ts_dev);
-err_unmap_regs:
-	iounmap(ts_dev->tsc_base);
-err_release_mem_region:
-	release_mem_region(res->start, resource_size(res));
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(ts_dev);
-	return err;
-}
-
-static int __devexit tscadc_remove(struct platform_device *pdev)
-{
-	struct tscadc *ts_dev = platform_get_drvdata(pdev);
-	struct resource *res;
-
-	free_irq(ts_dev->irq, ts_dev);
-
-	input_unregister_device(ts_dev->input);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	iounmap(ts_dev->tsc_base);
-	release_mem_region(res->start, resource_size(res));
-
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
-
-	kfree(ts_dev);
-
-	platform_set_drvdata(pdev, NULL);
-	return 0;
-}
-
-static struct platform_driver ti_tsc_driver = {
-	.probe	= tscadc_probe,
-	.remove	= __devexit_p(tscadc_remove),
-	.driver	= {
-		.name   = "tsc",
-		.owner	= THIS_MODULE,
-	},
-};
-module_platform_driver(ti_tsc_driver);
-
-MODULE_DESCRIPTION("TI touchscreen controller driver");
-MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c
index 368d2c6c..acfb876 100644
--- a/drivers/input/touchscreen/tnetv107x-ts.c
+++ b/drivers/input/touchscreen/tnetv107x-ts.c
@@ -243,7 +243,7 @@
 	clk_disable(ts->clk);
 }
 
-static int __devinit tsc_probe(struct platform_device *pdev)
+static int tsc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct tsc_data *ts;
@@ -357,7 +357,7 @@
 	return error;
 }
 
-static int __devexit tsc_remove(struct platform_device *pdev)
+static int tsc_remove(struct platform_device *pdev)
 {
 	struct tsc_data *ts = platform_get_drvdata(pdev);
 
@@ -374,7 +374,7 @@
 
 static struct platform_driver tsc_driver = {
 	.probe		= tsc_probe,
-	.remove		= __devexit_p(tsc_remove),
+	.remove		= tsc_remove,
 	.driver.name	= "tnetv107x-ts",
 	.driver.owner	= THIS_MODULE,
 };
diff --git a/drivers/input/touchscreen/tps6507x-ts.c b/drivers/input/touchscreen/tps6507x-ts.c
index f7eda3d0..820a066 100644
--- a/drivers/input/touchscreen/tps6507x-ts.c
+++ b/drivers/input/touchscreen/tps6507x-ts.c
@@ -345,7 +345,7 @@
 	return error;
 }
 
-static int __devexit tps6507x_ts_remove(struct platform_device *pdev)
+static int tps6507x_ts_remove(struct platform_device *pdev)
 {
 	struct tps6507x_dev *tps6507x_dev = platform_get_drvdata(pdev);
 	struct tps6507x_ts *tsc = tps6507x_dev->ts;
@@ -367,7 +367,7 @@
 		.owner = THIS_MODULE,
 	},
 	.probe = tps6507x_ts_probe,
-	.remove = __devexit_p(tps6507x_ts_remove),
+	.remove = tps6507x_ts_remove,
 };
 module_platform_driver(tps6507x_ts_driver);
 
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index 5ce3fa8..9c0cdc7 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -555,7 +555,7 @@
 	mutex_unlock(&ts->mutex);
 }
 
-static void __devinit tsc2005_setup_spi_xfer(struct tsc2005 *ts)
+static void tsc2005_setup_spi_xfer(struct tsc2005 *ts)
 {
 	tsc2005_setup_read(&ts->spi_x, TSC2005_REG_X, false);
 	tsc2005_setup_read(&ts->spi_y, TSC2005_REG_Y, false);
@@ -569,7 +569,7 @@
 	spi_message_add_tail(&ts->spi_z2.spi_xfer, &ts->spi_read_msg);
 }
 
-static int __devinit tsc2005_probe(struct spi_device *spi)
+static int tsc2005_probe(struct spi_device *spi)
 {
 	const struct tsc2005_platform_data *pdata = spi->dev.platform_data;
 	struct tsc2005 *ts;
@@ -686,7 +686,7 @@
 	return error;
 }
 
-static int __devexit tsc2005_remove(struct spi_device *spi)
+static int tsc2005_remove(struct spi_device *spi)
 {
 	struct tsc2005 *ts = spi_get_drvdata(spi);
 
@@ -745,7 +745,7 @@
 		.pm	= &tsc2005_pm_ops,
 	},
 	.probe	= tsc2005_probe,
-	.remove	= __devexit_p(tsc2005_remove),
+	.remove	= tsc2005_remove,
 };
 
 module_spi_driver(tsc2005_driver);
diff --git a/drivers/input/touchscreen/tsc2007.c b/drivers/input/touchscreen/tsc2007.c
index 1473d23..0b67ba4 100644
--- a/drivers/input/touchscreen/tsc2007.c
+++ b/drivers/input/touchscreen/tsc2007.c
@@ -273,7 +273,7 @@
 	tsc2007_stop(ts);
 }
 
-static int __devinit tsc2007_probe(struct i2c_client *client,
+static int tsc2007_probe(struct i2c_client *client,
 				   const struct i2c_device_id *id)
 {
 	struct tsc2007 *ts;
@@ -366,7 +366,7 @@
 	return err;
 }
 
-static int __devexit tsc2007_remove(struct i2c_client *client)
+static int tsc2007_remove(struct i2c_client *client)
 {
 	struct tsc2007	*ts = i2c_get_clientdata(client);
 	struct tsc2007_platform_data *pdata = client->dev.platform_data;
@@ -396,7 +396,7 @@
 	},
 	.id_table	= tsc2007_idtable,
 	.probe		= tsc2007_probe,
-	.remove		= __devexit_p(tsc2007_remove),
+	.remove		= tsc2007_remove,
 };
 
 module_i2c_driver(tsc2007_driver);
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 46e83ad..1271f97 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -274,7 +274,7 @@
  * Try to probe our interrupt, rather than relying on lots of
  * hard-coded machine dependencies.
  */
-static int __devinit ucb1400_ts_detect_irq(struct ucb1400_ts *ucb,
+static int ucb1400_ts_detect_irq(struct ucb1400_ts *ucb,
 					   struct platform_device *pdev)
 {
 	unsigned long mask, timeout;
@@ -318,7 +318,7 @@
 	return 0;
 }
 
-static int __devinit ucb1400_ts_probe(struct platform_device *pdev)
+static int ucb1400_ts_probe(struct platform_device *pdev)
 {
 	struct ucb1400_ts *ucb = pdev->dev.platform_data;
 	int error, x_res, y_res;
@@ -397,7 +397,7 @@
 	return error;
 }
 
-static int __devexit ucb1400_ts_remove(struct platform_device *pdev)
+static int ucb1400_ts_remove(struct platform_device *pdev)
 {
 	struct ucb1400_ts *ucb = pdev->dev.platform_data;
 
@@ -442,7 +442,7 @@
 
 static struct platform_driver ucb1400_ts_driver = {
 	.probe	= ucb1400_ts_probe,
-	.remove	= __devexit_p(ucb1400_ts_remove),
+	.remove	= ucb1400_ts_remove,
 	.driver	= {
 		.name	= "ucb1400_ts",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/touchscreen/w90p910_ts.c b/drivers/input/touchscreen/w90p910_ts.c
index 9396b21..d2ef8f0 100644
--- a/drivers/input/touchscreen/w90p910_ts.c
+++ b/drivers/input/touchscreen/w90p910_ts.c
@@ -215,7 +215,7 @@
 	clk_disable(w90p910_ts->clk);
 }
 
-static int __devinit w90x900ts_probe(struct platform_device *pdev)
+static int w90x900ts_probe(struct platform_device *pdev)
 {
 	struct w90p910_ts *w90p910_ts;
 	struct input_dev *input_dev;
@@ -301,7 +301,7 @@
 	return err;
 }
 
-static int __devexit w90x900ts_remove(struct platform_device *pdev)
+static int w90x900ts_remove(struct platform_device *pdev)
 {
 	struct w90p910_ts *w90p910_ts = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -325,7 +325,7 @@
 
 static struct platform_driver w90x900ts_driver = {
 	.probe		= w90x900ts_probe,
-	.remove		= __devexit_p(w90x900ts_remove),
+	.remove		= w90x900ts_remove,
 	.driver		= {
 		.name	= "nuc900-ts",
 		.owner	= THIS_MODULE,
diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c
index 0c01657..bf0d076 100644
--- a/drivers/input/touchscreen/wacom_i2c.c
+++ b/drivers/input/touchscreen/wacom_i2c.c
@@ -144,7 +144,7 @@
 	disable_irq(client->irq);
 }
 
-static int __devinit wacom_i2c_probe(struct i2c_client *client,
+static int wacom_i2c_probe(struct i2c_client *client,
 				     const struct i2c_device_id *id)
 {
 	struct wacom_i2c *wac_i2c;
@@ -225,7 +225,7 @@
 	return error;
 }
 
-static int __devexit wacom_i2c_remove(struct i2c_client *client)
+static int wacom_i2c_remove(struct i2c_client *client)
 {
 	struct wacom_i2c *wac_i2c = i2c_get_clientdata(client);
 
@@ -272,7 +272,7 @@
 	},
 
 	.probe		= wacom_i2c_probe,
-	.remove		= __devexit_p(wacom_i2c_remove),
+	.remove		= wacom_i2c_remove,
 	.id_table	= wacom_i2c_id,
 };
 module_i2c_driver(wacom_i2c_driver);
diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c
index 52abb98..f88fab5 100644
--- a/drivers/input/touchscreen/wm831x-ts.c
+++ b/drivers/input/touchscreen/wm831x-ts.c
@@ -233,7 +233,7 @@
 	}
 }
 
-static __devinit int wm831x_ts_probe(struct platform_device *pdev)
+static int wm831x_ts_probe(struct platform_device *pdev)
 {
 	struct wm831x_ts *wm831x_ts;
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
@@ -245,7 +245,8 @@
 	if (core_pdata)
 		pdata = core_pdata->touch;
 
-	wm831x_ts = kzalloc(sizeof(struct wm831x_ts), GFP_KERNEL);
+	wm831x_ts = devm_kzalloc(&pdev->dev, sizeof(struct wm831x_ts),
+				 GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!wm831x_ts || !input_dev) {
 		error = -ENOMEM;
@@ -376,21 +377,18 @@
 	free_irq(wm831x_ts->data_irq, wm831x_ts);
 err_alloc:
 	input_free_device(input_dev);
-	kfree(wm831x_ts);
 
 	return error;
 }
 
-static __devexit int wm831x_ts_remove(struct platform_device *pdev)
+static int wm831x_ts_remove(struct platform_device *pdev)
 {
 	struct wm831x_ts *wm831x_ts = platform_get_drvdata(pdev);
 
 	free_irq(wm831x_ts->pd_irq, wm831x_ts);
 	free_irq(wm831x_ts->data_irq, wm831x_ts);
 	input_unregister_device(wm831x_ts->input_dev);
-	kfree(wm831x_ts);
 
-	platform_set_drvdata(pdev, NULL);
 	return 0;
 }
 
@@ -400,7 +398,7 @@
 		.owner = THIS_MODULE,
 	},
 	.probe = wm831x_ts_probe,
-	.remove = __devexit_p(wm831x_ts_remove),
+	.remove = wm831x_ts_remove,
 };
 module_platform_driver(wm831x_ts_driver);
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 55074cb..c1c74e0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -57,17 +57,9 @@
  * physically contiguous memory regions it is mapping into page sizes
  * that we support.
  *
- * Traditionally the IOMMU core just handed us the mappings directly,
- * after making sure the size is an order of a 4KiB page and that the
- * mapping has natural alignment.
- *
- * To retain this behavior, we currently advertise that we support
- * all page sizes that are an order of 4KiB.
- *
- * If at some point we'd like to utilize the IOMMU core's new behavior,
- * we could change this to advertise the real page sizes we support.
+ * 512GB pages are not supported due to a hardware bug
  */
-#define AMD_IOMMU_PGSIZES	(~0xFFFUL)
+#define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
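The new mask is worth unpacking: ~0xFFFUL advertises every power-of-two page size from 4 KiB up (bit n set means size 2^n is supported), and 2ULL << 38 equals 1ULL << 39, exactly the 2^39-byte page bit:

    /* ~0xFFFUL           -> all page sizes of 4KiB order and above
     * 2ULL << 38         == 1ULL << 39, the 512GiB page-size bit
     * AMD_IOMMU_PGSIZES  -> everything 4KiB and up, except 512GiB
     */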
 
@@ -140,6 +132,9 @@
 	list_del(&dev_data->dev_data_list);
 	spin_unlock_irqrestore(&dev_data_list_lock, flags);
 
+	if (dev_data->group)
+		iommu_group_put(dev_data->group);
+
 	kfree(dev_data);
 }
 
@@ -274,13 +269,160 @@
 	*from = to;
 }
 
+static struct pci_bus *find_hosted_bus(struct pci_bus *bus)
+{
+	while (!bus->self) {
+		if (!pci_is_root_bus(bus))
+			bus = bus->parent;
+		else
+			return ERR_PTR(-ENODEV);
+	}
+
+	return bus;
+}
+
 #define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
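For readers following the grouping logic below, the four ACS capabilities demanded by REQ_ACS_FLAGS are what guarantee a device's DMA cannot be redirected peer-to-peer around the IOMMU:

    /* PCI_ACS_SV - Source Validation: requester IDs are checked
     * PCI_ACS_RR - Request Redirect: peer-to-peer requests go upstream
     * PCI_ACS_CR - Completion Redirect: completions go upstream too
     * PCI_ACS_UF - Upstream Forwarding: redirected traffic is passed on
     */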
 
+static struct pci_dev *get_isolation_root(struct pci_dev *pdev)
+{
+	struct pci_dev *dma_pdev = pdev;
+
+	/* Account for quirked devices */
+	swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
+
+	/*
+	 * If it's a multifunction device that does not support our
+	 * required ACS flags, add to the same group as function 0.
+	 */
+	if (dma_pdev->multifunction &&
+	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
+		swap_pci_ref(&dma_pdev,
+			     pci_get_slot(dma_pdev->bus,
+					  PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
+					  0)));
+
+	/*
+	 * Devices on the root bus go through the iommu.  If that's not us,
+	 * find the next upstream device and test ACS up to the root bus.
+	 * Finding the next device may require skipping virtual buses.
+	 */
+	while (!pci_is_root_bus(dma_pdev->bus)) {
+		struct pci_bus *bus = find_hosted_bus(dma_pdev->bus);
+		if (IS_ERR(bus))
+			break;
+
+		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
+			break;
+
+		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
+	}
+
+	return dma_pdev;
+}
+
+static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get(&pdev->dev);
+	int ret;
+
+	if (!group) {
+		group = iommu_group_alloc();
+		if (IS_ERR(group))
+			return PTR_ERR(group);
+
+		WARN_ON(&pdev->dev != dev);
+	}
+
+	ret = iommu_group_add_device(group, dev);
+	iommu_group_put(group);
+	return ret;
+}
+
+static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data,
+				    struct device *dev)
+{
+	if (!dev_data->group) {
+		struct iommu_group *group = iommu_group_alloc();
+		if (IS_ERR(group))
+			return PTR_ERR(group);
+
+		dev_data->group = group;
+	}
+
+	return iommu_group_add_device(dev_data->group, dev);
+}
+
+static int init_iommu_group(struct device *dev)
+{
+	struct iommu_dev_data *dev_data;
+	struct iommu_group *group;
+	struct pci_dev *dma_pdev;
+	int ret;
+
+	group = iommu_group_get(dev);
+	if (group) {
+		iommu_group_put(group);
+		return 0;
+	}
+
+	dev_data = find_dev_data(get_device_id(dev));
+	if (!dev_data)
+		return -ENOMEM;
+
+	if (dev_data->alias_data) {
+		u16 alias;
+		struct pci_bus *bus;
+
+		if (dev_data->alias_data->group)
+			goto use_group;
+
+		/*
+		 * If the alias device exists, it's effectively just a first
+		 * level quirk for finding the DMA source.
+		 */
+		alias = amd_iommu_alias_table[dev_data->devid];
+		dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
+		if (dma_pdev) {
+			dma_pdev = get_isolation_root(dma_pdev);
+			goto use_pdev;
+		}
+
+		/*
+		 * If the alias is virtual, try to find a parent device
+		 * and test whether the IOMMU group is actually rooted above
+		 * the alias.  Be careful to also test the parent device if
+		 * we think the alias is the root of the group.
+		 */
+		bus = pci_find_bus(0, alias >> 8);
+		if (!bus)
+			goto use_group;
+
+		bus = find_hosted_bus(bus);
+		if (IS_ERR(bus) || !bus->self)
+			goto use_group;
+
+		dma_pdev = get_isolation_root(pci_dev_get(bus->self));
+		if (dma_pdev != bus->self || (dma_pdev->multifunction &&
+		    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)))
+			goto use_pdev;
+
+		pci_dev_put(dma_pdev);
+		goto use_group;
+	}
+
+	dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev)));
+use_pdev:
+	ret = use_pdev_iommu_group(dma_pdev, dev);
+	pci_dev_put(dma_pdev);
+	return ret;
+use_group:
+	return use_dev_data_iommu_group(dev_data->alias_data, dev);
+}
+
 static int iommu_init_device(struct device *dev)
 {
-	struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev);
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct iommu_dev_data *dev_data;
-	struct iommu_group *group;
 	u16 alias;
 	int ret;
 
@@ -303,61 +445,9 @@
 			return -ENOTSUPP;
 		}
 		dev_data->alias_data = alias_data;
-
-		dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
 	}
 
-	if (dma_pdev == NULL)
-		dma_pdev = pci_dev_get(pdev);
-
-	/* Account for quirked devices */
-	swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
-
-	/*
-	 * If it's a multifunction device that does not support our
-	 * required ACS flags, add to the same group as function 0.
-	 */
-	if (dma_pdev->multifunction &&
-	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
-		swap_pci_ref(&dma_pdev,
-			     pci_get_slot(dma_pdev->bus,
-					  PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
-					  0)));
-
-	/*
-	 * Devices on the root bus go through the iommu.  If that's not us,
-	 * find the next upstream device and test ACS up to the root bus.
-	 * Finding the next device may require skipping virtual buses.
-	 */
-	while (!pci_is_root_bus(dma_pdev->bus)) {
-		struct pci_bus *bus = dma_pdev->bus;
-
-		while (!bus->self) {
-			if (!pci_is_root_bus(bus))
-				bus = bus->parent;
-			else
-				goto root_bus;
-		}
-
-		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
-			break;
-
-		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
-	}
-
-root_bus:
-	group = iommu_group_get(&dma_pdev->dev);
-	pci_dev_put(dma_pdev);
-	if (!group) {
-		group = iommu_group_alloc();
-		if (IS_ERR(group))
-			return PTR_ERR(group);
-	}
-
-	ret = iommu_group_add_device(group, dev);
-
-	iommu_group_put(group);
-
+	ret = init_iommu_group(dev);
 	if (ret)
 		return ret;
 
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index c9aa3d0..e38ab43 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -426,6 +426,7 @@
 	struct iommu_dev_data *alias_data;/* The alias dev_data */
 	struct protection_domain *domain; /* Domain the device is bound to */
 	atomic_t bind;			  /* Domain attach reference count */
+	struct iommu_group *group;	  /* IOMMU group for virtual aliases */
 	u16 devid;			  /* PCI Device ID */
 	bool iommu_v2;			  /* Device can make use of IOMMUv2 */
 	bool passthrough;		  /* Default for device is pt_domain */
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0badfa4..c2c07a4 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1827,10 +1827,17 @@
 			if (!pte)
 				return -ENOMEM;
 			/* It is a large page */
-			if (largepage_lvl > 1)
+			if (largepage_lvl > 1) {
 				pteval |= DMA_PTE_LARGE_PAGE;
-			else
+				/* Ensure that old small page tables are removed to make room
+				   for the superpage, if they exist. */
+				dma_pte_clear_range(domain, iov_pfn,
+						    iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
+				dma_pte_free_pagetable(domain, iov_pfn,
+						       iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
+			} else {
 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
+			}
 
 		}
 		/* We don't need lock here, nobody else
@@ -2320,8 +2327,39 @@
 	return 0;
 }
 
+static bool device_has_rmrr(struct pci_dev *dev)
+{
+	struct dmar_rmrr_unit *rmrr;
+	int i;
+
+	for_each_rmrr_units(rmrr) {
+		for (i = 0; i < rmrr->devices_cnt; i++) {
+			/*
+			 * Return TRUE if this RMRR contains the device that
+			 * is passed in.
+			 */
+			if (rmrr->devices[i] == dev)
+				return true;
+		}
+	}
+	return false;
+}
+
 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
 {
+
+	/*
+	 * We want to prevent any device associated with an RMRR from
+	 * getting placed into the SI Domain. This is done because
+	 * problems exist when devices are moved in and out of domains
+	 * and their respective RMRR info is lost. We exempt USB devices
+	 * from this process because they use RMRRs that are known
+	 * not to be needed after the BIOS hands off to the OS.
+	 */
+	if (device_has_rmrr(pdev) &&
+	    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
+		return 0;
+
 	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
 		return 1;
 
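The page-count arithmetic behind the new superpage cleanup: each VT-d
page-table level adds 9 bits of stride, so a level-2 superpage covers
512 small pages (2MiB), a level-3 superpage 512 * 512 of them (1GiB), and
so on. A rough sketch of what lvl_to_nr_pages() computes (hypothetical
helper name; the in-tree version also caps the shift):

	static unsigned long nr_pages_for_level(unsigned int largepage_lvl)
	{
		/* 9 bits of stride per level below the superpage level */
		return 1UL << ((largepage_lvl - 1) * 9);
	}

Clearing and freeing exactly that range before installing the large PTE
ensures no stale small-page tables shadow the new mapping.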
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index badc17c..18108c14 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -16,13 +16,13 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
-#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/iommu.h>
 #include <linux/omap-iommu.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
+#include <linux/pm_runtime.h>
 
 #include <asm/cacheflush.h>
 
@@ -143,31 +143,44 @@
 static int iommu_enable(struct omap_iommu *obj)
 {
 	int err;
+	struct platform_device *pdev = to_platform_device(obj->dev);
+	struct iommu_platform_data *pdata = pdev->dev.platform_data;
 
-	if (!obj)
+	if (!obj || !pdata)
 		return -EINVAL;
 
 	if (!arch_iommu)
 		return -ENODEV;
 
-	clk_enable(obj->clk);
+	if (pdata->deassert_reset) {
+		err = pdata->deassert_reset(pdev, pdata->reset_name);
+		if (err) {
+			dev_err(obj->dev, "deassert_reset failed: %d\n", err);
+			return err;
+		}
+	}
+
+	pm_runtime_get_sync(obj->dev);
 
 	err = arch_iommu->enable(obj);
 
-	clk_disable(obj->clk);
 	return err;
 }
 
 static void iommu_disable(struct omap_iommu *obj)
 {
-	if (!obj)
-		return;
+	struct platform_device *pdev = to_platform_device(obj->dev);
+	struct iommu_platform_data *pdata = pdev->dev.platform_data;
 
-	clk_enable(obj->clk);
+	if (!obj || !pdata)
+		return;
 
 	arch_iommu->disable(obj);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
+
+	if (pdata->assert_reset)
+		pdata->assert_reset(pdev, pdata->reset_name);
 }
 
 /*
@@ -290,7 +303,7 @@
 	if (!obj || !obj->nr_tlb_entries || !e)
 		return -EINVAL;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	iotlb_lock_get(obj, &l);
 	if (l.base == obj->nr_tlb_entries) {
@@ -320,7 +333,7 @@
 
 	cr = iotlb_alloc_cr(obj, e);
 	if (IS_ERR(cr)) {
-		clk_disable(obj->clk);
+		pm_runtime_put_sync(obj->dev);
 		return PTR_ERR(cr);
 	}
 
@@ -334,7 +347,7 @@
 		l.vict = l.base;
 	iotlb_lock_set(obj, &l);
 out:
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 	return err;
 }
 
@@ -364,7 +377,7 @@
 	int i;
 	struct cr_regs cr;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) {
 		u32 start;
@@ -383,7 +396,7 @@
 			iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
 		}
 	}
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	if (i == obj->nr_tlb_entries)
 		dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da);
@@ -397,7 +410,7 @@
 {
 	struct iotlb_lock l;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	l.base = 0;
 	l.vict = 0;
@@ -405,7 +418,7 @@
 
 	iommu_write_reg(obj, 1, MMU_GFLUSH);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 }
 
 #if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
@@ -415,11 +428,11 @@
 	if (!obj || !buf)
 		return -EINVAL;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	bytes = arch_iommu->dump_ctx(obj, buf, bytes);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	return bytes;
 }
@@ -433,7 +446,7 @@
 	struct cr_regs tmp;
 	struct cr_regs *p = crs;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 	iotlb_lock_get(obj, &saved);
 
 	for_each_iotlb_cr(obj, num, i, tmp) {
@@ -443,7 +456,7 @@
 	}
 
 	iotlb_lock_set(obj, &saved);
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	return  p - crs;
 }
@@ -807,9 +820,7 @@
 	if (!obj->refcount)
 		return IRQ_NONE;
 
-	clk_enable(obj->clk);
 	errs = iommu_report_fault(obj, &da);
-	clk_disable(obj->clk);
 	if (errs == 0)
 		return IRQ_HANDLED;
 
@@ -931,17 +942,10 @@
 	struct resource *res;
 	struct iommu_platform_data *pdata = pdev->dev.platform_data;
 
-	if (pdev->num_resources != 2)
-		return -EINVAL;
-
 	obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
 
-	obj->clk = clk_get(&pdev->dev, pdata->clk_name);
-	if (IS_ERR(obj->clk))
-		goto err_clk;
-
 	obj->nr_tlb_entries = pdata->nr_tlb_entries;
 	obj->name = pdata->name;
 	obj->dev = &pdev->dev;
@@ -984,6 +988,9 @@
 		goto err_irq;
 	platform_set_drvdata(pdev, obj);
 
+	pm_runtime_irq_safe(obj->dev);
+	pm_runtime_enable(obj->dev);
+
 	dev_info(&pdev->dev, "%s registered\n", obj->name);
 	return 0;
 
@@ -992,8 +999,6 @@
 err_ioremap:
 	release_mem_region(res->start, resource_size(res));
 err_mem:
-	clk_put(obj->clk);
-err_clk:
 	kfree(obj);
 	return err;
 }
@@ -1014,7 +1019,8 @@
 	release_mem_region(res->start, resource_size(res));
 	iounmap(obj->regbase);
 
-	clk_put(obj->clk);
+	pm_runtime_disable(obj->dev);
+
 	dev_info(&pdev->dev, "%s removed\n", obj->name);
 	kfree(obj);
 	return 0;
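The omap-iommu conversion follows the standard runtime-PM pattern: enable
runtime PM once in probe, then bracket every register access with a
get/put pair where clk_enable()/clk_disable() used to sit, leaving the
actual clock and reset handling to platform code. A minimal sketch,
assuming pm_runtime_enable() has already run in probe:

	pm_runtime_get_sync(obj->dev);	/* power the MMU up if needed */
	iommu_write_reg(obj, 1, MMU_GFLUSH);
	pm_runtime_put_sync(obj->dev);	/* drop our vote; device may idle */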
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 2b5f3c0..1200842 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -29,7 +29,6 @@
 struct omap_iommu {
 	const char	*name;
 	struct module	*owner;
-	struct clk	*clk;
 	void __iomem	*regbase;
 	struct device	*dev;
 	void		*isr_priv;
@@ -116,8 +115,6 @@
  * MMU Register offsets
  */
 #define MMU_REVISION		0x00
-#define MMU_SYSCONFIG		0x10
-#define MMU_SYSSTATUS		0x14
 #define MMU_IRQSTATUS		0x18
 #define MMU_IRQENABLE		0x1c
 #define MMU_WALKING_ST		0x40
diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c
index c020202..d745094 100644
--- a/drivers/iommu/omap-iommu2.c
+++ b/drivers/iommu/omap-iommu2.c
@@ -28,19 +28,6 @@
  */
 #define IOMMU_ARCH_VERSION	0x00000011
 
-/* SYSCONF */
-#define MMU_SYS_IDLE_SHIFT	3
-#define MMU_SYS_IDLE_FORCE	(0 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_NONE	(1 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_SMART	(2 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_MASK	(3 << MMU_SYS_IDLE_SHIFT)
-
-#define MMU_SYS_SOFTRESET	(1 << 1)
-#define MMU_SYS_AUTOIDLE	1
-
-/* SYSSTATUS */
-#define MMU_SYS_RESETDONE	1
-
 /* IRQSTATUS & IRQENABLE */
 #define MMU_IRQ_MULTIHITFAULT	(1 << 4)
 #define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
@@ -97,7 +84,6 @@
 static int omap2_iommu_enable(struct omap_iommu *obj)
 {
 	u32 l, pa;
-	unsigned long timeout;
 
 	if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd,  SZ_16K))
 		return -EINVAL;
@@ -106,29 +92,10 @@
 	if (!IS_ALIGNED(pa, SZ_16K))
 		return -EINVAL;
 
-	iommu_write_reg(obj, MMU_SYS_SOFTRESET, MMU_SYSCONFIG);
-
-	timeout = jiffies + msecs_to_jiffies(20);
-	do {
-		l = iommu_read_reg(obj, MMU_SYSSTATUS);
-		if (l & MMU_SYS_RESETDONE)
-			break;
-	} while (!time_after(jiffies, timeout));
-
-	if (!(l & MMU_SYS_RESETDONE)) {
-		dev_err(obj->dev, "can't take mmu out of reset\n");
-		return -ENODEV;
-	}
-
 	l = iommu_read_reg(obj, MMU_REVISION);
 	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
 		 (l >> 4) & 0xf, l & 0xf);
 
-	l = iommu_read_reg(obj, MMU_SYSCONFIG);
-	l &= ~MMU_SYS_IDLE_MASK;
-	l |= (MMU_SYS_IDLE_SMART | MMU_SYS_AUTOIDLE);
-	iommu_write_reg(obj, l, MMU_SYSCONFIG);
-
 	iommu_write_reg(obj, pa, MMU_TTB);
 
 	__iommu_set_twl(obj, true);
@@ -142,7 +109,6 @@
 
 	l &= ~MMU_CNTL_MASK;
 	iommu_write_reg(obj, l, MMU_CNTL);
-	iommu_write_reg(obj, MMU_SYS_IDLE_FORCE, MMU_SYSCONFIG);
 
 	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
 }
@@ -271,8 +237,6 @@
 	char *p = buf;
 
 	pr_reg(REVISION);
-	pr_reg(SYSCONFIG);
-	pr_reg(SYSSTATUS);
 	pr_reg(IRQSTATUS);
 	pr_reg(IRQENABLE);
 	pr_reg(WALKING_ST);
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index c16e8fc..4c9db62 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -398,6 +398,7 @@
 	do_gart_setup(gart, NULL);
 
 	gart_handle = gart;
+	bus_set_iommu(&platform_bus_type, &gart_iommu_ops);
 	return 0;
 
 fail:
@@ -450,7 +451,6 @@
 
 static int __devinit tegra_gart_init(void)
 {
-	bus_set_iommu(&platform_bus_type, &gart_iommu_ops);
 	return platform_driver_register(&tegra_gart_driver);
 }
 
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 4252d74..25c1210 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -694,10 +694,8 @@
 	*pte = _PTE_VACANT(iova);
 	FLUSH_CPU_DCACHE(pte, page, sizeof(*pte));
 	flush_ptc_and_tlb(as->smmu, as, iova, pte, page, 0);
-	if (!--(*count)) {
+	if (!--(*count))
 		free_ptbl(as, iova);
-		smmu_flush_regs(as->smmu, 0);
-	}
 }
 
 static void __smmu_iommu_map_pfn(struct smmu_as *as, dma_addr_t iova,
@@ -1232,6 +1230,7 @@
 
 	smmu_debugfs_create(smmu);
 	smmu_handle = smmu;
+	bus_set_iommu(&platform_bus_type, &smmu_iommu_ops);
 	return 0;
 }
 
@@ -1276,7 +1275,6 @@
 
 static int __devinit tegra_smmu_init(void)
 {
-	bus_set_iommu(&platform_bus_type, &smmu_iommu_ops);
 	return platform_driver_register(&tegra_smmu_driver);
 }
 
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 28c99c6..22b720e 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -1217,8 +1217,7 @@
 {
 	mISDN_unregister_Bprotocol(&DSP);
 
-	if (timer_pending(&dsp_spl_tl))
-		del_timer(&dsp_spl_tl);
+	del_timer_sync(&dsp_spl_tl);
 
 	if (!list_empty(&dsp_ilist)) {
 		printk(KERN_ERR "mISDN_dsp: Audio DSP object inst list not "
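The timer change above is a classic SMP fix: del_timer() only removes a
pending timer, and the timer_pending() pre-check races with a handler
that may already be running on another CPU. del_timer_sync() removes the
timer and also waits for any in-flight handler to finish. A sketch of
the safe teardown (it must not be called from the handler itself, which
would deadlock):

	del_timer_sync(&dsp_spl_tl);	/* safe even if not pending */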
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index b5fdcb7..a5ebc00 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -225,7 +225,7 @@
 			 * eventfd (ie. the appropriate virtqueue thread)?
 			 */
 			if (!send_notify_to_eventfd(cpu)) {
-				/* OK, we tell the main Laucher. */
+				/* OK, we tell the main Launcher. */
 				if (put_user(cpu->pending_notify, user))
 					return -EFAULT;
 				return sizeof(cpu->pending_notify);
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 1963680..9c6b964 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -997,7 +997,7 @@
 		       "%02x !\n", id, hdr->id);
 		goto failure;
 	}
-	if (prom_add_property(smu->of_node, prop)) {
+	if (of_add_property(smu->of_node, prop)) {
 		printk(KERN_DEBUG "SMU: Failed creating sdb-partition-%02x "
 		       "property !\n", id);
 		goto failure;
diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c
index b3411ed..fd6ed15 100644
--- a/drivers/macintosh/windfarm_fcu_controls.c
+++ b/drivers/macintosh/windfarm_fcu_controls.c
@@ -593,19 +593,7 @@
 	.id_table	= wf_fcu_id,
 };
 
-static int __init wf_fcu_init(void)
-{
-	return i2c_add_driver(&wf_fcu_driver);
-}
-
-static void __exit wf_fcu_exit(void)
-{
-	i2c_del_driver(&wf_fcu_driver);
-}
-
-
-module_init(wf_fcu_init);
-module_exit(wf_fcu_exit);
+module_i2c_driver(wf_fcu_driver);
 
 MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
 MODULE_DESCRIPTION("FCU control objects for PowerMacs thermal control");
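The module_i2c_driver() conversions in this and the following windfarm
patches are mechanical: the macro generates the same init/exit
boilerplate that is being deleted. Roughly, from include/linux/i2c.h:

	#define module_i2c_driver(__i2c_driver) \
		module_driver(__i2c_driver, i2c_add_driver, i2c_del_driver)

module_driver() in turn emits the __init/__exit wrappers plus the
module_init()/module_exit() declarations.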
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index b0c2d36..9ef32b3 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -174,19 +174,7 @@
 	.id_table	= wf_lm75_id,
 };
 
-static int __init wf_lm75_sensor_init(void)
-{
-	return i2c_add_driver(&wf_lm75_driver);
-}
-
-static void __exit wf_lm75_sensor_exit(void)
-{
-	i2c_del_driver(&wf_lm75_driver);
-}
-
-
-module_init(wf_lm75_sensor_init);
-module_exit(wf_lm75_sensor_exit);
+module_i2c_driver(wf_lm75_driver);
 
 MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
 MODULE_DESCRIPTION("LM75 sensor objects for PowerMacs thermal control");
diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c
index 371b058..945a25b 100644
--- a/drivers/macintosh/windfarm_max6690_sensor.c
+++ b/drivers/macintosh/windfarm_max6690_sensor.c
@@ -130,18 +130,7 @@
 	.id_table	= wf_max6690_id,
 };
 
-static int __init wf_max6690_sensor_init(void)
-{
-	return i2c_add_driver(&wf_max6690_driver);
-}
-
-static void __exit wf_max6690_sensor_exit(void)
-{
-	i2c_del_driver(&wf_max6690_driver);
-}
-
-module_init(wf_max6690_sensor_init);
-module_exit(wf_max6690_sensor_exit);
+module_i2c_driver(wf_max6690_driver);
 
 MODULE_AUTHOR("Paul Mackerras <paulus@samba.org>");
 MODULE_DESCRIPTION("MAX6690 sensor objects for PowerMac thermal control");
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index 426e810..d87f5ee 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -364,18 +364,7 @@
 	.id_table	= wf_sat_id,
 };
 
-static int __init sat_sensors_init(void)
-{
-	return i2c_add_driver(&wf_sat_driver);
-}
-
-static void __exit sat_sensors_exit(void)
-{
-	i2c_del_driver(&wf_sat_driver);
-}
-
-module_init(sat_sensors_init);
-module_exit(sat_sensors_exit);
+module_i2c_driver(wf_sat_driver);
 
 MODULE_AUTHOR("Paul Mackerras <paulus@samba.org>");
 MODULE_DESCRIPTION("SMU satellite sensors for PowerMac thermal control");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index bd8bf09..3db3d1b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -452,7 +452,7 @@
 	spin_lock_irq(&mddev->write_lock);
 	wait_event_lock_irq(mddev->sb_wait,
 			    !mddev->flush_bio,
-			    mddev->write_lock, /*nothing*/);
+			    mddev->write_lock);
 	mddev->flush_bio = bio;
 	spin_unlock_irq(&mddev->write_lock);
 
@@ -1414,12 +1414,11 @@
 	unsigned long long newcsum;
 	int size = 256 + le32_to_cpu(sb->max_dev)*2;
 	__le32 *isuper = (__le32*)sb;
-	int i;
 
 	disk_csum = sb->sb_csum;
 	sb->sb_csum = 0;
 	newcsum = 0;
-	for (i=0; size>=4; size -= 4 )
+	for (; size >= 4; size -= 4)
 		newcsum += le32_to_cpu(*isuper++);
 
 	if (size == 2)
@@ -4753,6 +4752,8 @@
 	}
 	mddev_get(mddev);
 	spin_unlock(&all_mddevs_lock);
+	if (entry->store == new_dev_store)
+		flush_workqueue(md_misc_wq);
 	rv = mddev_lock(mddev);
 	if (!rv) {
 		rv = entry->store(mddev, page, length);
@@ -6346,24 +6347,23 @@
 	 * Commands dealing with the RAID driver but not any
 	 * particular array:
 	 */
-	switch (cmd)
-	{
-		case RAID_VERSION:
-			err = get_version(argp);
-			goto done;
+	switch (cmd) {
+	case RAID_VERSION:
+		err = get_version(argp);
+		goto done;
 
-		case PRINT_RAID_DEBUG:
-			err = 0;
-			md_print_devices();
-			goto done;
+	case PRINT_RAID_DEBUG:
+		err = 0;
+		md_print_devices();
+		goto done;
 
 #ifndef MODULE
-		case RAID_AUTORUN:
-			err = 0;
-			autostart_arrays(arg);
-			goto done;
+	case RAID_AUTORUN:
+		err = 0;
+		autostart_arrays(arg);
+		goto done;
 #endif
-		default:;
+	default:;
 	}
 
 	/*
@@ -6398,6 +6398,10 @@
 		goto abort;
 	}
 
+	if (cmd == ADD_NEW_DISK)
+		/* need to ensure md_delayed_delete() has completed */
+		flush_workqueue(md_misc_wq);
+
 	err = mddev_lock(mddev);
 	if (err) {
 		printk(KERN_INFO 
@@ -6406,50 +6410,44 @@
 		goto abort;
 	}
 
-	switch (cmd)
-	{
-		case SET_ARRAY_INFO:
-			{
-				mdu_array_info_t info;
-				if (!arg)
-					memset(&info, 0, sizeof(info));
-				else if (copy_from_user(&info, argp, sizeof(info))) {
-					err = -EFAULT;
-					goto abort_unlock;
-				}
-				if (mddev->pers) {
-					err = update_array_info(mddev, &info);
-					if (err) {
-						printk(KERN_WARNING "md: couldn't update"
-						       " array info. %d\n", err);
-						goto abort_unlock;
-					}
-					goto done_unlock;
-				}
-				if (!list_empty(&mddev->disks)) {
-					printk(KERN_WARNING
-					       "md: array %s already has disks!\n",
-					       mdname(mddev));
-					err = -EBUSY;
-					goto abort_unlock;
-				}
-				if (mddev->raid_disks) {
-					printk(KERN_WARNING
-					       "md: array %s already initialised!\n",
-					       mdname(mddev));
-					err = -EBUSY;
-					goto abort_unlock;
-				}
-				err = set_array_info(mddev, &info);
-				if (err) {
-					printk(KERN_WARNING "md: couldn't set"
-					       " array info. %d\n", err);
-					goto abort_unlock;
-				}
+	if (cmd == SET_ARRAY_INFO) {
+		mdu_array_info_t info;
+		if (!arg)
+			memset(&info, 0, sizeof(info));
+		else if (copy_from_user(&info, argp, sizeof(info))) {
+			err = -EFAULT;
+			goto abort_unlock;
+		}
+		if (mddev->pers) {
+			err = update_array_info(mddev, &info);
+			if (err) {
+				printk(KERN_WARNING "md: couldn't update"
+				       " array info. %d\n", err);
+				goto abort_unlock;
 			}
 			goto done_unlock;
-
-		default:;
+		}
+		if (!list_empty(&mddev->disks)) {
+			printk(KERN_WARNING
+			       "md: array %s already has disks!\n",
+			       mdname(mddev));
+			err = -EBUSY;
+			goto abort_unlock;
+		}
+		if (mddev->raid_disks) {
+			printk(KERN_WARNING
+			       "md: array %s already initialised!\n",
+			       mdname(mddev));
+			err = -EBUSY;
+			goto abort_unlock;
+		}
+		err = set_array_info(mddev, &info);
+		if (err) {
+			printk(KERN_WARNING "md: couldn't set"
+			       " array info. %d\n", err);
+			goto abort_unlock;
+		}
+		goto done_unlock;
 	}
 
 	/*
@@ -6468,52 +6466,51 @@
 	/*
 	 * Commands even a read-only array can execute:
 	 */
-	switch (cmd)
-	{
-		case GET_BITMAP_FILE:
-			err = get_bitmap_file(mddev, argp);
+	switch (cmd) {
+	case GET_BITMAP_FILE:
+		err = get_bitmap_file(mddev, argp);
+		goto done_unlock;
+
+	case RESTART_ARRAY_RW:
+		err = restart_array(mddev);
+		goto done_unlock;
+
+	case STOP_ARRAY:
+		err = do_md_stop(mddev, 0, bdev);
+		goto done_unlock;
+
+	case STOP_ARRAY_RO:
+		err = md_set_readonly(mddev, bdev);
+		goto done_unlock;
+
+	case BLKROSET:
+		if (get_user(ro, (int __user *)(arg))) {
+			err = -EFAULT;
+			goto done_unlock;
+		}
+		err = -EINVAL;
+
+		/* if the bdev is going readonly the value of mddev->ro
+		 * does not matter, no writes are coming
+		 */
+		if (ro)
 			goto done_unlock;
 
-		case RESTART_ARRAY_RW:
+		/* are we already prepared for writes? */
+		if (mddev->ro != 1)
+			goto done_unlock;
+
+		/* transitioning to readauto need only happen for
+		 * arrays that call md_write_start
+		 */
+		if (mddev->pers) {
 			err = restart_array(mddev);
-			goto done_unlock;
-
-		case STOP_ARRAY:
-			err = do_md_stop(mddev, 0, bdev);
-			goto done_unlock;
-
-		case STOP_ARRAY_RO:
-			err = md_set_readonly(mddev, bdev);
-			goto done_unlock;
-
-		case BLKROSET:
-			if (get_user(ro, (int __user *)(arg))) {
-				err = -EFAULT;
-				goto done_unlock;
+			if (err == 0) {
+				mddev->ro = 2;
+				set_disk_ro(mddev->gendisk, 0);
 			}
-			err = -EINVAL;
-
-			/* if the bdev is going readonly the value of mddev->ro
-			 * does not matter, no writes are coming
-			 */
-			if (ro)
-				goto done_unlock;
-
-			/* are we are already prepared for writes? */
-			if (mddev->ro != 1)
-				goto done_unlock;
-
-			/* transitioning to readauto need only happen for
-			 * arrays that call md_write_start
-			 */
-			if (mddev->pers) {
-				err = restart_array(mddev);
-				if (err == 0) {
-					mddev->ro = 2;
-					set_disk_ro(mddev->gendisk, 0);
-				}
-			}
-			goto done_unlock;
+		}
+		goto done_unlock;
 	}
 
 	/*
@@ -6535,37 +6532,36 @@
 		}
 	}
 
-	switch (cmd)
+	switch (cmd) {
+	case ADD_NEW_DISK:
 	{
-		case ADD_NEW_DISK:
-		{
-			mdu_disk_info_t info;
-			if (copy_from_user(&info, argp, sizeof(info)))
-				err = -EFAULT;
-			else
-				err = add_new_disk(mddev, &info);
-			goto done_unlock;
-		}
+		mdu_disk_info_t info;
+		if (copy_from_user(&info, argp, sizeof(info)))
+			err = -EFAULT;
+		else
+			err = add_new_disk(mddev, &info);
+		goto done_unlock;
+	}
 
-		case HOT_REMOVE_DISK:
-			err = hot_remove_disk(mddev, new_decode_dev(arg));
-			goto done_unlock;
+	case HOT_REMOVE_DISK:
+		err = hot_remove_disk(mddev, new_decode_dev(arg));
+		goto done_unlock;
 
-		case HOT_ADD_DISK:
-			err = hot_add_disk(mddev, new_decode_dev(arg));
-			goto done_unlock;
+	case HOT_ADD_DISK:
+		err = hot_add_disk(mddev, new_decode_dev(arg));
+		goto done_unlock;
 
-		case RUN_ARRAY:
-			err = do_md_run(mddev);
-			goto done_unlock;
+	case RUN_ARRAY:
+		err = do_md_run(mddev);
+		goto done_unlock;
 
-		case SET_BITMAP_FILE:
-			err = set_bitmap_file(mddev, (int)arg);
-			goto done_unlock;
+	case SET_BITMAP_FILE:
+		err = set_bitmap_file(mddev, (int)arg);
+		goto done_unlock;
 
-		default:
-			err = -EINVAL;
-			goto abort_unlock;
+	default:
+		err = -EINVAL;
+		goto abort_unlock;
 	}
 
 done_unlock:
@@ -7184,6 +7180,7 @@
 	wake_up(&mddev->recovery_wait);
 	if (!ok) {
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 		// stop recovery, signal do_sync ....
 	}
@@ -7281,6 +7278,7 @@
 
 #define SYNC_MARKS	10
 #define	SYNC_MARK_STEP	(3*HZ)
+#define UPDATE_FREQUENCY (5*60*HZ)
 void md_do_sync(struct md_thread *thread)
 {
 	struct mddev *mddev = thread->mddev;
@@ -7289,6 +7287,7 @@
 		 window;
 	sector_t max_sectors,j, io_sectors;
 	unsigned long mark[SYNC_MARKS];
+	unsigned long update_time;
 	sector_t mark_cnt[SYNC_MARKS];
 	int last_mark,m;
 	struct list_head *tmp;
@@ -7448,6 +7447,7 @@
 	mddev->curr_resync_completed = j;
 	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	md_new_event(mddev);
+	update_time = jiffies;
 
 	blk_start_plug(&plug);
 	while (j < max_sectors) {
@@ -7459,6 +7459,7 @@
 		    ((mddev->curr_resync > mddev->curr_resync_completed &&
 		      (mddev->curr_resync - mddev->curr_resync_completed)
 		      > (max_sectors >> 4)) ||
+		     time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
 		     (j - mddev->curr_resync_completed)*2
 		     >= mddev->resync_max - mddev->curr_resync_completed
 			    )) {
@@ -7466,6 +7467,10 @@
 			wait_event(mddev->recovery_wait,
 				   atomic_read(&mddev->recovery_active) == 0);
 			mddev->curr_resync_completed = j;
+			if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+			    j > mddev->recovery_cp)
+				mddev->recovery_cp = j;
+			update_time = jiffies;
 			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
 			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 		}
@@ -7570,8 +7575,13 @@
 					printk(KERN_INFO
 					       "md: checkpointing %s of %s.\n",
 					       desc, mdname(mddev));
-					mddev->recovery_cp =
-						mddev->curr_resync_completed;
+					if (test_bit(MD_RECOVERY_ERROR,
+						&mddev->recovery))
+						mddev->recovery_cp =
+							mddev->curr_resync_completed;
+					else
+						mddev->recovery_cp =
+							mddev->curr_resync;
 				}
 			} else
 				mddev->recovery_cp = MaxSector;
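Two safety nets land together in md_do_sync(): a periodic checkpoint
(UPDATE_FREQUENCY is 5*60*HZ jiffies, i.e. five minutes) so that a crash
mid-resync no longer restarts from the beginning, and the new
MD_RECOVERY_ERROR bit so that an interruption caused by an I/O error
checkpoints conservatively at curr_resync_completed rather than the more
optimistic curr_resync. A sketch of the time check, using the kernel's
wrap-safe jiffies comparison:

	unsigned long update_time = jiffies;

	/* ... inside the resync loop ... */
	if (time_after_eq(jiffies, update_time + UPDATE_FREQUENCY)) {
		mddev->curr_resync_completed = j;	/* checkpoint */
		update_time = jiffies;
	}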
diff --git a/drivers/md/md.h b/drivers/md/md.h
index af443ab..eca59c3 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -307,6 +307,7 @@
 	 * REQUEST:  user-space has requested a sync (used with SYNC)
 	 * CHECK:    user-space request for check-only, no repair
 	 * RESHAPE:  A reshape is happening
+	 * ERROR:    sync-action interrupted because of an I/O error
 	 *
 	 * If neither SYNC or RESHAPE are set, then it is a recovery.
 	 */
@@ -320,6 +321,7 @@
 #define	MD_RECOVERY_CHECK	7
 #define MD_RECOVERY_RESHAPE	8
 #define	MD_RECOVERY_FROZEN	9
+#define	MD_RECOVERY_ERROR	10
 
 	unsigned long			recovery;
 	/* If a RAID personality determines that recovery (of a particular
@@ -551,32 +553,6 @@
 
 #define THREAD_WAKEUP  0
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
-do {									\
-	wait_queue_t __wait;						\
-	init_waitqueue_entry(&__wait, current);				\
-									\
-	add_wait_queue(&wq, &__wait);					\
-	for (;;) {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	current->state = TASK_RUNNING;					\
-	remove_wait_queue(&wq, &__wait);				\
-} while (0)
-
-#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
-do {									\
-	if (condition)	 						\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
-} while (0)
-
 static inline void safe_put_page(struct page *p)
 {
 	if (p) put_page(p);
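The local wait_event_lock_irq() macros deleted here moved to
include/linux/wait.h, and the trailing command argument was split into a
separate wait_event_lock_irq_cmd() variant; that is why the raid1,
raid10 and raid5 hunks below drop the dangling commas and empty
arguments. Usage, sketched after the callers in this series:

	spin_lock_irq(&conf->resync_lock);
	/* Sleeps with resync_lock dropped; retakes it to re-check. */
	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
			    conf->resync_lock);
	/* Variant that runs a command each time the lock is dropped
	 * before scheduling: */
	wait_event_lock_irq_cmd(conf->wait_barrier,
				conf->nr_pending == conf->nr_queued + 1,
				conf->resync_lock,
				flush_pending_writes(conf));
	spin_unlock_irq(&conf->resync_lock);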
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a0f7309..d5bddfc 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -822,7 +822,7 @@
 
 	/* Wait until no block IO is waiting */
 	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -830,7 +830,7 @@
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -864,8 +864,7 @@
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -898,10 +897,10 @@
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(struct r1conf *conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c9acbd7..64d4824 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,7 +952,7 @@
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -960,7 +960,7 @@
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -993,8 +993,7 @@
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1027,10 +1026,10 @@
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 
 	spin_unlock_irq(&conf->resync_lock);
 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3380372..19d77a0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,8 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <trace/events/block.h>
+
 #include "md.h"
 #include "raid5.h"
 #include "raid0.h"
@@ -182,6 +184,8 @@
 		return_bi = bi->bi_next;
 		bi->bi_next = NULL;
 		bi->bi_size = 0;
+		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+					 bi, 0);
 		bio_endio(bi, 0);
 		bi = return_bi;
 	}
@@ -466,7 +470,7 @@
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0 || noquiesce,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!conf->inactive_blocked)
@@ -480,8 +484,7 @@
 						    (atomic_read(&conf->active_stripes)
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock,
-						    );
+						    conf->device_lock);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -671,6 +674,9 @@
 			bi->bi_next = NULL;
 			if (rrdev)
 				set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
+			trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+					      bi, disk_devt(conf->mddev->gendisk),
+					      sh->dev[i].sector);
 			generic_make_request(bi);
 		}
 		if (rrdev) {
@@ -698,6 +704,9 @@
 			rbi->bi_io_vec[0].bv_offset = 0;
 			rbi->bi_size = STRIPE_SIZE;
 			rbi->bi_next = NULL;
+			trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+					      rbi, disk_devt(conf->mddev->gendisk),
+					      sh->dev[i].sector);
 			generic_make_request(rbi);
 		}
 		if (!rdev && !rrdev) {
@@ -1646,8 +1655,7 @@
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
-				    conf->device_lock,
-				    );
+				    conf->device_lock);
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
 		atomic_set(&nsh->count, 1);
@@ -2855,8 +2863,10 @@
 	pr_debug("for sector %llu, rmw=%d rcw=%d\n",
 		(unsigned long long)sh->sector, rmw, rcw);
 	set_bit(STRIPE_HANDLE, &sh->state);
-	if (rmw < rcw && rmw > 0)
+	if (rmw < rcw && rmw > 0) {
 		/* prefer read-modify-write, but need to get some data */
+		blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
+				  (unsigned long long)sh->sector, rmw);
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if ((dev->towrite || i == sh->pd_idx) &&
@@ -2867,7 +2877,7 @@
 				if (
 				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 					pr_debug("Read_old block "
-						"%d for r-m-w\n", i);
+						 "%d for r-m-w\n", i);
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
@@ -2877,8 +2887,10 @@
 				}
 			}
 		}
+	}
 	if (rcw <= rmw && rcw > 0) {
 		/* want reconstruct write, but need to get some data */
+		int qread = 0;
 		rcw = 0;
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
@@ -2897,12 +2909,17 @@
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
+					qread++;
 				} else {
 					set_bit(STRIPE_DELAYED, &sh->state);
 					set_bit(STRIPE_HANDLE, &sh->state);
 				}
 			}
 		}
+		if (rcw)
+			blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
+					  (unsigned long long)sh->sector,
+					  rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
 	}
 	/* now if nothing is locked, and if we have enough data,
 	 * we can start a write request
@@ -3224,10 +3241,7 @@
 
 		}
 	/* done submitting copies, wait for them to complete */
-	if (tx) {
-		async_tx_ack(tx);
-		dma_wait_for_async_tx(tx);
-	}
+	async_tx_quiesce(&tx);
 }
 
 /*
@@ -3903,6 +3917,8 @@
 	rdev_dec_pending(rdev, conf->mddev);
 
 	if (!error && uptodate) {
+		trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
+					 raid_bi, 0);
 		bio_endio(raid_bi, 0);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_stripe);
@@ -4003,10 +4019,13 @@
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
+		trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+				      align_bi, disk_devt(mddev->gendisk),
+				      raid_bio->bi_sector);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -4081,6 +4100,7 @@
 	struct stripe_head *sh;
 	struct mddev *mddev = cb->cb.data;
 	struct r5conf *conf = mddev->private;
+	int cnt = 0;
 
 	if (cb->list.next && !list_empty(&cb->list)) {
 		spin_lock_irq(&conf->device_lock);
@@ -4095,9 +4115,11 @@
 			smp_mb__before_clear_bit();
 			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
 			__release_stripe(conf, sh);
+			cnt++;
 		}
 		spin_unlock_irq(&conf->device_lock);
 	}
+	trace_block_unplug(mddev->queue, cnt, !from_schedule);
 	kfree(cb);
 }
 
@@ -4355,6 +4377,8 @@
 		if ( rw == WRITE )
 			md_write_end(mddev);
 
+		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+					 bi, 0);
 		bio_endio(bi, 0);
 	}
 }
@@ -4731,8 +4755,11 @@
 		handled++;
 	}
 	remaining = raid5_dec_bi_active_stripes(raid_bio);
-	if (remaining == 0)
+	if (remaining == 0) {
+		trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
+					 raid_bio, 0);
 		bio_endio(raid_bio, 0);
+	}
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
 		wake_up(&conf->wait_for_stripe);
 	return handled;
@@ -6095,7 +6122,7 @@
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		conf->quiesce = 1;
 		spin_unlock_irq(&conf->device_lock);
 		/* allow reshape to continue */
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 0c3ced7..164afa7 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -792,6 +792,7 @@
 			 * than an unsolicited DID_ABORT.
 			 */
 			sc->result = DID_RESET << 16;
+			break;
 
 		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
 			if (ioc->bus_type == FC)
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b63987c..1c0abd4 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -104,6 +104,17 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called ti-ssp.
 
+config MFD_TI_AM335X_TSCADC
+	tristate "TI ADC / Touch Screen chip support"
+	select MFD_CORE
+	select REGMAP
+	select REGMAP_MMIO
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  series of Touch Screen / ADC chips.
+	  To compile this driver as a module, choose M here: the
+	  module will be called ti_am335x_tscadc.
+
 config HTC_EGPIO
 	bool "HTC EGPIO support"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
@@ -253,6 +264,20 @@
 	  If you say yes here you get support for the TPS65912 series of
 	  PM chips with SPI interface.
 
+config MFD_TPS80031
+	bool "TI TPS80031/TPS80032 Power Management chips"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  TPS80031/TPS80032 Fully Integrated Power Management with Power
+	  Path and Battery Charger. The device provides five configurable
+	  step-down converters, 11 general purpose LDOs, USB OTG Module,
+	  ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
+	  Power Path from USB, 32K clock generator.
+
 config MENELAUS
 	bool "Texas Instruments TWL92330/Menelaus PM chip"
 	depends on I2C=y && ARCH_OMAP2
@@ -309,10 +334,10 @@
 
 config TWL6040_CORE
 	bool "Support for TWL6040 audio codec"
-	depends on I2C=y && GENERIC_HARDIRQS
+	depends on I2C=y
 	select MFD_CORE
 	select REGMAP_I2C
-	select IRQ_DOMAIN
+	select REGMAP_IRQ
 	default n
 	help
 	  Say yes here if you want support for Texas Instruments TWL6040 audio
@@ -990,6 +1015,7 @@
 	depends on I2C=y && GENERIC_HARDIRQS
 	select MFD_CORE
 	select REGMAP_I2C
+	select REGMAP_IRQ
 	help
 	  If you say yes here you get support for the TPS65090 series of
 	  Power Management chips.
@@ -1034,6 +1060,7 @@
 	bool "STA2X11 multi function device support"
 	depends on STA2X11
 	select MFD_CORE
+	select REGMAP_MMIO
 
 config MFD_SYSCON
 	bool "System Controller Register R/W Based on Regmap"
@@ -1053,6 +1080,38 @@
 	  If you say yes here you get support for the Palmas
 	  series of PMIC chips from Texas Instruments.
 
+config MFD_VIPERBOARD
+	tristate "Support for Nano River Technologies Viperboard"
+	select MFD_CORE
+	depends on USB
+	default n
+	help
+	  Say yes here if you want support for Nano River Technologies
+	  Viperboard.
+	  There are mfd cell drivers available for the i2c master, the adc
+	  and both gpios found on the board. The spi part does not yet
+	  have a driver.
+	  You need to select the mfd cell drivers separately.
+	  The drivers do not support all features the board exposes.
+
+config MFD_RETU
+	tristate "Support for Retu multi-function device"
+	select MFD_CORE
+	depends on I2C
+	select REGMAP_IRQ
+	help
+	  Retu is a multi-function device found on Nokia Internet Tablets
+	  (770, N800 and N810).
+
+config MFD_AS3711
+	bool "Support for AS3711"
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	depends on I2C=y
+	help
+	  Support for the AS3711 PMIC from AMS
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 69f260a..8b977f8 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -19,6 +19,7 @@
 obj-$(CONFIG_MFD_DAVINCI_VOICECODEC)	+= davinci_voicecodec.o
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 obj-$(CONFIG_MFD_TI_SSP)	+= ti-ssp.o
+obj-$(CONFIG_MFD_TI_AM335X_TSCADC)	+= ti_am335x_tscadc.o
 
 obj-$(CONFIG_MFD_STA2X11)	+= sta2x11-mfd.o
 obj-$(CONFIG_MFD_STMPE)		+= stmpe.o
@@ -55,18 +56,19 @@
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_TPS6507X)		+= tps6507x.o
 obj-$(CONFIG_MFD_TPS65217)	+= tps65217.o
-obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o tps65910-irq.o
+obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o
 tps65912-objs                   := tps65912-core.o tps65912-irq.o
 obj-$(CONFIG_MFD_TPS65912)	+= tps65912.o
 obj-$(CONFIG_MFD_TPS65912_I2C)	+= tps65912-i2c.o
 obj-$(CONFIG_MFD_TPS65912_SPI)  += tps65912-spi.o
+obj-$(CONFIG_MFD_TPS80031)	+= tps80031.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
 obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o twl6030-irq.o
 obj-$(CONFIG_TWL4030_MADC)      += twl4030-madc.o
 obj-$(CONFIG_TWL4030_POWER)    += twl4030-power.o
 obj-$(CONFIG_MFD_TWL4030_AUDIO)	+= twl4030-audio.o
-obj-$(CONFIG_TWL6040_CORE)	+= twl6040-core.o twl6040-irq.o
+obj-$(CONFIG_TWL6040_CORE)	+= twl6040.o
 
 obj-$(CONFIG_MFD_MC13XXX)	+= mc13xxx-core.o
 obj-$(CONFIG_MFD_MC13XXX_SPI)	+= mc13xxx-spi.o
@@ -89,6 +91,7 @@
 
 obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
 
+obj-$(CONFIG_PMIC_DA9052)	+= da9052-irq.o
 obj-$(CONFIG_PMIC_DA9052)	+= da9052-core.o
 obj-$(CONFIG_MFD_DA9052_SPI)	+= da9052-spi.o
 obj-$(CONFIG_MFD_DA9052_I2C)	+= da9052-i2c.o
@@ -137,8 +140,11 @@
 obj-$(CONFIG_MFD_AAT2870_CORE)	+= aat2870-core.o
 obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
 obj-$(CONFIG_MFD_PALMAS)	+= palmas.o
+obj-$(CONFIG_MFD_VIPERBOARD)    += viperboard.o
 obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
 obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o vexpress-sysreg.o
+obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
+obj-$(CONFIG_MFD_AS3711)	+= as3711.o
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 59da165..e1650ba 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -586,38 +586,6 @@
 		return 0;
 }
 
-/* AB8500 GPIO Resources */
-static struct resource __devinitdata ab8500_gpio_resources[] = {
-	{
-		.name	= "GPIO_INT6",
-		.start	= AB8500_INT_GPIO6R,
-		.end	= AB8500_INT_GPIO41F,
-		.flags	= IORESOURCE_IRQ,
-	}
-};
-
-/* AB9540 GPIO Resources */
-static struct resource __devinitdata ab9540_gpio_resources[] = {
-	{
-		.name	= "GPIO_INT6",
-		.start	= AB8500_INT_GPIO6R,
-		.end	= AB8500_INT_GPIO41F,
-		.flags	= IORESOURCE_IRQ,
-	},
-	{
-		.name	= "GPIO_INT14",
-		.start	= AB9540_INT_GPIO50R,
-		.end	= AB9540_INT_GPIO54R,
-		.flags	= IORESOURCE_IRQ,
-	},
-	{
-		.name	= "GPIO_INT15",
-		.start	= AB9540_INT_GPIO50F,
-		.end	= AB9540_INT_GPIO54F,
-		.flags	= IORESOURCE_IRQ,
-	}
-};
-
 static struct resource ab8500_gpadc_resources[] = {
 	{
 		.name	= "HW_CONV_END",
@@ -979,6 +947,10 @@
 		.of_compatible = "stericsson,ab8500-regulator",
 	},
 	{
+		.name = "abx500-clk",
+		.of_compatible = "stericsson,abx500-clk",
+	},
+	{
 		.name = "ab8500-gpadc",
 		.of_compatible = "stericsson,ab8500-gpadc",
 		.num_resources = ARRAY_SIZE(ab8500_gpadc_resources),
@@ -1080,8 +1052,6 @@
 	{
 		.name = "ab8500-gpio",
 		.of_compatible = "stericsson,ab8500-gpio",
-		.num_resources = ARRAY_SIZE(ab8500_gpio_resources),
-		.resources = ab8500_gpio_resources,
 	},
 	{
 		.name = "ab8500-usb",
@@ -1098,8 +1068,6 @@
 static struct mfd_cell ab9540_devs[] = {
 	{
 		.name = "ab8500-gpio",
-		.num_resources = ARRAY_SIZE(ab9540_gpio_resources),
-		.resources = ab9540_gpio_resources,
 	},
 	{
 		.name = "ab9540-usb",
@@ -1284,7 +1252,7 @@
 	int i;
 	u8 value;
 
-	ab8500 = kzalloc(sizeof *ab8500, GFP_KERNEL);
+	ab8500 = devm_kzalloc(&pdev->dev, sizeof *ab8500, GFP_KERNEL);
 	if (!ab8500)
 		return -ENOMEM;
 
@@ -1294,10 +1262,8 @@
 	ab8500->dev = &pdev->dev;
 
 	resource = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!resource) {
-		ret = -ENODEV;
-		goto out_free_ab8500;
-	}
+	if (!resource)
+		return -ENODEV;
 
 	ab8500->irq = resource->start;
 
@@ -1320,7 +1286,7 @@
 		ret = get_register_interruptible(ab8500, AB8500_MISC,
 			AB8500_IC_NAME_REG, &value);
 		if (ret < 0)
-			goto out_free_ab8500;
+			return ret;
 
 		ab8500->version = value;
 	}
@@ -1328,7 +1294,7 @@
 	ret = get_register_interruptible(ab8500, AB8500_MISC,
 		AB8500_REV_REG, &value);
 	if (ret < 0)
-		goto out_free_ab8500;
+		return ret;
 
 	ab8500->chip_id = value;
 
@@ -1345,14 +1311,13 @@
 		ab8500->mask_size = AB8500_NUM_IRQ_REGS;
 		ab8500->irq_reg_offset = ab8500_irq_regoffset;
 	}
-	ab8500->mask = kzalloc(ab8500->mask_size, GFP_KERNEL);
+	ab8500->mask = devm_kzalloc(&pdev->dev, ab8500->mask_size, GFP_KERNEL);
 	if (!ab8500->mask)
 		return -ENOMEM;
-	ab8500->oldmask = kzalloc(ab8500->mask_size, GFP_KERNEL);
-	if (!ab8500->oldmask) {
-		ret = -ENOMEM;
-		goto out_freemask;
-	}
+	ab8500->oldmask = devm_kzalloc(&pdev->dev, ab8500->mask_size, GFP_KERNEL);
+	if (!ab8500->oldmask)
+		return -ENOMEM;
+
 	/*
 	 * ab8500 has switched off due to (SWITCH_OFF_STATUS):
 	 * 0x01 Swoff bit programming
@@ -1406,37 +1371,37 @@
 
 	ret = abx500_register_ops(ab8500->dev, &ab8500_ops);
 	if (ret)
-		goto out_freeoldmask;
+		return ret;
 
 	for (i = 0; i < ab8500->mask_size; i++)
 		ab8500->mask[i] = ab8500->oldmask[i] = 0xff;
 
 	ret = ab8500_irq_init(ab8500, np);
 	if (ret)
-		goto out_freeoldmask;
+		return ret;
 
 	/*  Activate this feature only in ab9540 */
 	/*  till tests are done on ab8500 1p2 or later*/
 	if (is_ab9540(ab8500)) {
-		ret = request_threaded_irq(ab8500->irq, NULL,
-					ab8500_hierarchical_irq,
-					IRQF_ONESHOT | IRQF_NO_SUSPEND,
-					"ab8500", ab8500);
+		ret = devm_request_threaded_irq(&pdev->dev, ab8500->irq, NULL,
+						ab8500_hierarchical_irq,
+						IRQF_ONESHOT | IRQF_NO_SUSPEND,
+						"ab8500", ab8500);
 	}
 	else {
-		ret = request_threaded_irq(ab8500->irq, NULL,
-					ab8500_irq,
-					IRQF_ONESHOT | IRQF_NO_SUSPEND,
-					"ab8500", ab8500);
+		ret = devm_request_threaded_irq(&pdev->dev, ab8500->irq, NULL,
+						ab8500_irq,
+						IRQF_ONESHOT | IRQF_NO_SUSPEND,
+						"ab8500", ab8500);
 		if (ret)
-			goto out_freeoldmask;
+			return ret;
 	}
 
 	ret = mfd_add_devices(ab8500->dev, 0, abx500_common_devs,
 			ARRAY_SIZE(abx500_common_devs), NULL,
 			ab8500->irq_base, ab8500->domain);
 	if (ret)
-		goto out_freeirq;
+		return ret;
 
 	if (is_ab9540(ab8500))
 		ret = mfd_add_devices(ab8500->dev, 0, ab9540_devs,
@@ -1447,14 +1412,14 @@
 				ARRAY_SIZE(ab8500_devs), NULL,
 				ab8500->irq_base, ab8500->domain);
 	if (ret)
-		goto out_freeirq;
+		return ret;
 
 	if (is_ab9540(ab8500) || is_ab8505(ab8500))
 		ret = mfd_add_devices(ab8500->dev, 0, ab9540_ab8505_devs,
 				ARRAY_SIZE(ab9540_ab8505_devs), NULL,
 				ab8500->irq_base, ab8500->domain);
 	if (ret)
-		goto out_freeirq;
+		return ret;
 
 	if (!no_bm) {
 		/* Add battery management devices */
@@ -1475,17 +1440,6 @@
 		dev_err(ab8500->dev, "error creating sysfs entries\n");
 
 	return ret;
-
-out_freeirq:
-	free_irq(ab8500->irq, ab8500);
-out_freeoldmask:
-	kfree(ab8500->oldmask);
-out_freemask:
-	kfree(ab8500->mask);
-out_free_ab8500:
-	kfree(ab8500);
-
-	return ret;
 }
 
 static int ab8500_remove(struct platform_device *pdev)
@@ -1498,11 +1452,6 @@
 		sysfs_remove_group(&ab8500->dev->kobj, &ab8500_attr_group);
 
 	mfd_remove_devices(ab8500->dev);
-	free_irq(ab8500->irq, ab8500);
-
-	kfree(ab8500->oldmask);
-	kfree(ab8500->mask);
-	kfree(ab8500);
 
 	return 0;
 }
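The ab8500 rework is the usual devm_* cleanup: memory from devm_kzalloc()
and the IRQ from devm_request_threaded_irq() are released automatically
when probe fails or the device is unbound, so the ladder of free labels
collapses into plain returns and the matching kfree()/free_irq() calls
disappear from remove(). A minimal sketch of the resulting probe shape
(all names hypothetical):

	struct chip { int irq; };

	static irqreturn_t chip_irq_thread(int irq, void *data)
	{
		return IRQ_HANDLED;	/* placeholder handler */
	}

	static int example_probe(struct platform_device *pdev)
	{
		struct chip *chip;
		int ret;

		chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
		if (!chip)
			return -ENOMEM;		/* nothing to unwind */

		chip->irq = platform_get_irq(pdev, 0);
		if (chip->irq < 0)
			return chip->irq;

		ret = devm_request_threaded_irq(&pdev->dev, chip->irq, NULL,
						chip_irq_thread, IRQF_ONESHOT,
						"chip", chip);
		if (ret)
			return ret;		/* devm frees chip for us */

		return 0;
	}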
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index c784f46..bc8a3ed 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -292,6 +292,7 @@
 	struct device *dev = arizona->dev;
 	const char *type_name;
 	unsigned int reg, val;
+	int (*apply_patch)(struct arizona *) = NULL;
 	int ret, i;
 
 	dev_set_drvdata(arizona->dev, arizona);
@@ -391,7 +392,7 @@
 				arizona->type);
 			arizona->type = WM5102;
 		}
-		ret = wm5102_patch(arizona);
+		apply_patch = wm5102_patch;
 		break;
 #endif
 #ifdef CONFIG_MFD_WM5110
@@ -402,7 +403,7 @@
 				arizona->type);
 			arizona->type = WM5110;
 		}
-		ret = wm5110_patch(arizona);
+		apply_patch = wm5110_patch;
 		break;
 #endif
 	default:
@@ -412,9 +413,6 @@
 
 	dev_info(dev, "%s revision %c\n", type_name, arizona->rev + 'A');
 
-	if (ret != 0)
-		dev_err(arizona->dev, "Failed to apply patch: %d\n", ret);
-
 	/* If we have a /RESET GPIO we'll already be reset */
 	if (!arizona->pdata.reset) {
 		regcache_mark_dirty(arizona->regmap);
@@ -438,6 +436,15 @@
 		goto err_reset;
 	}
 
+	if (apply_patch) {
+		ret = apply_patch(arizona);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to apply patch: %d\n",
+				ret);
+			goto err_reset;
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(arizona->pdata.gpio_defaults); i++) {
 		if (!arizona->pdata.gpio_defaults[i])
 			continue;
diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index b1b0091..74713bf 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -224,6 +224,7 @@
 	arizona->virq = irq_domain_add_linear(NULL, 2, &arizona_domain_ops,
 					      arizona);
 	if (!arizona->virq) {
+		dev_err(arizona->dev, "Failed to add core IRQ domain\n");
 		ret = -EINVAL;
 		goto err;
 	}
diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c
new file mode 100644
index 0000000..e994c96
--- /dev/null
+++ b/drivers/mfd/as3711.c
@@ -0,0 +1,217 @@
+/*
+ * AS3711 PMIC MFD driver
+ *
+ * Copyright (C) 2012 Renesas Electronics Corporation
+ * Author: Guennadi Liakhovetski, <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License as
+ * published by the Free Software Foundation
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mfd/as3711.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+enum {
+	AS3711_REGULATOR,
+	AS3711_BACKLIGHT,
+};
+
+/*
+ * Ok to have it static: it is only used during probing and multiple I2C devices
+ * cannot be probed simultaneously. Just make sure to avoid stale data.
+ */
+static struct mfd_cell as3711_subdevs[] = {
+	[AS3711_REGULATOR] = {.name = "as3711-regulator",},
+	[AS3711_BACKLIGHT] = {.name = "as3711-backlight",},
+};
+
+static bool as3711_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_GPIO_SIGNAL_IN:
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+	case AS3711_CHARGER_STATUS_1:
+	case AS3711_CHARGER_STATUS_2:
+	case AS3711_REG_STATUS:
+		return true;
+	}
+	return false;
+}
+
+static bool as3711_precious_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+		return true;
+	}
+	return false;
+}
+
+static bool as3711_readable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_SD_1_VOLTAGE:
+	case AS3711_SD_2_VOLTAGE:
+	case AS3711_SD_3_VOLTAGE:
+	case AS3711_SD_4_VOLTAGE:
+	case AS3711_LDO_1_VOLTAGE:
+	case AS3711_LDO_2_VOLTAGE:
+	case AS3711_LDO_3_VOLTAGE:
+	case AS3711_LDO_4_VOLTAGE:
+	case AS3711_LDO_5_VOLTAGE:
+	case AS3711_LDO_6_VOLTAGE:
+	case AS3711_LDO_7_VOLTAGE:
+	case AS3711_LDO_8_VOLTAGE:
+	case AS3711_SD_CONTROL:
+	case AS3711_GPIO_SIGNAL_OUT:
+	case AS3711_GPIO_SIGNAL_IN:
+	case AS3711_SD_CONTROL_1:
+	case AS3711_SD_CONTROL_2:
+	case AS3711_CURR_CONTROL:
+	case AS3711_CURR1_VALUE:
+	case AS3711_CURR2_VALUE:
+	case AS3711_CURR3_VALUE:
+	case AS3711_STEPUP_CONTROL_1:
+	case AS3711_STEPUP_CONTROL_2:
+	case AS3711_STEPUP_CONTROL_4:
+	case AS3711_STEPUP_CONTROL_5:
+	case AS3711_REG_STATUS:
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+	case AS3711_CHARGER_STATUS_1:
+	case AS3711_CHARGER_STATUS_2:
+	case AS3711_ASIC_ID_1:
+	case AS3711_ASIC_ID_2:
+		return true;
+	}
+	return false;
+}
+
+static const struct regmap_config as3711_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.volatile_reg = as3711_volatile_reg,
+	.readable_reg = as3711_readable_reg,
+	.precious_reg = as3711_precious_reg,
+	.max_register = AS3711_MAX_REGS,
+	.num_reg_defaults_raw = AS3711_MAX_REGS,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static int as3711_i2c_probe(struct i2c_client *client,
+			    const struct i2c_device_id *id)
+{
+	struct as3711 *as3711;
+	struct as3711_platform_data *pdata = client->dev.platform_data;
+	unsigned int id1, id2;
+	int ret;
+
+	if (!pdata)
+		dev_dbg(&client->dev, "Platform data not found\n");
+
+	as3711 = devm_kzalloc(&client->dev, sizeof(struct as3711), GFP_KERNEL);
+	if (!as3711) {
+		dev_err(&client->dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	as3711->dev = &client->dev;
+	i2c_set_clientdata(client, as3711);
+
+	if (client->irq)
+		dev_notice(&client->dev, "IRQ not supported yet\n");
+
+	as3711->regmap = devm_regmap_init_i2c(client, &as3711_regmap_config);
+	if (IS_ERR(as3711->regmap)) {
+		ret = PTR_ERR(as3711->regmap);
+		dev_err(&client->dev, "regmap initialization failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_1, &id1);
+	if (!ret)
+		ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_2, &id2);
+	if (ret < 0) {
+		dev_err(&client->dev, "regmap_read() failed: %d\n", ret);
+		return ret;
+	}
+	if (id1 != 0x8b)
+		return -ENODEV;
+	dev_info(as3711->dev, "AS3711 detected: %x:%x\n", id1, id2);
+
+	/* We can reuse as3711_subdevs[]; it will be copied in mfd_add_devices() */
+	if (pdata) {
+		as3711_subdevs[AS3711_REGULATOR].platform_data = &pdata->regulator;
+		as3711_subdevs[AS3711_REGULATOR].pdata_size = sizeof(pdata->regulator);
+		as3711_subdevs[AS3711_BACKLIGHT].platform_data = &pdata->backlight;
+		as3711_subdevs[AS3711_BACKLIGHT].pdata_size = sizeof(pdata->backlight);
+	} else {
+		as3711_subdevs[AS3711_REGULATOR].platform_data = NULL;
+		as3711_subdevs[AS3711_REGULATOR].pdata_size = 0;
+		as3711_subdevs[AS3711_BACKLIGHT].platform_data = NULL;
+		as3711_subdevs[AS3711_BACKLIGHT].pdata_size = 0;
+	}
+
+	ret = mfd_add_devices(as3711->dev, -1, as3711_subdevs,
+			      ARRAY_SIZE(as3711_subdevs), NULL, 0, NULL);
+	if (ret < 0)
+		dev_err(&client->dev, "add mfd devices failed: %d\n", ret);
+
+	return ret;
+}
+
+static int as3711_i2c_remove(struct i2c_client *client)
+{
+	struct as3711 *as3711 = i2c_get_clientdata(client);
+
+	mfd_remove_devices(as3711->dev);
+	return 0;
+}
+
+static const struct i2c_device_id as3711_i2c_id[] = {
+	{.name = "as3711", .driver_data = 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, as3711_i2c_id);
+
+static struct i2c_driver as3711_i2c_driver = {
+	.driver = {
+		   .name = "as3711",
+		   .owner = THIS_MODULE,
+		   },
+	.probe = as3711_i2c_probe,
+	.remove = as3711_i2c_remove,
+	.id_table = as3711_i2c_id,
+};
+
+static int __init as3711_i2c_init(void)
+{
+	return i2c_add_driver(&as3711_i2c_driver);
+}
+/* Initialise early */
+subsys_initcall(as3711_i2c_init);
+
+static void __exit as3711_i2c_exit(void)
+{
+	i2c_del_driver(&as3711_i2c_driver);
+}
+module_exit(as3711_i2c_exit);
+
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_DESCRIPTION("AS3711 PMIC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index 689b747..a3c9613 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -15,7 +15,6 @@
 #include <linux/delay.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -24,16 +23,6 @@
 #include <linux/mfd/da9052/pdata.h>
 #include <linux/mfd/da9052/reg.h>
 
-#define DA9052_NUM_IRQ_REGS		4
-#define DA9052_IRQ_MASK_POS_1		0x01
-#define DA9052_IRQ_MASK_POS_2		0x02
-#define DA9052_IRQ_MASK_POS_3		0x04
-#define DA9052_IRQ_MASK_POS_4		0x08
-#define DA9052_IRQ_MASK_POS_5		0x10
-#define DA9052_IRQ_MASK_POS_6		0x20
-#define DA9052_IRQ_MASK_POS_7		0x40
-#define DA9052_IRQ_MASK_POS_8		0x80
-
 static bool da9052_reg_readable(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -425,15 +414,6 @@
 }
 EXPORT_SYMBOL_GPL(da9052_adc_manual_read);
 
-static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
-{
-	struct da9052 *da9052 = irq_data;
-
-	complete(&da9052->done);
-
-	return IRQ_HANDLED;
-}
-
 int da9052_adc_read_temp(struct da9052 *da9052)
 {
 	int tbat;
@@ -447,74 +427,6 @@
 }
 EXPORT_SYMBOL_GPL(da9052_adc_read_temp);
 
-static struct resource da9052_rtc_resource = {
-	.name = "ALM",
-	.start = DA9052_IRQ_ALARM,
-	.end   = DA9052_IRQ_ALARM,
-	.flags = IORESOURCE_IRQ,
-};
-
-static struct resource da9052_onkey_resource = {
-	.name = "ONKEY",
-	.start = DA9052_IRQ_NONKEY,
-	.end   = DA9052_IRQ_NONKEY,
-	.flags = IORESOURCE_IRQ,
-};
-
-static struct resource da9052_bat_resources[] = {
-	{
-		.name = "BATT TEMP",
-		.start = DA9052_IRQ_TBAT,
-		.end   = DA9052_IRQ_TBAT,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "DCIN DET",
-		.start = DA9052_IRQ_DCIN,
-		.end   = DA9052_IRQ_DCIN,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "DCIN REM",
-		.start = DA9052_IRQ_DCINREM,
-		.end   = DA9052_IRQ_DCINREM,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "VBUS DET",
-		.start = DA9052_IRQ_VBUS,
-		.end   = DA9052_IRQ_VBUS,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "VBUS REM",
-		.start = DA9052_IRQ_VBUSREM,
-		.end   = DA9052_IRQ_VBUSREM,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "CHG END",
-		.start = DA9052_IRQ_CHGEND,
-		.end   = DA9052_IRQ_CHGEND,
-		.flags = IORESOURCE_IRQ,
-	},
-};
-
-static struct resource da9052_tsi_resources[] = {
-	{
-		.name = "PENDWN",
-		.start = DA9052_IRQ_PENDOWN,
-		.end   = DA9052_IRQ_PENDOWN,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "TSIRDY",
-		.start = DA9052_IRQ_TSIREADY,
-		.end   = DA9052_IRQ_TSIREADY,
-		.flags = IORESOURCE_IRQ,
-	},
-};
-
 static struct mfd_cell da9052_subdev_info[] = {
 	{
 		.name = "da9052-regulator",
@@ -574,13 +486,9 @@
 	},
 	{
 		.name = "da9052-onkey",
-		.resources = &da9052_onkey_resource,
-		.num_resources = 1,
 	},
 	{
 		.name = "da9052-rtc",
-		.resources = &da9052_rtc_resource,
-		.num_resources = 1,
 	},
 	{
 		.name = "da9052-gpio",
@@ -602,160 +510,15 @@
 	},
 	{
 		.name = "da9052-tsi",
-		.resources = da9052_tsi_resources,
-		.num_resources = ARRAY_SIZE(da9052_tsi_resources),
 	},
 	{
 		.name = "da9052-bat",
-		.resources = da9052_bat_resources,
-		.num_resources = ARRAY_SIZE(da9052_bat_resources),
 	},
 	{
 		.name = "da9052-watchdog",
 	},
 };
 
-static struct regmap_irq da9052_irqs[] = {
-	[DA9052_IRQ_DCIN] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_VBUS] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_DCINREM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_VBUSREM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_VDDLOW] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_ALARM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_SEQRDY] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_COMP1V2] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_NONKEY] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_IDFLOAT] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_IDGND] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_CHGEND] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_TBAT] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_ADC_EOM] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_PENDOWN] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_TSIREADY] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_GPI0] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_GPI1] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_GPI2] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_GPI3] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_GPI4] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_GPI5] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_GPI6] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_GPI7] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_GPI8] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_GPI9] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_GPI10] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_GPI11] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_GPI12] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_GPI13] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_GPI14] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_GPI15] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-};
-
-static struct regmap_irq_chip da9052_regmap_irq_chip = {
-	.name = "da9052_irq",
-	.status_base = DA9052_EVENT_A_REG,
-	.mask_base = DA9052_IRQ_MASK_A_REG,
-	.ack_base = DA9052_EVENT_A_REG,
-	.num_regs = DA9052_NUM_IRQ_REGS,
-	.irqs = da9052_irqs,
-	.num_irqs = ARRAY_SIZE(da9052_irqs),
-};
-
 struct regmap_config da9052_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -782,45 +545,31 @@
 
 	da9052->chip_id = chip_id;
 
-	if (!pdata || !pdata->irq_base)
-		da9052->irq_base = -1;
-	else
-		da9052->irq_base = pdata->irq_base;
-
-	ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq,
-				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				  da9052->irq_base, &da9052_regmap_irq_chip,
-				  &da9052->irq_data);
-	if (ret < 0)
-		goto regmap_err;
-
-	da9052->irq_base = regmap_irq_chip_get_base(da9052->irq_data);
-
-	ret = request_threaded_irq(DA9052_IRQ_ADC_EOM, NULL, da9052_auxadc_irq,
-				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				   "adc irq", da9052);
-	if (ret != 0)
-		dev_err(da9052->dev, "DA9052 ADC IRQ failed ret=%d\n", ret);
+	ret = da9052_irq_init(da9052);
+	if (ret != 0) {
+		dev_err(da9052->dev, "da9052_irq_init failed: %d\n", ret);
+		return ret;
+	}
 
 	ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
 			      ARRAY_SIZE(da9052_subdev_info), NULL, 0, NULL);
-	if (ret)
+	if (ret) {
+		dev_err(da9052->dev, "mfd_add_devices failed: %d\n", ret);
 		goto err;
+	}
 
 	return 0;
 
 err:
-	free_irq(DA9052_IRQ_ADC_EOM, da9052);
-	mfd_remove_devices(da9052->dev);
-regmap_err:
+	da9052_irq_exit(da9052);
+
 	return ret;
 }
 
 void da9052_device_exit(struct da9052 *da9052)
 {
-	free_irq(DA9052_IRQ_ADC_EOM, da9052);
-	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
 	mfd_remove_devices(da9052->dev);
+	da9052_irq_exit(da9052);
 }
 
 MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
diff --git a/drivers/mfd/da9052-irq.c b/drivers/mfd/da9052-irq.c
new file mode 100644
index 0000000..57ae784
--- /dev/null
+++ b/drivers/mfd/da9052-irq.c
@@ -0,0 +1,288 @@
+/*
+ * DA9052 interrupt support
+ *
+ * Author: Fabio Estevam <fabio.estevam@freescale.com>
+ * Based on arizona-irq.c, which is:
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/mfd/da9052/da9052.h>
+#include <linux/mfd/da9052/reg.h>
+
+#define DA9052_NUM_IRQ_REGS		4
+#define DA9052_IRQ_MASK_POS_1		0x01
+#define DA9052_IRQ_MASK_POS_2		0x02
+#define DA9052_IRQ_MASK_POS_3		0x04
+#define DA9052_IRQ_MASK_POS_4		0x08
+#define DA9052_IRQ_MASK_POS_5		0x10
+#define DA9052_IRQ_MASK_POS_6		0x20
+#define DA9052_IRQ_MASK_POS_7		0x40
+#define DA9052_IRQ_MASK_POS_8		0x80
+
+static struct regmap_irq da9052_irqs[] = {
+	[DA9052_IRQ_DCIN] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_VBUS] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_DCINREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_VBUSREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_VDDLOW] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ALARM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_SEQRDY] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_COMP1V2] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_NONKEY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_IDFLOAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_IDGND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_CHGEND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_TBAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ADC_EOM] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_PENDOWN] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_TSIREADY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI0] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI1] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI2] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI3] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI4] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI5] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI6] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI7] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI8] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI9] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI10] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI11] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI12] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI13] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI14] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI15] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+};
+
+static struct regmap_irq_chip da9052_regmap_irq_chip = {
+	.name = "da9052_irq",
+	.status_base = DA9052_EVENT_A_REG,
+	.mask_base = DA9052_IRQ_MASK_A_REG,
+	.ack_base = DA9052_EVENT_A_REG,
+	.num_regs = DA9052_NUM_IRQ_REGS,
+	.irqs = da9052_irqs,
+	.num_irqs = ARRAY_SIZE(da9052_irqs),
+};
+
+static int da9052_map_irq(struct da9052 *da9052, int irq)
+{
+	return regmap_irq_get_virq(da9052->irq_data, irq);
+}
+
+int da9052_enable_irq(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	enable_irq(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_enable_irq);
+
+int da9052_disable_irq(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	disable_irq(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_disable_irq);
+
+int da9052_disable_irq_nosync(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	disable_irq_nosync(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_disable_irq_nosync);
+
+int da9052_request_irq(struct da9052 *da9052, int irq, char *name,
+			   irq_handler_t handler, void *data)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	return request_threaded_irq(irq, NULL, handler,
+				     IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				     name, data);
+}
+EXPORT_SYMBOL_GPL(da9052_request_irq);
+
+void da9052_free_irq(struct da9052 *da9052, int irq, void *data)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return;
+
+	free_irq(irq, data);
+}
+EXPORT_SYMBOL_GPL(da9052_free_irq);
+
+static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
+{
+	struct da9052 *da9052 = irq_data;
+
+	complete(&da9052->done);
+
+	return IRQ_HANDLED;
+}
+
+int da9052_irq_init(struct da9052 *da9052)
+{
+	int ret;
+
+	ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq,
+				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				  -1, &da9052_regmap_irq_chip,
+				  &da9052->irq_data);
+	if (ret < 0) {
+		dev_err(da9052->dev, "regmap_add_irq_chip failed: %d\n", ret);
+		goto regmap_err;
+	}
+
+	ret = da9052_request_irq(da9052, DA9052_IRQ_ADC_EOM, "adc-irq",
+				 da9052_auxadc_irq, da9052);
+	if (ret != 0) {
+		dev_err(da9052->dev, "DA9052_IRQ_ADC_EOM failed: %d\n", ret);
+		goto request_irq_err;
+	}
+
+	return 0;
+
+request_irq_err:
+	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
+regmap_err:
+	return ret;
+}
+
+int da9052_irq_exit(struct da9052 *da9052)
+{
+	da9052_free_irq(da9052, DA9052_IRQ_ADC_EOM, da9052);
+	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
+
+	return 0;
+}
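
The new da9052-irq.c above funnels every chip interrupt through regmap-irq and hands sub-drivers the small wrapper API (da9052_request_irq() and friends) instead of raw IRQ numbers. A minimal sketch of a consumer, with the example_* names being illustrative:

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/mfd/da9052/da9052.h>

/* Runs in thread context: the wrapper registers it through
 * request_threaded_irq().
 */
static irqreturn_t example_onkey_irq(int irq, void *data)
{
	struct da9052 *da9052 = data;

	dev_dbg(da9052->dev, "ONKEY event\n");
	return IRQ_HANDLED;
}

static int example_onkey_setup(struct da9052 *da9052)
{
	int ret;

	/* The wrapper maps chip-local DA9052_IRQ_NONKEY to a Linux
	 * virq via regmap_irq_get_virq() before requesting it.
	 */
	ret = da9052_request_irq(da9052, DA9052_IRQ_NONKEY, "ONKEY",
				 example_onkey_irq, da9052);
	if (ret)
		return ret;

	/* Per-source gating goes through the same mapping. */
	da9052_disable_irq(da9052, DA9052_IRQ_NONKEY);
	da9052_enable_irq(da9052, DA9052_IRQ_NONKEY);
	return 0;
}
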
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 2971056..dc8826d 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -2763,7 +2763,7 @@
 
 void __init db8500_prcmu_early_init(void)
 {
-	if (cpu_is_u8500v2()) {
+	if (cpu_is_u8500v2() || cpu_is_u9540()) {
 		void *tcpm_base = ioremap_nocache(U8500_PRCMU_TCPM_BASE, SZ_4K);
 
 		if (tcpm_base != NULL) {
@@ -2781,7 +2781,11 @@
 			iounmap(tcpm_base);
 		}
 
-		tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
+		if (cpu_is_u9540())
+			tcdm_base = ioremap_nocache(U8500_PRCMU_TCDM_BASE,
+						SZ_4K + SZ_8K) + SZ_8K;
+		else
+			tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
 	} else {
 		pr_err("prcmu: Unsupported chip version\n");
 		BUG();
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index 0b8b55b..e80587f 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -211,7 +211,7 @@
 	int ret;
 	int irq_base;
 
-	adc = kmalloc(sizeof(*adc), GFP_KERNEL);
+	adc = devm_kzalloc(&pdev->dev, sizeof(*adc), GFP_KERNEL);
 	if (!adc) {
 		dev_err(&pdev->dev, "Failed to allocate driver structure\n");
 		return -ENOMEM;
@@ -221,30 +221,27 @@
 	if (adc->irq < 0) {
 		ret = adc->irq;
 		dev_err(&pdev->dev, "Failed to get platform irq: %d\n", ret);
-		goto err_free;
+		return ret;
 	}
 
 	irq_base = platform_get_irq(pdev, 1);
 	if (irq_base < 0) {
-		ret = irq_base;
-		dev_err(&pdev->dev, "Failed to get irq base: %d\n", ret);
-		goto err_free;
+		dev_err(&pdev->dev, "Failed to get irq base: %d\n", irq_base);
+		return irq_base;
 	}
 
 	mem_base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem_base) {
-		ret = -ENOENT;
 		dev_err(&pdev->dev, "Failed to get platform mmio resource\n");
-		goto err_free;
+		return -ENOENT;
 	}
 
 	/* Only request the shared registers for the MFD driver */
 	adc->mem = request_mem_region(mem_base->start, JZ_REG_ADC_STATUS,
 					pdev->name);
 	if (!adc->mem) {
-		ret = -EBUSY;
 		dev_err(&pdev->dev, "Failed to request mmio memory region\n");
-		goto err_free;
+		return -EBUSY;
 	}
 
 	adc->base = ioremap_nocache(adc->mem->start, resource_size(adc->mem));
@@ -301,9 +298,6 @@
 	iounmap(adc->base);
 err_release_mem_region:
 	release_mem_region(adc->mem->start, resource_size(adc->mem));
-err_free:
-	kfree(adc);
-
 	return ret;
 }
 
@@ -325,8 +319,6 @@
 
 	platform_set_drvdata(pdev, NULL);
 
-	kfree(adc);
-
 	return 0;
 }
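
The jz4740-adc conversion above is the standard devm_kzalloc() payoff: the allocation is owned by the device, every early error path collapses to a plain return, and the remove path loses its kfree(). A minimal sketch of the shape, with struct foo and foo_probe() as stand-in names:

#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct foo {
	int irq;
};

static int foo_probe(struct platform_device *pdev)
{
	struct foo *foo;

	/* Freed automatically when the device is unbound. */
	foo = devm_kzalloc(&pdev->dev, sizeof(*foo), GFP_KERNEL);
	if (!foo)
		return -ENOMEM;

	foo->irq = platform_get_irq(pdev, 0);
	if (foo->irq < 0)
		return foo->irq;	/* no goto err_free needed */

	platform_set_drvdata(pdev, foo);
	return 0;
}
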
 
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index 2ad24ca..d9d9303 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -734,7 +734,7 @@
 	pci_read_config_dword(dev, ACPIBASE, &base_addr_cfg);
 	base_addr = base_addr_cfg & 0x0000ff80;
 	if (!base_addr) {
-		dev_err(&dev->dev, "I/O space for ACPI uninitialized\n");
+		dev_notice(&dev->dev, "I/O space for ACPI uninitialized\n");
 		lpc_ich_cells[LPC_GPIO].num_resources--;
 		goto gpe0_done;
 	}
@@ -760,7 +760,7 @@
 	pci_read_config_dword(dev, GPIOBASE, &base_addr_cfg);
 	base_addr = base_addr_cfg & 0x0000ff80;
 	if (!base_addr) {
-		dev_err(&dev->dev, "I/O space for GPIO uninitialized\n");
+		dev_notice(&dev->dev, "I/O space for GPIO uninitialized\n");
 		ret = -ENODEV;
 		goto gpio_done;
 	}
@@ -810,7 +810,7 @@
 	pci_read_config_dword(dev, ACPIBASE, &base_addr_cfg);
 	base_addr = base_addr_cfg & 0x0000ff80;
 	if (!base_addr) {
-		dev_err(&dev->dev, "I/O space for ACPI uninitialized\n");
+		dev_notice(&dev->dev, "I/O space for ACPI uninitialized\n");
 		ret = -ENODEV;
 		goto wdt_done;
 	}
@@ -830,12 +830,15 @@
 	 * we have to read RCBA from PCI Config space 0xf0 and use
 	 * it as base. GCS = RCBA + ICH6_GCS(0x3410).
 	 */
-	if (lpc_chipset_info[id->driver_data].iTCO_version == 2) {
+	if (lpc_chipset_info[id->driver_data].iTCO_version == 1) {
+		/* Don't register iomem for TCO ver 1 */
+		lpc_ich_cells[LPC_WDT].num_resources--;
+	} else {
 		pci_read_config_dword(dev, RCBABASE, &base_addr_cfg);
 		base_addr = base_addr_cfg & 0xffffc000;
 		if (!(base_addr_cfg & 1)) {
-			pr_err("RCBA is disabled by hardware/BIOS, "
-					"device disabled\n");
+			dev_notice(&dev->dev, "RCBA is disabled by "
+					"hardware/BIOS, device disabled\n");
 			ret = -ENODEV;
 			goto wdt_done;
 		}
@@ -871,6 +874,7 @@
 	 * successfully.
 	 */
 	if (!cell_added) {
+		dev_warn(&dev->dev, "No MFD cells added\n");
 		lpc_ich_restore_config_space(dev);
 		return -ENODEV;
 	}
diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index 1aba023..2a9b100 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -119,6 +119,11 @@
 #define MC13XXX_REVISION_FAB		(0x03 << 11)
 #define MC13XXX_REVISION_ICIDCODE	(0x3f << 13)
 
+#define MC34708_REVISION_REVMETAL	(0x07 <<  0)
+#define MC34708_REVISION_REVFULL	(0x07 <<  3)
+#define MC34708_REVISION_FIN		(0x07 <<  6)
+#define MC34708_REVISION_FAB		(0x07 <<  9)
+
 #define MC13XXX_ADC1		44
 #define MC13XXX_ADC1_ADEN		(1 << 0)
 #define MC13XXX_ADC1_RAND		(1 << 1)
@@ -410,62 +415,52 @@
 	return IRQ_RETVAL(handled);
 }
 
-static const char *mc13xxx_chipname[] = {
-	[MC13XXX_ID_MC13783] = "mc13783",
-	[MC13XXX_ID_MC13892] = "mc13892",
-};
-
 #define maskval(reg, mask)	(((reg) & (mask)) >> __ffs(mask))
-static int mc13xxx_identify(struct mc13xxx *mc13xxx)
+static void mc13xxx_print_revision(struct mc13xxx *mc13xxx, u32 revision)
 {
-	u32 icid;
-	u32 revision;
-	int ret;
-
-	/*
-	 * Get the generation ID from register 46, as apparently some older
-	 * IC revisions only have this info at this location. Newer ICs seem to
-	 * have both.
-	 */
-	ret = mc13xxx_reg_read(mc13xxx, 46, &icid);
-	if (ret)
-		return ret;
-
-	icid = (icid >> 6) & 0x7;
-
-	switch (icid) {
-	case 2:
-		mc13xxx->ictype = MC13XXX_ID_MC13783;
-		break;
-	case 7:
-		mc13xxx->ictype = MC13XXX_ID_MC13892;
-		break;
-	default:
-		mc13xxx->ictype = MC13XXX_ID_INVALID;
-		break;
-	}
-
-	if (mc13xxx->ictype == MC13XXX_ID_MC13783 ||
-			mc13xxx->ictype == MC13XXX_ID_MC13892) {
-		ret = mc13xxx_reg_read(mc13xxx, MC13XXX_REVISION, &revision);
-
-		dev_info(mc13xxx->dev, "%s: rev: %d.%d, "
-				"fin: %d, fab: %d, icid: %d/%d\n",
-				mc13xxx_chipname[mc13xxx->ictype],
-				maskval(revision, MC13XXX_REVISION_REVFULL),
-				maskval(revision, MC13XXX_REVISION_REVMETAL),
-				maskval(revision, MC13XXX_REVISION_FIN),
-				maskval(revision, MC13XXX_REVISION_FAB),
-				maskval(revision, MC13XXX_REVISION_ICID),
-				maskval(revision, MC13XXX_REVISION_ICIDCODE));
-	}
-
-	return (mc13xxx->ictype == MC13XXX_ID_INVALID) ? -ENODEV : 0;
+	dev_info(mc13xxx->dev, "%s: rev: %d.%d, "
+			"fin: %d, fab: %d, icid: %d/%d\n",
+			mc13xxx->variant->name,
+			maskval(revision, MC13XXX_REVISION_REVFULL),
+			maskval(revision, MC13XXX_REVISION_REVMETAL),
+			maskval(revision, MC13XXX_REVISION_FIN),
+			maskval(revision, MC13XXX_REVISION_FAB),
+			maskval(revision, MC13XXX_REVISION_ICID),
+			maskval(revision, MC13XXX_REVISION_ICIDCODE));
 }
 
+static void mc34708_print_revision(struct mc13xxx *mc13xxx, u32 revision)
+{
+	dev_info(mc13xxx->dev, "%s: rev %d.%d, fin: %d, fab: %d\n",
+			mc13xxx->variant->name,
+			maskval(revision, MC34708_REVISION_REVFULL),
+			maskval(revision, MC34708_REVISION_REVMETAL),
+			maskval(revision, MC34708_REVISION_FIN),
+			maskval(revision, MC34708_REVISION_FAB));
+}
+
+/* These are only exported for mc13xxx-i2c and mc13xxx-spi */
+struct mc13xxx_variant mc13xxx_variant_mc13783 = {
+	.name = "mc13783",
+	.print_revision = mc13xxx_print_revision,
+};
+EXPORT_SYMBOL_GPL(mc13xxx_variant_mc13783);
+
+struct mc13xxx_variant mc13xxx_variant_mc13892 = {
+	.name = "mc13892",
+	.print_revision = mc13xxx_print_revision,
+};
+EXPORT_SYMBOL_GPL(mc13xxx_variant_mc13892);
+
+struct mc13xxx_variant mc13xxx_variant_mc34708 = {
+	.name = "mc34708",
+	.print_revision = mc34708_print_revision,
+};
+EXPORT_SYMBOL_GPL(mc13xxx_variant_mc34708);
+
 static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx)
 {
-	return mc13xxx_chipname[mc13xxx->ictype];
+	return mc13xxx->variant->name;
 }
 
 int mc13xxx_get_flags(struct mc13xxx *mc13xxx)
@@ -653,13 +648,16 @@
 		struct mc13xxx_platform_data *pdata, int irq)
 {
 	int ret;
+	u32 revision;
 
 	mc13xxx_lock(mc13xxx);
 
-	ret = mc13xxx_identify(mc13xxx);
+	ret = mc13xxx_reg_read(mc13xxx, MC13XXX_REVISION, &revision);
 	if (ret)
 		goto err_revision;
 
+	mc13xxx->variant->print_revision(mc13xxx, revision);
+
 	/* mask all irqs */
 	ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK0, 0x00ffffff);
 	if (ret)
diff --git a/drivers/mfd/mc13xxx-i2c.c b/drivers/mfd/mc13xxx-i2c.c
index 7957999..f745e27 100644
--- a/drivers/mfd/mc13xxx-i2c.c
+++ b/drivers/mfd/mc13xxx-i2c.c
@@ -24,7 +24,10 @@
 static const struct i2c_device_id mc13xxx_i2c_device_id[] = {
 	{
 		.name = "mc13892",
-		.driver_data = MC13XXX_ID_MC13892,
+		.driver_data = (kernel_ulong_t)&mc13xxx_variant_mc13892,
+	}, {
+		.name = "mc34708",
+		.driver_data = (kernel_ulong_t)&mc13xxx_variant_mc34708,
 	}, {
 		/* sentinel */
 	}
@@ -34,7 +37,10 @@
 static const struct of_device_id mc13xxx_dt_ids[] = {
 	{
 		.compatible = "fsl,mc13892",
-		.data = (void *) &mc13xxx_i2c_device_id[0],
+		.data = &mc13xxx_variant_mc13892,
+	}, {
+		.compatible = "fsl,mc34708",
+		.data = &mc13xxx_variant_mc34708,
 	}, {
 		/* sentinel */
 	}
@@ -76,11 +82,15 @@
 		return ret;
 	}
 
-	ret = mc13xxx_common_init(mc13xxx, pdata, client->irq);
+	if (client->dev.of_node) {
+		const struct of_device_id *of_id =
+			of_match_device(mc13xxx_dt_ids, &client->dev);
+		mc13xxx->variant = of_id->data;
+	} else {
+		mc13xxx->variant = (void *)id->driver_data;
+	}
 
-	if (ret == 0 && (id->driver_data != mc13xxx->ictype))
-		dev_warn(mc13xxx->dev,
-				"device id doesn't match auto detection!\n");
+	ret = mc13xxx_common_init(mc13xxx, pdata, client->irq);
 
 	return ret;
 }
diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index cb32f69..3032bae 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -28,10 +28,13 @@
 static const struct spi_device_id mc13xxx_device_id[] = {
 	{
 		.name = "mc13783",
-		.driver_data = MC13XXX_ID_MC13783,
+		.driver_data = (kernel_ulong_t)&mc13xxx_variant_mc13783,
 	}, {
 		.name = "mc13892",
-		.driver_data = MC13XXX_ID_MC13892,
+		.driver_data = (kernel_ulong_t)&mc13xxx_variant_mc13892,
+	}, {
+		.name = "mc34708",
+		.driver_data = (kernel_ulong_t)&mc13xxx_variant_mc34708,
 	}, {
 		/* sentinel */
 	}
@@ -39,8 +42,9 @@
 MODULE_DEVICE_TABLE(spi, mc13xxx_device_id);
 
 static const struct of_device_id mc13xxx_dt_ids[] = {
-	{ .compatible = "fsl,mc13783", .data = (void *) MC13XXX_ID_MC13783, },
-	{ .compatible = "fsl,mc13892", .data = (void *) MC13XXX_ID_MC13892, },
+	{ .compatible = "fsl,mc13783", .data = &mc13xxx_variant_mc13783, },
+	{ .compatible = "fsl,mc13892", .data = &mc13xxx_variant_mc13892, },
+	{ .compatible = "fsl,mc34708", .data = &mc13xxx_variant_mc34708, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids);
@@ -144,19 +148,18 @@
 		return ret;
 	}
 
-	ret = mc13xxx_common_init(mc13xxx, pdata, spi->irq);
+	if (spi->dev.of_node) {
+		const struct of_device_id *of_id =
+			of_match_device(mc13xxx_dt_ids, &spi->dev);
 
-	if (ret) {
-		dev_set_drvdata(&spi->dev, NULL);
+		mc13xxx->variant = of_id->data;
 	} else {
-		const struct spi_device_id *devid =
-			spi_get_device_id(spi);
-		if (!devid || devid->driver_data != mc13xxx->ictype)
-			dev_warn(mc13xxx->dev,
-				"device id doesn't match auto detection!\n");
+		const struct spi_device_id *id_entry = spi_get_device_id(spi);
+
+		mc13xxx->variant = (void *)id_entry->driver_data;
 	}
 
-	return ret;
+	return mc13xxx_common_init(mc13xxx, pdata, spi->irq);
 }
 
 static int mc13xxx_spi_remove(struct spi_device *spi)
diff --git a/drivers/mfd/mc13xxx.h b/drivers/mfd/mc13xxx.h
index bbba06f..460ec5c 100644
--- a/drivers/mfd/mc13xxx.h
+++ b/drivers/mfd/mc13xxx.h
@@ -13,19 +13,25 @@
 #include <linux/regmap.h>
 #include <linux/mfd/mc13xxx.h>
 
-enum mc13xxx_id {
-	MC13XXX_ID_MC13783,
-	MC13XXX_ID_MC13892,
-	MC13XXX_ID_INVALID,
+#define MC13XXX_NUMREGS 0x3f
+
+struct mc13xxx;
+
+struct mc13xxx_variant {
+	const char *name;
+	void (*print_revision)(struct mc13xxx *mc13xxx, u32 revision);
 };
 
-#define MC13XXX_NUMREGS 0x3f
+extern struct mc13xxx_variant
+		mc13xxx_variant_mc13783,
+		mc13xxx_variant_mc13892,
+		mc13xxx_variant_mc34708;
 
 struct mc13xxx {
 	struct regmap *regmap;
 
 	struct device *dev;
-	enum mc13xxx_id ictype;
+	const struct mc13xxx_variant *variant;
 
 	struct mutex lock;
 	int irq;
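
The mc13xxx rework above replaces run-time chip identification with a variant descriptor chosen by the bus glue: DT probes take it from the matched of_device_id, legacy probes cast it out of the id table's driver_data, and the core then only calls through mc13xxx->variant. A condensed sketch of the selection step as done in the i2c glue (the NULL check on the match is a defensive addition, not part of the hunks above):

	const struct of_device_id *of_id;

	if (client->dev.of_node) {
		of_id = of_match_device(mc13xxx_dt_ids, &client->dev);
		if (!of_id)
			return -ENODEV;	/* defensive: no DT match */
		mc13xxx->variant = of_id->data;
	} else {
		mc13xxx->variant = (void *)id->driver_data;
	}

	/* The core is now variant-agnostic: */
	mc13xxx->variant->print_revision(mc13xxx, revision);
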
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index f8b7771..7604f4e 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -21,6 +21,10 @@
 #include <linux/irqdomain.h>
 #include <linux/of.h>
 
+static struct device_type mfd_dev_type = {
+	.name	= "mfd_device",
+};
+
 int mfd_cell_enable(struct platform_device *pdev)
 {
 	const struct mfd_cell *cell = mfd_get_cell(pdev);
@@ -91,6 +95,7 @@
 		goto fail_device;
 
 	pdev->dev.parent = parent;
+	pdev->dev.type = &mfd_dev_type;
 
 	if (parent->of_node && cell->of_compatible) {
 		for_each_child_of_node(parent->of_node, np) {
@@ -204,10 +209,16 @@
 
 static int mfd_remove_devices_fn(struct device *dev, void *c)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	const struct mfd_cell *cell = mfd_get_cell(pdev);
+	struct platform_device *pdev;
+	const struct mfd_cell *cell;
 	atomic_t **usage_count = c;
 
+	if (dev->type != &mfd_dev_type)
+		return 0;
+
+	pdev = to_platform_device(dev);
+	cell = mfd_get_cell(pdev);
+
 	/* find the base address of usage_count pointers (for freeing) */
 	if (!*usage_count || (cell->usage_count < *usage_count))
 		*usage_count = cell->usage_count;
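
The mfd_dev_type tag introduced above exists because mfd_remove_devices() visits every child of the parent via device_for_each_child(); children created by other means would otherwise be cast to platform devices and treated as MFD cells. A minimal sketch of the filtering pattern, with my_* as illustrative names (the real callback above additionally tracks the cell usage counts):

#include <linux/device.h>
#include <linux/platform_device.h>

static struct device_type my_dev_type = {
	.name = "my_subdevice",
};

static int my_remove_fn(struct device *dev, void *data)
{
	/* Skip children this driver did not create. */
	if (dev->type != &my_dev_type)
		return 0;

	platform_device_unregister(to_platform_device(dev));
	return 0;
}

/* At creation:  pdev->dev.type = &my_dev_type;
 * At teardown:  device_for_each_child(parent, NULL, my_remove_fn);
 */
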
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 770a0d0..05164d7 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
 #include <linux/gpio.h>
-#include <plat/cpu.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/usb-omap.h>
 #include <linux/pm_runtime.h>
@@ -384,7 +383,7 @@
 			reg &= ~OMAP_UHH_HOSTCONFIG_P3_CONNECT_STATUS;
 
 		/* Bypass the TLL module for PHY mode operation */
-		if (cpu_is_omap3430() && (omap_rev() <= OMAP3430_REV_ES2_1)) {
+		if (pdata->single_ulpi_bypass) {
 			dev_dbg(dev, "OMAP3 ES version <= ES2.1\n");
 			if (is_ehci_phy_mode(pdata->port_mode[0]) ||
 				is_ehci_phy_mode(pdata->port_mode[1]) ||
diff --git a/drivers/mfd/rc5t583-irq.c b/drivers/mfd/rc5t583-irq.c
index fe00cdd..b41db59 100644
--- a/drivers/mfd/rc5t583-irq.c
+++ b/drivers/mfd/rc5t583-irq.c
@@ -345,7 +345,7 @@
 	mutex_init(&rc5t583->irq_lock);
 
 	/* Initialize all int registers to 0 */
-	for (i = 0; i < RC5T583_MAX_INTERRUPT_MASK_REGS; i++)  {
+	for (i = 0; i < RC5T583_MAX_INTERRUPT_EN_REGS; i++)  {
 		ret = rc5t583_write(rc5t583->dev, irq_en_add[i],
 				rc5t583->irq_en_reg[i]);
 		if (ret < 0)
diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c
new file mode 100644
index 0000000..7ff4a37
--- /dev/null
+++ b/drivers/mfd/retu-mfd.c
@@ -0,0 +1,264 @@
+/*
+ * Retu MFD driver
+ *
+ * Copyright (C) 2004, 2005 Nokia Corporation
+ *
+ * Based on code written by Juha Yrjölä, David Weinehall and Mikko Ylinen.
+ * Rewritten by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/retu.h>
+#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
+
+/* Registers */
+#define RETU_REG_ASICR		0x00		/* ASIC ID and revision */
+#define RETU_REG_ASICR_VILMA	(1 << 7)	/* Bit indicating Vilma */
+#define RETU_REG_IDR		0x01		/* Interrupt ID */
+#define RETU_REG_IMR		0x02		/* Interrupt mask */
+
+/* Interrupt sources */
+#define RETU_INT_PWR		0		/* Power button */
+
+struct retu_dev {
+	struct regmap			*regmap;
+	struct device			*dev;
+	struct mutex			mutex;
+	struct regmap_irq_chip_data	*irq_data;
+};
+
+static struct resource retu_pwrbutton_res[] = {
+	{
+		.name	= "retu-pwrbutton",
+		.start	= RETU_INT_PWR,
+		.end	= RETU_INT_PWR,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell retu_devs[] = {
+	{
+		.name		= "retu-wdt"
+	},
+	{
+		.name		= "retu-pwrbutton",
+		.resources	= retu_pwrbutton_res,
+		.num_resources	= ARRAY_SIZE(retu_pwrbutton_res),
+	}
+};
+
+static struct regmap_irq retu_irqs[] = {
+	[RETU_INT_PWR] = {
+		.mask = 1 << RETU_INT_PWR,
+	}
+};
+
+static struct regmap_irq_chip retu_irq_chip = {
+	.name		= "RETU",
+	.irqs		= retu_irqs,
+	.num_irqs	= ARRAY_SIZE(retu_irqs),
+	.num_regs	= 1,
+	.status_base	= RETU_REG_IDR,
+	.mask_base	= RETU_REG_IMR,
+	.ack_base	= RETU_REG_IDR,
+};
+
+/* Retu device registered for the power off. */
+static struct retu_dev *retu_pm_power_off;
+
+int retu_read(struct retu_dev *rdev, u8 reg)
+{
+	int ret;
+	int value;
+
+	mutex_lock(&rdev->mutex);
+	ret = regmap_read(rdev->regmap, reg, &value);
+	mutex_unlock(&rdev->mutex);
+
+	return ret ? ret : value;
+}
+EXPORT_SYMBOL_GPL(retu_read);
+
+int retu_write(struct retu_dev *rdev, u8 reg, u16 data)
+{
+	int ret;
+
+	mutex_lock(&rdev->mutex);
+	ret = regmap_write(rdev->regmap, reg, data);
+	mutex_unlock(&rdev->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(retu_write);
+
+static void retu_power_off(void)
+{
+	struct retu_dev *rdev = retu_pm_power_off;
+	int reg;
+
+	mutex_lock(&retu_pm_power_off->mutex);
+
+	/* Ignore power button state */
+	regmap_read(rdev->regmap, RETU_REG_CC1, &reg);
+	regmap_write(rdev->regmap, RETU_REG_CC1, reg | 2);
+
+	/* Expire watchdog immediately */
+	regmap_write(rdev->regmap, RETU_REG_WATCHDOG, 0);
+
+	/* Wait for poweroff */
+	for (;;)
+		cpu_relax();
+
+	mutex_unlock(&retu_pm_power_off->mutex);
+}
+
+static int retu_regmap_read(void *context, const void *reg, size_t reg_size,
+			    void *val, size_t val_size)
+{
+	int ret;
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	BUG_ON(reg_size != 1 || val_size != 2);
+
+	ret = i2c_smbus_read_word_data(i2c, *(u8 const *)reg);
+	if (ret < 0)
+		return ret;
+
+	*(u16 *)val = ret;
+	return 0;
+}
+
+static int retu_regmap_write(void *context, const void *data, size_t count)
+{
+	u8 reg;
+	u16 val;
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	BUG_ON(count != sizeof(reg) + sizeof(val));
+	memcpy(&reg, data, sizeof(reg));
+	memcpy(&val, data + sizeof(reg), sizeof(val));
+	return i2c_smbus_write_word_data(i2c, reg, val);
+}
+
+static struct regmap_bus retu_bus = {
+	.read = retu_regmap_read,
+	.write = retu_regmap_write,
+	.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
+};
+
+static struct regmap_config retu_config = {
+	.reg_bits = 8,
+	.val_bits = 16,
+};
+
+static int __devinit retu_probe(struct i2c_client *i2c,
+				const struct i2c_device_id *id)
+{
+	struct retu_dev *rdev;
+	int ret;
+
+	rdev = devm_kzalloc(&i2c->dev, sizeof(*rdev), GFP_KERNEL);
+	if (rdev == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, rdev);
+	rdev->dev = &i2c->dev;
+	mutex_init(&rdev->mutex);
+	rdev->regmap = devm_regmap_init(&i2c->dev, &retu_bus, &i2c->dev,
+					&retu_config);
+	if (IS_ERR(rdev->regmap))
+		return PTR_ERR(rdev->regmap);
+
+	ret = retu_read(rdev, RETU_REG_ASICR);
+	if (ret < 0) {
+		dev_err(rdev->dev, "could not read Retu revision: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(rdev->dev, "Retu%s v%d.%d found\n",
+		 (ret & RETU_REG_ASICR_VILMA) ? " & Vilma" : "",
+		 (ret >> 4) & 0x7, ret & 0xf);
+
+	/* Mask all RETU interrupts. */
+	ret = retu_write(rdev, RETU_REG_IMR, 0xffff);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_add_irq_chip(rdev->regmap, i2c->irq, IRQF_ONESHOT, -1,
+				  &retu_irq_chip, &rdev->irq_data);
+	if (ret < 0)
+		return ret;
+
+	ret = mfd_add_devices(rdev->dev, -1, retu_devs, ARRAY_SIZE(retu_devs),
+			      NULL, regmap_irq_chip_get_base(rdev->irq_data),
+			      NULL);
+	if (ret < 0) {
+		regmap_del_irq_chip(i2c->irq, rdev->irq_data);
+		return ret;
+	}
+
+	if (!pm_power_off) {
+		retu_pm_power_off = rdev;
+		pm_power_off	  = retu_power_off;
+	}
+
+	return 0;
+}
+
+static int __devexit retu_remove(struct i2c_client *i2c)
+{
+	struct retu_dev *rdev = i2c_get_clientdata(i2c);
+
+	if (retu_pm_power_off == rdev) {
+		pm_power_off	  = NULL;
+		retu_pm_power_off = NULL;
+	}
+	mfd_remove_devices(rdev->dev);
+	regmap_del_irq_chip(i2c->irq, rdev->irq_data);
+
+	return 0;
+}
+
+static const struct i2c_device_id retu_id[] = {
+	{ "retu-mfd", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, retu_id);
+
+static struct i2c_driver retu_driver = {
+	.driver		= {
+		.name = "retu-mfd",
+		.owner = THIS_MODULE,
+	},
+	.probe		= retu_probe,
+	.remove		= retu_remove,
+	.id_table	= retu_id,
+};
+module_i2c_driver(retu_driver);
+
+MODULE_DESCRIPTION("Retu MFD driver");
+MODULE_AUTHOR("Juha Yrjölä");
+MODULE_AUTHOR("David Weinehall");
+MODULE_AUTHOR("Mikko Ylinen");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
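
Two details of the new Retu driver are worth noting. The chip speaks 16-bit SMBus word transactions, so instead of the generic regmap-i2c bus it supplies its own regmap_bus built on i2c_smbus_read/write_word_data(). And the retu-pwrbutton cell's IORESOURCE_IRQ resource carries the chip-local number (RETU_INT_PWR), which mfd_add_devices() rebases by the domain base obtained from regmap_irq_chip_get_base(), so the child can use it directly. A sketch of the child side, assuming the retu-pwrbutton driver (not part of this diff) follows the usual pattern:

#include <linux/interrupt.h>
#include <linux/platform_device.h>

static irqreturn_t retu_pwrbutton_irq(int irq, void *data)
{
	/* read the button state and report an input event here */
	return IRQ_HANDLED;
}

static int retu_pwrbutton_probe(struct platform_device *pdev)
{
	/* Already a Linux virq: the parent passed the regmap-irq
	 * domain base to mfd_add_devices().
	 */
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return irq;

	return devm_request_threaded_irq(&pdev->dev, irq, NULL,
					 retu_pwrbutton_irq, IRQF_ONESHOT,
					 "retu-pwrbutton", pdev);
}
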
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 56d4377..3a44efa 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -22,6 +22,7 @@
 
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/highmem.h>
 #include <linux/interrupt.h>
diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c
index c901fa5..0dd84e9 100644
--- a/drivers/mfd/sec-irq.c
+++ b/drivers/mfd/sec-irq.c
@@ -24,67 +24,67 @@
 
 static struct regmap_irq s2mps11_irqs[] = {
 	[S2MPS11_IRQ_PWRONF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_PWRONF_MASK,
 	},
 	[S2MPS11_IRQ_PWRONR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_PWRONR_MASK,
 	},
 	[S2MPS11_IRQ_JIGONBF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_JIGONBF_MASK,
 	},
 	[S2MPS11_IRQ_JIGONBR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_JIGONBR_MASK,
 	},
 	[S2MPS11_IRQ_ACOKBF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_ACOKBF_MASK,
 	},
 	[S2MPS11_IRQ_ACOKBR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_ACOKBR_MASK,
 	},
 	[S2MPS11_IRQ_PWRON1S] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_PWRON1S_MASK,
 	},
 	[S2MPS11_IRQ_MRB] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S2MPS11_IRQ_MRB_MASK,
 	},
 	[S2MPS11_IRQ_RTC60S] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S2MPS11_IRQ_RTC60S_MASK,
 	},
 	[S2MPS11_IRQ_RTCA1] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S2MPS11_IRQ_RTCA1_MASK,
 	},
 	[S2MPS11_IRQ_RTCA2] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S2MPS11_IRQ_RTCA2_MASK,
 	},
 	[S2MPS11_IRQ_SMPL] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S2MPS11_IRQ_SMPL_MASK,
 	},
 	[S2MPS11_IRQ_RTC1S] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S2MPS11_IRQ_RTC1S_MASK,
 	},
 	[S2MPS11_IRQ_WTSR] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S2MPS11_IRQ_WTSR_MASK,
 	},
 	[S2MPS11_IRQ_INT120C] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S2MPS11_IRQ_INT120C_MASK,
 	},
 	[S2MPS11_IRQ_INT140C] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S2MPS11_IRQ_INT140C_MASK,
 	},
 };
@@ -92,146 +92,146 @@
 
 static struct regmap_irq s5m8767_irqs[] = {
 	[S5M8767_IRQ_PWRR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8767_IRQ_PWRR_MASK,
 	},
 	[S5M8767_IRQ_PWRF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8767_IRQ_PWRF_MASK,
 	},
 	[S5M8767_IRQ_PWR1S] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8767_IRQ_PWR1S_MASK,
 	},
 	[S5M8767_IRQ_JIGR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8767_IRQ_JIGR_MASK,
 	},
 	[S5M8767_IRQ_JIGF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8767_IRQ_JIGF_MASK,
 	},
 	[S5M8767_IRQ_LOWBAT2] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8767_IRQ_LOWBAT2_MASK,
 	},
 	[S5M8767_IRQ_LOWBAT1] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8767_IRQ_LOWBAT1_MASK,
 	},
 	[S5M8767_IRQ_MRB] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8767_IRQ_MRB_MASK,
 	},
 	[S5M8767_IRQ_DVSOK2] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8767_IRQ_DVSOK2_MASK,
 	},
 	[S5M8767_IRQ_DVSOK3] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8767_IRQ_DVSOK3_MASK,
 	},
 	[S5M8767_IRQ_DVSOK4] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8767_IRQ_DVSOK4_MASK,
 	},
 	[S5M8767_IRQ_RTC60S] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8767_IRQ_RTC60S_MASK,
 	},
 	[S5M8767_IRQ_RTCA1] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8767_IRQ_RTCA1_MASK,
 	},
 	[S5M8767_IRQ_RTCA2] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8767_IRQ_RTCA2_MASK,
 	},
 	[S5M8767_IRQ_SMPL] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8767_IRQ_SMPL_MASK,
 	},
 	[S5M8767_IRQ_RTC1S] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8767_IRQ_RTC1S_MASK,
 	},
 	[S5M8767_IRQ_WTSR] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8767_IRQ_WTSR_MASK,
 	},
 };
 
 static struct regmap_irq s5m8763_irqs[] = {
 	[S5M8763_IRQ_DCINF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8763_IRQ_DCINF_MASK,
 	},
 	[S5M8763_IRQ_DCINR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8763_IRQ_DCINR_MASK,
 	},
 	[S5M8763_IRQ_JIGF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8763_IRQ_JIGF_MASK,
 	},
 	[S5M8763_IRQ_JIGR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8763_IRQ_JIGR_MASK,
 	},
 	[S5M8763_IRQ_PWRONF] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8763_IRQ_PWRONF_MASK,
 	},
 	[S5M8763_IRQ_PWRONR] = {
-		.reg_offset = 1,
+		.reg_offset = 0,
 		.mask = S5M8763_IRQ_PWRONR_MASK,
 	},
 	[S5M8763_IRQ_WTSREVNT] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8763_IRQ_WTSREVNT_MASK,
 	},
 	[S5M8763_IRQ_SMPLEVNT] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8763_IRQ_SMPLEVNT_MASK,
 	},
 	[S5M8763_IRQ_ALARM1] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8763_IRQ_ALARM1_MASK,
 	},
 	[S5M8763_IRQ_ALARM0] = {
-		.reg_offset = 2,
+		.reg_offset = 1,
 		.mask = S5M8763_IRQ_ALARM0_MASK,
 	},
 	[S5M8763_IRQ_ONKEY1S] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8763_IRQ_ONKEY1S_MASK,
 	},
 	[S5M8763_IRQ_TOPOFFR] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8763_IRQ_TOPOFFR_MASK,
 	},
 	[S5M8763_IRQ_DCINOVPR] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8763_IRQ_DCINOVPR_MASK,
 	},
 	[S5M8763_IRQ_CHGRSTF] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8763_IRQ_CHGRSTF_MASK,
 	},
 	[S5M8763_IRQ_DONER] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8763_IRQ_DONER_MASK,
 	},
 	[S5M8763_IRQ_CHGFAULT] = {
-		.reg_offset = 3,
+		.reg_offset = 2,
 		.mask = S5M8763_IRQ_CHGFAULT_MASK,
 	},
 	[S5M8763_IRQ_LOBAT1] = {
-		.reg_offset = 4,
+		.reg_offset = 3,
 		.mask = S5M8763_IRQ_LOBAT1_MASK,
 	},
 	[S5M8763_IRQ_LOBAT2] = {
-		.reg_offset = 4,
+		.reg_offset = 3,
 		.mask = S5M8763_IRQ_LOBAT2_MASK,
 	},
 };
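
The sec-irq changes above are one systematic fix: regmap-irq treats reg_offset as a 0-based index into the block of num_regs status/mask registers starting at status_base, so the old 1-based values made every lookup land one register past the intended one (and the last group index past the block entirely). Schematically, using the structures from <linux/regmap.h> (simplified, ignoring register strides):

#include <linux/regmap.h>

/* The status register for a source is status_base plus its
 * reg_offset, which is why the offsets must start at 0.
 */
static unsigned int irq_status_reg(const struct regmap_irq_chip *chip,
				   const struct regmap_irq *irq)
{
	return chip->status_base + irq->reg_offset;
}
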
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index d6284ca..1225dcb 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2009-2011 Wind River Systems, Inc.
- * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini)
+ * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini, Davide Ciminaghi)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -27,21 +27,28 @@
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
-#include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/sta2x11-mfd.h>
+#include <linux/regmap.h>
 
 #include <asm/sta2x11.h>
 
+static inline int __reg_within_range(unsigned int r,
+				     unsigned int start,
+				     unsigned int end)
+{
+	return ((r >= start) && (r <= end));
+}
+
 /* This describes STA2X11 MFD chip for us, we may have several */
 struct sta2x11_mfd {
 	struct sta2x11_instance *instance;
-	spinlock_t lock;
+	struct regmap *regmap[sta2x11_n_mfd_plat_devs];
+	spinlock_t lock[sta2x11_n_mfd_plat_devs];
 	struct list_head list;
-	void __iomem *sctl_regs;
-	void __iomem *apbreg_regs;
+	void __iomem *regs[sta2x11_n_mfd_plat_devs];
 };
 
 static LIST_HEAD(sta2x11_mfd_list);
@@ -71,6 +78,7 @@
 
 static int sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags)
 {
+	int i;
 	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
 	struct sta2x11_instance *instance;
 
@@ -83,7 +91,8 @@
 	if (!mfd)
 		return -ENOMEM;
 	INIT_LIST_HEAD(&mfd->list);
-	spin_lock_init(&mfd->lock);
+	for (i = 0; i < ARRAY_SIZE(mfd->lock); i++)
+		spin_lock_init(&mfd->lock[i]);
 	mfd->instance = instance;
 	list_add(&mfd->list, &sta2x11_mfd_list);
 	return 0;
@@ -100,161 +109,276 @@
 	return 0;
 }
 
-/* These two functions are exported and are not expected to fail */
-u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+/* This function is exported and is not expected to fail */
+u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
+		       enum sta2x11_mfd_plat_dev index)
 {
 	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
 	u32 r;
 	unsigned long flags;
+	void __iomem *regs;
 
 	if (!mfd) {
 		dev_warn(&pdev->dev, ": can't access sctl regs\n");
 		return 0;
 	}
-	if (!mfd->sctl_regs) {
+
+	regs = mfd->regs[index];
+	if (!regs) {
 		dev_warn(&pdev->dev, ": system ctl not initialized\n");
 		return 0;
 	}
-	spin_lock_irqsave(&mfd->lock, flags);
-	r = readl(mfd->sctl_regs + reg);
+	spin_lock_irqsave(&mfd->lock[index], flags);
+	r = readl(regs + reg);
 	r &= ~mask;
 	r |= val;
 	if (mask)
-		writel(r, mfd->sctl_regs + reg);
-	spin_unlock_irqrestore(&mfd->lock, flags);
+		writel(r, regs + reg);
+	spin_unlock_irqrestore(&mfd->lock[index], flags);
 	return r;
 }
-EXPORT_SYMBOL(sta2x11_sctl_mask);
+EXPORT_SYMBOL(__sta2x11_mfd_mask);
 
-u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+int sta2x11_mfd_get_regs_data(struct platform_device *dev,
+			      enum sta2x11_mfd_plat_dev index,
+			      void __iomem **regs,
+			      spinlock_t **lock)
 {
-	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
-	u32 r;
-	unsigned long flags;
+	struct pci_dev *pdev = *(struct pci_dev **)(dev->dev.platform_data);
+	struct sta2x11_mfd *mfd;
 
-	if (!mfd) {
-		dev_warn(&pdev->dev, ": can't access apb regs\n");
-		return 0;
-	}
-	if (!mfd->apbreg_regs) {
-		dev_warn(&pdev->dev, ": apb bridge not initialized\n");
-		return 0;
-	}
-	spin_lock_irqsave(&mfd->lock, flags);
-	r = readl(mfd->apbreg_regs + reg);
-	r &= ~mask;
-	r |= val;
-	if (mask)
-		writel(r, mfd->apbreg_regs + reg);
-	spin_unlock_irqrestore(&mfd->lock, flags);
-	return r;
+	if (!pdev)
+		return -ENODEV;
+	mfd = sta2x11_mfd_find(pdev);
+	if (!mfd)
+		return -ENODEV;
+	if (index >= sta2x11_n_mfd_plat_devs)
+		return -ENODEV;
+	*regs = mfd->regs[index];
+	*lock = &mfd->lock[index];
+	pr_debug("%s %d *regs = %p\n", __func__, __LINE__, *regs);
+	return *regs ? 0 : -ENODEV;
 }
-EXPORT_SYMBOL(sta2x11_apbreg_mask);
+EXPORT_SYMBOL(sta2x11_mfd_get_regs_data);
 
-/* Two debugfs files, for our registers (FIXME: one instance only) */
-#define REG(regname) {.name = #regname, .offset = SCTL_ ## regname}
-static struct debugfs_reg32 sta2x11_sctl_regs[] = {
-	REG(SCCTL), REG(ARMCFG), REG(SCPLLCTL), REG(SCPLLFCTRL),
-	REG(SCRESFRACT), REG(SCRESCTRL1), REG(SCRESXTRL2), REG(SCPEREN0),
-	REG(SCPEREN1), REG(SCPEREN2), REG(SCGRST), REG(SCPCIPMCR1),
-	REG(SCPCIPMCR2), REG(SCPCIPMSR1), REG(SCPCIPMSR2), REG(SCPCIPMSR3),
-	REG(SCINTREN), REG(SCRISR), REG(SCCLKSTAT0), REG(SCCLKSTAT1),
-	REG(SCCLKSTAT2), REG(SCRSTSTA),
-};
-#undef REG
+/*
+ * Special sta2x11-mfd regmap lock/unlock functions
+ */
 
-static struct debugfs_regset32 sctl_regset = {
-	.regs = sta2x11_sctl_regs,
-	.nregs = ARRAY_SIZE(sta2x11_sctl_regs),
+static void sta2x11_regmap_lock(void *__lock)
+{
+	spinlock_t *lock = __lock;
+	spin_lock(lock);
+}
+
+static void sta2x11_regmap_unlock(void *__lock)
+{
+	spinlock_t *lock = __lock;
+	spin_unlock(lock);
+}
+
+/* OTP (one-time programmable) registers do not require locking */
+static void sta2x11_regmap_nolock(void *__lock)
+{
+}
+
+static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = STA2X11_MFD_SCTL_NAME,
+	[sta2x11_apbreg] = STA2X11_MFD_APBREG_NAME,
+	[sta2x11_apb_soc_regs] = STA2X11_MFD_APB_SOC_REGS_NAME,
+	[sta2x11_scr] = STA2X11_MFD_SCR_NAME,
 };
 
-#define REG(regname) {.name = #regname, .offset = regname}
-static struct debugfs_reg32 sta2x11_apbreg_regs[] = {
-	REG(APBREG_BSR), REG(APBREG_PAER), REG(APBREG_PWAC), REG(APBREG_PRAC),
-	REG(APBREG_PCG), REG(APBREG_PUR), REG(APBREG_EMU_PCG),
-};
-#undef REG
+static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return !__reg_within_range(reg, SCTL_SCPCIECSBRST, SCTL_SCRSTSTA);
+}
 
-static struct debugfs_regset32 apbreg_regset = {
-	.regs = sta2x11_apbreg_regs,
-	.nregs = ARRAY_SIZE(sta2x11_apbreg_regs),
+static struct regmap_config sta2x11_sctl_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = SCTL_SCRSTSTA,
+	.writeable_reg = sta2x11_sctl_writeable_reg,
 };
 
-static struct dentry *sta2x11_sctl_debugfs;
-static struct dentry *sta2x11_apbreg_debugfs;
+static bool sta2x11_scr_readable_reg(struct device *dev, unsigned int reg)
+{
+	return (reg == STA2X11_SECR_CR) ||
+		__reg_within_range(reg, STA2X11_SECR_FVR0, STA2X11_SECR_FVR1);
+}
 
-/* Probe for the two platform devices */
-static int sta2x11_sctl_probe(struct platform_device *dev)
+static bool sta2x11_scr_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return false;
+}
+
+static struct regmap_config sta2x11_scr_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_nolock,
+	.unlock = sta2x11_regmap_nolock,
+	.max_register = STA2X11_SECR_FVR1,
+	.readable_reg = sta2x11_scr_readable_reg,
+	.writeable_reg = sta2x11_scr_writeable_reg,
+};
+
+static bool sta2x11_apbreg_readable_reg(struct device *dev, unsigned int reg)
+{
+	/* Two blocks (CAN and MLB, SARAC) 0x100 bytes apart */
+	if (reg >= APBREG_BSR_SARAC)
+		reg -= APBREG_BSR_SARAC;
+	switch (reg) {
+	case APBREG_BSR:
+	case APBREG_PAER:
+	case APBREG_PWAC:
+	case APBREG_PRAC:
+	case APBREG_PCG:
+	case APBREG_PUR:
+	case APBREG_EMU_PCG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool sta2x11_apbreg_writeable_reg(struct device *dev, unsigned int reg)
+{
+	if (reg >= APBREG_BSR_SARAC)
+		reg -= APBREG_BSR_SARAC;
+	if (!sta2x11_apbreg_readable_reg(dev, reg))
+		return false;
+	return reg != APBREG_PAER;
+}
+
+static struct regmap_config sta2x11_apbreg_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = APBREG_EMU_PCG_SARAC,
+	.readable_reg = sta2x11_apbreg_readable_reg,
+	.writeable_reg = sta2x11_apbreg_writeable_reg,
+};
+
+static bool sta2x11_apb_soc_regs_readable_reg(struct device *dev,
+					      unsigned int reg)
+{
+	return reg <= PCIE_SoC_INT_ROUTER_STATUS3_REG ||
+		__reg_within_range(reg, DMA_IP_CTRL_REG, SPARE3_RESERVED) ||
+		__reg_within_range(reg, MASTER_LOCK_REG,
+				   SYSTEM_CONFIG_STATUS_REG) ||
+		reg == MSP_CLK_CTRL_REG ||
+		__reg_within_range(reg, COMPENSATION_REG1, TEST_CTL_REG);
+}
+
+static bool sta2x11_apb_soc_regs_writeable_reg(struct device *dev,
+					       unsigned int reg)
+{
+	if (!sta2x11_apb_soc_regs_readable_reg(dev, reg))
+		return false;
+	switch (reg) {
+	case PCIE_COMMON_CLOCK_CONFIG_0_4_0:
+	case SYSTEM_CONFIG_STATUS_REG:
+	case COMPENSATION_REG1:
+	case PCIE_SoC_INT_ROUTER_STATUS0_REG...PCIE_SoC_INT_ROUTER_STATUS3_REG:
+	case PCIE_PM_STATUS_0_PORT_0_4...PCIE_PM_STATUS_7_0_EP4:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static struct regmap_config sta2x11_apb_soc_regs_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = TEST_CTL_REG,
+	.readable_reg = sta2x11_apb_soc_regs_readable_reg,
+	.writeable_reg = sta2x11_apb_soc_regs_writeable_reg,
+};
+
+static struct regmap_config *
+sta2x11_mfd_regmap_configs[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = &sta2x11_sctl_regmap_config,
+	[sta2x11_apbreg] = &sta2x11_apbreg_regmap_config,
+	[sta2x11_apb_soc_regs] = &sta2x11_apb_soc_regs_regmap_config,
+	[sta2x11_scr] = &sta2x11_scr_regmap_config,
+};
+
+/* Probe for the four platform devices */
+
+static int sta2x11_mfd_platform_probe(struct platform_device *dev,
+				      enum sta2x11_mfd_plat_dev index)
 {
 	struct pci_dev **pdev;
 	struct sta2x11_mfd *mfd;
 	struct resource *res;
+	const char *name = sta2x11_mfd_names[index];
+	struct regmap_config *regmap_config = sta2x11_mfd_regmap_configs[index];
 
 	pdev = dev->dev.platform_data;
 	mfd = sta2x11_mfd_find(*pdev);
 	if (!mfd)
 		return -ENODEV;
+	if (!regmap_config)
+		return -ENODEV;
 
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!res)
 		return -ENOMEM;
 
-	if (!request_mem_region(res->start, resource_size(res),
-				"sta2x11-sctl"))
+	if (!request_mem_region(res->start, resource_size(res), name))
 		return -EBUSY;
 
-	mfd->sctl_regs = ioremap(res->start, resource_size(res));
-	if (!mfd->sctl_regs) {
+	mfd->regs[index] = ioremap(res->start, resource_size(res));
+	if (!mfd->regs[index]) {
 		release_mem_region(res->start, resource_size(res));
 		return -ENOMEM;
 	}
-	sctl_regset.base = mfd->sctl_regs;
-	sta2x11_sctl_debugfs = debugfs_create_regset32("sta2x11-sctl",
-						  S_IFREG | S_IRUGO,
-						  NULL, &sctl_regset);
+	regmap_config->lock_arg = &mfd->lock[index];
+	/*
+	 * No caching: these registers may be accessed both via regmap
+	 * and via the raw void __iomem * pointer.
+	 */
+	regmap_config->cache_type = REGCACHE_NONE;
+	mfd->regmap[index] = devm_regmap_init_mmio(&dev->dev, mfd->regs[index],
+						   regmap_config);
+	WARN_ON(!mfd->regmap[index]);
+
 	return 0;
 }
 
+static int sta2x11_sctl_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_sctl);
+}
+
 static int sta2x11_apbreg_probe(struct platform_device *dev)
 {
-	struct pci_dev **pdev;
-	struct sta2x11_mfd *mfd;
-	struct resource *res;
-
-	pdev = dev->dev.platform_data;
-	dev_dbg(&dev->dev, "%s: pdata is %p\n", __func__, pdev);
-	dev_dbg(&dev->dev, "%s: *pdata is %p\n", __func__, *pdev);
-
-	mfd = sta2x11_mfd_find(*pdev);
-	if (!mfd)
-		return -ENODEV;
-
-	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENOMEM;
-
-	if (!request_mem_region(res->start, resource_size(res),
-				"sta2x11-apbreg"))
-		return -EBUSY;
-
-	mfd->apbreg_regs = ioremap(res->start, resource_size(res));
-	if (!mfd->apbreg_regs) {
-		release_mem_region(res->start, resource_size(res));
-		return -ENOMEM;
-	}
-	dev_dbg(&dev->dev, "%s: regbase %p\n", __func__, mfd->apbreg_regs);
-
-	apbreg_regset.base = mfd->apbreg_regs;
-	sta2x11_apbreg_debugfs = debugfs_create_regset32("sta2x11-apbreg",
-						  S_IFREG | S_IRUGO,
-						  NULL, &apbreg_regset);
-	return 0;
+	return sta2x11_mfd_platform_probe(dev, sta2x11_apbreg);
 }
 
-/* The two platform drivers */
+static int sta2x11_apb_soc_regs_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_apb_soc_regs);
+}
+
+static int sta2x11_scr_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_scr);
+}
+
+/* The four platform drivers */
 static struct platform_driver sta2x11_sctl_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-sctl",
+		.name	= STA2X11_MFD_SCTL_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_sctl_probe,
@@ -268,7 +392,7 @@
 
 static struct platform_driver sta2x11_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-apbreg",
+		.name	= STA2X11_MFD_APBREG_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_apbreg_probe,
@@ -280,13 +404,44 @@
 	return platform_driver_register(&sta2x11_platform_driver);
 }
 
+static struct platform_driver sta2x11_apb_soc_regs_platform_driver = {
+	.driver = {
+		.name	= STA2X11_MFD_APB_SOC_REGS_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sta2x11_apb_soc_regs_probe,
+};
+
+static int __init sta2x11_apb_soc_regs_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_apb_soc_regs_platform_driver);
+}
+
+static struct platform_driver sta2x11_scr_platform_driver = {
+	.driver = {
+		.name = STA2X11_MFD_SCR_NAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = sta2x11_scr_probe,
+};
+
+static int __init sta2x11_scr_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_scr_platform_driver);
+}
+
 /*
- * What follows is the PCI device that hosts the above two pdevs.
+ * What follows are the PCI devices that host the above pdevs.
  * Each logic block is 4kB and they are all consecutive: we use this info.
  */
 
-/* Bar 0 */
-enum bar0_cells {
+/* Mfd 0 device */
+
+/* Mfd 0, Bar 0 */
+enum mfd0_bar0_cells {
 	STA2X11_GPIO_0 = 0,
 	STA2X11_GPIO_1,
 	STA2X11_GPIO_2,
@@ -295,8 +450,8 @@
 	STA2X11_SCR,
 	STA2X11_TIME,
 };
-/* Bar 1 */
-enum bar1_cells {
+/* Mfd 0, Bar 1 */
+enum mfd0_bar1_cells {
 	STA2X11_APBREG = 0,
 };
 #define CELL_4K(_name, _cell) { \
@@ -307,40 +462,71 @@
 
 static const struct resource gpio_resources[] = {
 	{
-		.name = "sta2x11_gpio", /* 4 consecutive cells, 1 driver */
+		/* 4 consecutive cells, 1 driver */
+		.name = STA2X11_MFD_GPIO_NAME,
 		.start = 0,
 		.end = (4 * 4096) - 1,
 		.flags = IORESOURCE_MEM,
 	}
 };
 static const struct resource sctl_resources[] = {
-	CELL_4K("sta2x11-sctl", STA2X11_SCTL),
+	CELL_4K(STA2X11_MFD_SCTL_NAME, STA2X11_SCTL),
 };
 static const struct resource scr_resources[] = {
-	CELL_4K("sta2x11-scr", STA2X11_SCR),
+	CELL_4K(STA2X11_MFD_SCR_NAME, STA2X11_SCR),
 };
 static const struct resource time_resources[] = {
-	CELL_4K("sta2x11-time", STA2X11_TIME),
+	CELL_4K(STA2X11_MFD_TIME_NAME, STA2X11_TIME),
 };
 
 static const struct resource apbreg_resources[] = {
-	CELL_4K("sta2x11-apbreg", STA2X11_APBREG),
+	CELL_4K(STA2X11_MFD_APBREG_NAME, STA2X11_APBREG),
 };
 
 #define DEV(_name, _r) \
 	{ .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, }
 
-static struct mfd_cell sta2x11_mfd_bar0[] = {
-	DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */
-	DEV("sta2x11-sctl", sctl_resources),
-	DEV("sta2x11-scr", scr_resources),
-	DEV("sta2x11-time", time_resources),
+static struct mfd_cell sta2x11_mfd0_bar0[] = {
+	/* offset 0: we add pdata later */
+	DEV(STA2X11_MFD_GPIO_NAME, gpio_resources),
+	DEV(STA2X11_MFD_SCTL_NAME, sctl_resources),
+	DEV(STA2X11_MFD_SCR_NAME,  scr_resources),
+	DEV(STA2X11_MFD_TIME_NAME, time_resources),
 };
 
-static struct mfd_cell sta2x11_mfd_bar1[] = {
-	DEV("sta2x11-apbreg", apbreg_resources),
+static struct mfd_cell sta2x11_mfd0_bar1[] = {
+	DEV(STA2X11_MFD_APBREG_NAME, apbreg_resources),
 };
 
+/* Mfd 1 devices */
+
+/* Mfd 1, Bar 0 */
+enum mfd1_bar0_cells {
+	STA2X11_VIC = 0,
+};
+
+/* Mfd 1, Bar 1 */
+enum mfd1_bar1_cells {
+	STA2X11_APB_SOC_REGS = 0,
+};
+
+static const __devinitconst struct resource vic_resources[] = {
+	CELL_4K(STA2X11_MFD_VIC_NAME, STA2X11_VIC),
+};
+
+static const __devinitconst struct resource apb_soc_regs_resources[] = {
+	CELL_4K(STA2X11_MFD_APB_SOC_REGS_NAME, STA2X11_APB_SOC_REGS),
+};
+
+static __devinitdata struct mfd_cell sta2x11_mfd1_bar0[] = {
+	DEV(STA2X11_MFD_VIC_NAME, vic_resources),
+};
+
+static __devinitdata struct mfd_cell sta2x11_mfd1_bar1[] = {
+	DEV(STA2X11_MFD_APB_SOC_REGS_NAME, apb_soc_regs_resources),
+};
+
 static int sta2x11_mfd_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	pci_save_state(pdev);
@@ -363,11 +549,63 @@
 	return 0;
 }
 
+struct sta2x11_mfd_bar_setup_data {
+	struct mfd_cell *cells;
+	int ncells;
+};
+
+struct sta2x11_mfd_setup_data {
+	struct sta2x11_mfd_bar_setup_data bars[2];
+};
+
+#define STA2X11_MFD0 0
+#define STA2X11_MFD1 1
+
+static struct sta2x11_mfd_setup_data mfd_setup_data[] = {
+	/* Mfd 0: gpio, sctl, scr, timers / apbregs */
+	[STA2X11_MFD0] = {
+		.bars = {
+			[0] = {
+				.cells = sta2x11_mfd0_bar0,
+				.ncells = ARRAY_SIZE(sta2x11_mfd0_bar0),
+			},
+			[1] = {
+				.cells = sta2x11_mfd0_bar1,
+				.ncells = ARRAY_SIZE(sta2x11_mfd0_bar1),
+			},
+		},
+	},
+	/* Mfd 1: vic / apb-soc-regs */
+	[STA2X11_MFD1] = {
+		.bars = {
+			[0] = {
+				.cells = sta2x11_mfd1_bar0,
+				.ncells = ARRAY_SIZE(sta2x11_mfd1_bar0),
+			},
+			[1] = {
+				.cells = sta2x11_mfd1_bar1,
+				.ncells = ARRAY_SIZE(sta2x11_mfd1_bar1),
+			},
+		},
+	},
+};
+
+static void sta2x11_mfd_setup(struct pci_dev *pdev,
+			      struct sta2x11_mfd_setup_data *sd)
+{
+	int i, j;
+	for (i = 0; i < ARRAY_SIZE(sd->bars); i++)
+		for (j = 0; j < sd->bars[i].ncells; j++) {
+			sd->bars[i].cells[j].pdata_size = sizeof(pdev);
+			sd->bars[i].cells[j].platform_data = &pdev;
+		}
+}
+
 static int sta2x11_mfd_probe(struct pci_dev *pdev,
-				       const struct pci_device_id *pci_id)
+			     const struct pci_device_id *pci_id)
 {
 	int err, i;
-	struct sta2x11_gpio_pdata *gpio_data;
+	struct sta2x11_mfd_setup_data *setup_data;
 
 	dev_info(&pdev->dev, "%s\n", __func__);
 
@@ -381,46 +619,29 @@
 	if (err)
 		dev_info(&pdev->dev, "Enable msi failed\n");
 
-	/* Read gpio config data as pci device's platform data */
-	gpio_data = dev_get_platdata(&pdev->dev);
-	if (!gpio_data)
-		dev_warn(&pdev->dev, "no gpio configuration\n");
-
-	dev_dbg(&pdev->dev, "%s, gpio_data = %p (%p)\n", __func__,
-		gpio_data, &gpio_data);
-	dev_dbg(&pdev->dev, "%s, pdev = %p (%p)\n", __func__,
-		pdev, &pdev);
+	setup_data = pci_id->device == PCI_DEVICE_ID_STMICRO_GPIO ?
+		&mfd_setup_data[STA2X11_MFD0] :
+		&mfd_setup_data[STA2X11_MFD1];
 
 	/* platform data is the pci device for all of them */
-	for (i = 0; i < ARRAY_SIZE(sta2x11_mfd_bar0); i++) {
-		sta2x11_mfd_bar0[i].pdata_size = sizeof(pdev);
-		sta2x11_mfd_bar0[i].platform_data = &pdev;
-	}
-	sta2x11_mfd_bar1[0].pdata_size = sizeof(pdev);
-	sta2x11_mfd_bar1[0].platform_data = &pdev;
+	sta2x11_mfd_setup(&pdev, setup_data);
 
 	/* Record this pdev before mfd_add_devices: their probe looks for it */
-	sta2x11_mfd_add(pdev, GFP_ATOMIC);
+	if (!sta2x11_mfd_find(pdev))
+		sta2x11_mfd_add(pdev, GFP_ATOMIC);
 
-
-	err = mfd_add_devices(&pdev->dev, -1,
-			      sta2x11_mfd_bar0,
-			      ARRAY_SIZE(sta2x11_mfd_bar0),
-			      &pdev->resource[0],
-			      0, NULL);
-	if (err) {
-		dev_err(&pdev->dev, "mfd_add_devices[0] failed: %d\n", err);
-		goto err_disable;
-	}
-
-	err = mfd_add_devices(&pdev->dev, -1,
-			      sta2x11_mfd_bar1,
-			      ARRAY_SIZE(sta2x11_mfd_bar1),
-			      &pdev->resource[1],
-			      0, NULL);
-	if (err) {
-		dev_err(&pdev->dev, "mfd_add_devices[1] failed: %d\n", err);
-		goto err_disable;
+	/* Just two BARs for all MFDs at present */
+	for (i = 0; i < 2; i++) {
+		err = mfd_add_devices(&pdev->dev, -1,
+				      setup_data->bars[i].cells,
+				      setup_data->bars[i].ncells,
+				      &pdev->resource[i],
+				      0, NULL);
+		if (err) {
+			dev_err(&pdev->dev,
+				"mfd_add_devices[%d] failed: %d\n", i, err);
+			goto err_disable;
+		}
 	}
 
 	return 0;
@@ -434,6 +655,7 @@
 
 static DEFINE_PCI_DEVICE_TABLE(sta2x11_mfd_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_GPIO)},
+	{PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_VIC)},
 	{0,},
 };
 
@@ -459,6 +681,8 @@
  */
 subsys_initcall(sta2x11_apbreg_init);
 subsys_initcall(sta2x11_sctl_init);
+subsys_initcall(sta2x11_apb_soc_regs_init);
+subsys_initcall(sta2x11_scr_init);
 rootfs_initcall(sta2x11_mfd_init);
 
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index 36df187..fd5fcb6 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -82,11 +82,13 @@
 MODULE_DEVICE_TABLE(i2c, stmpe_id);
 
 static struct i2c_driver stmpe_i2c_driver = {
-	.driver.name	= "stmpe-i2c",
-	.driver.owner	= THIS_MODULE,
+	.driver = {
+		.name = "stmpe-i2c",
+		.owner = THIS_MODULE,
 #ifdef CONFIG_PM
-	.driver.pm	= &stmpe_dev_pm_ops,
+		.pm = &stmpe_dev_pm_ops,
 #endif
+	},
 	.probe		= stmpe_i2c_probe,
 	.remove		= stmpe_i2c_remove,
 	.id_table	= stmpe_i2c_id,
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 79e88d1..5e8e692 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -7,11 +7,15 @@
  * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
  */
 
+#include <linux/err.h>
 #include <linux/gpio.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mfd/core.h>
@@ -312,20 +316,17 @@
 static struct resource stmpe_keypad_resources[] = {
 	{
 		.name	= "KEYPAD",
-		.start	= 0,
-		.end	= 0,
 		.flags	= IORESOURCE_IRQ,
 	},
 	{
 		.name	= "KEYPAD_OVER",
-		.start	= 1,
-		.end	= 1,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
 
 static struct mfd_cell stmpe_keypad_cell = {
 	.name		= "stmpe-keypad",
+	.of_compatible  = "st,stmpe-keypad",
 	.resources	= stmpe_keypad_resources,
 	.num_resources	= ARRAY_SIZE(stmpe_keypad_resources),
 };
@@ -399,20 +400,17 @@
 static struct resource stmpe_ts_resources[] = {
 	{
 		.name	= "TOUCH_DET",
-		.start	= 0,
-		.end	= 0,
 		.flags	= IORESOURCE_IRQ,
 	},
 	{
 		.name	= "FIFO_TH",
-		.start	= 1,
-		.end	= 1,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
 
 static struct mfd_cell stmpe_ts_cell = {
 	.name		= "stmpe-ts",
+	.of_compatible	= "st,stmpe-ts",
 	.resources	= stmpe_ts_resources,
 	.num_resources	= ARRAY_SIZE(stmpe_ts_resources),
 };
@@ -528,12 +526,12 @@
 static struct stmpe_variant_block stmpe1601_blocks[] = {
 	{
 		.cell	= &stmpe_gpio_cell,
-		.irq	= STMPE24XX_IRQ_GPIOC,
+		.irq	= STMPE1601_IRQ_GPIOC,
 		.block	= STMPE_BLOCK_GPIO,
 	},
 	{
 		.cell	= &stmpe_keypad_cell,
-		.irq	= STMPE24XX_IRQ_KEYPAD,
+		.irq	= STMPE1601_IRQ_KEYPAD,
 		.block	= STMPE_BLOCK_KEYPAD,
 	},
 };
@@ -767,7 +765,9 @@
 	int i;
 
 	if (variant->id_val == STMPE801_ID) {
-		handle_nested_irq(stmpe->irq_base);
+		int base = irq_create_mapping(stmpe->domain, 0);
+
+		handle_nested_irq(base);
 		return IRQ_HANDLED;
 	}
 
@@ -788,8 +788,9 @@
 		while (status) {
 			int bit = __ffs(status);
 			int line = bank * 8 + bit;
+			int nestedirq = irq_create_mapping(stmpe->domain, line);
 
-			handle_nested_irq(stmpe->irq_base + line);
+			handle_nested_irq(nestedirq);
 			status &= ~(1 << bit);
 		}
 
@@ -830,7 +831,7 @@
 static void stmpe_irq_mask(struct irq_data *data)
 {
 	struct stmpe *stmpe = irq_data_get_irq_chip_data(data);
-	int offset = data->irq - stmpe->irq_base;
+	int offset = data->hwirq;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
@@ -840,7 +841,7 @@
 static void stmpe_irq_unmask(struct irq_data *data)
 {
 	struct stmpe *stmpe = irq_data_get_irq_chip_data(data);
-	int offset = data->irq - stmpe->irq_base;
+	int offset = data->hwirq;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
@@ -855,43 +856,59 @@
 	.irq_unmask		= stmpe_irq_unmask,
 };
 
-static int __devinit stmpe_irq_init(struct stmpe *stmpe)
+static int stmpe_irq_map(struct irq_domain *d, unsigned int virq,
+				irq_hw_number_t hwirq)
 {
+	struct stmpe *stmpe = d->host_data;
 	struct irq_chip *chip = NULL;
-	int num_irqs = stmpe->variant->num_irqs;
-	int base = stmpe->irq_base;
-	int irq;
 
 	if (stmpe->variant->id_val != STMPE801_ID)
 		chip = &stmpe_irq_chip;
 
-	for (irq = base; irq < base + num_irqs; irq++) {
-		irq_set_chip_data(irq, stmpe);
-		irq_set_chip_and_handler(irq, chip, handle_edge_irq);
-		irq_set_nested_thread(irq, 1);
+	irq_set_chip_data(virq, stmpe);
+	irq_set_chip_and_handler(virq, chip, handle_edge_irq);
+	irq_set_nested_thread(virq, 1);
 #ifdef CONFIG_ARM
-		set_irq_flags(irq, IRQF_VALID);
+	set_irq_flags(virq, IRQF_VALID);
 #else
-		irq_set_noprobe(irq);
+	irq_set_noprobe(virq);
 #endif
-	}
 
 	return 0;
 }
 
-static void stmpe_irq_remove(struct stmpe *stmpe)
+static void stmpe_irq_unmap(struct irq_domain *d, unsigned int virq)
 {
-	int num_irqs = stmpe->variant->num_irqs;
-	int base = stmpe->irq_base;
-	int irq;
-
-	for (irq = base; irq < base + num_irqs; irq++) {
 #ifdef CONFIG_ARM
-		set_irq_flags(irq, 0);
+	set_irq_flags(virq, 0);
 #endif
-		irq_set_chip_and_handler(irq, NULL, NULL);
-		irq_set_chip_data(irq, NULL);
+	irq_set_chip_and_handler(virq, NULL, NULL);
+	irq_set_chip_data(virq, NULL);
+}
+
+static struct irq_domain_ops stmpe_irq_ops = {
+	.map	= stmpe_irq_map,
+	.unmap	= stmpe_irq_unmap,
+	.xlate	= irq_domain_xlate_twocell,
+};
+
+static int __devinit stmpe_irq_init(struct stmpe *stmpe,
+				struct device_node *np)
+{
+	int base = 0;
+	int num_irqs = stmpe->variant->num_irqs;
+
+	if (!np)
+		base = stmpe->irq_base;
+
+	stmpe->domain = irq_domain_add_simple(np, num_irqs, base,
+					      &stmpe_irq_ops, stmpe);
+	if (!stmpe->domain) {
+		dev_err(stmpe->dev, "Failed to create irqdomain\n");
+		return -ENOSYS;
 	}
+
+	return 0;
 }
 
 static int __devinit stmpe_chip_init(struct stmpe *stmpe)
@@ -942,13 +959,6 @@
 			else
 				icr |= STMPE_ICR_LSB_HIGH;
 		}
-
-		if (stmpe->pdata->irq_invert_polarity) {
-			if (id == STMPE801_ID)
-				icr ^= STMPE801_REG_SYS_CTRL_INT_HI;
-			else
-				icr ^= STMPE_ICR_LSB_HIGH;
-		}
 	}
 
 	if (stmpe->pdata->autosleep) {
@@ -961,10 +971,10 @@
 }
 
 static int __devinit stmpe_add_device(struct stmpe *stmpe,
-				      struct mfd_cell *cell, int irq)
+				      struct mfd_cell *cell)
 {
 	return mfd_add_devices(stmpe->dev, stmpe->pdata->id, cell, 1,
-			       NULL, stmpe->irq_base + irq, NULL);
+			       NULL, stmpe->irq_base, stmpe->domain);
 }
 
 static int __devinit stmpe_devices_init(struct stmpe *stmpe)
@@ -972,7 +982,7 @@
 	struct stmpe_variant_info *variant = stmpe->variant;
 	unsigned int platform_blocks = stmpe->pdata->blocks;
 	int ret = -EINVAL;
-	int i;
+	int i, j;
 
 	for (i = 0; i < variant->num_blocks; i++) {
 		struct stmpe_variant_block *block = &variant->blocks[i];
@@ -980,8 +990,17 @@
 		if (!(platform_blocks & block->block))
 			continue;
 
+		for (j = 0; j < block->cell->num_resources; j++) {
+			struct resource *res =
+				(struct resource *) &block->cell->resources[j];
+
+			/* Dynamically fill in a variant's IRQ. */
+			if (res->flags & IORESOURCE_IRQ)
+				res->start = res->end = block->irq + j;
+		}
+
 		platform_blocks &= ~block->block;
-		ret = stmpe_add_device(stmpe, block->cell, block->irq);
+		ret = stmpe_add_device(stmpe, block->cell);
 		if (ret)
 			return ret;
 	}
@@ -994,17 +1013,56 @@
 	return ret;
 }
 
+static void __devinit stmpe_of_probe(struct stmpe_platform_data *pdata,
+			struct device_node *np)
+{
+	struct device_node *child;
+
+	pdata->id = -1;
+	pdata->irq_trigger = IRQF_TRIGGER_NONE;
+
+	of_property_read_u32(np, "st,autosleep-timeout",
+			&pdata->autosleep_timeout);
+
+	pdata->autosleep = (pdata->autosleep_timeout) ? true : false;
+
+	for_each_child_of_node(np, child) {
+		if (!strcmp(child->name, "stmpe_gpio")) {
+			pdata->blocks |= STMPE_BLOCK_GPIO;
+		} else if (!strcmp(child->name, "stmpe_keypad")) {
+			pdata->blocks |= STMPE_BLOCK_KEYPAD;
+		} else if (!strcmp(child->name, "stmpe_touchscreen")) {
+			pdata->blocks |= STMPE_BLOCK_TOUCHSCREEN;
+		} else if (!strcmp(child->name, "stmpe_adc")) {
+			pdata->blocks |= STMPE_BLOCK_ADC;
+		} else if (!strcmp(child->name, "stmpe_pwm")) {
+			pdata->blocks |= STMPE_BLOCK_PWM;
+		} else if (!strcmp(child->name, "stmpe_rotator")) {
+			pdata->blocks |= STMPE_BLOCK_ROTATOR;
+		}
+	}
+}
+
 /* Called from client specific probe routines */
 int __devinit stmpe_probe(struct stmpe_client_info *ci, int partnum)
 {
 	struct stmpe_platform_data *pdata = dev_get_platdata(ci->dev);
+	struct device_node *np = ci->dev->of_node;
 	struct stmpe *stmpe;
 	int ret;
 
-	if (!pdata)
-		return -EINVAL;
+	if (!pdata) {
+		if (!np)
+			return -EINVAL;
 
-	stmpe = kzalloc(sizeof(struct stmpe), GFP_KERNEL);
+		pdata = devm_kzalloc(ci->dev, sizeof(*pdata), GFP_KERNEL);
+		if (!pdata)
+			return -ENOMEM;
+
+		stmpe_of_probe(pdata, np);
+	}
+
+	stmpe = devm_kzalloc(ci->dev, sizeof(struct stmpe), GFP_KERNEL);
 	if (!stmpe)
 		return -ENOMEM;
 
@@ -1026,11 +1084,12 @@
 		ci->init(stmpe);
 
 	if (pdata->irq_over_gpio) {
-		ret = gpio_request_one(pdata->irq_gpio, GPIOF_DIR_IN, "stmpe");
+		ret = devm_gpio_request_one(ci->dev, pdata->irq_gpio,
+				GPIOF_DIR_IN, "stmpe");
 		if (ret) {
 			dev_err(stmpe->dev, "failed to request IRQ GPIO: %d\n",
 					ret);
-			goto out_free;
+			return ret;
 		}
 
 		stmpe->irq = gpio_to_irq(pdata->irq_gpio);
@@ -1047,51 +1106,40 @@
 			dev_err(stmpe->dev,
 				"%s does not support no-irq mode!\n",
 				stmpe->variant->name);
-			ret = -ENODEV;
-			goto free_gpio;
+			return -ENODEV;
 		}
 		stmpe->variant = stmpe_noirq_variant_info[stmpe->partnum];
+	} else if (pdata->irq_trigger == IRQF_TRIGGER_NONE) {
+		pdata->irq_trigger =
+			irqd_get_trigger_type(irq_get_irq_data(stmpe->irq));
 	}
 
 	ret = stmpe_chip_init(stmpe);
 	if (ret)
-		goto free_gpio;
+		return ret;
 
 	if (stmpe->irq >= 0) {
-		ret = stmpe_irq_init(stmpe);
+		ret = stmpe_irq_init(stmpe, np);
 		if (ret)
-			goto free_gpio;
+			return ret;
 
-		ret = request_threaded_irq(stmpe->irq, NULL, stmpe_irq,
-				pdata->irq_trigger | IRQF_ONESHOT,
+		ret = devm_request_threaded_irq(ci->dev, stmpe->irq, NULL,
+				stmpe_irq, pdata->irq_trigger | IRQF_ONESHOT,
 				"stmpe", stmpe);
 		if (ret) {
 			dev_err(stmpe->dev, "failed to request IRQ: %d\n",
 					ret);
-			goto out_removeirq;
+			return ret;
 		}
 	}
 
 	ret = stmpe_devices_init(stmpe);
-	if (ret) {
-		dev_err(stmpe->dev, "failed to add children\n");
-		goto out_removedevs;
-	}
+	if (!ret)
+		return 0;
 
-	return 0;
-
-out_removedevs:
+	dev_err(stmpe->dev, "failed to add children\n");
 	mfd_remove_devices(stmpe->dev);
-	if (stmpe->irq >= 0)
-		free_irq(stmpe->irq, stmpe);
-out_removeirq:
-	if (stmpe->irq >= 0)
-		stmpe_irq_remove(stmpe);
-free_gpio:
-	if (pdata->irq_over_gpio)
-		gpio_free(pdata->irq_gpio);
-out_free:
-	kfree(stmpe);
+
 	return ret;
 }
 
@@ -1099,16 +1147,6 @@
 {
 	mfd_remove_devices(stmpe->dev);
 
-	if (stmpe->irq >= 0) {
-		free_irq(stmpe->irq, stmpe);
-		stmpe_irq_remove(stmpe);
-	}
-
-	if (stmpe->pdata->irq_over_gpio)
-		gpio_free(stmpe->pdata->irq_gpio);
-
-	kfree(stmpe);
-
 	return 0;
 }
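
The stmpe conversion above swaps irq_base arithmetic for an irq domain:
hardware status bits become hwirqs, Linux virqs are created on demand with
irq_create_mapping(), and the mask/unmask helpers read data->hwirq instead of
subtracting a base. The core of the pattern, as a sketch with illustrative
names (a real driver also supplies .map/.unmap/.xlate ops, as stmpe does):

#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

/* One domain per chip; hwirqs 0..num_irqs-1 are chip-internal sources. */
static struct irq_domain *chip_domain_init(struct device_node *np,
					   int num_irqs, int legacy_base,
					   const struct irq_domain_ops *ops,
					   void *chip)
{
	/* With DT (np != NULL) virqs are allocated on demand; a positive
	 * legacy_base keeps old irq_base-style platform data working. */
	return irq_domain_add_simple(np, num_irqs, np ? 0 : legacy_base,
				     ops, chip);
}

/* In the threaded handler: translate a status bit to a virq and fire it. */
static void chip_dispatch(struct irq_domain *domain, int line)
{
	handle_nested_irq(irq_create_mapping(domain, line));
}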
 
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
new file mode 100644
index 0000000..8ca3bf0
--- /dev/null
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -0,0 +1,274 @@
+/*
+ * TI Touch Screen / ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/regmap.h>
+#include <linux/mfd/core.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mfd/ti_am335x_tscadc.h>
+#include <linux/input/ti_am335x_tsc.h>
+#include <linux/platform_data/ti_am335x_adc.h>
+
+static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg)
+{
+	unsigned int val;
+
+	regmap_read(tsadc->regmap_tscadc, reg, &val);
+	return val;
+}
+
+static void tscadc_writel(struct ti_tscadc_dev *tsadc, unsigned int reg,
+					unsigned int val)
+{
+	regmap_write(tsadc->regmap_tscadc, reg, val);
+}
+
+static const struct regmap_config tscadc_regmap_config = {
+	.name = "ti_tscadc",
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+};
+
+static void tscadc_idle_config(struct ti_tscadc_dev *config)
+{
+	unsigned int idleconfig;
+
+	idleconfig = STEPCONFIG_YNN | STEPCONFIG_INM_ADCREFM |
+			STEPCONFIG_INP_ADCREFM | STEPCONFIG_YPN;
+
+	tscadc_writel(config, REG_IDLECONFIG, idleconfig);
+}
+
+static int __devinit ti_tscadc_probe(struct platform_device *pdev)
+{
+	struct ti_tscadc_dev	*tscadc;
+	struct resource		*res;
+	struct clk		*clk;
+	struct mfd_tscadc_board	*pdata = pdev->dev.platform_data;
+	struct mfd_cell		*cell;
+	int			err, ctrl;
+	int			clk_value, clock_rate;
+	int			tsc_wires, adc_channels = 0, total_channels;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
+		return -EINVAL;
+	}
+
+	if (pdata->adc_init)
+		adc_channels = pdata->adc_init->adc_channels;
+
+	tsc_wires = pdata->tsc_init->wires;
+	total_channels = tsc_wires + adc_channels;
+
+	if (total_channels > 8) {
+		dev_err(&pdev->dev, "Number of input channels greater than 8\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource defined.\n");
+		return -EINVAL;
+	}
+
+	/* Allocate memory for device */
+	tscadc = devm_kzalloc(&pdev->dev,
+			sizeof(struct ti_tscadc_dev), GFP_KERNEL);
+	if (!tscadc) {
+		dev_err(&pdev->dev, "failed to allocate memory.\n");
+		return -ENOMEM;
+	}
+	tscadc->dev = &pdev->dev;
+
+	err = platform_get_irq(pdev, 0);
+	if (err < 0) {
+		dev_err(&pdev->dev, "no IRQ specified.\n");
+		goto ret;
+	}
+	tscadc->irq = err;
+
+	res = devm_request_mem_region(&pdev->dev,
+			res->start, resource_size(res), pdev->name);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to reserve registers.\n");
+		return -EBUSY;
+	}
+
+	tscadc->tscadc_base = devm_ioremap(&pdev->dev,
+			res->start, resource_size(res));
+	if (!tscadc->tscadc_base) {
+		dev_err(&pdev->dev, "failed to map registers.\n");
+		return -ENOMEM;
+	}
+
+	tscadc->regmap_tscadc = devm_regmap_init_mmio(&pdev->dev,
+			tscadc->tscadc_base, &tscadc_regmap_config);
+	if (IS_ERR(tscadc->regmap_tscadc)) {
+		dev_err(&pdev->dev, "regmap init failed\n");
+		err = PTR_ERR(tscadc->regmap_tscadc);
+		goto ret;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	/*
+	 * The TSC_ADC_Subsystem has two clock domains:
+	 * OCP_CLK and ADC_CLK.
+	 * The ADC clock is expected to run at a target of 3 MHz
+	 * and to capture 12-bit data at a rate of 200 KSPS.
+	 * The TSC_ADC_SS controller design assumes the OCP clock is
+	 * at least 6x faster than the ADC clock.
+	 */
+	clk = clk_get(&pdev->dev, "adc_tsc_fck");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "failed to get TSC fck\n");
+		err = PTR_ERR(clk);
+		goto err_disable_clk;
+	}
+	clock_rate = clk_get_rate(clk);
+	clk_put(clk);
+	clk_value = clock_rate / ADC_CLK;
+	if (clk_value < MAX_CLK_DIV) {
+		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
+		err = -EINVAL;
+		goto err_disable_clk;
+	}
+	/* TSCADC_CLKDIV needs to be configured to the value minus 1 */
+	clk_value = clk_value - 1;
+	tscadc_writel(tscadc, REG_CLKDIV, clk_value);
+
+	/* Set the control register bits */
+	ctrl = CNTRLREG_STEPCONFIGWRT |
+			CNTRLREG_TSCENB |
+			CNTRLREG_STEPID |
+			CNTRLREG_4WIRE;
+	tscadc_writel(tscadc, REG_CTRL, ctrl);
+
+	/* Set register bits for Idle Config Mode */
+	tscadc_idle_config(tscadc);
+
+	/* Enable the TSC module enable bit */
+	ctrl = tscadc_readl(tscadc, REG_CTRL);
+	ctrl |= CNTRLREG_TSCSSENB;
+	tscadc_writel(tscadc, REG_CTRL, ctrl);
+
+	/* TSC Cell */
+	cell = &tscadc->cells[TSC_CELL];
+	cell->name = "tsc";
+	cell->platform_data = tscadc;
+	cell->pdata_size = sizeof(*tscadc);
+
+	/* ADC Cell */
+	cell = &tscadc->cells[ADC_CELL];
+	cell->name = "tiadc";
+	cell->platform_data = tscadc;
+	cell->pdata_size = sizeof(*tscadc);
+
+	err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells,
+			TSCADC_CELLS, NULL, 0, NULL);
+	if (err < 0)
+		goto err_disable_clk;
+
+	device_init_wakeup(&pdev->dev, true);
+	platform_set_drvdata(pdev, tscadc);
+
+	return 0;
+
+err_disable_clk:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+ret:
+	return err;
+}
+
+static int __devexit ti_tscadc_remove(struct platform_device *pdev)
+{
+	struct ti_tscadc_dev	*tscadc = platform_get_drvdata(pdev);
+
+	tscadc_writel(tscadc, REG_SE, 0x00);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	mfd_remove_devices(tscadc->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tscadc_suspend(struct device *dev)
+{
+	struct ti_tscadc_dev	*tscadc_dev = dev_get_drvdata(dev);
+
+	tscadc_writel(tscadc_dev, REG_SE, 0x00);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tscadc_resume(struct device *dev)
+{
+	struct ti_tscadc_dev	*tscadc_dev = dev_get_drvdata(dev);
+	unsigned int restore, ctrl;
+
+	pm_runtime_get_sync(dev);
+
+	/* context restore */
+	ctrl = CNTRLREG_STEPCONFIGWRT | CNTRLREG_TSCENB |
+			CNTRLREG_STEPID | CNTRLREG_4WIRE;
+	tscadc_writel(tscadc_dev, REG_CTRL, ctrl);
+	tscadc_idle_config(tscadc_dev);
+	tscadc_writel(tscadc_dev, REG_SE, STPENB_STEPENB);
+	restore = tscadc_readl(tscadc_dev, REG_CTRL);
+	tscadc_writel(tscadc_dev, REG_CTRL,
+			(restore | CNTRLREG_TSCSSENB));
+
+	return 0;
+}
+
+static const struct dev_pm_ops tscadc_pm_ops = {
+	.suspend = tscadc_suspend,
+	.resume = tscadc_resume,
+};
+#define TSCADC_PM_OPS (&tscadc_pm_ops)
+#else
+#define TSCADC_PM_OPS NULL
+#endif
+
+static struct platform_driver ti_tscadc_driver = {
+	.driver = {
+		.name   = "ti_tscadc",
+		.owner	= THIS_MODULE,
+		.pm	= TSCADC_PM_OPS,
+	},
+	.probe	= ti_tscadc_probe,
+	.remove	= __devexit_p(ti_tscadc_remove),
+
+};
+
+module_platform_driver(ti_tscadc_driver);
+
+MODULE_DESCRIPTION("TI touchscreen / ADC MFD controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/tps6507x.c b/drivers/mfd/tps6507x.c
index 1b20349..409afa2 100644
--- a/drivers/mfd/tps6507x.c
+++ b/drivers/mfd/tps6507x.c
@@ -86,9 +86,9 @@
 			    const struct i2c_device_id *id)
 {
 	struct tps6507x_dev *tps6507x;
-	int ret = 0;
 
-	tps6507x = kzalloc(sizeof(struct tps6507x_dev), GFP_KERNEL);
+	tps6507x = devm_kzalloc(&i2c->dev, sizeof(struct tps6507x_dev),
+				GFP_KERNEL);
 	if (tps6507x == NULL)
 		return -ENOMEM;
 
@@ -98,19 +98,8 @@
 	tps6507x->read_dev = tps6507x_i2c_read_device;
 	tps6507x->write_dev = tps6507x_i2c_write_device;
 
-	ret = mfd_add_devices(tps6507x->dev, -1,
-			      tps6507x_devs, ARRAY_SIZE(tps6507x_devs),
-			      NULL, 0, NULL);
-
-	if (ret < 0)
-		goto err;
-
-	return ret;
-
-err:
-	mfd_remove_devices(tps6507x->dev);
-	kfree(tps6507x);
-	return ret;
+	return mfd_add_devices(tps6507x->dev, -1, tps6507x_devs,
+			       ARRAY_SIZE(tps6507x_devs), NULL, 0, NULL);
 }
 
 static int tps6507x_i2c_remove(struct i2c_client *i2c)
@@ -118,8 +107,6 @@
 	struct tps6507x_dev *tps6507x = i2c_get_clientdata(i2c);
 
 	mfd_remove_devices(tps6507x->dev);
-	kfree(tps6507x);
-
 	return 0;
 }
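
The tps6507x change is a straight conversion to managed allocation: with
devm_kzalloc() the unwind label in probe() and the kfree() in remove() both
disappear, because devres frees the memory automatically when probe fails or
the device unbinds. The shape of the conversion, as a sketch with placeholder
"foo" names:

#include <linux/device.h>
#include <linux/i2c.h>
#include <linux/slab.h>

struct foo_chip {
	struct device *dev;
};

static int foo_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
{
	struct foo_chip *chip;

	chip = devm_kzalloc(&i2c->dev, sizeof(*chip), GFP_KERNEL);
	if (!chip)
		return -ENOMEM;		/* nothing to unwind */

	chip->dev = &i2c->dev;
	i2c_set_clientdata(i2c, chip);
	return 0;	/* later failures need no kfree(): devres owns chip */
}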
 
diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 382a857..8d12a8e 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -25,7 +25,6 @@
 #include <linux/i2c.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps65090.h>
-#include <linux/regmap.h>
 #include <linux/err.h>
 
 #define NUM_INT_REG 2
@@ -39,204 +38,102 @@
 #define TPS65090_INT_MSK	0x2
 #define TPS65090_INT_MSK2	0x3
 
-struct tps65090_irq_data {
-	u8		mask_reg;
-	u8		mask_pos;
-};
-
-#define TPS65090_IRQ(_reg, _mask_pos)		\
-	{					\
-		.mask_reg	= (_reg),	\
-		.mask_pos	= (_mask_pos),	\
-	}
-
-static const struct tps65090_irq_data tps65090_irqs[] = {
-	[0]		= TPS65090_IRQ(0, 0),
-	[1]		= TPS65090_IRQ(0, 1),
-	[2]		= TPS65090_IRQ(0, 2),
-	[3]		= TPS65090_IRQ(0, 3),
-	[4]		= TPS65090_IRQ(0, 4),
-	[5]		= TPS65090_IRQ(0, 5),
-	[6]		= TPS65090_IRQ(0, 6),
-	[7]		= TPS65090_IRQ(0, 7),
-	[8]		= TPS65090_IRQ(1, 0),
-	[9]		= TPS65090_IRQ(1, 1),
-	[10]		= TPS65090_IRQ(1, 2),
-	[11]		= TPS65090_IRQ(1, 3),
-	[12]		= TPS65090_IRQ(1, 4),
-	[13]		= TPS65090_IRQ(1, 5),
-	[14]		= TPS65090_IRQ(1, 6),
-	[15]		= TPS65090_IRQ(1, 7),
-};
+#define TPS65090_INT1_MASK_VAC_STATUS_CHANGE		1
+#define TPS65090_INT1_MASK_VSYS_STATUS_CHANGE		2
+#define TPS65090_INT1_MASK_BAT_STATUS_CHANGE		3
+#define TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE	4
+#define TPS65090_INT1_MASK_CHARGING_COMPLETE		5
+#define TPS65090_INT1_MASK_OVERLOAD_DCDC1		6
+#define TPS65090_INT1_MASK_OVERLOAD_DCDC2		7
+#define TPS65090_INT2_MASK_OVERLOAD_DCDC3		0
+#define TPS65090_INT2_MASK_OVERLOAD_FET1		1
+#define TPS65090_INT2_MASK_OVERLOAD_FET2		2
+#define TPS65090_INT2_MASK_OVERLOAD_FET3		3
+#define TPS65090_INT2_MASK_OVERLOAD_FET4		4
+#define TPS65090_INT2_MASK_OVERLOAD_FET5		5
+#define TPS65090_INT2_MASK_OVERLOAD_FET6		6
+#define TPS65090_INT2_MASK_OVERLOAD_FET7		7
 
 static struct mfd_cell tps65090s[] = {
 	{
 		.name = "tps65090-pmic",
 	},
 	{
-		.name = "tps65090-regulator",
+		.name = "tps65090-charger",
 	},
 };
 
-int tps65090_write(struct device *dev, int reg, uint8_t val)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_write(tps->rmap, reg, val);
-}
-EXPORT_SYMBOL_GPL(tps65090_write);
+static const struct regmap_irq tps65090_irqs[] = {
+	/* INT1 IRQs */
+	[TPS65090_IRQ_VAC_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_VAC_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_VSYS_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_VSYS_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_BAT_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_BAT_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_CHARGING_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_CHARGING_COMPLETE] = {
+			.mask = TPS65090_INT1_MASK_CHARGING_COMPLETE,
+	},
+	[TPS65090_IRQ_OVERLOAD_DCDC1] = {
+			.mask = TPS65090_INT1_MASK_OVERLOAD_DCDC1,
+	},
+	[TPS65090_IRQ_OVERLOAD_DCDC2] = {
+			.mask = TPS65090_INT1_MASK_OVERLOAD_DCDC2,
+	},
+	/* INT2 IRQs */
+	[TPS65090_IRQ_OVERLOAD_DCDC3] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_DCDC3,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET1] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET1,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET2] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET2,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET3] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET3,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET4] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET4,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET5] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET5,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET6] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET6,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET7] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET7,
+	},
+};
 
-int tps65090_read(struct device *dev, int reg, uint8_t *val)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	unsigned int temp_val;
-	int ret;
-	ret = regmap_read(tps->rmap, reg, &temp_val);
-	if (!ret)
-		*val = temp_val;
-	return ret;
-}
-EXPORT_SYMBOL_GPL(tps65090_read);
-
-int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u);
-}
-EXPORT_SYMBOL_GPL(tps65090_set_bits);
-
-int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u);
-}
-EXPORT_SYMBOL_GPL(tps65090_clr_bits);
-
-static void tps65090_irq_lock(struct irq_data *data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&tps65090->irq_lock);
-}
-
-static void tps65090_irq_mask(struct irq_data *irq_data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->hwirq;
-	const struct tps65090_irq_data *data = &tps65090_irqs[__irq];
-
-	tps65090_set_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg),
-		data->mask_pos);
-}
-
-static void tps65090_irq_unmask(struct irq_data *irq_data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->irq - tps65090->irq_base;
-	const struct tps65090_irq_data *data = &tps65090_irqs[__irq];
-
-	tps65090_clr_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg),
-		data->mask_pos);
-}
-
-static void tps65090_irq_sync_unlock(struct irq_data *data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
-
-	mutex_unlock(&tps65090->irq_lock);
-}
-
-static irqreturn_t tps65090_irq(int irq, void *data)
-{
-	struct tps65090 *tps65090 = data;
-	int ret = 0;
-	u8 status, mask;
-	unsigned long int acks = 0;
-	int i;
-
-	for (i = 0; i < NUM_INT_REG; i++) {
-		ret = tps65090_read(tps65090->dev, TPS65090_INT_MSK + i, &mask);
-		if (ret < 0) {
-			dev_err(tps65090->dev,
-				"failed to read mask reg [addr:%d]\n",
-				TPS65090_INT_MSK + i);
-			return IRQ_NONE;
-		}
-		ret = tps65090_read(tps65090->dev, TPS65090_INT_STS + i,
-			&status);
-		if (ret < 0) {
-			dev_err(tps65090->dev,
-				"failed to read status reg [addr:%d]\n",
-				 TPS65090_INT_STS + i);
-			return IRQ_NONE;
-		}
-		if (status) {
-			/* Ack only those interrupts which are not masked */
-			status &= (~mask);
-			ret = tps65090_write(tps65090->dev,
-					TPS65090_INT_STS + i, status);
-			if (ret < 0) {
-				dev_err(tps65090->dev,
-					"failed to write interrupt status\n");
-				return IRQ_NONE;
-			}
-			acks |= (status << (i * 8));
-		}
-	}
-
-	for_each_set_bit(i, &acks, ARRAY_SIZE(tps65090_irqs))
-		handle_nested_irq(tps65090->irq_base + i);
-	return acks ? IRQ_HANDLED : IRQ_NONE;
-}
-
-static int tps65090_irq_init(struct tps65090 *tps65090, int irq,
-	int irq_base)
-{
-	int i, ret;
-
-	if (!irq_base) {
-		dev_err(tps65090->dev, "IRQ base not set\n");
-		return -EINVAL;
-	}
-
-	mutex_init(&tps65090->irq_lock);
-
-	for (i = 0; i < NUM_INT_REG; i++)
-		tps65090_write(tps65090->dev, TPS65090_INT_MSK + i, 0xFF);
-
-	for (i = 0; i < NUM_INT_REG; i++)
-		tps65090_write(tps65090->dev, TPS65090_INT_STS + i, 0xff);
-
-	tps65090->irq_base = irq_base;
-	tps65090->irq_chip.name = "tps65090";
-	tps65090->irq_chip.irq_mask = tps65090_irq_mask;
-	tps65090->irq_chip.irq_unmask = tps65090_irq_unmask;
-	tps65090->irq_chip.irq_bus_lock = tps65090_irq_lock;
-	tps65090->irq_chip.irq_bus_sync_unlock = tps65090_irq_sync_unlock;
-
-	for (i = 0; i < ARRAY_SIZE(tps65090_irqs); i++) {
-		int __irq = i + tps65090->irq_base;
-		irq_set_chip_data(__irq, tps65090);
-		irq_set_chip_and_handler(__irq, &tps65090->irq_chip,
-					 handle_simple_irq);
-		irq_set_nested_thread(__irq, 1);
-#ifdef CONFIG_ARM
-		set_irq_flags(__irq, IRQF_VALID);
-#endif
-	}
-
-	ret = request_threaded_irq(irq, NULL, tps65090_irq, IRQF_ONESHOT,
-				"tps65090", tps65090);
-	if (!ret) {
-		device_init_wakeup(tps65090->dev, 1);
-		enable_irq_wake(irq);
-	}
-
-	return ret;
-}
+static struct regmap_irq_chip tps65090_irq_chip = {
+	.name = "tps65090",
+	.irqs = tps65090_irqs,
+	.num_irqs = ARRAY_SIZE(tps65090_irqs),
+	.num_regs = NUM_INT_REG,
+	.status_base = TPS65090_INT_STS,
+	.mask_base = TPS65090_INT_MSK,
+	.mask_invert = true,
+};
 
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
-	if (reg == TPS65090_INT_STS)
+	if ((reg == TPS65090_INT_STS) || (reg == TPS65090_INT_STS2))
 		return true;
 	else
 		return false;
@@ -263,36 +160,36 @@
 		return -EINVAL;
 	}
 
-	tps65090 = devm_kzalloc(&client->dev, sizeof(struct tps65090),
-		GFP_KERNEL);
-	if (tps65090 == NULL)
+	tps65090 = devm_kzalloc(&client->dev, sizeof(*tps65090), GFP_KERNEL);
+	if (!tps65090) {
+		dev_err(&client->dev, "mem alloc for tps65090 failed\n");
 		return -ENOMEM;
+	}
 
-	tps65090->client = client;
 	tps65090->dev = &client->dev;
 	i2c_set_clientdata(client, tps65090);
 
-	mutex_init(&tps65090->lock);
-
-	if (client->irq) {
-		ret = tps65090_irq_init(tps65090, client->irq, pdata->irq_base);
-		if (ret) {
-			dev_err(&client->dev, "IRQ init failed with err: %d\n",
-				ret);
-			goto err_exit;
-		}
-	}
-
-	tps65090->rmap = devm_regmap_init_i2c(tps65090->client,
-					      &tps65090_regmap_config);
+	tps65090->rmap = devm_regmap_init_i2c(client, &tps65090_regmap_config);
 	if (IS_ERR(tps65090->rmap)) {
 		ret = PTR_ERR(tps65090->rmap);
 		dev_err(&client->dev, "regmap_init failed with err: %d\n", ret);
-		goto err_irq_exit;
+		return ret;
+	}
+
+	if (client->irq) {
+		ret = regmap_add_irq_chip(tps65090->rmap, client->irq,
+			IRQF_ONESHOT | IRQF_TRIGGER_LOW, pdata->irq_base,
+			&tps65090_irq_chip, &tps65090->irq_data);
+		if (ret) {
+			dev_err(&client->dev,
+				"IRQ init failed with err: %d\n", ret);
+			return ret;
+		}
 	}
 
 	ret = mfd_add_devices(tps65090->dev, -1, tps65090s,
-			      ARRAY_SIZE(tps65090s), NULL, 0, NULL);
+		ARRAY_SIZE(tps65090s), NULL,
+		regmap_irq_chip_get_base(tps65090->irq_data), NULL);
 	if (ret) {
 		dev_err(&client->dev, "add mfd devices failed with err: %d\n",
 			ret);
@@ -303,8 +200,7 @@
 
 err_irq_exit:
 	if (client->irq)
-		free_irq(client->irq, tps65090);
-err_exit:
+		regmap_del_irq_chip(client->irq, tps65090->irq_data);
 	return ret;
 }
 
@@ -314,7 +210,7 @@
 
 	mfd_remove_devices(tps65090->dev);
 	if (client->irq)
-		free_irq(client->irq, tps65090);
+		regmap_del_irq_chip(client->irq, tps65090->irq_data);
 
 	return 0;
 }
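
The tps65090 diff above deletes roughly 170 lines of hand-rolled mask/ack
bookkeeping by describing the two interrupt register banks declaratively;
regmap-irq then supplies the locking, masking, acking, and nested dispatch. A
minimal sketch of such a declarative table; register addresses and bit choices
here are placeholders, not the tps65090's:

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/regmap.h>

/* One regmap_irq per source; reg_offset is relative to status_base
 * and mask_base. */
static const struct regmap_irq demo_irqs[] = {
	{ .reg_offset = 0, .mask = BIT(1) },	/* bank 0, bit 1 */
	{ .reg_offset = 1, .mask = BIT(0) },	/* bank 1, bit 0 */
};

static struct regmap_irq_chip demo_irq_chip = {
	.name		= "demo",
	.irqs		= demo_irqs,
	.num_irqs	= ARRAY_SIZE(demo_irqs),
	.num_regs	= 2,
	.status_base	= 0x00,		/* stands in for INT_STS */
	.mask_base	= 0x02,		/* stands in for INT_MSK */
	.mask_invert	= true,		/* set bits enable, as on tps65090 */
};

/* One call replaces the open-coded handler and per-irq desc setup:
 * regmap_add_irq_chip(map, irq, IRQF_ONESHOT, 0, &demo_irq_chip, &data);
 */
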
diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index e14e252..b8f4864 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -160,6 +160,7 @@
 	unsigned int version;
 	unsigned int chip_id = ids->driver_data;
 	const struct of_device_id *match;
+	bool status_off = false;
 	int ret;
 
 	if (client->dev.of_node) {
@@ -170,6 +171,8 @@
 			return -EINVAL;
 		}
 		chip_id = (unsigned int)match->data;
+		status_off = of_property_read_bool(client->dev.of_node,
+					"ti,pmic-shutdown-controller");
 	}
 
 	if (!chip_id) {
@@ -207,6 +210,15 @@
 		return ret;
 	}
 
+	/* Set the PMIC to shutdown on PWR_EN toggle */
+	if (status_off) {
+		ret = tps65217_set_bits(tps, TPS65217_REG_STATUS,
+				TPS65217_STATUS_OFF, TPS65217_STATUS_OFF,
+				TPS65217_PROTECT_NONE);
+		if (ret)
+			dev_warn(tps->dev, "unable to set the status OFF\n");
+	}
+
 	dev_info(tps->dev, "TPS65217 ID %#x version 1.%d\n",
 			(version & TPS65217_CHIPID_CHIP_MASK) >> 4,
 			version & TPS65217_CHIPID_REV_MASK);
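
The tps65217 hunk reads an optional device-tree boolean:
of_property_read_bool() tests only for the property's presence, so a bare
"ti,pmic-shutdown-controller;" line in the PMIC node enables the STATUS_OFF
behaviour and its absence leaves it disabled. As a sketch:

#include <linux/of.h>

/* Presence-only DT boolean: no value is attached to the property. */
static bool wants_shutdown_ctrl(const struct device_node *np)
{
	return of_property_read_bool(np, "ti,pmic-shutdown-controller");
}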
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 87ba7ad..721b918 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -17,12 +17,14 @@
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 #include <linux/mfd/core.h>
@@ -92,6 +94,14 @@
 	[TPS6586X_INT_RTC_ALM2] = TPS6586X_IRQ(TPS6586X_INT_MASK4, 1 << 1),
 };
 
+static struct resource tps6586x_rtc_resources[] = {
+	{
+		.start  = TPS6586X_INT_RTC_ALM1,
+		.end	= TPS6586X_INT_RTC_ALM1,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
 static struct mfd_cell tps6586x_cell[] = {
 	{
 		.name = "tps6586x-gpio",
@@ -101,6 +111,8 @@
 	},
 	{
 		.name = "tps6586x-rtc",
+		.num_resources = ARRAY_SIZE(tps6586x_rtc_resources),
+		.resources = &tps6586x_rtc_resources[0],
 	},
 	{
 		.name = "tps6586x-onkey",
@@ -117,6 +129,7 @@
 	int			irq_base;
 	u32			irq_en;
 	u8			mask_reg[5];
+	struct irq_domain	*irq_domain;
 };
 
 static inline struct tps6586x *dev_to_tps6586x(struct device *dev)
@@ -185,6 +198,14 @@
 }
 EXPORT_SYMBOL_GPL(tps6586x_update);
 
+int tps6586x_irq_get_virq(struct device *dev, int irq)
+{
+	struct tps6586x *tps6586x = dev_to_tps6586x(dev);
+
+	return irq_create_mapping(tps6586x->irq_domain, irq);
+}
+EXPORT_SYMBOL_GPL(tps6586x_irq_get_virq);
+
 static int __remove_subdev(struct device *dev, void *unused)
 {
 	platform_device_unregister(to_platform_device(dev));
@@ -206,7 +227,7 @@
 static void tps6586x_irq_enable(struct irq_data *irq_data)
 {
 	struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->irq - tps6586x->irq_base;
+	unsigned int __irq = irq_data->hwirq;
 	const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq];
 
 	tps6586x->mask_reg[data->mask_reg] &= ~data->mask_mask;
@@ -217,7 +238,7 @@
 {
 	struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data);
 
-	unsigned int __irq = irq_data->irq - tps6586x->irq_base;
+	unsigned int __irq = irq_data->hwirq;
 	const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq];
 
 	tps6586x->mask_reg[data->mask_reg] |= data->mask_mask;
@@ -240,6 +261,39 @@
 	mutex_unlock(&tps6586x->irq_lock);
 }
 
+static struct irq_chip tps6586x_irq_chip = {
+	.name = "tps6586x",
+	.irq_bus_lock = tps6586x_irq_lock,
+	.irq_bus_sync_unlock = tps6586x_irq_sync_unlock,
+	.irq_disable = tps6586x_irq_disable,
+	.irq_enable = tps6586x_irq_enable,
+};
+
+static int tps6586x_irq_map(struct irq_domain *h, unsigned int virq,
+				irq_hw_number_t hw)
+{
+	struct tps6586x *tps6586x = h->host_data;
+
+	irq_set_chip_data(virq, tps6586x);
+	irq_set_chip_and_handler(virq, &tps6586x_irq_chip, handle_simple_irq);
+	irq_set_nested_thread(virq, 1);
+
+	/*
+	 * ARM needs us to explicitly flag the IRQ as valid
+	 * and will mark it noprobe when we do so.
+	 */
+#ifdef CONFIG_ARM
+	set_irq_flags(virq, IRQF_VALID);
+#else
+	irq_set_noprobe(virq);
+#endif
+
+	return 0;
+}
+
+static struct irq_domain_ops tps6586x_domain_ops = {
+	.map    = tps6586x_irq_map,
+	.xlate  = irq_domain_xlate_twocell,
+};
+
 static irqreturn_t tps6586x_irq(int irq, void *data)
 {
 	struct tps6586x *tps6586x = data;
@@ -260,7 +314,8 @@
 		int i = __ffs(acks);
 
 		if (tps6586x->irq_en & (1 << i))
-			handle_nested_irq(tps6586x->irq_base + i);
+			handle_nested_irq(
+				irq_find_mapping(tps6586x->irq_domain, i));
 
 		acks &= ~(1 << i);
 	}
@@ -273,11 +328,8 @@
 {
 	int i, ret;
 	u8 tmp[4];
-
-	if (!irq_base) {
-		dev_warn(tps6586x->dev, "No interrupt support on IRQ base\n");
-		return -EINVAL;
-	}
+	int new_irq_base;
+	int irq_num = ARRAY_SIZE(tps6586x_irqs);
 
 	mutex_init(&tps6586x->irq_lock);
 	for (i = 0; i < 5; i++) {
@@ -287,25 +339,24 @@
 
 	tps6586x_reads(tps6586x->dev, TPS6586X_INT_ACK1, sizeof(tmp), tmp);
 
-	tps6586x->irq_base = irq_base;
-
-	tps6586x->irq_chip.name = "tps6586x";
-	tps6586x->irq_chip.irq_enable = tps6586x_irq_enable;
-	tps6586x->irq_chip.irq_disable = tps6586x_irq_disable;
-	tps6586x->irq_chip.irq_bus_lock = tps6586x_irq_lock;
-	tps6586x->irq_chip.irq_bus_sync_unlock = tps6586x_irq_sync_unlock;
-
-	for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) {
-		int __irq = i + tps6586x->irq_base;
-		irq_set_chip_data(__irq, tps6586x);
-		irq_set_chip_and_handler(__irq, &tps6586x->irq_chip,
-					 handle_simple_irq);
-		irq_set_nested_thread(__irq, 1);
-#ifdef CONFIG_ARM
-		set_irq_flags(__irq, IRQF_VALID);
-#endif
+	if (irq_base > 0) {
+		new_irq_base = irq_alloc_descs(irq_base, 0, irq_num, -1);
+		if (new_irq_base < 0) {
+			dev_err(tps6586x->dev,
+				"Failed to alloc IRQs: %d\n", new_irq_base);
+			return new_irq_base;
+		}
+	} else {
+		new_irq_base = 0;
 	}
 
+	tps6586x->irq_domain = irq_domain_add_simple(tps6586x->dev->of_node,
+				irq_num, new_irq_base, &tps6586x_domain_ops,
+				tps6586x);
+	if (!tps6586x->irq_domain) {
+		dev_err(tps6586x->dev, "Failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
 	ret = request_threaded_irq(irq, NULL, tps6586x_irq, IRQF_ONESHOT,
 				   "tps6586x", tps6586x);
 
@@ -461,7 +512,7 @@
 
 	ret = mfd_add_devices(tps6586x->dev, -1,
 			      tps6586x_cell, ARRAY_SIZE(tps6586x_cell),
-			      NULL, 0, NULL);
+			      NULL, 0, tps6586x->irq_domain);
 	if (ret < 0) {
 		dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret);
 		goto err_mfd_add;
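
The tps6586x init above now supports both worlds: a board that still passes a
positive irq_base gets that descriptor range reserved up front, while DT users
pass zero and let the domain allocate virqs on demand. Sketched in isolation,
with illustrative names:

#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

static struct irq_domain *setup_domain(struct device_node *np, int irq_base,
				       int nirqs,
				       const struct irq_domain_ops *ops,
				       void *chip)
{
	int base = 0;

	if (irq_base > 0) {
		/* Legacy path: claim the requested descriptor range. */
		base = irq_alloc_descs(irq_base, 0, nirqs, -1);
		if (base < 0)
			return NULL;
	}
	/* base > 0 behaves as a legacy domain, base == 0 as a linear one. */
	return irq_domain_add_simple(np, nirqs, base, ops, chip);
}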
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
deleted file mode 100644
index 09aab3e4..0000000
--- a/drivers/mfd/tps65910-irq.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * tps65910-irq.c  --  TI TPS6591x
- *
- * Copyright 2010 Texas Instruments Inc.
- *
- * Author: Graeme Gregory <gg@slimlogic.co.uk>
- * Author: Jorge Eduardo Candelaria <jedu@slimlogic.co.uk>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under  the terms of the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bug.h>
-#include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/gpio.h>
-#include <linux/mfd/tps65910.h>
-
-/*
- * This is a threaded IRQ handler so can access I2C/SPI.  Since all
- * interrupts are clear on read the IRQ line will be reasserted and
- * the physical IRQ will be handled again if another interrupt is
- * asserted while we run - in the normal course of events this is a
- * rare occurrence so we save I2C/SPI reads.  We're also assuming that
- * it's rare to get lots of interrupts firing simultaneously so try to
- * minimise I/O.
- */
-static irqreturn_t tps65910_irq(int irq, void *irq_data)
-{
-	struct tps65910 *tps65910 = irq_data;
-	unsigned int reg;
-	u32 irq_sts;
-	u32 irq_mask;
-	int i;
-
-	tps65910_reg_read(tps65910, TPS65910_INT_STS, &reg);
-	irq_sts = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_STS2, &reg);
-	irq_sts |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_STS3, &reg);
-		irq_sts |= reg << 16;
-	}
-
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
-	irq_mask = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
-	irq_mask |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
-		irq_mask |= reg << 16;
-	}
-
-	irq_sts &= ~irq_mask;
-
-	if (!irq_sts)
-		return IRQ_NONE;
-
-	for (i = 0; i < tps65910->irq_num; i++) {
-
-		if (!(irq_sts & (1 << i)))
-			continue;
-
-		handle_nested_irq(irq_find_mapping(tps65910->domain, i));
-	}
-
-	/* Write the STS register back to clear IRQs we handled */
-	reg = irq_sts & 0xFF;
-	irq_sts >>= 8;
-	tps65910_reg_write(tps65910, TPS65910_INT_STS, reg);
-	reg = irq_sts & 0xFF;
-	tps65910_reg_write(tps65910, TPS65910_INT_STS2, reg);
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		reg = irq_sts >> 8;
-		tps65910_reg_write(tps65910, TPS65910_INT_STS3, reg);
-	}
-
-	return IRQ_HANDLED;
-}
-
-static void tps65910_irq_lock(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&tps65910->irq_lock);
-}
-
-static void tps65910_irq_sync_unlock(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-	u32 reg_mask;
-	unsigned int reg;
-
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
-	reg_mask = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
-	reg_mask |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
-		reg_mask |= reg << 16;
-	}
-
-	if (tps65910->irq_mask != reg_mask) {
-		reg = tps65910->irq_mask & 0xFF;
-		tps65910_reg_write(tps65910, TPS65910_INT_MSK, reg);
-		reg = tps65910->irq_mask >> 8 & 0xFF;
-		tps65910_reg_write(tps65910, TPS65910_INT_MSK2, reg);
-		switch (tps65910_chip_id(tps65910)) {
-		case TPS65911:
-			reg = tps65910->irq_mask >> 16;
-			tps65910_reg_write(tps65910, TPS65910_INT_MSK3, reg);
-		}
-	}
-	mutex_unlock(&tps65910->irq_lock);
-}
-
-static void tps65910_irq_enable(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	tps65910->irq_mask &= ~(1 << data->hwirq);
-}
-
-static void tps65910_irq_disable(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	tps65910->irq_mask |= (1 << data->hwirq);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int tps65910_irq_set_wake(struct irq_data *data, unsigned int enable)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-	return irq_set_irq_wake(tps65910->chip_irq, enable);
-}
-#else
-#define tps65910_irq_set_wake NULL
-#endif
-
-static struct irq_chip tps65910_irq_chip = {
-	.name = "tps65910",
-	.irq_bus_lock = tps65910_irq_lock,
-	.irq_bus_sync_unlock = tps65910_irq_sync_unlock,
-	.irq_disable = tps65910_irq_disable,
-	.irq_enable = tps65910_irq_enable,
-	.irq_set_wake = tps65910_irq_set_wake,
-};
-
-static int tps65910_irq_map(struct irq_domain *h, unsigned int virq,
-				irq_hw_number_t hw)
-{
-	struct tps65910 *tps65910 = h->host_data;
-
-	irq_set_chip_data(virq, tps65910);
-	irq_set_chip_and_handler(virq, &tps65910_irq_chip, handle_edge_irq);
-	irq_set_nested_thread(virq, 1);
-
-	/* ARM needs us to explicitly flag the IRQ as valid
-	 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-	set_irq_flags(virq, IRQF_VALID);
-#else
-	irq_set_noprobe(virq);
-#endif
-
-	return 0;
-}
-
-static struct irq_domain_ops tps65910_domain_ops = {
-	.map	= tps65910_irq_map,
-	.xlate	= irq_domain_xlate_twocell,
-};
-
-int tps65910_irq_init(struct tps65910 *tps65910, int irq,
-		    struct tps65910_platform_data *pdata)
-{
-	int ret;
-	int flags = IRQF_ONESHOT;
-
-	if (!irq) {
-		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
-		return -EINVAL;
-	}
-
-	if (!pdata) {
-		dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
-		return -EINVAL;
-	}
-
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65910:
-		tps65910->irq_num = TPS65910_NUM_IRQ;
-		break;
-	case TPS65911:
-		tps65910->irq_num = TPS65911_NUM_IRQ;
-		break;
-	}
-
-	if (pdata->irq_base > 0) {
-		pdata->irq_base = irq_alloc_descs(pdata->irq_base, 0,
-					tps65910->irq_num, -1);
-		if (pdata->irq_base < 0) {
-			dev_warn(tps65910->dev, "Failed to alloc IRQs: %d\n",
-					pdata->irq_base);
-			return pdata->irq_base;
-		}
-	}
-
-	tps65910->irq_mask = 0xFFFFFF;
-
-	mutex_init(&tps65910->irq_lock);
-	tps65910->chip_irq = irq;
-	tps65910->irq_base = pdata->irq_base;
-
-	if (pdata->irq_base > 0)
-		tps65910->domain = irq_domain_add_legacy(tps65910->dev->of_node,
-					tps65910->irq_num,
-					pdata->irq_base,
-					0,
-					&tps65910_domain_ops, tps65910);
-	else
-		tps65910->domain = irq_domain_add_linear(tps65910->dev->of_node,
-					tps65910->irq_num,
-					&tps65910_domain_ops, tps65910);
-
-	if (!tps65910->domain) {
-		dev_err(tps65910->dev, "Failed to create IRQ domain\n");
-		return -ENOMEM;
-	}
-
-	ret = request_threaded_irq(irq, NULL, tps65910_irq, flags,
-				   "tps65910", tps65910);
-
-	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
-
-	if (ret != 0)
-		dev_err(tps65910->dev, "Failed to request IRQ: %d\n", ret);
-
-	return ret;
-}
-
-int tps65910_irq_exit(struct tps65910 *tps65910)
-{
-	if (tps65910->chip_irq)
-		free_irq(tps65910->chip_irq, tps65910);
-	return 0;
-}
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index ce05465..d792772 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -19,6 +19,9 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/mfd/core.h>
 #include <linux/regmap.h>
 #include <linux/mfd/tps65910.h>
@@ -50,6 +53,219 @@
 };
 
 
+static const struct regmap_irq tps65911_irqs[] = {
+	/* INT_STS */
+	[TPS65911_IRQ_PWRHOLD_F] = {
+		.mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_VBAT_VMHI] = {
+		.mask = INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON] = {
+		.mask = INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON_LP] = {
+		.mask = INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRHOLD_R] = {
+		.mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_HOTDIE] = {
+		.mask = INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_ALARM] = {
+		.mask = INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_PERIOD] = {
+		.mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65911_IRQ_GPIO0_R] = {
+		.mask = INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO0_F] = {
+		.mask = INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_R] = {
+		.mask = INT_MSK2_GPIO1_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_F] = {
+		.mask = INT_MSK2_GPIO1_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_R] = {
+		.mask = INT_MSK2_GPIO2_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_F] = {
+		.mask = INT_MSK2_GPIO2_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_R] = {
+		.mask = INT_MSK2_GPIO3_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_F] = {
+		.mask = INT_MSK2_GPIO3_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+
+	/* INT_STS3 */
+	[TPS65911_IRQ_GPIO4_R] = {
+		.mask = INT_MSK3_GPIO4_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO4_F] = {
+		.mask = INT_MSK3_GPIO4_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_R] = {
+		.mask = INT_MSK3_GPIO5_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_F] = {
+		.mask = INT_MSK3_GPIO5_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_WTCHDG] = {
+		.mask = INT_MSK3_WTCHDG_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_H] = {
+		.mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_L] = {
+		.mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_PWRDN] = {
+		.mask = INT_MSK3_PWRDN_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+};
+
+static const struct regmap_irq tps65910_irqs[] = {
+	/* INT_STS */
+	[TPS65910_IRQ_VBAT_VMBDCH] = {
+		.mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_VBAT_VMHI] = {
+		.mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON] = {
+		.mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON_LP] = {
+		.mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRHOLD] = {
+		.mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_HOTDIE] = {
+		.mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_ALARM] = {
+		.mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_PERIOD] = {
+		.mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65910_IRQ_GPIO_R] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65910_IRQ_GPIO_F] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+};
+
+static struct regmap_irq_chip tps65911_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65911_irqs,
+	.num_irqs = ARRAY_SIZE(tps65911_irqs),
+	.num_regs = 3,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_STS,
+};
+
+static struct regmap_irq_chip tps65910_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65910_irqs,
+	.num_irqs = ARRAY_SIZE(tps65910_irqs),
+	.num_regs = 2,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_STS,
+};
+
+static int tps65910_irq_init(struct tps65910 *tps65910, int irq,
+		    struct tps65910_platform_data *pdata)
+{
+	int ret = 0;
+	struct regmap_irq_chip *tps6591x_irqs_chip;
+
+	if (!irq) {
+		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
+		return -EINVAL;
+	}
+
+	if (!pdata) {
+		dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
+		return -EINVAL;
+	}
+
+	switch (tps65910_chip_id(tps65910)) {
+	case TPS65910:
+		tps6591x_irqs_chip = &tps65910_irq_chip;
+		break;
+	case TPS65911:
+		tps6591x_irqs_chip = &tps65911_irq_chip;
+		break;
+	}
+
+	tps65910->chip_irq = irq;
+	ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
+		IRQF_ONESHOT, pdata->irq_base,
+		tps6591x_irqs_chip, &tps65910->irq_data);
+	if (ret < 0)
+		dev_warn(tps65910->dev, "Failed to add irq_chip %d\n", ret);
+	return ret;
+}
+
+static int tps65910_irq_exit(struct tps65910 *tps65910)
+{
+	if (tps65910->chip_irq > 0)
+		regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
+	return 0;
+}
+
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
 	struct tps65910 *tps65910 = dev_get_drvdata(dev);
@@ -270,7 +486,6 @@
 	tps65910->dev = &i2c->dev;
 	tps65910->i2c_client = i2c;
 	tps65910->id = chip_id;
-	mutex_init(&tps65910->io_mutex);
 
 	tps65910->regmap = devm_regmap_init_i2c(i2c, &tps65910_regmap_config);
 	if (IS_ERR(tps65910->regmap)) {
@@ -279,14 +494,6 @@
 		return ret;
 	}
 
-	ret = mfd_add_devices(tps65910->dev, -1,
-			      tps65910s, ARRAY_SIZE(tps65910s),
-			      NULL, 0, NULL);
-	if (ret < 0) {
-		dev_err(&i2c->dev, "mfd_add_devices failed: %d\n", ret);
-		return ret;
-	}
-
 	init_data->irq = pmic_plat_data->irq;
 	init_data->irq_base = pmic_plat_data->irq_base;
 
@@ -299,6 +506,15 @@
 		pm_power_off = tps65910_power_off;
 	}
 
+	ret = mfd_add_devices(tps65910->dev, -1,
+			      tps65910s, ARRAY_SIZE(tps65910s),
+			      NULL, 0,
+			      regmap_irq_get_domain(tps65910->irq_data));
+	if (ret < 0) {
+		dev_err(&i2c->dev, "mfd_add_devices failed: %d\n", ret);
+		return ret;
+	}
+
 	return ret;
 }
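
With regmap-irq in place, the tps65910 cells keep chip-relative interrupt
numbers in their IORESOURCE_IRQ resources, and passing the chip's domain to
mfd_add_devices() lets the MFD core remap each one to a Linux virq at
registration time. The call shape, as a sketch:

#include <linux/mfd/core.h>
#include <linux/regmap.h>

static int add_cells(struct device *dev, struct mfd_cell *cells, int n,
		     struct regmap_irq_chip_data *irq_data)
{
	/* NULL mem base, irq_base 0: cell resources are remapped through
	 * the regmap-irq domain instead of being offset by a base. */
	return mfd_add_devices(dev, -1, cells, n, NULL, 0,
			       regmap_irq_get_domain(irq_data));
}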
 
diff --git a/drivers/mfd/tps80031.c b/drivers/mfd/tps80031.c
new file mode 100644
index 0000000..10b51f7
--- /dev/null
+++ b/drivers/mfd/tps80031.c
@@ -0,0 +1,574 @@
+/*
+ * tps80031.c -- TI TPS80031/TPS80032 mfd core driver.
+ *
+ * MFD core driver for TI TPS80031/TPS80032 Fully Integrated
+ * Power Management with Power Path and Battery Charger
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/tps80031.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+static struct resource tps80031_rtc_resources[] = {
+	{
+		.start = TPS80031_INT_RTC_ALARM,
+		.end = TPS80031_INT_RTC_ALARM,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+/* TPS80031 sub mfd devices */
+static struct mfd_cell tps80031_cell[] = {
+	{
+		.name = "tps80031-pmic",
+	},
+	{
+		.name = "tps80031-clock",
+	},
+	{
+		.name = "tps80031-rtc",
+		.num_resources = ARRAY_SIZE(tps80031_rtc_resources),
+		.resources = tps80031_rtc_resources,
+	},
+	{
+		.name = "tps80031-gpadc",
+	},
+	{
+		.name = "tps80031-fuel-gauge",
+	},
+	{
+		.name = "tps80031-charger",
+	},
+};
+
+static int tps80031_slave_address[TPS80031_NUM_SLAVES] = {
+	TPS80031_I2C_ID0_ADDR,
+	TPS80031_I2C_ID1_ADDR,
+	TPS80031_I2C_ID2_ADDR,
+	TPS80031_I2C_ID3_ADDR,
+};
+
+struct tps80031_pupd_data {
+	u8	reg;
+	u8	pullup_bit;
+	u8	pulldown_bit;
+};
+
+#define TPS80031_IRQ(_reg, _mask)			\
+	{							\
+		.reg_offset = (TPS80031_INT_MSK_LINE_##_reg) -	\
+				TPS80031_INT_MSK_LINE_A,	\
+		.mask = BIT(_mask),				\
+	}
+
+static const struct regmap_irq tps80031_main_irqs[] = {
+	[TPS80031_INT_PWRON]		= TPS80031_IRQ(A, 0),
+	[TPS80031_INT_RPWRON]		= TPS80031_IRQ(A, 1),
+	[TPS80031_INT_SYS_VLOW]		= TPS80031_IRQ(A, 2),
+	[TPS80031_INT_RTC_ALARM]	= TPS80031_IRQ(A, 3),
+	[TPS80031_INT_RTC_PERIOD]	= TPS80031_IRQ(A, 4),
+	[TPS80031_INT_HOT_DIE]		= TPS80031_IRQ(A, 5),
+	[TPS80031_INT_VXX_SHORT]	= TPS80031_IRQ(A, 6),
+	[TPS80031_INT_SPDURATION]	= TPS80031_IRQ(A, 7),
+	[TPS80031_INT_WATCHDOG]		= TPS80031_IRQ(B, 0),
+	[TPS80031_INT_BAT]		= TPS80031_IRQ(B, 1),
+	[TPS80031_INT_SIM]		= TPS80031_IRQ(B, 2),
+	[TPS80031_INT_MMC]		= TPS80031_IRQ(B, 3),
+	[TPS80031_INT_RES]		= TPS80031_IRQ(B, 4),
+	[TPS80031_INT_GPADC_RT]		= TPS80031_IRQ(B, 5),
+	[TPS80031_INT_GPADC_SW2_EOC]	= TPS80031_IRQ(B, 6),
+	[TPS80031_INT_CC_AUTOCAL]	= TPS80031_IRQ(B, 7),
+	[TPS80031_INT_ID_WKUP]		= TPS80031_IRQ(C, 0),
+	[TPS80031_INT_VBUSS_WKUP]	= TPS80031_IRQ(C, 1),
+	[TPS80031_INT_ID]		= TPS80031_IRQ(C, 2),
+	[TPS80031_INT_VBUS]		= TPS80031_IRQ(C, 3),
+	[TPS80031_INT_CHRG_CTRL]	= TPS80031_IRQ(C, 4),
+	[TPS80031_INT_EXT_CHRG]		= TPS80031_IRQ(C, 5),
+	[TPS80031_INT_INT_CHRG]		= TPS80031_IRQ(C, 6),
+	[TPS80031_INT_RES2]		= TPS80031_IRQ(C, 7),
+};
+
+static struct regmap_irq_chip tps80031_irq_chip = {
+	.name = "tps80031",
+	.irqs = tps80031_main_irqs,
+	.num_irqs = ARRAY_SIZE(tps80031_main_irqs),
+	.num_regs = 3,
+	.status_base = TPS80031_INT_STS_A,
+	.mask_base = TPS80031_INT_MSK_LINE_A,
+};
+
+#define PUPD_DATA(_reg, _pulldown_bit, _pullup_bit)	\
+	{						\
+		.reg = TPS80031_CFG_INPUT_PUPD##_reg,	\
+		.pulldown_bit = _pulldown_bit,		\
+		.pullup_bit = _pullup_bit,		\
+	}
+
+static const struct tps80031_pupd_data tps80031_pupds[] = {
+	[TPS80031_PREQ1]		= PUPD_DATA(1, BIT(0),	BIT(1)),
+	[TPS80031_PREQ2A]		= PUPD_DATA(1, BIT(2),	BIT(3)),
+	[TPS80031_PREQ2B]		= PUPD_DATA(1, BIT(4),	BIT(5)),
+	[TPS80031_PREQ2C]		= PUPD_DATA(1, BIT(6),	BIT(7)),
+	[TPS80031_PREQ3]		= PUPD_DATA(2, BIT(0),	BIT(1)),
+	[TPS80031_NRES_WARM]		= PUPD_DATA(2, 0,	BIT(2)),
+	[TPS80031_PWM_FORCE]		= PUPD_DATA(2, BIT(5),	0),
+	[TPS80031_CHRG_EXT_CHRG_STATZ]	= PUPD_DATA(2, 0,	BIT(6)),
+	[TPS80031_SIM]			= PUPD_DATA(3, BIT(0),	BIT(1)),
+	[TPS80031_MMC]			= PUPD_DATA(3, BIT(2),	BIT(3)),
+	[TPS80031_GPADC_START]		= PUPD_DATA(3, BIT(4),	0),
+	[TPS80031_DVSI2C_SCL]		= PUPD_DATA(4, 0,	BIT(0)),
+	[TPS80031_DVSI2C_SDA]		= PUPD_DATA(4, 0,	BIT(1)),
+	[TPS80031_CTLI2C_SCL]		= PUPD_DATA(4, 0,	BIT(2)),
+	[TPS80031_CTLI2C_SDA]		= PUPD_DATA(4, 0,	BIT(3)),
+};
+
+static struct tps80031 *tps80031_power_off_dev;
+
+int tps80031_ext_power_req_config(struct device *dev,
+		unsigned long ext_ctrl_flag, int preq_bit,
+		int state_reg_add, int trans_reg_add)
+{
+	u8 res_ass_reg = 0;
+	int preq_mask_bit = 0;
+	int ret;
+
+	if (!(ext_ctrl_flag & TPS80031_EXT_PWR_REQ))
+		return 0;
+
+	if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ1) {
+		res_ass_reg = TPS80031_PREQ1_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 5;
+	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ2) {
+		res_ass_reg = TPS80031_PREQ2_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 6;
+	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ3) {
+		res_ass_reg = TPS80031_PREQ3_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 7;
+	}
+
+	/* Configure REQ_ASS registers */
+	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, res_ass_reg,
+					BIT(preq_bit & 0x7));
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x setbit failed, err = %d\n",
+				res_ass_reg, ret);
+		return ret;
+	}
+
+	/* Unmask the PREQ */
+	ret = tps80031_clr_bits(dev, TPS80031_SLAVE_ID1,
+			TPS80031_PHOENIX_MSK_TRANSITION, BIT(preq_mask_bit));
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x clrbit failed, err = %d\n",
+			TPS80031_PHOENIX_MSK_TRANSITION, ret);
+		return ret;
+	}
+
+	/* Switch regulator control to resource now */
+	if (ext_ctrl_flag & (TPS80031_PWR_REQ_INPUT_PREQ2 |
+					TPS80031_PWR_REQ_INPUT_PREQ3)) {
+		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, state_reg_add,
+						0x0, TPS80031_STATE_MASK);
+		if (ret < 0)
+			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
+				state_reg_add, ret);
+	} else {
+		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, trans_reg_add,
+				TPS80031_TRANS_SLEEP_OFF,
+				TPS80031_TRANS_SLEEP_MASK);
+		if (ret < 0)
+			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
+				trans_reg_add, ret);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tps80031_ext_power_req_config);
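+
+/*
+ * Usage sketch (hypothetical values): a regulator driver that wants
+ * its rail gated by the PREQ1 input could call
+ *
+ *	ret = tps80031_ext_power_req_config(dev,
+ *			TPS80031_EXT_PWR_REQ | TPS80031_PWR_REQ_INPUT_PREQ1,
+ *			preq_bit, state_reg, trans_reg);
+ *
+ * where preq_bit selects the resource's bit within the
+ * PREQ1_RES_ASS_A..C window, and state_reg/trans_reg are that rail's
+ * CFG_STATE/CFG_TRANS register addresses.
+ */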
+
+static void tps80031_power_off(void)
+{
+	dev_info(tps80031_power_off_dev->dev, "switching off PMU\n");
+	tps80031_write(tps80031_power_off_dev->dev, TPS80031_SLAVE_ID1,
+				TPS80031_PHOENIX_DEV_ON, TPS80031_DEVOFF);
+}
+
+static void tps80031_pupd_init(struct tps80031 *tps80031,
+			       struct tps80031_platform_data *pdata)
+{
+	struct tps80031_pupd_init_data *pupd_init_data = pdata->pupd_init_data;
+	int data_size = pdata->pupd_init_data_size;
+	int i;
+
+	for (i = 0; i < data_size; ++i) {
+		struct tps80031_pupd_init_data *pupd_init = &pupd_init_data[i];
+		const struct tps80031_pupd_data *pupd =
+			&tps80031_pupds[pupd_init->input_pin];
+		u8 update_value = 0;
+		u8 update_mask = pupd->pulldown_bit | pupd->pullup_bit;
+
+		if (pupd_init->setting == TPS80031_PUPD_PULLDOWN)
+			update_value = pupd->pulldown_bit;
+		else if (pupd_init->setting == TPS80031_PUPD_PULLUP)
+			update_value = pupd->pullup_bit;
+
+		tps80031_update(tps80031->dev, TPS80031_SLAVE_ID1, pupd->reg,
+				update_value, update_mask);
+	}
+}
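+
+/*
+ * Example platform data (hypothetical board): pull the SIM detect
+ * input down and the MMC detect input up:
+ *
+ *	static struct tps80031_pupd_init_data board_pupd[] = {
+ *		{ .input_pin = TPS80031_SIM, .setting = TPS80031_PUPD_PULLDOWN },
+ *		{ .input_pin = TPS80031_MMC, .setting = TPS80031_PUPD_PULLUP },
+ *	};
+ */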
+
+static int tps80031_init_ext_control(struct tps80031 *tps80031,
+			struct tps80031_platform_data *pdata)
+{
+	struct device *dev = tps80031->dev;
+	int ret;
+	int i;
+
+	/* Clear all PREQ1..PREQ3 resource-assignment registers (banks A..C) */
+	for (i = 0; i < 9; ++i) {
+		ret = tps80031_write(dev, TPS80031_SLAVE_ID1,
+				TPS80031_PREQ1_RES_ASS_A + i, 0);
+		if (ret < 0) {
+			dev_err(dev, "reg 0x%02x write failed, err = %d\n",
+				TPS80031_PREQ1_RES_ASS_A + i, ret);
+			return ret;
+		}
+	}
+
+	/* Mask the PREQ */
+	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1,
+			TPS80031_PHOENIX_MSK_TRANSITION, 0x7 << 5);
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x set_bits failed, err = %d\n",
+			TPS80031_PHOENIX_MSK_TRANSITION, ret);
+		return ret;
+	}
+	return ret;
+}
+
+static int __devinit tps80031_irq_init(struct tps80031 *tps80031, int irq,
+				int irq_base)
+{
+	struct device *dev = tps80031->dev;
+	int i, ret;
+
+	/*
+	 * The MSK_STS registers control whether the status registers are
+	 * updated when an interrupt occurs, and the MSK_LINE registers
+	 * control whether the status is passed on to the actual
+	 * interrupt line.  As per the datasheet:
+	 * When INT_MSK_LINE[i] is set to 1, the associated interrupt
+	 * number i is masked on the INT line, i.e. no interrupt is
+	 * generated on the INT line.
+	 * When INT_MSK_LINE[i] is set to 0, the associated interrupt
+	 * number i is line enabled: an interrupt is generated on the
+	 * INT line.
+	 * In either case the INT_STS[i] status bit may or may not be
+	 * updated; that depends only on the INT_MSK_STS[i] configuration
+	 * register bit.
+	 *
+	 * When INT_MSK_STS[i] is set to 1, the associated interrupt
+	 * number i is status masked, i.e. no interrupt is stored in the
+	 * INT_STS[i] status bit, and no interrupt number i is generated
+	 * on the INT line even if INT_MSK_LINE[i] is set to 0.
+	 * When INT_MSK_STS[i] is set to 0, the associated interrupt
+	 * number i is status enabled: the interrupt status is updated in
+	 * the INT_STS[i] register, and the interrupt may or may not be
+	 * generated on the INT line depending on INT_MSK_LINE[i].
+	 */
+	for (i = 0; i < 3; i++)
+		tps80031_write(dev, TPS80031_SLAVE_ID2,
+				TPS80031_INT_MSK_STS_A + i, 0x00);
+
+	ret = regmap_add_irq_chip(tps80031->regmap[TPS80031_SLAVE_ID2], irq,
+			IRQF_ONESHOT, irq_base,
+			&tps80031_irq_chip, &tps80031->irq_data);
+	if (ret < 0) {
+		dev_err(dev, "add irq failed, err = %d\n", ret);
+		return ret;
+	}
+	return ret;
+}
+
+static bool rd_wr_reg_id0(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SMPS1_CFG_FORCE ... TPS80031_SMPS2_CFG_VOLTAGE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id1(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SECONDS_REG ... TPS80031_RTC_RESET_STATUS_REG:
+	case TPS80031_VALIDITY0 ... TPS80031_VALIDITY7:
+	case TPS80031_PHOENIX_START_CONDITION ... TPS80031_KEY_PRESS_DUR_CFG:
+	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
+	case TPS80031_BROADCAST_ADDR_ALL ... TPS80031_BROADCAST_ADDR_CLK_RST:
+	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
+	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
+	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
+	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
+	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
+	case TPS80031_BACKUP_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_volatile_reg_id1(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
+	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
+	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
+	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
+	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
+	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id2(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_USB_VENDOR_ID_LSB ... TPS80031_USB_OTG_REVISION:
+	case TPS80031_GPADC_CTRL ... TPS80031_CTRL_P1:
+	case TPS80031_RTCH0_LSB ... TPS80031_GPCH0_MSB:
+	case TPS80031_TOGGLE1 ... TPS80031_VIBMODE:
+	case TPS80031_PWM1ON ... TPS80031_PWM2OFF:
+	case TPS80031_FG_REG_00 ... TPS80031_FG_REG_11:
+	case TPS80031_INT_STS_A ... TPS80031_INT_MSK_STS_C:
+	case TPS80031_CONTROLLER_CTRL2 ... TPS80031_LED_PWM_CTRL2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id3(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_GPADC_TRIM0 ... TPS80031_GPADC_TRIM18:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config tps80031_regmap_configs[] = {
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id0,
+		.readable_reg = rd_wr_reg_id0,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id1,
+		.readable_reg = rd_wr_reg_id1,
+		.volatile_reg = is_volatile_reg_id1,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id2,
+		.readable_reg = rd_wr_reg_id2,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id3,
+		.readable_reg = rd_wr_reg_id3,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+};
+
+static int __devinit tps80031_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct tps80031_platform_data *pdata = client->dev.platform_data;
+	struct tps80031 *tps80031;
+	int ret;
+	uint8_t es_version;
+	uint8_t ep_ver;
+	int i;
+
+	if (!pdata) {
+		dev_err(&client->dev, "tps80031 requires platform data\n");
+		return -EINVAL;
+	}
+
+	tps80031 = devm_kzalloc(&client->dev, sizeof(*tps80031), GFP_KERNEL);
+	if (!tps80031) {
+		dev_err(&client->dev, "Malloc failed for tps80031\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031_slave_address[i] == client->addr)
+			tps80031->clients[i] = client;
+		else
+			tps80031->clients[i] = i2c_new_dummy(client->adapter,
+						tps80031_slave_address[i]);
+		if (!tps80031->clients[i]) {
+			dev_err(&client->dev, "can't attach client %d\n", i);
+			ret = -ENOMEM;
+			goto fail_client_reg;
+		}
+
+		i2c_set_clientdata(tps80031->clients[i], tps80031);
+		tps80031->regmap[i] = devm_regmap_init_i2c(tps80031->clients[i],
+					&tps80031_regmap_configs[i]);
+		if (IS_ERR(tps80031->regmap[i])) {
+			ret = PTR_ERR(tps80031->regmap[i]);
+			dev_err(&client->dev,
+				"regmap %d init failed, err %d\n", i, ret);
+			goto fail_client_reg;
+		}
+	}
+
+	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
+			TPS80031_JTAGVERNUM, &es_version);
+	if (ret < 0) {
+		dev_err(&client->dev,
+			"Silicon version number read failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
+			TPS80031_EPROM_REV, &ep_ver);
+	if (ret < 0) {
+		dev_err(&client->dev,
+			"Silicon eeprom version read failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n",
+					es_version, ep_ver);
+	tps80031->es_version = es_version;
+	tps80031->dev = &client->dev;
+	i2c_set_clientdata(client, tps80031);
+	tps80031->chip_info = id->driver_data;
+
+	ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base);
+	if (ret) {
+		dev_err(&client->dev, "IRQ init failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	tps80031_pupd_init(tps80031, pdata);
+
+	tps80031_init_ext_control(tps80031, pdata);
+
+	ret = mfd_add_devices(tps80031->dev, -1,
+			tps80031_cell, ARRAY_SIZE(tps80031_cell),
+			NULL, 0,
+			regmap_irq_get_domain(tps80031->irq_data));
+	if (ret < 0) {
+		dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret);
+		goto fail_mfd_add;
+	}
+
+	if (pdata->use_power_off && !pm_power_off) {
+		tps80031_power_off_dev = tps80031;
+		pm_power_off = tps80031_power_off;
+	}
+	return 0;
+
+fail_mfd_add:
+	regmap_del_irq_chip(client->irq, tps80031->irq_data);
+
+fail_client_reg:
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031->clients[i] && (tps80031->clients[i] != client))
+			i2c_unregister_device(tps80031->clients[i]);
+	}
+	return ret;
+}
+
+static int __devexit tps80031_remove(struct i2c_client *client)
+{
+	struct tps80031 *tps80031 = i2c_get_clientdata(client);
+	int i;
+
+	if (tps80031_power_off_dev == tps80031) {
+		tps80031_power_off_dev = NULL;
+		pm_power_off = NULL;
+	}
+
+	mfd_remove_devices(tps80031->dev);
+
+	regmap_del_irq_chip(client->irq, tps80031->irq_data);
+
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031->clients[i] != client)
+			i2c_unregister_device(tps80031->clients[i]);
+	}
+	return 0;
+}
+
+static const struct i2c_device_id tps80031_id_table[] = {
+	{ "tps80031", TPS80031 },
+	{ "tps80032", TPS80032 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tps80031_id_table);
+
+static struct i2c_driver tps80031_driver = {
+	.driver	= {
+		.name	= "tps80031",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= tps80031_probe,
+	.remove		= __devexit_p(tps80031_remove),
+	.id_table	= tps80031_id_table,
+};
+
+static int __init tps80031_init(void)
+{
+	return i2c_add_driver(&tps80031_driver);
+}
+subsys_initcall(tps80031_init);
+
+static void __exit tps80031_exit(void)
+{
+	i2c_del_driver(&tps80031_driver);
+}
+module_exit(tps80031_exit);
+
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_DESCRIPTION("TPS80031 core driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 11b76c0..4f3baad 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -32,6 +32,7 @@
 #include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/device.h>
@@ -65,9 +66,6 @@
 
 /* Triton Core internal information (BEGIN) */
 
-/* Last - for index max*/
-#define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
-
 #define TWL_NUM_SLAVES		4
 
 #define SUB_CHIP_ID0 0
@@ -171,13 +169,7 @@
 /* Structure for each TWL4030/TWL6030 Slave */
 struct twl_client {
 	struct i2c_client *client;
-	u8 address;
-
-	/* max numb of i2c_msg required is for read =2 */
-	struct i2c_msg xfer_msg[2];
-
-	/* To lock access to xfer_msg */
-	struct mutex xfer_lock;
+	struct regmap *regmap;
 };
 
 static struct twl_client twl_modules[TWL_NUM_SLAVES];
@@ -189,7 +181,7 @@
 };
 static struct twl_mapping *twl_map;
 
-static struct twl_mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
+static struct twl_mapping twl4030_map[] = {
 	/*
 	 * NOTE:  don't change this table without updating the
 	 * <linux/i2c/twl.h> defines for TWL4030_MODULE_*
@@ -197,34 +189,62 @@
 	 */
 
 	{ 0, TWL4030_BASEADD_USB },
-
 	{ 1, TWL4030_BASEADD_AUDIO_VOICE },
 	{ 1, TWL4030_BASEADD_GPIO },
 	{ 1, TWL4030_BASEADD_INTBR },
 	{ 1, TWL4030_BASEADD_PIH },
-	{ 1, TWL4030_BASEADD_TEST },
 
+	{ 1, TWL4030_BASEADD_TEST },
 	{ 2, TWL4030_BASEADD_KEYPAD },
 	{ 2, TWL4030_BASEADD_MADC },
 	{ 2, TWL4030_BASEADD_INTERRUPTS },
 	{ 2, TWL4030_BASEADD_LED },
+
 	{ 2, TWL4030_BASEADD_MAIN_CHARGE },
 	{ 2, TWL4030_BASEADD_PRECHARGE },
 	{ 2, TWL4030_BASEADD_PWM0 },
 	{ 2, TWL4030_BASEADD_PWM1 },
 	{ 2, TWL4030_BASEADD_PWMA },
+
 	{ 2, TWL4030_BASEADD_PWMB },
 	{ 2, TWL5031_BASEADD_ACCESSORY },
 	{ 2, TWL5031_BASEADD_INTERRUPTS },
-
 	{ 3, TWL4030_BASEADD_BACKUP },
 	{ 3, TWL4030_BASEADD_INT },
+
 	{ 3, TWL4030_BASEADD_PM_MASTER },
 	{ 3, TWL4030_BASEADD_PM_RECEIVER },
 	{ 3, TWL4030_BASEADD_RTC },
 	{ 3, TWL4030_BASEADD_SECURED_REG },
 };
 
+static struct regmap_config twl4030_regmap_config[4] = {
+	{
+		/* Address 0x48 */
+		.reg_bits = 8,
+		.val_bits = 8,
+		.max_register = 0xff,
+	},
+	{
+		/* Address 0x49 */
+		.reg_bits = 8,
+		.val_bits = 8,
+		.max_register = 0xff,
+	},
+	{
+		/* Address 0x4a */
+		.reg_bits = 8,
+		.val_bits = 8,
+		.max_register = 0xff,
+	},
+	{
+		/* Address 0x4b */
+		.reg_bits = 8,
+		.val_bits = 8,
+		.max_register = 0xff,
+	},
+};
+
 static struct twl_mapping twl6030_map[] = {
 	/*
 	 * NOTE:  don't change this table without updating the
@@ -254,14 +274,35 @@
 	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
 	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
 	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+
 	{ SUB_CHIP_ID0, TWL6030_BASEADD_PM_MASTER },
 	{ SUB_CHIP_ID0, TWL6030_BASEADD_PM_SLAVE_MISC },
-
 	{ SUB_CHIP_ID0, TWL6030_BASEADD_RTC },
 	{ SUB_CHIP_ID0, TWL6030_BASEADD_MEM },
 	{ SUB_CHIP_ID1, TWL6025_BASEADD_CHARGER },
 };
 
+static struct regmap_config twl6030_regmap_config[3] = {
+	{
+		/* Address 0x48 */
+		.reg_bits = 8,
+		.val_bits = 8,
+		.max_register = 0xff,
+	},
+	{
+		/* Address 0x49 */
+		.reg_bits = 8,
+		.val_bits = 8,
+		.max_register = 0xff,
+	},
+	{
+		/* Address 0x4a */
+		.reg_bits = 8,
+		.val_bits = 8,
+		.max_register = 0xff,
+	},
+};
+
 /*----------------------------------------------------------------------*/
 
 /* Exported Functions */
@@ -283,9 +324,8 @@
 	int ret;
 	int sid;
 	struct twl_client *twl;
-	struct i2c_msg *msg;
 
-	if (unlikely(mod_no > TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
@@ -301,32 +341,14 @@
 	}
 	twl = &twl_modules[sid];
 
-	mutex_lock(&twl->xfer_lock);
-	/*
-	 * [MSG1]: fill the register address data
-	 * fill the data Tx buffer
-	 */
-	msg = &twl->xfer_msg[0];
-	msg->addr = twl->address;
-	msg->len = num_bytes + 1;
-	msg->flags = 0;
-	msg->buf = value;
-	/* over write the first byte of buffer with the register address */
-	*value = twl_map[mod_no].base + reg;
-	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 1);
-	mutex_unlock(&twl->xfer_lock);
+	ret = regmap_bulk_write(twl->regmap, twl_map[mod_no].base + reg,
+				value, num_bytes);
 
-	/* i2c_transfer returns number of messages transferred */
-	if (ret != 1) {
-		pr_err("%s: i2c_write failed to transfer all messages\n",
-			DRIVER_NAME);
-		if (ret < 0)
-			return ret;
-		else
-			return -EIO;
-	} else {
-		return 0;
-	}
+	if (ret)
+		pr_err("%s: Write failed (mod %d, reg 0x%02x count %d)\n",
+		       DRIVER_NAME, mod_no, reg, num_bytes);
+
+	return ret;
 }
 EXPORT_SYMBOL(twl_i2c_write);
 
@@ -342,12 +364,10 @@
 int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 {
 	int ret;
-	u8 val;
 	int sid;
 	struct twl_client *twl;
-	struct i2c_msg *msg;
 
-	if (unlikely(mod_no > TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
@@ -363,34 +383,14 @@
 	}
 	twl = &twl_modules[sid];
 
-	mutex_lock(&twl->xfer_lock);
-	/* [MSG1] fill the register address data */
-	msg = &twl->xfer_msg[0];
-	msg->addr = twl->address;
-	msg->len = 1;
-	msg->flags = 0;	/* Read the register value */
-	val = twl_map[mod_no].base + reg;
-	msg->buf = &val;
-	/* [MSG2] fill the data rx buffer */
-	msg = &twl->xfer_msg[1];
-	msg->addr = twl->address;
-	msg->flags = I2C_M_RD;	/* Read the register value */
-	msg->len = num_bytes;	/* only n bytes */
-	msg->buf = value;
-	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 2);
-	mutex_unlock(&twl->xfer_lock);
+	ret = regmap_bulk_read(twl->regmap, twl_map[mod_no].base + reg,
+			       value, num_bytes);
 
-	/* i2c_transfer returns number of messages transferred */
-	if (ret != 2) {
-		pr_err("%s: i2c_read failed to transfer all messages\n",
-			DRIVER_NAME);
-		if (ret < 0)
-			return ret;
-		else
-			return -EIO;
-	} else {
-		return 0;
-	}
+	if (ret)
+		pr_err("%s: Read failed (mod %d, reg 0x%02x count %d)\n",
+		       DRIVER_NAME, mod_no, reg, num_bytes);
+
+	return ret;
 }
 EXPORT_SYMBOL(twl_i2c_read);
 
@@ -404,12 +404,7 @@
  */
 int twl_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
 {
-
-	/* 2 bytes offset 1 contains the data offset 0 is used by i2c_write */
-	u8 temp_buffer[2] = { 0 };
-	/* offset 1 contains the data */
-	temp_buffer[1] = value;
-	return twl_i2c_write(mod_no, temp_buffer, reg, 1);
+	return twl_i2c_write(mod_no, &value, reg, 1);
 }
 EXPORT_SYMBOL(twl_i2c_write_u8);
 
@@ -646,8 +641,9 @@
 			return PTR_ERR(child);
 	}
 
-	if (IS_ENABLED(CONFIG_TWL4030_MADC) && pdata->madc) {
-		child = add_child(2, "twl4030_madc",
+	if (IS_ENABLED(CONFIG_TWL4030_MADC) && pdata->madc &&
+	    twl_class_is_4030()) {
+		child = add_child(SUB_CHIP_ID2, "twl4030_madc",
 				pdata->madc, sizeof(*pdata->madc),
 				true, irq_base + MADC_INTR_OFFSET, 0);
 		if (IS_ERR(child))
@@ -663,15 +659,21 @@
 		 * HW security concerns, and "least privilege".
 		 */
 		sub_chip_id = twl_map[TWL_MODULE_RTC].sid;
-		child = add_child(sub_chip_id, "twl_rtc",
-				NULL, 0,
+		child = add_child(sub_chip_id, "twl_rtc", NULL, 0,
 				true, irq_base + RTC_INTR_OFFSET, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
 
-	if (IS_ENABLED(CONFIG_PWM_TWL6030) && twl_class_is_6030()) {
-		child = add_child(SUB_CHIP_ID1, "twl6030-pwm", NULL, 0,
+	if (IS_ENABLED(CONFIG_PWM_TWL)) {
+		child = add_child(SUB_CHIP_ID1, "twl-pwm", NULL, 0,
+				  false, 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (IS_ENABLED(CONFIG_PWM_TWL_LED)) {
+		child = add_child(SUB_CHIP_ID1, "twl-pwmled", NULL, 0,
 				  false, 0, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
@@ -723,9 +725,8 @@
 
 		}
 
-		child = add_child(0, "twl4030_usb",
-				pdata->usb, sizeof(*pdata->usb),
-				true,
+		child = add_child(SUB_CHIP_ID0, "twl4030_usb",
+				pdata->usb, sizeof(*pdata->usb), true,
 				/* irq0 = USB_PRES, irq1 = USB */
 				irq_base + USB_PRES_INTR_OFFSET,
 				irq_base + USB_INTR_OFFSET);
@@ -773,9 +774,8 @@
 
 		pdata->usb->features = features;
 
-		child = add_child(0, "twl6030_usb",
-			pdata->usb, sizeof(*pdata->usb),
-			true,
+		child = add_child(SUB_CHIP_ID0, "twl6030_usb",
+			pdata->usb, sizeof(*pdata->usb), true,
 			/* irq1 = VBUS_PRES, irq0 = USB ID */
 			irq_base + USBOTG_INTR_OFFSET,
 			irq_base + USB_PRES_INTR_OFFSET);
@@ -799,22 +799,22 @@
 	}
 
 	if (IS_ENABLED(CONFIG_TWL4030_WATCHDOG) && twl_class_is_4030()) {
-		child = add_child(0, "twl4030_wdt", NULL, 0, false, 0, 0);
+		child = add_child(SUB_CHIP_ID3, "twl4030_wdt", NULL, 0,
+				  false, 0, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
 
 	if (IS_ENABLED(CONFIG_INPUT_TWL4030_PWRBUTTON) && twl_class_is_4030()) {
-		child = add_child(1, "twl4030_pwrbutton",
-				NULL, 0, true, irq_base + 8 + 0, 0);
+		child = add_child(SUB_CHIP_ID3, "twl4030_pwrbutton", NULL, 0,
+				  true, irq_base + 8 + 0, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
 
 	if (IS_ENABLED(CONFIG_MFD_TWL4030_AUDIO) && pdata->audio &&
 	    twl_class_is_4030()) {
-		sub_chip_id = twl_map[TWL_MODULE_AUDIO_VOICE].sid;
-		child = add_child(sub_chip_id, "twl4030-audio",
+		child = add_child(SUB_CHIP_ID1, "twl4030-audio",
 				pdata->audio, sizeof(*pdata->audio),
 				false, 0, 0);
 		if (IS_ERR(child))
@@ -1054,7 +1054,7 @@
 
 	if (IS_ENABLED(CONFIG_CHARGER_TWL4030) && pdata->bci &&
 			!(features & (TPS_SUBSET | TWL5031))) {
-		child = add_child(3, "twl4030_bci",
+		child = add_child(SUB_CHIP_ID3, "twl4030_bci",
 				pdata->bci, sizeof(*pdata->bci), false,
 				/* irq0 = CHG_PRES, irq1 = BCI */
 				irq_base + BCI_PRES_INTR_OFFSET,
@@ -1077,8 +1077,8 @@
 {
 	int e = 0;
 
-	e = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	e = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, 0,
+			     TWL4030_PM_MASTER_PROTECT_KEY);
 	return e;
 }
 
@@ -1086,12 +1086,10 @@
 {
 	int e = 0;
 
-	e |= twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			TWL4030_PM_MASTER_KEY_CFG1,
-			TWL4030_PM_MASTER_PROTECT_KEY);
-	e |= twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			TWL4030_PM_MASTER_KEY_CFG2,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	e |= twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1,
+			      TWL4030_PM_MASTER_PROTECT_KEY);
+	e |= twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG2,
+			      TWL4030_PM_MASTER_PROTECT_KEY);
 
 	return e;
 }
@@ -1176,6 +1174,7 @@
 	struct twl4030_platform_data	*pdata = client->dev.platform_data;
 	struct device_node		*node = client->dev.of_node;
 	struct platform_device		*pdev;
+	struct regmap_config		*twl_regmap_config;
 	int				irq_base = 0;
 	int				status;
 	unsigned			i, num_slaves;
@@ -1229,22 +1228,23 @@
 	if ((id->driver_data) & TWL6030_CLASS) {
 		twl_id = TWL6030_CLASS_ID;
 		twl_map = &twl6030_map[0];
+		twl_regmap_config = twl6030_regmap_config;
 		num_slaves = TWL_NUM_SLAVES - 1;
 	} else {
 		twl_id = TWL4030_CLASS_ID;
 		twl_map = &twl4030_map[0];
+		twl_regmap_config = twl4030_regmap_config;
 		num_slaves = TWL_NUM_SLAVES;
 	}
 
 	for (i = 0; i < num_slaves; i++) {
 		struct twl_client *twl = &twl_modules[i];
 
-		twl->address = client->addr + i;
 		if (i == 0) {
 			twl->client = client;
 		} else {
 			twl->client = i2c_new_dummy(client->adapter,
-					twl->address);
+						    client->addr + i);
 			if (!twl->client) {
 				dev_err(&client->dev,
 					"can't attach client %d\n", i);
@@ -1252,7 +1252,16 @@
 				goto fail;
 			}
 		}
-		mutex_init(&twl->xfer_lock);
+
+		twl->regmap = devm_regmap_init_i2c(twl->client,
+						   &twl_regmap_config[i]);
+		if (IS_ERR(twl->regmap)) {
+			status = PTR_ERR(twl->regmap);
+			dev_err(&client->dev,
+				"Failed to allocate regmap %d, err: %d\n", i,
+				status);
+			goto fail;
+		}
 	}
 
 	inuse = true;
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index cdd1173..a5f9888 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -295,8 +295,8 @@
 	irqreturn_t	ret;
 	u8		pih_isr;
 
-	ret = twl_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr,
-			REG_PIH_ISR_P1);
+	ret = twl_i2c_read_u8(TWL_MODULE_PIH, &pih_isr,
+			      REG_PIH_ISR_P1);
 	if (ret) {
 		pr_warning("twl4030: I2C error %d reading PIH ISR\n", ret);
 		return IRQ_NONE;
@@ -501,7 +501,7 @@
 		} imr;
 
 		/* byte[0] gets overwritten as we write ... */
-		imr.word = cpu_to_le32(agent->imr << 8);
+		imr.word = cpu_to_le32(agent->imr);
 		agent->imr_change_pending = false;
 
 		/* write the whole mask ... simpler than subsetting it */
@@ -526,7 +526,7 @@
 		 * any processor on the other IRQ line, EDR registers are
 		 * shared.
 		 */
-		status = twl_i2c_read(sih->module, bytes + 1,
+		status = twl_i2c_read(sih->module, bytes,
 				sih->edr_offset, sih->bytes_edr);
 		if (status) {
 			pr_err("twl4030: %s, %s --> %d\n", __func__,
@@ -538,7 +538,7 @@
 		while (edge_change) {
 			int		i = fls(edge_change) - 1;
 			struct irq_data	*idata;
-			int		byte = 1 + (i >> 2);
+			int		byte = i >> 2;
 			int		off = (i & 0x3) * 2;
 			unsigned int	type;
 
diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c
index a39dcf3..88ff9dc 100644
--- a/drivers/mfd/twl4030-madc.c
+++ b/drivers/mfd/twl4030-madc.c
@@ -173,7 +173,7 @@
 
 	volt = (raw_volt * TEMP_STEP_SIZE) / TEMP_PSR_R;
 	/* Getting and calculating the supply current in microamperes */
-	ret = twl_i2c_read_u8(TWL4030_MODULE_MAIN_CHARGE, &val,
+	ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE, &val,
 		REG_BCICTL2);
 	if (ret < 0)
 		return ret;
@@ -196,7 +196,7 @@
 	int ret;
 	u8 val;
 
-	ret = twl_i2c_read_u8(TWL4030_MODULE_MAIN_CHARGE, &val,
+	ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE, &val,
 		TWL4030_BCI_BCICTL1);
 	if (ret)
 		return ret;
@@ -635,7 +635,7 @@
 	int ret;
 	u8 regval;
 
-	ret = twl_i2c_read_u8(TWL4030_MODULE_MAIN_CHARGE,
+	ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE,
 			      &regval, TWL4030_BCI_BCICTL1);
 	if (ret) {
 		dev_err(madc->dev, "unable to read BCICTL1 reg 0x%X",
@@ -646,7 +646,7 @@
 		regval |= chan ? TWL4030_BCI_ITHEN : TWL4030_BCI_TYPEN;
 	else
 		regval &= chan ? ~TWL4030_BCI_ITHEN : ~TWL4030_BCI_TYPEN;
-	ret = twl_i2c_write_u8(TWL4030_MODULE_MAIN_CHARGE,
+	ret = twl_i2c_write_u8(TWL_MODULE_MAIN_CHARGE,
 			       regval, TWL4030_BCI_BCICTL1);
 	if (ret) {
 		dev_err(madc->dev, "unable to write BCICTL1 reg 0x%X\n",
@@ -668,7 +668,7 @@
 	u8 regval;
 	int ret;
 
-	ret = twl_i2c_read_u8(TWL4030_MODULE_MAIN_CHARGE,
+	ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE,
 			      &regval, TWL4030_MADC_CTRL1);
 	if (ret) {
 		dev_err(madc->dev, "unable to read madc ctrl1 reg 0x%X\n",
@@ -725,7 +725,7 @@
 	if (ret < 0)
 		goto err_current_generator;
 
-	ret = twl_i2c_read_u8(TWL4030_MODULE_MAIN_CHARGE,
+	ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE,
 			      &regval, TWL4030_BCI_BCICTL1);
 	if (ret) {
 		dev_err(&pdev->dev, "unable to read reg BCI CTL1 0x%X\n",
@@ -733,7 +733,7 @@
 		goto err_i2c;
 	}
 	regval |= TWL4030_BCI_MESBAT;
-	ret = twl_i2c_write_u8(TWL4030_MODULE_MAIN_CHARGE,
+	ret = twl_i2c_write_u8(TWL_MODULE_MAIN_CHARGE,
 			       regval, TWL4030_BCI_BCICTL1);
 	if (ret) {
 		dev_err(&pdev->dev, "unable to write reg BCI Ctl1 0x%X\n",
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index a533206..4dae241 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -128,12 +128,10 @@
 {
 	int err;
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
-				R_MEMORY_ADDRESS);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, address, R_MEMORY_ADDRESS);
 	if (err)
 		goto out;
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, byte,
-				R_MEMORY_DATA);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, byte, R_MEMORY_DATA);
 out:
 	return err;
 }
@@ -189,19 +187,16 @@
 	u8 data;
 
 	/* Set SLEEP to ACTIVE SEQ address for P3 */
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
-				R_SEQ_ADD_S2A3);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, address, R_SEQ_ADD_S2A3);
 	if (err)
 		goto out;
 
 	/* P3 LVL_WAKEUP should be on LEVEL */
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
-				R_P3_SW_EVENTS);
+	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &data, R_P3_SW_EVENTS);
 	if (err)
 		goto out;
 	data |= LVL_WAKEUP;
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
-				R_P3_SW_EVENTS);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, data, R_P3_SW_EVENTS);
 out:
 	if (err)
 		pr_err("TWL4030 wakeup sequence for P3 config error\n");
@@ -214,43 +209,38 @@
 	u8 data;
 
 	/* Set SLEEP to ACTIVE SEQ address for P1 and P2 */
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
-				R_SEQ_ADD_S2A12);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, address, R_SEQ_ADD_S2A12);
 	if (err)
 		goto out;
 
 	/* P1/P2 LVL_WAKEUP should be on LEVEL */
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
-				R_P1_SW_EVENTS);
+	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &data, R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
 	data |= LVL_WAKEUP;
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
-				R_P1_SW_EVENTS);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, data, R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
-				R_P2_SW_EVENTS);
+	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &data, R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
 	data |= LVL_WAKEUP;
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
-				R_P2_SW_EVENTS);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, data, R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
 	if (machine_is_omap_3430sdp() || machine_is_omap_ldp()) {
 		/* Disabling AC charger effect on sleep-active transitions */
-		err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
-					R_CFG_P1_TRANSITION);
+		err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &data,
+				      R_CFG_P1_TRANSITION);
 		if (err)
 			goto out;
 		data &= ~(1<<1);
-		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data ,
-					R_CFG_P1_TRANSITION);
+		err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, data,
+				       R_CFG_P1_TRANSITION);
 		if (err)
 			goto out;
 	}
@@ -267,8 +257,7 @@
 	int err;
 
 	/* Set ACTIVE to SLEEP SEQ address in T2 memory*/
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
-				R_SEQ_ADD_A2S);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, address, R_SEQ_ADD_A2S);
 
 	if (err)
 		pr_err("TWL4030 sleep sequence config error\n");
@@ -282,42 +271,35 @@
 	u8 rd_data;
 
 	/* Set WARM RESET SEQ address for P1 */
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
-				R_SEQ_ADD_WARM);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, address, R_SEQ_ADD_WARM);
 	if (err)
 		goto out;
 
 	/* P1/P2/P3 enable WARMRESET */
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
-				R_P1_SW_EVENTS);
+	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &rd_data, R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
 	rd_data |= ENABLE_WARMRESET;
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
-				R_P1_SW_EVENTS);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, rd_data, R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
-				R_P2_SW_EVENTS);
+	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &rd_data, R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
 	rd_data |= ENABLE_WARMRESET;
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
-				R_P2_SW_EVENTS);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, rd_data, R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
-				R_P3_SW_EVENTS);
+	err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &rd_data, R_P3_SW_EVENTS);
 	if (err)
 		goto out;
 
 	rd_data |= ENABLE_WARMRESET;
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
-				R_P3_SW_EVENTS);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, rd_data, R_P3_SW_EVENTS);
 out:
 	if (err)
 		pr_err("TWL4030 warmreset seq config error\n");
@@ -341,7 +323,7 @@
 	rconfig_addr = res_config_addrs[rconfig->resource];
 
 	/* Set resource group */
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &grp,
+	err = twl_i2c_read_u8(TWL_MODULE_PM_RECEIVER, &grp,
 			      rconfig_addr + DEV_GRP_OFFSET);
 	if (err) {
 		pr_err("TWL4030 Resource %d group could not be read\n",
@@ -352,7 +334,7 @@
 	if (rconfig->devgroup != TWL4030_RESCONFIG_UNDEF) {
 		grp &= ~DEV_GRP_MASK;
 		grp |= rconfig->devgroup << DEV_GRP_SHIFT;
-		err = twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+		err = twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER,
 				       grp, rconfig_addr + DEV_GRP_OFFSET);
 		if (err < 0) {
 			pr_err("TWL4030 failed to program devgroup\n");
@@ -361,7 +343,7 @@
 	}
 
 	/* Set resource types */
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &type,
+	err = twl_i2c_read_u8(TWL_MODULE_PM_RECEIVER, &type,
 				rconfig_addr + TYPE_OFFSET);
 	if (err < 0) {
 		pr_err("TWL4030 Resource %d type could not be read\n",
@@ -379,7 +361,7 @@
 		type |= rconfig->type2 << TYPE2_SHIFT;
 	}
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+	err = twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER,
 				type, rconfig_addr + TYPE_OFFSET);
 	if (err < 0) {
 		pr_err("TWL4030 failed to program resource type\n");
@@ -387,7 +369,7 @@
 	}
 
 	/* Set remap states */
-	err = twl_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &remap,
+	err = twl_i2c_read_u8(TWL_MODULE_PM_RECEIVER, &remap,
 			      rconfig_addr + REMAP_OFFSET);
 	if (err < 0) {
 		pr_err("TWL4030 Resource %d remap could not be read\n",
@@ -405,7 +387,7 @@
 		remap |= rconfig->remap_sleep << SLEEP_STATE_SHIFT;
 	}
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+	err = twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER,
 			       remap,
 			       rconfig_addr + REMAP_OFFSET);
 	if (err < 0) {
@@ -463,49 +445,47 @@
 {
 	int err = 0;
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			TWL4030_PM_MASTER_KEY_CFG1,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1,
+			       TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err) {
 		pr_err("twl4030: unable to unlock PROTECT_KEY\n");
 		return err;
 	}
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			TWL4030_PM_MASTER_KEY_CFG2,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG2,
+			       TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err) {
 		pr_err("twl4030: unable to unlock PROTECT_KEY\n");
 		return err;
 	}
 
 	if (flags & TWL4030_WRST_SCRIPT) {
-		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
-				R_SEQ_ADD_WARM);
+		err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, END_OF_SCRIPT,
+				       R_SEQ_ADD_WARM);
 		if (err)
 			return err;
 	}
 	if (flags & TWL4030_WAKEUP12_SCRIPT) {
-		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
-				R_SEQ_ADD_S2A12);
+		err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, END_OF_SCRIPT,
+				       R_SEQ_ADD_S2A12);
 		if (err)
 			return err;
 	}
 	if (flags & TWL4030_WAKEUP3_SCRIPT) {
-		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
-				R_SEQ_ADD_S2A3);
+		err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, END_OF_SCRIPT,
+				       R_SEQ_ADD_S2A3);
 		if (err)
 			return err;
 	}
 	if (flags & TWL4030_SLEEP_SCRIPT) {
-		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
-				R_SEQ_ADD_A2S);
+		err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, END_OF_SCRIPT,
+				       R_SEQ_ADD_A2S);
 		if (err)
 			return err;
 	}
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, 0,
+			       TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err)
 		pr_err("TWL4030 Unable to relock registers\n");
 
@@ -521,7 +501,7 @@
 {
 	int err;
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, PWR_DEVOFF,
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, PWR_DEVOFF,
 			       TWL4030_PM_MASTER_P1_SW_EVENTS);
 	if (err)
 		pr_err("TWL4030 Unable to power off\n");
@@ -534,15 +514,13 @@
 	struct twl4030_resconfig *resconfig;
 	u8 val, address = twl4030_start_script_address;
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			TWL4030_PM_MASTER_KEY_CFG1,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1,
+			       TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err)
 		goto unlock;
 
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			TWL4030_PM_MASTER_KEY_CFG2,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG2,
+			       TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err)
 		goto unlock;
 
@@ -567,14 +545,14 @@
 	/* Board has to be wired properly to use this feature */
 	if (twl4030_scripts->use_poweroff && !pm_power_off) {
 		/* Default for SEQ_OFFSYNC is set, lets ensure this */
-		err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &val,
+		err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &val,
 				      TWL4030_PM_MASTER_CFG_P123_TRANSITION);
 		if (err) {
 			pr_warning("TWL4030 Unable to read registers\n");
 
 		} else if (!(val & SEQ_OFFSYNC)) {
 			val |= SEQ_OFFSYNC;
-			err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, val,
+			err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, val,
 					TWL4030_PM_MASTER_CFG_P123_TRANSITION);
 			if (err) {
 				pr_err("TWL4030 Unable to setup SEQ_OFFSYNC\n");
@@ -586,8 +564,8 @@
 	}
 
 relock:
-	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
-			TWL4030_PM_MASTER_PROTECT_KEY);
+	err = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, 0,
+			       TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err)
 		pr_err("TWL4030 Unable to relock registers\n");
 	return;
diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c
index b76902f..277a8db 100644
--- a/drivers/mfd/twl6030-irq.c
+++ b/drivers/mfd/twl6030-irq.c
@@ -355,7 +355,7 @@
 	static struct irq_chip  twl6030_irq_chip;
 	int			status = 0;
 	int			i;
-	u8			mask[4];
+	u8			mask[3];
 
 	nr_irqs = TWL6030_NR_IRQS;
 
@@ -370,9 +370,9 @@
 
 	irq_end = irq_base + nr_irqs;
 
+	mask[0] = 0xFF;
 	mask[1] = 0xFF;
 	mask[2] = 0xFF;
-	mask[3] = 0xFF;
 
 	/* mask all int lines */
 	twl_i2c_write(TWL_MODULE_PIH, &mask[0], REG_INT_MSK_LINE_A, 3);
diff --git a/drivers/mfd/twl6040-irq.c b/drivers/mfd/twl6040-irq.c
deleted file mode 100644
index 4b42543..0000000
--- a/drivers/mfd/twl6040-irq.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Interrupt controller support for TWL6040
- *
- * Author:     Misael Lopez Cruz <misael.lopez@ti.com>
- *
- * Copyright:   (C) 2011 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/irqdomain.h>
-#include <linux/interrupt.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/twl6040.h>
-
-struct twl6040_irq_data {
-	int mask;
-	int status;
-};
-
-static struct twl6040_irq_data twl6040_irqs[] = {
-	{
-		.mask = TWL6040_THMSK,
-		.status = TWL6040_THINT,
-	},
-	{
-		.mask = TWL6040_PLUGMSK,
-		.status = TWL6040_PLUGINT | TWL6040_UNPLUGINT,
-	},
-	{
-		.mask = TWL6040_HOOKMSK,
-		.status = TWL6040_HOOKINT,
-	},
-	{
-		.mask = TWL6040_HFMSK,
-		.status = TWL6040_HFINT,
-	},
-	{
-		.mask = TWL6040_VIBMSK,
-		.status = TWL6040_VIBINT,
-	},
-	{
-		.mask = TWL6040_READYMSK,
-		.status = TWL6040_READYINT,
-	},
-};
-
-static inline
-struct twl6040_irq_data *irq_to_twl6040_irq(struct twl6040 *twl6040,
-					    int irq)
-{
-	return &twl6040_irqs[irq - twl6040->irq_base];
-}
-
-static void twl6040_irq_lock(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&twl6040->irq_mutex);
-}
-
-static void twl6040_irq_sync_unlock(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-
-	/* write back to hardware any change in irq mask */
-	if (twl6040->irq_masks_cur != twl6040->irq_masks_cache) {
-		twl6040->irq_masks_cache = twl6040->irq_masks_cur;
-		twl6040_reg_write(twl6040, TWL6040_REG_INTMR,
-				  twl6040->irq_masks_cur);
-	}
-
-	mutex_unlock(&twl6040->irq_mutex);
-}
-
-static void twl6040_irq_enable(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-	struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040,
-							       data->irq);
-
-	twl6040->irq_masks_cur &= ~irq_data->mask;
-}
-
-static void twl6040_irq_disable(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-	struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040,
-							       data->irq);
-
-	twl6040->irq_masks_cur |= irq_data->mask;
-}
-
-static struct irq_chip twl6040_irq_chip = {
-	.name			= "twl6040",
-	.irq_bus_lock		= twl6040_irq_lock,
-	.irq_bus_sync_unlock	= twl6040_irq_sync_unlock,
-	.irq_enable		= twl6040_irq_enable,
-	.irq_disable		= twl6040_irq_disable,
-};
-
-static irqreturn_t twl6040_irq_thread(int irq, void *data)
-{
-	struct twl6040 *twl6040 = data;
-	u8 intid;
-	int i;
-
-	intid = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
-
-	/* apply masking and report (backwards to handle READYINT first) */
-	for (i = ARRAY_SIZE(twl6040_irqs) - 1; i >= 0; i--) {
-		if (twl6040->irq_masks_cur & twl6040_irqs[i].mask)
-			intid &= ~twl6040_irqs[i].status;
-		if (intid & twl6040_irqs[i].status)
-			handle_nested_irq(twl6040->irq_base + i);
-	}
-
-	/* ack unmasked irqs */
-	twl6040_reg_write(twl6040, TWL6040_REG_INTID, intid);
-
-	return IRQ_HANDLED;
-}
-
-int twl6040_irq_init(struct twl6040 *twl6040)
-{
-	struct device_node *node = twl6040->dev->of_node;
-	int i, nr_irqs, irq_base, ret;
-	u8 val;
-
-	mutex_init(&twl6040->irq_mutex);
-
-	/* mask the individual interrupt sources */
-	twl6040->irq_masks_cur = TWL6040_ALLINT_MSK;
-	twl6040->irq_masks_cache = TWL6040_ALLINT_MSK;
-	twl6040_reg_write(twl6040, TWL6040_REG_INTMR, TWL6040_ALLINT_MSK);
-
-	nr_irqs = ARRAY_SIZE(twl6040_irqs);
-
-	irq_base = irq_alloc_descs(-1, 0, nr_irqs, 0);
-	if (IS_ERR_VALUE(irq_base)) {
-		dev_err(twl6040->dev, "Fail to allocate IRQ descs\n");
-		return irq_base;
-	}
-	twl6040->irq_base = irq_base;
-
-	irq_domain_add_legacy(node, ARRAY_SIZE(twl6040_irqs), irq_base, 0,
-			      &irq_domain_simple_ops, NULL);
-
-	/* Register them with genirq */
-	for (i = irq_base; i < irq_base + nr_irqs; i++) {
-		irq_set_chip_data(i, twl6040);
-		irq_set_chip_and_handler(i, &twl6040_irq_chip,
-					 handle_level_irq);
-		irq_set_nested_thread(i, 1);
-
-		/* ARM needs us to explicitly flag the IRQ as valid
-		 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-		set_irq_flags(i, IRQF_VALID);
-#else
-		irq_set_noprobe(i);
-#endif
-	}
-
-	ret = request_threaded_irq(twl6040->irq, NULL, twl6040_irq_thread,
-				   IRQF_ONESHOT, "twl6040", twl6040);
-	if (ret) {
-		dev_err(twl6040->dev, "failed to request IRQ %d: %d\n",
-			twl6040->irq, ret);
-		return ret;
-	}
-
-	/* reset interrupts */
-	val = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
-
-	/* interrupts cleared on write */
-	twl6040_clear_bits(twl6040, TWL6040_REG_ACCCTL, TWL6040_INTCLRMODE);
-
-	return 0;
-}
-EXPORT_SYMBOL(twl6040_irq_init);
-
-void twl6040_irq_exit(struct twl6040 *twl6040)
-{
-	free_irq(twl6040->irq, twl6040);
-}
-EXPORT_SYMBOL(twl6040_irq_exit);
diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040.c
similarity index 84%
rename from drivers/mfd/twl6040-core.c
rename to drivers/mfd/twl6040.c
index 3f2a1cf..583be76 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040.c
@@ -37,7 +37,6 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/regmap.h>
-#include <linux/err.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/twl6040.h>
 #include <linux/regulator/consumer.h>
@@ -104,7 +103,7 @@
 EXPORT_SYMBOL(twl6040_clear_bits);
 
 /* twl6040 codec manual power-up sequence */
-static int twl6040_power_up(struct twl6040 *twl6040)
+static int twl6040_power_up_manual(struct twl6040 *twl6040)
 {
 	u8 ldoctl, ncpctl, lppllctl;
 	int ret;
@@ -158,11 +157,12 @@
 	ldoctl &= ~(TWL6040_HSLDOENA | TWL6040_REFENA | TWL6040_OSCENA);
 	twl6040_reg_write(twl6040, TWL6040_REG_LDOCTL, ldoctl);
 
+	dev_err(twl6040->dev, "manual power-up failed\n");
 	return ret;
 }
 
 /* twl6040 manual power-down sequence */
-static void twl6040_power_down(struct twl6040 *twl6040)
+static void twl6040_power_down_manual(struct twl6040 *twl6040)
 {
 	u8 ncpctl, ldoctl, lppllctl;
 
@@ -192,45 +192,48 @@
 	twl6040_reg_write(twl6040, TWL6040_REG_LDOCTL, ldoctl);
 }
 
-static irqreturn_t twl6040_naudint_handler(int irq, void *data)
+static irqreturn_t twl6040_readyint_handler(int irq, void *data)
 {
 	struct twl6040 *twl6040 = data;
-	u8 intid, status;
 
-	intid = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
+	complete(&twl6040->ready);
 
-	if (intid & TWL6040_READYINT)
-		complete(&twl6040->ready);
+	return IRQ_HANDLED;
+}
 
-	if (intid & TWL6040_THINT) {
-		status = twl6040_reg_read(twl6040, TWL6040_REG_STATUS);
-		if (status & TWL6040_TSHUTDET) {
-			dev_warn(twl6040->dev,
-				 "Thermal shutdown, powering-off");
-			twl6040_power(twl6040, 0);
-		} else {
-			dev_warn(twl6040->dev,
-				 "Leaving thermal shutdown, powering-on");
-			twl6040_power(twl6040, 1);
-		}
+static irqreturn_t twl6040_thint_handler(int irq, void *data)
+{
+	struct twl6040 *twl6040 = data;
+	u8 status;
+
+	status = twl6040_reg_read(twl6040, TWL6040_REG_STATUS);
+	if (status & TWL6040_TSHUTDET) {
+		dev_warn(twl6040->dev, "Thermal shutdown, powering-off");
+		twl6040_power(twl6040, 0);
+	} else {
+		dev_warn(twl6040->dev, "Leaving thermal shutdown, powering-on");
+		twl6040_power(twl6040, 1);
 	}
 
 	return IRQ_HANDLED;
 }
 
-static int twl6040_power_up_completion(struct twl6040 *twl6040,
-				       int naudint)
+static int twl6040_power_up_automatic(struct twl6040 *twl6040)
 {
 	int time_left;
-	u8 intid;
+
+	gpio_set_value(twl6040->audpwron, 1);
 
 	time_left = wait_for_completion_timeout(&twl6040->ready,
 						msecs_to_jiffies(144));
 	if (!time_left) {
+		u8 intid;
+
+		dev_warn(twl6040->dev, "timeout waiting for READYINT\n");
 		intid = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
 		if (!(intid & TWL6040_READYINT)) {
-			dev_err(twl6040->dev,
-				"timeout waiting for READYINT\n");
+			dev_err(twl6040->dev, "automatic power-up failed\n");
+			gpio_set_value(twl6040->audpwron, 0);
 			return -ETIMEDOUT;
 		}
 	}
@@ -240,8 +243,6 @@
 
 int twl6040_power(struct twl6040 *twl6040, int on)
 {
-	int audpwron = twl6040->audpwron;
-	int naudint = twl6040->irq;
 	int ret = 0;
 
 	mutex_lock(&twl6040->mutex);
@@ -251,23 +252,17 @@
 		if (twl6040->power_count++)
 			goto out;
 
-		if (gpio_is_valid(audpwron)) {
-			/* use AUDPWRON line */
-			gpio_set_value(audpwron, 1);
-			/* wait for power-up completion */
-			ret = twl6040_power_up_completion(twl6040, naudint);
+		if (gpio_is_valid(twl6040->audpwron)) {
+			/* use automatic power-up sequence */
+			ret = twl6040_power_up_automatic(twl6040);
 			if (ret) {
-				dev_err(twl6040->dev,
-					"automatic power-down failed\n");
 				twl6040->power_count = 0;
 				goto out;
 			}
 		} else {
 			/* use manual power-up sequence */
-			ret = twl6040_power_up(twl6040);
+			ret = twl6040_power_up_manual(twl6040);
 			if (ret) {
-				dev_err(twl6040->dev,
-					"manual power-up failed\n");
 				twl6040->power_count = 0;
 				goto out;
 			}
@@ -288,15 +283,15 @@
 		if (--twl6040->power_count)
 			goto out;
 
-		if (gpio_is_valid(audpwron)) {
+		if (gpio_is_valid(twl6040->audpwron)) {
 			/* use AUDPWRON line */
-			gpio_set_value(audpwron, 0);
+			gpio_set_value(twl6040->audpwron, 0);
 
 			/* power-down sequence latency */
 			usleep_range(500, 700);
 		} else {
 			/* use manual power-down sequence */
-			twl6040_power_down(twl6040);
+			twl6040_power_down_manual(twl6040);
 		}
 		twl6040->sysclk = 0;
 		twl6040->mclk = 0;
@@ -503,6 +498,25 @@
 	.readable_reg = twl6040_readable_reg,
 };
 
+static const struct regmap_irq twl6040_irqs[] = {
+	{ .reg_offset = 0, .mask = TWL6040_THINT, },
+	{ .reg_offset = 0, .mask = TWL6040_PLUGINT | TWL6040_UNPLUGINT, },
+	{ .reg_offset = 0, .mask = TWL6040_HOOKINT, },
+	{ .reg_offset = 0, .mask = TWL6040_HFINT, },
+	{ .reg_offset = 0, .mask = TWL6040_VIBINT, },
+	{ .reg_offset = 0, .mask = TWL6040_READYINT, },
+};
+
+static struct regmap_irq_chip twl6040_irq_chip = {
+	.name = "twl6040",
+	.irqs = twl6040_irqs,
+	.num_irqs = ARRAY_SIZE(twl6040_irqs),
+
+	.num_regs = 1,
+	.status_base = TWL6040_REG_INTID,
+	.mask_base = TWL6040_REG_INTMR,
+};
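+
+/*
+ * With a single status/mask register pair (num_regs = 1), regmap-irq
+ * reads INTID on each hardware interrupt, suppresses sources masked
+ * in INTMR, and fires one nested virq per remaining status bit;
+ * probe() then looks those virqs up with regmap_irq_get_virq() and
+ * requests the READY and TH handlers individually.
+ */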
+
 static int __devinit twl6040_probe(struct i2c_client *client,
 				     const struct i2c_device_id *id)
 {
@@ -578,18 +592,31 @@
 			goto gpio_err;
 	}
 
-	/* codec interrupt */
-	ret = twl6040_irq_init(twl6040);
-	if (ret)
+	ret = regmap_add_irq_chip(twl6040->regmap, twl6040->irq,
+			IRQF_ONESHOT, 0, &twl6040_irq_chip,
+			&twl6040->irq_data);
+	if (ret < 0)
 		goto irq_init_err;
 
-	ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_READY,
-				   NULL, twl6040_naudint_handler, IRQF_ONESHOT,
+	twl6040->irq_ready = regmap_irq_get_virq(twl6040->irq_data,
+					       TWL6040_IRQ_READY);
+	twl6040->irq_th = regmap_irq_get_virq(twl6040->irq_data,
+					       TWL6040_IRQ_TH);
+
+	ret = request_threaded_irq(twl6040->irq_ready, NULL,
+				   twl6040_readyint_handler, IRQF_ONESHOT,
 				   "twl6040_irq_ready", twl6040);
 	if (ret) {
-		dev_err(twl6040->dev, "READY IRQ request failed: %d\n",
-			ret);
-		goto irq_err;
+		dev_err(twl6040->dev, "READY IRQ request failed: %d\n", ret);
+		goto readyirq_err;
+	}
+
+	ret = request_threaded_irq(twl6040->irq_th, NULL,
+				   twl6040_thint_handler, IRQF_ONESHOT,
+				   "twl6040_irq_th", twl6040);
+	if (ret) {
+		dev_err(twl6040->dev, "Thermal IRQ request failed: %d\n", ret);
+		goto thirq_err;
 	}
 
 	/* dual-access registers controlled by I2C only */
@@ -601,7 +628,7 @@
 	 * The ASoC codec can work without pdata, pass the platform_data only if
 	 * it has been provided.
 	 */
-	irq = twl6040->irq_base + TWL6040_IRQ_PLUG;
+	irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_PLUG);
 	cell = &twl6040->cells[children];
 	cell->name = "twl6040-codec";
 	twl6040_codec_rsrc[0].start = irq;
@@ -615,7 +642,7 @@
 	children++;
 
 	if (twl6040_has_vibra(pdata, node)) {
-		irq = twl6040->irq_base + TWL6040_IRQ_VIB;
+		irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_VIB);
 
 		cell = &twl6040->cells[children];
 		cell->name = "twl6040-vibra";
@@ -654,9 +681,11 @@
 	return 0;
 
 mfd_err:
-	free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
-irq_err:
-	twl6040_irq_exit(twl6040);
+	free_irq(twl6040->irq_th, twl6040);
+thirq_err:
+	free_irq(twl6040->irq_ready, twl6040);
+readyirq_err:
+	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
 irq_init_err:
 	if (gpio_is_valid(twl6040->audpwron))
 		gpio_free(twl6040->audpwron);
@@ -680,8 +709,9 @@
 	if (gpio_is_valid(twl6040->audpwron))
 		gpio_free(twl6040->audpwron);
 
-	free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
-	twl6040_irq_exit(twl6040);
+	free_irq(twl6040->irq_ready, twl6040);
+	free_irq(twl6040->irq_th, twl6040);
+	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
 
 	mfd_remove_devices(&client->dev);
 	i2c_set_clientdata(client, NULL);
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
new file mode 100644
index 0000000..af2a670
--- /dev/null
+++ b/drivers/mfd/viperboard.c
@@ -0,0 +1,137 @@
+/*
+ *  Nano River Technologies viperboard driver
+ *
+ *  This is the core driver for the viperboard. Cell drivers are
+ *  available for I2C, the ADC and both GPIO blocks; SPI is not yet
+ *  supported. The drivers do not cover every feature the board
+ *  exposes; see the viperboard user manual for details.
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#include <linux/mfd/core.h>
+#include <linux/mfd/viperboard.h>
+
+#include <linux/usb.h>
+
+
+static const struct usb_device_id vprbrd_table[] = {
+	{ USB_DEVICE(0x2058, 0x1005) },   /* Nano River Technologies */
+	{ }                               /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, vprbrd_table);
+
+static struct mfd_cell vprbrd_devs[] = {
+	{
+		.name = "viperboard-gpio",
+	},
+	{
+		.name = "viperboard-i2c",
+	},
+	{
+		.name = "viperboard-adc",
+	},
+};
+
+static int vprbrd_probe(struct usb_interface *interface,
+			      const struct usb_device_id *id)
+{
+	struct vprbrd *vb;
+
+	u16 version = 0;
+	int pipe, ret;
+
+	/* allocate memory for our device state and initialize it */
+	vb = kzalloc(sizeof(*vb), GFP_KERNEL);
+	if (vb == NULL) {
+		dev_err(&interface->dev, "Out of memory\n");
+		return -ENOMEM;
+	}
+
+	mutex_init(&vb->lock);
+
+	vb->usb_dev = usb_get_dev(interface_to_usbdev(interface));
+
+	/* save our data pointer in this interface device */
+	usb_set_intfdata(interface, vb);
+	dev_set_drvdata(&vb->pdev.dev, vb);
+
+	/* get version information, major byte first, then minor */
+	pipe = usb_rcvctrlpipe(vb->usb_dev, 0);
+	ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MAJOR,
+		VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, vb->buf, 1,
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret == 1)
+		version = vb->buf[0];
+
+	ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MINOR,
+		VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, vb->buf, 1,
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret == 1) {
+		version <<= 8;
+		version |= vb->buf[0];
+	}
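+
+	/*
+	 * version now holds (major << 8) | minor; e.g. major 1,
+	 * minor 0x05 reads back as 0x0105 and is printed as
+	 * "1.05" below.
+	 */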
+
+	dev_info(&interface->dev,
+		 "version %x.%02x found at bus %03d address %03d\n",
+		 version >> 8, version & 0xff,
+		 vb->usb_dev->bus->busnum, vb->usb_dev->devnum);
+
+	ret = mfd_add_devices(&interface->dev, -1, vprbrd_devs,
+				ARRAY_SIZE(vprbrd_devs), NULL, 0, NULL);
+	if (ret != 0) {
+		dev_err(&interface->dev, "Failed to add mfd devices to core.");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (vb) {
+		usb_put_dev(vb->usb_dev);
+		kfree(vb);
+	}
+
+	return ret;
+}
+
+static void vprbrd_disconnect(struct usb_interface *interface)
+{
+	struct vprbrd *vb = usb_get_intfdata(interface);
+
+	mfd_remove_devices(&interface->dev);
+	usb_set_intfdata(interface, NULL);
+	usb_put_dev(vb->usb_dev);
+	kfree(vb);
+
+	dev_dbg(&interface->dev, "disconnected\n");
+}
+
+static struct usb_driver vprbrd_driver = {
+	.name		= "viperboard",
+	.probe		= vprbrd_probe,
+	.disconnect	= vprbrd_disconnect,
+	.id_table	= vprbrd_table,
+};
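+
+/*
+ * module_usb_driver() below expands to the usual module_init()/
+ * module_exit() pair around usb_register()/usb_deregister().
+ */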
+
+module_usb_driver(vprbrd_driver);
+
+MODULE_DESCRIPTION("Nano River Technologies viperboard mfd core driver");
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 3141c4a..088872a 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -56,6 +56,18 @@
 	{ 0x80, 0x0000 },
 };
 
+static const struct reg_default wm5102_revb_patch[] = {
+	{ 0x80, 0x0003 },
+	{ 0x081, 0xE022 },
+	{ 0x410, 0x6080 },
+	{ 0x418, 0x6080 },
+	{ 0x420, 0x6080 },
+	{ 0x428, 0xC000 },
+	{ 0x441, 0x8014 },
+	{ 0x458, 0x000b },
+	{ 0x80, 0x0000 },
+};
+
 /* We use a function so we can use ARRAY_SIZE() */
 int wm5102_patch(struct arizona *arizona)
 {
@@ -65,7 +77,9 @@
 					     wm5102_reva_patch,
 					     ARRAY_SIZE(wm5102_reva_patch));
 	default:
-		return 0;
+		return regmap_register_patch(arizona->regmap,
+					     wm5102_revb_patch,
+					     ARRAY_SIZE(wm5102_revb_patch));
 	}
 }
 
@@ -291,6 +305,7 @@
 	{ 0x000001AA, 0x0004 },   /* R426   - FLL2 GPIO Clock */ 
 	{ 0x00000200, 0x0006 },   /* R512   - Mic Charge Pump 1 */ 
 	{ 0x00000210, 0x00D4 },   /* R528   - LDO1 Control 1 */ 
+	{ 0x00000212, 0x0001 },   /* R530   - LDO1 Control 2 */
 	{ 0x00000213, 0x0344 },   /* R531   - LDO2 Control 1 */ 
 	{ 0x00000218, 0x01A6 },   /* R536   - Mic Bias Ctrl 1 */ 
 	{ 0x00000219, 0x01A6 },   /* R537   - Mic Bias Ctrl 2 */ 
@@ -1056,6 +1071,7 @@
 	case ARIZONA_FLL1_CONTROL_5:
 	case ARIZONA_FLL1_CONTROL_6:
 	case ARIZONA_FLL1_LOOP_FILTER_TEST_1:
+	case ARIZONA_FLL1_NCO_TEST_0:
 	case ARIZONA_FLL1_SYNCHRONISER_1:
 	case ARIZONA_FLL1_SYNCHRONISER_2:
 	case ARIZONA_FLL1_SYNCHRONISER_3:
@@ -1071,6 +1087,7 @@
 	case ARIZONA_FLL2_CONTROL_5:
 	case ARIZONA_FLL2_CONTROL_6:
 	case ARIZONA_FLL2_LOOP_FILTER_TEST_1:
+	case ARIZONA_FLL2_NCO_TEST_0:
 	case ARIZONA_FLL2_SYNCHRONISER_1:
 	case ARIZONA_FLL2_SYNCHRONISER_2:
 	case ARIZONA_FLL2_SYNCHRONISER_3:
@@ -1805,6 +1822,7 @@
 	case ARIZONA_DSP1_CLOCKING_1:
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
+	case ARIZONA_DSP1_STATUS_3:
 		return true;
 	default:
 		return false;
@@ -1813,15 +1831,23 @@
 
 static bool wm5102_volatile_register(struct device *dev, unsigned int reg)
 {
+	if (reg > 0xffff)
+		return true;
+
 	switch (reg) {
 	case ARIZONA_SOFTWARE_RESET:
 	case ARIZONA_DEVICE_REVISION:
 	case ARIZONA_OUTPUT_STATUS_1:
+	case ARIZONA_RAW_OUTPUT_STATUS_1:
+	case ARIZONA_SLIMBUS_RX_PORT_STATUS:
+	case ARIZONA_SLIMBUS_TX_PORT_STATUS:
 	case ARIZONA_SAMPLE_RATE_1_STATUS:
 	case ARIZONA_SAMPLE_RATE_2_STATUS:
 	case ARIZONA_SAMPLE_RATE_3_STATUS:
 	case ARIZONA_HAPTICS_STATUS:
 	case ARIZONA_ASYNC_SAMPLE_RATE_1_STATUS:
+	case ARIZONA_FLL1_NCO_TEST_0:
+	case ARIZONA_FLL2_NCO_TEST_0:
 	case ARIZONA_FX_CTRL2:
 	case ARIZONA_INTERRUPT_STATUS_1:
 	case ARIZONA_INTERRUPT_STATUS_2:
@@ -1847,6 +1873,7 @@
 	case ARIZONA_AOD_IRQ_RAW_STATUS:
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
+	case ARIZONA_DSP1_STATUS_3:
 	case ARIZONA_HEADPHONE_DETECT_2:
 	case ARIZONA_MIC_DETECT_3:
 		return true;
@@ -1855,12 +1882,14 @@
 	}
 }
 
+#define WM5102_MAX_REGISTER 0x1a8fff
+
 const struct regmap_config wm5102_spi_regmap = {
 	.reg_bits = 32,
 	.pad_bits = 16,
 	.val_bits = 16,
 
-	.max_register = ARIZONA_DSP1_STATUS_2,
+	.max_register = WM5102_MAX_REGISTER,
 	.readable_reg = wm5102_readable_register,
 	.volatile_reg = wm5102_volatile_register,
 
@@ -1874,7 +1903,7 @@
 	.reg_bits = 32,
 	.val_bits = 16,
 
-	.max_register = ARIZONA_DSP1_STATUS_2,
+	.max_register = WM5102_MAX_REGISTER,
 	.readable_reg = wm5102_readable_register,
 	.volatile_reg = wm5102_volatile_register,
 
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index bcb226f..57c488d 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -535,11 +535,10 @@
 			break;
 		case 2:
 		case 3:
+		default:
 			regmap_patch = wm8994_revc_patch;
 			patch_regs = ARRAY_SIZE(wm8994_revc_patch);
 			break;
-		default:
-			break;
 		}
 		break;
 
@@ -558,17 +557,9 @@
 		/* Revision C did not change the relevant layer */
 		if (wm8994->revision > 1)
 			wm8994->revision++;
-		switch (wm8994->revision) {
-		case 0:
-		case 1:
-		case 2:
-		case 3:
-			regmap_patch = wm1811_reva_patch;
-			patch_regs = ARRAY_SIZE(wm1811_reva_patch);
-			break;
-		default:
-			break;
-		}
+
+		regmap_patch = wm1811_reva_patch;
+		patch_regs = ARRAY_SIZE(wm1811_reva_patch);
 		break;
 
 	default:
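
The wm5102 and wm8994 hunks above both use the same regmap idiom: a table of
register/value pairs is handed to regmap_register_patch(), which writes the
entries immediately and replays them whenever the register cache is
synchronized after a reset. A minimal sketch, assuming a valid struct regmap
(the register values are placeholders, not real codec data):

	#include <linux/regmap.h>

	static const struct reg_default example_patch[] = {
		{ 0x80, 0x0003 },	/* placeholder register/value pairs */
		{ 0x80, 0x0000 },
	};

	static int apply_example_patch(struct regmap *map)
	{
		/* writes the pairs now, replays them on regcache_sync() */
		return regmap_register_patch(map, example_patch,
					     ARRAY_SIZE(example_patch));
	}
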
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 8d082b4..d971817 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -53,6 +53,10 @@
 #include <linux/kthread.h>
 #include "xpc.h"
 
+#ifdef CONFIG_X86_64
+#include <asm/traps.h>
+#endif
+
 /* define two XPC debug device structures to be used with dev_dbg() et al */
 
 struct device_driver xpc_dbg_name = {
@@ -1079,6 +1083,9 @@
 	return NOTIFY_DONE;
 }
 
+/* Used to allow only one CPU to complete the disconnect */
+static unsigned int xpc_die_disconnecting;
+
 /*
  * Notify other partitions to deactivate from us by first disengaging from all
  * references to our memory.
@@ -1092,6 +1099,9 @@
 	long keep_waiting;
 	long wait_to_print;
 
+	if (cmpxchg(&xpc_die_disconnecting, 0, 1))
+		return;
+
 	/* keep xpc_hb_checker thread from doing anything (just in case) */
 	xpc_exiting = 1;
 
@@ -1159,7 +1169,7 @@
  * about the lack of a heartbeat.
  */
 static int
-xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
+xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
 {
 #ifdef CONFIG_IA64		/* !!! temporary kludge */
 	switch (event) {
@@ -1191,7 +1201,27 @@
 		break;
 	}
 #else
-	xpc_die_deactivate();
+	struct die_args *die_args = _die_args;
+
+	switch (event) {
+	case DIE_TRAP:
+		if (die_args->trapnr == X86_TRAP_DF)
+			xpc_die_deactivate();
+
+		if (((die_args->trapnr == X86_TRAP_MF) ||
+		     (die_args->trapnr == X86_TRAP_XF)) &&
+		    !user_mode_vm(die_args->regs))
+			xpc_die_deactivate();
+
+		break;
+	case DIE_INT3:
+	case DIE_DEBUG:
+		break;
+	case DIE_OOPS:
+	case DIE_GPF:
+	default:
+		xpc_die_deactivate();
+	}
 #endif
 
 	return NOTIFY_DONE;
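
The xpc_die_disconnecting flag above is a one-shot guard: cmpxchg() atomically
replaces 0 with 1 and returns the previous value, so exactly one CPU observes
0 and performs the disconnect while every later caller bails out early. A
minimal sketch of the idiom (names are illustrative):

	static unsigned int once_flag;

	static void run_exactly_once(void)
	{
		/* a non-zero return means another CPU already claimed it */
		if (cmpxchg(&once_flag, 0, 1))
			return;

		/* ... work that must execute on exactly one CPU ... */
	}
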
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 12eff6f..571915d 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
@@ -382,8 +383,6 @@
 			0xFF, (u8)data->blocks);
 	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_BLOCK_CNT_H,
 			0xFF, (u8)(data->blocks >> 8));
-	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
-			CARD_DATA_SOURCE, 0x01, RING_BUFFER);
 
 	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0,
 			DMA_DONE_INT, DMA_DONE_INT);
@@ -407,6 +406,7 @@
 	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_DATA_SOURCE,
 			0x01, RING_BUFFER);
 
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_CFG2, 0xFF, cfg2);
 	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_TRANSFER, 0xFF,
 			trans_mode | SD_TRANSFER_START);
 	rtsx_pci_add_cmd(pcr, CHECK_REG_CMD, SD_TRANSFER,
diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c
index 9453931..7c057a0 100644
--- a/drivers/mtd/ar7part.c
+++ b/drivers/mtd/ar7part.c
@@ -26,19 +26,16 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/bootmem.h>
-#include <linux/magic.h>
 #include <linux/module.h>
 
+#include <uapi/linux/magic.h>
+
 #define AR7_PARTS	4
 #define ROOT_OFFSET	0xe0000
 
 #define LOADER_MAGIC1	le32_to_cpu(0xfeedfa42)
 #define LOADER_MAGIC2	le32_to_cpu(0xfeed1281)
 
-#ifndef SQUASHFS_MAGIC
-#define SQUASHFS_MAGIC	0x73717368
-#endif
-
 struct ar7_bin_rec {
 	unsigned int checksum;
 	unsigned int length;
diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c
index 63d2a64..6eeb84c 100644
--- a/drivers/mtd/bcm63xxpart.c
+++ b/drivers/mtd/bcm63xxpart.c
@@ -37,8 +37,7 @@
 
 #define BCM63XX_EXTENDED_SIZE	0xBFC00000	/* Extended flash address */
 
-#define BCM63XX_MIN_CFE_SIZE	0x10000		/* always at least 64KiB */
-#define BCM63XX_MIN_NVRAM_SIZE	0x10000		/* always at least 64KiB */
+#define BCM63XX_CFE_BLOCK_SIZE	0x10000		/* always at least 64KiB */
 
 #define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
 
@@ -79,7 +78,7 @@
 	unsigned int rootfsaddr, kerneladdr, spareaddr;
 	unsigned int rootfslen, kernellen, sparelen, totallen;
 	unsigned int cfelen, nvramlen;
-	int namelen = 0;
+	unsigned int cfe_erasesize;
 	int i;
 	u32 computed_crc;
 	bool rootfs_first = false;
@@ -87,8 +86,11 @@
 	if (bcm63xx_detect_cfe(master))
 		return -EINVAL;
 
-	cfelen = max_t(uint32_t, master->erasesize, BCM63XX_MIN_CFE_SIZE);
-	nvramlen = max_t(uint32_t, master->erasesize, BCM63XX_MIN_NVRAM_SIZE);
+	cfe_erasesize = max_t(uint32_t, master->erasesize,
+			      BCM63XX_CFE_BLOCK_SIZE);
+
+	cfelen = cfe_erasesize;
+	nvramlen = cfe_erasesize;
 
 	/* Allocate memory for buffer */
 	buf = vmalloc(sizeof(struct bcm_tag));
@@ -121,7 +123,6 @@
 		kerneladdr = kerneladdr - BCM63XX_EXTENDED_SIZE;
 		rootfsaddr = rootfsaddr - BCM63XX_EXTENDED_SIZE;
 		spareaddr = roundup(totallen, master->erasesize) + cfelen;
-		sparelen = master->size - spareaddr - nvramlen;
 
 		if (rootfsaddr < kerneladdr) {
 			/* default Broadcom layout */
@@ -139,19 +140,15 @@
 		rootfslen = 0;
 		rootfsaddr = 0;
 		spareaddr = cfelen;
-		sparelen = master->size - cfelen - nvramlen;
 	}
+	sparelen = master->size - spareaddr - nvramlen;
 
 	/* Determine number of partitions */
-	namelen = 8;
-	if (rootfslen > 0) {
+	if (rootfslen > 0)
 		nrparts++;
-		namelen += 6;
-	}
-	if (kernellen > 0) {
+
+	if (kernellen > 0)
 		nrparts++;
-		namelen += 6;
-	}
 
 	/* Ask kernel for more memory */
 	parts = kzalloc(sizeof(*parts) * nrparts + 10 * nrparts, GFP_KERNEL);
@@ -193,17 +190,16 @@
 	parts[curpart].name = "nvram";
 	parts[curpart].offset = master->size - nvramlen;
 	parts[curpart].size = nvramlen;
+	curpart++;
 
 	/* Global partition "linux" to make firmware upgrades easy */
-	curpart++;
 	parts[curpart].name = "linux";
 	parts[curpart].offset = cfelen;
 	parts[curpart].size = master->size - cfelen - nvramlen;
 
 	for (i = 0; i < nrparts; i++)
-		pr_info("Partition %d is %s offset %lx and length %lx\n", i,
-			parts[i].name, (long unsigned int)(parts[i].offset),
-			(long unsigned int)(parts[i].size));
+		pr_info("Partition %d is %s offset %llx and length %llx\n", i,
+			parts[i].name, parts[i].offset,	parts[i].size);
 
 	pr_info("Spare partition is offset %x and length %x\n",	spareaddr,
 		sparelen);
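
Factoring sparelen out of both branches makes the layout arithmetic uniform.
A worked example with illustrative numbers (8 MiB flash, 64 KiB erase blocks,
an image whose tag reports totallen = 0x6f0000):

	cfe_erasesize = max(0x10000, 0x10000)           = 0x10000
	cfelen = nvramlen                               = 0x10000
	spareaddr = roundup(0x6f0000, 0x10000) + cfelen = 0x700000
	sparelen = 0x800000 - 0x700000 - 0x10000        = 0xf0000

so the spare area is the 960 KiB between the end of the image and the NVRAM
block at the top of the flash.
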
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 5ff5c4a..b861972 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1536,8 +1536,20 @@
 		UDELAY(map, chip, adr, 1);
 	}
 
-	/* reset on all failures. */
-	map_write( map, CMD(0xF0), chip->start );
+	/*
+	 * Recovery from write-buffer programming failures requires
+	 * the write-to-buffer-reset sequence.  Since the last part
+	 * of the sequence also works as a normal reset, we can run
+	 * the same commands regardless of why we are here.
+	 * See e.g.
+	 * http://www.spansion.com/Support/Application%20Notes/MirrorBit_Write_Buffer_Prog_Page_Buffer_Read_AN.pdf
+	 */
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xF0, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
 	xip_enable(map, chip, adr);
 	/* FIXME - should have reset delay before continuing */
 
diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c
index aed1b8a..c533f27 100644
--- a/drivers/mtd/cmdlinepart.c
+++ b/drivers/mtd/cmdlinepart.c
@@ -56,8 +56,8 @@
 
 
 /* special size referring to all the remaining space in a partition */
-#define SIZE_REMAINING UINT_MAX
-#define OFFSET_CONTINUOUS UINT_MAX
+#define SIZE_REMAINING ULLONG_MAX
+#define OFFSET_CONTINUOUS ULLONG_MAX
 
 struct cmdline_mtd_partition {
 	struct cmdline_mtd_partition *next;
@@ -89,7 +89,7 @@
 				      int extra_mem_size)
 {
 	struct mtd_partition *parts;
-	unsigned long size, offset = OFFSET_CONTINUOUS;
+	unsigned long long size, offset = OFFSET_CONTINUOUS;
 	char *name;
 	int name_len;
 	unsigned char *extra_mem;
@@ -104,7 +104,8 @@
 	} else {
 		size = memparse(s, &s);
 		if (size < PAGE_SIZE) {
-			printk(KERN_ERR ERRP "partition size too small (%lx)\n", size);
+			printk(KERN_ERR ERRP "partition size too small (%llx)\n",
+			       size);
 			return ERR_PTR(-EINVAL);
 		}
 	}
@@ -296,7 +297,7 @@
 				    struct mtd_partition **pparts,
 				    struct mtd_part_parser_data *data)
 {
-	unsigned long offset;
+	unsigned long long offset;
 	int i, err;
 	struct cmdline_mtd_partition *part;
 	const char *mtd_id = master->name;
@@ -308,48 +309,52 @@
 			return err;
 	}
 
+	/*
+	 * Search for the partition definition matching master->name.
+	 * If master->name is not set, stop at first partition definition.
+	 */
 	for (part = partitions; part; part = part->next) {
-		if ((!mtd_id) || (!strcmp(part->mtd_id, mtd_id))) {
-			for (i = 0, offset = 0; i < part->num_parts; i++) {
-				if (part->parts[i].offset == OFFSET_CONTINUOUS)
-					part->parts[i].offset = offset;
-				else
-					offset = part->parts[i].offset;
-
-				if (part->parts[i].size == SIZE_REMAINING)
-					part->parts[i].size = master->size - offset;
-
-				if (part->parts[i].size == 0) {
-					printk(KERN_WARNING ERRP
-					       "%s: skipping zero sized partition\n",
-					       part->mtd_id);
-					part->num_parts--;
-					memmove(&part->parts[i],
-						&part->parts[i + 1],
-						sizeof(*part->parts) * (part->num_parts - i));
-					continue;
-				}
-
-				if (offset + part->parts[i].size > master->size) {
-					printk(KERN_WARNING ERRP
-					       "%s: partitioning exceeds flash size, truncating\n",
-					       part->mtd_id);
-					part->parts[i].size = master->size - offset;
-				}
-				offset += part->parts[i].size;
-			}
-
-			*pparts = kmemdup(part->parts,
-					sizeof(*part->parts) * part->num_parts,
-					GFP_KERNEL);
-			if (!*pparts)
-				return -ENOMEM;
-
-			return part->num_parts;
-		}
+		if ((!mtd_id) || (!strcmp(part->mtd_id, mtd_id)))
+			break;
 	}
 
-	return 0;
+	if (!part)
+		return 0;
+
+	for (i = 0, offset = 0; i < part->num_parts; i++) {
+		if (part->parts[i].offset == OFFSET_CONTINUOUS)
+			part->parts[i].offset = offset;
+		else
+			offset = part->parts[i].offset;
+
+		if (part->parts[i].size == SIZE_REMAINING)
+			part->parts[i].size = master->size - offset;
+
+		if (part->parts[i].size == 0) {
+			printk(KERN_WARNING ERRP
+			       "%s: skipping zero sized partition\n",
+			       part->mtd_id);
+			part->num_parts--;
+			memmove(&part->parts[i], &part->parts[i + 1],
+				sizeof(*part->parts) * (part->num_parts - i));
+			continue;
+		}
+
+		if (offset + part->parts[i].size > master->size) {
+			printk(KERN_WARNING ERRP
+			       "%s: partitioning exceeds flash size, truncating\n",
+			       part->mtd_id);
+			part->parts[i].size = master->size - offset;
+		}
+		offset += part->parts[i].size;
+	}
+
+	*pparts = kmemdup(part->parts, sizeof(*part->parts) * part->num_parts,
+			  GFP_KERNEL);
+	if (!*pparts)
+		return -ENOMEM;
+
+	return part->num_parts;
 }
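
The rewritten loop resolves the two sentinels for each entry: OFFSET_CONTINUOUS
means "start where the previous partition ended" and SIZE_REMAINING means
"extend to the end of the device". For example, a kernel command line such as
(the mtd-id is illustrative):

	mtdparts=phys_mapped_flash:512k(boot),128k(env),-(rootfs)

places boot at offset 0, env at 0x80000, and rootfs at 0xa0000 with size
master->size - 0xa0000; zero-sized entries are dropped and oversized ones
truncated, exactly as the warnings above describe.
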
 
 
diff --git a/drivers/mtd/devices/bcm47xxsflash.c b/drivers/mtd/devices/bcm47xxsflash.c
index 2dc5a6f..4714584 100644
--- a/drivers/mtd/devices/bcm47xxsflash.c
+++ b/drivers/mtd/devices/bcm47xxsflash.c
@@ -66,7 +66,7 @@
 	return err;
 }
 
-static int __devexit bcm47xxsflash_remove(struct platform_device *pdev)
+static int bcm47xxsflash_remove(struct platform_device *pdev)
 {
 	struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev);
 
@@ -77,7 +77,7 @@
 }
 
 static struct platform_driver bcma_sflash_driver = {
-	.remove = __devexit_p(bcm47xxsflash_remove),
+	.remove = bcm47xxsflash_remove,
 	.driver = {
 		.name = "bcma_sflash",
 		.owner = THIS_MODULE,
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 681e2ee..e081bfe 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -62,6 +62,7 @@
 				memset(page_address(page), 0xff, PAGE_SIZE);
 				set_page_dirty(page);
 				unlock_page(page);
+				balance_dirty_pages_ratelimited(mapping);
 				break;
 			}
 
@@ -152,6 +153,7 @@
 			memcpy(page_address(page) + offset, buf, cpylen);
 			set_page_dirty(page);
 			unlock_page(page);
+			balance_dirty_pages_ratelimited(mapping);
 		}
 		page_cache_release(page);
 
@@ -433,7 +435,7 @@
 }
 
 
-static void __devexit block2mtd_exit(void)
+static void block2mtd_exit(void)
 {
 	struct list_head *pos, *next;
 
diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index d34d83b..8510ccb 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -1440,7 +1440,7 @@
 		oobdelta = mtd->ecclayout->oobavail;
 		break;
 	default:
-		oobdelta = 0;
+		return -EINVAL;
 	}
 	if ((len % DOC_LAYOUT_PAGE_SIZE) || (ooblen % oobdelta) ||
 	    (ofs % DOC_LAYOUT_PAGE_SIZE))
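
Returning -EINVAL from the default case is more than cosmetics: oobdelta feeds
the modulo check directly below it, so the old oobdelta = 0 fallthrough would
have made ooblen % oobdelta a division by zero for an unsupported OOB mode.
The fix fails fast instead:

	default:
		return -EINVAL;	/* was: oobdelta = 0, later used as a divisor */
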
diff --git a/drivers/mtd/devices/docprobe.c b/drivers/mtd/devices/docprobe.c
index 706b847..88b3fd3 100644
--- a/drivers/mtd/devices/docprobe.c
+++ b/drivers/mtd/devices/docprobe.c
@@ -70,8 +70,6 @@
 	0xe0000, 0xe2000, 0xe4000, 0xe6000,
 	0xe8000, 0xea000, 0xec000, 0xee000,
 #endif /*  CONFIG_MTD_DOCPROBE_HIGH */
-#else
-#warning Unknown architecture for DiskOnChip. No default probe locations defined
 #endif
 	0xffffffff };
 
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 03838ba..4eeeb2d 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -73,14 +73,6 @@
 #define	MAX_READY_WAIT_JIFFIES	(40 * HZ)	/* M25P16 specs 40s max chip erase */
 #define	MAX_CMD_SIZE		5
 
-#ifdef CONFIG_M25PXX_USE_FAST_READ
-#define OPCODE_READ 	OPCODE_FAST_READ
-#define FAST_READ_DUMMY_BYTE 1
-#else
-#define OPCODE_READ 	OPCODE_NORM_READ
-#define FAST_READ_DUMMY_BYTE 0
-#endif
-
 #define JEDEC_MFR(_jedec_id)	((_jedec_id) >> 16)
 
 /****************************************************************************/
@@ -93,6 +85,7 @@
 	u16			addr_width;
 	u8			erase_opcode;
 	u8			*command;
+	bool			fast_read;
 };
 
 static inline struct m25p *mtd_to_m25p(struct mtd_info *mtd)
@@ -168,6 +161,7 @@
 {
 	switch (JEDEC_MFR(jedec_id)) {
 	case CFI_MFR_MACRONIX:
+	case 0xEF /* winbond */:
 		flash->command[0] = enable ? OPCODE_EN4B : OPCODE_EX4B;
 		return spi_write(flash->spi, flash->command, 1);
 	default:
@@ -342,6 +336,7 @@
 	struct m25p *flash = mtd_to_m25p(mtd);
 	struct spi_transfer t[2];
 	struct spi_message m;
+	uint8_t opcode;
 
 	pr_debug("%s: %s from 0x%08x, len %zd\n", dev_name(&flash->spi->dev),
 			__func__, (u32)from, len);
@@ -354,7 +349,7 @@
 	 * Should add 1 byte DUMMY_BYTE.
 	 */
 	t[0].tx_buf = flash->command;
-	t[0].len = m25p_cmdsz(flash) + FAST_READ_DUMMY_BYTE;
+	t[0].len = m25p_cmdsz(flash) + (flash->fast_read ? 1 : 0);
 	spi_message_add_tail(&t[0], &m);
 
 	t[1].rx_buf = buf;
@@ -376,12 +371,14 @@
 	 */
 
 	/* Set up the write data buffer. */
-	flash->command[0] = OPCODE_READ;
+	opcode = flash->fast_read ? OPCODE_FAST_READ : OPCODE_NORM_READ;
+	flash->command[0] = opcode;
 	m25p_addr2cmd(flash, from, flash->command);
 
 	spi_sync(flash->spi, &m);
 
-	*retlen = m.actual_length - m25p_cmdsz(flash) - FAST_READ_DUMMY_BYTE;
+	*retlen = m.actual_length - m25p_cmdsz(flash) -
+			(flash->fast_read ? 1 : 0);
 
 	mutex_unlock(&flash->lock);
 
@@ -664,7 +661,8 @@
 	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
 
 	/* Micron */
-	{ "n25q128",  INFO(0x20ba18, 0, 64 * 1024, 256, 0) },
+	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024, 256, 0) },
+	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024, 256, 0) },
 	{ "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K) },
 
 	/* Spansion -- single (large) sector size only, at least
@@ -745,6 +743,8 @@
 	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
 	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
 	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
 
 	/* Catalyst / On Semiconductor -- non-JEDEC */
 	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1) },
@@ -756,7 +756,7 @@
 };
 MODULE_DEVICE_TABLE(spi, m25p_ids);
 
-static const struct spi_device_id *__devinit jedec_probe(struct spi_device *spi)
+static const struct spi_device_id *jedec_probe(struct spi_device *spi)
 {
 	int			tmp;
 	u8			code = OPCODE_RDID;
@@ -801,7 +801,7 @@
  * matches what the READ command supports, at least until this driver
  * understands FAST_READ (for clocks over 25 MHz).
  */
-static int __devinit m25p_probe(struct spi_device *spi)
+static int m25p_probe(struct spi_device *spi)
 {
 	const struct spi_device_id	*id = spi_get_device_id(spi);
 	struct flash_platform_data	*data;
@@ -809,9 +809,10 @@
 	struct flash_info		*info;
 	unsigned			i;
 	struct mtd_part_parser_data	ppdata;
+	struct device_node __maybe_unused *np = spi->dev.of_node;
 
 #ifdef CONFIG_MTD_OF_PARTS
-	if (!of_device_is_available(spi->dev.of_node))
+	if (!of_device_is_available(np))
 		return -ENODEV;
 #endif
 
@@ -863,7 +864,8 @@
 	flash = kzalloc(sizeof *flash, GFP_KERNEL);
 	if (!flash)
 		return -ENOMEM;
-	flash->command = kmalloc(MAX_CMD_SIZE + FAST_READ_DUMMY_BYTE, GFP_KERNEL);
+	flash->command = kmalloc(MAX_CMD_SIZE + (flash->fast_read ? 1 : 0),
+					GFP_KERNEL);
 	if (!flash->command) {
 		kfree(flash);
 		return -ENOMEM;
@@ -920,6 +922,16 @@
 	flash->page_size = info->page_size;
 	flash->mtd.writebufsize = flash->page_size;
 
+	flash->fast_read = false;
+#ifdef CONFIG_OF
+	if (np && of_property_read_bool(np, "m25p,fast-read"))
+		flash->fast_read = true;
+#endif
+
+#ifdef CONFIG_M25PXX_USE_FAST_READ
+	flash->fast_read = true;
+#endif
+
 	if (info->addr_width)
 		flash->addr_width = info->addr_width;
 	else {
@@ -961,7 +973,7 @@
 }
 
 
-static int __devexit m25p_remove(struct spi_device *spi)
+static int m25p_remove(struct spi_device *spi)
 {
 	struct m25p	*flash = dev_get_drvdata(&spi->dev);
 	int		status;
@@ -983,7 +995,7 @@
 	},
 	.id_table	= m25p_ids,
 	.probe	= m25p_probe,
-	.remove	= __devexit_p(m25p_remove),
+	.remove	= m25p_remove,
 
 	/* REVISIT: many of these chips have deep power-down modes, which
 	 * should clearly be entered on suspend() to minimize power use.
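
The new fast_read flag turns a compile-time choice into a per-device one,
settable through the "m25p,fast-read" device-tree property or the old Kconfig
option. The layout difference it accounts for: OPCODE_FAST_READ (0x0b) needs
one dummy byte between the address and the data phase, while OPCODE_NORM_READ
(0x03) does not, so the command-buffer length and the *retlen math both grow
by one when the flag is set. A sketch assuming 3-byte addressing:

	/* normal read:  0x03 A2 A1 A0        -> 4 command bytes     */
	/* fast read:    0x0b A2 A1 A0 dummy  -> 4 command bytes + 1 */
	size_t cmd_len = m25p_cmdsz(flash) + (flash->fast_read ? 1 : 0);
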
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 928fb0e..ea7ea7b 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -618,7 +618,7 @@
 /*
  * Register DataFlash device with MTD subsystem.
  */
-static int __devinit
+static int
 add_dataflash_otp(struct spi_device *spi, char *name,
 		int nr_pages, int pagesize, int pageoffset, char revision)
 {
@@ -679,7 +679,7 @@
 	return err;
 }
 
-static inline int __devinit
+static inline int
 add_dataflash(struct spi_device *spi, char *name,
 		int nr_pages, int pagesize, int pageoffset)
 {
@@ -705,7 +705,7 @@
 #define IS_POW2PS	0x0001		/* uses 2^N byte pages */
 };
 
-static struct flash_info __devinitdata dataflash_data [] = {
+static struct flash_info dataflash_data[] = {
 
 	/*
 	 * NOTE:  chips with SUP_POW2PS (rev D and up) need two entries,
@@ -740,7 +740,7 @@
 	{ "at45db642d",  0x1f2800, 8192, 1024, 10, SUP_POW2PS | IS_POW2PS},
 };
 
-static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
+static struct flash_info *jedec_probe(struct spi_device *spi)
 {
 	int			tmp;
 	uint8_t			code = OP_READ_ID;
@@ -823,7 +823,7 @@
  *   AT45DB0642  64Mbit  (8M)    xx111xxx (0x3c)   8192   1056     11
  *   AT45DB1282  128Mbit (16M)   xx0100xx (0x10)  16384   1056     11
  */
-static int __devinit dataflash_probe(struct spi_device *spi)
+static int dataflash_probe(struct spi_device *spi)
 {
 	int status;
 	struct flash_info	*info;
@@ -897,7 +897,7 @@
 	return status;
 }
 
-static int __devexit dataflash_remove(struct spi_device *spi)
+static int dataflash_remove(struct spi_device *spi)
 {
 	struct dataflash	*flash = dev_get_drvdata(&spi->dev);
 	int			status;
@@ -920,7 +920,7 @@
 	},
 
 	.probe		= dataflash_probe,
-	.remove		= __devexit_p(dataflash_remove),
+	.remove		= dataflash_remove,
 
 	/* FIXME:  investigate suspend and resume... */
 };
diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c
index dcc3c95..2d2c2a5 100644
--- a/drivers/mtd/devices/spear_smi.c
+++ b/drivers/mtd/devices/spear_smi.c
@@ -756,7 +756,7 @@
 
 
 #ifdef CONFIG_OF
-static int __devinit spear_smi_probe_config_dt(struct platform_device *pdev,
+static int spear_smi_probe_config_dt(struct platform_device *pdev,
 					       struct device_node *np)
 {
 	struct spear_smi_plat_data *pdata = dev_get_platdata(&pdev->dev);
@@ -799,7 +799,7 @@
 	return 0;
 }
 #else
-static int __devinit spear_smi_probe_config_dt(struct platform_device *pdev,
+static int spear_smi_probe_config_dt(struct platform_device *pdev,
 					       struct device_node *np)
 {
 	return -ENOSYS;
@@ -901,7 +901,7 @@
  * and do proper init for any found one.
  * Returns 0 on success, non-zero otherwise
  */
-static int __devinit spear_smi_probe(struct platform_device *pdev)
+static int spear_smi_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct spear_smi_plat_data *pdata = NULL;
@@ -1016,7 +1016,7 @@
  *
  * free all allocations and delete the partitions.
  */
-static int __devexit spear_smi_remove(struct platform_device *pdev)
+static int spear_smi_remove(struct platform_device *pdev)
 {
 	struct spear_smi *dev;
 	struct spear_snor_flash *flash;
@@ -1092,20 +1092,9 @@
 #endif
 	},
 	.probe = spear_smi_probe,
-	.remove = __devexit_p(spear_smi_remove),
+	.remove = spear_smi_remove,
 };
-
-static int spear_smi_init(void)
-{
-	return platform_driver_register(&spear_smi_driver);
-}
-module_init(spear_smi_init);
-
-static void spear_smi_exit(void)
-{
-	platform_driver_unregister(&spear_smi_driver);
-}
-module_exit(spear_smi_exit);
+module_platform_driver(spear_smi_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Ashish Priyadarshi, Shiraz Hashim <shiraz.hashim@st.com>");
diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c
index ab8a2f4..8091b01 100644
--- a/drivers/mtd/devices/sst25l.c
+++ b/drivers/mtd/devices/sst25l.c
@@ -64,7 +64,7 @@
 
 #define to_sst25l_flash(x) container_of(x, struct sst25l_flash, mtd)
 
-static struct flash_info __devinitdata sst25l_flash_info[] = {
+static struct flash_info sst25l_flash_info[] = {
 	{"sst25lf020a", 0xbf43, 256, 1024, 4096},
 	{"sst25lf040a",	0xbf44,	256, 2048, 4096},
 };
@@ -313,7 +313,7 @@
 	return ret;
 }
 
-static struct flash_info *__devinit sst25l_match_device(struct spi_device *spi)
+static struct flash_info *sst25l_match_device(struct spi_device *spi)
 {
 	struct flash_info *flash_info = NULL;
 	struct spi_message m;
@@ -353,7 +353,7 @@
 	return flash_info;
 }
 
-static int __devinit sst25l_probe(struct spi_device *spi)
+static int sst25l_probe(struct spi_device *spi)
 {
 	struct flash_info *flash_info;
 	struct sst25l_flash *flash;
@@ -411,7 +411,7 @@
 	return 0;
 }
 
-static int __devexit sst25l_remove(struct spi_device *spi)
+static int sst25l_remove(struct spi_device *spi)
 {
 	struct sst25l_flash *flash = dev_get_drvdata(&spi->dev);
 	int ret;
@@ -428,7 +428,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sst25l_probe,
-	.remove		= __devexit_p(sst25l_remove),
+	.remove		= sst25l_remove,
 };
 
 module_spi_driver(sst25l_driver);
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index df30486..62ba82c 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -358,13 +358,6 @@
 	  IXP2000 based board and would like to use the flash chips on it,
 	  say 'Y'.
 
-config MTD_FORTUNET
-	tristate "CFI Flash device mapped on the FortuNet board"
-	depends on MTD_CFI && SA1100_FORTUNET
-	help
-	  This enables access to the Flash on the FortuNet board.  If you
-	  have such a board, say 'Y'.
-
 config MTD_AUTCPU12
 	bool "NV-RAM mapping AUTCPU12 board"
 	depends on ARCH_AUTCPU12
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index a0240ed..4ded287 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -39,7 +39,6 @@
 obj-$(CONFIG_MTD_PCI)		+= pci.o
 obj-$(CONFIG_MTD_AUTCPU12)	+= autcpu12-nvram.o
 obj-$(CONFIG_MTD_IMPA7)		+= impa7.o
-obj-$(CONFIG_MTD_FORTUNET)	+= fortunet.o
 obj-$(CONFIG_MTD_UCLINUX)	+= uclinux.o
 obj-$(CONFIG_MTD_NETtel)	+= nettel.o
 obj-$(CONFIG_MTD_SCB2_FLASH)	+= scb2_flash.o
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index e2875d6..f7207b0 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -100,8 +100,8 @@
 }
 
 
-static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
-	const struct pci_device_id *ent)
+static int amd76xrom_init_one(struct pci_dev *pdev,
+			      const struct pci_device_id *ent)
 {
 	static char *rom_probe_types[] = { "cfi_probe", "jedec_probe", NULL };
 	u8 byte;
@@ -289,7 +289,7 @@
 }
 
 
-static void __devexit amd76xrom_remove_one (struct pci_dev *pdev)
+static void amd76xrom_remove_one(struct pci_dev *pdev)
 {
 	struct amd76xrom_window *window = &amd76xrom_window;
 
@@ -347,4 +347,3 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Eric Biederman <ebiederman@lnxi.com>");
 MODULE_DESCRIPTION("MTD map driver for BIOS chips on the AMD76X southbridge");
-
diff --git a/drivers/mtd/maps/autcpu12-nvram.c b/drivers/mtd/maps/autcpu12-nvram.c
index 76fb594..a2dc2ae 100644
--- a/drivers/mtd/maps/autcpu12-nvram.c
+++ b/drivers/mtd/maps/autcpu12-nvram.c
@@ -33,7 +33,7 @@
 	struct map_info map;
 };
 
-static int __devinit autcpu12_nvram_probe(struct platform_device *pdev)
+static int autcpu12_nvram_probe(struct platform_device *pdev)
 {
 	map_word tmp, save0, save1;
 	struct resource *res;
@@ -105,7 +105,7 @@
 	return -ENOMEM;
 }
 
-static int __devexit autcpu12_nvram_remove(struct platform_device *pdev)
+static int autcpu12_nvram_remove(struct platform_device *pdev)
 {
 	struct autcpu12_nvram_priv *priv = platform_get_drvdata(pdev);
 
@@ -121,7 +121,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= autcpu12_nvram_probe,
-	.remove		= __devexit_p(autcpu12_nvram_remove),
+	.remove		= autcpu12_nvram_remove,
 };
 module_platform_driver(autcpu12_nvram_driver);
 
diff --git a/drivers/mtd/maps/bfin-async-flash.c b/drivers/mtd/maps/bfin-async-flash.c
index ef5cde8..f833edf 100644
--- a/drivers/mtd/maps/bfin-async-flash.c
+++ b/drivers/mtd/maps/bfin-async-flash.c
@@ -30,7 +30,8 @@
 #include <linux/io.h>
 #include <asm/unaligned.h>
 
-#define pr_devinit(fmt, args...) ({ static const __devinitconst char __fmt[] = fmt; printk(__fmt, ## args); })
+#define pr_devinit(fmt, args...) \
+		({ static const char __fmt[] = fmt; printk(__fmt, ## args); })
 
 #define DRIVER_NAME "bfin-async-flash"
 
@@ -123,7 +124,7 @@
 
 static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", NULL };
 
-static int __devinit bfin_flash_probe(struct platform_device *pdev)
+static int bfin_flash_probe(struct platform_device *pdev)
 {
 	int ret;
 	struct physmap_flash_data *pdata = pdev->dev.platform_data;
@@ -172,7 +173,7 @@
 	return 0;
 }
 
-static int __devexit bfin_flash_remove(struct platform_device *pdev)
+static int bfin_flash_remove(struct platform_device *pdev)
 {
 	struct async_state *state = platform_get_drvdata(pdev);
 	gpio_free(state->enet_flash_pin);
@@ -184,7 +185,7 @@
 
 static struct platform_driver bfin_flash_driver = {
 	.probe		= bfin_flash_probe,
-	.remove		= __devexit_p(bfin_flash_remove),
+	.remove		= bfin_flash_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 	},
diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c
index 3d0e762..586a1c7 100644
--- a/drivers/mtd/maps/ck804xrom.c
+++ b/drivers/mtd/maps/ck804xrom.c
@@ -112,8 +112,8 @@
 }
 
 
-static int __devinit ck804xrom_init_one (struct pci_dev *pdev,
-					 const struct pci_device_id *ent)
+static int ck804xrom_init_one(struct pci_dev *pdev,
+			      const struct pci_device_id *ent)
 {
 	static char *rom_probe_types[] = { "cfi_probe", "jedec_probe", NULL };
 	u8 byte;
@@ -320,7 +320,7 @@
 }
 
 
-static void __devexit ck804xrom_remove_one (struct pci_dev *pdev)
+static void ck804xrom_remove_one(struct pci_dev *pdev)
 {
 	struct ck804xrom_window *window = &ck804xrom_window;
 
diff --git a/drivers/mtd/maps/esb2rom.c b/drivers/mtd/maps/esb2rom.c
index 08322b1..ff8681a 100644
--- a/drivers/mtd/maps/esb2rom.c
+++ b/drivers/mtd/maps/esb2rom.c
@@ -144,7 +144,7 @@
 	pci_dev_put(window->pdev);
 }
 
-static int __devinit esb2rom_init_one(struct pci_dev *pdev,
+static int esb2rom_init_one(struct pci_dev *pdev,
 				      const struct pci_device_id *ent)
 {
 	static char *rom_probe_types[] = { "cfi_probe", "jedec_probe", NULL };
@@ -378,13 +378,13 @@
 	return 0;
 }
 
-static void __devexit esb2rom_remove_one (struct pci_dev *pdev)
+static void esb2rom_remove_one(struct pci_dev *pdev)
 {
 	struct esb2rom_window *window = &esb2rom_window;
 	esb2rom_cleanup(window);
 }
 
-static struct pci_device_id esb2rom_pci_tbl[] __devinitdata = {
+static struct pci_device_id esb2rom_pci_tbl[] = {
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0,
 	  PCI_ANY_ID, PCI_ANY_ID, },
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
diff --git a/drivers/mtd/maps/fortunet.c b/drivers/mtd/maps/fortunet.c
deleted file mode 100644
index 956e2e4..0000000
--- a/drivers/mtd/maps/fortunet.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/* fortunet.c memory map
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-#include <asm/io.h>
-
-#define MAX_NUM_REGIONS		4
-#define MAX_NUM_PARTITIONS	8
-
-#define DEF_WINDOW_ADDR_PHY	0x00000000
-#define DEF_WINDOW_SIZE		0x00800000		// 8 Mega Bytes
-
-#define MTD_FORTUNET_PK		"MTD FortuNet: "
-
-#define MAX_NAME_SIZE		128
-
-struct map_region
-{
-	int			window_addr_physical;
-	int			altbankwidth;
-	struct map_info		map_info;
-	struct mtd_info		*mymtd;
-	struct mtd_partition	parts[MAX_NUM_PARTITIONS];
-	char			map_name[MAX_NAME_SIZE];
-	char			parts_name[MAX_NUM_PARTITIONS][MAX_NAME_SIZE];
-};
-
-static struct map_region	map_regions[MAX_NUM_REGIONS];
-static int			map_regions_set[MAX_NUM_REGIONS] = {0,0,0,0};
-static int			map_regions_parts[MAX_NUM_REGIONS] = {0,0,0,0};
-
-
-
-struct map_info default_map = {
-	.size = DEF_WINDOW_SIZE,
-	.bankwidth = 4,
-};
-
-static char * __init get_string_option(char *dest,int dest_size,char *sor)
-{
-	if(!dest_size)
-		return sor;
-	dest_size--;
-	while(*sor)
-	{
-		if(*sor==',')
-		{
-			sor++;
-			break;
-		}
-		else if(*sor=='\"')
-		{
-			sor++;
-			while(*sor)
-			{
-				if(*sor=='\"')
-				{
-					sor++;
-					break;
-				}
-				*dest = *sor;
-				dest++;
-				sor++;
-				dest_size--;
-				if(!dest_size)
-				{
-					*dest = 0;
-					return sor;
-				}
-			}
-		}
-		else
-		{
-			*dest = *sor;
-			dest++;
-			sor++;
-			dest_size--;
-			if(!dest_size)
-			{
-				*dest = 0;
-				return sor;
-			}
-		}
-	}
-	*dest = 0;
-	return sor;
-}
-
-static int __init MTD_New_Region(char *line)
-{
-	char	string[MAX_NAME_SIZE];
-	int	params[6];
-	get_options (get_string_option(string,sizeof(string),line),6,params);
-	if(params[0]<1)
-	{
-		printk(MTD_FORTUNET_PK "Bad parameters for MTD Region "
-			" name,region-number[,base,size,bankwidth,altbankwidth]\n");
-		return 1;
-	}
-	if((params[1]<0)||(params[1]>=MAX_NUM_REGIONS))
-	{
-		printk(MTD_FORTUNET_PK "Bad region index of %d only have 0..%u regions\n",
-			params[1],MAX_NUM_REGIONS-1);
-		return 1;
-	}
-	memset(&map_regions[params[1]],0,sizeof(map_regions[params[1]]));
-	memcpy(&map_regions[params[1]].map_info,
-		&default_map,sizeof(map_regions[params[1]].map_info));
-        map_regions_set[params[1]] = 1;
-        map_regions[params[1]].window_addr_physical = DEF_WINDOW_ADDR_PHY;
-        map_regions[params[1]].altbankwidth = 2;
-        map_regions[params[1]].mymtd = NULL;
-	map_regions[params[1]].map_info.name = map_regions[params[1]].map_name;
-	strcpy(map_regions[params[1]].map_info.name,string);
-	if(params[0]>1)
-	{
-		map_regions[params[1]].window_addr_physical = params[2];
-	}
-	if(params[0]>2)
-	{
-		map_regions[params[1]].map_info.size = params[3];
-	}
-	if(params[0]>3)
-	{
-		map_regions[params[1]].map_info.bankwidth = params[4];
-	}
-	if(params[0]>4)
-	{
-		map_regions[params[1]].altbankwidth = params[5];
-	}
-	return 1;
-}
-
-static int __init MTD_New_Partition(char *line)
-{
-	char	string[MAX_NAME_SIZE];
-	int	params[4];
-	get_options (get_string_option(string,sizeof(string),line),4,params);
-	if(params[0]<3)
-	{
-		printk(MTD_FORTUNET_PK "Bad parameters for MTD Partition "
-			" name,region-number,size,offset\n");
-		return 1;
-	}
-	if((params[1]<0)||(params[1]>=MAX_NUM_REGIONS))
-	{
-		printk(MTD_FORTUNET_PK "Bad region index of %d only have 0..%u regions\n",
-			params[1],MAX_NUM_REGIONS-1);
-		return 1;
-	}
-	if(map_regions_parts[params[1]]>=MAX_NUM_PARTITIONS)
-	{
-		printk(MTD_FORTUNET_PK "Out of space for partition in this region\n");
-		return 1;
-	}
-	map_regions[params[1]].parts[map_regions_parts[params[1]]].name =
-		map_regions[params[1]].	parts_name[map_regions_parts[params[1]]];
-	strcpy(map_regions[params[1]].parts[map_regions_parts[params[1]]].name,string);
-	map_regions[params[1]].parts[map_regions_parts[params[1]]].size =
-		params[2];
-	map_regions[params[1]].parts[map_regions_parts[params[1]]].offset =
-		params[3];
-	map_regions[params[1]].parts[map_regions_parts[params[1]]].mask_flags = 0;
-	map_regions_parts[params[1]]++;
-	return 1;
-}
-
-__setup("MTD_Region=", MTD_New_Region);
-__setup("MTD_Partition=", MTD_New_Partition);
-
-/* Backwards-spelling-compatibility */
-__setup("MTD_Partion=", MTD_New_Partition);
-
-static int __init init_fortunet(void)
-{
-	int	ix,iy;
-	for(iy=ix=0;ix<MAX_NUM_REGIONS;ix++)
-	{
-		if(map_regions_parts[ix]&&(!map_regions_set[ix]))
-		{
-			printk(MTD_FORTUNET_PK "Region %d is not setup (Setting to default)\n",
-				ix);
-			memset(&map_regions[ix],0,sizeof(map_regions[ix]));
-			memcpy(&map_regions[ix].map_info,&default_map,
-				sizeof(map_regions[ix].map_info));
-			map_regions_set[ix] = 1;
-			map_regions[ix].window_addr_physical = DEF_WINDOW_ADDR_PHY;
-			map_regions[ix].altbankwidth = 2;
-			map_regions[ix].mymtd = NULL;
-			map_regions[ix].map_info.name = map_regions[ix].map_name;
-			strcpy(map_regions[ix].map_info.name,"FORTUNET");
-		}
-		if(map_regions_set[ix])
-		{
-			iy++;
-			printk(KERN_NOTICE MTD_FORTUNET_PK "%s flash device at physically "
-				" address %x size %x\n",
-				map_regions[ix].map_info.name,
-				map_regions[ix].window_addr_physical,
-				map_regions[ix].map_info.size);
-
-			map_regions[ix].map_info.phys =	map_regions[ix].window_addr_physical,
-
-			map_regions[ix].map_info.virt =
-				ioremap_nocache(
-				map_regions[ix].window_addr_physical,
-				map_regions[ix].map_info.size);
-			if(!map_regions[ix].map_info.virt)
-			{
-				int j = 0;
-				printk(MTD_FORTUNET_PK "%s flash failed to ioremap!\n",
-					map_regions[ix].map_info.name);
-				for (j = 0 ; j < ix; j++)
-					iounmap(map_regions[j].map_info.virt);
-				return -ENXIO;
-			}
-			simple_map_init(&map_regions[ix].map_info);
-
-			printk(KERN_NOTICE MTD_FORTUNET_PK "%s flash is virtually at: %x\n",
-				map_regions[ix].map_info.name,
-				map_regions[ix].map_info.virt);
-			map_regions[ix].mymtd = do_map_probe("cfi_probe",
-				&map_regions[ix].map_info);
-			if((!map_regions[ix].mymtd)&&(
-				map_regions[ix].altbankwidth!=map_regions[ix].map_info.bankwidth))
-			{
-				printk(KERN_NOTICE MTD_FORTUNET_PK "Trying alternate bankwidth "
-					"for %s flash.\n",
-					map_regions[ix].map_info.name);
-				map_regions[ix].map_info.bankwidth =
-					map_regions[ix].altbankwidth;
-				map_regions[ix].mymtd = do_map_probe("cfi_probe",
-					&map_regions[ix].map_info);
-			}
-			map_regions[ix].mymtd->owner = THIS_MODULE;
-			mtd_device_register(map_regions[ix].mymtd,
-					    map_regions[ix].parts,
-					    map_regions_parts[ix]);
-		}
-	}
-	if(iy)
-		return 0;
-	return -ENXIO;
-}
-
-static void __exit cleanup_fortunet(void)
-{
-	int	ix;
-	for(ix=0;ix<MAX_NUM_REGIONS;ix++)
-	{
-		if(map_regions_set[ix])
-		{
-			if( map_regions[ix].mymtd )
-			{
-				mtd_device_unregister(map_regions[ix].mymtd);
-				map_destroy( map_regions[ix].mymtd );
-			}
-			iounmap((void *)map_regions[ix].map_info.virt);
-		}
-	}
-}
-
-module_init(init_fortunet);
-module_exit(cleanup_fortunet);
-
-MODULE_AUTHOR("FortuNet, Inc.");
-MODULE_DESCRIPTION("MTD map driver for FortuNet boards");
diff --git a/drivers/mtd/maps/gpio-addr-flash.c b/drivers/mtd/maps/gpio-addr-flash.c
index e4de96b..7b643de 100644
--- a/drivers/mtd/maps/gpio-addr-flash.c
+++ b/drivers/mtd/maps/gpio-addr-flash.c
@@ -26,7 +26,8 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#define pr_devinit(fmt, args...) ({ static const __devinitconst char __fmt[] = fmt; printk(__fmt, ## args); })
+#define pr_devinit(fmt, args...) \
+	({ static const char __fmt[] = fmt; printk(__fmt, ## args); })
 
 #define DRIVER_NAME "gpio-addr-flash"
 #define PFX DRIVER_NAME ": "
@@ -142,7 +143,8 @@
  *
  * See gf_copy_from() caveat.
  */
-static void gf_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+static void gf_copy_to(struct map_info *map, unsigned long to,
+		       const void *from, ssize_t len)
 {
 	struct async_state *state = gf_map_info_to_state(map);
 
@@ -185,7 +187,7 @@
  *	...
  * };
  */
-static int __devinit gpio_flash_probe(struct platform_device *pdev)
+static int gpio_flash_probe(struct platform_device *pdev)
 {
 	size_t i, arr_size;
 	struct physmap_flash_data *pdata;
@@ -258,7 +260,7 @@
 	return 0;
 }
 
-static int __devexit gpio_flash_remove(struct platform_device *pdev)
+static int gpio_flash_remove(struct platform_device *pdev)
 {
 	struct async_state *state = platform_get_drvdata(pdev);
 	size_t i = 0;
@@ -273,7 +275,7 @@
 
 static struct platform_driver gpio_flash_driver = {
 	.probe		= gpio_flash_probe,
-	.remove		= __devexit_p(gpio_flash_remove),
+	.remove		= gpio_flash_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 	},
diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c
index 6689dcb..c7478e1 100644
--- a/drivers/mtd/maps/ichxrom.c
+++ b/drivers/mtd/maps/ichxrom.c
@@ -84,8 +84,8 @@
 }
 
 
-static int __devinit ichxrom_init_one (struct pci_dev *pdev,
-	const struct pci_device_id *ent)
+static int ichxrom_init_one(struct pci_dev *pdev,
+			    const struct pci_device_id *ent)
 {
 	static char *rom_probe_types[] = { "cfi_probe", "jedec_probe", NULL };
 	struct ichxrom_window *window = &ichxrom_window;
@@ -315,13 +315,13 @@
 }
 
 
-static void __devexit ichxrom_remove_one (struct pci_dev *pdev)
+static void ichxrom_remove_one(struct pci_dev *pdev)
 {
 	struct ichxrom_window *window = &ichxrom_window;
 	ichxrom_cleanup(window);
 }
 
-static struct pci_device_id ichxrom_pci_tbl[] __devinitdata = {
+static struct pci_device_id ichxrom_pci_tbl[] = {
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0,
 	  PCI_ANY_ID, PCI_ANY_ID, },
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
diff --git a/drivers/mtd/maps/intel_vr_nor.c b/drivers/mtd/maps/intel_vr_nor.c
index 93f0317..3ee2ad1 100644
--- a/drivers/mtd/maps/intel_vr_nor.c
+++ b/drivers/mtd/maps/intel_vr_nor.c
@@ -63,24 +63,24 @@
 #define TIMING_BYTE_EN		(1 <<  0)	/* 8-bit vs 16-bit bus */
 #define TIMING_MASK		0x3FFF0000
 
-static void __devexit vr_nor_destroy_partitions(struct vr_nor_mtd *p)
+static void vr_nor_destroy_partitions(struct vr_nor_mtd *p)
 {
 	mtd_device_unregister(p->info);
 }
 
-static int __devinit vr_nor_init_partitions(struct vr_nor_mtd *p)
+static int vr_nor_init_partitions(struct vr_nor_mtd *p)
 {
 	/* register the flash bank */
 	/* partition the flash bank */
 	return mtd_device_parse_register(p->info, NULL, NULL, NULL, 0);
 }
 
-static void __devexit vr_nor_destroy_mtd_setup(struct vr_nor_mtd *p)
+static void vr_nor_destroy_mtd_setup(struct vr_nor_mtd *p)
 {
 	map_destroy(p->info);
 }
 
-static int __devinit vr_nor_mtd_setup(struct vr_nor_mtd *p)
+static int vr_nor_mtd_setup(struct vr_nor_mtd *p)
 {
 	static const char *probe_types[] =
 	    { "cfi_probe", "jedec_probe", NULL };
@@ -96,7 +96,7 @@
 	return 0;
 }
 
-static void __devexit vr_nor_destroy_maps(struct vr_nor_mtd *p)
+static void vr_nor_destroy_maps(struct vr_nor_mtd *p)
 {
 	unsigned int exp_timing_cs0;
 
@@ -116,7 +116,7 @@
  * Initialize the map_info structure and map the flash.
  * Returns 0 on success, nonzero otherwise.
  */
-static int __devinit vr_nor_init_maps(struct vr_nor_mtd *p)
+static int vr_nor_init_maps(struct vr_nor_mtd *p)
 {
 	unsigned long csr_phys, csr_len;
 	unsigned long win_phys, win_len;
@@ -176,7 +176,7 @@
 	{0,}
 };
 
-static void __devexit vr_nor_pci_remove(struct pci_dev *dev)
+static void vr_nor_pci_remove(struct pci_dev *dev)
 {
 	struct vr_nor_mtd *p = pci_get_drvdata(dev);
 
@@ -189,7 +189,7 @@
 	pci_disable_device(dev);
 }
 
-static int __devinit
+static int
 vr_nor_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct vr_nor_mtd *p = NULL;
@@ -256,7 +256,7 @@
 static struct pci_driver vr_nor_pci_driver = {
 	.name = DRV_NAME,
 	.probe = vr_nor_pci_probe,
-	.remove = __devexit_p(vr_nor_pci_remove),
+	.remove = vr_nor_pci_remove,
 	.id_table = vr_nor_pci_ids,
 };
 
diff --git a/drivers/mtd/maps/lantiq-flash.c b/drivers/mtd/maps/lantiq-flash.c
index c03456f..3c3c791 100644
--- a/drivers/mtd/maps/lantiq-flash.c
+++ b/drivers/mtd/maps/lantiq-flash.c
@@ -45,7 +45,7 @@
 };
 
 static const char ltq_map_name[] = "ltq_nor";
-static const char *ltq_probe_types[] __devinitconst = {
+static const char *ltq_probe_types[] = {
 					"cmdlinepart", "ofpart", NULL };
 
 static map_word
@@ -109,7 +109,7 @@
 	spin_unlock_irqrestore(&ebu_lock, flags);
 }
 
-static int __devinit
+static int
 ltq_mtd_probe(struct platform_device *pdev)
 {
 	struct mtd_part_parser_data ppdata;
@@ -185,7 +185,7 @@
 	return err;
 }
 
-static int __devexit
+static int
 ltq_mtd_remove(struct platform_device *pdev)
 {
 	struct ltq_mtd *ltq_mtd = platform_get_drvdata(pdev);
@@ -209,7 +209,7 @@
 
 static struct platform_driver ltq_mtd_driver = {
 	.probe = ltq_mtd_probe,
-	.remove = __devexit_p(ltq_mtd_remove),
+	.remove = ltq_mtd_remove,
 	.driver = {
 		.name = "ltq-nor",
 		.owner = THIS_MODULE,
diff --git a/drivers/mtd/maps/latch-addr-flash.c b/drivers/mtd/maps/latch-addr-flash.c
index 3c7ad17..ab0fead 100644
--- a/drivers/mtd/maps/latch-addr-flash.c
+++ b/drivers/mtd/maps/latch-addr-flash.c
@@ -125,7 +125,7 @@
 	return 0;
 }
 
-static int __devinit latch_addr_flash_probe(struct platform_device *dev)
+static int latch_addr_flash_probe(struct platform_device *dev)
 {
 	struct latch_addr_flash_data *latch_addr_data;
 	struct latch_addr_flash_info *info;
@@ -218,7 +218,7 @@
 
 static struct platform_driver latch_addr_flash_driver = {
 	.probe		= latch_addr_flash_probe,
-	.remove		= __devexit_p(latch_addr_flash_remove),
+	.remove		= latch_addr_flash_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 	},
diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c
index 1c30c1a..ed82914 100644
--- a/drivers/mtd/maps/pci.c
+++ b/drivers/mtd/maps/pci.c
@@ -253,7 +253,7 @@
  * Generic code follows.
  */
 
-static int __devinit
+static int
 mtd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct mtd_pci_info *info = (struct mtd_pci_info *)id->driver_data;
@@ -308,7 +308,7 @@
 	return err;
 }
 
-static void __devexit
+static void
 mtd_pci_remove(struct pci_dev *dev)
 {
 	struct mtd_info *mtd = pci_get_drvdata(dev);
@@ -326,7 +326,7 @@
 static struct pci_driver mtd_pci_driver = {
 	.name =		"MTD PCI",
 	.probe =	mtd_pci_probe,
-	.remove =	__devexit_p(mtd_pci_remove),
+	.remove =	mtd_pci_remove,
 	.id_table =	mtd_pci_ids,
 };
 
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 6f19aca..37cdc20 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -77,7 +77,7 @@
 /* Helper function to handle probing of the obsolete "direct-mapped"
  * compatible binding, which has an extra "probe-type" property
  * describing the type of flash probe necessary. */
-static struct mtd_info * __devinit obsolete_probe(struct platform_device *dev,
+static struct mtd_info *obsolete_probe(struct platform_device *dev,
 						  struct map_info *map)
 {
 	struct device_node *dp = dev->dev.of_node;
@@ -116,7 +116,7 @@
    information. */
 static const char *part_probe_types_def[] = { "cmdlinepart", "RedBoot",
 					"ofpart", "ofoldpart", NULL };
-static const char ** __devinit of_get_probes(struct device_node *dp)
+static const char **of_get_probes(struct device_node *dp)
 {
 	const char *cp;
 	int cplen;
@@ -145,14 +145,14 @@
 	return res;
 }
 
-static void __devinit of_free_probes(const char **probes)
+static void of_free_probes(const char **probes)
 {
 	if (probes != part_probe_types_def)
 		kfree(probes);
 }
 
 static struct of_device_id of_flash_match[];
-static int __devinit of_flash_probe(struct platform_device *dev)
+static int of_flash_probe(struct platform_device *dev)
 {
 	const char **part_probe_types;
 	const struct of_device_id *match;
@@ -170,6 +170,7 @@
 	resource_size_t res_size;
 	struct mtd_part_parser_data ppdata;
 	bool map_indirect;
+	const char *mtd_name = NULL;
 
 	match = of_match_device(of_flash_match, &dev->dev);
 	if (!match)
@@ -178,6 +179,8 @@
 
 	reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32);
 
+	of_property_read_string(dp, "linux,mtd-name", &mtd_name);
+
 	/*
 	 * Get number of "reg" tuples. Scan for MTD devices on area's
 	 * described by each "reg" region. This makes it possible (including
@@ -234,7 +237,7 @@
 			goto err_out;
 		}
 
-		info->list[i].map.name = dev_name(&dev->dev);
+		info->list[i].map.name = mtd_name ?: dev_name(&dev->dev);
 		info->list[i].map.phys = res.start;
 		info->list[i].map.size = res_size;
 		info->list[i].map.bankwidth = be32_to_cpup(width);
@@ -282,6 +285,7 @@
 	}
 
 	err = 0;
+	info->cmtd = NULL;
 	if (info->list_size == 1) {
 		info->cmtd = info->list[0].mtd;
 	} else if (info->list_size > 1) {
@@ -290,9 +294,10 @@
 		 */
 		info->cmtd = mtd_concat_create(mtd_list, info->list_size,
 					       dev_name(&dev->dev));
-		if (info->cmtd == NULL)
-			err = -ENXIO;
 	}
+	if (info->cmtd == NULL)
+		err = -ENXIO;
+
 	if (err)
 		goto err_out;
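
The map-name fallback above uses the GCC "a ?: b" extension, which evaluates
to a when a is non-NULL and to b otherwise, evaluating a only once. With
mtd_name initialized to NULL, the two forms are equivalent:

	info->list[i].map.name = mtd_name ?: dev_name(&dev->dev);
	info->list[i].map.name = mtd_name ? mtd_name : dev_name(&dev->dev);
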
 
diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c
index 65bd1cd..afea93b 100644
--- a/drivers/mtd/maps/pismo.c
+++ b/drivers/mtd/maps/pismo.c
@@ -58,7 +58,7 @@
 	pismo->vpp(pismo->vpp_data, on);
 }
 
-static unsigned int __devinit pismo_width_to_bytes(unsigned int width)
+static unsigned int pismo_width_to_bytes(unsigned int width)
 {
 	width &= 15;
 	if (width > 2)
@@ -66,7 +66,7 @@
 	return 1 << width;
 }
 
-static int __devinit pismo_eeprom_read(struct i2c_client *client, void *buf,
+static int pismo_eeprom_read(struct i2c_client *client, void *buf,
 	u8 addr, size_t size)
 {
 	int ret;
@@ -88,7 +88,7 @@
 	return ret == ARRAY_SIZE(msg) ? size : -EIO;
 }
 
-static int __devinit pismo_add_device(struct pismo_data *pismo, int i,
+static int pismo_add_device(struct pismo_data *pismo, int i,
 	struct pismo_mem *region, const char *name, void *pdata, size_t psize)
 {
 	struct platform_device *dev;
@@ -129,7 +129,7 @@
 	return ret;
 }
 
-static int __devinit pismo_add_nor(struct pismo_data *pismo, int i,
+static int pismo_add_nor(struct pismo_data *pismo, int i,
 	struct pismo_mem *region)
 {
 	struct physmap_flash_data data = {
@@ -143,7 +143,7 @@
 		&data, sizeof(data));
 }
 
-static int __devinit pismo_add_sram(struct pismo_data *pismo, int i,
+static int pismo_add_sram(struct pismo_data *pismo, int i,
 	struct pismo_mem *region)
 {
 	struct platdata_mtd_ram data = {
@@ -154,7 +154,7 @@
 		&data, sizeof(data));
 }
 
-static void __devinit pismo_add_one(struct pismo_data *pismo, int i,
+static void pismo_add_one(struct pismo_data *pismo, int i,
 	const struct pismo_cs_block *cs, phys_addr_t base)
 {
 	struct device *dev = &pismo->client->dev;
@@ -197,7 +197,7 @@
 	}
 }
 
-static int __devexit pismo_remove(struct i2c_client *client)
+static int pismo_remove(struct i2c_client *client)
 {
 	struct pismo_data *pismo = i2c_get_clientdata(client);
 	int i;
@@ -210,7 +210,7 @@
 	return 0;
 }
 
-static int __devinit pismo_probe(struct i2c_client *client,
+static int pismo_probe(struct i2c_client *client,
 				 const struct i2c_device_id *id)
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
@@ -267,7 +267,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= pismo_probe,
-	.remove		= __devexit_p(pismo_remove),
+	.remove		= pismo_remove,
 	.id_table	= pismo_id,
 };
 
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index 81884c2..43e3dbb 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -49,7 +49,7 @@
 static const char *probes[] = { "RedBoot", "cmdlinepart", NULL };
 
 
-static int __devinit pxa2xx_flash_probe(struct platform_device *pdev)
+static int pxa2xx_flash_probe(struct platform_device *pdev)
 {
 	struct flash_platform_data *flash = pdev->dev.platform_data;
 	struct pxa2xx_flash_info *info;
@@ -105,7 +105,7 @@
 	return 0;
 }
 
-static int __devexit pxa2xx_flash_remove(struct platform_device *dev)
+static int pxa2xx_flash_remove(struct platform_device *dev)
 {
 	struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
 
@@ -139,7 +139,7 @@
 		.owner		= THIS_MODULE,
 	},
 	.probe		= pxa2xx_flash_probe,
-	.remove		= __devexit_p(pxa2xx_flash_remove),
+	.remove		= pxa2xx_flash_remove,
 	.shutdown	= pxa2xx_flash_shutdown,
 };
 
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index a675bdb..f694417 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -149,8 +149,8 @@
 		plat->exit();
 }
 
-static struct sa_info *__devinit
-sa1100_setup_mtd(struct platform_device *pdev, struct flash_platform_data *plat)
+static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev,
+					struct flash_platform_data *plat)
 {
 	struct sa_info *info;
 	int nr, size, i, ret = 0;
@@ -246,7 +246,7 @@
 
 static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };
 
-static int __devinit sa1100_mtd_probe(struct platform_device *pdev)
+static int sa1100_mtd_probe(struct platform_device *pdev)
 {
 	struct flash_platform_data *plat = pdev->dev.platform_data;
 	struct sa_info *info;
diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c
index 9dcbc68..71796137 100644
--- a/drivers/mtd/maps/scb2_flash.c
+++ b/drivers/mtd/maps/scb2_flash.c
@@ -69,7 +69,7 @@
 };
 static int region_fail;
 
-static int __devinit
+static int
 scb2_fixup_mtd(struct mtd_info *mtd)
 {
 	int i;
@@ -133,7 +133,7 @@
 /* CSB5's 'Function Control Register' has bits for decoding @ >= 0xffc00000 */
 #define CSB5_FCR	0x41
 #define CSB5_FCR_DECODE_ALL 0x0e
-static int __devinit
+static int
 scb2_flash_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 {
 	u8 reg;
@@ -197,7 +197,7 @@
 	return 0;
 }
 
-static void __devexit
+static void
 scb2_flash_remove(struct pci_dev *dev)
 {
 	if (!scb2_mtd)
@@ -231,7 +231,7 @@
 	.name =     "Intel SCB2 BIOS Flash",
 	.id_table = scb2_flash_pci_ids,
 	.probe =    scb2_flash_probe,
-	.remove =   __devexit_p(scb2_flash_remove),
+	.remove =   scb2_flash_remove,
 };
 
 module_pci_driver(scb2_flash_driver);
diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c
index 175e537..d467f3b 100644
--- a/drivers/mtd/maps/sun_uflash.c
+++ b/drivers/mtd/maps/sun_uflash.c
@@ -108,7 +108,7 @@
 	return 0;
 }
 
-static int __devinit uflash_probe(struct platform_device *op)
+static int uflash_probe(struct platform_device *op)
 {
 	struct device_node *dp = op->dev.of_node;
 
@@ -121,7 +121,7 @@
 	return uflash_devinit(op, dp);
 }
 
-static int __devexit uflash_remove(struct platform_device *op)
+static int uflash_remove(struct platform_device *op)
 {
 	struct uflash_dev *up = dev_get_drvdata(&op->dev);
 
@@ -155,7 +155,7 @@
 		.of_match_table = uflash_match,
 	},
 	.probe		= uflash_probe,
-	.remove		= __devexit_p(uflash_remove),
+	.remove		= uflash_remove,
 };
 
 module_platform_driver(uflash_driver);
diff --git a/drivers/mtd/maps/vmu-flash.c b/drivers/mtd/maps/vmu-flash.c
index 2e2b094..6b223cf 100644
--- a/drivers/mtd/maps/vmu-flash.c
+++ b/drivers/mtd/maps/vmu-flash.c
@@ -596,7 +596,7 @@
 }
 
 /* Handles very basic info about the flash, queries for details */
-static int __devinit vmu_connect(struct maple_device *mdev)
+static int vmu_connect(struct maple_device *mdev)
 {
 	unsigned long test_flash_data, basic_flash_data;
 	int c, error;
@@ -690,7 +690,7 @@
 	return error;
 }
 
-static void __devexit vmu_disconnect(struct maple_device *mdev)
+static void vmu_disconnect(struct maple_device *mdev)
 {
 	struct memcard *card;
 	struct mdev_part *mpart;
@@ -772,7 +772,7 @@
 }
 
 
-static int __devinit probe_maple_vmu(struct device *dev)
+static int probe_maple_vmu(struct device *dev)
 {
 	int error;
 	struct maple_device *mdev = to_maple_dev(dev);
@@ -789,7 +789,7 @@
 	return 0;
 }
 
-static int __devexit remove_maple_vmu(struct device *dev)
+static int remove_maple_vmu(struct device *dev)
 {
 	struct maple_device *mdev = to_maple_dev(dev);
 
@@ -802,7 +802,7 @@
 	.drv = {
 		.name =		"Dreamcast_visual_memory",
 		.probe =	probe_maple_vmu,
-		.remove = 	__devexit_p(remove_maple_vmu),
+		.remove =	remove_maple_vmu,
 	},
 };
 
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index f1f0671..5ad39bb 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,7 +32,6 @@
 #include <linux/hdreg.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
-#include <linux/kthread.h>
 #include <asm/uaccess.h>
 
 #include "mtdcore.h"
@@ -121,16 +120,14 @@
 
 int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
 {
-	if (kthread_should_stop())
-		return 1;
-
 	return dev->bg_stop;
 }
 EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
 
-static int mtd_blktrans_thread(void *arg)
+static void mtd_blktrans_work(struct work_struct *work)
 {
-	struct mtd_blktrans_dev *dev = arg;
+	struct mtd_blktrans_dev *dev =
+		container_of(work, struct mtd_blktrans_dev, work);
 	struct mtd_blktrans_ops *tr = dev->tr;
 	struct request_queue *rq = dev->rq;
 	struct request *req = NULL;
@@ -138,7 +135,7 @@
 
 	spin_lock_irq(rq->queue_lock);
 
-	while (!kthread_should_stop()) {
+	while (1) {
 		int res;
 
 		dev->bg_stop = false;
@@ -156,15 +153,7 @@
 				background_done = !dev->bg_stop;
 				continue;
 			}
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			if (kthread_should_stop())
-				set_current_state(TASK_RUNNING);
-
-			spin_unlock_irq(rq->queue_lock);
-			schedule();
-			spin_lock_irq(rq->queue_lock);
-			continue;
+			break;
 		}
 
 		spin_unlock_irq(rq->queue_lock);
@@ -185,8 +174,6 @@
 		__blk_end_request_all(req, -EIO);
 
 	spin_unlock_irq(rq->queue_lock);
-
-	return 0;
 }
 
 static void mtd_blktrans_request(struct request_queue *rq)
@@ -199,10 +186,8 @@
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
 			__blk_end_request_all(req, -ENODEV);
-	else {
-		dev->bg_stop = true;
-		wake_up_process(dev->thread);
-	}
+	else
+		queue_work(dev->wq, &dev->work);
 }
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
@@ -325,7 +310,7 @@
 	return ret;
 }
 
-static const struct block_device_operations mtd_blktrans_ops = {
+static const struct block_device_operations mtd_block_ops = {
 	.owner		= THIS_MODULE,
 	.open		= blktrans_open,
 	.release	= blktrans_release,
@@ -401,7 +386,7 @@
 	gd->private_data = new;
 	gd->major = tr->major;
 	gd->first_minor = (new->devnum) << tr->part_bits;
-	gd->fops = &mtd_blktrans_ops;
+	gd->fops = &mtd_block_ops;
 
 	if (tr->part_bits)
 		if (new->devnum < 26)
@@ -437,14 +422,13 @@
 
 	gd->queue = new->rq;
 
-	/* Create processing thread */
-	/* TODO: workqueue ? */
-	new->thread = kthread_run(mtd_blktrans_thread, new,
-			"%s%d", tr->name, new->mtd->index);
-	if (IS_ERR(new->thread)) {
-		ret = PTR_ERR(new->thread);
+	/* Create processing workqueue */
+	new->wq = alloc_workqueue("%s%d", 0, 0,
+				  tr->name, new->mtd->index);
+	if (!new->wq)
 		goto error4;
-	}
+	INIT_WORK(&new->work, mtd_blktrans_work);
+
 	gd->driverfs_dev = &new->mtd->dev;
 
 	if (new->readonly)
@@ -484,9 +468,8 @@
 	/* Stop new requests to arrive */
 	del_gendisk(old->disk);
 
-
-	/* Stop the thread */
-	kthread_stop(old->thread);
+	/* Stop workqueue. This will flush any pending requests. */
+	destroy_workqueue(old->wq);
 
 	/* Kill current requests */
 	spin_lock_irqsave(&old->queue_lock, flags);
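The kthread-to-workqueue conversion above follows a common shape: the
open-coded sleep loop (set_current_state()/schedule()) disappears, the worker
simply returns once the request queue is empty, and the request function
requeues the work item whenever new requests arrive; destroy_workqueue() then
replaces kthread_stop() and flushes outstanding work on the way out. A
stripped-down sketch of the same shape, with hypothetical foo_* helpers:

	struct foo_dev {
		struct workqueue_struct *wq;
		struct work_struct work;
	};

	static void foo_work(struct work_struct *work)
	{
		struct foo_dev *dev = container_of(work, struct foo_dev, work);

		/* drain everything queued so far, then just return; no
		 * kthread_should_stop() bookkeeping is needed */
		while (foo_fetch_request(dev))
			foo_handle_request(dev);
	}

	static void foo_request_arrived(struct foo_dev *dev)
	{
		queue_work(dev->wq, &dev->work);	/* kicks foo_work() */
	}

	static int foo_start(struct foo_dev *dev, int index)
	{
		dev->wq = alloc_workqueue("foo%d", 0, 0, index);
		if (!dev->wq)
			return -ENOMEM;
		INIT_WORK(&dev->work, foo_work);
		return 0;
	}

	static void foo_stop(struct foo_dev *dev)
	{
		destroy_workqueue(dev->wq);	/* flushes pending work first */
	}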
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index f5b3f91..97bb8f6 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -271,7 +271,7 @@
 
 		if (count[0] == 0xffffffff && count[1] == 0xffffffff)
 			mark_page_unused(cxt, page);
-		if (count[0] == 0xffffffff)
+		if (count[0] == 0xffffffff || count[1] != MTDOOPS_KERNMSG_MAGIC)
 			continue;
 		if (maxcount == 0xffffffff) {
 			maxcount = count[0];
@@ -289,14 +289,13 @@
 		}
 	}
 	if (maxcount == 0xffffffff) {
-		cxt->nextpage = 0;
-		cxt->nextcount = 1;
-		schedule_work(&cxt->work_erase);
-		return;
+		cxt->nextpage = cxt->oops_pages - 1;
+		cxt->nextcount = 0;
 	}
-
-	cxt->nextpage = maxpos;
-	cxt->nextcount = maxcount;
+	else {
+		cxt->nextpage = maxpos;
+		cxt->nextcount = maxcount;
+	}
 
 	mtdoops_inc_counter(cxt);
 }
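Two behavioral changes ride along in this hunk: a page only counts as a valid
record if its second header word carries the kernel-message magic, and a fully
erased device now starts from the last page (count 0) instead of scheduling an
immediate erase, so the caller's increment wraps cleanly to page 0, count 1.
Ignoring the counter-wraparound window the real function also handles, the
selection logic reduces to roughly this sketch (array storage stands in for
the per-page header reads; MTDOOPS_KERNMSG_MAGIC is from mtdoops.c):

	/* hdr[i][0] = record counter, hdr[i][1] = magic; 0xffffffff = erased */
	static int find_newest_record(const u32 (*hdr)[2], int pages)
	{
		u32 maxcount = 0xffffffff;
		int i, maxpos = 0;

		for (i = 0; i < pages; i++) {
			if (hdr[i][0] == 0xffffffff ||
			    hdr[i][1] != MTDOOPS_KERNMSG_MAGIC)
				continue;	/* erased, or not an oops record */
			if (maxcount == 0xffffffff || hdr[i][0] > maxcount) {
				maxcount = hdr[i][0];
				maxpos = i;
			}
		}
		if (maxcount == 0xffffffff)
			return pages - 1;	/* all erased: wrap to page 0 next */
		return maxpos;
	}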
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index dae191b..5819eb5 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -50,16 +50,30 @@
 	  of these chips were reused by later, larger chips.
 
 config MTD_NAND_DENALI
-       depends on PCI
+        tristate "Support Denali NAND controller"
+        help
+	  Enable support for the Denali NAND controller.  This should be
+	  combined with either the PCI or platform drivers to provide device
+	  registration.
+
+config MTD_NAND_DENALI_PCI
         tristate "Support Denali NAND controller on Intel Moorestown"
+	depends on PCI && MTD_NAND_DENALI
         help
           Enable the driver for NAND flash on Intel Moorestown, using the
           Denali NAND controller core.
- 
+
+config MTD_NAND_DENALI_DT
+	tristate "Support Denali NAND controller as a DT device"
+	depends on HAVE_CLK && MTD_NAND_DENALI
+	help
+	  Enable the driver for NAND flash on platforms using a Denali NAND
+	  controller as a DT device.
+
 config MTD_NAND_DENALI_SCRATCH_REG_ADDR
         hex "Denali NAND size scratch register address"
         default "0xFF108018"
-        depends on MTD_NAND_DENALI
+        depends on MTD_NAND_DENALI_PCI
         help
           Some platforms place the NAND chip size in a scratch register
           because (some versions of) the driver aren't able to automatically
@@ -433,6 +447,14 @@
 	 block, such as SD card. So pay attention to it when you enable
 	 the GPMI.
 
+config MTD_NAND_BCM47XXNFLASH
+	tristate "Support for NAND flash on BCM4706 BCMA bus"
+	depends on BCMA_NFLASH
+	help
+	  The BCMA bus can have various flash memories attached; they are
+	  registered by bcma as platform devices. This enables the driver for
+	  NAND flash memories. For now only BCM4706 is supported.
+
 config MTD_NAND_PLATFORM
 	tristate "Support for generic platform NAND driver"
 	depends on HAS_IOMEM
@@ -499,12 +521,6 @@
 	  This enables the driver for the NAND flash controller on the
 	  MXC processors.
 
-config MTD_NAND_NOMADIK
-	tristate "ST Nomadik 8815 NAND support"
-	depends on ARCH_NOMADIK
-	help
-	  Driver for the NAND flash controller on the Nomadik, with ECC.
-
 config MTD_NAND_SH_FLCTL
 	tristate "Support for NAND on Renesas SuperH FLCTL"
 	depends on SUPERH || ARCH_SHMOBILE
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 6c7f2b3..d76d912 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -11,6 +11,8 @@
 obj-$(CONFIG_MTD_NAND_CAFE)		+= cafe_nand.o
 obj-$(CONFIG_MTD_NAND_AMS_DELTA)	+= ams-delta.o
 obj-$(CONFIG_MTD_NAND_DENALI)		+= denali.o
+obj-$(CONFIG_MTD_NAND_DENALI_PCI)	+= denali_pci.o
+obj-$(CONFIG_MTD_NAND_DENALI_DT)	+= denali_dt.o
 obj-$(CONFIG_MTD_NAND_AU1550)		+= au1550nd.o
 obj-$(CONFIG_MTD_NAND_BF5XX)		+= bf5xx_nand.o
 obj-$(CONFIG_MTD_NAND_PPCHAMELEONEVB)	+= ppchameleonevb.o
@@ -45,11 +47,11 @@
 obj-$(CONFIG_MTD_NAND_SOCRATES)		+= socrates_nand.o
 obj-$(CONFIG_MTD_NAND_TXX9NDFMC)	+= txx9ndfmc.o
 obj-$(CONFIG_MTD_NAND_NUC900)		+= nuc900_nand.o
-obj-$(CONFIG_MTD_NAND_NOMADIK)		+= nomadik_nand.o
 obj-$(CONFIG_MTD_NAND_MPC5121_NFC)	+= mpc5121_nfc.o
 obj-$(CONFIG_MTD_NAND_RICOH)		+= r852.o
 obj-$(CONFIG_MTD_NAND_JZ4740)		+= jz4740_nand.o
 obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
 obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
+obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
 
 nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c
index 9e7723a..f1d71cd 100644
--- a/drivers/mtd/nand/ams-delta.c
+++ b/drivers/mtd/nand/ams-delta.c
@@ -173,7 +173,7 @@
 /*
  * Main initialization routine
  */
-static int __devinit ams_delta_init(struct platform_device *pdev)
+static int ams_delta_init(struct platform_device *pdev)
 {
 	struct nand_chip *this;
 	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -270,7 +270,7 @@
 /*
  * Clean up routine
  */
-static int __devexit ams_delta_cleanup(struct platform_device *pdev)
+static int ams_delta_cleanup(struct platform_device *pdev)
 {
 	void __iomem *io_base = platform_get_drvdata(pdev);
 
@@ -289,7 +289,7 @@
 
 static struct platform_driver ams_delta_nand_driver = {
 	.probe		= ams_delta_init,
-	.remove		= __devexit_p(ams_delta_cleanup),
+	.remove		= ams_delta_cleanup,
 	.driver		= {
 		.name	= "ams-delta-nand",
 		.owner	= THIS_MODULE,
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 92623ac..90bdca6 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -331,13 +331,13 @@
  *               12-bits                20-bytes                 21-bytes
  *               24-bits                39-bytes                 42-bytes
  */
-static int __devinit pmecc_get_ecc_bytes(int cap, int sector_size)
+static int pmecc_get_ecc_bytes(int cap, int sector_size)
 {
 	int m = 12 + sector_size / 512;
 	return (m * cap + 7) / 8;
 }
 
-static void __devinit pmecc_config_ecc_layout(struct nand_ecclayout *layout,
+static void pmecc_config_ecc_layout(struct nand_ecclayout *layout,
 	int oobsize, int ecc_len)
 {
 	int i;
@@ -353,7 +353,7 @@
 		oobsize - ecc_len - layout->oobfree[0].offset;
 }
 
-static void __devinit __iomem *pmecc_get_alpha_to(struct atmel_nand_host *host)
+static void __iomem *pmecc_get_alpha_to(struct atmel_nand_host *host)
 {
 	int table_size;
 
@@ -375,7 +375,7 @@
 	kfree(host->pmecc_delta);
 }
 
-static int __devinit pmecc_data_alloc(struct atmel_nand_host *host)
+static int pmecc_data_alloc(struct atmel_nand_host *host)
 {
 	const int cap = host->pmecc_corr_cap;
 
@@ -724,6 +724,7 @@
 	struct atmel_nand_host *host = nand_chip->priv;
 	int i, err_nbr, eccbytes;
 	uint8_t *buf_pos;
+	int total_err = 0;
 
 	eccbytes = nand_chip->ecc.bytes;
 	for (i = 0; i < eccbytes; i++)
@@ -751,12 +752,13 @@
 				pmecc_correct_data(mtd, buf_pos, ecc, i,
 					host->pmecc_bytes_per_sector, err_nbr);
 				mtd->ecc_stats.corrected += err_nbr;
+				total_err += err_nbr;
 			}
 		}
 		pmecc_stat >>= 1;
 	}
 
-	return 0;
+	return total_err;
 }
 
 static int atmel_nand_pmecc_read_page(struct mtd_info *mtd,
@@ -768,6 +770,7 @@
 	uint32_t *eccpos = chip->ecc.layout->eccpos;
 	uint32_t stat;
 	unsigned long end_time;
+	int bitflips = 0;
 
 	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
 	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
@@ -790,11 +793,14 @@
 	}
 
 	stat = pmecc_readl_relaxed(host->ecc, ISR);
-	if (stat != 0)
-		if (pmecc_correction(mtd, stat, buf, &oob[eccpos[0]]) != 0)
-			return -EIO;
+	if (stat != 0) {
+		bitflips = pmecc_correction(mtd, stat, buf, &oob[eccpos[0]]);
+		if (bitflips < 0)
+			/* uncorrectable errors */
+			return 0;
+	}
 
-	return 0;
+	return bitflips;
 }
 
 static int atmel_nand_pmecc_write_page(struct mtd_info *mtd,
@@ -1206,7 +1212,7 @@
 }
 
 #if defined(CONFIG_OF)
-static int __devinit atmel_of_init_port(struct atmel_nand_host *host,
+static int atmel_of_init_port(struct atmel_nand_host *host,
 					 struct device_node *np)
 {
 	u32 val, table_offset;
@@ -1293,7 +1299,7 @@
 	return 0;
 }
 #else
-static int __devinit atmel_of_init_port(struct atmel_nand_host *host,
+static int atmel_of_init_port(struct atmel_nand_host *host,
 					 struct device_node *np)
 {
 	return -EINVAL;
diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 5c47b20..217459d 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -382,7 +382,7 @@
 	while(!this->dev_ready(mtd));
 }
 
-static int __devinit find_nand_cs(unsigned long nand_base)
+static int find_nand_cs(unsigned long nand_base)
 {
 	void __iomem *base =
 			(void __iomem *)KSEG1ADDR(AU1000_STATIC_MEM_PHYS_ADDR);
@@ -403,7 +403,7 @@
 	return -ENODEV;
 }
 
-static int __devinit au1550nd_probe(struct platform_device *pdev)
+static int au1550nd_probe(struct platform_device *pdev)
 {
 	struct au1550nd_platdata *pd;
 	struct au1550nd_ctx *ctx;
@@ -491,7 +491,7 @@
 	return ret;
 }
 
-static int __devexit au1550nd_remove(struct platform_device *pdev)
+static int au1550nd_remove(struct platform_device *pdev)
 {
 	struct au1550nd_ctx *ctx = platform_get_drvdata(pdev);
 	struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -509,7 +509,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= au1550nd_probe,
-	.remove		= __devexit_p(au1550nd_remove),
+	.remove		= au1550nd_remove,
 };
 
 module_platform_driver(au1550nd_driver);
diff --git a/drivers/mtd/nand/bcm47xxnflash/Makefile b/drivers/mtd/nand/bcm47xxnflash/Makefile
new file mode 100644
index 0000000..f05b119
--- /dev/null
+++ b/drivers/mtd/nand/bcm47xxnflash/Makefile
@@ -0,0 +1,4 @@
+bcm47xxnflash-y				+= main.o
+bcm47xxnflash-y				+= ops_bcm4706.o
+
+obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash.o
diff --git a/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h
new file mode 100644
index 0000000..0bdb2ce
--- /dev/null
+++ b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h
@@ -0,0 +1,22 @@
+#ifndef __BCM47XXNFLASH_H
+#define __BCM47XXNFLASH_H
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+
+struct bcm47xxnflash {
+	struct bcma_drv_cc *cc;
+
+	struct nand_chip nand_chip;
+	struct mtd_info mtd;
+
+	unsigned curr_command;
+	int curr_page_addr;
+	int curr_column;
+
+	u8 id_data[8];
+};
+
+int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n);
+
+#endif /* __BCM47XXNFLASH_H */
diff --git a/drivers/mtd/nand/bcm47xxnflash/main.c b/drivers/mtd/nand/bcm47xxnflash/main.c
new file mode 100644
index 0000000..2b8b05b
--- /dev/null
+++ b/drivers/mtd/nand/bcm47xxnflash/main.c
@@ -0,0 +1,108 @@
+/*
+ * BCM47XX NAND flash driver
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/bcma/bcma.h>
+
+#include "bcm47xxnflash.h"
+
+MODULE_DESCRIPTION("NAND flash driver for BCMA bus");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rafał Miłecki");
+
+static const char *probes[] = { "bcm47xxpart", NULL };
+
+static int bcm47xxnflash_probe(struct platform_device *pdev)
+{
+	struct bcma_nflash *nflash = dev_get_platdata(&pdev->dev);
+	struct bcm47xxnflash *b47n;
+	int err = 0;
+
+	b47n = kzalloc(sizeof(*b47n), GFP_KERNEL);
+	if (!b47n) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	b47n->nand_chip.priv = b47n;
+	b47n->mtd.owner = THIS_MODULE;
+	b47n->mtd.priv = &b47n->nand_chip; /* Required */
+	b47n->cc = container_of(nflash, struct bcma_drv_cc, nflash);
+
+	if (b47n->cc->core->bus->chipinfo.id == BCMA_CHIP_ID_BCM4706) {
+		err = bcm47xxnflash_ops_bcm4706_init(b47n);
+	} else {
+		pr_err("Device not supported\n");
+		err = -ENOTSUPP;
+	}
+	if (err) {
+		pr_err("Initialization failed: %d\n", err);
+		goto err_init;
+	}
+
+	err = mtd_device_parse_register(&b47n->mtd, probes, NULL, NULL, 0);
+	if (err) {
+		pr_err("Failed to register MTD device: %d\n", err);
+		goto err_dev_reg;
+	}
+
+	return 0;
+
+err_dev_reg:
+err_init:
+	kfree(b47n);
+out:
+	return err;
+}
+
+static int bcm47xxnflash_remove(struct platform_device *pdev)
+{
+	struct bcma_nflash *nflash = dev_get_platdata(&pdev->dev);
+
+	if (nflash->mtd)
+		mtd_device_unregister(nflash->mtd);
+
+	return 0;
+}
+
+static struct platform_driver bcm47xxnflash_driver = {
+	.remove = bcm47xxnflash_remove,
+	.driver = {
+		.name = "bcma_nflash",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init bcm47xxnflash_init(void)
+{
+	int err;
+
+	/*
+	 * Platform device "bcma_nflash" exists on SoCs and is registered very
+	 * early; it won't be added at runtime, hence platform_driver_probe().
+	 */
+	err = platform_driver_probe(&bcm47xxnflash_driver, bcm47xxnflash_probe);
+	if (err)
+		pr_err("Failed to register serial flash driver: %d\n", err);
+
+	return err;
+}
+
+static void __exit bcm47xxnflash_exit(void)
+{
+	platform_driver_unregister(&bcm47xxnflash_driver);
+}
+
+module_init(bcm47xxnflash_init);
+module_exit(bcm47xxnflash_exit);
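As the comment in bcm47xxnflash_init() notes, platform_driver_probe() suits
devices that are registered once, early, and never hot-plugged: the probe
routine is passed in rather than stored in the driver structure, so it can
live in discardable init memory, and registration fails outright if nothing
bound. The skeleton of that pattern, with hypothetical names:

	static int __init foo_probe(struct platform_device *pdev)
	{
		/* the device is guaranteed to be registered already */
		return 0;
	}

	static struct platform_driver foo_driver = {
		/* note: no .probe member - it goes to platform_driver_probe() */
		.driver = {
			.name	= "foo",
			.owner	= THIS_MODULE,
		},
	};

	static int __init foo_init(void)
	{
		/* returns -ENODEV if no matching device was bound */
		return platform_driver_probe(&foo_driver, foo_probe);
	}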
diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
new file mode 100644
index 0000000..86c9a79
--- /dev/null
+++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
@@ -0,0 +1,413 @@
+/*
+ * BCM47XX NAND flash driver
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/bcma/bcma.h>
+
+#include "bcm47xxnflash.h"
+
+/* Broadcom uses 1'000'000 but it seems to be too many. Tests on a WNDR4500
+ * have shown 164 retries as the maximum. */
+#define NFLASH_READY_RETRIES		1000
+
+#define NFLASH_SECTOR_SIZE		512
+
+#define NCTL_CMD0			0x00010000
+#define NCTL_CMD1W			0x00080000
+#define NCTL_READ			0x00100000
+#define NCTL_WRITE			0x00200000
+#define NCTL_SPECADDR			0x01000000
+#define NCTL_READY			0x04000000
+#define NCTL_ERR			0x08000000
+#define NCTL_CSA			0x40000000
+#define NCTL_START			0x80000000
+
+/**************************************************
+ * Various helpers
+ **************************************************/
+
+static inline u8 bcm47xxnflash_ops_bcm4706_ns_to_cycle(u16 ns, u16 clock)
+{
+	return ((ns * 1000 * clock) / 1000000) + 1;
+}
+
+static int bcm47xxnflash_ops_bcm4706_ctl_cmd(struct bcma_drv_cc *cc, u32 code)
+{
+	int i = 0;
+
+	bcma_cc_write32(cc, BCMA_CC_NFLASH_CTL, NCTL_START | code);
+	for (i = 0; i < NFLASH_READY_RETRIES; i++) {
+		if (!(bcma_cc_read32(cc, BCMA_CC_NFLASH_CTL) & NCTL_START)) {
+			i = 0;
+			break;
+		}
+	}
+	if (i) {
+		pr_err("NFLASH control command not ready!\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+static int bcm47xxnflash_ops_bcm4706_poll(struct bcma_drv_cc *cc)
+{
+	int i;
+
+	for (i = 0; i < NFLASH_READY_RETRIES; i++) {
+		if (bcma_cc_read32(cc, BCMA_CC_NFLASH_CTL) & NCTL_READY) {
+			if (bcma_cc_read32(cc, BCMA_CC_NFLASH_CTL) &
+			    BCMA_CC_NFLASH_CTL_ERR) {
+				pr_err("Error on polling\n");
+				return -EBUSY;
+			} else {
+				return 0;
+			}
+		}
+	}
+
+	pr_err("Polling timeout!\n");
+	return -EBUSY;
+}
+
+/**************************************************
+ * R/W
+ **************************************************/
+
+static void bcm47xxnflash_ops_bcm4706_read(struct mtd_info *mtd, uint8_t *buf,
+					   int len)
+{
+	struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv;
+	struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv;
+
+	u32 ctlcode;
+	u32 *dest = (u32 *)buf;
+	int i;
+	int toread;
+
+	BUG_ON(b47n->curr_page_addr & ~nand_chip->pagemask);
+	/* Don't validate the column using nand_chip->page_shift; it may be
+	 * bigger when accessing OOB */
+
+	while (len) {
+		/* We can read maximum of 0x200 bytes at once */
+		toread = min(len, 0x200);
+
+		/* Set page and column */
+		bcma_cc_write32(b47n->cc, BCMA_CC_NFLASH_COL_ADDR,
+				b47n->curr_column);
+		bcma_cc_write32(b47n->cc, BCMA_CC_NFLASH_ROW_ADDR,
+				b47n->curr_page_addr);
+
+		/* Prepare to read */
+		ctlcode = NCTL_CSA | NCTL_CMD1W | 0x00040000 | 0x00020000 |
+			  NCTL_CMD0;
+		ctlcode |= NAND_CMD_READSTART << 8;
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(b47n->cc, ctlcode))
+			return;
+		if (bcm47xxnflash_ops_bcm4706_poll(b47n->cc))
+			return;
+
+		/* Eventually read some data :) */
+		for (i = 0; i < toread; i += 4, dest++) {
+			ctlcode = NCTL_CSA | 0x30000000 | NCTL_READ;
+			if (i == toread - 4) /* Last read goes without that */
+				ctlcode &= ~NCTL_CSA;
+			if (bcm47xxnflash_ops_bcm4706_ctl_cmd(b47n->cc,
+							      ctlcode))
+				return;
+			*dest = bcma_cc_read32(b47n->cc, BCMA_CC_NFLASH_DATA);
+		}
+
+		b47n->curr_column += toread;
+		len -= toread;
+	}
+}
+
+static void bcm47xxnflash_ops_bcm4706_write(struct mtd_info *mtd,
+					    const uint8_t *buf, int len)
+{
+	struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv;
+	struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv;
+	struct bcma_drv_cc *cc = b47n->cc;
+
+	u32 ctlcode;
+	const u32 *data = (u32 *)buf;
+	int i;
+
+	BUG_ON(b47n->curr_page_addr & ~nand_chip->pagemask);
+	/* Don't validate the column using nand_chip->page_shift; it may be
+	 * bigger when accessing OOB */
+
+	for (i = 0; i < len; i += 4, data++) {
+		bcma_cc_write32(cc, BCMA_CC_NFLASH_DATA, *data);
+
+		ctlcode = NCTL_CSA | 0x30000000 | NCTL_WRITE;
+		if (i == len - 4) /* Last write goes without that */
+			ctlcode &= ~NCTL_CSA;
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, ctlcode)) {
+			pr_err("%s ctl_cmd didn't work!\n", __func__);
+			return;
+		}
+	}
+
+	b47n->curr_column += len;
+}
+
+/**************************************************
+ * NAND chip ops
+ **************************************************/
+
+/* Default nand_select_chip calls cmd_ctrl, which is not used in BCM4706 */
+static void bcm47xxnflash_ops_bcm4706_select_chip(struct mtd_info *mtd,
+						  int chip)
+{
+	return;
+}
+
+/*
+ * Default nand_command and nand_command_lp don't match the BCM4706 hardware
+ * layout. For example, reading the chip id is performed in a non-standard
+ * way. Setting column and page is also handled differently; we use special
+ * registers of the ChipCommon core. Hacking cmd_ctrl to understand and
+ * convert standard commands would be much more complicated.
+ */
+static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd,
+					      unsigned command, int column,
+					      int page_addr)
+{
+	struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv;
+	struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv;
+	struct bcma_drv_cc *cc = b47n->cc;
+	u32 ctlcode;
+	int i;
+
+	if (column != -1)
+		b47n->curr_column = column;
+	if (page_addr != -1)
+		b47n->curr_page_addr = page_addr;
+
+	switch (command) {
+	case NAND_CMD_RESET:
+		pr_warn("Chip reset not implemented yet\n");
+		break;
+	case NAND_CMD_READID:
+		ctlcode = NCTL_CSA | 0x01000000 | NCTL_CMD1W | NCTL_CMD0;
+		ctlcode |= NAND_CMD_READID;
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(b47n->cc, ctlcode)) {
+			pr_err("READID error\n");
+			break;
+		}
+
+		/*
+		 * Reads are special: the last one has to go without the
+		 * NCTL_CSA bit. We don't know how many reads the NAND
+		 * subsystem is going to perform, so cache everything.
+		 */
+		for (i = 0; i < ARRAY_SIZE(b47n->id_data); i++) {
+			ctlcode = NCTL_CSA | NCTL_READ;
+			if (i == ARRAY_SIZE(b47n->id_data) - 1)
+				ctlcode &= ~NCTL_CSA;
+			if (bcm47xxnflash_ops_bcm4706_ctl_cmd(b47n->cc,
+							      ctlcode)) {
+				pr_err("READID error\n");
+				break;
+			}
+			b47n->id_data[i] =
+				bcma_cc_read32(b47n->cc, BCMA_CC_NFLASH_DATA)
+				& 0xFF;
+		}
+
+		break;
+	case NAND_CMD_STATUS:
+		ctlcode = NCTL_CSA | NCTL_CMD0 | NAND_CMD_STATUS;
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, ctlcode))
+			pr_err("STATUS command error\n");
+		break;
+	case NAND_CMD_READ0:
+		break;
+	case NAND_CMD_READOOB:
+		if (page_addr != -1)
+			b47n->curr_column += mtd->writesize;
+		break;
+	case NAND_CMD_ERASE1:
+		bcma_cc_write32(cc, BCMA_CC_NFLASH_ROW_ADDR,
+				b47n->curr_page_addr);
+		ctlcode = 0x00040000 | NCTL_CMD1W | NCTL_CMD0 |
+			  NAND_CMD_ERASE1 | (NAND_CMD_ERASE2 << 8);
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, ctlcode))
+			pr_err("ERASE1 failed\n");
+		break;
+	case NAND_CMD_ERASE2:
+		break;
+	case NAND_CMD_SEQIN:
+		/* Set page and column */
+		bcma_cc_write32(cc, BCMA_CC_NFLASH_COL_ADDR,
+				b47n->curr_column);
+		bcma_cc_write32(cc, BCMA_CC_NFLASH_ROW_ADDR,
+				b47n->curr_page_addr);
+
+		/* Prepare to write */
+		ctlcode = NCTL_CSA | 0x00040000 | 0x00020000 | NCTL_CMD0;
+		ctlcode |= NAND_CMD_SEQIN;
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, ctlcode))
+			pr_err("SEQIN failed\n");
+		break;
+	case NAND_CMD_PAGEPROG:
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, 0x00010000 |
+							  NAND_CMD_PAGEPROG))
+			pr_err("PAGEPROG failed\n");
+		if (bcm47xxnflash_ops_bcm4706_poll(cc))
+			pr_err("PAGEPROG not ready\n");
+		break;
+	default:
+		pr_err("Command 0x%X unsupported\n", command);
+		break;
+	}
+	b47n->curr_command = command;
+}
+
+static u8 bcm47xxnflash_ops_bcm4706_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv;
+	struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv;
+	struct bcma_drv_cc *cc = b47n->cc;
+	u32 tmp = 0;
+
+	switch (b47n->curr_command) {
+	case NAND_CMD_READID:
+		if (b47n->curr_column >= ARRAY_SIZE(b47n->id_data)) {
+			pr_err("Requested invalid id_data: %d\n",
+			       b47n->curr_column);
+			return 0;
+		}
+		return b47n->id_data[b47n->curr_column++];
+	case NAND_CMD_STATUS:
+		if (bcm47xxnflash_ops_bcm4706_ctl_cmd(cc, NCTL_READ))
+			return 0;
+		return bcma_cc_read32(cc, BCMA_CC_NFLASH_DATA) & 0xff;
+	case NAND_CMD_READOOB:
+		bcm47xxnflash_ops_bcm4706_read(mtd, (u8 *)&tmp, 4);
+		return tmp & 0xFF;
+	}
+
+	pr_err("Invalid command for byte read: 0x%X\n", b47n->curr_command);
+	return 0;
+}
+
+static void bcm47xxnflash_ops_bcm4706_read_buf(struct mtd_info *mtd,
+					       uint8_t *buf, int len)
+{
+	struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv;
+	struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv;
+
+	switch (b47n->curr_command) {
+	case NAND_CMD_READ0:
+	case NAND_CMD_READOOB:
+		bcm47xxnflash_ops_bcm4706_read(mtd, buf, len);
+		return;
+	}
+
+	pr_err("Invalid command for buf read: 0x%X\n", b47n->curr_command);
+}
+
+static void bcm47xxnflash_ops_bcm4706_write_buf(struct mtd_info *mtd,
+						const uint8_t *buf, int len)
+{
+	struct nand_chip *nand_chip = (struct nand_chip *)mtd->priv;
+	struct bcm47xxnflash *b47n = (struct bcm47xxnflash *)nand_chip->priv;
+
+	switch (b47n->curr_command) {
+	case NAND_CMD_SEQIN:
+		bcm47xxnflash_ops_bcm4706_write(mtd, buf, len);
+		return;
+	}
+
+	pr_err("Invalid command for buf write: 0x%X\n", b47n->curr_command);
+}
+
+/**************************************************
+ * Init
+ **************************************************/
+
+int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
+{
+	int err;
+	u32 freq;
+	u16 clock;
+	u8 w0, w1, w2, w3, w4;
+
+	unsigned long chipsize; /* MiB */
+	u8 tbits, col_bits, col_size, row_bits, row_bsize;
+	u32 val;
+
+	b47n->nand_chip.select_chip = bcm47xxnflash_ops_bcm4706_select_chip;
+	b47n->nand_chip.cmdfunc = bcm47xxnflash_ops_bcm4706_cmdfunc;
+	b47n->nand_chip.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
+	b47n->nand_chip.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
+	b47n->nand_chip.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
+	b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH;
+	b47n->nand_chip.ecc.mode = NAND_ECC_NONE; /* TODO: implement ECC */
+
+	/* Enable NAND flash access */
+	bcma_cc_set32(b47n->cc, BCMA_CC_4706_FLASHSCFG,
+		      BCMA_CC_4706_FLASHSCFG_NF1);
+
+	/* Configure wait counters */
+	if (b47n->cc->status & BCMA_CC_CHIPST_4706_PKG_OPTION) {
+		freq = 100000000;
+	} else {
+		freq = bcma_chipco_pll_read(b47n->cc, 4);
+		freq = (freq * 0xFFF) >> 3;
+		freq = (freq * 25000000) >> 3;
+	}
+	clock = freq / 1000000;
+	w0 = bcm47xxnflash_ops_bcm4706_ns_to_cycle(15, clock);
+	w1 = bcm47xxnflash_ops_bcm4706_ns_to_cycle(20, clock);
+	w2 = bcm47xxnflash_ops_bcm4706_ns_to_cycle(10, clock);
+	w3 = bcm47xxnflash_ops_bcm4706_ns_to_cycle(10, clock);
+	w4 = bcm47xxnflash_ops_bcm4706_ns_to_cycle(100, clock);
+	bcma_cc_write32(b47n->cc, BCMA_CC_NFLASH_WAITCNT0,
+			(w4 << 24 | w3 << 18 | w2 << 12 | w1 << 6 | w0));
+
+	/* Scan NAND */
+	err = nand_scan(&b47n->mtd, 1);
+	if (err) {
+		pr_err("Could not scan NAND flash: %d\n", err);
+		goto exit;
+	}
+
+	/* Configure FLASH */
+	chipsize = b47n->nand_chip.chipsize >> 20;
+	tbits = ffs(chipsize); /* find first bit set */
+	if (!tbits || tbits != fls(chipsize)) {
+		pr_err("Invalid flash size: 0x%lX\n", chipsize);
+		err = -ENOTSUPP;
+		goto exit;
+	}
+	tbits += 19; /* Broadcom increases *index* by 20, we increase *pos* */
+
+	col_bits = b47n->nand_chip.page_shift + 1;
+	col_size = (col_bits + 7) / 8;
+
+	row_bits = tbits - col_bits + 1;
+	row_bsize = (row_bits + 7) / 8;
+
+	val = ((row_bsize - 1) << 6) | ((col_size - 1) << 4) | 2;
+	bcma_cc_write32(b47n->cc, BCMA_CC_NFLASH_CONF, val);
+
+exit:
+	if (err)
+		bcma_cc_mask32(b47n->cc, BCMA_CC_4706_FLASHSCFG,
+			       ~BCMA_CC_4706_FLASHSCFG_NF1);
+	return err;
+}
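The wait-counter setup at the end of bcm47xxnflash_ops_bcm4706_init() rounds
nanosecond timings up to controller clock cycles (clock is in MHz) and packs
the five per-phase counts into 6-bit fields of WAITCNT0. A worked example for
the fixed 100 MHz package option:

	/* same arithmetic as bcm47xxnflash_ops_bcm4706_ns_to_cycle() */
	static inline u8 ns_to_cycle(u16 ns, u16 clock_mhz)
	{
		return ((ns * 1000 * clock_mhz) / 1000000) + 1;
	}

	/*
	 * clock = 100 MHz:
	 *   w0 = ns_to_cycle(15, 100)  = (1500000 / 1000000) + 1 = 2
	 *   w1 = ns_to_cycle(20, 100)  = 3
	 *   w2 = w3 = ns_to_cycle(10, 100) = 2
	 *   w4 = ns_to_cycle(100, 100) = 11
	 * register value = w4 << 24 | w3 << 18 | w2 << 12 | w1 << 6 | w0
	 */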
diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c
index ab0caa7..4271e94 100644
--- a/drivers/mtd/nand/bf5xx_nand.c
+++ b/drivers/mtd/nand/bf5xx_nand.c
@@ -658,7 +658,7 @@
 /*
  * Device management interface
  */
-static int __devinit bf5xx_nand_add_partition(struct bf5xx_nand_info *info)
+static int bf5xx_nand_add_partition(struct bf5xx_nand_info *info)
 {
 	struct mtd_info *mtd = &info->mtd;
 	struct mtd_partition *parts = info->platform->partitions;
@@ -667,7 +667,7 @@
 	return mtd_device_register(mtd, parts, nr);
 }
 
-static int __devexit bf5xx_nand_remove(struct platform_device *pdev)
+static int bf5xx_nand_remove(struct platform_device *pdev)
 {
 	struct bf5xx_nand_info *info = to_nand_info(pdev);
 
@@ -725,7 +725,7 @@
  * it can allocate all necessary resources then calls the
  * nand layer to look for devices
  */
-static int __devinit bf5xx_nand_probe(struct platform_device *pdev)
+static int bf5xx_nand_probe(struct platform_device *pdev)
 {
 	struct bf5xx_nand_platform *plat = to_nand_plat(pdev);
 	struct bf5xx_nand_info *info = NULL;
@@ -865,7 +865,7 @@
 /* driver device registration */
 static struct platform_driver bf5xx_nand_driver = {
 	.probe		= bf5xx_nand_probe,
-	.remove		= __devexit_p(bf5xx_nand_remove),
+	.remove		= bf5xx_nand_remove,
 	.suspend	= bf5xx_nand_suspend,
 	.resume		= bf5xx_nand_resume,
 	.driver		= {
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 2bb7170..010d612 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -585,7 +585,7 @@
 }
 
 /* F_2[X]/(X**6+X+1)  */
-static unsigned short __devinit gf64_mul(u8 a, u8 b)
+static unsigned short gf64_mul(u8 a, u8 b)
 {
 	u8 c;
 	unsigned int i;
@@ -604,7 +604,7 @@
 }
 
 /* F_64[X]/(X**2+X+A**-1) with A the generator of F_64[X]  */
-static u16 __devinit gf4096_mul(u16 a, u16 b)
+static u16 gf4096_mul(u16 a, u16 b)
 {
 	u8 ah, al, bh, bl, ch, cl;
 
@@ -619,14 +619,14 @@
 	return (ch << 6) ^ cl;
 }
 
-static int __devinit cafe_mul(int x)
+static int cafe_mul(int x)
 {
 	if (x == 0)
 		return 1;
 	return gf4096_mul(x, 0xe01);
 }
 
-static int __devinit cafe_nand_probe(struct pci_dev *pdev,
+static int cafe_nand_probe(struct pci_dev *pdev,
 				     const struct pci_device_id *ent)
 {
 	struct mtd_info *mtd;
@@ -821,7 +821,7 @@
 	return err;
 }
 
-static void __devexit cafe_nand_remove(struct pci_dev *pdev)
+static void cafe_nand_remove(struct pci_dev *pdev)
 {
 	struct mtd_info *mtd = pci_get_drvdata(pdev);
 	struct cafe_priv *cafe = mtd->priv;
@@ -887,7 +887,7 @@
 	.name = "CAFÉ NAND",
 	.id_table = cafe_nand_tbl,
 	.probe = cafe_nand_probe,
-	.remove = __devexit_p(cafe_nand_remove),
+	.remove = cafe_nand_remove,
 	.resume = cafe_nand_resume,
 };
 
diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c
index adb6c3e..2cdeab8 100644
--- a/drivers/mtd/nand/cs553x_nand.c
+++ b/drivers/mtd/nand/cs553x_nand.c
@@ -237,6 +237,7 @@
 	this->ecc.hwctl  = cs_enable_hwecc;
 	this->ecc.calculate = cs_calculate_ecc;
 	this->ecc.correct  = nand_correct_data;
+	this->ecc.strength = 1;
 
 	/* Enable the following for a flash based bad block table */
 	this->bbt_options = NAND_BBT_USE_FLASH;
@@ -247,8 +248,6 @@
 		goto out_ior;
 	}
 
-	this->ecc.strength = 1;
-
 	new_mtd->name = kasprintf(GFP_KERNEL, "cs553x_nand_cs%d", cs);
 
 	cs553x_mtd[cs] = new_mtd;
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 945047a..3502606 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -821,9 +821,16 @@
 	if (ret < 0)
 		goto err_scan;
 
-	ret = mtd_device_parse_register(&info->mtd, NULL, NULL, pdata->parts,
-					pdata->nr_parts);
+	if (pdata->parts)
+		ret = mtd_device_parse_register(&info->mtd, NULL, NULL,
+					pdata->parts, pdata->nr_parts);
+	else {
+		struct mtd_part_parser_data	ppdata;
 
+		ppdata.of_node = pdev->dev.of_node;
+		ret = mtd_device_parse_register(&info->mtd, NULL, &ppdata,
+						NULL, 0);
+	}
 	if (ret < 0)
 		goto err_scan;
 
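The davinci change is the standard pattern for drivers growing device-tree
support: if the board code supplied no static partition table, pass the
device's of_node to the MTD core through struct mtd_part_parser_data so the
generic ofpart parser can pull the partitions out of the device tree. The
essence of the fallback:

	if (pdata->parts) {
		/* board file supplied a static partition table */
		ret = mtd_device_parse_register(mtd, NULL, NULL,
						pdata->parts, pdata->nr_parts);
	} else {
		struct mtd_part_parser_data ppdata = {
			/* lets ofpart read partitions from the DT node */
			.of_node = pdev->dev.of_node,
		};

		ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
	}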
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index e706a23..0c8bb6b 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -16,14 +16,12 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  *
  */
-
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/wait.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/pci.h>
 #include <linux/mtd/mtd.h>
 #include <linux/module.h>
 
@@ -89,13 +87,6 @@
  * format the bank into the proper bits for the controller */
 #define BANK(x) ((x) << 24)
 
-/* List of platforms this NAND controller has be integrated into */
-static const struct pci_device_id denali_pci_ids[] = {
-	{ PCI_VDEVICE(INTEL, 0x0701), INTEL_CE4100 },
-	{ PCI_VDEVICE(INTEL, 0x0809), INTEL_MRST },
-	{ /* end: all zeroes */ }
-};
-
 /* forward declarations */
 static void clear_interrupts(struct denali_nand_info *denali);
 static uint32_t wait_for_irq(struct denali_nand_info *denali,
@@ -699,7 +690,7 @@
 
 	if (comp_res == 0) {
 		/* timeout */
-		printk(KERN_ERR "timeout occurred, status = 0x%x, mask = 0x%x\n",
+		pr_err("timeout occurred, status = 0x%x, mask = 0x%x\n",
 				intr_status, irq_mask);
 
 		intr_status = 0;
@@ -1305,8 +1296,7 @@
 		/* TODO: Read OOB data */
 		break;
 	default:
-		printk(KERN_ERR ": unsupported command"
-				" received 0x%x\n", cmd);
+		pr_err(": unsupported command received 0x%x\n", cmd);
 		break;
 	}
 }
@@ -1425,107 +1415,48 @@
 	denali->irq_status = 0;
 }
 
-/* driver entry point */
-static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+int denali_init(struct denali_nand_info *denali)
 {
-	int ret = -ENODEV;
-	resource_size_t csr_base, mem_base;
-	unsigned long csr_len, mem_len;
-	struct denali_nand_info *denali;
+	int ret;
 
-	denali = kzalloc(sizeof(*denali), GFP_KERNEL);
-	if (!denali)
-		return -ENOMEM;
-
-	ret = pci_enable_device(dev);
-	if (ret) {
-		printk(KERN_ERR "Spectra: pci_enable_device failed.\n");
-		goto failed_alloc_memery;
-	}
-
-	if (id->driver_data == INTEL_CE4100) {
+	if (denali->platform == INTEL_CE4100) {
 		/* Due to a silicon limitation, we can only support
 		 * ONFI timing mode 1 and below.
 		 */
 		if (onfi_timing_mode < -1 || onfi_timing_mode > 1) {
-			printk(KERN_ERR "Intel CE4100 only supports"
-					" ONFI timing mode 1 or below\n");
-			ret = -EINVAL;
-			goto failed_enable_dev;
-		}
-		denali->platform = INTEL_CE4100;
-		mem_base = pci_resource_start(dev, 0);
-		mem_len = pci_resource_len(dev, 1);
-		csr_base = pci_resource_start(dev, 1);
-		csr_len = pci_resource_len(dev, 1);
-	} else {
-		denali->platform = INTEL_MRST;
-		csr_base = pci_resource_start(dev, 0);
-		csr_len = pci_resource_len(dev, 0);
-		mem_base = pci_resource_start(dev, 1);
-		mem_len = pci_resource_len(dev, 1);
-		if (!mem_len) {
-			mem_base = csr_base + csr_len;
-			mem_len = csr_len;
+			pr_err("Intel CE4100 only supports ONFI timing mode 1 or below\n");
+			return -EINVAL;
 		}
 	}
 
 	/* Is 32-bit DMA supported? */
-	ret = dma_set_mask(&dev->dev, DMA_BIT_MASK(32));
+	ret = dma_set_mask(denali->dev, DMA_BIT_MASK(32));
 	if (ret) {
-		printk(KERN_ERR "Spectra: no usable DMA configuration\n");
-		goto failed_enable_dev;
+		pr_err("Spectra: no usable DMA configuration\n");
+		return ret;
 	}
-	denali->buf.dma_buf = dma_map_single(&dev->dev, denali->buf.buf,
+	denali->buf.dma_buf = dma_map_single(denali->dev, denali->buf.buf,
 					     DENALI_BUF_SIZE,
 					     DMA_BIDIRECTIONAL);
 
-	if (dma_mapping_error(&dev->dev, denali->buf.dma_buf)) {
-		dev_err(&dev->dev, "Spectra: failed to map DMA buffer\n");
-		goto failed_enable_dev;
+	if (dma_mapping_error(denali->dev, denali->buf.dma_buf)) {
+		dev_err(denali->dev, "Spectra: failed to map DMA buffer\n");
+		return -EIO;
 	}
-
-	pci_set_master(dev);
-	denali->dev = &dev->dev;
-	denali->mtd.dev.parent = &dev->dev;
-
-	ret = pci_request_regions(dev, DENALI_NAND_NAME);
-	if (ret) {
-		printk(KERN_ERR "Spectra: Unable to request memory regions\n");
-		goto failed_dma_map;
-	}
-
-	denali->flash_reg = ioremap_nocache(csr_base, csr_len);
-	if (!denali->flash_reg) {
-		printk(KERN_ERR "Spectra: Unable to remap memory region\n");
-		ret = -ENOMEM;
-		goto failed_req_regions;
-	}
-
-	denali->flash_mem = ioremap_nocache(mem_base, mem_len);
-	if (!denali->flash_mem) {
-		printk(KERN_ERR "Spectra: ioremap_nocache failed!");
-		ret = -ENOMEM;
-		goto failed_remap_reg;
-	}
-
+	denali->mtd.dev.parent = denali->dev;
 	denali_hw_init(denali);
 	denali_drv_init(denali);
 
 	/* denali_isr register is done after all the hardware
 	 * initilization is finished*/
-	if (request_irq(dev->irq, denali_isr, IRQF_SHARED,
+	if (request_irq(denali->irq, denali_isr, IRQF_SHARED,
 			DENALI_NAND_NAME, denali)) {
-		printk(KERN_ERR "Spectra: Unable to allocate IRQ\n");
-		ret = -ENODEV;
-		goto failed_remap_mem;
+		pr_err("Spectra: Unable to allocate IRQ\n");
+		return -ENODEV;
 	}
 
 	/* now that our ISR is registered, we can enable interrupts */
 	denali_set_intr_modes(denali, true);
-
-	pci_set_drvdata(dev, denali);
-
 	denali->mtd.name = "denali-nand";
 	denali->mtd.owner = THIS_MODULE;
 	denali->mtd.priv = &denali->nand;
@@ -1549,8 +1480,7 @@
 	 */
 	if (denali->mtd.writesize > NAND_MAX_PAGESIZE + NAND_MAX_OOBSIZE) {
 		ret = -ENODEV;
-		printk(KERN_ERR "Spectra: device size not supported by this "
-			"version of MTD.");
+		pr_err("Spectra: device size not supported by this version of MTD.");
 		goto failed_req_irq;
 	}
 
@@ -1602,8 +1532,8 @@
 	} else if (denali->mtd.oobsize < (denali->bbtskipbytes +
 			ECC_8BITS * (denali->mtd.writesize /
 			ECC_SECTOR_SIZE))) {
-		printk(KERN_ERR "Your NAND chip OOB is not large enough to"
-				" contain 8bit ECC correction codes");
+		pr_err("Your NAND chip OOB is not large enough to \
+				contain 8bit ECC correction codes");
 		goto failed_req_irq;
 	} else {
 		denali->nand.ecc.strength = 8;
@@ -1655,56 +1585,24 @@
 
 	ret = mtd_device_register(&denali->mtd, NULL, 0);
 	if (ret) {
-		dev_err(&dev->dev, "Spectra: Failed to register MTD: %d\n",
+		dev_err(denali->dev, "Spectra: Failed to register MTD: %d\n",
 				ret);
 		goto failed_req_irq;
 	}
 	return 0;
 
 failed_req_irq:
-	denali_irq_cleanup(dev->irq, denali);
-failed_remap_mem:
-	iounmap(denali->flash_mem);
-failed_remap_reg:
-	iounmap(denali->flash_reg);
-failed_req_regions:
-	pci_release_regions(dev);
-failed_dma_map:
-	dma_unmap_single(&dev->dev, denali->buf.dma_buf, DENALI_BUF_SIZE,
-			 DMA_BIDIRECTIONAL);
-failed_enable_dev:
-	pci_disable_device(dev);
-failed_alloc_memery:
-	kfree(denali);
+	denali_irq_cleanup(denali->irq, denali);
+
 	return ret;
 }
+EXPORT_SYMBOL(denali_init);
 
 /* driver exit point */
-static void denali_pci_remove(struct pci_dev *dev)
+void denali_remove(struct denali_nand_info *denali)
 {
-	struct denali_nand_info *denali = pci_get_drvdata(dev);
-
-	nand_release(&denali->mtd);
-
-	denali_irq_cleanup(dev->irq, denali);
-
-	iounmap(denali->flash_reg);
-	iounmap(denali->flash_mem);
-	pci_release_regions(dev);
-	pci_disable_device(dev);
-	dma_unmap_single(&dev->dev, denali->buf.dma_buf, DENALI_BUF_SIZE,
-			 DMA_BIDIRECTIONAL);
-	pci_set_drvdata(dev, NULL);
-	kfree(denali);
+	denali_irq_cleanup(denali->irq, denali);
+	dma_unmap_single(denali->dev, denali->buf.dma_buf, DENALI_BUF_SIZE,
+			DMA_BIDIRECTIONAL);
 }
-
-MODULE_DEVICE_TABLE(pci, denali_pci_ids);
-
-static struct pci_driver denali_pci_driver = {
-	.name = DENALI_NAND_NAME,
-	.id_table = denali_pci_ids,
-	.probe = denali_pci_probe,
-	.remove = denali_pci_remove,
-};
-
-module_pci_driver(denali_pci_driver);
+EXPORT_SYMBOL(denali_remove);
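After this refactor denali.c holds only controller logic; bus knowledge (PCI
BARs, DT resources, clocks) moves into glue drivers that fill in a struct
denali_nand_info and call the exported denali_init()/denali_remove() pair. In
outline, what any glue driver must set up first (a sketch; field names as in
denali.h below):

	denali->platform  = INTEL_MRST;		/* or INTEL_CE4100 / DT */
	denali->dev       = &pdev->dev;		/* used for DMA mapping, dev_err() */
	denali->irq       = irq;		/* denali_init() requests it itself */
	denali->flash_reg = csr_mapping;	/* ioremapped controller registers */
	denali->flash_mem = data_mapping;	/* ioremapped data window */

	ret = denali_init(denali);	/* maps DMA buffer, scans NAND,
					 * registers the MTD device */

	denali_remove(denali);		/* IRQ cleanup + DMA unmap; the glue
					 * still unmaps and frees its own bits */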
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index fabb9d5..cec5712 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -466,6 +466,7 @@
 
 #define INTEL_CE4100	1
 #define INTEL_MRST	2
+#define DT		3
 
 struct denali_nand_info {
 	struct mtd_info mtd;
@@ -487,6 +488,7 @@
 	uint32_t irq_status;
 	int irq_debug_array[32];
 	int idx;
+	int irq;
 
 	uint32_t devnum;	/* represent how many nands connected */
 	uint32_t fwblks; /* represent how many blocks FW used */
@@ -496,4 +498,7 @@
 	uint32_t max_banks;
 };
 
+extern int denali_init(struct denali_nand_info *denali);
+extern void denali_remove(struct denali_nand_info *denali);
+
 #endif /*_LLD_NAND_*/
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
new file mode 100644
index 0000000..546f8cb
--- /dev/null
+++ b/drivers/mtd/nand/denali_dt.c
@@ -0,0 +1,167 @@
+/*
+ * NAND Flash Controller Device Driver for DT
+ *
+ * Copyright © 2011, Picochip.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+
+#include "denali.h"
+
+struct denali_dt {
+	struct denali_nand_info	denali;
+	struct clk		*clk;
+};
+
+static void __iomem *request_and_map(struct device *dev,
+				     const struct resource *res)
+{
+	void __iomem *ptr;
+
+	if (!devm_request_mem_region(dev, res->start, resource_size(res),
+				     "denali-dt")) {
+		dev_err(dev, "unable to request %s\n", res->name);
+		return NULL;
+	}
+
+	ptr = devm_ioremap_nocache(dev, res->start, resource_size(res));
+	if (!ptr)
+		dev_err(dev, "ioremap_nocache of %s failed!\n", res->name);
+
+	return ptr;
+}
+
+static const struct of_device_id denali_nand_dt_ids[] = {
+	{ .compatible = "denali,denali-nand-dt" },
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
+
+static u64 denali_dma_mask;
+
+static int denali_dt_probe(struct platform_device *ofdev)
+{
+	struct resource *denali_reg, *nand_data;
+	struct denali_dt *dt;
+	struct denali_nand_info *denali;
+	int ret;
+	const struct of_device_id *of_id;
+
+	of_id = of_match_device(denali_nand_dt_ids, &ofdev->dev);
+	if (of_id) {
+		ofdev->id_entry = of_id->data;
+	} else {
+		pr_err("Failed to find the right device id.\n");
+		return -ENODEV;
+	}
+
+	dt = devm_kzalloc(&ofdev->dev, sizeof(*dt), GFP_KERNEL);
+	if (!dt)
+		return -ENOMEM;
+	denali = &dt->denali;
+
+	denali_reg = platform_get_resource_byname(ofdev, IORESOURCE_MEM, "denali_reg");
+	nand_data = platform_get_resource_byname(ofdev, IORESOURCE_MEM, "nand_data");
+	if (!denali_reg || !nand_data) {
+		dev_err(&ofdev->dev, "resources not completely defined\n");
+		return -EINVAL;
+	}
+
+	denali->platform = DT;
+	denali->dev = &ofdev->dev;
+	denali->irq = platform_get_irq(ofdev, 0);
+	if (denali->irq < 0) {
+		dev_err(&ofdev->dev, "no irq defined\n");
+		return -ENXIO;
+	}
+
+	denali->flash_reg = request_and_map(&ofdev->dev, denali_reg);
+	if (!denali->flash_reg)
+		return -ENOMEM;
+
+	denali->flash_mem = request_and_map(&ofdev->dev, nand_data);
+	if (!denali->flash_mem)
+		return -ENOMEM;
+
+	if (!of_property_read_u32(ofdev->dev.of_node,
+		"dma-mask", (u32 *)&denali_dma_mask)) {
+		denali->dev->dma_mask = &denali_dma_mask;
+	} else {
+		denali->dev->dma_mask = NULL;
+	}
+
+	dt->clk = clk_get(&ofdev->dev, NULL);
+	if (IS_ERR(dt->clk)) {
+		dev_err(&ofdev->dev, "no clk available\n");
+		return PTR_ERR(dt->clk);
+	}
+	clk_prepare_enable(dt->clk);
+
+	ret = denali_init(denali);
+	if (ret)
+		goto out_disable_clk;
+
+	platform_set_drvdata(ofdev, dt);
+	return 0;
+
+out_disable_clk:
+	clk_disable_unprepare(dt->clk);
+	clk_put(dt->clk);
+
+	return ret;
+}
+
+static int denali_dt_remove(struct platform_device *ofdev)
+{
+	struct denali_dt *dt = platform_get_drvdata(ofdev);
+
+	denali_remove(&dt->denali);
+	clk_disable_unprepare(dt->clk);
+	clk_put(dt->clk);
+
+	return 0;
+}
+
+static struct platform_driver denali_dt_driver = {
+	.probe		= denali_dt_probe,
+	.remove		= denali_dt_remove,
+	.driver		= {
+		.name	= "denali-nand-dt",
+		.owner	= THIS_MODULE,
+		.of_match_table	= of_match_ptr(denali_nand_dt_ids),
+	},
+};
+
+static int __init denali_init_dt(void)
+{
+	return platform_driver_register(&denali_dt_driver);
+}
+module_init(denali_init_dt);
+
+static void __exit denali_exit_dt(void)
+{
+	platform_driver_unregister(&denali_dt_driver);
+}
+module_exit(denali_exit_dt);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jamie Iles");
+MODULE_DESCRIPTION("DT driver for Denali NAND controller");
diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c
new file mode 100644
index 0000000..e3e4662
--- /dev/null
+++ b/drivers/mtd/nand/denali_pci.c
@@ -0,0 +1,144 @@
+/*
+ * NAND Flash Controller Device Driver
+ * Copyright © 2009-2010, Intel Corporation and its suppliers.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include "denali.h"
+
+#define DENALI_NAND_NAME    "denali-nand-pci"
+
+/* List of platforms this NAND controller has been integrated into */
+static DEFINE_PCI_DEVICE_TABLE(denali_pci_ids) = {
+	{ PCI_VDEVICE(INTEL, 0x0701), INTEL_CE4100 },
+	{ PCI_VDEVICE(INTEL, 0x0809), INTEL_MRST },
+	{ /* end: all zeroes */ }
+};
+MODULE_DEVICE_TABLE(pci, denali_pci_ids);
+
+static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	int ret = -ENODEV;
+	resource_size_t csr_base, mem_base;
+	unsigned long csr_len, mem_len;
+	struct denali_nand_info *denali;
+
+	denali = kzalloc(sizeof(*denali), GFP_KERNEL);
+	if (!denali)
+		return -ENOMEM;
+
+	ret = pci_enable_device(dev);
+	if (ret) {
+		pr_err("Spectra: pci_enable_device failed.\n");
+		goto failed_alloc_memory;
+	}
+
+	if (id->driver_data == INTEL_CE4100) {
+		denali->platform = INTEL_CE4100;
+		mem_base = pci_resource_start(dev, 0);
+		mem_len = pci_resource_len(dev, 1);
+		csr_base = pci_resource_start(dev, 1);
+		csr_len = pci_resource_len(dev, 1);
+	} else {
+		denali->platform = INTEL_MRST;
+		csr_base = pci_resource_start(dev, 0);
+		csr_len = pci_resource_len(dev, 0);
+		mem_base = pci_resource_start(dev, 1);
+		mem_len = pci_resource_len(dev, 1);
+		if (!mem_len) {
+			mem_base = csr_base + csr_len;
+			mem_len = csr_len;
+		}
+	}
+
+	pci_set_master(dev);
+	denali->dev = &dev->dev;
+	denali->irq = dev->irq;
+
+	ret = pci_request_regions(dev, DENALI_NAND_NAME);
+	if (ret) {
+		pr_err("Spectra: Unable to request memory regions\n");
+		goto failed_enable_dev;
+	}
+
+	denali->flash_reg = ioremap_nocache(csr_base, csr_len);
+	if (!denali->flash_reg) {
+		pr_err("Spectra: Unable to remap memory region\n");
+		ret = -ENOMEM;
+		goto failed_req_regions;
+	}
+
+	denali->flash_mem = ioremap_nocache(mem_base, mem_len);
+	if (!denali->flash_mem) {
+		pr_err("Spectra: ioremap_nocache failed!");
+		ret = -ENOMEM;
+		goto failed_remap_reg;
+	}
+
+	ret = denali_init(denali);
+	if (ret)
+		goto failed_remap_mem;
+
+	pci_set_drvdata(dev, denali);
+
+	return 0;
+
+failed_remap_mem:
+	iounmap(denali->flash_mem);
+failed_remap_reg:
+	iounmap(denali->flash_reg);
+failed_req_regions:
+	pci_release_regions(dev);
+failed_enable_dev:
+	pci_disable_device(dev);
+failed_alloc_memory:
+	kfree(denali);
+
+	return ret;
+}
+
+/* driver exit point */
+static void denali_pci_remove(struct pci_dev *dev)
+{
+	struct denali_nand_info *denali = pci_get_drvdata(dev);
+
+	denali_remove(denali);
+	iounmap(denali->flash_reg);
+	iounmap(denali->flash_mem);
+	pci_release_regions(dev);
+	pci_disable_device(dev);
+	pci_set_drvdata(dev, NULL);
+	kfree(denali);
+}
+
+static struct pci_driver denali_pci_driver = {
+	.name = DENALI_NAND_NAME,
+	.id_table = denali_pci_ids,
+	.probe = denali_pci_probe,
+	.remove = denali_pci_remove,
+};
+
+static int __init denali_init_pci(void)
+{
+	pr_info("Spectra MTD driver built on %s @ %s\n", __DATE__, __TIME__);
+	return pci_register_driver(&denali_pci_driver);
+}
+module_init(denali_init_pci);
+
+static void __exit denali_exit_pci(void)
+{
+	pci_unregister_driver(&denali_pci_driver);
+}
+module_exit(denali_exit_pci);
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index 256eb30..81fa578 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -53,8 +53,6 @@
 	0xe0000, 0xe2000, 0xe4000, 0xe6000,
 	0xe8000, 0xea000, 0xec000, 0xee000,
 #endif /*  CONFIG_MTD_DOCPROBE_HIGH */
-#else
-#warning Unknown architecture for DiskOnChip. No default probe locations defined
 #endif
 	0xffffffff };
 
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 799da5d..18fa448 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -46,6 +46,25 @@
 #include <linux/bitrev.h>
 
 /*
+ * In "reliable mode" consecutive 2k pages are used in parallel (in some
+ * fashion) to store the same data.  The data can be read back from the
+ * even-numbered pages in the normal manner; odd-numbered pages will appear to
+ * contain junk.  Systems that boot from the docg4 typically write the secondary
+ * program loader (SPL) code in this mode.  The SPL is loaded by the initial
+ * program loader (IPL, stored in the docg4's 2k NOR-like region that is mapped
+ * to the reset vector address).  This module parameter enables you to use this
+ * driver to write the SPL.  When in this mode, no more than 2k of data can be
+ * written at a time, because the addresses do not increment in the normal
+ * manner, and the starting offset must be within an even-numbered 2k region;
+ * i.e., invalid starting offsets are 0x800, 0xa00, 0xc00, 0xe00, 0x1800,
+ * 0x1a00, ...  Reliable mode is a special case and should not be used unless
+ * you know what you're doing.
+ */
+static bool reliable_mode;
+module_param(reliable_mode, bool, 0);
+MODULE_PARM_DESC(reliable_mode, "pages are programmed in reliable mode");
+
+/*
  * You'll want to ignore badblocks if you're reading a partition that contains
  * data written by the TrueFFS library (i.e., by PalmOS, Windows, etc), since
  * it does not use mtd nand's method for marking bad blocks (using oob area).
@@ -113,6 +132,7 @@
 #define DOCG4_SEQ_PAGEWRITE		0x16
 #define DOCG4_SEQ_PAGEPROG		0x1e
 #define DOCG4_SEQ_BLOCKERASE		0x24
+#define DOCG4_SEQ_SETMODE		0x45
 
 /* DOC_FLASHCOMMAND register commands */
 #define DOCG4_CMD_PAGE_READ             0x00
@@ -122,6 +142,8 @@
 #define DOC_CMD_PROG_BLOCK_ADDR		0x60
 #define DOCG4_CMD_PAGEWRITE		0x80
 #define DOC_CMD_PROG_CYCLE2		0x10
+#define DOCG4_CMD_FAST_MODE		0xa3 /* functionality guessed */
+#define DOC_CMD_RELIABLE_MODE		0x22
 #define DOC_CMD_RESET			0xff
 
 /* DOC_POWERMODE register bits */
@@ -190,17 +212,20 @@
 #define DOCG4_T                4   /* BCH alg corrects up to 4 bit errors */
 
 #define DOCG4_FACTORY_BBT_PAGE 16 /* page where read-only factory bbt lives */
+#define DOCG4_REDUNDANT_BBT_PAGE 24 /* page where redundant factory bbt lives */
 
 /*
- * Oob bytes 0 - 6 are available to the user.
- * Byte 7 is hamming ecc for first 7 bytes.  Bytes 8 - 14 are hw-generated ecc.
+ * Bytes 0, 1 are used as the bad-block marker.
+ * Bytes 2 - 6 are available to the user.
+ * Byte 7 is Hamming ecc for the first 7 oob bytes only.
+ * Bytes 8 - 14 are hw-generated ecc covering the entire page + oob bytes 0 - 14.
  * Byte 15 (the last) is used by the driver as a "page written" flag.
  */
 static struct nand_ecclayout docg4_oobinfo = {
 	.eccbytes = 9,
 	.eccpos = {7, 8, 9, 10, 11, 12, 13, 14, 15},
-	.oobavail = 7,
-	.oobfree = { {0, 7} }
+	.oobavail = 5,
+	.oobfree = { {.offset = 2, .length = 5} }
 };
 
 /*
@@ -611,6 +636,14 @@
 	dev_dbg(doc->dev,
 	      "docg4: %s: g4 addr: %x\n", __func__, docg4_addr);
 	sequence_reset(mtd);
+
+	if (unlikely(reliable_mode)) {
+		writew(DOCG4_SEQ_SETMODE, docptr + DOC_FLASHSEQUENCE);
+		writew(DOCG4_CMD_FAST_MODE, docptr + DOC_FLASHCOMMAND);
+		writew(DOC_CMD_RELIABLE_MODE, docptr + DOC_FLASHCOMMAND);
+		write_nop(docptr);
+	}
+
 	writew(DOCG4_SEQ_PAGEWRITE, docptr + DOC_FLASHSEQUENCE);
 	writew(DOCG4_CMD_PAGEWRITE, docptr + DOC_FLASHCOMMAND);
 	write_nop(docptr);
@@ -691,6 +724,15 @@
 		break;
 
 	case NAND_CMD_SEQIN:
+		if (unlikely(reliable_mode)) {
+			uint16_t g4_page = g4_addr >> 16;
+
+			/* writes to odd-numbered 2k pages are invalid */
+			if (g4_page & 0x01)
+				dev_warn(doc->dev,
+					 "invalid reliable mode address\n");
+		}
+
 		write_page_prologue(mtd, g4_addr);
 
 		/* hack for deferred write of oob bytes */
@@ -979,16 +1021,15 @@
 	struct docg4_priv *doc = nand->priv;
 	uint32_t g4_addr = mtd_to_docg4_address(DOCG4_FACTORY_BBT_PAGE, 0);
 	uint8_t *buf;
-	int i, block, status;
+	int i, block;
+	__u32 eccfailed_stats = mtd->ecc_stats.failed;
 
 	buf = kzalloc(DOCG4_PAGE_SIZE, GFP_KERNEL);
 	if (buf == NULL)
 		return -ENOMEM;
 
 	read_page_prologue(mtd, g4_addr);
-	status = docg4_read_page(mtd, nand, buf, 0, DOCG4_FACTORY_BBT_PAGE);
-	if (status)
-		goto exit;
+	docg4_read_page(mtd, nand, buf, 0, DOCG4_FACTORY_BBT_PAGE);
 
 	/*
 	 * If no memory-based bbt was created, exit.  This will happen if module
@@ -1000,6 +1041,20 @@
 	if (nand->bbt == NULL)  /* no memory-based bbt */
 		goto exit;
 
+	if (mtd->ecc_stats.failed > eccfailed_stats) {
+		/*
+		 * Whoops, an ecc failure occurred reading the factory bbt.
+		 * It is stored redundantly, so we get another chance.
+		 */
+		eccfailed_stats = mtd->ecc_stats.failed;
+		docg4_read_page(mtd, nand, buf, 0, DOCG4_REDUNDANT_BBT_PAGE);
+		if (mtd->ecc_stats.failed > eccfailed_stats) {
+			dev_warn(doc->dev,
+				 "The factory bbt could not be read!\n");
+			goto exit;
+		}
+	}
+
 	/*
 	 * Parse factory bbt and update memory-based bbt.  Factory bbt format is
 	 * simple: one bit per block, block numbers increase left to right (msb
@@ -1019,7 +1074,7 @@
 	}
  exit:
 	kfree(buf);
-	return status;
+	return 0;
 }
 
 static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs)
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index cc1480a..2065720 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -109,20 +109,6 @@
 };
 
 /*
- * fsl_elbc_oob_lp_eccm* specify that LP NAND's OOB free area starts at offset
- * 1, so we have to adjust bad block pattern. This pattern should be used for
- * x8 chips only. So far hardware does not support x16 chips anyway.
- */
-static u8 scan_ff_pattern[] = { 0xff, };
-
-static struct nand_bbt_descr largepage_memorybased = {
-	.options = 0,
-	.offs = 0,
-	.len = 1,
-	.pattern = scan_ff_pattern,
-};
-
-/*
  * ELBC may use HW ECC, so that OOB offsets, that NAND core uses for bbt,
  * interfere with ECC positions, that's why we implement our own descriptors.
  * OOB {11, 5}, works for both SP and LP chips, with ECCM = 1 and ECCM = 0.
@@ -699,7 +685,6 @@
 			chip->ecc.layout = (priv->fmr & FMR_ECCM) ?
 			                   &fsl_elbc_oob_lp_eccm1 :
 			                   &fsl_elbc_oob_lp_eccm0;
-			chip->badblock_pattern = &largepage_memorybased;
 		}
 	} else {
 		dev_err(priv->dev,
@@ -814,7 +799,7 @@
 
 static DEFINE_MUTEX(fsl_elbc_nand_mutex);
 
-static int __devinit fsl_elbc_nand_probe(struct platform_device *pdev)
+static int fsl_elbc_nand_probe(struct platform_device *pdev)
 {
 	struct fsl_lbc_regs __iomem *lbc;
 	struct fsl_elbc_mtd *priv;
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index 3551a99..ad62226 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -389,7 +389,7 @@
 			timing = IFC_FIR_OP_RBCD;
 
 		out_be32(&ifc->ifc_nand.nand_fir0,
-				(IFC_FIR_OP_CMD0 << IFC_NAND_FIR0_OP0_SHIFT) |
+				(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
 				(IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
 				(timing << IFC_NAND_FIR0_OP2_SHIFT));
 		out_be32(&ifc->ifc_nand.nand_fcr0,
@@ -754,7 +754,7 @@
 
 	/* READID */
 	out_be32(&ifc->ifc_nand.nand_fir0,
-			(IFC_FIR_OP_CMD0 << IFC_NAND_FIR0_OP0_SHIFT) |
+			(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
 			(IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
 			(IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT));
 	out_be32(&ifc->ifc_nand.nand_fcr0,
@@ -922,7 +922,7 @@
 
 static DEFINE_MUTEX(fsl_ifc_nand_mutex);
 
-static int __devinit fsl_ifc_nand_probe(struct platform_device *dev)
+static int fsl_ifc_nand_probe(struct platform_device *dev)
 {
 	struct fsl_ifc_regs __iomem *ifc;
 	struct fsl_ifc_mtd *priv;
diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c
index 45df542..5a8f5c4 100644
--- a/drivers/mtd/nand/fsl_upm.c
+++ b/drivers/mtd/nand/fsl_upm.c
@@ -152,7 +152,7 @@
 		fun_wait_rnb(fun);
 }
 
-static int __devinit fun_chip_init(struct fsl_upm_nand *fun,
+static int fun_chip_init(struct fsl_upm_nand *fun,
 				   const struct device_node *upm_np,
 				   const struct resource *io_res)
 {
@@ -201,7 +201,7 @@
 	return ret;
 }
 
-static int __devinit fun_probe(struct platform_device *ofdev)
+static int fun_probe(struct platform_device *ofdev)
 {
 	struct fsl_upm_nand *fun;
 	struct resource io_res;
@@ -318,7 +318,7 @@
 	return ret;
 }
 
-static int __devexit fun_remove(struct platform_device *ofdev)
+static int fun_remove(struct platform_device *ofdev)
 {
 	struct fsl_upm_nand *fun = dev_get_drvdata(&ofdev->dev);
 	int i;
@@ -350,7 +350,7 @@
 		.of_match_table = of_fun_match,
 	},
 	.probe		= fun_probe,
-	.remove		= __devexit_p(fun_remove),
+	.remove		= fun_remove,
 };
 
 module_platform_driver(of_fun_driver);
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 38d2624..1d74464 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -361,7 +361,7 @@
 	struct nand_chip *this = mtd->priv;
 	struct fsmc_nand_data *host = container_of(mtd,
 					struct fsmc_nand_data, mtd);
-	void *__iomem *regs = host->regs_va;
+	void __iomem *regs = host->regs_va;
 	unsigned int bank = host->bank;
 
 	if (ctrl & NAND_CTRL_CHANGE) {
@@ -383,13 +383,13 @@
 			pc |= FSMC_ENABLE;
 		else
 			pc &= ~FSMC_ENABLE;
-		writel(pc, FSMC_NAND_REG(regs, bank, PC));
+		writel_relaxed(pc, FSMC_NAND_REG(regs, bank, PC));
 	}
 
 	mb();
 
 	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, this->IO_ADDR_W);
+		writeb_relaxed(cmd, this->IO_ADDR_W);
 }
 
 /*
@@ -426,14 +426,18 @@
 	tset = (tims->tset & FSMC_TSET_MASK) << FSMC_TSET_SHIFT;
 
 	if (busw)
-		writel(value | FSMC_DEVWID_16, FSMC_NAND_REG(regs, bank, PC));
+		writel_relaxed(value | FSMC_DEVWID_16,
+				FSMC_NAND_REG(regs, bank, PC));
 	else
-		writel(value | FSMC_DEVWID_8, FSMC_NAND_REG(regs, bank, PC));
+		writel_relaxed(value | FSMC_DEVWID_8,
+				FSMC_NAND_REG(regs, bank, PC));
 
-	writel(readl(FSMC_NAND_REG(regs, bank, PC)) | tclr | tar,
+	writel_relaxed(readl(FSMC_NAND_REG(regs, bank, PC)) | tclr | tar,
 			FSMC_NAND_REG(regs, bank, PC));
-	writel(thiz | thold | twait | tset, FSMC_NAND_REG(regs, bank, COMM));
-	writel(thiz | thold | twait | tset, FSMC_NAND_REG(regs, bank, ATTRIB));
+	writel_relaxed(thiz | thold | twait | tset,
+			FSMC_NAND_REG(regs, bank, COMM));
+	writel_relaxed(thiz | thold | twait | tset,
+			FSMC_NAND_REG(regs, bank, ATTRIB));
 }
 
 /*
@@ -446,11 +450,11 @@
 	void __iomem *regs = host->regs_va;
 	uint32_t bank = host->bank;
 
-	writel(readl(FSMC_NAND_REG(regs, bank, PC)) & ~FSMC_ECCPLEN_256,
+	writel_relaxed(readl(FSMC_NAND_REG(regs, bank, PC)) & ~FSMC_ECCPLEN_256,
 			FSMC_NAND_REG(regs, bank, PC));
-	writel(readl(FSMC_NAND_REG(regs, bank, PC)) & ~FSMC_ECCEN,
+	writel_relaxed(readl(FSMC_NAND_REG(regs, bank, PC)) & ~FSMC_ECCEN,
 			FSMC_NAND_REG(regs, bank, PC));
-	writel(readl(FSMC_NAND_REG(regs, bank, PC)) | FSMC_ECCEN,
+	writel_relaxed(readl(FSMC_NAND_REG(regs, bank, PC)) | FSMC_ECCEN,
 			FSMC_NAND_REG(regs, bank, PC));
 }
 
@@ -470,7 +474,7 @@
 	unsigned long deadline = jiffies + FSMC_BUSY_WAIT_TIMEOUT;
 
 	do {
-		if (readl(FSMC_NAND_REG(regs, bank, STS)) & FSMC_CODE_RDY)
+		if (readl_relaxed(FSMC_NAND_REG(regs, bank, STS)) & FSMC_CODE_RDY)
 			break;
 		else
 			cond_resched();
@@ -481,25 +485,25 @@
 		return -ETIMEDOUT;
 	}
 
-	ecc_tmp = readl(FSMC_NAND_REG(regs, bank, ECC1));
+	ecc_tmp = readl_relaxed(FSMC_NAND_REG(regs, bank, ECC1));
 	ecc[0] = (uint8_t) (ecc_tmp >> 0);
 	ecc[1] = (uint8_t) (ecc_tmp >> 8);
 	ecc[2] = (uint8_t) (ecc_tmp >> 16);
 	ecc[3] = (uint8_t) (ecc_tmp >> 24);
 
-	ecc_tmp = readl(FSMC_NAND_REG(regs, bank, ECC2));
+	ecc_tmp = readl_relaxed(FSMC_NAND_REG(regs, bank, ECC2));
 	ecc[4] = (uint8_t) (ecc_tmp >> 0);
 	ecc[5] = (uint8_t) (ecc_tmp >> 8);
 	ecc[6] = (uint8_t) (ecc_tmp >> 16);
 	ecc[7] = (uint8_t) (ecc_tmp >> 24);
 
-	ecc_tmp = readl(FSMC_NAND_REG(regs, bank, ECC3));
+	ecc_tmp = readl_relaxed(FSMC_NAND_REG(regs, bank, ECC3));
 	ecc[8] = (uint8_t) (ecc_tmp >> 0);
 	ecc[9] = (uint8_t) (ecc_tmp >> 8);
 	ecc[10] = (uint8_t) (ecc_tmp >> 16);
 	ecc[11] = (uint8_t) (ecc_tmp >> 24);
 
-	ecc_tmp = readl(FSMC_NAND_REG(regs, bank, STS));
+	ecc_tmp = readl_relaxed(FSMC_NAND_REG(regs, bank, STS));
 	ecc[12] = (uint8_t) (ecc_tmp >> 16);
 
 	return 0;
@@ -519,7 +523,7 @@
 	uint32_t bank = host->bank;
 	uint32_t ecc_tmp;
 
-	ecc_tmp = readl(FSMC_NAND_REG(regs, bank, ECC1));
+	ecc_tmp = readl_relaxed(FSMC_NAND_REG(regs, bank, ECC1));
 	ecc[0] = (uint8_t) (ecc_tmp >> 0);
 	ecc[1] = (uint8_t) (ecc_tmp >> 8);
 	ecc[2] = (uint8_t) (ecc_tmp >> 16);
@@ -601,7 +605,7 @@
 	dma_async_issue_pending(chan);
 
 	ret =
-	wait_for_completion_interruptible_timeout(&host->dma_access_complete,
+	wait_for_completion_timeout(&host->dma_access_complete,
 				msecs_to_jiffies(3000));
 	if (ret <= 0) {
 		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
@@ -628,10 +632,10 @@
 		uint32_t *p = (uint32_t *)buf;
 		len = len >> 2;
 		for (i = 0; i < len; i++)
-			writel(p[i], chip->IO_ADDR_W);
+			writel_relaxed(p[i], chip->IO_ADDR_W);
 	} else {
 		for (i = 0; i < len; i++)
-			writeb(buf[i], chip->IO_ADDR_W);
+			writeb_relaxed(buf[i], chip->IO_ADDR_W);
 	}
 }
 
@@ -651,10 +655,10 @@
 		uint32_t *p = (uint32_t *)buf;
 		len = len >> 2;
 		for (i = 0; i < len; i++)
-			p[i] = readl(chip->IO_ADDR_R);
+			p[i] = readl_relaxed(chip->IO_ADDR_R);
 	} else {
 		for (i = 0; i < len; i++)
-			buf[i] = readb(chip->IO_ADDR_R);
+			buf[i] = readb_relaxed(chip->IO_ADDR_R);
 	}
 }
 
@@ -783,7 +787,7 @@
 	uint32_t num_err, i;
 	uint32_t ecc1, ecc2, ecc3, ecc4;
 
-	num_err = (readl(FSMC_NAND_REG(regs, bank, STS)) >> 10) & 0xF;
+	num_err = (readl_relaxed(FSMC_NAND_REG(regs, bank, STS)) >> 10) & 0xF;
 
 	/* no bit flipping */
 	if (likely(num_err == 0))
@@ -826,10 +830,10 @@
 	 * uint64_t array and error offset indexes are populated in err_idx
 	 * array
 	 */
-	ecc1 = readl(FSMC_NAND_REG(regs, bank, ECC1));
-	ecc2 = readl(FSMC_NAND_REG(regs, bank, ECC2));
-	ecc3 = readl(FSMC_NAND_REG(regs, bank, ECC3));
-	ecc4 = readl(FSMC_NAND_REG(regs, bank, STS));
+	ecc1 = readl_relaxed(FSMC_NAND_REG(regs, bank, ECC1));
+	ecc2 = readl_relaxed(FSMC_NAND_REG(regs, bank, ECC2));
+	ecc3 = readl_relaxed(FSMC_NAND_REG(regs, bank, ECC3));
+	ecc4 = readl_relaxed(FSMC_NAND_REG(regs, bank, STS));
 
 	err_idx[0] = (ecc1 >> 0) & 0x1FFF;
 	err_idx[1] = (ecc1 >> 13) & 0x1FFF;
@@ -860,7 +864,7 @@
 }
 
 #ifdef CONFIG_OF
-static int __devinit fsmc_nand_probe_config_dt(struct platform_device *pdev,
+static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
 					       struct device_node *np)
 {
 	struct fsmc_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
@@ -876,15 +880,13 @@
 			return -EINVAL;
 		}
 	}
-	of_property_read_u32(np, "st,ale-off", &pdata->ale_off);
-	of_property_read_u32(np, "st,cle-off", &pdata->cle_off);
 	if (of_get_property(np, "nand-skip-bbtscan", NULL))
 		pdata->options = NAND_SKIP_BBTSCAN;
 
 	return 0;
 }
 #else
-static int __devinit fsmc_nand_probe_config_dt(struct platform_device *pdev,
+static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
 					       struct device_node *np)
 {
 	return -ENOSYS;
@@ -935,41 +937,28 @@
 	if (!res)
 		return -EINVAL;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-				pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory data resourse\n");
-		return -ENOENT;
-	}
-
-	host->data_pa = (dma_addr_t)res->start;
-	host->data_va = devm_ioremap(&pdev->dev, res->start,
-			resource_size(res));
+	host->data_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->data_va) {
 		dev_err(&pdev->dev, "data ioremap failed\n");
 		return -ENOMEM;
 	}
+	host->data_pa = (dma_addr_t)res->start;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start + pdata->ale_off,
-			resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory ale resourse\n");
-		return -ENOENT;
-	}
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
+	if (!res)
+		return -EINVAL;
 
-	host->addr_va = devm_ioremap(&pdev->dev, res->start + pdata->ale_off,
-			resource_size(res));
+	host->addr_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->addr_va) {
 		dev_err(&pdev->dev, "ale ioremap failed\n");
 		return -ENOMEM;
 	}
 
-	if (!devm_request_mem_region(&pdev->dev, res->start + pdata->cle_off,
-			resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory cle resourse\n");
-		return -ENOENT;
-	}
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
+	if (!res)
+		return -EINVAL;
 
-	host->cmd_va = devm_ioremap(&pdev->dev, res->start + pdata->cle_off,
-			resource_size(res));
+	host->cmd_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->cmd_va) {
 		dev_err(&pdev->dev, "cmd ioremap failed\n");
 		return -ENOMEM;
@@ -979,14 +968,7 @@
 	if (!res)
 		return -EINVAL;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-			pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory regs resourse\n");
-		return -ENOENT;
-	}
-
-	host->regs_va = devm_ioremap(&pdev->dev, res->start,
-			resource_size(res));
+	host->regs_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->regs_va) {
 		dev_err(&pdev->dev, "regs ioremap failed\n");
 		return -ENOMEM;
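
Two independent cleanups run through the fsmc hunks: register accesses move
to the *_relaxed() MMIO accessors, which on ARM avoid the barrier implied by
every readl()/writel(), and the open-coded request_mem_region()/ioremap()
pairs collapse into devm_request_and_ioremap() lookups of named resources.
Per-device ordering is preserved by the relaxed accessors themselves; the one
cross-stream ordering point is kept as the explicit mb() before the command
write. A hedged sketch of the accessor pattern, with invented register names:

    #include <linux/io.h>

    /* Illustrative only: relaxed writes stay ordered with respect to the
     * same device, so a single explicit barrier covers the spot where the
     * control setup must be visible before the command is issued. */
    static void example_cmd_ctrl(void __iomem *pc_reg, void __iomem *cmd_reg,
                                 u32 pc_val, u8 cmd)
    {
            writel_relaxed(pc_val, pc_reg); /* no costly DSB per access */

            mb();                           /* order setup vs. command */

            writeb_relaxed(cmd, cmd_reg);
    }
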
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index bc73bc5..e789e3f 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -90,14 +90,14 @@
 {
 	struct nand_chip *this = mtd->priv;
 
-	writesb(this->IO_ADDR_W, buf, len);
+	iowrite8_rep(this->IO_ADDR_W, buf, len);
 }
 
 static void gpio_nand_readbuf(struct mtd_info *mtd, u_char *buf, int len)
 {
 	struct nand_chip *this = mtd->priv;
 
-	readsb(this->IO_ADDR_R, buf, len);
+	ioread8_rep(this->IO_ADDR_R, buf, len);
 }
 
 static void gpio_nand_writebuf16(struct mtd_info *mtd, const u_char *buf,
@@ -106,7 +106,7 @@
 	struct nand_chip *this = mtd->priv;
 
 	if (IS_ALIGNED((unsigned long)buf, 2)) {
-		writesw(this->IO_ADDR_W, buf, len>>1);
+		iowrite16_rep(this->IO_ADDR_W, buf, len>>1);
 	} else {
 		int i;
 		unsigned short *ptr = (unsigned short *)buf;
@@ -121,7 +121,7 @@
 	struct nand_chip *this = mtd->priv;
 
 	if (IS_ALIGNED((unsigned long)buf, 2)) {
-		readsw(this->IO_ADDR_R, buf, len>>1);
+		ioread16_rep(this->IO_ADDR_R, buf, len>>1);
 	} else {
 		int i;
 		unsigned short *ptr = (unsigned short *)buf;
@@ -134,7 +134,11 @@
 static int gpio_nand_devready(struct mtd_info *mtd)
 {
 	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
-	return gpio_get_value(gpiomtd->plat.gpio_rdy);
+
+	if (gpio_is_valid(gpiomtd->plat.gpio_rdy))
+		return gpio_get_value(gpiomtd->plat.gpio_rdy);
+
+	return 1;
 }
 
 #ifdef CONFIG_OF
@@ -227,7 +231,7 @@
 	return platform_get_resource(pdev, IORESOURCE_MEM, 1);
 }
 
-static int __devexit gpio_nand_remove(struct platform_device *dev)
+static int gpio_nand_remove(struct platform_device *dev)
 {
 	struct gpiomtd *gpiomtd = platform_get_drvdata(dev);
 	struct resource *res;
@@ -252,7 +256,8 @@
 	gpio_free(gpiomtd->plat.gpio_nce);
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
 		gpio_free(gpiomtd->plat.gpio_nwp);
-	gpio_free(gpiomtd->plat.gpio_rdy);
+	if (gpio_is_valid(gpiomtd->plat.gpio_rdy))
+		gpio_free(gpiomtd->plat.gpio_rdy);
 
 	kfree(gpiomtd);
 
@@ -277,7 +282,7 @@
 	return ptr;
 }
 
-static int __devinit gpio_nand_probe(struct platform_device *dev)
+static int gpio_nand_probe(struct platform_device *dev)
 {
 	struct gpiomtd *gpiomtd;
 	struct nand_chip *this;
@@ -336,10 +341,12 @@
 	if (ret)
 		goto err_cle;
 	gpio_direction_output(gpiomtd->plat.gpio_cle, 0);
-	ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
-	if (ret)
-		goto err_rdy;
-	gpio_direction_input(gpiomtd->plat.gpio_rdy);
+	if (gpio_is_valid(gpiomtd->plat.gpio_rdy)) {
+		ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
+		if (ret)
+			goto err_rdy;
+		gpio_direction_input(gpiomtd->plat.gpio_rdy);
+	}
 
 
 	this->IO_ADDR_W  = this->IO_ADDR_R;
@@ -386,7 +393,8 @@
 err_wp:
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
 		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
-	gpio_free(gpiomtd->plat.gpio_rdy);
+	if (gpio_is_valid(gpiomtd->plat.gpio_rdy))
+		gpio_free(gpiomtd->plat.gpio_rdy);
 err_rdy:
 	gpio_free(gpiomtd->plat.gpio_cle);
 err_cle:
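
The gpio-nand changes make the RDY line optional: every request, read and
free of gpio_rdy is now guarded by gpio_is_valid(), so a board can pass a
negative GPIO number to mean "not wired" and the chip is then reported as
always ready. The guard pattern, sketched with a hypothetical helper:

    #include <linux/gpio.h>

    /* Hypothetical helper: request an optional ready line. */
    static int request_optional_rdy(int gpio_rdy)
    {
            int ret;

            if (!gpio_is_valid(gpio_rdy))
                    return 0;       /* no RDY pin; treat as always ready */

            ret = gpio_request(gpio_rdy, "NAND RDY");
            if (ret)
                    return ret;

            return gpio_direction_input(gpio_rdy);
    }
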
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 3502acc..d84699c 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -18,7 +18,6 @@
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
-#include <linux/mtd/gpmi-nand.h>
 #include <linux/delay.h>
 #include <linux/clk.h>
 
@@ -166,6 +165,15 @@
 	if (ret)
 		goto err_out;
 
+	/*
+	 * Reset BCH here, too. We got failures otherwise :(
+	 * See later BCH reset for explanation of MX23 handling
+	 */
+	ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this));
+	if (ret)
+		goto err_out;
+
 	/* Choose NAND mode. */
 	writel(BM_GPMI_CTRL1_GPMI_MODE, r->gpmi_regs + HW_GPMI_CTRL1_CLR);
 
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index d79696b..5cd141f 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/mtd/gpmi-nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/of.h>
@@ -33,6 +32,12 @@
 #include <linux/of_mtd.h>
 #include "gpmi-nand.h"
 
+/* Resource names for the GPMI NAND driver. */
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
+
 /* add our owner bbt descriptor */
 static uint8_t scan_ff_pattern[] = { 0xff };
 static struct nand_bbt_descr gpmi_bbt_descr = {
@@ -222,7 +227,7 @@
 
 		ret = dma_map_sg(this->dev, sgl, 1, dr);
 		if (ret == 0)
-			pr_err("map failed.\n");
+			pr_err("DMA mapping failed.\n");
 
 		this->direct_dma_map_ok = false;
 	}
@@ -314,7 +319,7 @@
 	return 0;
 }
 
-static int __devinit
+static int
 acquire_register_block(struct gpmi_nand_data *this, const char *res_name)
 {
 	struct platform_device *pdev = this->pdev;
@@ -355,7 +360,7 @@
 	res->bch_regs = NULL;
 }
 
-static int __devinit
+static int
 acquire_bch_irq(struct gpmi_nand_data *this, irq_handler_t irq_h)
 {
 	struct platform_device *pdev = this->pdev;
@@ -422,7 +427,7 @@
 		}
 }
 
-static int __devinit acquire_dma_channels(struct gpmi_nand_data *this)
+static int acquire_dma_channels(struct gpmi_nand_data *this)
 {
 	struct platform_device *pdev = this->pdev;
 	struct resource *r_dma;
@@ -456,7 +461,7 @@
 
 	dma_chan = dma_request_channel(mask, gpmi_dma_filter, this);
 	if (!dma_chan) {
-		pr_err("dma_request_channel failed.\n");
+		pr_err("Failed to request DMA channel.\n");
 		goto acquire_err;
 	}
 
@@ -487,7 +492,7 @@
 	"gpmi_apb", "gpmi_bch", "gpmi_bch_apb", "per1_bch",
 };
 
-static int __devinit gpmi_get_clks(struct gpmi_nand_data *this)
+static int gpmi_get_clks(struct gpmi_nand_data *this)
 {
 	struct resources *r = &this->resources;
 	char **extra_clks = NULL;
@@ -533,7 +538,7 @@
 	return -ENOMEM;
 }
 
-static int __devinit acquire_resources(struct gpmi_nand_data *this)
+static int acquire_resources(struct gpmi_nand_data *this)
 {
 	struct pinctrl *pinctrl;
 	int ret;
@@ -583,7 +588,7 @@
 	release_dma_channels(this);
 }
 
-static int __devinit init_hardware(struct gpmi_nand_data *this)
+static int init_hardware(struct gpmi_nand_data *this)
 {
 	int ret;
 
@@ -625,7 +630,8 @@
 						length, DMA_FROM_DEVICE);
 		if (dma_mapping_error(dev, dest_phys)) {
 			if (alt_size < length) {
-				pr_err("Alternate buffer is too small\n");
+				pr_err("%s, Alternate buffer is too small\n",
+					__func__);
 				return -ENOMEM;
 			}
 			goto map_failed;
@@ -675,7 +681,8 @@
 						DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, source_phys)) {
 			if (alt_size < length) {
-				pr_err("Alternate buffer is too small\n");
+				pr_err("%s, Alternate buffer is too small\n",
+					__func__);
 				return -ENOMEM;
 			}
 			goto map_failed;
@@ -763,7 +770,7 @@
 
 error_alloc:
 	gpmi_free_dma_buffer(this);
-	pr_err("allocate DMA buffer ret!!\n");
+	pr_err("Error allocating DMA buffers!\n");
 	return -ENOMEM;
 }
 
@@ -1474,7 +1481,7 @@
 	/* Set up the NFC geometry which is used by BCH. */
 	ret = bch_set_geometry(this);
 	if (ret) {
-		pr_err("set geometry ret : %d\n", ret);
+		pr_err("Error setting BCH geometry : %d\n", ret);
 		return ret;
 	}
 
@@ -1535,7 +1542,7 @@
 	gpmi_free_dma_buffer(this);
 }
 
-static int __devinit gpmi_nfc_init(struct gpmi_nand_data *this)
+static int gpmi_nfc_init(struct gpmi_nand_data *this)
 {
 	struct mtd_info  *mtd = &this->mtd;
 	struct nand_chip *chip = &this->nand;
@@ -1618,7 +1625,7 @@
 };
 MODULE_DEVICE_TABLE(of, gpmi_nand_id_table);
 
-static int __devinit gpmi_nand_probe(struct platform_device *pdev)
+static int gpmi_nand_probe(struct platform_device *pdev)
 {
 	struct gpmi_nand_data *this;
 	const struct of_device_id *of_id;
@@ -1668,7 +1675,7 @@
 	return ret;
 }
 
-static int __devexit gpmi_nand_remove(struct platform_device *pdev)
+static int gpmi_nand_remove(struct platform_device *pdev)
 {
 	struct gpmi_nand_data *this = platform_get_drvdata(pdev);
 
@@ -1685,7 +1692,7 @@
 		.of_match_table = gpmi_nand_id_table,
 	},
 	.probe   = gpmi_nand_probe,
-	.remove  = __devexit_p(gpmi_nand_remove),
+	.remove  = gpmi_nand_remove,
 	.id_table = gpmi_ids,
 };
 module_platform_driver(gpmi_nand_driver);
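
With <linux/mtd/gpmi-nand.h> gone, the driver owns its resource names (the
GPMI_NAND_*_RES_NAME defines added above) and looks everything up by name
from the device tree. A sketch of one such lookup, assuming the 3.8-era
devm_request_and_ioremap(), which returns NULL on failure; the helper itself
is invented for illustration:

    #include <linux/device.h>
    #include <linux/platform_device.h>

    static void __iomem *gpmi_map_block(struct platform_device *pdev,
                                        const char *name)
    {
            struct resource *r;

            r = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
            if (!r)
                    return NULL;

            /* request + ioremap in one devm-managed step */
            return devm_request_and_ioremap(&pdev->dev, r);
    }
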
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 7ac25c1..3d93a5e 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -130,7 +130,6 @@
 	/* System Interface */
 	struct device		*dev;
 	struct platform_device	*pdev;
-	struct gpmi_nand_platform_data	*pdata;
 
 	/* Resources */
 	struct resources	resources;
diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c
index 100b677..8d415f0 100644
--- a/drivers/mtd/nand/jz4740_nand.c
+++ b/drivers/mtd/nand/jz4740_nand.c
@@ -316,13 +316,17 @@
 	return ret;
 }
 
-static inline void jz_nand_iounmap_resource(struct resource *res, void __iomem *base)
+static inline void jz_nand_iounmap_resource(struct resource *res,
+					    void __iomem *base)
 {
 	iounmap(base);
 	release_mem_region(res->start, resource_size(res));
 }
 
-static int __devinit jz_nand_detect_bank(struct platform_device *pdev, struct jz_nand *nand, unsigned char bank, size_t chipnr, uint8_t *nand_maf_id, uint8_t *nand_dev_id) {
+static int jz_nand_detect_bank(struct platform_device *pdev,
+			       struct jz_nand *nand, unsigned char bank,
+			       size_t chipnr, uint8_t *nand_maf_id,
+			       uint8_t *nand_dev_id) {
 	int ret;
 	int gpio;
 	char gpio_name[9];
@@ -400,7 +404,7 @@
 	return ret;
 }
 
-static int __devinit jz_nand_probe(struct platform_device *pdev)
+static int jz_nand_probe(struct platform_device *pdev)
 {
 	int ret;
 	struct jz_nand *nand;
@@ -541,7 +545,7 @@
 	return ret;
 }
 
-static int __devexit jz_nand_remove(struct platform_device *pdev)
+static int jz_nand_remove(struct platform_device *pdev)
 {
 	struct jz_nand *nand = platform_get_drvdata(pdev);
 	struct jz_nand_platform_data *pdata = pdev->dev.platform_data;
@@ -573,7 +577,7 @@
 
 static struct platform_driver jz_nand_driver = {
 	.probe = jz_nand_probe,
-	.remove = __devexit_p(jz_nand_remove),
+	.remove = jz_nand_remove,
 	.driver = {
 		.name = "jz4740-nand",
 		.owner = THIS_MODULE,
diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c
index c29b7ac..f182bef 100644
--- a/drivers/mtd/nand/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/lpc32xx_mlc.c
@@ -655,7 +655,7 @@
 /*
  * Probe for NAND controller
  */
-static int __devinit lpc32xx_nand_probe(struct platform_device *pdev)
+static int lpc32xx_nand_probe(struct platform_device *pdev)
 {
 	struct lpc32xx_nand_host *host;
 	struct mtd_info *mtd;
@@ -845,7 +845,7 @@
 /*
  * Remove NAND device
  */
-static int __devexit lpc32xx_nand_remove(struct platform_device *pdev)
+static int lpc32xx_nand_remove(struct platform_device *pdev)
 {
 	struct lpc32xx_nand_host *host = platform_get_drvdata(pdev);
 	struct mtd_info *mtd = &host->mtd;
@@ -907,7 +907,7 @@
 
 static struct platform_driver lpc32xx_nand_driver = {
 	.probe		= lpc32xx_nand_probe,
-	.remove		= __devexit_p(lpc32xx_nand_remove),
+	.remove		= lpc32xx_nand_remove,
 	.resume		= lpc32xx_nand_resume,
 	.suspend	= lpc32xx_nand_suspend,
 	.driver		= {
diff --git a/drivers/mtd/nand/lpc32xx_slc.c b/drivers/mtd/nand/lpc32xx_slc.c
index 32409c4..030b78c 100644
--- a/drivers/mtd/nand/lpc32xx_slc.c
+++ b/drivers/mtd/nand/lpc32xx_slc.c
@@ -755,7 +755,7 @@
 /*
  * Probe for NAND controller
  */
-static int __devinit lpc32xx_nand_probe(struct platform_device *pdev)
+static int lpc32xx_nand_probe(struct platform_device *pdev)
 {
 	struct lpc32xx_nand_host *host;
 	struct mtd_info *mtd;
@@ -949,7 +949,7 @@
 /*
  * Remove NAND device.
  */
-static int __devexit lpc32xx_nand_remove(struct platform_device *pdev)
+static int lpc32xx_nand_remove(struct platform_device *pdev)
 {
 	uint32_t tmp;
 	struct lpc32xx_nand_host *host = platform_get_drvdata(pdev);
@@ -1021,7 +1021,7 @@
 
 static struct platform_driver lpc32xx_nand_driver = {
 	.probe		= lpc32xx_nand_probe,
-	.remove		= __devexit_p(lpc32xx_nand_remove),
+	.remove		= lpc32xx_nand_remove,
 	.resume		= lpc32xx_nand_resume,
 	.suspend	= lpc32xx_nand_suspend,
 	.driver		= {
diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index f776c85..3c9cdcb 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c
@@ -626,7 +626,7 @@
 		iounmap(prv->csreg);
 }
 
-static int __devinit mpc5121_nfc_probe(struct platform_device *op)
+static int mpc5121_nfc_probe(struct platform_device *op)
 {
 	struct device_node *rootnode, *dn = op->dev.of_node;
 	struct device *dev = &op->dev;
@@ -827,7 +827,7 @@
 	return retval;
 }
 
-static int __devexit mpc5121_nfc_remove(struct platform_device *op)
+static int mpc5121_nfc_remove(struct platform_device *op)
 {
 	struct device *dev = &op->dev;
 	struct mtd_info *mtd = dev_get_drvdata(dev);
@@ -841,14 +841,14 @@
 	return 0;
 }
 
-static struct of_device_id mpc5121_nfc_match[] __devinitdata = {
+static struct of_device_id mpc5121_nfc_match[] = {
 	{ .compatible = "fsl,mpc5121-nfc", },
 	{},
 };
 
 static struct platform_driver mpc5121_nfc_driver = {
 	.probe		= mpc5121_nfc_probe,
-	.remove		= __devexit_p(mpc5121_nfc_remove),
+	.remove		= mpc5121_nfc_remove,
 	.driver		= {
 		.name = DRV_NAME,
 		.owner = THIS_MODULE,
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 022dcdc..45204e4 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -266,7 +266,8 @@
 	}
 };
 
-static const char *part_probes[] = { "RedBoot", "cmdlinepart", "ofpart", NULL };
+static const char * const part_probes[] = {
+	"cmdlinepart", "RedBoot", "ofpart", NULL };
 
 static void memcpy32_fromio(void *trg, const void __iomem  *src, size_t size)
 {
@@ -1378,7 +1379,7 @@
 }
 #endif
 
-static int __devinit mxcnd_probe(struct platform_device *pdev)
+static int mxcnd_probe(struct platform_device *pdev)
 {
 	struct nand_chip *this;
 	struct mtd_info *mtd;
@@ -1556,12 +1557,13 @@
 	return 0;
 
 escan:
-	clk_disable_unprepare(host->clk);
+	if (host->clk_act)
+		clk_disable_unprepare(host->clk);
 
 	return err;
 }
 
-static int __devexit mxcnd_remove(struct platform_device *pdev)
+static int mxcnd_remove(struct platform_device *pdev)
 {
 	struct mxc_nand_host *host = platform_get_drvdata(pdev);
 
@@ -1580,7 +1582,7 @@
 	},
 	.id_table = mxcnd_devtype,
 	.probe = mxcnd_probe,
-	.remove = __devexit_p(mxcnd_remove),
+	.remove = mxcnd_remove,
 };
 module_platform_driver(mxcnd_driver);
 
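The mxcnd_probe() error path above may only call clk_disable_unprepare()
when the clock was actually brought up, which the driver tracks in its
clk_act flag; without the guard an early failure would unbalance the clock
core's prepare/enable counts. A condensed sketch of the flag pattern, with
placeholder struct and function names:

    #include <linux/clk.h>

    struct example_host {
            struct clk *clk;
            int clk_act;            /* mirrors mxc_nand's bookkeeping */
    };

    static int example_clk_on(struct example_host *host)
    {
            int ret = clk_prepare_enable(host->clk);

            if (ret)
                    return ret;
            host->clk_act = 1;
            return 0;
    }

    static void example_clk_off(struct example_host *host)
    {
            if (host->clk_act)      /* never unbalance the refcounts */
                    clk_disable_unprepare(host->clk);
            host->clk_act = 0;
    }
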
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 1a03b7f..8323ac9 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -93,8 +93,7 @@
 		 .length = 78} }
 };
 
-static int nand_get_device(struct nand_chip *chip, struct mtd_info *mtd,
-			   int new_state);
+static int nand_get_device(struct mtd_info *mtd, int new_state);
 
 static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 			     struct mtd_oob_ops *ops);
@@ -130,15 +129,12 @@
  * nand_release_device - [GENERIC] release chip
  * @mtd: MTD device structure
  *
- * Deselect, release chip lock and wake up anyone waiting on the device.
+ * Release chip lock and wake up anyone waiting on the device.
  */
 static void nand_release_device(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd->priv;
 
-	/* De-select the NAND device */
-	chip->select_chip(mtd, -1);
-
 	/* Release the controller and the chip */
 	spin_lock(&chip->controller->lock);
 	chip->controller->active = NULL;
@@ -160,7 +156,7 @@
 }
 
 /**
- * nand_read_byte16 - [DEFAULT] read one byte endianess aware from the chip
+ * nand_read_byte16 - [DEFAULT] read one byte endianness aware from the chip
  * @mtd: MTD device structure
  *
@@ -303,7 +299,7 @@
 	if (getchip) {
 		chipnr = (int)(ofs >> chip->chip_shift);
 
-		nand_get_device(chip, mtd, FL_READING);
+		nand_get_device(mtd, FL_READING);
 
 		/* Select the NAND device */
 		chip->select_chip(mtd, chipnr);
@@ -333,8 +329,10 @@
 		i++;
 	} while (!res && i < 2 && (chip->bbt_options & NAND_BBT_SCAN2NDPAGE));
 
-	if (getchip)
+	if (getchip) {
+		chip->select_chip(mtd, -1);
 		nand_release_device(mtd);
+	}
 
 	return res;
 }
@@ -383,7 +381,7 @@
 		struct mtd_oob_ops ops;
 		loff_t wr_ofs = ofs;
 
-		nand_get_device(chip, mtd, FL_WRITING);
+		nand_get_device(mtd, FL_WRITING);
 
 		ops.datbuf = NULL;
 		ops.oobbuf = buf;
@@ -492,7 +490,7 @@
 void nand_wait_ready(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd->priv;
-	unsigned long timeo = jiffies + 2;
+	unsigned long timeo = jiffies + msecs_to_jiffies(20);
 
 	/* 400ms timeout */
 	if (in_interrupt() || oops_in_progress)
@@ -750,15 +748,15 @@
 
 /**
  * nand_get_device - [GENERIC] Get chip for selected access
- * @chip: the nand chip descriptor
  * @mtd: MTD device structure
  * @new_state: the state which is requested
  *
  * Get the device and lock it for exclusive access
  */
 static int
-nand_get_device(struct nand_chip *chip, struct mtd_info *mtd, int new_state)
+nand_get_device(struct mtd_info *mtd, int new_state)
 {
+	struct nand_chip *chip = mtd->priv;
 	spinlock_t *lock = &chip->controller->lock;
 	wait_queue_head_t *wq = &chip->controller->wq;
 	DECLARE_WAITQUEUE(wait, current);
@@ -865,6 +863,8 @@
 	led_trigger_event(nand_led_trigger, LED_OFF);
 
 	status = (int)chip->read_byte(mtd);
+	/* This can happen in case of a timeout or a buggy dev_ready */
+	WARN_ON(!(status & NAND_STATUS_READY));
 	return status;
 }
 
@@ -899,7 +899,7 @@
 	/* Call wait ready function */
 	status = chip->waitfunc(mtd, chip);
 	/* See if device thinks it succeeded */
-	if (status & 0x01) {
+	if (status & NAND_STATUS_FAIL) {
 		pr_debug("%s: error status = 0x%08x\n",
 					__func__, status);
 		ret = -EIO;
@@ -932,7 +932,7 @@
 	if (ofs + len == mtd->size)
 		len -= mtd->erasesize;
 
-	nand_get_device(chip, mtd, FL_UNLOCKING);
+	nand_get_device(mtd, FL_UNLOCKING);
 
 	/* Shift to get chip number */
 	chipnr = ofs >> chip->chip_shift;
@@ -950,6 +950,7 @@
 	ret = __nand_unlock(mtd, ofs, len, 0);
 
 out:
+	chip->select_chip(mtd, -1);
 	nand_release_device(mtd);
 
 	return ret;
@@ -981,7 +982,7 @@
 	if (check_offs_len(mtd, ofs, len))
 		ret = -EINVAL;
 
-	nand_get_device(chip, mtd, FL_LOCKING);
+	nand_get_device(mtd, FL_LOCKING);
 
 	/* Shift to get chip number */
 	chipnr = ofs >> chip->chip_shift;
@@ -1004,7 +1005,7 @@
 	/* Call wait ready function */
 	status = chip->waitfunc(mtd, chip);
 	/* See if device thinks it succeeded */
-	if (status & 0x01) {
+	if (status & NAND_STATUS_FAIL) {
 		pr_debug("%s: error status = 0x%08x\n",
 					__func__, status);
 		ret = -EIO;
@@ -1014,6 +1015,7 @@
 	ret = __nand_unlock(mtd, ofs, len, 0x1);
 
 out:
+	chip->select_chip(mtd, -1);
 	nand_release_device(mtd);
 
 	return ret;
@@ -1550,6 +1552,7 @@
 			chip->select_chip(mtd, chipnr);
 		}
 	}
+	chip->select_chip(mtd, -1);
 
 	ops->retlen = ops->len - (size_t) readlen;
 	if (oob)
@@ -1577,11 +1580,10 @@
 static int nand_read(struct mtd_info *mtd, loff_t from, size_t len,
 		     size_t *retlen, uint8_t *buf)
 {
-	struct nand_chip *chip = mtd->priv;
 	struct mtd_oob_ops ops;
 	int ret;
 
-	nand_get_device(chip, mtd, FL_READING);
+	nand_get_device(mtd, FL_READING);
 	ops.len = len;
 	ops.datbuf = buf;
 	ops.oobbuf = NULL;
@@ -1804,6 +1806,7 @@
 			chip->select_chip(mtd, chipnr);
 		}
 	}
+	chip->select_chip(mtd, -1);
 
 	ops->oobretlen = ops->ooblen - readlen;
 
@@ -1827,7 +1830,6 @@
 static int nand_read_oob(struct mtd_info *mtd, loff_t from,
 			 struct mtd_oob_ops *ops)
 {
-	struct nand_chip *chip = mtd->priv;
 	int ret = -ENOTSUPP;
 
 	ops->retlen = 0;
@@ -1839,7 +1841,7 @@
 		return -EINVAL;
 	}
 
-	nand_get_device(chip, mtd, FL_READING);
+	nand_get_device(mtd, FL_READING);
 
 	switch (ops->mode) {
 	case MTD_OPS_PLACE_OOB:
@@ -2186,8 +2188,10 @@
 	chip->select_chip(mtd, chipnr);
 
 	/* Check, if it is write protected */
-	if (nand_check_wp(mtd))
-		return -EIO;
+	if (nand_check_wp(mtd)) {
+		ret = -EIO;
+		goto err_out;
+	}
 
 	realpage = (int)(to >> chip->page_shift);
 	page = realpage & chip->pagemask;
@@ -2199,8 +2203,10 @@
 		chip->pagebuf = -1;
 
 	/* Don't allow multipage oob writes with offset */
-	if (oob && ops->ooboffs && (ops->ooboffs + ops->ooblen > oobmaxlen))
-		return -EINVAL;
+	if (oob && ops->ooboffs && (ops->ooboffs + ops->ooblen > oobmaxlen)) {
+		ret = -EINVAL;
+		goto err_out;
+	}
 
 	while (1) {
 		int bytes = mtd->writesize;
@@ -2251,6 +2257,9 @@
 	ops->retlen = ops->len - writelen;
 	if (unlikely(oob))
 		ops->oobretlen = ops->ooblen;
+
+err_out:
+	chip->select_chip(mtd, -1);
 	return ret;
 }
 
@@ -2302,11 +2311,10 @@
 static int nand_write(struct mtd_info *mtd, loff_t to, size_t len,
 			  size_t *retlen, const uint8_t *buf)
 {
-	struct nand_chip *chip = mtd->priv;
 	struct mtd_oob_ops ops;
 	int ret;
 
-	nand_get_device(chip, mtd, FL_WRITING);
+	nand_get_device(mtd, FL_WRITING);
 	ops.len = len;
 	ops.datbuf = (uint8_t *)buf;
 	ops.oobbuf = NULL;
@@ -2377,8 +2385,10 @@
 	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
 
 	/* Check, if it is write protected */
-	if (nand_check_wp(mtd))
+	if (nand_check_wp(mtd)) {
+		chip->select_chip(mtd, -1);
 		return -EROFS;
+	}
 
 	/* Invalidate the page cache, if we write to the cached page */
 	if (page == chip->pagebuf)
@@ -2391,6 +2401,8 @@
 	else
 		status = chip->ecc.write_oob(mtd, chip, page & chip->pagemask);
 
+	chip->select_chip(mtd, -1);
+
 	if (status)
 		return status;
 
@@ -2408,7 +2420,6 @@
 static int nand_write_oob(struct mtd_info *mtd, loff_t to,
 			  struct mtd_oob_ops *ops)
 {
-	struct nand_chip *chip = mtd->priv;
 	int ret = -ENOTSUPP;
 
 	ops->retlen = 0;
@@ -2420,7 +2431,7 @@
 		return -EINVAL;
 	}
 
-	nand_get_device(chip, mtd, FL_WRITING);
+	nand_get_device(mtd, FL_WRITING);
 
 	switch (ops->mode) {
 	case MTD_OPS_PLACE_OOB:
@@ -2513,7 +2524,7 @@
 		return -EINVAL;
 
 	/* Grab the lock and see if the device is available */
-	nand_get_device(chip, mtd, FL_ERASING);
+	nand_get_device(mtd, FL_ERASING);
 
 	/* Shift to get first page */
 	page = (int)(instr->addr >> chip->page_shift);
@@ -2623,6 +2634,7 @@
 	ret = instr->state == MTD_ERASE_DONE ? 0 : -EIO;
 
 	/* Deselect and wake up anyone waiting on the device */
+	chip->select_chip(mtd, -1);
 	nand_release_device(mtd);
 
 	/* Do call back function */
@@ -2658,12 +2670,10 @@
  */
 static void nand_sync(struct mtd_info *mtd)
 {
-	struct nand_chip *chip = mtd->priv;
-
 	pr_debug("%s: called\n", __func__);
 
 	/* Grab the lock and see if the device is available */
-	nand_get_device(chip, mtd, FL_SYNCING);
+	nand_get_device(mtd, FL_SYNCING);
 	/* Release it and go back */
 	nand_release_device(mtd);
 }
@@ -2749,9 +2759,7 @@
  */
 static int nand_suspend(struct mtd_info *mtd)
 {
-	struct nand_chip *chip = mtd->priv;
-
-	return nand_get_device(chip, mtd, FL_PM_SUSPENDED);
+	return nand_get_device(mtd, FL_PM_SUSPENDED);
 }
 
 /**
@@ -2849,6 +2857,8 @@
 	int i;
 	int val;
 
+	/* ONFI needs to be probed in 8-bit mode */
+	WARN_ON(chip->options & NAND_BUSWIDTH_16);
 	/* Try ONFI for unknown chip or LP */
 	chip->cmdfunc(mtd, NAND_CMD_READID, 0x20, -1);
 	if (chip->read_byte(mtd) != 'O' || chip->read_byte(mtd) != 'N' ||
@@ -2913,7 +2923,7 @@
  *
  * Check if an ID string is repeated within a given sequence of bytes at
  * specific repetition interval period (e.g., {0x20,0x01,0x7F,0x20} has a
- * period of 2). This is a helper function for nand_id_len(). Returns non-zero
+ * period of 3). This is a helper function for nand_id_len(). Returns non-zero
  * if the repetition has a period of @period; otherwise, returns zero.
  */
 static int nand_id_has_period(u8 *id_data, int arrlen, int period)
@@ -3242,11 +3252,15 @@
 			break;
 	}
 
-	/*
-	 * Check, if buswidth is correct. Hardware drivers should set
-	 * chip correct!
-	 */
-	if (busw != (chip->options & NAND_BUSWIDTH_16)) {
+	if (chip->options & NAND_BUSWIDTH_AUTO) {
+		WARN_ON(chip->options & NAND_BUSWIDTH_16);
+		chip->options |= busw;
+		nand_set_defaults(chip, busw);
+	} else if (busw != (chip->options & NAND_BUSWIDTH_16)) {
+		/*
+		 * Check, if buswidth is correct. Hardware drivers should set
+		 * chip correct!
+		 */
 		pr_info("NAND device: Manufacturer ID:"
 			" 0x%02x, Chip ID: 0x%02x (%s %s)\n", *maf_id,
 			*dev_id, nand_manuf_ids[maf_idx].name, mtd->name);
@@ -3285,10 +3299,10 @@
 		chip->cmdfunc = nand_command_lp;
 
 	pr_info("NAND device: Manufacturer ID: 0x%02x, Chip ID: 0x%02x (%s %s),"
-		" page size: %d, OOB size: %d\n",
+		" %dMiB, page size: %d, OOB size: %d\n",
 		*maf_id, *dev_id, nand_manuf_ids[maf_idx].name,
 		chip->onfi_version ? chip->onfi_params.model : type->name,
-		mtd->writesize, mtd->oobsize);
+		(int)(chip->chipsize >> 20), mtd->writesize, mtd->oobsize);
 
 	return type;
 }
@@ -3327,6 +3341,8 @@
 		return PTR_ERR(type);
 	}
 
+	chip->select_chip(mtd, -1);
+
 	/* Check for a chip array */
 	for (i = 1; i < maxchips; i++) {
 		chip->select_chip(mtd, i);
@@ -3336,8 +3352,11 @@
 		chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 		/* Read manufacturer and device IDs */
 		if (nand_maf_id != chip->read_byte(mtd) ||
-		    nand_dev_id != chip->read_byte(mtd))
+		    nand_dev_id != chip->read_byte(mtd)) {
+			chip->select_chip(mtd, -1);
 			break;
+		}
+		chip->select_chip(mtd, -1);
 	}
 	if (i > 1)
 		pr_info("%d NAND chips detected\n", i);
@@ -3596,9 +3615,6 @@
 	/* Initialize state */
 	chip->state = FL_READY;
 
-	/* De-select the device */
-	chip->select_chip(mtd, -1);
-
 	/* Invalidate the pagebuffer reference */
 	chip->pagebuf = -1;
 
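The recurring select_chip(mtd, -1) additions throughout nand_base.c move
chip deselection out of nand_release_device() and into each caller, so a
controller's select_chip() hook now sees a guaranteed select/deselect pair
around every operation (and nand_get_device() no longer needs the chip
argument). The resulting protocol, sketched around a placeholder operation:

    /* example_locked_op() is illustrative; nand_get_device() and
     * nand_release_device() are the static helpers patched above. */
    static int example_locked_op(struct mtd_info *mtd, int chipnr)
    {
            struct nand_chip *chip = mtd->priv;
            int ret = 0;

            nand_get_device(mtd, FL_READING);  /* take the controller lock */
            chip->select_chip(mtd, chipnr);    /* assert chip-enable */

            /* ... issue commands, move data ... */

            chip->select_chip(mtd, -1);        /* callers deselect now */
            nand_release_device(mtd);          /* unlock and wake waiters */
            return ret;
    }
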
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index a932c48..818b65c 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -42,6 +42,8 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 
 /* Default simulator parameters values */
 #if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE)  || \
@@ -105,7 +107,6 @@
 static char *weakpages = NULL;
 static unsigned int bitflips = 0;
 static char *gravepages = NULL;
-static unsigned int rptwear = 0;
 static unsigned int overridesize = 0;
 static char *cache_file = NULL;
 static unsigned int bbt;
@@ -130,7 +131,6 @@
 module_param(weakpages,      charp, 0400);
 module_param(bitflips,       uint, 0400);
 module_param(gravepages,     charp, 0400);
-module_param(rptwear,        uint, 0400);
 module_param(overridesize,   uint, 0400);
 module_param(cache_file,     charp, 0400);
 module_param(bbt,	     uint, 0400);
@@ -162,7 +162,6 @@
 MODULE_PARM_DESC(gravepages,     "Pages that lose data [: maximum reads (defaults to 3)]"
 				 " separated by commas e.g. 1401:2 means page 1401"
 				 " can be read only twice before failing");
-MODULE_PARM_DESC(rptwear,        "Number of erases between reporting wear, if not zero");
 MODULE_PARM_DESC(overridesize,   "Specifies the NAND Flash size overriding the ID bytes. "
 				 "The size is specified in erase blocks and as the exponent of a power of two"
 				 " e.g. 5 means a size of 32 erase blocks");
@@ -286,6 +285,11 @@
 /* Maximum page cache pages needed to read or write a NAND page to the cache_file */
 #define NS_MAX_HELD_PAGES 16
 
+struct nandsim_debug_info {
+	struct dentry *dfs_root;
+	struct dentry *dfs_wear_report;
+};
+
 /*
  * A union to represent flash memory contents and flash buffer.
  */
@@ -365,6 +369,8 @@
 	void *file_buf;
 	struct page *held_pages[NS_MAX_HELD_PAGES];
 	int held_cnt;
+
+	struct nandsim_debug_info dbg;
 };
 
 /*
@@ -442,11 +448,123 @@
 static unsigned long *erase_block_wear = NULL;
 static unsigned int wear_eb_count = 0;
 static unsigned long total_wear = 0;
-static unsigned int rptwear_cnt = 0;
 
 /* MTD structure for NAND controller */
 static struct mtd_info *nsmtd;
 
+static int nandsim_debugfs_show(struct seq_file *m, void *private)
+{
+	unsigned long wmin = -1, wmax = 0, avg;
+	unsigned long deciles[10], decile_max[10], tot = 0;
+	unsigned int i;
+
+	/* Calc wear stats */
+	for (i = 0; i < wear_eb_count; ++i) {
+		unsigned long wear = erase_block_wear[i];
+		if (wear < wmin)
+			wmin = wear;
+		if (wear > wmax)
+			wmax = wear;
+		tot += wear;
+	}
+
+	for (i = 0; i < 9; ++i) {
+		deciles[i] = 0;
+		decile_max[i] = (wmax * (i + 1) + 5) / 10;
+	}
+	deciles[9] = 0;
+	decile_max[9] = wmax;
+	for (i = 0; i < wear_eb_count; ++i) {
+		int d;
+		unsigned long wear = erase_block_wear[i];
+		for (d = 0; d < 10; ++d)
+			if (wear <= decile_max[d]) {
+				deciles[d] += 1;
+				break;
+			}
+	}
+	avg = tot / wear_eb_count;
+
+	/* Output wear report */
+	seq_printf(m, "Total numbers of erases:  %lu\n", tot);
+	seq_printf(m, "Number of erase blocks:   %u\n", wear_eb_count);
+	seq_printf(m, "Average number of erases: %lu\n", avg);
+	seq_printf(m, "Maximum number of erases: %lu\n", wmax);
+	seq_printf(m, "Minimum number of erases: %lu\n", wmin);
+	for (i = 0; i < 10; ++i) {
+		unsigned long from = (i ? decile_max[i - 1] + 1 : 0);
+		if (from > decile_max[i])
+			continue;
+		seq_printf(m, "Number of ebs with erase counts from %lu to %lu : %lu\n",
+			from,
+			decile_max[i],
+			deciles[i]);
+	}
+
+	return 0;
+}
+
+static int nandsim_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nandsim_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations dfs_fops = {
+	.open		= nandsim_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/**
+ * nandsim_debugfs_create - initialize debugfs
+ * @dev: nandsim device description object
+ *
+ * This function creates all debugfs files for nandsim device @dev. Returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+static int nandsim_debugfs_create(struct nandsim *dev)
+{
+	struct nandsim_debug_info *dbg = &dev->dbg;
+	struct dentry *dent;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_DEBUG_FS))
+		return 0;
+
+	dent = debugfs_create_dir("nandsim", NULL);
+	if (IS_ERR_OR_NULL(dent)) {
+		int err = dent ? PTR_ERR(dent) : -ENODEV;
+
+		NS_ERR("cannot create \"nandsim\" debugfs directory, err %d\n",
+			err);
+		return err;
+	}
+	dbg->dfs_root = dent;
+
+	dent = debugfs_create_file("wear_report", S_IRUSR,
+				   dbg->dfs_root, dev, &dfs_fops);
+	if (IS_ERR_OR_NULL(dent))
+		goto out_remove;
+	dbg->dfs_wear_report = dent;
+
+	return 0;
+
+out_remove:
+	debugfs_remove_recursive(dbg->dfs_root);
+	err = dent ? PTR_ERR(dent) : -ENODEV;
+	return err;
+}
+
+/**
+ * nandsim_debugfs_remove - destroy all debugfs files
+ */
+static void nandsim_debugfs_remove(struct nandsim *ns)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		debugfs_remove_recursive(ns->dbg.dfs_root);
+}
+
 /*
  * Allocate array of page pointers, create slab allocation for an array
  * and initialize the array by NULL pointers.
@@ -911,8 +1029,6 @@
 {
 	size_t mem;
 
-	if (!rptwear)
-		return 0;
 	wear_eb_count = div_u64(mtd->size, mtd->erasesize);
 	mem = wear_eb_count * sizeof(unsigned long);
 	if (mem / sizeof(unsigned long) != wear_eb_count) {
@@ -929,64 +1045,18 @@
 
 static void update_wear(unsigned int erase_block_no)
 {
-	unsigned long wmin = -1, wmax = 0, avg;
-	unsigned long deciles[10], decile_max[10], tot = 0;
-	unsigned int i;
-
 	if (!erase_block_wear)
 		return;
 	total_wear += 1;
+	/*
+	 * TODO: Notify this through a debugfs entry,
+	 * instead of showing an error message.
+	 */
 	if (total_wear == 0)
 		NS_ERR("Erase counter total overflow\n");
 	erase_block_wear[erase_block_no] += 1;
 	if (erase_block_wear[erase_block_no] == 0)
 		NS_ERR("Erase counter overflow for erase block %u\n", erase_block_no);
-	rptwear_cnt += 1;
-	if (rptwear_cnt < rptwear)
-		return;
-	rptwear_cnt = 0;
-	/* Calc wear stats */
-	for (i = 0; i < wear_eb_count; ++i) {
-		unsigned long wear = erase_block_wear[i];
-		if (wear < wmin)
-			wmin = wear;
-		if (wear > wmax)
-			wmax = wear;
-		tot += wear;
-	}
-	for (i = 0; i < 9; ++i) {
-		deciles[i] = 0;
-		decile_max[i] = (wmax * (i + 1) + 5) / 10;
-	}
-	deciles[9] = 0;
-	decile_max[9] = wmax;
-	for (i = 0; i < wear_eb_count; ++i) {
-		int d;
-		unsigned long wear = erase_block_wear[i];
-		for (d = 0; d < 10; ++d)
-			if (wear <= decile_max[d]) {
-				deciles[d] += 1;
-				break;
-			}
-	}
-	avg = tot / wear_eb_count;
-	/* Output wear report */
-	NS_INFO("*** Wear Report ***\n");
-	NS_INFO("Total numbers of erases:  %lu\n", tot);
-	NS_INFO("Number of erase blocks:   %u\n", wear_eb_count);
-	NS_INFO("Average number of erases: %lu\n", avg);
-	NS_INFO("Maximum number of erases: %lu\n", wmax);
-	NS_INFO("Minimum number of erases: %lu\n", wmin);
-	for (i = 0; i < 10; ++i) {
-		unsigned long from = (i ? decile_max[i - 1] + 1 : 0);
-		if (from > decile_max[i])
-			continue;
-		NS_INFO("Number of ebs with erase counts from %lu to %lu : %lu\n",
-			from,
-			decile_max[i],
-			deciles[i]);
-	}
-	NS_INFO("*** End of Wear Report ***\n");
 }
 
 /*
@@ -1397,10 +1467,7 @@
 	unsigned int page_no = ns->regs.row;
 
 	if (read_error(page_no)) {
-		int i;
-		memset(ns->buf.byte, 0xFF, num);
-		for (i = 0; i < num; ++i)
-			ns->buf.byte[i] = random32();
+		prandom_bytes(ns->buf.byte, num);
 		NS_WARN("simulating read error in page %u\n", page_no);
 		return 1;
 	}
@@ -2330,6 +2397,9 @@
 	if ((retval = setup_wear_reporting(nsmtd)) != 0)
 		goto err_exit;
 
+	if ((retval = nandsim_debugfs_create(nand)) != 0)
+		goto err_exit;
+
 	if ((retval = init_nandsim(nsmtd)) != 0)
 		goto err_exit;
 
@@ -2369,6 +2439,7 @@
 	struct nandsim *ns = ((struct nand_chip *)nsmtd->priv)->priv;
 	int i;
 
+	nandsim_debugfs_remove(ns);
 	free_nandsim(ns);    /* Free nandsim private resources */
 	nand_release(nsmtd); /* Unregister driver */
 	for (i = 0;i < ARRAY_SIZE(ns->partitions); ++i)
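
The rptwear parameter is gone: instead of spamming the log every N erases,
nandsim now renders the same decile wear report on demand through debugfs
(typically /sys/kernel/debug/nandsim/wear_report) using the standard
single_open()/seq_file idiom. A stripped-down sketch of that idiom, with
placeholder names:

    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    static int example_report_show(struct seq_file *m, void *unused)
    {
            /* compute and emit the whole report in one pass */
            seq_printf(m, "example stat: %lu\n", 42UL);
            return 0;
    }

    static int example_report_open(struct inode *inode, struct file *file)
    {
            return single_open(file, example_report_show, inode->i_private);
    }

    static const struct file_operations example_report_fops = {
            .open    = example_report_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };
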
diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c
index 5fd3f01..8e148f1 100644
--- a/drivers/mtd/nand/ndfc.c
+++ b/drivers/mtd/nand/ndfc.c
@@ -197,7 +197,7 @@
 	return ret;
 }
 
-static int __devinit ndfc_probe(struct platform_device *ofdev)
+static int ndfc_probe(struct platform_device *ofdev)
 {
 	struct ndfc_controller *ndfc;
 	const __be32 *reg;
@@ -256,7 +256,7 @@
 	return 0;
 }
 
-static int __devexit ndfc_remove(struct platform_device *ofdev)
+static int ndfc_remove(struct platform_device *ofdev)
 {
 	struct ndfc_controller *ndfc = dev_get_drvdata(&ofdev->dev);
 
@@ -279,7 +279,7 @@
 		.of_match_table = ndfc_match,
 	},
 	.probe = ndfc_probe,
-	.remove = __devexit_p(ndfc_remove),
+	.remove = ndfc_remove,
 };
 
 module_platform_driver(ndfc_driver);
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
deleted file mode 100644
index 9ee0c4e..0000000
--- a/drivers/mtd/nand/nomadik_nand.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- *  drivers/mtd/nand/nomadik_nand.c
- *
- *  Overview:
- *  	Driver for on-board NAND flash on Nomadik Platforms
- *
- * Copyright © 2007 STMicroelectronics Pvt. Ltd.
- * Author: Sachin Verma <sachin.verma@st.com>
- *
- * Copyright © 2009 Alessandro Rubini
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/nand_ecc.h>
-#include <linux/platform_device.h>
-#include <linux/mtd/partitions.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/platform_data/mtd-nomadik-nand.h>
-#include <mach/fsmc.h>
-
-#include <mtd/mtd-abi.h>
-
-struct nomadik_nand_host {
-	struct mtd_info		mtd;
-	struct nand_chip	nand;
-	void __iomem *data_va;
-	void __iomem *cmd_va;
-	void __iomem *addr_va;
-	struct nand_bbt_descr *bbt_desc;
-};
-
-static struct nand_ecclayout nomadik_ecc_layout = {
-	.eccbytes = 3 * 4,
-	.eccpos = { /* each subpage has 16 bytes: pos 2,3,4 hosts ECC */
-		0x02, 0x03, 0x04,
-		0x12, 0x13, 0x14,
-		0x22, 0x23, 0x24,
-		0x32, 0x33, 0x34},
-	/* let's keep bytes 5,6,7 for us, just in case we change ECC algo */
-	.oobfree = { {0x08, 0x08}, {0x18, 0x08}, {0x28, 0x08}, {0x38, 0x08} },
-};
-
-static void nomadik_ecc_control(struct mtd_info *mtd, int mode)
-{
-	/* No need to enable hw ecc, it's on by default */
-}
-
-static void nomadik_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
-{
-	struct nand_chip *nand = mtd->priv;
-	struct nomadik_nand_host *host = nand->priv;
-
-	if (cmd == NAND_CMD_NONE)
-		return;
-
-	if (ctrl & NAND_CLE)
-		writeb(cmd, host->cmd_va);
-	else
-		writeb(cmd, host->addr_va);
-}
-
-static int nomadik_nand_probe(struct platform_device *pdev)
-{
-	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
-	struct nomadik_nand_host *host;
-	struct mtd_info *mtd;
-	struct nand_chip *nand;
-	struct resource *res;
-	int ret = 0;
-
-	/* Allocate memory for the device structure (and zero it) */
-	host = kzalloc(sizeof(struct nomadik_nand_host), GFP_KERNEL);
-	if (!host) {
-		dev_err(&pdev->dev, "Failed to allocate device structure.\n");
-		return -ENOMEM;
-	}
-
-	/* Call the client's init function, if any */
-	if (pdata->init)
-		ret = pdata->init();
-	if (ret < 0) {
-		dev_err(&pdev->dev, "Init function failed\n");
-		goto err;
-	}
-
-	/* ioremap three regions */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->addr_va = ioremap(res->start, resource_size(res));
-
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->data_va = ioremap(res->start, resource_size(res));
-
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->cmd_va = ioremap(res->start, resource_size(res));
-
-	if (!host->addr_va || !host->data_va || !host->cmd_va) {
-		ret = -ENOMEM;
-		goto err_unmap;
-	}
-
-	/* Link all private pointers */
-	mtd = &host->mtd;
-	nand = &host->nand;
-	mtd->priv = nand;
-	nand->priv = host;
-
-	host->mtd.owner = THIS_MODULE;
-	nand->IO_ADDR_R = host->data_va;
-	nand->IO_ADDR_W = host->data_va;
-	nand->cmd_ctrl = nomadik_cmd_ctrl;
-
-	/*
-	 * This stanza declares ECC_HW but uses soft routines. It's because
-	 * HW claims to make the calculation but not the correction. However,
-	 * I haven't managed to get the desired data out of it until now.
-	 */
-	nand->ecc.mode = NAND_ECC_SOFT;
-	nand->ecc.layout = &nomadik_ecc_layout;
-	nand->ecc.hwctl = nomadik_ecc_control;
-	nand->ecc.size = 512;
-	nand->ecc.bytes = 3;
-
-	nand->options = pdata->options;
-
-	/*
-	 * Scan to find existence of the device
-	 */
-	if (nand_scan(&host->mtd, 1)) {
-		ret = -ENXIO;
-		goto err_unmap;
-	}
-
-	mtd_device_register(&host->mtd, pdata->parts, pdata->nparts);
-
-	platform_set_drvdata(pdev, host);
-	return 0;
-
- err_unmap:
-	if (host->cmd_va)
-		iounmap(host->cmd_va);
-	if (host->data_va)
-		iounmap(host->data_va);
-	if (host->addr_va)
-		iounmap(host->addr_va);
- err:
-	kfree(host);
-	return ret;
-}
-
-/*
- * Clean up routine
- */
-static int nomadik_nand_remove(struct platform_device *pdev)
-{
-	struct nomadik_nand_host *host = platform_get_drvdata(pdev);
-	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
-
-	if (pdata->exit)
-		pdata->exit();
-
-	if (host) {
-		nand_release(&host->mtd);
-		iounmap(host->cmd_va);
-		iounmap(host->data_va);
-		iounmap(host->addr_va);
-		kfree(host);
-	}
-	return 0;
-}
-
-static int nomadik_nand_suspend(struct device *dev)
-{
-	struct nomadik_nand_host *host = dev_get_drvdata(dev);
-	int ret = 0;
-	if (host)
-		ret = mtd_suspend(&host->mtd);
-	return ret;
-}
-
-static int nomadik_nand_resume(struct device *dev)
-{
-	struct nomadik_nand_host *host = dev_get_drvdata(dev);
-	if (host)
-		mtd_resume(&host->mtd);
-	return 0;
-}
-
-static const struct dev_pm_ops nomadik_nand_pm_ops = {
-	.suspend = nomadik_nand_suspend,
-	.resume = nomadik_nand_resume,
-};
-
-static struct platform_driver nomadik_nand_driver = {
-	.probe = nomadik_nand_probe,
-	.remove = nomadik_nand_remove,
-	.driver = {
-		.owner = THIS_MODULE,
-		.name = "nomadik_nand",
-		.pm = &nomadik_nand_pm_ops,
-	},
-};
-
-module_platform_driver(nomadik_nand_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("ST Microelectronics (sachin.verma@st.com)");
-MODULE_DESCRIPTION("NAND driver for Nomadik Platform");
diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c
index 94dc46b..a619119 100644
--- a/drivers/mtd/nand/nuc900_nand.c
+++ b/drivers/mtd/nand/nuc900_nand.c
@@ -246,7 +246,7 @@
 	spin_unlock(&nand->lock);
 }
 
-static int __devinit nuc900_nand_probe(struct platform_device *pdev)
+static int nuc900_nand_probe(struct platform_device *pdev)
 {
 	struct nuc900_nand *nuc900_nand;
 	struct nand_chip *chip;
@@ -317,7 +317,7 @@
 	return retval;
 }
 
-static int __devexit nuc900_nand_remove(struct platform_device *pdev)
+static int nuc900_nand_remove(struct platform_device *pdev)
 {
 	struct nuc900_nand *nuc900_nand = platform_get_drvdata(pdev);
 	struct resource *res;
@@ -340,7 +340,7 @@
 
 static struct platform_driver nuc900_nand_driver = {
 	.probe		= nuc900_nand_probe,
-	.remove		= __devexit_p(nuc900_nand_remove),
+	.remove		= nuc900_nand_remove,
 	.driver		= {
 		.name	= "nuc900-fmi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 1f34ba1..0002d5e 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1323,7 +1323,7 @@
 }
 #endif /* CONFIG_MTD_NAND_OMAP_BCH */
 
-static int __devinit omap_nand_probe(struct platform_device *pdev)
+static int omap_nand_probe(struct platform_device *pdev)
 {
 	struct omap_nand_info		*info;
 	struct omap_nand_platform_data	*pdata;
diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index aefaf8c..cd72b92 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -194,7 +194,7 @@
 	return ret;
 }
 
-static int __devexit orion_nand_remove(struct platform_device *pdev)
+static int orion_nand_remove(struct platform_device *pdev)
 {
 	struct mtd_info *mtd = platform_get_drvdata(pdev);
 	struct nand_chip *nc = mtd->priv;
@@ -223,7 +223,7 @@
 #endif
 
 static struct platform_driver orion_nand_driver = {
-	.remove		= __devexit_p(orion_nand_remove),
+	.remove		= orion_nand_remove,
 	.driver		= {
 		.name	= "orion_nand",
 		.owner	= THIS_MODULE,
diff --git a/drivers/mtd/nand/pasemi_nand.c b/drivers/mtd/nand/pasemi_nand.c
index 1440e51..5a67082 100644
--- a/drivers/mtd/nand/pasemi_nand.c
+++ b/drivers/mtd/nand/pasemi_nand.c
@@ -89,7 +89,7 @@
 	return !!(inl(lpcctl) & LBICTRL_LPCCTL_NR);
 }
 
-static int __devinit pasemi_nand_probe(struct platform_device *ofdev)
+static int pasemi_nand_probe(struct platform_device *ofdev)
 {
 	struct pci_dev *pdev;
 	struct device_node *np = ofdev->dev.of_node;
@@ -184,7 +184,7 @@
 	return err;
 }
 
-static int __devexit pasemi_nand_remove(struct platform_device *ofdev)
+static int pasemi_nand_remove(struct platform_device *ofdev)
 {
 	struct nand_chip *chip;
 
diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index a47ee68..c004566 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -28,7 +28,7 @@
 /*
  * Probe for the NAND device.
  */
-static int __devinit plat_nand_probe(struct platform_device *pdev)
+static int plat_nand_probe(struct platform_device *pdev)
 {
 	struct platform_nand_data *pdata = pdev->dev.platform_data;
 	struct mtd_part_parser_data ppdata;
@@ -134,7 +134,7 @@
 /*
  * Remove a NAND device.
  */
-static int __devexit plat_nand_remove(struct platform_device *pdev)
+static int plat_nand_remove(struct platform_device *pdev)
 {
 	struct plat_nand_data *data = platform_get_drvdata(pdev);
 	struct platform_nand_data *pdata = pdev->dev.platform_data;
@@ -160,7 +160,7 @@
 
 static struct platform_driver plat_nand_driver = {
 	.probe	= plat_nand_probe,
-	.remove	= __devexit_p(plat_nand_remove),
+	.remove	= plat_nand_remove,
 	.driver	= {
 		.name		= "gen_nand",
 		.owner		= THIS_MODULE,
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 79ded48..df954b4 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -730,11 +730,14 @@
 				      struct s3c2410_nand_mtd *mtd,
 				      struct s3c2410_nand_set *set)
 {
-	if (set)
+	if (set) {
 		mtd->mtd.name = set->name;
 
-	return mtd_device_parse_register(&mtd->mtd, NULL, NULL,
+		return mtd_device_parse_register(&mtd->mtd, NULL, NULL,
 					 set->partitions, set->nr_partitions);
+	}
+
+	return -ENODEV;
 }
 
 /**
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index f48ac5d..57b3971 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -23,11 +23,18 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_mtd.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/sh_dma.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
@@ -106,6 +113,84 @@
 	writeb(0x0, FLTRCR(flctl));
 }
 
+static void flctl_dma_complete(void *param)
+{
+	struct sh_flctl *flctl = param;
+
+	complete(&flctl->dma_complete);
+}
+
+static void flctl_release_dma(struct sh_flctl *flctl)
+{
+	if (flctl->chan_fifo0_rx) {
+		dma_release_channel(flctl->chan_fifo0_rx);
+		flctl->chan_fifo0_rx = NULL;
+	}
+	if (flctl->chan_fifo0_tx) {
+		dma_release_channel(flctl->chan_fifo0_tx);
+		flctl->chan_fifo0_tx = NULL;
+	}
+}
+
+static void flctl_setup_dma(struct sh_flctl *flctl)
+{
+	dma_cap_mask_t mask;
+	struct dma_slave_config cfg;
+	struct platform_device *pdev = flctl->pdev;
+	struct sh_flctl_platform_data *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (!pdata)
+		return;
+
+	if (pdata->slave_id_fifo0_tx <= 0 || pdata->slave_id_fifo0_rx <= 0)
+		return;
+
+	/* We either use DMA for both Tx and Rx or do not use it at all */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	flctl->chan_fifo0_tx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_fifo0_tx);
+	dev_dbg(&pdev->dev, "%s: TX: got channel %p\n", __func__,
+		flctl->chan_fifo0_tx);
+
+	if (!flctl->chan_fifo0_tx)
+		return;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.slave_id = pdata->slave_id_fifo0_tx;
+	cfg.direction = DMA_MEM_TO_DEV;
+	cfg.dst_addr = (dma_addr_t)FLDTFIFO(flctl);
+	cfg.src_addr = 0;
+	ret = dmaengine_slave_config(flctl->chan_fifo0_tx, &cfg);
+	if (ret < 0)
+		goto err;
+
+	flctl->chan_fifo0_rx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_fifo0_rx);
+	dev_dbg(&pdev->dev, "%s: RX: got channel %p\n", __func__,
+		flctl->chan_fifo0_rx);
+
+	if (!flctl->chan_fifo0_rx)
+		goto err;
+
+	cfg.slave_id = pdata->slave_id_fifo0_rx;
+	cfg.direction = DMA_DEV_TO_MEM;
+	cfg.dst_addr = 0;
+	cfg.src_addr = (dma_addr_t)FLDTFIFO(flctl);
+	ret = dmaengine_slave_config(flctl->chan_fifo0_rx, &cfg);
+	if (ret < 0)
+		goto err;
+
+	init_completion(&flctl->dma_complete);
+
+	return;
+
+err:
+	flctl_release_dma(flctl);
+}
+
 static void set_addr(struct mtd_info *mtd, int column, int page_addr)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
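For reference, the channel-acquisition pattern flctl_setup_dma() uses above, condensed into one hypothetical helper: request a DMA_SLAVE-capable channel through the shdma filter, then describe the peripheral end with dmaengine_slave_config(). The Rx channel is set up the same way with the direction and FIFO address swapped, and failure of either channel releases both so the driver stays on PIO.

#include <linux/dmaengine.h>
#include <linux/sh_dma.h>

static struct dma_chan *example_get_tx_chan(int slave_id, dma_addr_t fifo)
{
	struct dma_slave_config cfg = {
		.slave_id  = slave_id,		/* shdma routing, pre-DT-binding era */
		.direction = DMA_MEM_TO_DEV,	/* memory -> device FIFO */
		.dst_addr  = fifo,		/* physical FIFO address */
	};
	dma_cap_mask_t mask;
	struct dma_chan *chan;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);

	chan = dma_request_channel(mask, shdma_chan_filter,
				   (void *)(unsigned long)slave_id);
	if (!chan)
		return NULL;

	if (dmaengine_slave_config(chan, &cfg) < 0) {
		dma_release_channel(chan);
		return NULL;	/* caller falls back to PIO */
	}
	return chan;
}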
@@ -225,7 +310,7 @@
 
 		for (i = 0; i < 3; i++) {
 			uint8_t org;
-			int index;
+			unsigned int index;
 
 			data = readl(ecc_reg[i]);
 
@@ -261,6 +346,70 @@
 	timeout_error(flctl, __func__);
 }
 
+static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
+					int len, enum dma_data_direction dir)
+{
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *chan;
+	enum dma_transfer_direction tr_dir;
+	dma_addr_t dma_addr;
+	dma_cookie_t cookie = -EINVAL;
+	uint32_t reg;
+	int ret;
+
+	if (dir == DMA_FROM_DEVICE) {
+		chan = flctl->chan_fifo0_rx;
+		tr_dir = DMA_DEV_TO_MEM;
+	} else {
+		chan = flctl->chan_fifo0_tx;
+		tr_dir = DMA_MEM_TO_DEV;
+	}
+
+	dma_addr = dma_map_single(chan->device->dev, buf, len, dir);
+
+	if (dma_addr)
+		desc = dmaengine_prep_slave_single(chan, dma_addr, len,
+			tr_dir, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	if (desc) {
+		reg = readl(FLINTDMACR(flctl));
+		reg |= DREQ0EN;
+		writel(reg, FLINTDMACR(flctl));
+
+		desc->callback = flctl_dma_complete;
+		desc->callback_param = flctl;
+		cookie = dmaengine_submit(desc);
+
+		dma_async_issue_pending(chan);
+	} else {
+		/* DMA failed, fall back to PIO */
+		flctl_release_dma(flctl);
+		dev_warn(&flctl->pdev->dev,
+			 "DMA failed, falling back to PIO\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = wait_for_completion_timeout(&flctl->dma_complete,
+					  msecs_to_jiffies(3000));
+
+	if (ret <= 0) {
+		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+		dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n");
+	}
+
+out:
+	reg = readl(FLINTDMACR(flctl));
+	reg &= ~DREQ0EN;
+	writel(reg, FLINTDMACR(flctl));
+
+	dma_unmap_single(chan->device->dev, dma_addr, len, dir);
+
+	/* ret > 0 is success */
+	return ret;
+}
+
 static void read_datareg(struct sh_flctl *flctl, int offset)
 {
 	unsigned long data;
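flctl_dma_fifo0_transfer() above is the standard slave-DMA sequence: map the buffer, prep a single-buffer slave descriptor, submit it, kick the engine, then block on a completion. wait_for_completion_timeout() returns the remaining jiffies on success and 0 on timeout, which is why the helper (and its callers below) treat ret > 0 as DMA success and anything else as the cue to fall back to PIO. A condensed sketch, with the error unwinding trimmed:

	dma_addr = dma_map_single(chan->device->dev, buf, len, dir);
	desc = dmaengine_prep_slave_single(chan, dma_addr, len, tr_dir,
					   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	desc->callback = flctl_dma_complete;	/* fires complete() from IRQ */
	desc->callback_param = flctl;
	dmaengine_submit(desc);
	dma_async_issue_pending(chan);		/* nothing moves until this */

	if (!wait_for_completion_timeout(&flctl->dma_complete,
					 msecs_to_jiffies(3000)))
		/* 3.8-era teardown; dmaengine_terminate_all() wraps this */
		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);

	dma_unmap_single(chan->device->dev, dma_addr, len, dir);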
@@ -279,11 +428,20 @@
 
 	len_4align = (rlen + 3) / 4;
 
+	/* initiate DMA transfer */
+	if (flctl->chan_fifo0_rx && rlen >= 32 &&
+		flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_DEV_TO_MEM) > 0)
+			goto convert;	/* DMA success */
+
+	/* do polling transfer */
 	for (i = 0; i < len_4align; i++) {
 		wait_rfifo_ready(flctl);
 		buf[i] = readl(FLDTFIFO(flctl));
-		buf[i] = be32_to_cpu(buf[i]);
 	}
+
+convert:
+	for (i = 0; i < len_4align; i++)
+		buf[i] = be32_to_cpu(buf[i]);
 }
 
 static enum flctl_ecc_res_t read_ecfiforeg
@@ -305,28 +463,39 @@
 	return res;
 }
 
-static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+static void write_fiforeg(struct sh_flctl *flctl, int rlen,
+						unsigned int offset)
 {
 	int i, len_4align;
-	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
-	void *fifo_addr = (void *)FLDTFIFO(flctl);
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
 	for (i = 0; i < len_4align; i++) {
 		wait_wfifo_ready(flctl);
-		writel(cpu_to_be32(data[i]), fifo_addr);
+		writel(cpu_to_be32(buf[i]), FLDTFIFO(flctl));
 	}
 }
 
-static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
+						unsigned int offset)
 {
 	int i, len_4align;
-	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
+
+	for (i = 0; i < len_4align; i++)
+		buf[i] = cpu_to_be32(buf[i]);
+
+	/* initiate DMA transfer */
+	if (flctl->chan_fifo0_tx && rlen >= 32 &&
+		flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_MEM_TO_DEV) > 0)
+			return;	/* DMA success */
+
+	/* do polling transfer */
 	for (i = 0; i < len_4align; i++) {
 		wait_wecfifo_ready(flctl);
-		writel(cpu_to_be32(data[i]), FLECFIFO(flctl));
+		writel(buf[i], FLECFIFO(flctl));
 	}
 }
 
@@ -750,41 +919,35 @@
 static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 
-	memcpy(&flctl->done_buff[index], buf, len);
+	memcpy(&flctl->done_buff[flctl->index], buf, len);
 	flctl->index += len;
 }
 
 static uint8_t flctl_read_byte(struct mtd_info *mtd)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 	uint8_t data;
 
-	data = flctl->done_buff[index];
+	data = flctl->done_buff[flctl->index];
 	flctl->index++;
 	return data;
 }
 
 static uint16_t flctl_read_word(struct mtd_info *mtd)
 {
-       struct sh_flctl *flctl = mtd_to_flctl(mtd);
-       int index = flctl->index;
-       uint16_t data;
-       uint16_t *buf = (uint16_t *)&flctl->done_buff[index];
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint16_t *buf = (uint16_t *)&flctl->done_buff[flctl->index];
 
-       data = *buf;
-       flctl->index += 2;
-       return data;
+	flctl->index += 2;
+	return *buf;
 }
 
 static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 
-	memcpy(buf, &flctl->done_buff[index], len);
+	memcpy(buf, &flctl->done_buff[flctl->index], len);
 	flctl->index += len;
 }
 
@@ -858,7 +1021,74 @@
 	return IRQ_HANDLED;
 }
 
-static int __devinit flctl_probe(struct platform_device *pdev)
+#ifdef CONFIG_OF
+struct flctl_soc_config {
+	unsigned long flcmncr_val;
+	unsigned has_hwecc:1;
+	unsigned use_holden:1;
+};
+
+static struct flctl_soc_config flctl_sh7372_config = {
+	.flcmncr_val = CLK_16B_12L_4H | TYPESEL_SET | SHBUSSEL,
+	.has_hwecc = 1,
+	.use_holden = 1,
+};
+
+static const struct of_device_id of_flctl_match[] = {
+	{ .compatible = "renesas,shmobile-flctl-sh7372",
+				.data = &flctl_sh7372_config },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_flctl_match);
+
+static struct sh_flctl_platform_data *flctl_parse_dt(struct device *dev)
+{
+	const struct of_device_id *match;
+	struct flctl_soc_config *config;
+	struct sh_flctl_platform_data *pdata;
+	struct device_node *dn = dev->of_node;
+	int ret;
+
+	match = of_match_device(of_flctl_match, dev);
+	if (match)
+		config = (struct flctl_soc_config *)match->data;
+	else {
+		dev_err(dev, "%s: no OF configuration attached\n", __func__);
+		return NULL;
+	}
+
+	pdata = devm_kzalloc(dev, sizeof(struct sh_flctl_platform_data),
+								GFP_KERNEL);
+	if (!pdata) {
+		dev_err(dev, "%s: failed to allocate config data\n", __func__);
+		return NULL;
+	}
+
+	/* set SoC specific options */
+	pdata->flcmncr_val = config->flcmncr_val;
+	pdata->has_hwecc = config->has_hwecc;
+	pdata->use_holden = config->use_holden;
+
+	/* parse user defined options */
+	ret = of_get_nand_bus_width(dn);
+	if (ret == 16)
+		pdata->flcmncr_val |= SEL_16BIT;
+	else if (ret != 8) {
+		dev_err(dev, "%s: invalid bus width\n", __func__);
+		return NULL;
+	}
+
+	return pdata;
+}
+#else /* CONFIG_OF */
+#define of_flctl_match NULL
+static struct sh_flctl_platform_data *flctl_parse_dt(struct device *dev)
+{
+	return NULL;
+}
+#endif /* CONFIG_OF */
+
+static int flctl_probe(struct platform_device *pdev)
 {
 	struct resource *res;
 	struct sh_flctl *flctl;
@@ -867,12 +1097,7 @@
 	struct sh_flctl_platform_data *pdata;
 	int ret = -ENXIO;
 	int irq;
-
-	pdata = pdev->dev.platform_data;
-	if (pdata == NULL) {
-		dev_err(&pdev->dev, "no platform data defined\n");
-		return -EINVAL;
-	}
+	struct mtd_part_parser_data ppdata = {};
 
 	flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL);
 	if (!flctl) {
@@ -904,6 +1129,17 @@
 		goto err_flste;
 	}
 
+	if (pdev->dev.of_node)
+		pdata = flctl_parse_dt(&pdev->dev);
+	else
+		pdata = pdev->dev.platform_data;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "no setup data defined\n");
+		ret = -EINVAL;
+		goto err_pdata;
+	}
+
 	platform_set_drvdata(pdev, flctl);
 	flctl_mtd = &flctl->mtd;
 	nand = &flctl->chip;
@@ -932,6 +1168,8 @@
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_resume(&pdev->dev);
 
+	flctl_setup_dma(flctl);
+
 	ret = nand_scan_ident(flctl_mtd, 1, NULL);
 	if (ret)
 		goto err_chip;
@@ -944,12 +1182,16 @@
 	if (ret)
 		goto err_chip;
 
-	mtd_device_register(flctl_mtd, pdata->parts, pdata->nr_parts);
+	ppdata.of_node = pdev->dev.of_node;
+	ret = mtd_device_parse_register(flctl_mtd, NULL, &ppdata, pdata->parts,
+			pdata->nr_parts);
 
 	return 0;
 
 err_chip:
+	flctl_release_dma(flctl);
 	pm_runtime_disable(&pdev->dev);
+err_pdata:
 	free_irq(irq, flctl);
 err_flste:
 	iounmap(flctl->reg);
@@ -958,10 +1200,11 @@
 	return ret;
 }
 
-static int __devexit flctl_remove(struct platform_device *pdev)
+static int flctl_remove(struct platform_device *pdev)
 {
 	struct sh_flctl *flctl = platform_get_drvdata(pdev);
 
+	flctl_release_dma(flctl);
 	nand_release(&flctl->mtd);
 	pm_runtime_disable(&pdev->dev);
 	free_irq(platform_get_irq(pdev, 0), flctl);
@@ -976,6 +1219,7 @@
 	.driver = {
 		.name	= "sh_flctl",
 		.owner	= THIS_MODULE,
+		.of_match_table = of_flctl_match,
 	},
 };
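Worth noting about the registration change above: passing ppdata with of_node set is what lets mtd_device_parse_register() hand the device-tree node to DT-aware partition parsers (such as ofpart, configuration permitting) before falling back to the static pdata->parts table. A sketch of the resulting call; the error handling here is illustrative rather than taken from the patch:

	struct mtd_part_parser_data ppdata = { .of_node = pdev->dev.of_node };
	int ret;

	ret = mtd_device_parse_register(flctl_mtd, NULL, &ppdata,
					pdata->parts, pdata->nr_parts);
	if (ret)
		dev_err(&pdev->dev, "MTD registration failed: %d\n", ret);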
 
diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c
index 3421e37..127bc42 100644
--- a/drivers/mtd/nand/sharpsl.c
+++ b/drivers/mtd/nand/sharpsl.c
@@ -106,7 +106,7 @@
 /*
  * Main initialization routine
  */
-static int __devinit sharpsl_nand_probe(struct platform_device *pdev)
+static int sharpsl_nand_probe(struct platform_device *pdev)
 {
 	struct nand_chip *this;
 	struct resource *r;
@@ -205,7 +205,7 @@
 /*
  * Clean up routine
  */
-static int __devexit sharpsl_nand_remove(struct platform_device *pdev)
+static int sharpsl_nand_remove(struct platform_device *pdev)
 {
 	struct sharpsl_nand *sharpsl = platform_get_drvdata(pdev);
 
@@ -228,7 +228,7 @@
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sharpsl_nand_probe,
-	.remove		= __devexit_p(sharpsl_nand_remove),
+	.remove		= sharpsl_nand_remove,
 };
 
 module_platform_driver(sharpsl_nand_driver);
diff --git a/drivers/mtd/nand/socrates_nand.c b/drivers/mtd/nand/socrates_nand.c
index f3f28fa..09dde7d 100644
--- a/drivers/mtd/nand/socrates_nand.c
+++ b/drivers/mtd/nand/socrates_nand.c
@@ -140,7 +140,7 @@
 /*
  * Probe for the NAND device.
  */
-static int __devinit socrates_nand_probe(struct platform_device *ofdev)
+static int socrates_nand_probe(struct platform_device *ofdev)
 {
 	struct socrates_nand_host *host;
 	struct mtd_info *mtd;
@@ -220,7 +220,7 @@
 /*
  * Remove a NAND device.
  */
-static int __devexit socrates_nand_remove(struct platform_device *ofdev)
+static int socrates_nand_remove(struct platform_device *ofdev)
 {
 	struct socrates_nand_host *host = dev_get_drvdata(&ofdev->dev);
 	struct mtd_info *mtd = &host->mtd;
@@ -251,7 +251,7 @@
 		.of_match_table = socrates_nand_match,
 	},
 	.probe		= socrates_nand_probe,
-	.remove		= __devexit_p(socrates_nand_remove),
+	.remove		= socrates_nand_remove,
 };
 
 module_platform_driver(socrates_nand_driver);
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index d9127e2..dbd3aa5 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -71,7 +71,10 @@
 		(*pparts)[i].name = (char *)partname;
 
 		if (of_get_property(pp, "read-only", &len))
-			(*pparts)[i].mask_flags = MTD_WRITEABLE;
+			(*pparts)[i].mask_flags |= MTD_WRITEABLE;
+
+		if (of_get_property(pp, "lock", &len))
+			(*pparts)[i].mask_flags |= MTD_POWERUP_LOCK;
 
 		i++;
 	}
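Two distinct fixes hide in the small ofpart hunk above: mask_flags is now OR-ed instead of assigned, so a partition carrying several restrictions keeps all of them, and a new "lock" DT property maps to MTD_POWERUP_LOCK. In isolation ("part" stands in for (*pparts)[i]):

	/* "|=" accumulates; plain "=" would clobber bits set earlier */
	part->mask_flags |= MTD_WRITEABLE;	/* DT "read-only": mask off write */
	part->mask_flags |= MTD_POWERUP_LOCK;	/* DT "lock": locked at power-up */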
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index 1c4f97c..9f11562 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -35,7 +35,7 @@
 	struct onenand_chip	onenand;
 };
 
-static int __devinit generic_onenand_probe(struct platform_device *pdev)
+static int generic_onenand_probe(struct platform_device *pdev)
 {
 	struct onenand_info *info;
 	struct onenand_platform_data *pdata = pdev->dev.platform_data;
@@ -88,7 +88,7 @@
 	return err;
 }
 
-static int __devexit generic_onenand_remove(struct platform_device *pdev)
+static int generic_onenand_remove(struct platform_device *pdev)
 {
 	struct onenand_info *info = platform_get_drvdata(pdev);
 	struct resource *res = pdev->resource;
@@ -112,7 +112,7 @@
 		.owner		= THIS_MODULE,
 	},
 	.probe		= generic_onenand_probe,
-	.remove		= __devexit_p(generic_onenand_remove),
+	.remove		= generic_onenand_remove,
 };
 
 module_platform_driver(generic_onenand_driver);
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 00cd3da..065f3fe 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -630,7 +630,7 @@
 	return ret;
 }
 
-static int __devinit omap2_onenand_probe(struct platform_device *pdev)
+static int omap2_onenand_probe(struct platform_device *pdev)
 {
 	struct omap_onenand_platform_data *pdata;
 	struct omap2_onenand *c;
@@ -799,7 +799,7 @@
 	return r;
 }
 
-static int __devexit omap2_onenand_remove(struct platform_device *pdev)
+static int omap2_onenand_remove(struct platform_device *pdev)
 {
 	struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
 
@@ -822,7 +822,7 @@
 
 static struct platform_driver omap2_onenand_driver = {
 	.probe		= omap2_onenand_probe,
-	.remove		= __devexit_p(omap2_onenand_remove),
+	.remove		= omap2_onenand_remove,
 	.shutdown	= omap2_onenand_shutdown,
 	.driver		= {
 		.name	= DRIVER_NAME,
diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c
index 8e4b3f2..33f2a8f 100644
--- a/drivers/mtd/onenand/samsung.c
+++ b/drivers/mtd/onenand/samsung.c
@@ -1053,7 +1053,7 @@
 	return err;
 }
 
-static int __devexit s3c_onenand_remove(struct platform_device *pdev)
+static int s3c_onenand_remove(struct platform_device *pdev)
 {
 	struct mtd_info *mtd = platform_get_drvdata(pdev);
 
@@ -1130,7 +1130,7 @@
 	},
 	.id_table	= s3c_onenand_driver_ids,
 	.probe          = s3c_onenand_probe,
-	.remove         = __devexit_p(s3c_onenand_remove),
+	.remove         = s3c_onenand_remove,
 };
 
 module_platform_driver(s3c_onenand_driver);
diff --git a/drivers/mtd/tests/mtd_nandbiterrs.c b/drivers/mtd/tests/mtd_nandbiterrs.c
index cc8d62c..207bf9a 100644
--- a/drivers/mtd/tests/mtd_nandbiterrs.c
+++ b/drivers/mtd/tests/mtd_nandbiterrs.c
@@ -39,6 +39,9 @@
  * this program; see the file COPYING. If not, write to the Free Software
  * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -47,8 +50,6 @@
 #include <linux/mtd/nand.h>
 #include <linux/slab.h>
 
-#define msg(FMT, VA...) pr_info("mtd_nandbiterrs: "FMT, ##VA)
-
 static int dev;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -103,7 +104,7 @@
 	struct erase_info ei;
 	loff_t addr = eraseblock * mtd->erasesize;
 
-	msg("erase_block\n");
+	pr_info("erase_block\n");
 
 	memset(&ei, 0, sizeof(struct erase_info));
 	ei.mtd  = mtd;
@@ -112,7 +113,7 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (err || ei.state == MTD_ERASE_FAILED) {
-		msg("error %d while erasing\n", err);
+		pr_err("error %d while erasing\n", err);
 		if (!err)
 			err = -EIO;
 		return err;
@@ -128,11 +129,11 @@
 	size_t written;
 
 	if (log)
-		msg("write_page\n");
+		pr_info("write_page\n");
 
 	err = mtd_write(mtd, offset, mtd->writesize, &written, wbuffer);
 	if (err || written != mtd->writesize) {
-		msg("error: write failed at %#llx\n", (long long)offset);
+		pr_err("error: write failed at %#llx\n", (long long)offset);
 		if (!err)
 			err = -EIO;
 	}
@@ -147,7 +148,7 @@
 	struct mtd_oob_ops ops;
 
 	if (log)
-		msg("rewrite page\n");
+		pr_info("rewrite page\n");
 
 	ops.mode      = MTD_OPS_RAW; /* No ECC */
 	ops.len       = mtd->writesize;
@@ -160,7 +161,7 @@
 
 	err = mtd_write_oob(mtd, offset, &ops);
 	if (err || ops.retlen != mtd->writesize) {
-		msg("error: write_oob failed (%d)\n", err);
+		pr_err("error: write_oob failed (%d)\n", err);
 		if (!err)
 			err = -EIO;
 	}
@@ -177,7 +178,7 @@
 	struct mtd_ecc_stats oldstats;
 
 	if (log)
-		msg("read_page\n");
+		pr_info("read_page\n");
 
 	/* Saving last mtd stats */
 	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
@@ -187,7 +188,7 @@
 		err = mtd->ecc_stats.corrected - oldstats.corrected;
 
 	if (err < 0 || read != mtd->writesize) {
-		msg("error: read failed at %#llx\n", (long long)offset);
+		pr_err("error: read failed at %#llx\n", (long long)offset);
 		if (err >= 0)
 			err = -EIO;
 	}
@@ -201,11 +202,11 @@
 	unsigned i, errs = 0;
 
 	if (log)
-		msg("verify_page\n");
+		pr_info("verify_page\n");
 
 	for (i = 0; i < mtd->writesize; i++) {
 		if (rbuffer[i] != hash(i+seed)) {
-			msg("Error: page offset %u, expected %02x, got %02x\n",
+			pr_err("Error: page offset %u, expected %02x, got %02x\n",
 				i, hash(i+seed), rbuffer[i]);
 			errs++;
 		}
@@ -230,13 +231,13 @@
 		for (bit = 7; bit >= 0; bit--) {
 			if (CBIT(wbuffer[byte], bit)) {
 				BCLR(wbuffer[byte], bit);
-				msg("Inserted biterror @ %u/%u\n", byte, bit);
+				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
 				return 0;
 			}
 		}
 		byte++;
 	}
-	msg("biterror: Failed to find a '1' bit\n");
+	pr_err("biterror: Failed to find a '1' bit\n");
 	return -EIO;
 }
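insert_biterror() above leans on two bit helpers whose definitions sit outside this hunk; in the test module they are conventionally defined along these lines (reproduced as an assumption, not from the patch; BIT() comes from <linux/bitops.h>):

#define CBIT(v, n)	((v) & BIT(n))		/* is bit n still set? */
#define BCLR(v, n)	((v) &= ~BIT(n))	/* clear bit n: one injected error */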
 
@@ -248,7 +249,7 @@
 	unsigned i;
 	unsigned errs_per_subpage = 0;
 
-	msg("incremental biterrors test\n");
+	pr_info("incremental biterrors test\n");
 
 	for (i = 0; i < mtd->writesize; i++)
 		wbuffer[i] = hash(i+seed);
@@ -265,9 +266,9 @@
 
 		err = read_page(1);
 		if (err > 0)
-			msg("Read reported %d corrected bit errors\n", err);
+			pr_info("Read reported %d corrected bit errors\n", err);
 		if (err < 0) {
-			msg("After %d biterrors per subpage, read reported error %d\n",
+			pr_err("After %d biterrors per subpage, read reported error %d\n",
 				errs_per_subpage, err);
 			err = 0;
 			goto exit;
@@ -275,11 +276,11 @@
 
 		err = verify_page(1);
 		if (err) {
-			msg("ECC failure, read data is incorrect despite read success\n");
+			pr_err("ECC failure, read data is incorrect despite read success\n");
 			goto exit;
 		}
 
-		msg("Successfully corrected %d bit errors per subpage\n",
+		pr_info("Successfully corrected %d bit errors per subpage\n",
 			errs_per_subpage);
 
 		for (i = 0; i < subcount; i++) {
@@ -311,7 +312,7 @@
 
 	memset(bitstats, 0, sizeof(bitstats));
 
-	msg("overwrite biterrors test\n");
+	pr_info("overwrite biterrors test\n");
 
 	for (i = 0; i < mtd->writesize; i++)
 		wbuffer[i] = hash(i+seed);
@@ -329,18 +330,18 @@
 		err = read_page(0);
 		if (err >= 0) {
 			if (err >= MAXBITS) {
-				msg("Implausible number of bit errors corrected\n");
+				pr_err("Implausible number of bit errors corrected\n");
 				err = -EIO;
 				break;
 			}
 			bitstats[err]++;
 			if (err > max_corrected) {
 				max_corrected = err;
-				msg("Read reported %d corrected bit errors\n",
+				pr_info("Read reported %d corrected bit errors\n",
 					err);
 			}
 		} else { /* err < 0 */
-			msg("Read reported error %d\n", err);
+			pr_info("Read reported error %d\n", err);
 			err = 0;
 			break;
 		}
@@ -348,7 +349,7 @@
 		err = verify_page(0);
 		if (err) {
 			bitstats[max_corrected] = opno;
-			msg("ECC failure, read data is incorrect despite read success\n");
+			pr_err("ECC failure, read data is incorrect despite read success\n");
 			break;
 		}
 
@@ -357,9 +358,9 @@
 
 	/* At this point bitstats[0] contains the number of ops with no bit
 	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
-	msg("Bit error histogram (%d operations total):\n", opno);
+	pr_info("Bit error histogram (%d operations total):\n", opno);
 	for (i = 0; i < max_corrected; i++)
-		msg("Page reads with %3d corrected bit errors: %d\n",
+		pr_info("Page reads with %3d corrected bit errors: %d\n",
 			i, bitstats[i]);
 
 exit:
@@ -370,36 +371,36 @@
 {
 	int err = 0;
 
-	msg("\n");
-	msg("==================================================\n");
-	msg("MTD device: %d\n", dev);
+	printk("\n");
+	printk(KERN_INFO "==================================================\n");
+	pr_info("MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		msg("error: cannot get MTD device\n");
+		pr_err("error: cannot get MTD device\n");
 		goto exit_mtddev;
 	}
 
 	if (mtd->type != MTD_NANDFLASH) {
-		msg("this test requires NAND flash\n");
+		pr_info("this test requires NAND flash\n");
 		err = -ENODEV;
 		goto exit_nand;
 	}
 
-	msg("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
+	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
 		(unsigned long long)mtd->size, mtd->erasesize,
 		mtd->writesize, mtd->oobsize);
 
 	subsize  = mtd->writesize >> mtd->subpage_sft;
 	subcount = mtd->writesize / subsize;
 
-	msg("Device uses %d subpages of %d bytes\n", subcount, subsize);
+	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
 
 	offset     = page_offset * mtd->writesize;
 	eraseblock = mtd_div_by_eb(offset, mtd);
 
-	msg("Using page=%u, offset=%llu, eraseblock=%u\n",
+	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
 		page_offset, offset, eraseblock);
 
 	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
@@ -432,8 +433,8 @@
 		goto exit_error;
 
 	err = -EIO;
-	msg("finished successfully.\n");
-	msg("==================================================\n");
+	pr_info("finished successfully.\n");
+	printk(KERN_INFO "==================================================\n");
 
 exit_error:
 	kfree(rbuffer);
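The test-module conversions from here on all follow one pattern: the per-file msg()/PRINT_PREF macros give way to the kernel's pr_fmt() hook, which the pr_*() helpers paste around every format string, so the module-name prefix comes for free. pr_fmt must be defined before the first include that reaches <linux/printk.h>, which is why each file gains it at the very top; the bare printk() separator lines are deliberately left untouched so they stay unprefixed. A minimal sketch:

/* must precede any include that pulls in <linux/printk.h> */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>

static int __init example_init(void)
{
	pr_info("MTD device: %d\n", 0);
	/* emits "mtd_nandbiterrs: MTD device: 0" when built as that module */
	return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");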
diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c
index b437fa4..1eee264 100644
--- a/drivers/mtd/tests/mtd_nandecctest.c
+++ b/drivers/mtd/tests/mtd_nandecctest.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/list.h>
@@ -264,13 +266,13 @@
 						correct_data, size);
 
 		if (err) {
-			pr_err("mtd_nandecctest: not ok - %s-%zd\n",
+			pr_err("not ok - %s-%zd\n",
 				nand_ecc_test[i].name, size);
 			dump_data_ecc(error_data, error_ecc,
 				correct_data, correct_ecc, size);
 			break;
 		}
-		pr_info("mtd_nandecctest: ok - %s-%zd\n",
+		pr_info("ok - %s-%zd\n",
 			nand_ecc_test[i].name, size);
 	}
 error:
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index ed9b628..e827fa8 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -19,6 +19,8 @@
  * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <asm/div64.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -28,8 +30,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-#define PRINT_PREF KERN_INFO "mtd_oobtest: "
-
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -80,13 +80,12 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (err) {
-		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
+		pr_err("error %d while erasing EB %d\n", err, ebnum);
 		return err;
 	}
 
 	if (ei.state == MTD_ERASE_FAILED) {
-		printk(PRINT_PREF "some erase error occurred at EB %d\n",
-		       ebnum);
+		pr_err("some erase error occurred at EB %d\n", ebnum);
 		return -EIO;
 	}
 
@@ -98,7 +97,7 @@
 	int err;
 	unsigned int i;
 
-	printk(PRINT_PREF "erasing whole device\n");
+	pr_info("erasing whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -107,7 +106,7 @@
 			return err;
 		cond_resched();
 	}
-	printk(PRINT_PREF "erased %u eraseblocks\n", i);
+	pr_info("erased %u eraseblocks\n", i);
 	return 0;
 }
 
@@ -141,9 +140,9 @@
 		ops.oobbuf    = writebuf;
 		err = mtd_write_oob(mtd, addr, &ops);
 		if (err || ops.oobretlen != use_len) {
-			printk(PRINT_PREF "error: writeoob failed at %#llx\n",
+			pr_err("error: writeoob failed at %#llx\n",
 			       (long long)addr);
-			printk(PRINT_PREF "error: use_len %d, use_offset %d\n",
+			pr_err("error: use_len %d, use_offset %d\n",
 			       use_len, use_offset);
 			errcnt += 1;
 			return err ? err : -1;
@@ -160,7 +159,7 @@
 	int err;
 	unsigned int i;
 
-	printk(PRINT_PREF "writing OOBs of whole device\n");
+	pr_info("writing OOBs of whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -168,10 +167,10 @@
 		if (err)
 			return err;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "written up to eraseblock %u\n", i);
+			pr_info("written up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "written %u eraseblocks\n", i);
+	pr_info("written %u eraseblocks\n", i);
 	return 0;
 }
 
@@ -194,17 +193,17 @@
 		ops.oobbuf    = readbuf;
 		err = mtd_read_oob(mtd, addr, &ops);
 		if (err || ops.oobretlen != use_len) {
-			printk(PRINT_PREF "error: readoob failed at %#llx\n",
+			pr_err("error: readoob failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
 			return err ? err : -1;
 		}
 		if (memcmp(readbuf, writebuf, use_len)) {
-			printk(PRINT_PREF "error: verify failed at %#llx\n",
+			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
 			if (errcnt > 1000) {
-				printk(PRINT_PREF "error: too many errors\n");
+				pr_err("error: too many errors\n");
 				return -1;
 			}
 		}
@@ -221,29 +220,28 @@
 			ops.oobbuf    = readbuf;
 			err = mtd_read_oob(mtd, addr, &ops);
 			if (err || ops.oobretlen != mtd->ecclayout->oobavail) {
-				printk(PRINT_PREF "error: readoob failed at "
-				       "%#llx\n", (long long)addr);
+				pr_err("error: readoob failed at %#llx\n",
+						(long long)addr);
 				errcnt += 1;
 				return err ? err : -1;
 			}
 			if (memcmp(readbuf + use_offset, writebuf, use_len)) {
-				printk(PRINT_PREF "error: verify failed at "
-				       "%#llx\n", (long long)addr);
+				pr_err("error: verify failed at %#llx\n",
+						(long long)addr);
 				errcnt += 1;
 				if (errcnt > 1000) {
-					printk(PRINT_PREF "error: too many "
-					       "errors\n");
+					pr_err("error: too many errors\n");
 					return -1;
 				}
 			}
 			for (k = 0; k < use_offset; ++k)
 				if (readbuf[k] != 0xff) {
-					printk(PRINT_PREF "error: verify 0xff "
+					pr_err("error: verify 0xff "
 					       "failed at %#llx\n",
 					       (long long)addr);
 					errcnt += 1;
 					if (errcnt > 1000) {
-						printk(PRINT_PREF "error: too "
+						pr_err("error: too "
 						       "many errors\n");
 						return -1;
 					}
@@ -251,12 +249,12 @@
 			for (k = use_offset + use_len;
 			     k < mtd->ecclayout->oobavail; ++k)
 				if (readbuf[k] != 0xff) {
-					printk(PRINT_PREF "error: verify 0xff "
+					pr_err("error: verify 0xff "
 					       "failed at %#llx\n",
 					       (long long)addr);
 					errcnt += 1;
 					if (errcnt > 1000) {
-						printk(PRINT_PREF "error: too "
+						pr_err("error: too "
 						       "many errors\n");
 						return -1;
 					}
@@ -286,17 +284,17 @@
 	ops.oobbuf    = readbuf;
 	err = mtd_read_oob(mtd, addr, &ops);
 	if (err || ops.oobretlen != len) {
-		printk(PRINT_PREF "error: readoob failed at %#llx\n",
+		pr_err("error: readoob failed at %#llx\n",
 		       (long long)addr);
 		errcnt += 1;
 		return err ? err : -1;
 	}
 	if (memcmp(readbuf, writebuf, len)) {
-		printk(PRINT_PREF "error: verify failed at %#llx\n",
+		pr_err("error: verify failed at %#llx\n",
 		       (long long)addr);
 		errcnt += 1;
 		if (errcnt > 1000) {
-			printk(PRINT_PREF "error: too many errors\n");
+			pr_err("error: too many errors\n");
 			return -1;
 		}
 	}
@@ -309,7 +307,7 @@
 	int err;
 	unsigned int i;
 
-	printk(PRINT_PREF "verifying all eraseblocks\n");
+	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -317,10 +315,10 @@
 		if (err)
 			return err;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "verified up to eraseblock %u\n", i);
+			pr_info("verified up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "verified %u eraseblocks\n", i);
+	pr_info("verified %u eraseblocks\n", i);
 	return 0;
 }
 
@@ -331,7 +329,7 @@
 
 	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
-		printk(PRINT_PREF "block %d is bad\n", ebnum);
+		pr_info("block %d is bad\n", ebnum);
 	return ret;
 }
 
@@ -341,18 +339,18 @@
 
 	bbt = kmalloc(ebcnt, GFP_KERNEL);
 	if (!bbt) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		return -ENOMEM;
 	}
 
-	printk(PRINT_PREF "scanning for bad eraseblocks\n");
+	pr_info("scanning for bad eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		bbt[i] = is_block_bad(i) ? 1 : 0;
 		if (bbt[i])
 			bad += 1;
 		cond_resched();
 	}
-	printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
+	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
 	return 0;
 }
 
@@ -368,22 +366,22 @@
 	printk(KERN_INFO "=================================================\n");
 
 	if (dev < 0) {
-		printk(PRINT_PREF "Please specify a valid mtd-device via module paramter\n");
-		printk(KERN_CRIT "CAREFUL: This test wipes all data on the specified MTD device!\n");
+		pr_info("Please specify a valid mtd-device via module parameter\n");
+		pr_crit("CAREFUL: This test wipes all data on the specified MTD device!\n");
 		return -EINVAL;
 	}
 
-	printk(PRINT_PREF "MTD device: %d\n", dev);
+	pr_info("MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		printk(PRINT_PREF "error: cannot get MTD device\n");
+		pr_err("error: cannot get MTD device\n");
 		return err;
 	}
 
 	if (mtd->type != MTD_NANDFLASH) {
-		printk(PRINT_PREF "this test requires NAND flash\n");
+		pr_info("this test requires NAND flash\n");
 		goto out;
 	}
 
@@ -392,7 +390,7 @@
 	ebcnt = tmp;
 	pgcnt = mtd->erasesize / mtd->writesize;
 
-	printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
+	pr_info("MTD device size %llu, eraseblock size %u, "
 	       "page size %u, count of eraseblocks %u, pages per "
 	       "eraseblock %u, OOB size %u\n",
 	       (unsigned long long)mtd->size, mtd->erasesize,
@@ -401,12 +399,12 @@
 	err = -ENOMEM;
 	readbuf = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!readbuf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 	writebuf = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!writebuf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 
@@ -420,7 +418,7 @@
 	vary_offset = 0;
 
 	/* First test: write all OOB, read it back and verify */
-	printk(PRINT_PREF "test 1 of 5\n");
+	pr_info("test 1 of 5\n");
 
 	err = erase_whole_device();
 	if (err)
@@ -440,7 +438,7 @@
 	 * Second test: write all OOB, a block at a time, read it back and
 	 * verify.
 	 */
-	printk(PRINT_PREF "test 2 of 5\n");
+	pr_info("test 2 of 5\n");
 
 	err = erase_whole_device();
 	if (err)
@@ -453,7 +451,7 @@
 
 	/* Check all eraseblocks */
 	simple_srand(3);
-	printk(PRINT_PREF "verifying all eraseblocks\n");
+	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -461,16 +459,16 @@
 		if (err)
 			goto out;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "verified up to eraseblock %u\n", i);
+			pr_info("verified up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "verified %u eraseblocks\n", i);
+	pr_info("verified %u eraseblocks\n", i);
 
 	/*
 	 * Third test: write OOB at varying offsets and lengths, read it back
 	 * and verify.
 	 */
-	printk(PRINT_PREF "test 3 of 5\n");
+	pr_info("test 3 of 5\n");
 
 	err = erase_whole_device();
 	if (err)
@@ -503,7 +501,7 @@
 	vary_offset = 0;
 
 	/* Fourth test: try to write off end of device */
-	printk(PRINT_PREF "test 4 of 5\n");
+	pr_info("test 4 of 5\n");
 
 	err = erase_whole_device();
 	if (err)
@@ -522,14 +520,14 @@
 	ops.ooboffs   = mtd->ecclayout->oobavail;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = writebuf;
-	printk(PRINT_PREF "attempting to start write past end of OOB\n");
-	printk(PRINT_PREF "an error is expected...\n");
+	pr_info("attempting to start write past end of OOB\n");
+	pr_info("an error is expected...\n");
 	err = mtd_write_oob(mtd, addr0, &ops);
 	if (err) {
-		printk(PRINT_PREF "error occurred as expected\n");
+		pr_info("error occurred as expected\n");
 		err = 0;
 	} else {
-		printk(PRINT_PREF "error: can write past end of OOB\n");
+		pr_err("error: can write past end of OOB\n");
 		errcnt += 1;
 	}
 
@@ -542,19 +540,19 @@
 	ops.ooboffs   = mtd->ecclayout->oobavail;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = readbuf;
-	printk(PRINT_PREF "attempting to start read past end of OOB\n");
-	printk(PRINT_PREF "an error is expected...\n");
+	pr_info("attempting to start read past end of OOB\n");
+	pr_info("an error is expected...\n");
 	err = mtd_read_oob(mtd, addr0, &ops);
 	if (err) {
-		printk(PRINT_PREF "error occurred as expected\n");
+		pr_info("error occurred as expected\n");
 		err = 0;
 	} else {
-		printk(PRINT_PREF "error: can read past end of OOB\n");
+		pr_err("error: can read past end of OOB\n");
 		errcnt += 1;
 	}
 
 	if (bbt[ebcnt - 1])
-		printk(PRINT_PREF "skipping end of device tests because last "
+		pr_info("skipping end of device tests because last "
 		       "block is bad\n");
 	else {
 		/* Attempt to write off end of device */
@@ -566,14 +564,14 @@
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = writebuf;
-		printk(PRINT_PREF "attempting to write past end of device\n");
-		printk(PRINT_PREF "an error is expected...\n");
+		pr_info("attempting to write past end of device\n");
+		pr_info("an error is expected...\n");
 		err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
-			printk(PRINT_PREF "error occurred as expected\n");
+			pr_info("error occurred as expected\n");
 			err = 0;
 		} else {
-			printk(PRINT_PREF "error: wrote past end of device\n");
+			pr_err("error: wrote past end of device\n");
 			errcnt += 1;
 		}
 
@@ -586,14 +584,14 @@
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = readbuf;
-		printk(PRINT_PREF "attempting to read past end of device\n");
-		printk(PRINT_PREF "an error is expected...\n");
+		pr_info("attempting to read past end of device\n");
+		pr_info("an error is expected...\n");
 		err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
-			printk(PRINT_PREF "error occurred as expected\n");
+			pr_info("error occurred as expected\n");
 			err = 0;
 		} else {
-			printk(PRINT_PREF "error: read past end of device\n");
+			pr_err("error: read past end of device\n");
 			errcnt += 1;
 		}
 
@@ -610,14 +608,14 @@
 		ops.ooboffs   = 1;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = writebuf;
-		printk(PRINT_PREF "attempting to write past end of device\n");
-		printk(PRINT_PREF "an error is expected...\n");
+		pr_info("attempting to write past end of device\n");
+		pr_info("an error is expected...\n");
 		err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
-			printk(PRINT_PREF "error occurred as expected\n");
+			pr_info("error occurred as expected\n");
 			err = 0;
 		} else {
-			printk(PRINT_PREF "error: wrote past end of device\n");
+			pr_err("error: wrote past end of device\n");
 			errcnt += 1;
 		}
 
@@ -630,20 +628,20 @@
 		ops.ooboffs   = 1;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = readbuf;
-		printk(PRINT_PREF "attempting to read past end of device\n");
-		printk(PRINT_PREF "an error is expected...\n");
+		pr_info("attempting to read past end of device\n");
+		pr_info("an error is expected...\n");
 		err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
-			printk(PRINT_PREF "error occurred as expected\n");
+			pr_info("error occurred as expected\n");
 			err = 0;
 		} else {
-			printk(PRINT_PREF "error: read past end of device\n");
+			pr_err("error: read past end of device\n");
 			errcnt += 1;
 		}
 	}
 
 	/* Fifth test: write / read across block boundaries */
-	printk(PRINT_PREF "test 5 of 5\n");
+	pr_info("test 5 of 5\n");
 
 	/* Erase all eraseblocks */
 	err = erase_whole_device();
@@ -652,7 +650,7 @@
 
 	/* Write all eraseblocks */
 	simple_srand(11);
-	printk(PRINT_PREF "writing OOBs of whole device\n");
+	pr_info("writing OOBs of whole device\n");
 	for (i = 0; i < ebcnt - 1; ++i) {
 		int cnt = 2;
 		int pg;
@@ -674,17 +672,16 @@
 			if (err)
 				goto out;
 			if (i % 256 == 0)
-				printk(PRINT_PREF "written up to eraseblock "
-				       "%u\n", i);
+				pr_info("written up to eraseblock %u\n", i);
 			cond_resched();
 			addr += mtd->writesize;
 		}
 	}
-	printk(PRINT_PREF "written %u eraseblocks\n", i);
+	pr_info("written %u eraseblocks\n", i);
 
 	/* Check all eraseblocks */
 	simple_srand(11);
-	printk(PRINT_PREF "verifying all eraseblocks\n");
+	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt - 1; ++i) {
 		if (bbt[i] || bbt[i + 1])
 			continue;
@@ -702,28 +699,28 @@
 		if (err)
 			goto out;
 		if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) {
-			printk(PRINT_PREF "error: verify failed at %#llx\n",
+			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
 			if (errcnt > 1000) {
-				printk(PRINT_PREF "error: too many errors\n");
+				pr_err("error: too many errors\n");
 				goto out;
 			}
 		}
 		if (i % 256 == 0)
-			printk(PRINT_PREF "verified up to eraseblock %u\n", i);
+			pr_info("verified up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "verified %u eraseblocks\n", i);
+	pr_info("verified %u eraseblocks\n", i);
 
-	printk(PRINT_PREF "finished with %d errors\n", errcnt);
+	pr_info("finished with %d errors\n", errcnt);
 out:
 	kfree(bbt);
 	kfree(writebuf);
 	kfree(readbuf);
 	put_mtd_device(mtd);
 	if (err)
-		printk(PRINT_PREF "error %d occurred\n", err);
+		pr_info("error %d occurred\n", err);
 	printk(KERN_INFO "=================================================\n");
 	return err;
 }
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 252ddb0..f93a76f 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -19,6 +19,8 @@
  * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <asm/div64.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -28,8 +30,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-#define PRINT_PREF KERN_INFO "mtd_pagetest: "
-
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -79,12 +79,12 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (err) {
-		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
+		pr_err("error %d while erasing EB %d\n", err, ebnum);
 		return err;
 	}
 
 	if (ei.state == MTD_ERASE_FAILED) {
-		printk(PRINT_PREF "some erase error occurred at EB %d\n",
+		pr_err("some erase error occurred at EB %d\n",
 		       ebnum);
 		return -EIO;
 	}
@@ -102,7 +102,7 @@
 	cond_resched();
 	err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf);
 	if (err || written != mtd->erasesize)
-		printk(PRINT_PREF "error: write failed at %#llx\n",
+		pr_err("error: write failed at %#llx\n",
 		       (long long)addr);
 
 	return err;
@@ -131,7 +131,7 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)addr0);
 			return err;
 		}
@@ -139,7 +139,7 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)(addrn - bufsize));
 			return err;
 		}
@@ -148,12 +148,12 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)addr);
 			break;
 		}
 		if (memcmp(twopages, writebuf + (j * pgsize), bufsize)) {
-			printk(PRINT_PREF "error: verify failed at %#llx\n",
+			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
 		}
@@ -166,7 +166,7 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)addr0);
 			return err;
 		}
@@ -174,7 +174,7 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)(addrn - bufsize));
 			return err;
 		}
@@ -183,14 +183,14 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)addr);
 			return err;
 		}
 		memcpy(boundary, writebuf + mtd->erasesize - pgsize, pgsize);
 		set_random_data(boundary + pgsize, pgsize);
 		if (memcmp(twopages, boundary, bufsize)) {
-			printk(PRINT_PREF "error: verify failed at %#llx\n",
+			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
 		}
@@ -206,10 +206,10 @@
 	loff_t addr, addr0, addrn;
 	unsigned char *pp1, *pp2, *pp3, *pp4;
 
-	printk(PRINT_PREF "crosstest\n");
+	pr_info("crosstest\n");
 	pp1 = kmalloc(pgsize * 4, GFP_KERNEL);
 	if (!pp1) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		return -ENOMEM;
 	}
 	pp2 = pp1 + pgsize;
@@ -231,7 +231,7 @@
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr);
 		kfree(pp1);
 		return err;
@@ -243,7 +243,7 @@
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr);
 		kfree(pp1);
 		return err;
@@ -251,12 +251,12 @@
 
 	/* Read first page to pp2 */
 	addr = addr0;
-	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
+	pr_info("reading page at %#llx\n", (long long)addr);
 	err = mtd_read(mtd, addr, pgsize, &read, pp2);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr);
 		kfree(pp1);
 		return err;
@@ -264,12 +264,12 @@
 
 	/* Read last page to pp3 */
 	addr = addrn - pgsize;
-	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
+	pr_info("reading page at %#llx\n", (long long)addr);
 	err = mtd_read(mtd, addr, pgsize, &read, pp3);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr);
 		kfree(pp1);
 		return err;
@@ -277,25 +277,25 @@
 
 	/* Read first page again to pp4 */
 	addr = addr0;
-	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
+	pr_info("reading page at %#llx\n", (long long)addr);
 	err = mtd_read(mtd, addr, pgsize, &read, pp4);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr);
 		kfree(pp1);
 		return err;
 	}
 
 	/* pp2 and pp4 should be the same */
-	printk(PRINT_PREF "verifying pages read at %#llx match\n",
+	pr_info("verifying pages read at %#llx match\n",
 	       (long long)addr0);
 	if (memcmp(pp2, pp4, pgsize)) {
-		printk(PRINT_PREF "verify failed!\n");
+		pr_err("verify failed!\n");
 		errcnt += 1;
 	} else if (!err)
-		printk(PRINT_PREF "crosstest ok\n");
+		pr_info("crosstest ok\n");
 	kfree(pp1);
 	return err;
 }
@@ -307,7 +307,7 @@
 	loff_t addr0;
 	char *readbuf = twopages;
 
-	printk(PRINT_PREF "erasecrosstest\n");
+	pr_info("erasecrosstest\n");
 
 	ebnum = 0;
 	addr0 = 0;
@@ -320,79 +320,79 @@
 	while (ebnum2 && bbt[ebnum2])
 		ebnum2 -= 1;
 
-	printk(PRINT_PREF "erasing block %d\n", ebnum);
+	pr_info("erasing block %d\n", ebnum);
 	err = erase_eraseblock(ebnum);
 	if (err)
 		return err;
 
-	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
+	pr_info("writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
 	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
-		printk(PRINT_PREF "error: write failed at %#llx\n",
+		pr_err("error: write failed at %#llx\n",
 		       (long long)addr0);
 		return err ? err : -1;
 	}
 
-	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
+	pr_info("reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
 	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr0);
 		return err ? err : -1;
 	}
 
-	printk(PRINT_PREF "verifying 1st page of block %d\n", ebnum);
+	pr_info("verifying 1st page of block %d\n", ebnum);
 	if (memcmp(writebuf, readbuf, pgsize)) {
-		printk(PRINT_PREF "verify failed!\n");
+		pr_err("verify failed!\n");
 		errcnt += 1;
 		return -1;
 	}
 
-	printk(PRINT_PREF "erasing block %d\n", ebnum);
+	pr_info("erasing block %d\n", ebnum);
 	err = erase_eraseblock(ebnum);
 	if (err)
 		return err;
 
-	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
+	pr_info("writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
 	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
-		printk(PRINT_PREF "error: write failed at %#llx\n",
+		pr_err("error: write failed at %#llx\n",
 		       (long long)addr0);
 		return err ? err : -1;
 	}
 
-	printk(PRINT_PREF "erasing block %d\n", ebnum2);
+	pr_info("erasing block %d\n", ebnum2);
 	err = erase_eraseblock(ebnum2);
 	if (err)
 		return err;
 
-	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
+	pr_info("reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
 	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr0);
 		return err ? err : -1;
 	}
 
-	printk(PRINT_PREF "verifying 1st page of block %d\n", ebnum);
+	pr_info("verifying 1st page of block %d\n", ebnum);
 	if (memcmp(writebuf, readbuf, pgsize)) {
-		printk(PRINT_PREF "verify failed!\n");
+		pr_err("verify failed!\n");
 		errcnt += 1;
 		return -1;
 	}
 
 	if (!err)
-		printk(PRINT_PREF "erasecrosstest ok\n");
+		pr_info("erasecrosstest ok\n");
 	return err;
 }
 
@@ -402,7 +402,7 @@
 	int err = 0, i, ebnum, ok = 1;
 	loff_t addr0;
 
-	printk(PRINT_PREF "erasetest\n");
+	pr_info("erasetest\n");
 
 	ebnum = 0;
 	addr0 = 0;
@@ -411,40 +411,40 @@
 		ebnum += 1;
 	}
 
-	printk(PRINT_PREF "erasing block %d\n", ebnum);
+	pr_info("erasing block %d\n", ebnum);
 	err = erase_eraseblock(ebnum);
 	if (err)
 		return err;
 
-	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
+	pr_info("writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
 	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
-		printk(PRINT_PREF "error: write failed at %#llx\n",
+		pr_err("error: write failed at %#llx\n",
 		       (long long)addr0);
 		return err ? err : -1;
 	}
 
-	printk(PRINT_PREF "erasing block %d\n", ebnum);
+	pr_info("erasing block %d\n", ebnum);
 	err = erase_eraseblock(ebnum);
 	if (err)
 		return err;
 
-	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
+	pr_info("reading 1st page of block %d\n", ebnum);
 	err = mtd_read(mtd, addr0, pgsize, &read, twopages);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n",
+		pr_err("error: read failed at %#llx\n",
 		       (long long)addr0);
 		return err ? err : -1;
 	}
 
-	printk(PRINT_PREF "verifying 1st page of block %d is all 0xff\n",
+	pr_info("verifying 1st page of block %d is all 0xff\n",
 	       ebnum);
 	for (i = 0; i < pgsize; ++i)
 		if (twopages[i] != 0xff) {
-			printk(PRINT_PREF "verifying all 0xff failed at %d\n",
+			pr_err("verifying all 0xff failed at %d\n",
 			       i);
 			errcnt += 1;
 			ok = 0;
@@ -452,7 +452,7 @@
 		}
 
 	if (ok && !err)
-		printk(PRINT_PREF "erasetest ok\n");
+		pr_info("erasetest ok\n");
 
 	return err;
 }
@@ -464,7 +464,7 @@
 
 	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
-		printk(PRINT_PREF "block %d is bad\n", ebnum);
+		pr_info("block %d is bad\n", ebnum);
 	return ret;
 }
 
@@ -474,18 +474,18 @@
 
 	bbt = kzalloc(ebcnt, GFP_KERNEL);
 	if (!bbt) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		return -ENOMEM;
 	}
 
-	printk(PRINT_PREF "scanning for bad eraseblocks\n");
+	pr_info("scanning for bad eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		bbt[i] = is_block_bad(i) ? 1 : 0;
 		if (bbt[i])
 			bad += 1;
 		cond_resched();
 	}
-	printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
+	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
 	return 0;
 }
 
@@ -499,22 +499,22 @@
 	printk(KERN_INFO "=================================================\n");
 
 	if (dev < 0) {
-		printk(PRINT_PREF "Please specify a valid mtd-device via module paramter\n");
-		printk(KERN_CRIT "CAREFUL: This test wipes all data on the specified MTD device!\n");
+		pr_info("Please specify a valid mtd-device via module parameter\n");
+		pr_crit("CAREFUL: This test wipes all data on the specified MTD device!\n");
 		return -EINVAL;
 	}
 
-	printk(PRINT_PREF "MTD device: %d\n", dev);
+	pr_info("MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		printk(PRINT_PREF "error: cannot get MTD device\n");
+		pr_err("error: cannot get MTD device\n");
 		return err;
 	}
 
 	if (mtd->type != MTD_NANDFLASH) {
-		printk(PRINT_PREF "this test requires NAND flash\n");
+		pr_info("this test requires NAND flash\n");
 		goto out;
 	}
 
@@ -524,7 +524,7 @@
 	pgcnt = mtd->erasesize / mtd->writesize;
 	pgsize = mtd->writesize;
 
-	printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
+	pr_info("MTD device size %llu, eraseblock size %u, "
 	       "page size %u, count of eraseblocks %u, pages per "
 	       "eraseblock %u, OOB size %u\n",
 	       (unsigned long long)mtd->size, mtd->erasesize,
@@ -534,17 +534,17 @@
 	bufsize = pgsize * 2;
 	writebuf = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!writebuf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 	twopages = kmalloc(bufsize, GFP_KERNEL);
 	if (!twopages) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 	boundary = kmalloc(bufsize, GFP_KERNEL);
 	if (!boundary) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 
@@ -553,7 +553,7 @@
 		goto out;
 
 	/* Erase all eraseblocks */
-	printk(PRINT_PREF "erasing whole device\n");
+	pr_info("erasing whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -562,11 +562,11 @@
 			goto out;
 		cond_resched();
 	}
-	printk(PRINT_PREF "erased %u eraseblocks\n", i);
+	pr_info("erased %u eraseblocks\n", i);
 
 	/* Write all eraseblocks */
 	simple_srand(1);
-	printk(PRINT_PREF "writing whole device\n");
+	pr_info("writing whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -574,14 +574,14 @@
 		if (err)
 			goto out;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "written up to eraseblock %u\n", i);
+			pr_info("written up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "written %u eraseblocks\n", i);
+	pr_info("written %u eraseblocks\n", i);
 
 	/* Check all eraseblocks */
 	simple_srand(1);
-	printk(PRINT_PREF "verifying all eraseblocks\n");
+	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -589,10 +589,10 @@
 		if (err)
 			goto out;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "verified up to eraseblock %u\n", i);
+			pr_info("verified up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "verified %u eraseblocks\n", i);
+	pr_info("verified %u eraseblocks\n", i);
 
 	err = crosstest();
 	if (err)
@@ -606,7 +606,7 @@
 	if (err)
 		goto out;
 
-	printk(PRINT_PREF "finished with %d errors\n", errcnt);
+	pr_info("finished with %d errors\n", errcnt);
 out:
 
 	kfree(bbt);
@@ -615,7 +615,7 @@
 	kfree(writebuf);
 	put_mtd_device(mtd);
 	if (err)
-		printk(PRINT_PREF "error %d occurred\n", err);
+		pr_info("error %d occurred\n", err);
 	printk(KERN_INFO "=================================================\n");
 	return err;
 }
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 121aba1..266de04 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -19,6 +19,8 @@
  * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -27,8 +29,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-#define PRINT_PREF KERN_INFO "mtd_readtest: "
-
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -51,12 +51,12 @@
 	void *oobbuf = iobuf1;
 
 	for (i = 0; i < pgcnt; i++) {
-		memset(buf, 0 , pgcnt);
+		memset(buf, 0 , pgsize);
 		ret = mtd_read(mtd, addr, pgsize, &read, buf);
 		if (ret == -EUCLEAN)
 			ret = 0;
 		if (ret || read != pgsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)addr);
 			if (!err)
 				err = ret;
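Buried in the hunk above is a genuine fix rather than a logging conversion: buf holds a single page, so the clear length must be pgsize (bytes), not pgcnt (a count of pages per eraseblock). In isolation:

	memset(buf, 0, pgsize);	/* was: memset(buf, 0, pgcnt) - wrong unit */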
@@ -77,7 +77,7 @@
 			ret = mtd_read_oob(mtd, addr, &ops);
 			if ((ret && !mtd_is_bitflip(ret)) ||
 					ops.oobretlen != mtd->oobsize) {
-				printk(PRINT_PREF "error: read oob failed at "
+				pr_err("error: read oob failed at "
 						  "%#llx\n", (long long)addr);
 				if (!err)
 					err = ret;
@@ -99,7 +99,7 @@
 	char line[128];
 	int pg, oob;
 
-	printk(PRINT_PREF "dumping eraseblock %d\n", ebnum);
+	pr_info("dumping eraseblock %d\n", ebnum);
 	n = mtd->erasesize;
 	for (i = 0; i < n;) {
 		char *p = line;
@@ -112,7 +112,7 @@
 	}
 	if (!mtd->oobsize)
 		return;
-	printk(PRINT_PREF "dumping oob from eraseblock %d\n", ebnum);
+	pr_info("dumping oob from eraseblock %d\n", ebnum);
 	n = mtd->oobsize;
 	for (pg = 0, i = 0; pg < pgcnt; pg++)
 		for (oob = 0; oob < n;) {
@@ -134,7 +134,7 @@
 
 	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
-		printk(PRINT_PREF "block %d is bad\n", ebnum);
+		pr_info("block %d is bad\n", ebnum);
 	return ret;
 }
 
@@ -144,21 +144,21 @@
 
 	bbt = kzalloc(ebcnt, GFP_KERNEL);
 	if (!bbt) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		return -ENOMEM;
 	}
 
 	if (!mtd_can_have_bb(mtd))
 		return 0;
 
-	printk(PRINT_PREF "scanning for bad eraseblocks\n");
+	pr_info("scanning for bad eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		bbt[i] = is_block_bad(i) ? 1 : 0;
 		if (bbt[i])
 			bad += 1;
 		cond_resched();
 	}
-	printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
+	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
 	return 0;
 }
 
@@ -171,21 +171,21 @@
 	printk(KERN_INFO "=================================================\n");
 
 	if (dev < 0) {
-		printk(PRINT_PREF "Please specify a valid mtd-device via module paramter\n");
+		pr_info("Please specify a valid mtd-device via module parameter\n");
 		return -EINVAL;
 	}
 
-	printk(PRINT_PREF "MTD device: %d\n", dev);
+	pr_info("MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		printk(PRINT_PREF "error: Cannot get MTD device\n");
+		pr_err("error: Cannot get MTD device\n");
 		return err;
 	}
 
 	if (mtd->writesize == 1) {
-		printk(PRINT_PREF "not NAND flash, assume page size is 512 "
+		pr_info("not NAND flash, assume page size is 512 "
 		       "bytes.\n");
 		pgsize = 512;
 	} else
@@ -196,7 +196,7 @@
 	ebcnt = tmp;
 	pgcnt = mtd->erasesize / pgsize;
 
-	printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
+	pr_info("MTD device size %llu, eraseblock size %u, "
 	       "page size %u, count of eraseblocks %u, pages per "
 	       "eraseblock %u, OOB size %u\n",
 	       (unsigned long long)mtd->size, mtd->erasesize,
@@ -205,12 +205,12 @@
 	err = -ENOMEM;
 	iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!iobuf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 	iobuf1 = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!iobuf1) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 
@@ -219,7 +219,7 @@
 		goto out;
 
 	/* Read all eraseblocks 1 page at a time */
-	printk(PRINT_PREF "testing page read\n");
+	pr_info("testing page read\n");
 	for (i = 0; i < ebcnt; ++i) {
 		int ret;
 
@@ -235,9 +235,9 @@
 	}
 
 	if (err)
-		printk(PRINT_PREF "finished with errors\n");
+		pr_info("finished with errors\n");
 	else
-		printk(PRINT_PREF "finished\n");
+		pr_info("finished\n");
 
 out:
 
@@ -246,7 +246,7 @@
 	kfree(bbt);
 	put_mtd_device(mtd);
 	if (err)
-		printk(PRINT_PREF "error %d occurred\n", err);
+		pr_info("error %d occurred\n", err);
 	printk(KERN_INFO "=================================================\n");
 	return err;
 }
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 42b0f74..596cbea 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -19,6 +19,8 @@
  * Author: Adrian Hunter <adrian.hunter@nokia.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -28,8 +30,6 @@
 #include <linux/sched.h>
 #include <linux/random.h>
 
-#define PRINT_PREF KERN_INFO "mtd_speedtest: "
-
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -70,12 +70,12 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (err) {
-		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
+		pr_err("error %d while erasing EB %d\n", err, ebnum);
 		return err;
 	}
 
 	if (ei.state == MTD_ERASE_FAILED) {
-		printk(PRINT_PREF "some erase error occurred at EB %d\n",
+		pr_err("some erase error occurred at EB %d\n",
 		       ebnum);
 		return -EIO;
 	}
@@ -96,13 +96,13 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (err) {
-		printk(PRINT_PREF "error %d while erasing EB %d, blocks %d\n",
+		pr_err("error %d while erasing EB %d, blocks %d\n",
 		       err, ebnum, blocks);
 		return err;
 	}
 
 	if (ei.state == MTD_ERASE_FAILED) {
-		printk(PRINT_PREF "some erase error occurred at EB %d,"
+		pr_err("some erase error occurred at EB %d,"
 		       "blocks %d\n", ebnum, blocks);
 		return -EIO;
 	}
@@ -134,7 +134,7 @@
 
 	err = mtd_write(mtd, addr, mtd->erasesize, &written, iobuf);
 	if (err || written != mtd->erasesize) {
-		printk(PRINT_PREF "error: write failed at %#llx\n", addr);
+		pr_err("error: write failed at %#llx\n", addr);
 		if (!err)
 			err = -EINVAL;
 	}
@@ -152,7 +152,7 @@
 	for (i = 0; i < pgcnt; i++) {
 		err = mtd_write(mtd, addr, pgsize, &written, buf);
 		if (err || written != pgsize) {
-			printk(PRINT_PREF "error: write failed at %#llx\n",
+			pr_err("error: write failed at %#llx\n",
 			       addr);
 			if (!err)
 				err = -EINVAL;
@@ -175,7 +175,7 @@
 	for (i = 0; i < n; i++) {
 		err = mtd_write(mtd, addr, sz, &written, buf);
 		if (err || written != sz) {
-			printk(PRINT_PREF "error: write failed at %#llx\n",
+			pr_err("error: write failed at %#llx\n",
 			       addr);
 			if (!err)
 				err = -EINVAL;
@@ -187,7 +187,7 @@
 	if (pgcnt % 2) {
 		err = mtd_write(mtd, addr, pgsize, &written, buf);
 		if (err || written != pgsize) {
-			printk(PRINT_PREF "error: write failed at %#llx\n",
+			pr_err("error: write failed at %#llx\n",
 			       addr);
 			if (!err)
 				err = -EINVAL;
@@ -208,7 +208,7 @@
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != mtd->erasesize) {
-		printk(PRINT_PREF "error: read failed at %#llx\n", addr);
+		pr_err("error: read failed at %#llx\n", addr);
 		if (!err)
 			err = -EINVAL;
 	}
@@ -229,7 +229,7 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != pgsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       addr);
 			if (!err)
 				err = -EINVAL;
@@ -255,7 +255,7 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != sz) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       addr);
 			if (!err)
 				err = -EINVAL;
@@ -270,7 +270,7 @@
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != pgsize) {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       addr);
 			if (!err)
 				err = -EINVAL;
@@ -287,7 +287,7 @@
 
 	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
-		printk(PRINT_PREF "block %d is bad\n", ebnum);
+		pr_info("block %d is bad\n", ebnum);
 	return ret;
 }
 
@@ -321,21 +321,21 @@
 
 	bbt = kzalloc(ebcnt, GFP_KERNEL);
 	if (!bbt) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		return -ENOMEM;
 	}
 
 	if (!mtd_can_have_bb(mtd))
 		goto out;
 
-	printk(PRINT_PREF "scanning for bad eraseblocks\n");
+	pr_info("scanning for bad eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		bbt[i] = is_block_bad(i) ? 1 : 0;
 		if (bbt[i])
 			bad += 1;
 		cond_resched();
 	}
-	printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
+	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
 out:
 	goodebcnt = ebcnt - bad;
 	return 0;
@@ -351,25 +351,25 @@
 	printk(KERN_INFO "=================================================\n");
 
 	if (dev < 0) {
-		printk(PRINT_PREF "Please specify a valid mtd-device via module paramter\n");
-		printk(KERN_CRIT "CAREFUL: This test wipes all data on the specified MTD device!\n");
+		pr_info("Please specify a valid mtd-device via module parameter\n");
+		pr_crit("CAREFUL: This test wipes all data on the specified MTD device!\n");
 		return -EINVAL;
 	}
 
 	if (count)
-		printk(PRINT_PREF "MTD device: %d    count: %d\n", dev, count);
+		pr_info("MTD device: %d    count: %d\n", dev, count);
 	else
-		printk(PRINT_PREF "MTD device: %d\n", dev);
+		pr_info("MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		printk(PRINT_PREF "error: cannot get MTD device\n");
+		pr_err("error: cannot get MTD device\n");
 		return err;
 	}
 
 	if (mtd->writesize == 1) {
-		printk(PRINT_PREF "not NAND flash, assume page size is 512 "
+		pr_info("not NAND flash, assume page size is 512 "
 		       "bytes.\n");
 		pgsize = 512;
 	} else
@@ -380,7 +380,7 @@
 	ebcnt = tmp;
 	pgcnt = mtd->erasesize / pgsize;
 
-	printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
+	pr_info("MTD device size %llu, eraseblock size %u, "
 	       "page size %u, count of eraseblocks %u, pages per "
 	       "eraseblock %u, OOB size %u\n",
 	       (unsigned long long)mtd->size, mtd->erasesize,
@@ -392,7 +392,7 @@
 	err = -ENOMEM;
 	iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!iobuf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 
@@ -407,7 +407,7 @@
 		goto out;
 
 	/* Write all eraseblocks, 1 eraseblock at a time */
-	printk(PRINT_PREF "testing eraseblock write speed\n");
+	pr_info("testing eraseblock write speed\n");
 	start_timing();
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -419,10 +419,10 @@
 	}
 	stop_timing();
 	speed = calc_speed();
-	printk(PRINT_PREF "eraseblock write speed is %ld KiB/s\n", speed);
+	pr_info("eraseblock write speed is %ld KiB/s\n", speed);
 
 	/* Read all eraseblocks, 1 eraseblock at a time */
-	printk(PRINT_PREF "testing eraseblock read speed\n");
+	pr_info("testing eraseblock read speed\n");
 	start_timing();
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -434,14 +434,14 @@
 	}
 	stop_timing();
 	speed = calc_speed();
-	printk(PRINT_PREF "eraseblock read speed is %ld KiB/s\n", speed);
+	pr_info("eraseblock read speed is %ld KiB/s\n", speed);
 
 	err = erase_whole_device();
 	if (err)
 		goto out;
 
 	/* Write all eraseblocks, 1 page at a time */
-	printk(PRINT_PREF "testing page write speed\n");
+	pr_info("testing page write speed\n");
 	start_timing();
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -453,10 +453,10 @@
 	}
 	stop_timing();
 	speed = calc_speed();
-	printk(PRINT_PREF "page write speed is %ld KiB/s\n", speed);
+	pr_info("page write speed is %ld KiB/s\n", speed);
 
 	/* Read all eraseblocks, 1 page at a time */
-	printk(PRINT_PREF "testing page read speed\n");
+	pr_info("testing page read speed\n");
 	start_timing();
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -468,14 +468,14 @@
 	}
 	stop_timing();
 	speed = calc_speed();
-	printk(PRINT_PREF "page read speed is %ld KiB/s\n", speed);
+	pr_info("page read speed is %ld KiB/s\n", speed);
 
 	err = erase_whole_device();
 	if (err)
 		goto out;
 
 	/* Write all eraseblocks, 2 pages at a time */
-	printk(PRINT_PREF "testing 2 page write speed\n");
+	pr_info("testing 2 page write speed\n");
 	start_timing();
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -487,10 +487,10 @@
 	}
 	stop_timing();
 	speed = calc_speed();
-	printk(PRINT_PREF "2 page write speed is %ld KiB/s\n", speed);
+	pr_info("2 page write speed is %ld KiB/s\n", speed);
 
 	/* Read all eraseblocks, 2 pages at a time */
-	printk(PRINT_PREF "testing 2 page read speed\n");
+	pr_info("testing 2 page read speed\n");
 	start_timing();
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -502,10 +502,10 @@
 	}
 	stop_timing();
 	speed = calc_speed();
-	printk(PRINT_PREF "2 page read speed is %ld KiB/s\n", speed);
+	pr_info("2 page read speed is %ld KiB/s\n", speed);
 
 	/* Erase all eraseblocks */
-	printk(PRINT_PREF "Testing erase speed\n");
+	pr_info("Testing erase speed\n");
 	start_timing();
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -517,12 +517,12 @@
 	}
 	stop_timing();
 	speed = calc_speed();
-	printk(PRINT_PREF "erase speed is %ld KiB/s\n", speed);
+	pr_info("erase speed is %ld KiB/s\n", speed);
 
 	/* Multi-block erase all eraseblocks */
 	for (k = 1; k < 7; k++) {
 		blocks = 1 << k;
-		printk(PRINT_PREF "Testing %dx multi-block erase speed\n",
+		pr_info("Testing %dx multi-block erase speed\n",
 		       blocks);
 		start_timing();
 		for (i = 0; i < ebcnt; ) {
@@ -541,16 +541,16 @@
 		}
 		stop_timing();
 		speed = calc_speed();
-		printk(PRINT_PREF "%dx multi-block erase speed is %ld KiB/s\n",
+		pr_info("%dx multi-block erase speed is %ld KiB/s\n",
 		       blocks, speed);
 	}
-	printk(PRINT_PREF "finished\n");
+	pr_info("finished\n");
 out:
 	kfree(iobuf);
 	kfree(bbt);
 	put_mtd_device(mtd);
 	if (err)
-		printk(PRINT_PREF "error %d occurred\n", err);
+		pr_info("error %d occurred\n", err);
 	printk(KERN_INFO "=================================================\n");
 	return err;
 }
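
The KiB/s figures printed above come from wall-clock deltas taken around each loop by start_timing()/stop_timing() and converted by calc_speed(). A hedged sketch of that arithmetic (the helper names mirror the test; the exact body is an assumption about its implementation):

    #include <linux/time.h>
    #include <linux/mtd/mtd.h>
    #include <asm/div64.h>

    static struct timeval start, finish;
    static uint64_t goodebcnt;      /* eraseblocks actually exercised */

    static void start_timing(void) { do_gettimeofday(&start); }
    static void stop_timing(void)  { do_gettimeofday(&finish); }

    /* bytes moved divided by elapsed time, reported in KiB per second */
    static long calc_speed(struct mtd_info *mtd)
    {
            uint64_t k;
            long ms;

            ms = (finish.tv_sec - start.tv_sec) * 1000 +
                 (finish.tv_usec - start.tv_usec) / 1000;
            if (ms == 0)
                    return 0;
            k = goodebcnt * (mtd->erasesize / 1024) * 1000;
            do_div(k, ms);
            return k;
    }
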
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index cb268ce..3729f67 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -19,6 +19,8 @@
  * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -29,8 +31,6 @@
 #include <linux/vmalloc.h>
 #include <linux/random.h>
 
-#define PRINT_PREF KERN_INFO "mtd_stresstest: "
-
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -94,12 +94,12 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (unlikely(err)) {
-		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
+		pr_err("error %d while erasing EB %d\n", err, ebnum);
 		return err;
 	}
 
 	if (unlikely(ei.state == MTD_ERASE_FAILED)) {
-		printk(PRINT_PREF "some erase error occurred at EB %d\n",
+		pr_err("some erase error occurred at EB %d\n",
 		       ebnum);
 		return -EIO;
 	}
@@ -114,7 +114,7 @@
 
 	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
-		printk(PRINT_PREF "block %d is bad\n", ebnum);
+		pr_info("block %d is bad\n", ebnum);
 	return ret;
 }
 
@@ -137,7 +137,7 @@
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (unlikely(err || read != len)) {
-		printk(PRINT_PREF "error: read failed at 0x%llx\n",
+		pr_err("error: read failed at 0x%llx\n",
 		       (long long)addr);
 		if (!err)
 			err = -EINVAL;
@@ -174,7 +174,7 @@
 	addr = eb * mtd->erasesize + offs;
 	err = mtd_write(mtd, addr, len, &written, writebuf);
 	if (unlikely(err || written != len)) {
-		printk(PRINT_PREF "error: write failed at 0x%llx\n",
+		pr_err("error: write failed at 0x%llx\n",
 		       (long long)addr);
 		if (!err)
 			err = -EINVAL;
@@ -203,21 +203,21 @@
 
 	bbt = kzalloc(ebcnt, GFP_KERNEL);
 	if (!bbt) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		return -ENOMEM;
 	}
 
 	if (!mtd_can_have_bb(mtd))
 		return 0;
 
-	printk(PRINT_PREF "scanning for bad eraseblocks\n");
+	pr_info("scanning for bad eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		bbt[i] = is_block_bad(i) ? 1 : 0;
 		if (bbt[i])
 			bad += 1;
 		cond_resched();
 	}
-	printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
+	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
 	return 0;
 }
 
@@ -231,22 +231,22 @@
 	printk(KERN_INFO "=================================================\n");
 
 	if (dev < 0) {
-		printk(PRINT_PREF "Please specify a valid mtd-device via module paramter\n");
-		printk(KERN_CRIT "CAREFUL: This test wipes all data on the specified MTD device!\n");
+		pr_info("Please specify a valid mtd-device via module parameter\n");
+		pr_crit("CAREFUL: This test wipes all data on the specified MTD device!\n");
 		return -EINVAL;
 	}
 
-	printk(PRINT_PREF "MTD device: %d\n", dev);
+	pr_info("MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		printk(PRINT_PREF "error: cannot get MTD device\n");
+		pr_err("error: cannot get MTD device\n");
 		return err;
 	}
 
 	if (mtd->writesize == 1) {
-		printk(PRINT_PREF "not NAND flash, assume page size is 512 "
+		pr_info("not NAND flash, assume page size is 512 "
 		       "bytes.\n");
 		pgsize = 512;
 	} else
@@ -257,14 +257,14 @@
 	ebcnt = tmp;
 	pgcnt = mtd->erasesize / pgsize;
 
-	printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
+	pr_info("MTD device size %llu, eraseblock size %u, "
 	       "page size %u, count of eraseblocks %u, pages per "
 	       "eraseblock %u, OOB size %u\n",
 	       (unsigned long long)mtd->size, mtd->erasesize,
 	       pgsize, ebcnt, pgcnt, mtd->oobsize);
 
 	if (ebcnt < 2) {
-		printk(PRINT_PREF "error: need at least 2 eraseblocks\n");
+		pr_err("error: need at least 2 eraseblocks\n");
 		err = -ENOSPC;
 		goto out_put_mtd;
 	}
@@ -277,7 +277,7 @@
 	writebuf = vmalloc(bufsize);
 	offsets = kmalloc(ebcnt * sizeof(int), GFP_KERNEL);
 	if (!readbuf || !writebuf || !offsets) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 	for (i = 0; i < ebcnt; i++)
@@ -290,16 +290,16 @@
 		goto out;
 
 	/* Do operations */
-	printk(PRINT_PREF "doing operations\n");
+	pr_info("doing operations\n");
 	for (op = 0; op < count; op++) {
 		if ((op & 1023) == 0)
-			printk(PRINT_PREF "%d operations done\n", op);
+			pr_info("%d operations done\n", op);
 		err = do_operation();
 		if (err)
 			goto out;
 		cond_resched();
 	}
-	printk(PRINT_PREF "finished, %d operations done\n", op);
+	pr_info("finished, %d operations done\n", op);
 
 out:
 	kfree(offsets);
@@ -309,7 +309,7 @@
 out_put_mtd:
 	put_mtd_device(mtd);
 	if (err)
-		printk(PRINT_PREF "error %d occurred\n", err);
+		pr_info("error %d occurred\n", err);
 	printk(KERN_INFO "=================================================\n");
 	return err;
 }
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index 9667bf5..c880c22 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -19,6 +19,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -27,8 +29,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-#define PRINT_PREF KERN_INFO "mtd_subpagetest: "
-
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -82,12 +82,12 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (err) {
-		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
+		pr_err("error %d while erasing EB %d\n", err, ebnum);
 		return err;
 	}
 
 	if (ei.state == MTD_ERASE_FAILED) {
-		printk(PRINT_PREF "some erase error occurred at EB %d\n",
+		pr_err("some erase error occurred at EB %d\n",
 		       ebnum);
 		return -EIO;
 	}
@@ -100,7 +100,7 @@
 	int err;
 	unsigned int i;
 
-	printk(PRINT_PREF "erasing whole device\n");
+	pr_info("erasing whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -109,7 +109,7 @@
 			return err;
 		cond_resched();
 	}
-	printk(PRINT_PREF "erased %u eraseblocks\n", i);
+	pr_info("erased %u eraseblocks\n", i);
 	return 0;
 }
 
@@ -122,11 +122,11 @@
 	set_random_data(writebuf, subpgsize);
 	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
-		printk(PRINT_PREF "error: write failed at %#llx\n",
+		pr_err("error: write failed at %#llx\n",
 		       (long long)addr);
 		if (written != subpgsize) {
-			printk(PRINT_PREF "  write size: %#x\n", subpgsize);
-			printk(PRINT_PREF "  written: %#zx\n", written);
+			pr_err("  write size: %#x\n", subpgsize);
+			pr_err("  written: %#zx\n", written);
 		}
 		return err ? err : -1;
 	}
@@ -136,11 +136,11 @@
 	set_random_data(writebuf, subpgsize);
 	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
-		printk(PRINT_PREF "error: write failed at %#llx\n",
+		pr_err("error: write failed at %#llx\n",
 		       (long long)addr);
 		if (written != subpgsize) {
-			printk(PRINT_PREF "  write size: %#x\n", subpgsize);
-			printk(PRINT_PREF "  written: %#zx\n", written);
+			pr_err("  write size: %#x\n", subpgsize);
+			pr_err("  written: %#zx\n", written);
 		}
 		return err ? err : -1;
 	}
@@ -160,12 +160,12 @@
 		set_random_data(writebuf, subpgsize * k);
 		err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf);
 		if (unlikely(err || written != subpgsize * k)) {
-			printk(PRINT_PREF "error: write failed at %#llx\n",
+			pr_err("error: write failed at %#llx\n",
 			       (long long)addr);
 			if (written != subpgsize) {
-				printk(PRINT_PREF "  write size: %#x\n",
+				pr_err("  write size: %#x\n",
 				       subpgsize * k);
-				printk(PRINT_PREF "  written: %#08zx\n",
+				pr_err("  written: %#08zx\n",
 				       written);
 			}
 			return err ? err : -1;
@@ -198,23 +198,23 @@
 	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
 		if (mtd_is_bitflip(err) && read == subpgsize) {
-			printk(PRINT_PREF "ECC correction at %#llx\n",
+			pr_info("ECC correction at %#llx\n",
 			       (long long)addr);
 			err = 0;
 		} else {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)addr);
 			return err ? err : -1;
 		}
 	}
 	if (unlikely(memcmp(readbuf, writebuf, subpgsize))) {
-		printk(PRINT_PREF "error: verify failed at %#llx\n",
+		pr_err("error: verify failed at %#llx\n",
 		       (long long)addr);
-		printk(PRINT_PREF "------------- written----------------\n");
+		pr_info("------------- written----------------\n");
 		print_subpage(writebuf);
-		printk(PRINT_PREF "------------- read ------------------\n");
+		pr_info("------------- read ------------------\n");
 		print_subpage(readbuf);
-		printk(PRINT_PREF "-------------------------------------\n");
+		pr_info("-------------------------------------\n");
 		errcnt += 1;
 	}
 
@@ -225,23 +225,23 @@
 	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
 		if (mtd_is_bitflip(err) && read == subpgsize) {
-			printk(PRINT_PREF "ECC correction at %#llx\n",
+			pr_info("ECC correction at %#llx\n",
 			       (long long)addr);
 			err = 0;
 		} else {
-			printk(PRINT_PREF "error: read failed at %#llx\n",
+			pr_err("error: read failed at %#llx\n",
 			       (long long)addr);
 			return err ? err : -1;
 		}
 	}
 	if (unlikely(memcmp(readbuf, writebuf, subpgsize))) {
-		printk(PRINT_PREF "error: verify failed at %#llx\n",
+		pr_err("error: verify failed at %#llx\n",
 		       (long long)addr);
-		printk(PRINT_PREF "------------- written----------------\n");
+		pr_info("------------- written----------------\n");
 		print_subpage(writebuf);
-		printk(PRINT_PREF "------------- read ------------------\n");
+		pr_info("------------- read ------------------\n");
 		print_subpage(readbuf);
-		printk(PRINT_PREF "-------------------------------------\n");
+		pr_info("-------------------------------------\n");
 		errcnt += 1;
 	}
 
@@ -262,17 +262,17 @@
 		err = mtd_read(mtd, addr, subpgsize * k, &read, readbuf);
 		if (unlikely(err || read != subpgsize * k)) {
 			if (mtd_is_bitflip(err) && read == subpgsize * k) {
-				printk(PRINT_PREF "ECC correction at %#llx\n",
+				pr_info("ECC correction at %#llx\n",
 				       (long long)addr);
 				err = 0;
 			} else {
-				printk(PRINT_PREF "error: read failed at "
+				pr_err("error: read failed at "
 				       "%#llx\n", (long long)addr);
 				return err ? err : -1;
 			}
 		}
 		if (unlikely(memcmp(readbuf, writebuf, subpgsize * k))) {
-			printk(PRINT_PREF "error: verify failed at %#llx\n",
+			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
 		}
@@ -295,17 +295,17 @@
 		err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 		if (unlikely(err || read != subpgsize)) {
 			if (mtd_is_bitflip(err) && read == subpgsize) {
-				printk(PRINT_PREF "ECC correction at %#llx\n",
+				pr_info("ECC correction at %#llx\n",
 				       (long long)addr);
 				err = 0;
 			} else {
-				printk(PRINT_PREF "error: read failed at "
+				pr_err("error: read failed at "
 				       "%#llx\n", (long long)addr);
 				return err ? err : -1;
 			}
 		}
 		if (unlikely(memcmp(readbuf, writebuf, subpgsize))) {
-			printk(PRINT_PREF "error: verify 0xff failed at "
+			pr_err("error: verify 0xff failed at "
 			       "%#llx\n", (long long)addr);
 			errcnt += 1;
 		}
@@ -320,7 +320,7 @@
 	int err;
 	unsigned int i;
 
-	printk(PRINT_PREF "verifying all eraseblocks for 0xff\n");
+	pr_info("verifying all eraseblocks for 0xff\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -328,10 +328,10 @@
 		if (err)
 			return err;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "verified up to eraseblock %u\n", i);
+			pr_info("verified up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "verified %u eraseblocks\n", i);
+	pr_info("verified %u eraseblocks\n", i);
 	return 0;
 }
 
@@ -342,7 +342,7 @@
 
 	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
-		printk(PRINT_PREF "block %d is bad\n", ebnum);
+		pr_info("block %d is bad\n", ebnum);
 	return ret;
 }
 
@@ -352,18 +352,18 @@
 
 	bbt = kzalloc(ebcnt, GFP_KERNEL);
 	if (!bbt) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		return -ENOMEM;
 	}
 
-	printk(PRINT_PREF "scanning for bad eraseblocks\n");
+	pr_info("scanning for bad eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		bbt[i] = is_block_bad(i) ? 1 : 0;
 		if (bbt[i])
 			bad += 1;
 		cond_resched();
 	}
-	printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
+	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
 	return 0;
 }
 
@@ -377,22 +377,22 @@
 	printk(KERN_INFO "=================================================\n");
 
 	if (dev < 0) {
-		printk(PRINT_PREF "Please specify a valid mtd-device via module paramter\n");
-		printk(KERN_CRIT "CAREFUL: This test wipes all data on the specified MTD device!\n");
+		pr_info("Please specify a valid mtd-device via module parameter\n");
+		pr_crit("CAREFUL: This test wipes all data on the specified MTD device!\n");
 		return -EINVAL;
 	}
 
-	printk(PRINT_PREF "MTD device: %d\n", dev);
+	pr_info("MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		printk(PRINT_PREF "error: cannot get MTD device\n");
+		pr_err("error: cannot get MTD device\n");
 		return err;
 	}
 
 	if (mtd->type != MTD_NANDFLASH) {
-		printk(PRINT_PREF "this test requires NAND flash\n");
+		pr_info("this test requires NAND flash\n");
 		goto out;
 	}
 
@@ -402,7 +402,7 @@
 	ebcnt = tmp;
 	pgcnt = mtd->erasesize / mtd->writesize;
 
-	printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
+	pr_info("MTD device size %llu, eraseblock size %u, "
 	       "page size %u, subpage size %u, count of eraseblocks %u, "
 	       "pages per eraseblock %u, OOB size %u\n",
 	       (unsigned long long)mtd->size, mtd->erasesize,
@@ -412,12 +412,12 @@
 	bufsize = subpgsize * 32;
 	writebuf = kmalloc(bufsize, GFP_KERNEL);
 	if (!writebuf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 	readbuf = kmalloc(bufsize, GFP_KERNEL);
 	if (!readbuf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out;
 	}
 
@@ -429,7 +429,7 @@
 	if (err)
 		goto out;
 
-	printk(PRINT_PREF "writing whole device\n");
+	pr_info("writing whole device\n");
 	simple_srand(1);
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -438,13 +438,13 @@
 		if (unlikely(err))
 			goto out;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "written up to eraseblock %u\n", i);
+			pr_info("written up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "written %u eraseblocks\n", i);
+	pr_info("written %u eraseblocks\n", i);
 
 	simple_srand(1);
-	printk(PRINT_PREF "verifying all eraseblocks\n");
+	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -452,10 +452,10 @@
 		if (unlikely(err))
 			goto out;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "verified up to eraseblock %u\n", i);
+			pr_info("verified up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "verified %u eraseblocks\n", i);
+	pr_info("verified %u eraseblocks\n", i);
 
 	err = erase_whole_device();
 	if (err)
@@ -467,7 +467,7 @@
 
 	/* Write all eraseblocks */
 	simple_srand(3);
-	printk(PRINT_PREF "writing whole device\n");
+	pr_info("writing whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -475,14 +475,14 @@
 		if (unlikely(err))
 			goto out;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "written up to eraseblock %u\n", i);
+			pr_info("written up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "written %u eraseblocks\n", i);
+	pr_info("written %u eraseblocks\n", i);
 
 	/* Check all eraseblocks */
 	simple_srand(3);
-	printk(PRINT_PREF "verifying all eraseblocks\n");
+	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -490,10 +490,10 @@
 		if (unlikely(err))
 			goto out;
 		if (i % 256 == 0)
-			printk(PRINT_PREF "verified up to eraseblock %u\n", i);
+			pr_info("verified up to eraseblock %u\n", i);
 		cond_resched();
 	}
-	printk(PRINT_PREF "verified %u eraseblocks\n", i);
+	pr_info("verified %u eraseblocks\n", i);
 
 	err = erase_whole_device();
 	if (err)
@@ -503,7 +503,7 @@
 	if (err)
 		goto out;
 
-	printk(PRINT_PREF "finished with %d errors\n", errcnt);
+	pr_info("finished with %d errors\n", errcnt);
 
 out:
 	kfree(bbt);
@@ -511,7 +511,7 @@
 	kfree(writebuf);
 	put_mtd_device(mtd);
 	if (err)
-		printk(PRINT_PREF "error %d occurred\n", err);
+		pr_info("error %d occurred\n", err);
 	printk(KERN_INFO "=================================================\n");
 	return err;
 }
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index b65861b..c4cde1e 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -23,6 +23,8 @@
  * damage caused by this program.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -31,7 +33,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-#define PRINT_PREF KERN_INFO "mtd_torturetest: "
 #define RETRIES 3
 
 static int eb = 8;
@@ -107,12 +108,12 @@
 
 	err = mtd_erase(mtd, &ei);
 	if (err) {
-		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
+		pr_err("error %d while erasing EB %d\n", err, ebnum);
 		return err;
 	}
 
 	if (ei.state == MTD_ERASE_FAILED) {
-		printk(PRINT_PREF "some erase error occurred at EB %d\n",
+		pr_err("some erase error occurred at EB %d\n",
 		       ebnum);
 		return -EIO;
 	}
@@ -139,40 +140,40 @@
 retry:
 	err = mtd_read(mtd, addr, len, &read, check_buf);
 	if (mtd_is_bitflip(err))
-		printk(PRINT_PREF "single bit flip occurred at EB %d "
+		pr_err("single bit flip occurred at EB %d, "
 		       "MTD reported that it was fixed.\n", ebnum);
 	else if (err) {
-		printk(PRINT_PREF "error %d while reading EB %d, "
+		pr_err("error %d while reading EB %d, "
 		       "read %zd\n", err, ebnum, read);
 		return err;
 	}
 
 	if (read != len) {
-		printk(PRINT_PREF "failed to read %zd bytes from EB %d, "
+		pr_err("failed to read %zd bytes from EB %d, "
 		       "read only %zd, but no error reported\n",
 		       len, ebnum, read);
 		return -EIO;
 	}
 
 	if (memcmp(buf, check_buf, len)) {
-		printk(PRINT_PREF "read wrong data from EB %d\n", ebnum);
+		pr_err("read wrong data from EB %d\n", ebnum);
 		report_corrupt(check_buf, buf);
 
 		if (retries++ < RETRIES) {
 			/* Try read again */
 			yield();
-			printk(PRINT_PREF "re-try reading data from EB %d\n",
+			pr_info("re-try reading data from EB %d\n",
 			       ebnum);
 			goto retry;
 		} else {
-			printk(PRINT_PREF "retried %d times, still errors, "
+			pr_info("retried %d times, still errors, "
 			       "give-up\n", RETRIES);
 			return -EINVAL;
 		}
 	}
 
 	if (retries != 0)
-		printk(PRINT_PREF "only attempt number %d was OK (!!!)\n",
+		pr_info("only attempt number %d was OK (!!!)\n",
 		       retries);
 
 	return 0;
@@ -191,12 +192,12 @@
 	}
 	err = mtd_write(mtd, addr, len, &written, buf);
 	if (err) {
-		printk(PRINT_PREF "error %d while writing EB %d, written %zd"
+		pr_err("error %d while writing EB %d, written %zd"
 		      " bytes\n", err, ebnum, written);
 		return err;
 	}
 	if (written != len) {
-		printk(PRINT_PREF "written only %zd bytes of %zd, but no error"
+		pr_err("written only %zd bytes of %zd, but no error"
 		       " reported\n", written, len);
 		return -EIO;
 	}
@@ -211,64 +212,64 @@
 
 	printk(KERN_INFO "\n");
 	printk(KERN_INFO "=================================================\n");
-	printk(PRINT_PREF "Warning: this program is trying to wear out your "
+	pr_info("Warning: this program is trying to wear out your "
 	       "flash, stop it if this is not wanted.\n");
 
 	if (dev < 0) {
-		printk(PRINT_PREF "Please specify a valid mtd-device via module paramter\n");
-		printk(KERN_CRIT "CAREFUL: This test wipes all data on the specified MTD device!\n");
+		pr_info("Please specify a valid mtd-device via module parameter\n");
+		pr_crit("CAREFUL: This test wipes all data on the specified MTD device!\n");
 		return -EINVAL;
 	}
 
-	printk(PRINT_PREF "MTD device: %d\n", dev);
-	printk(PRINT_PREF "torture %d eraseblocks (%d-%d) of mtd%d\n",
+	pr_info("MTD device: %d\n", dev);
+	pr_info("torture %d eraseblocks (%d-%d) of mtd%d\n",
 	       ebcnt, eb, eb + ebcnt - 1, dev);
 	if (pgcnt)
-		printk(PRINT_PREF "torturing just %d pages per eraseblock\n",
+		pr_info("torturing just %d pages per eraseblock\n",
 			pgcnt);
-	printk(PRINT_PREF "write verify %s\n", check ? "enabled" : "disabled");
+	pr_info("write verify %s\n", check ? "enabled" : "disabled");
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
 		err = PTR_ERR(mtd);
-		printk(PRINT_PREF "error: cannot get MTD device\n");
+		pr_err("error: cannot get MTD device\n");
 		return err;
 	}
 
 	if (mtd->writesize == 1) {
-		printk(PRINT_PREF "not NAND flash, assume page size is 512 "
+		pr_info("not NAND flash, assume page size is 512 "
 		       "bytes.\n");
 		pgsize = 512;
 	} else
 		pgsize = mtd->writesize;
 
 	if (pgcnt && (pgcnt > mtd->erasesize / pgsize || pgcnt < 0)) {
-		printk(PRINT_PREF "error: invalid pgcnt value %d\n", pgcnt);
+		pr_err("error: invalid pgcnt value %d\n", pgcnt);
 		goto out_mtd;
 	}
 
 	err = -ENOMEM;
 	patt_5A5 = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!patt_5A5) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out_mtd;
 	}
 
 	patt_A5A = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!patt_A5A) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out_patt_5A5;
 	}
 
 	patt_FF = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!patt_FF) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out_patt_A5A;
 	}
 
 	check_buf = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!check_buf) {
-		printk(PRINT_PREF "error: cannot allocate memory\n");
+		pr_err("error: cannot allocate memory\n");
 		goto out_patt_FF;
 	}
 
@@ -295,13 +296,13 @@
 			err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize);
 
 			if (err < 0) {
-				printk(PRINT_PREF "block_isbad() returned %d "
+				pr_err("block_isbad() returned %d "
 				       "for EB %d\n", err, i);
 				goto out;
 			}
 
 			if (err) {
-				printk("EB %d is bad. Skip it.\n", i);
+				pr_err("EB %d is bad. Skip it.\n", i);
 				bad_ebs[i - eb] = 1;
 			}
 		}
@@ -329,7 +330,7 @@
 					continue;
 				err = check_eraseblock(i, patt_FF);
 				if (err) {
-					printk(PRINT_PREF "verify failed"
+					pr_err("verify failed"
 					       " for 0xFF... pattern\n");
 					goto out;
 				}
@@ -362,7 +363,7 @@
 					patt = patt_A5A;
 				err = check_eraseblock(i, patt);
 				if (err) {
-					printk(PRINT_PREF "verify failed for %s"
+					pr_err("verify failed for %s"
 					       " pattern\n",
 					       ((eb + erase_cycles) & 1) ?
 					       "0x55AA55..." : "0xAA55AA...");
@@ -380,7 +381,7 @@
 			stop_timing();
 			ms = (finish.tv_sec - start.tv_sec) * 1000 +
 			     (finish.tv_usec - start.tv_usec) / 1000;
-			printk(PRINT_PREF "%08u erase cycles done, took %lu "
+			pr_info("%08u erase cycles done, took %lu "
 			       "milliseconds (%lu seconds)\n",
 			       erase_cycles, ms, ms / 1000);
 			start_timing();
@@ -391,7 +392,7 @@
 	}
 out:
 
-	printk(PRINT_PREF "finished after %u erase cycles\n",
+	pr_info("finished after %u erase cycles\n",
 	       erase_cycles);
 	kfree(check_buf);
 out_patt_FF:
@@ -403,7 +404,7 @@
 out_mtd:
 	put_mtd_device(mtd);
 	if (err)
-		printk(PRINT_PREF "error %d occurred during torturing\n", err);
+		pr_info("error %d occurred during torturing\n", err);
 	printk(KERN_INFO "=================================================\n");
 	return err;
 }
@@ -441,9 +442,9 @@
 			       &bits) >= 0)
 			pages++;
 
-	printk(PRINT_PREF "verify fails on %d pages, %d bytes/%d bits\n",
+	pr_info("verify fails on %d pages, %d bytes/%d bits\n",
 	       pages, bytes, bits);
-	printk(PRINT_PREF "The following is a list of all differences between"
+	pr_info("The following is a list of all differences between"
 	       " what was read from flash and what was expected\n");
 
 	for (i = 0; i < check_len; i += pgsize) {
@@ -457,7 +458,7 @@
 		printk("-------------------------------------------------------"
 		       "----------------------------------\n");
 
-		printk(PRINT_PREF "Page %zd has %d bytes/%d bits failing verify,"
+		pr_info("Page %zd has %d bytes/%d bits failing verify,"
 		       " starting at offset 0x%x\n",
 		       (mtd->erasesize - check_len + i) / pgsize,
 		       bytes, bits, first);
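
The read-back path converted above retries a miscompare a bounded number of times, yielding between attempts, before declaring the corruption persistent. Its control flow, reduced to a skeleton (read_back_and_compare() is a hypothetical stand-in for the test's mtd_read() plus memcmp()):

    #define RETRIES 3

    static int check_once(int ebnum)
    {
            int retries = 0;

    retry:
            if (read_back_and_compare(ebnum)) {     /* hypothetical helper */
                    if (retries++ < RETRIES) {
                            yield();        /* let the device settle, re-read */
                            goto retry;
                    }
                    return -EINVAL;         /* still failing after RETRIES */
            }
            if (retries)
                    pr_info("only attempt number %d was OK (!!!)\n", retries);
            return 0;
    }
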
diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index fec406b..c071d41 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -322,7 +322,6 @@
 int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb,
 			int pnum, const struct ubi_vid_hdr *vid_hdr)
 {
-	void *buf;
 	int len, err, second_is_newer, bitflips = 0, corrupted = 0;
 	uint32_t data_crc, crc;
 	struct ubi_vid_hdr *vh = NULL;
@@ -393,18 +392,14 @@
 	/* Read the data of the copy and check the CRC */
 
 	len = be32_to_cpu(vid_hdr->data_size);
-	buf = vmalloc(len);
-	if (!buf) {
-		err = -ENOMEM;
-		goto out_free_vidh;
-	}
 
-	err = ubi_io_read_data(ubi, buf, pnum, 0, len);
+	mutex_lock(&ubi->buf_mutex);
+	err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, len);
 	if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err))
-		goto out_free_buf;
+		goto out_unlock;
 
 	data_crc = be32_to_cpu(vid_hdr->data_crc);
-	crc = crc32(UBI_CRC32_INIT, buf, len);
+	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, len);
 	if (crc != data_crc) {
 		dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",
 			pnum, crc, data_crc);
@@ -415,8 +410,8 @@
 		dbg_bld("PEB %d CRC is OK", pnum);
 		bitflips = !!err;
 	}
+	mutex_unlock(&ubi->buf_mutex);
 
-	vfree(buf);
 	ubi_free_vid_hdr(ubi, vh);
 
 	if (second_is_newer)
@@ -426,8 +421,8 @@
 
 	return second_is_newer | (bitflips << 1) | (corrupted << 2);
 
-out_free_buf:
-	vfree(buf);
+out_unlock:
+	mutex_unlock(&ubi->buf_mutex);
 out_free_vidh:
 	ubi_free_vid_hdr(ubi, vh);
 	return err;
@@ -1453,7 +1448,7 @@
 		goto out_wl;
 
 #ifdef CONFIG_MTD_UBI_FASTMAP
-	if (ubi->fm && ubi->dbg->chk_gen) {
+	if (ubi->fm && ubi_dbg_chk_gen(ubi)) {
 		struct ubi_attach_info *scan_ai;
 
 		scan_ai = alloc_ai("ubi_ckh_aeb_slab_cache");
@@ -1503,7 +1498,7 @@
 	struct ubi_ainf_peb *aeb, *last_aeb;
 	uint8_t *buf;
 
-	if (!ubi->dbg->chk_gen)
+	if (!ubi_dbg_chk_gen(ubi))
 		return 0;
 
 	/*
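
The ubi_compare_lebs() change above swaps a per-call vmalloc() for the preallocated, device-wide ubi->peb_buf serialized by buf_mutex: the attach path can no longer fail on a transient allocation, at the cost of holding a lock across the read. The shape of that trade, as a sketch (read_into() is a hypothetical placeholder for the I/O call):

    /* Before: allocate on every comparison; can fail under memory pressure. */
    buf = vmalloc(len);
    if (!buf)
            return -ENOMEM;
    err = read_into(buf, len);
    vfree(buf);

    /* After: reuse one PEB-sized buffer owned by the device object. */
    mutex_lock(&ubi->buf_mutex);
    err = read_into(ubi->peb_buf, len);
    mutex_unlock(&ubi->buf_mutex);
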
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 344b4cb..a561335 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -825,8 +825,7 @@
 		 * No available PEBs to re-size the volume, clear the flag on
 		 * flash and exit.
 		 */
-		memcpy(&vtbl_rec, &ubi->vtbl[vol_id],
-		       sizeof(struct ubi_vtbl_record));
+		vtbl_rec = ubi->vtbl[vol_id];
 		err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
 		if (err)
 			ubi_err("cannot clean auto-resize flag for volume %d",
@@ -986,14 +985,10 @@
 	if (!ubi->fm_buf)
 		goto out_free;
 #endif
-	err = ubi_debugging_init_dev(ubi);
-	if (err)
-		goto out_free;
-
 	err = ubi_attach(ubi, 0);
 	if (err) {
 		ubi_err("failed to attach mtd%d, error %d", mtd->index, err);
-		goto out_debugging;
+		goto out_free;
 	}
 
 	if (ubi->autoresize_vol_id != -1) {
@@ -1060,8 +1055,6 @@
 	ubi_wl_close(ubi);
 	ubi_free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
-out_debugging:
-	ubi_debugging_exit_dev(ubi);
 out_free:
 	vfree(ubi->peb_buf);
 	vfree(ubi->fm_buf);
@@ -1139,7 +1132,6 @@
 	ubi_free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
 	put_mtd_device(ubi->mtd);
-	ubi_debugging_exit_dev(ubi);
 	vfree(ubi->peb_buf);
 	vfree(ubi->fm_buf);
 	ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num);
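
Several hunks in build.c, and in upd.c and vmt.c below, replace a sizeof-based memcpy() of a volume-table record with plain structure assignment; both copy the same bytes, but assignment lets the compiler verify the types and the size. For example:

    struct ubi_vtbl_record vtbl_rec;

    /* memcpy() still compiles if the types or the size ever drift apart: */
    memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record));

    /* assignment is equivalent here and is checked by the compiler: */
    vtbl_rec = ubi->vtbl[vol_id];
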
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 26908a5..63cb1d7 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -217,32 +217,6 @@
 	pr_err("\t1st 16 characters of name: %s\n", nm);
 }
 
-/**
- * ubi_debugging_init_dev - initialize debugging for an UBI device.
- * @ubi: UBI device description object
- *
- * This function initializes debugging-related data for UBI device @ubi.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- */
-int ubi_debugging_init_dev(struct ubi_device *ubi)
-{
-	ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL);
-	if (!ubi->dbg)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/**
- * ubi_debugging_exit_dev - free debugging data for an UBI device.
- * @ubi: UBI device description object
- */
-void ubi_debugging_exit_dev(struct ubi_device *ubi)
-{
-	kfree(ubi->dbg);
-}
-
 /*
  * Root directory for UBI stuff in debugfs. Contains sub-directories which
  * contain the stuff specific to particular UBI devices.
@@ -295,7 +269,7 @@
 	ubi = ubi_get_device(ubi_num);
 	if (!ubi)
 		return -ENODEV;
-	d = ubi->dbg;
+	d = &ubi->dbg;
 
 	if (dent == d->dfs_chk_gen)
 		val = d->chk_gen;
@@ -341,7 +315,7 @@
 	ubi = ubi_get_device(ubi_num);
 	if (!ubi)
 		return -ENODEV;
-	d = ubi->dbg;
+	d = &ubi->dbg;
 
 	buf_size = min_t(size_t, count, (sizeof(buf) - 1));
 	if (copy_from_user(buf, user_buf, buf_size)) {
@@ -398,7 +372,7 @@
 	unsigned long ubi_num = ubi->ubi_num;
 	const char *fname;
 	struct dentry *dent;
-	struct ubi_debug_info *d = ubi->dbg;
+	struct ubi_debug_info *d = &ubi->dbg;
 
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
@@ -471,5 +445,5 @@
 void ubi_debugfs_exit_dev(struct ubi_device *ubi)
 {
 	if (IS_ENABLED(CONFIG_DEBUG_FS))
-		debugfs_remove_recursive(ubi->dbg->dfs_dir);
+		debugfs_remove_recursive(ubi->dbg.dfs_dir);
 }
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 3dbc877..33f8f3b 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -60,51 +60,11 @@
 void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req);
 int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
 			  int len);
-int ubi_debugging_init_dev(struct ubi_device *ubi);
-void ubi_debugging_exit_dev(struct ubi_device *ubi);
 int ubi_debugfs_init(void);
 void ubi_debugfs_exit(void);
 int ubi_debugfs_init_dev(struct ubi_device *ubi);
 void ubi_debugfs_exit_dev(struct ubi_device *ubi);
 
-/*
- * The UBI debugfs directory name pattern and maximum name length (3 for "ubi"
- * + 2 for the number plus 1 for the trailing zero byte.
- */
-#define UBI_DFS_DIR_NAME "ubi%d"
-#define UBI_DFS_DIR_LEN  (3 + 2 + 1)
-
-/**
- * struct ubi_debug_info - debugging information for an UBI device.
- *
- * @chk_gen: if UBI general extra checks are enabled
- * @chk_io: if UBI I/O extra checks are enabled
- * @disable_bgt: disable the background task for testing purposes
- * @emulate_bitflips: emulate bit-flips for testing purposes
- * @emulate_io_failures: emulate write/erase failures for testing purposes
- * @dfs_dir_name: name of debugfs directory containing files of this UBI device
- * @dfs_dir: direntry object of the UBI device debugfs directory
- * @dfs_chk_gen: debugfs knob to enable UBI general extra checks
- * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks
- * @dfs_disable_bgt: debugfs knob to disable the background task
- * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips
- * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures
- */
-struct ubi_debug_info {
-	unsigned int chk_gen:1;
-	unsigned int chk_io:1;
-	unsigned int disable_bgt:1;
-	unsigned int emulate_bitflips:1;
-	unsigned int emulate_io_failures:1;
-	char dfs_dir_name[UBI_DFS_DIR_LEN + 1];
-	struct dentry *dfs_dir;
-	struct dentry *dfs_chk_gen;
-	struct dentry *dfs_chk_io;
-	struct dentry *dfs_disable_bgt;
-	struct dentry *dfs_emulate_bitflips;
-	struct dentry *dfs_emulate_io_failures;
-};
-
 /**
  * ubi_dbg_is_bgt_disabled - if the background thread is disabled.
  * @ubi: UBI device description object
@@ -114,7 +74,7 @@
  */
 static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi)
 {
-	return ubi->dbg->disable_bgt;
+	return ubi->dbg.disable_bgt;
 }
 
 /**
@@ -125,7 +85,7 @@
  */
 static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi)
 {
-	if (ubi->dbg->emulate_bitflips)
+	if (ubi->dbg.emulate_bitflips)
 		return !(random32() % 200);
 	return 0;
 }
@@ -139,7 +99,7 @@
  */
 static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi)
 {
-	if (ubi->dbg->emulate_io_failures)
+	if (ubi->dbg.emulate_io_failures)
 		return !(random32() % 500);
 	return 0;
 }
@@ -153,9 +113,18 @@
  */
 static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi)
 {
-	if (ubi->dbg->emulate_io_failures)
+	if (ubi->dbg.emulate_io_failures)
 		return !(random32() % 400);
 	return 0;
 }
 
+static inline int ubi_dbg_chk_io(const struct ubi_device *ubi)
+{
+	return ubi->dbg.chk_io;
+}
+
+static inline int ubi_dbg_chk_gen(const struct ubi_device *ubi)
+{
+	return ubi->dbg.chk_gen;
+}
 #endif /* !__UBI_DEBUG_H__ */
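
Embedding struct ubi_debug_info in struct ubi_device, instead of pointing at a separately kzalloc()'d object, removes an allocation that could fail and a kfree() that could be forgotten, while the new ubi_dbg_chk_io()/ubi_dbg_chk_gen() accessors keep call sites independent of the layout. A self-contained user-space analogue of the refactor:

    #include <stdio.h>

    struct debug_info {
            unsigned int chk_gen:1;
            unsigned int chk_io:1;
    };

    struct device {
            struct debug_info dbg;  /* embedded: lifetime tied to the owner */
    };

    /* the accessor hides whether dbg is embedded or a pointer */
    static inline int dbg_chk_gen(const struct device *d)
    {
            return d->dbg.chk_gen;
    }

    int main(void)
    {
            struct device d = { .dbg = { .chk_gen = 1 } };

            printf("chk_gen=%d\n", dbg_chk_gen(&d)); /* prints chk_gen=1 */
            return 0;
    }
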
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index 1a5f53c..0648c69 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -814,10 +814,8 @@
 	if (max_sqnum > ai->max_sqnum)
 		ai->max_sqnum = max_sqnum;
 
-	list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) {
-		list_del(&tmp_aeb->u.list);
-		list_add_tail(&tmp_aeb->u.list, &ai->free);
-	}
+	list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list)
+		list_move_tail(&tmp_aeb->u.list, &ai->free);
 
 	/*
 	 * If fastmap is leaking PEBs (must not happen), raise a
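
list_move_tail() from <linux/list.h> is exactly the list_del() plus list_add_tail() pair it replaces, so the fastmap hunk above is a pure simplification:

    /* open-coded form */
    list_del(&tmp_aeb->u.list);
    list_add_tail(&tmp_aeb->u.list, &ai->free);

    /* equivalent single helper */
    list_move_tail(&tmp_aeb->u.list, &ai->free);
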
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index 4bd4db8..b93807b 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -171,17 +171,17 @@
 static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len,
 		       size_t *retlen, unsigned char *buf)
 {
-	int err = 0, lnum, offs, total_read;
+	int err = 0, lnum, offs, bytes_left;
 	struct gluebi_device *gluebi;
 
 	gluebi = container_of(mtd, struct gluebi_device, mtd);
 	lnum = div_u64_rem(from, mtd->erasesize, &offs);
-	total_read = len;
-	while (total_read) {
+	bytes_left = len;
+	while (bytes_left) {
 		size_t to_read = mtd->erasesize - offs;
 
-		if (to_read > total_read)
-			to_read = total_read;
+		if (to_read > bytes_left)
+			to_read = bytes_left;
 
 		err = ubi_read(gluebi->desc, lnum, buf, offs, to_read);
 		if (err)
@@ -189,11 +189,11 @@
 
 		lnum += 1;
 		offs = 0;
-		total_read -= to_read;
+		bytes_left -= to_read;
 		buf += to_read;
 	}
 
-	*retlen = len - total_read;
+	*retlen = len - bytes_left;
 	return err;
 }
 
@@ -211,7 +211,7 @@
 static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len,
 			size_t *retlen, const u_char *buf)
 {
-	int err = 0, lnum, offs, total_written;
+	int err = 0, lnum, offs, bytes_left;
 	struct gluebi_device *gluebi;
 
 	gluebi = container_of(mtd, struct gluebi_device, mtd);
@@ -220,12 +220,12 @@
 	if (len % mtd->writesize || offs % mtd->writesize)
 		return -EINVAL;
 
-	total_written = len;
-	while (total_written) {
+	bytes_left = len;
+	while (bytes_left) {
 		size_t to_write = mtd->erasesize - offs;
 
-		if (to_write > total_written)
-			to_write = total_written;
+		if (to_write > bytes_left)
+			to_write = bytes_left;
 
 		err = ubi_leb_write(gluebi->desc, lnum, buf, offs, to_write);
 		if (err)
@@ -233,11 +233,11 @@
 
 		lnum += 1;
 		offs = 0;
-		total_written -= to_write;
+		bytes_left -= to_write;
 		buf += to_write;
 	}
 
-	*retlen = len - total_written;
+	*retlen = len - bytes_left;
 	return err;
 }
 
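
gluebi presents UBI volumes as flat MTD devices, so gluebi_read() and gluebi_write() walk a byte range one eraseblock at a time; renaming total_read/total_written to bytes_left makes the countdown explicit. A self-contained user-space sketch of the same chunking arithmetic (the sizes are illustrative):

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    int main(void)
    {
            const uint32_t erasesize = 128 * 1024;  /* one eraseblock */
            uint64_t from = 130 * 1024;             /* flat offset */
            size_t bytes_left = 300 * 1024;         /* request length */
            unsigned int lnum = from / erasesize;   /* starting LEB */
            unsigned int offs = from % erasesize;   /* offset inside it */

            while (bytes_left) {
                    size_t chunk = erasesize - offs;

                    if (chunk > bytes_left)
                            chunk = bytes_left;
                    printf("read LEB %u, offs %u, len %zu\n",
                           lnum, offs, chunk);
                    lnum++;
                    offs = 0;       /* later LEBs are read from offset 0 */
                    bytes_left -= chunk;
            }
            return 0;
    }
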
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 78a1dcb..bf79def 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -1132,7 +1132,7 @@
 {
 	int err;
 
-	if (!ubi->dbg->chk_io)
+	if (!ubi_dbg_chk_io(ubi))
 		return 0;
 
 	err = ubi_io_is_bad(ubi, pnum);
@@ -1159,7 +1159,7 @@
 	int err;
 	uint32_t magic;
 
-	if (!ubi->dbg->chk_io)
+	if (!ubi_dbg_chk_io(ubi))
 		return 0;
 
 	magic = be32_to_cpu(ec_hdr->magic);
@@ -1197,7 +1197,7 @@
 	uint32_t crc, hdr_crc;
 	struct ubi_ec_hdr *ec_hdr;
 
-	if (!ubi->dbg->chk_io)
+	if (!ubi_dbg_chk_io(ubi))
 		return 0;
 
 	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
@@ -1241,7 +1241,7 @@
 	int err;
 	uint32_t magic;
 
-	if (!ubi->dbg->chk_io)
+	if (!ubi_dbg_chk_io(ubi))
 		return 0;
 
 	magic = be32_to_cpu(vid_hdr->magic);
@@ -1282,7 +1282,7 @@
 	struct ubi_vid_hdr *vid_hdr;
 	void *p;
 
-	if (!ubi->dbg->chk_io)
+	if (!ubi_dbg_chk_io(ubi))
 		return 0;
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
@@ -1334,7 +1334,7 @@
 	void *buf1;
 	loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
 
-	if (!ubi->dbg->chk_io)
+	if (!ubi_dbg_chk_io(ubi))
 		return 0;
 
 	buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
@@ -1398,7 +1398,7 @@
 	void *buf;
 	loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
 
-	if (!ubi->dbg->chk_io)
+	if (!ubi_dbg_chk_io(ubi))
 		return 0;
 
 	buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 7d57469..8ea6297 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -85,6 +85,13 @@
 #define UBI_UNKNOWN -1
 
 /*
+ * The UBI debugfs directory name pattern and maximum name length (3 for "ubi"
+ * + 2 for the number plus 1 for the trailing zero byte).
+ */
+#define UBI_DFS_DIR_NAME "ubi%d"
+#define UBI_DFS_DIR_LEN  (3 + 2 + 1)
+
+/*
  * Error codes returned by the I/O sub-system.
  *
  * UBI_IO_FF: the read region of flash contains only 0xFFs
@@ -342,6 +349,37 @@
 struct ubi_wl_entry;
 
 /**
+ * struct ubi_debug_info - debugging information for an UBI device.
+ *
+ * @chk_gen: if UBI general extra checks are enabled
+ * @chk_io: if UBI I/O extra checks are enabled
+ * @disable_bgt: disable the background task for testing purposes
+ * @emulate_bitflips: emulate bit-flips for testing purposes
+ * @emulate_io_failures: emulate write/erase failures for testing purposes
+ * @dfs_dir_name: name of debugfs directory containing files of this UBI device
+ * @dfs_dir: direntry object of the UBI device debugfs directory
+ * @dfs_chk_gen: debugfs knob to enable UBI general extra checks
+ * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks
+ * @dfs_disable_bgt: debugfs knob to disable the background task
+ * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips
+ * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures
+ */
+struct ubi_debug_info {
+	unsigned int chk_gen:1;
+	unsigned int chk_io:1;
+	unsigned int disable_bgt:1;
+	unsigned int emulate_bitflips:1;
+	unsigned int emulate_io_failures:1;
+	char dfs_dir_name[UBI_DFS_DIR_LEN + 1];
+	struct dentry *dfs_dir;
+	struct dentry *dfs_chk_gen;
+	struct dentry *dfs_chk_io;
+	struct dentry *dfs_disable_bgt;
+	struct dentry *dfs_emulate_bitflips;
+	struct dentry *dfs_emulate_io_failures;
+};
+
+/**
  * struct ubi_device - UBI device description structure
 * @dev: UBI device object to use the Linux device model
  * @cdev: character device object to create character device
@@ -545,7 +583,7 @@
 	struct mutex buf_mutex;
 	struct mutex ckvol_mutex;
 
-	struct ubi_debug_info *dbg;
+	struct ubi_debug_info dbg;
 };
 
 /**
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 9f2ebd8..ec2c2dc 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -64,8 +64,7 @@
 		return 0;
 	}
 
-	memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id],
-	       sizeof(struct ubi_vtbl_record));
+	vtbl_rec = ubi->vtbl[vol->vol_id];
 	vtbl_rec.upd_marker = 1;
 
 	mutex_lock(&ubi->device_mutex);
@@ -93,8 +92,7 @@
 
 	dbg_gen("clear update marker for volume %d", vol->vol_id);
 
-	memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id],
-	       sizeof(struct ubi_vtbl_record));
+	vtbl_rec = ubi->vtbl[vol->vol_id];
 	ubi_assert(vol->upd_marker && vtbl_rec.upd_marker);
 	vtbl_rec.upd_marker = 0;
 
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 9169e58..8330703 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -535,7 +535,7 @@
 	}
 
 	/* Change volume table record */
-	memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record));
+	vtbl_rec = ubi->vtbl[vol_id];
 	vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs);
 	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
 	if (err)
@@ -847,7 +847,7 @@
 {
 	int i, err = 0;
 
-	if (!ubi->dbg->chk_gen)
+	if (!ubi_dbg_chk_gen(ubi))
 		return 0;
 
 	for (i = 0; i < ubi->vtbl_slots; i++) {
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 926e3df..d77b1c1 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -858,7 +858,7 @@
  */
 static void self_vtbl_check(const struct ubi_device *ubi)
 {
-	if (!ubi->dbg->chk_gen)
+	if (!ubi_dbg_chk_gen(ubi))
 		return;
 
 	if (vtbl_check(ubi, ubi->vtbl)) {
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 2144f61..5df49d3 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1,5 +1,4 @@
 /*
- * @ubi: UBI device description object
  * Copyright (c) International Business Machines Corp., 2006
  *
  * This program is free software; you can redistribute it and/or modify
@@ -2050,7 +2049,7 @@
 	long long read_ec;
 	struct ubi_ec_hdr *ec_hdr;
 
-	if (!ubi->dbg->chk_gen)
+	if (!ubi_dbg_chk_gen(ubi))
 		return 0;
 
 	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
@@ -2090,7 +2089,7 @@
 static int self_check_in_wl_tree(const struct ubi_device *ubi,
 				 struct ubi_wl_entry *e, struct rb_root *root)
 {
-	if (!ubi->dbg->chk_gen)
+	if (!ubi_dbg_chk_gen(ubi))
 		return 0;
 
 	if (in_wl_tree(e, root))
@@ -2116,7 +2115,7 @@
 	struct ubi_wl_entry *p;
 	int i;
 
-	if (!ubi->dbg->chk_gen)
+	if (!ubi_dbg_chk_gen(ubi))
 		return 0;
 
 	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ef2cb24..b7d45f3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4431,8 +4431,6 @@
 
 	list_del(&bond->bond_list);
 
-	bond_work_cancel_all(bond);
-
 	bond_debug_unregister(bond);
 
 	__hw_addr_flush(&bond->mc_list);
diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c
index 0f59170..6433b81 100644
--- a/drivers/net/can/sja1000/sja1000_of_platform.c
+++ b/drivers/net/can/sja1000/sja1000_of_platform.c
@@ -121,7 +121,7 @@
 	}
 
 	irq = irq_of_parse_and_map(np, 0);
-	if (irq == NO_IRQ) {
+	if (irq == 0) {
 		dev_err(&ofdev->dev, "no irq found\n");
 		err = -ENODEV;
 		goto exit_unmap_mem;
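
NO_IRQ is not defined uniformly across architectures (0 on some, -1 on others, absent elsewhere), while irq_of_parse_and_map() returns 0 when no mapping exists, so testing against 0 is the portable failure check:

    irq = irq_of_parse_and_map(np, 0);
    if (!irq) {     /* 0 means "no interrupt" on every architecture */
            dev_err(&ofdev->dev, "no irq found\n");
            err = -ENODEV;
            goto exit_unmap_mem;
    }
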
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index a2998be..01588b6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1832,7 +1832,6 @@
 			bool config_hash)
 {
 	struct bnx2x_config_rss_params params = {NULL};
-	int i;
 
 	/* Although RSS is meaningless when there is a single HW queue we
 	 * still need it enabled in order to have HW Rx hash generated.
@@ -1864,9 +1863,7 @@
 
 	if (config_hash) {
 		/* RSS keys */
-		for (i = 0; i < sizeof(params.rss_key) / 4; i++)
-			params.rss_key[i] = random32();
-
+		prandom_bytes(params.rss_key, sizeof(params.rss_key));
 		__set_bit(BNX2X_RSS_SET_SRCH, &params.rss_flags);
 	}
 
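
prandom_bytes() fills a buffer of arbitrary length from the same pseudo-random generator, so the bnx2x hunk collapses the word-by-word random32() loop into one call and drops the loop index entirely:

    /* open-coded: assumes the key length is a multiple of 4 bytes */
    for (i = 0; i < sizeof(params.rss_key) / 4; i++)
            params.rss_key[i] = random32();

    /* equivalent, length-agnostic form */
    prandom_bytes(params.rss_key, sizeof(params.rss_key));
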
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index abf26c7..3bc1912 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -616,7 +616,7 @@
 	return adapter->eeh_error || adapter->hw_error || adapter->fw_timeout;
 }
 
-static inline bool be_crit_error(struct be_adapter *adapter)
+static inline bool be_hw_error(struct be_adapter *adapter)
 {
 	return adapter->eeh_error || adapter->hw_error;
 }
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index f2875aa..8a250c3 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -298,7 +298,12 @@
 
 void be_async_mcc_disable(struct be_adapter *adapter)
 {
+	spin_lock_bh(&adapter->mcc_cq_lock);
+
 	adapter->mcc_obj.rearm_cq = false;
+	be_cq_notify(adapter, adapter->mcc_obj.cq.id, false, 0);
+
+	spin_unlock_bh(&adapter->mcc_cq_lock);
 }
 
 int be_process_mcc(struct be_adapter *adapter)
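
The new locking pairs with be_process_mcc(), which samples mcc_obj.rearm_cq under the same lock before re-arming the CQ. A sketch of that reader (field names from the driver; the body is trimmed to the relevant lines):

	static void mcc_poll_sketch(struct be_adapter *adapter)
	{
		struct be_mcc_obj *obj = &adapter->mcc_obj;

		spin_lock_bh(&adapter->mcc_cq_lock);
		/* reap completions, then re-arm only if still enabled */
		be_cq_notify(adapter, obj->cq.id, obj->rearm_cq, 0);
		spin_unlock_bh(&adapter->mcc_cq_lock);
	}

Without the disable path holding the lock across both the flag write and its own final notify, this reader could observe a stale rearm_cq and re-arm a CQ that was just disabled.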
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index f95612b..9dca22b 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1689,15 +1689,41 @@
 	struct be_queue_info *rxq = &rxo->q;
 	struct be_queue_info *rx_cq = &rxo->cq;
 	struct be_rx_compl_info *rxcp;
+	struct be_adapter *adapter = rxo->adapter;
+	int flush_wait = 0;
 	u16 tail;
 
-	/* First cleanup pending rx completions */
-	while ((rxcp = be_rx_compl_get(rxo)) != NULL) {
-		be_rx_compl_discard(rxo, rxcp);
-		be_cq_notify(rxo->adapter, rx_cq->id, false, 1);
+	/* Consume pending rx completions.
+	 * Wait for the flush completion (identified by zero num_rcvd)
+	 * to arrive. Notify CQ even when there are no more CQ entries
+	 * for HW to flush partially coalesced CQ entries.
+	 * In Lancer, there is no need to wait for flush compl.
+	 */
+	for (;;) {
+		rxcp = be_rx_compl_get(rxo);
+		if (rxcp == NULL) {
+			if (lancer_chip(adapter))
+				break;
+
+			if (flush_wait++ > 10 || be_hw_error(adapter)) {
+				dev_warn(&adapter->pdev->dev,
+					 "did not receive flush compl\n");
+				break;
+			}
+			be_cq_notify(adapter, rx_cq->id, true, 0);
+			mdelay(1);
+		} else {
+			be_rx_compl_discard(rxo, rxcp);
+			be_cq_notify(adapter, rx_cq->id, true, 1);
+			if (rxcp->num_rcvd == 0)
+				break;
+		}
 	}
 
-	/* Then free posted rx buffer that were not used */
+	/* After cleanup, leave the CQ in unarmed state */
+	be_cq_notify(adapter, rx_cq->id, false, 0);
+
+	/* Then free posted rx buffers that were not used */
 	tail = (rxq->head + rxq->len - atomic_read(&rxq->used)) % rxq->len;
 	for (; atomic_read(&rxq->used) > 0; index_inc(&tail, rxq->len)) {
 		page_info = get_rx_page_info(rxo, tail);
@@ -2157,7 +2183,7 @@
 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
 	u32 i;
 
-	if (be_crit_error(adapter))
+	if (be_hw_error(adapter))
 		return;
 
 	if (lancer_chip(adapter)) {
@@ -2398,13 +2424,22 @@
 
 	be_roce_dev_close(adapter);
 
-	be_async_mcc_disable(adapter);
-
 	if (!lancer_chip(adapter))
 		be_intr_set(adapter, false);
 
-	for_all_evt_queues(adapter, eqo, i) {
+	for_all_evt_queues(adapter, eqo, i)
 		napi_disable(&eqo->napi);
+
+	be_async_mcc_disable(adapter);
+
+	/* Wait for all pending tx completions to arrive so that
+	 * all tx skbs are freed.
+	 */
+	be_tx_compl_clean(adapter);
+
+	be_rx_qs_destroy(adapter);
+
+	for_all_evt_queues(adapter, eqo, i) {
 		if (msix_enabled(adapter))
 			synchronize_irq(be_msix_vec_get(adapter, eqo));
 		else
@@ -2414,12 +2449,6 @@
 
 	be_irq_unregister(adapter);
 
-	/* Wait for all pending tx completions to arrive so that
-	 * all tx skbs are freed.
-	 */
-	be_tx_compl_clean(adapter);
-
-	be_rx_qs_destroy(adapter);
 	return 0;
 }
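
The be_close() reordering establishes a strict teardown sequence; sketched below with the bodies reduced to comments (steps taken from the hunks above):

	static int be_close_order_sketch(struct be_adapter *adapter)
	{
		/* 1. napi_disable()         - stop polling for completions  */
		/* 2. be_async_mcc_disable() - quiesce async MCC events      */
		/* 3. be_tx_compl_clean()    - wait out in-flight TX skbs    */
		/* 4. be_rx_qs_destroy()     - flush and drop RX buffers     */
		/* 5. synchronize_irq(), be_irq_unregister() - unhook IRQs   */
		return 0;
	}

TX draining and RX teardown now happen before the interrupt plumbing is removed, so the flush completions they wait for can still be delivered.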
 
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 5ba6e1c..ec490d7 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -94,9 +94,8 @@
 
 config FEC_PTP
 	bool "PTP Hardware Clock (PHC)"
-	depends on FEC && ARCH_MXC
+	depends on FEC && ARCH_MXC && !SOC_IMX25 && !SOC_IMX27 && !SOC_IMX35 && !SOC_IMX5
 	select PTP_1588_CLOCK
-	default y if SOC_IMX6Q
 	---help---
 	  Say Y here if you want to use PTP Hardware Clock (PHC) in the
 	  driver.  Only the basic clock operations have been implemented.
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h
index 8364815..99b6c2a 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h
+++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h
@@ -39,26 +39,6 @@
  * hcp_*  - structures, variables and functions related to Hypervisor Calls
  */
 
-static inline u32 get_longbusy_msecs(int long_busy_ret_code)
-{
-	switch (long_busy_ret_code) {
-	case H_LONG_BUSY_ORDER_1_MSEC:
-		return 1;
-	case H_LONG_BUSY_ORDER_10_MSEC:
-		return 10;
-	case H_LONG_BUSY_ORDER_100_MSEC:
-		return 100;
-	case H_LONG_BUSY_ORDER_1_SEC:
-		return 1000;
-	case H_LONG_BUSY_ORDER_10_SEC:
-		return 10000;
-	case H_LONG_BUSY_ORDER_100_SEC:
-		return 100000;
-	default:
-		return 1;
-	}
-}
-
 /* Number of pages which can be registered at once by H_REGISTER_HEA_RPAGES */
 #define EHEA_MAX_RPAGE 512
 
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 83f0ea9..8ebc352 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -4761,7 +4761,7 @@
 	struct ksz_dma_buf *dma_buf;
 	struct net_device *dev = NULL;
 
-	spin_lock(&hw_priv->hwlock);
+	spin_lock_irq(&hw_priv->hwlock);
 	last = info->last;
 
 	while (info->avail < info->alloc) {
@@ -4795,7 +4795,7 @@
 		info->avail++;
 	}
 	info->last = last;
-	spin_unlock(&hw_priv->hwlock);
+	spin_unlock_irq(&hw_priv->hwlock);
 
 	/* Notify the network subsystem that the packet has been sent. */
 	if (dev)
@@ -5259,11 +5259,15 @@
 	struct dev_info *hw_priv = priv->adapter;
 	struct ksz_hw *hw = &hw_priv->hw;
 
+	spin_lock(&hw_priv->hwlock);
+
 	hw_read_intr(hw, &int_enable);
 
 	/* Not our interrupt! */
-	if (!int_enable)
+	if (!int_enable) {
+		spin_unlock(&hw_priv->hwlock);
 		return IRQ_NONE;
+	}
 
 	do {
 		hw_ack_intr(hw, int_enable);
@@ -5310,6 +5314,8 @@
 
 	hw_ena_intr(hw);
 
+	spin_unlock(&hw_priv->hwlock);
+
 	return IRQ_HANDLED;
 }
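
This pairing follows the standard rule for a lock shared with an interrupt handler: the handler may take plain spin_lock() (interrupts are already masked on that CPU), but process-context users must disable interrupts while holding it, or the handler can deadlock against the code it interrupts. A self-contained sketch with hypothetical names:

	static irqreturn_t demo_isr(int irq, void *data)
	{
		struct dev_info *hw_priv = data;

		spin_lock(&hw_priv->hwlock);		/* hardirq context */
		/* ack hardware, schedule follow-up work */
		spin_unlock(&hw_priv->hwlock);
		return IRQ_HANDLED;
	}

	static void demo_tx_reclaim(struct dev_info *hw_priv)
	{
		spin_lock_irq(&hw_priv->hwlock);	/* process context */
		/* walk and free completed descriptors */
		spin_unlock_irq(&hw_priv->hwlock);
	}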
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 5379024..bc7ec64 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -36,8 +36,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 0
-#define _QLCNIC_LINUX_SUBVERSION 29
-#define QLCNIC_LINUX_VERSIONID  "5.0.29"
+#define _QLCNIC_LINUX_SUBVERSION 30
+#define QLCNIC_LINUX_VERSIONID  "5.0.30"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 58f094c..b14b8f0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -134,7 +134,7 @@
 	__le32 *tmp_buf;
 	struct qlcnic_cmd_args cmd;
 	struct qlcnic_hardware_context *ahw;
-	struct qlcnic_dump_template_hdr *tmpl_hdr, *tmp_tmpl;
+	struct qlcnic_dump_template_hdr *tmpl_hdr;
 	dma_addr_t tmp_addr_t = 0;
 
 	ahw = adapter->ahw;
@@ -150,6 +150,8 @@
 	}
 	temp_size = cmd.rsp.arg2;
 	version = cmd.rsp.arg3;
+	dev_info(&adapter->pdev->dev,
+		 "minidump template version = 0x%x\n", version);
 	if (!temp_size)
 		return -EIO;
 
@@ -174,7 +176,6 @@
 		err = -EIO;
 		goto error;
 	}
-	tmp_tmpl = tmp_addr;
 	ahw->fw_dump.tmpl_hdr = vzalloc(temp_size);
 	if (!ahw->fw_dump.tmpl_hdr) {
 		err = -EIO;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index fc48e00..7a6d5eb 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -365,7 +365,7 @@
 qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter,
 		struct cmd_desc_type0 *cmd_desc_arr, int nr_desc)
 {
-	u32 i, producer, consumer;
+	u32 i, producer;
 	struct qlcnic_cmd_buffer *pbuf;
 	struct cmd_desc_type0 *cmd_desc;
 	struct qlcnic_host_tx_ring *tx_ring;
@@ -379,7 +379,6 @@
 	__netif_tx_lock_bh(tx_ring->txq);
 
 	producer = tx_ring->producer;
-	consumer = tx_ring->sw_consumer;
 
 	if (nr_desc >= qlcnic_tx_avail(tx_ring)) {
 		netif_tx_stop_queue(tx_ring->txq);
@@ -402,7 +401,7 @@
 		pbuf->frag_count = 0;
 
 		memcpy(&tx_ring->desc_head[producer],
-			&cmd_desc_arr[i], sizeof(struct cmd_desc_type0));
+		       cmd_desc, sizeof(struct cmd_desc_type0));
 
 		producer = get_next_index(producer, tx_ring->num_desc);
 		i++;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index a7554d9..d833f59 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -445,13 +445,10 @@
 qlcnic_set_function_modes(struct qlcnic_adapter *adapter)
 {
 	u8 id;
-	u32 ref_count;
 	int i, ret = 1;
 	u32 data = QLCNIC_MGMT_FUNC;
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
 
-	/* If other drivers are not in use set their privilege level */
-	ref_count = QLCRD32(adapter, QLCNIC_CRB_DRV_ACTIVE);
 	ret = qlcnic_api_lock(adapter);
 	if (ret)
 		goto err_lock;
@@ -531,11 +528,9 @@
 {
 	u32 offset;
 	void __iomem *mem_ptr0 = NULL;
-	resource_size_t mem_base;
 	unsigned long mem_len, pci_len0 = 0, bar0_len;
 
 	/* remap phys address */
-	mem_base = pci_resource_start(pdev, 0);	/* 0 is for BAR 0 */
 	mem_len = pci_resource_len(pdev, 0);
 
 	qlcnic_get_bar_length(pdev->device, &bar0_len);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
index 12ff292..0b8d862 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
@@ -197,7 +197,7 @@
 	int i, k, timeout = 0;
 	void __iomem *base = adapter->ahw->pci_base0;
 	u32 addr, data;
-	u8 opcode, no_ops;
+	u8 no_ops;
 	struct __ctrl *ctr = &entry->region.ctrl;
 	struct qlcnic_dump_template_hdr *t_hdr = adapter->ahw->fw_dump.tmpl_hdr;
 
@@ -206,7 +206,6 @@
 
 	for (i = 0; i < no_ops; i++) {
 		k = 0;
-		opcode = 0;
 		for (k = 0; k < 8; k++) {
 			if (!(ctr->opcode & (1 << k)))
 				continue;
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index cb6fc5a..5ac9332 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -577,28 +577,30 @@
 {
 	struct net_device *dev = dev_instance;
 	struct cp_private *cp;
+	int handled = 0;
 	u16 status;
 
 	if (unlikely(dev == NULL))
 		return IRQ_NONE;
 	cp = netdev_priv(dev);
 
+	spin_lock(&cp->lock);
+
 	status = cpr16(IntrStatus);
 	if (!status || (status == 0xFFFF))
-		return IRQ_NONE;
+		goto out_unlock;
+
+	handled = 1;
 
 	netif_dbg(cp, intr, dev, "intr, status %04x cmd %02x cpcmd %04x\n",
 		  status, cpr8(Cmd), cpr16(CpCmd));
 
 	cpw16(IntrStatus, status & ~cp_rx_intr_mask);
 
-	spin_lock(&cp->lock);
-
 	/* close possible races with dev_close */
 	if (unlikely(!netif_running(dev))) {
 		cpw16(IntrMask, 0);
-		spin_unlock(&cp->lock);
-		return IRQ_HANDLED;
+		goto out_unlock;
 	}
 
 	if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr))
@@ -612,7 +614,6 @@
 	if (status & LinkChg)
 		mii_check_media(&cp->mii_if, netif_msg_link(cp), false);
 
-	spin_unlock(&cp->lock);
 
 	if (status & PciErr) {
 		u16 pci_status;
@@ -625,7 +626,10 @@
 		/* TODO: reset hardware */
 	}
 
-	return IRQ_HANDLED;
+out_unlock:
+	spin_unlock(&cp->lock);
+
+	return IRQ_RETVAL(handled);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 022b45b..a670d23 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2386,8 +2386,6 @@
 	{},
 };
 MODULE_DEVICE_TABLE(of, smc91x_match);
-#else
-#define smc91x_match NULL
 #endif
 
 static struct dev_pm_ops smc_drv_pm_ops = {
@@ -2402,7 +2400,7 @@
 		.name	= CARDNAME,
 		.owner	= THIS_MODULE,
 		.pm	= &smc_drv_pm_ops,
-		.of_match_table = smc91x_match,
+		.of_match_table = of_match_ptr(smc91x_match),
 	},
 };
 
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 4616bf2..e112877 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2575,11 +2575,13 @@
 #define SMSC911X_PM_OPS NULL
 #endif
 
+#ifdef CONFIG_OF
 static const struct of_device_id smsc911x_dt_ids[] = {
 	{ .compatible = "smsc,lan9115", },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, smsc911x_dt_ids);
+#endif
 
 static struct platform_driver smsc911x_driver = {
 	.probe = smsc911x_drv_probe,
@@ -2588,7 +2590,7 @@
 		.name	= SMSC_CHIPNAME,
 		.owner	= THIS_MODULE,
 		.pm	= SMSC911X_PM_OPS,
-		.of_match_table = smsc911x_dt_ids,
+		.of_match_table = of_match_ptr(smsc911x_dt_ids),
 	},
 };
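
Both conversions lean on of_match_ptr(), which is what lets the #else stub above disappear: with CONFIG_OF unset the macro degenerates to NULL and the (now #ifdef-guarded) table is never referenced. Its definition is essentially:

	#ifdef CONFIG_OF
	#define of_match_ptr(ptr)	(ptr)
	#else
	#define of_match_ptr(ptr)	NULL
	#endif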
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 023a4fb..b05df89 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -127,14 +127,14 @@
 }
 static inline void stmmac_unregister_platform(void)
 {
-	platform_driver_register(&stmmac_pltfr_driver);
+	platform_driver_unregister(&stmmac_pltfr_driver);
 }
 #else
 static inline int stmmac_register_platform(void)
 {
 	pr_debug("stmmac: do not register the platf driver\n");
 
-	return -EINVAL;
+	return 0;
 }
 static inline void stmmac_unregister_platform(void)
 {
@@ -162,7 +162,7 @@
 {
 	pr_debug("stmmac: do not register the PCI driver\n");
 
-	return -EINVAL;
+	return 0;
 }
 static inline void stmmac_unregister_pci(void)
 {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 542edbc..f07c061 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2194,18 +2194,20 @@
  */
 static int __init stmmac_init(void)
 {
-	int err_plt = 0;
-	int err_pci = 0;
+	int ret;
 
-	err_plt = stmmac_register_platform();
-	err_pci = stmmac_register_pci();
-
-	if ((err_pci) && (err_plt)) {
-		pr_err("stmmac: driver registration failed\n");
-		return -EINVAL;
-	}
-
+	ret = stmmac_register_platform();
+	if (ret)
+		goto err;
+	ret = stmmac_register_pci();
+	if (ret)
+		goto err_pci;
 	return 0;
+err_pci:
+	stmmac_unregister_platform();
+err:
+	pr_err("stmmac: driver registration failed\n");
+	return ret;
 }
 
 static void __exit stmmac_exit(void)
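
The rewritten stmmac_init() is the usual goto-unwind shape: each successfully registered backend gets a label that undoes it when a later step fails. In miniature, with hypothetical register_a()/register_b():

	static int __init demo_init(void)
	{
		int ret;

		ret = register_a();
		if (ret)
			goto err;
		ret = register_b();
		if (ret)
			goto err_b;
		return 0;

	err_b:
		unregister_a();
	err:
		pr_err("demo: driver registration failed\n");
		return ret;
	}

This only works because the stubs above now return 0 instead of -EINVAL, so a kernel built with just one of the PCI/platform backends still initializes.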
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 3377667..5e62c1a 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -27,8 +27,6 @@
 #include <linux/uaccess.h>
 #include <linux/workqueue.h>
 
-#include <plat/clock.h>
-
 #include "cpts.h"
 
 #ifdef CONFIG_TI_CPTS
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 40b426e..504f7f1 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -138,6 +138,8 @@
 	/* only used for fasync */
 	unsigned int flags;
 	u16 queue_index;
+	struct list_head next;
+	struct tun_struct *detached;
 };
 
 struct tun_flow_entry {
@@ -182,6 +184,8 @@
 	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
 	struct timer_list flow_gc_timer;
 	unsigned long ageing_time;
+	unsigned int numdisabled;
+	struct list_head disabled;
 };
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -385,6 +389,23 @@
 	netif_set_real_num_rx_queues(tun->dev, tun->numqueues);
 }
 
+static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile)
+{
+	tfile->detached = tun;
+	list_add_tail(&tfile->next, &tun->disabled);
+	++tun->numdisabled;
+}
+
+static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
+{
+	struct tun_struct *tun = tfile->detached;
+
+	tfile->detached = NULL;
+	list_del_init(&tfile->next);
+	--tun->numdisabled;
+	return tun;
+}
+
 static void __tun_detach(struct tun_file *tfile, bool clean)
 {
 	struct tun_file *ntfile;
@@ -406,20 +427,25 @@
 		ntfile->queue_index = index;
 
 		--tun->numqueues;
-		sock_put(&tfile->sk);
+		if (clean)
+			sock_put(&tfile->sk);
+		else
+			tun_disable_queue(tun, tfile);
 
 		synchronize_net();
 		tun_flow_delete_by_queue(tun, tun->numqueues + 1);
 		/* Drop read queue */
 		skb_queue_purge(&tfile->sk.sk_receive_queue);
 		tun_set_real_num_queues(tun);
-
-		if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST))
-			if (dev->reg_state == NETREG_REGISTERED)
-				unregister_netdevice(dev);
-	}
+	} else if (tfile->detached && clean)
+		tun = tun_enable_queue(tfile);
 
 	if (clean) {
+		if (tun && tun->numqueues == 0 && tun->numdisabled == 0 &&
+		    !(tun->flags & TUN_PERSIST))
+			if (tun->dev->reg_state == NETREG_REGISTERED)
+				unregister_netdevice(tun->dev);
+
 		BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
 				 &tfile->socket.flags));
 		sk_release_kernel(&tfile->sk);
@@ -436,7 +462,7 @@
 static void tun_detach_all(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_file *tfile;
+	struct tun_file *tfile, *tmp;
 	int i, n = tun->numqueues;
 
 	for (i = 0; i < n; i++) {
@@ -457,6 +483,12 @@
 		skb_queue_purge(&tfile->sk.sk_receive_queue);
 		sock_put(&tfile->sk);
 	}
+	list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
+		tun_enable_queue(tfile);
+		skb_queue_purge(&tfile->sk.sk_receive_queue);
+		sock_put(&tfile->sk);
+	}
+	BUG_ON(tun->numdisabled != 0);
 }
 
 static int tun_attach(struct tun_struct *tun, struct file *file)
@@ -473,7 +505,8 @@
 		goto out;
 
 	err = -E2BIG;
-	if (tun->numqueues == MAX_TAP_QUEUES)
+	if (!tfile->detached &&
+	    tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES)
 		goto out;
 
 	err = 0;
@@ -487,9 +520,13 @@
 	tfile->queue_index = tun->numqueues;
 	rcu_assign_pointer(tfile->tun, tun);
 	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
-	sock_hold(&tfile->sk);
 	tun->numqueues++;
 
+	if (tfile->detached)
+		tun_enable_queue(tfile);
+	else
+		sock_hold(&tfile->sk);
+
 	tun_set_real_num_queues(tun);
 
 	/* device is allowed to go away first, so no need to hold extra
@@ -1162,6 +1199,7 @@
 		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
 	}
 
+	skb_reset_network_header(skb);
 	rxhash = skb_get_rxhash(skb);
 	netif_rx_ni(skb);
 
@@ -1349,6 +1387,7 @@
 {
 	struct tun_struct *tun = netdev_priv(dev);
 
+	BUG_ON(!(list_empty(&tun->disabled)));
 	tun_flow_uninit(tun);
 	free_netdev(dev);
 }
@@ -1543,6 +1582,10 @@
 		err = tun_attach(tun, file);
 		if (err < 0)
 			return err;
+
+		if (tun->flags & TUN_TAP_MQ &&
+		    (tun->numqueues + tun->numdisabled > 1))
+			return err;
 	}
 	else {
 		char *name;
@@ -1601,6 +1644,7 @@
 			TUN_USER_FEATURES;
 		dev->features = dev->hw_features;
 
+		INIT_LIST_HEAD(&tun->disabled);
 		err = tun_attach(tun, file);
 		if (err < 0)
 			goto err_free_dev;
@@ -1755,32 +1799,28 @@
 {
 	struct tun_file *tfile = file->private_data;
 	struct tun_struct *tun;
-	struct net_device *dev;
 	int ret = 0;
 
 	rtnl_lock();
 
 	if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
-		dev = __dev_get_by_name(tfile->net, ifr->ifr_name);
-		if (!dev) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-
-		tun = netdev_priv(dev);
-		if (dev->netdev_ops != &tap_netdev_ops &&
-			dev->netdev_ops != &tun_netdev_ops)
+		tun = tfile->detached;
+		if (!tun)
 			ret = -EINVAL;
 		else if (tun_not_capable(tun))
 			ret = -EPERM;
 		else
 			ret = tun_attach(tun, file);
-	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
-		__tun_detach(tfile, false);
-	else
+	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
+		tun = rcu_dereference_protected(tfile->tun,
+						lockdep_rtnl_is_held());
+		if (!tun || !(tun->flags & TUN_TAP_MQ))
+			ret = -EINVAL;
+		else
+			__tun_detach(tfile, false);
+	} else
 		ret = -EINVAL;
 
-unlock:
 	rtnl_unlock();
 	return ret;
 }
@@ -2092,6 +2132,7 @@
 
 	file->private_data = tfile;
 	set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags);
+	INIT_LIST_HEAD(&tfile->next);
 
 	return 0;
 }
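
From user space, the disabled-queue list is what makes detach/attach round trips work: a queue parked with IFF_DETACH_QUEUE keeps its file descriptor and can be revived later. A sketch of the caller's side (error handling trimmed):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/if.h>
	#include <linux/if_tun.h>

	static int tap_queue_park(int fd)
	{
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		ifr.ifr_flags = IFF_DETACH_QUEUE;	/* tun_disable_queue() path */
		return ioctl(fd, TUNSETQUEUE, &ifr);
	}

	static int tap_queue_unpark(int fd)
	{
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		ifr.ifr_flags = IFF_ATTACH_QUEUE;	/* tun_enable_queue() path */
		return ioctl(fd, TUNSETQUEUE, &ifr);
	}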
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index d012982..3f3d12d 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -457,12 +457,6 @@
 }
 EXPORT_SYMBOL_GPL(usbnet_cdc_bind);
 
-static int cdc_manage_power(struct usbnet *dev, int on)
-{
-	dev->intf->needs_remote_wakeup = on;
-	return 0;
-}
-
 static const struct driver_info	cdc_info = {
 	.description =	"CDC Ethernet Device",
 	.flags =	FLAG_ETHER | FLAG_POINTTOPOINT,
@@ -470,7 +464,7 @@
 	.bind =		usbnet_cdc_bind,
 	.unbind =	usbnet_cdc_unbind,
 	.status =	usbnet_cdc_status,
-	.manage_power =	cdc_manage_power,
+	.manage_power =	usbnet_manage_power,
 };
 
 static const struct driver_info wwan_info = {
@@ -479,7 +473,7 @@
 	.bind =		usbnet_cdc_bind,
 	.unbind =	usbnet_cdc_unbind,
 	.status =	usbnet_cdc_status,
-	.manage_power =	cdc_manage_power,
+	.manage_power =	usbnet_manage_power,
 };
 
 /*-------------------------------------------------------------------------*/
@@ -487,6 +481,7 @@
 #define HUAWEI_VENDOR_ID	0x12D1
 #define NOVATEL_VENDOR_ID	0x1410
 #define ZTE_VENDOR_ID		0x19D2
+#define DELL_VENDOR_ID		0x413C
 
 static const struct usb_device_id	products [] = {
 /*
@@ -594,27 +589,29 @@
 
 /* Novatel USB551L and MC551 - handled by qmi_wwan */
 {
-	.match_flags    =   USB_DEVICE_ID_MATCH_VENDOR
-		 | USB_DEVICE_ID_MATCH_PRODUCT
-		 | USB_DEVICE_ID_MATCH_INT_INFO,
-	.idVendor               = NOVATEL_VENDOR_ID,
-	.idProduct		= 0xB001,
-	.bInterfaceClass	= USB_CLASS_COMM,
-	.bInterfaceSubClass	= USB_CDC_SUBCLASS_ETHERNET,
-	.bInterfaceProtocol	= USB_CDC_PROTO_NONE,
+	USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0xB001, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
 	.driver_info = 0,
 },
 
 /* Novatel E362 - handled by qmi_wwan */
 {
-	.match_flags    =   USB_DEVICE_ID_MATCH_VENDOR
-		 | USB_DEVICE_ID_MATCH_PRODUCT
-		 | USB_DEVICE_ID_MATCH_INT_INFO,
-	.idVendor               = NOVATEL_VENDOR_ID,
-	.idProduct		= 0x9010,
-	.bInterfaceClass	= USB_CLASS_COMM,
-	.bInterfaceSubClass	= USB_CDC_SUBCLASS_ETHERNET,
-	.bInterfaceProtocol	= USB_CDC_PROTO_NONE,
+	USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0x9010, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+	.driver_info = 0,
+},
+
+/* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */
+{
+	USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8195, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+	.driver_info = 0,
+},
+
+/* Dell Wireless 5800 V2 (Novatel E362) - handled by qmi_wwan */
+{
+	USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8196, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
 	.driver_info = 0,
 },
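
The USB_DEVICE_AND_INTERFACE_INFO() conversions are mechanical; the macro expands to the same match-flag/field combination the open-coded entries spelled out (USB_DEVICE_ID_MATCH_DEVICE is vendor + product):

	#define USB_DEVICE_AND_INTERFACE_INFO(vend, prod, cl, sc, pr) \
		.match_flags = USB_DEVICE_ID_MATCH_INT_INFO \
			| USB_DEVICE_ID_MATCH_DEVICE, \
		.idVendor = (vend), \
		.idProduct = (prod), \
		.bInterfaceClass = (cl), \
		.bInterfaceSubClass = (sc), \
		.bInterfaceProtocol = (pr)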
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index d38bc20..71b6e92 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1129,19 +1129,13 @@
 	usbnet_disconnect(intf);
 }
 
-static int cdc_ncm_manage_power(struct usbnet *dev, int status)
-{
-	dev->intf->needs_remote_wakeup = status;
-	return 0;
-}
-
 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.check_connect = cdc_ncm_check_connect,
-	.manage_power = cdc_ncm_manage_power,
+	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
 	.tx_fixup = cdc_ncm_tx_fixup,
@@ -1155,7 +1149,7 @@
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.check_connect = cdc_ncm_check_connect,
-	.manage_power = cdc_ncm_manage_power,
+	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
 	.tx_fixup = cdc_ncm_tx_fixup,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 1ea91f4..91d7cb9 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -383,6 +383,20 @@
 		                              USB_CDC_PROTO_NONE),
 		.driver_info        = (unsigned long)&qmi_wwan_info,
 	},
+	{	/* Dell Wireless 5800 (Novatel E362) */
+		USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8195,
+					      USB_CLASS_COMM,
+					      USB_CDC_SUBCLASS_ETHERNET,
+					      USB_CDC_PROTO_NONE),
+		.driver_info        = (unsigned long)&qmi_wwan_info,
+	},
+	{	/* Dell Wireless 5800 V2 (Novatel E362) */
+		USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8196,
+					      USB_CLASS_COMM,
+					      USB_CDC_SUBCLASS_ETHERNET,
+					      USB_CDC_PROTO_NONE),
+		.driver_info        = (unsigned long)&qmi_wwan_info,
+	},
 
 	/* 3. Combined interface devices matching on interface number */
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
@@ -419,6 +433,7 @@
 	{QMI_FIXED_INTF(0x19d2, 0x0199, 1)},	/* ZTE MF820S */
 	{QMI_FIXED_INTF(0x19d2, 0x0200, 1)},
 	{QMI_FIXED_INTF(0x19d2, 0x0257, 3)},	/* ZTE MF821 */
+	{QMI_FIXED_INTF(0x19d2, 0x0284, 4)},	/* ZTE MF880 */
 	{QMI_FIXED_INTF(0x19d2, 0x0326, 4)},	/* ZTE MF821D */
 	{QMI_FIXED_INTF(0x19d2, 0x1008, 4)},	/* ZTE (Vodafone) K3570-Z */
 	{QMI_FIXED_INTF(0x19d2, 0x1010, 4)},	/* ZTE (Vodafone) K3571-Z */
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index c04110b..3d4bf01 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -719,7 +719,8 @@
 	dev->flags = 0;
 	del_timer_sync (&dev->delay);
 	tasklet_kill (&dev->bh);
-	if (info->manage_power)
+	if (info->manage_power &&
+	    !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags))
 		info->manage_power(dev, 0);
 	else
 		usb_autopm_put_interface(dev->intf);
@@ -794,14 +795,14 @@
 	tasklet_schedule (&dev->bh);
 	if (info->manage_power) {
 		retval = info->manage_power(dev, 1);
-		if (retval < 0)
-			goto done_manage_power_error;
-		usb_autopm_put_interface(dev->intf);
+		if (retval < 0) {
+			retval = 0;
+			set_bit(EVENT_NO_RUNTIME_PM, &dev->flags);
+		} else {
+			usb_autopm_put_interface(dev->intf);
+		}
 	}
 	return retval;
-
-done_manage_power_error:
-	clear_bit(EVENT_DEV_OPEN, &dev->flags);
 done:
 	usb_autopm_put_interface(dev->intf);
 done_nopm:
@@ -1615,6 +1616,16 @@
 }
 EXPORT_SYMBOL(usbnet_device_suggests_idle);
 
+/*
+ * For devices that can do without special commands
+ */
+int usbnet_manage_power(struct usbnet *dev, int on)
+{
+	dev->intf->needs_remote_wakeup = on;
+	return 0;
+}
+EXPORT_SYMBOL(usbnet_manage_power);
+
 /*-------------------------------------------------------------------------*/
 static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 			     u16 value, u16 index, void *data, u16 size)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 68d64f0..a6fcf15 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -130,7 +130,6 @@
 		struct virtio_net_hdr hdr;
 		struct virtio_net_hdr_mrg_rxbuf mhdr;
 	};
-	unsigned int num_sg;
 };
 
 struct padded_vnet_hdr {
@@ -530,10 +529,10 @@
 			err = add_recvbuf_small(rq, gfp);
 
 		oom = err == -ENOMEM;
-		if (err < 0)
+		if (err)
 			break;
 		++rq->num;
-	} while (err > 0);
+	} while (rq->vq->num_free);
 	if (unlikely(rq->num > rq->max))
 		rq->max = rq->num;
 	virtqueue_kick(rq->vq);
@@ -640,10 +639,10 @@
 	return 0;
 }
 
-static unsigned int free_old_xmit_skbs(struct send_queue *sq)
+static void free_old_xmit_skbs(struct send_queue *sq)
 {
 	struct sk_buff *skb;
-	unsigned int len, tot_sgs = 0;
+	unsigned int len;
 	struct virtnet_info *vi = sq->vq->vdev->priv;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
@@ -655,10 +654,8 @@
 		stats->tx_packets++;
 		u64_stats_update_end(&stats->tx_syncp);
 
-		tot_sgs += skb_vnet_hdr(skb)->num_sg;
 		dev_kfree_skb_any(skb);
 	}
-	return tot_sgs;
 }
 
 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
@@ -666,6 +663,7 @@
 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 	struct virtnet_info *vi = sq->vq->vdev->priv;
+	unsigned num_sg;
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 
@@ -704,8 +702,8 @@
 	else
 		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
 
-	hdr->num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
-	return virtqueue_add_buf(sq->vq, sq->sg, hdr->num_sg,
+	num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	return virtqueue_add_buf(sq->vq, sq->sg, num_sg,
 				 0, skb, GFP_ATOMIC);
 }
 
@@ -714,28 +712,20 @@
 	struct virtnet_info *vi = netdev_priv(dev);
 	int qnum = skb_get_queue_mapping(skb);
 	struct send_queue *sq = &vi->sq[qnum];
-	int capacity;
+	int err;
 
 	/* Free up any pending old buffers before queueing new ones. */
 	free_old_xmit_skbs(sq);
 
 	/* Try to transmit */
-	capacity = xmit_skb(sq, skb);
+	err = xmit_skb(sq, skb);
 
-	/* This can happen with OOM and indirect buffers. */
-	if (unlikely(capacity < 0)) {
-		if (likely(capacity == -ENOMEM)) {
-			if (net_ratelimit())
-				dev_warn(&dev->dev,
-					 "TXQ (%d) failure: out of memory\n",
-					 qnum);
-		} else {
-			dev->stats.tx_fifo_errors++;
-			if (net_ratelimit())
-				dev_warn(&dev->dev,
-					 "Unexpected TXQ (%d) failure: %d\n",
-					 qnum, capacity);
-		}
+	/* This should not happen! */
+	if (unlikely(err)) {
+		dev->stats.tx_fifo_errors++;
+		if (net_ratelimit())
+			dev_warn(&dev->dev,
+				 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
 		dev->stats.tx_dropped++;
 		kfree_skb(skb);
 		return NETDEV_TX_OK;
@@ -748,12 +738,12 @@
 
 	/* Apparently nice girls don't return TX_BUSY; stop the queue
 	 * before it gets out of hand.  Naturally, this wastes entries. */
-	if (capacity < 2+MAX_SKB_FRAGS) {
+	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
 		netif_stop_subqueue(dev, qnum);
 		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 			/* More just got used, free them then recheck. */
-			capacity += free_old_xmit_skbs(sq);
-			if (capacity >= 2+MAX_SKB_FRAGS) {
+			free_old_xmit_skbs(sq);
+			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
 				netif_start_subqueue(dev, qnum);
 				virtqueue_disable_cb(sq->vq);
 			}
diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/net/wimax/i2400m/i2400m-usb.h
index 6650fde..9f1e947 100644
--- a/drivers/net/wimax/i2400m/i2400m-usb.h
+++ b/drivers/net/wimax/i2400m/i2400m-usb.h
@@ -152,6 +152,9 @@
 	/* Device IDs */
 	USB_DEVICE_ID_I6050 = 0x0186,
 	USB_DEVICE_ID_I6050_2 = 0x0188,
+	USB_DEVICE_ID_I6150 = 0x07d6,
+	USB_DEVICE_ID_I6150_2 = 0x07d7,
+	USB_DEVICE_ID_I6150_3 = 0x07d9,
 	USB_DEVICE_ID_I6250 = 0x0187,
 };
 
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index 713d033..080f363 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -510,6 +510,9 @@
 	switch (id->idProduct) {
 	case USB_DEVICE_ID_I6050:
 	case USB_DEVICE_ID_I6050_2:
+	case USB_DEVICE_ID_I6150:
+	case USB_DEVICE_ID_I6150_2:
+	case USB_DEVICE_ID_I6150_3:
 	case USB_DEVICE_ID_I6250:
 		i2400mu->i6050 = 1;
 		break;
@@ -759,6 +762,9 @@
 struct usb_device_id i2400mu_id_table[] = {
 	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6050) },
 	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6050_2) },
+	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150) },
+	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_2) },
+	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_3) },
 	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6250) },
 	{ USB_DEVICE(0x8086, 0x0181) },
 	{ USB_DEVICE(0x8086, 0x1403) },
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 6deaae1..28aa05f 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -156,11 +156,7 @@
 	---help---
 	  This enables support for FullMAC PCI/Cardbus prism54 devices. This
 	  driver is now deprecated in favor of the SoftMAC driver, p54pci.
-	  p54pci supports FullMAC PCI/Cardbus devices as well. For details on
-	  the scheduled removal of this driver on the kernel see the feature
-	  removal schedule:
-
-	  Documentation/feature-removal-schedule.txt
+	  p54pci supports FullMAC PCI/Cardbus devices as well.
 
 	  For more information refer to the p54 wiki:
 
diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile
index 062dfdf..67156ef 100644
--- a/drivers/net/wireless/Makefile
+++ b/drivers/net/wireless/Makefile
@@ -47,7 +47,7 @@
 
 obj-$(CONFIG_P54_COMMON)	+= p54/
 
-obj-$(CONFIG_ATH_COMMON)	+= ath/
+obj-$(CONFIG_ATH_CARDS)		+= ath/
 
 obj-$(CONFIG_MAC80211_HWSIM)	+= mac80211_hwsim.o
 
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 4ffb6a5..44f8b3f 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -685,6 +685,14 @@
 	 * to mac80211.
 	 */
 	rx_status = IEEE80211_SKB_RXCB(entry->skb);
+
+	/* Ensure that all fields of rx_status are initialized
+	 * properly. The skb->cb array was used for driver-
+	 * specific information, so rx_status might contain
+	 * garbage.
+	 */
+	memset(rx_status, 0, sizeof(*rx_status));
+
 	rx_status->mactime = rxdesc.timestamp;
 	rx_status->band = rt2x00dev->curr_band;
 	rx_status->freq = rt2x00dev->curr_freq;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index be84640..2390ddb 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -629,7 +629,7 @@
 	read_unlock(&devtree_lock);
 	return np;
 }
-EXPORT_SYMBOL(of_find_matching_node);
+EXPORT_SYMBOL(of_find_matching_node_and_match);
 
 /**
  * of_modalias_node - Lookup appropriate modalias for a device node
@@ -1114,13 +1114,36 @@
 }
 EXPORT_SYMBOL(of_parse_phandle_with_args);
 
+#if defined(CONFIG_OF_DYNAMIC)
+static int of_property_notify(int action, struct device_node *np,
+			      struct property *prop)
+{
+	struct of_prop_reconfig pr;
+
+	pr.dn = np;
+	pr.prop = prop;
+	return of_reconfig_notify(action, &pr);
+}
+#else
+static int of_property_notify(int action, struct device_node *np,
+			      struct property *prop)
+{
+	return 0;
+}
+#endif
+
 /**
- * prom_add_property - Add a property to a node
+ * of_add_property - Add a property to a node
  */
-int prom_add_property(struct device_node *np, struct property *prop)
+int of_add_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
+	int rc;
+
+	rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop);
+	if (rc)
+		return rc;
 
 	prop->next = NULL;
 	write_lock_irqsave(&devtree_lock, flags);
@@ -1146,18 +1169,23 @@
 }
 
 /**
- * prom_remove_property - Remove a property from a node.
+ * of_remove_property - Remove a property from a node.
  *
  * Note that we don't actually remove it, since we have given out
  * who-knows-how-many pointers to the data using get-property.
  * Instead we just move the property to the "dead properties"
  * list, so it won't be found any more.
  */
-int prom_remove_property(struct device_node *np, struct property *prop)
+int of_remove_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
 	int found = 0;
+	int rc;
+
+	rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
@@ -1187,7 +1215,7 @@
 }
 
 /*
- * prom_update_property - Update a property in a node, if the property does
+ * of_update_property - Update a property in a node, if the property does
  * not exist, add it.
  *
  * Note that we don't actually remove it, since we have given out
@@ -1195,19 +1223,22 @@
  * Instead we just move the property to the "dead properties" list,
  * and add the new property to the property list
  */
-int prom_update_property(struct device_node *np,
-			 struct property *newprop)
+int of_update_property(struct device_node *np, struct property *newprop)
 {
 	struct property **next, *oldprop;
 	unsigned long flags;
-	int found = 0;
+	int rc, found = 0;
+
+	rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
+	if (rc)
+		return rc;
 
 	if (!newprop->name)
 		return -EINVAL;
 
 	oldprop = of_find_property(np, newprop->name, NULL);
 	if (!oldprop)
-		return prom_add_property(np, newprop);
+		return of_add_property(np, newprop);
 
 	write_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
@@ -1246,12 +1277,55 @@
  * device tree nodes.
  */
 
+static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
+
+int of_reconfig_notifier_register(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
+
+int of_reconfig_notifier_unregister(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
+
+int of_reconfig_notify(unsigned long action, void *p)
+{
+	int rc;
+
+	rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
+	return notifier_to_errno(rc);
+}
+
+#ifdef CONFIG_PROC_DEVICETREE
+static void of_add_proc_dt_entry(struct device_node *dn)
+{
+	struct proc_dir_entry *ent;
+
+	ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+	if (ent)
+		proc_device_tree_add_node(dn, ent);
+}
+#else
+static void of_add_proc_dt_entry(struct device_node *dn)
+{
+	return;
+}
+#endif
+
 /**
  * of_attach_node - Plug a device node into the tree and global list.
  */
-void of_attach_node(struct device_node *np)
+int of_attach_node(struct device_node *np)
 {
 	unsigned long flags;
+	int rc;
+
+	rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 	np->sibling = np->parent->child;
@@ -1259,24 +1333,61 @@
 	np->parent->child = np;
 	of_allnodes = np;
 	write_unlock_irqrestore(&devtree_lock, flags);
+
+	of_add_proc_dt_entry(np);
+	return 0;
 }
 
+#ifdef CONFIG_PROC_DEVICETREE
+static void of_remove_proc_dt_entry(struct device_node *dn)
+{
+	struct device_node *parent = dn->parent;
+	struct property *prop = dn->properties;
+
+	while (prop) {
+		remove_proc_entry(prop->name, dn->pde);
+		prop = prop->next;
+	}
+
+	if (dn->pde)
+		remove_proc_entry(dn->pde->name, parent->pde);
+}
+#else
+static void of_remove_proc_dt_entry(struct device_node *dn)
+{
+	return;
+}
+#endif
+
 /**
  * of_detach_node - "Unplug" a node from the device tree.
  *
  * The caller must hold a reference to the node.  The memory associated with
  * the node is not freed until its refcount goes to zero.
  */
-void of_detach_node(struct device_node *np)
+int of_detach_node(struct device_node *np)
 {
 	struct device_node *parent;
 	unsigned long flags;
+	int rc = 0;
+
+	rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 
+	if (of_node_check_flag(np, OF_DETACHED)) {
+		/* someone already detached it */
+		write_unlock_irqrestore(&devtree_lock, flags);
+		return rc;
+	}
+
 	parent = np->parent;
-	if (!parent)
-		goto out_unlock;
+	if (!parent) {
+		write_unlock_irqrestore(&devtree_lock, flags);
+		return rc;
+	}
 
 	if (of_allnodes == np)
 		of_allnodes = np->allnext;
@@ -1301,9 +1412,10 @@
 	}
 
 	of_node_set_flag(np, OF_DETACHED);
-
-out_unlock:
 	write_unlock_irqrestore(&devtree_lock, flags);
+
+	of_remove_proc_dt_entry(np);
+	return rc;
 }
 #endif /* defined(CONFIG_OF_DYNAMIC) */
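
A consumer of the new reconfig chain registers an ordinary notifier block; the action code says whether the payload is a device node or an of_prop_reconfig. A hypothetical subscriber:

	static int demo_of_reconfig(struct notifier_block *nb,
				    unsigned long action, void *arg)
	{
		switch (action) {
		case OF_RECONFIG_ATTACH_NODE:
		case OF_RECONFIG_DETACH_NODE:
			/* arg is the struct device_node being (un)plugged */
			break;
		case OF_RECONFIG_ADD_PROPERTY:
		case OF_RECONFIG_REMOVE_PROPERTY:
		case OF_RECONFIG_UPDATE_PROPERTY:
			/* arg is a struct of_prop_reconfig */
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block demo_of_nb = {
		.notifier_call = demo_of_reconfig,
	};

	/* of_reconfig_notifier_register(&demo_of_nb); */

Since of_reconfig_notify() runs before the change is applied, a callback returning an error vetoes the operation; that is why of_attach_node() and of_detach_node() now return int.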
 
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index a65c39c..808be06 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -488,14 +488,8 @@
 		depth++;
 		pathp = (char *)p;
 		p = ALIGN(p + strlen(pathp) + 1, 4);
-		if ((*pathp) == '/') {
-			const char *lp, *np;
-			for (lp = NULL, np = pathp; *np; np++)
-				if ((*np) == '/')
-					lp = np+1;
-			if (lp != NULL)
-				pathp = lp;
-		}
+		if (*pathp == '/')
+			pathp = kbasename(pathp);
 		rc = it(p, pathp, depth, data);
 		if (rc != 0)
 			break;
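
kbasename() captures the scan that was open-coded here; the helper in linux/string.h is essentially:

	static inline const char *kbasename(const char *path)
	{
		const char *tail = strrchr(path, '/');
		return tail ? tail + 1 : path;
	}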
diff --git a/drivers/pinctrl/pinctrl-exynos5440.c b/drivers/pinctrl/pinctrl-exynos5440.c
index b8635f6..07db895 100644
--- a/drivers/pinctrl/pinctrl-exynos5440.c
+++ b/drivers/pinctrl/pinctrl-exynos5440.c
@@ -117,7 +117,7 @@
 };
 
 /* list of all possible config options supported */
-struct pin_config {
+static struct pin_config {
 	char		*prop_cfg;
 	unsigned int	cfg_type;
 } pcfgs[] = {
diff --git a/drivers/pinctrl/pinctrl-samsung.c b/drivers/pinctrl/pinctrl-samsung.c
index 8f31b65..864fed8 100644
--- a/drivers/pinctrl/pinctrl-samsung.c
+++ b/drivers/pinctrl/pinctrl-samsung.c
@@ -37,7 +37,7 @@
 #define FSUFFIX_LEN		sizeof(FUNCTION_SUFFIX)
 
 /* list of all possible config options supported */
-struct pin_config {
+static struct pin_config {
 	char		*prop_cfg;
 	unsigned int	cfg_type;
 } pcfgs[] = {
diff --git a/drivers/pinctrl/pinctrl-samsung.h b/drivers/pinctrl/pinctrl-samsung.h
index 5addfd1..e2d4e67 100644
--- a/drivers/pinctrl/pinctrl-samsung.h
+++ b/drivers/pinctrl/pinctrl-samsung.h
@@ -104,7 +104,7 @@
 
 /**
  * struct samsung_pin_bank: represent a controller pin-bank.
- * @reg_offset: starting offset of the pin-bank registers.
+ * @pctl_offset: starting offset of the pin-bank registers.
  * @pin_base: starting pin number of the bank.
  * @nr_pins: number of pins included in this bank.
  * @func_width: width of the function selector bit field.
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 6b0ebde..be79040 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -32,7 +32,7 @@
 
 #define	ASUS_NB_WMI_FILE	"asus-nb-wmi"
 
-MODULE_AUTHOR("Corentin Chary <corentincj@iksaif.net>");
+MODULE_AUTHOR("Corentin Chary <corentin.chary@gmail.com>");
 MODULE_DESCRIPTION("Asus Notebooks WMI Hotkey Driver");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index c0e9ff4..f80ae4d 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -51,7 +51,7 @@
 
 #include "asus-wmi.h"
 
-MODULE_AUTHOR("Corentin Chary <corentincj@iksaif.net>, "
+MODULE_AUTHOR("Corentin Chary <corentin.chary@gmail.com>, "
 	      "Yong Wang <yong.y.wang@intel.com>");
 MODULE_DESCRIPTION("Asus Generic WMI Driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c
index 5838332..60cb76a 100644
--- a/drivers/platform/x86/eeepc-wmi.c
+++ b/drivers/platform/x86/eeepc-wmi.c
@@ -39,7 +39,7 @@
 
 #define	EEEPC_WMI_FILE	"eeepc-wmi"
 
-MODULE_AUTHOR("Corentin Chary <corentincj@iksaif.net>");
+MODULE_AUTHOR("Corentin Chary <corentin.chary@gmail.com>");
 MODULE_DESCRIPTION("Eee PC WMI Hotkey Driver");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index adb3a4b..6ba047f 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -239,44 +239,37 @@
 	int uV;
 
 	/* If there is no battery, it cannot be charged */
-	if (!is_batt_present(cm)) {
-		val.intval = 0;
-		goto out;
-	}
+	if (!is_batt_present(cm))
+		return false;
 
 	if (cm->fuel_gauge && desc->fullbatt_full_capacity > 0) {
+		val.intval = 0;
+
 		/* Not full if capacity of fuel gauge isn't full */
 		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
 				POWER_SUPPLY_PROP_CHARGE_FULL, &val);
-		if (!ret && val.intval > desc->fullbatt_full_capacity) {
-			val.intval = 1;
-			goto out;
-		}
+		if (!ret && val.intval > desc->fullbatt_full_capacity)
+			return true;
 	}
 
 	/* Full, if it's over the fullbatt voltage */
 	if (desc->fullbatt_uV > 0) {
 		ret = get_batt_uV(cm, &uV);
-		if (!ret && uV >= desc->fullbatt_uV) {
-			val.intval = 1;
-			goto out;
-		}
+		if (!ret && uV >= desc->fullbatt_uV)
+			return true;
 	}
 
 	/* Full, if the capacity is more than fullbatt_soc */
 	if (cm->fuel_gauge && desc->fullbatt_soc > 0) {
+		val.intval = 0;
+
 		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
 				POWER_SUPPLY_PROP_CAPACITY, &val);
-		if (!ret && val.intval >= desc->fullbatt_soc) {
-			val.intval = 1;
-			goto out;
-		}
+		if (!ret && val.intval >= desc->fullbatt_soc)
+			return true;
 	}
 
-	val.intval = 0;
-
-out:
-	return val.intval ? true : false;
+	return false;
 }
 
 /**
@@ -489,8 +482,9 @@
 		return;
 	}
 
-	diff = desc->fullbatt_uV;
-	diff -= batt_uV;
+	diff = desc->fullbatt_uV - batt_uV;
+	if (diff < 0)
+		return;
 
 	dev_info(cm->dev, "VBATT dropped %duV after full-batt.\n", diff);
 
diff --git a/drivers/power/da9052-battery.c b/drivers/power/da9052-battery.c
index bb0df89..3c5c2e4 100644
--- a/drivers/power/da9052-battery.c
+++ b/drivers/power/da9052-battery.c
@@ -440,8 +440,10 @@
 static irqreturn_t da9052_bat_irq(int irq, void *data)
 {
 	struct da9052_battery *bat = data;
+	int virq;
 
-	irq -= bat->da9052->irq_base;
+	virq = regmap_irq_get_virq(bat->da9052->irq_data, irq);
+	irq -= virq;
 
 	if (irq == DA9052_IRQ_CHGEND)
 		bat->status = POWER_SUPPLY_STATUS_FULL;
@@ -567,7 +569,7 @@
 	.get_property	= da9052_bat_get_property,
 };
 
-static const char *const da9052_bat_irqs[] = {
+static char *da9052_bat_irqs[] = {
 	"BATT TEMP",
 	"DCIN DET",
 	"DCIN REM",
@@ -576,12 +578,20 @@
 	"CHG END",
 };
 
+static int da9052_bat_irq_bits[] = {
+	DA9052_IRQ_TBAT,
+	DA9052_IRQ_DCIN,
+	DA9052_IRQ_DCINREM,
+	DA9052_IRQ_VBUS,
+	DA9052_IRQ_VBUSREM,
+	DA9052_IRQ_CHGEND,
+};
+
 static s32 da9052_bat_probe(struct platform_device *pdev)
 {
 	struct da9052_pdata *pdata;
 	struct da9052_battery *bat;
 	int ret;
-	int irq;
 	int i;
 
 	bat = kzalloc(sizeof(struct da9052_battery), GFP_KERNEL);
@@ -602,15 +612,14 @@
 		bat->psy.use_for_apm = 1;
 
 	for (i = 0; i < ARRAY_SIZE(da9052_bat_irqs); i++) {
-		irq = platform_get_irq_byname(pdev, da9052_bat_irqs[i]);
-		ret = request_threaded_irq(bat->da9052->irq_base + irq,
-					   NULL, da9052_bat_irq,
-					   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-					   da9052_bat_irqs[i], bat);
+		ret = da9052_request_irq(bat->da9052,
+				da9052_bat_irq_bits[i], da9052_bat_irqs[i],
+				da9052_bat_irq, bat);
+
 		if (ret != 0) {
 			dev_err(bat->da9052->dev,
-				"DA9052 failed to request %s IRQ %d: %d\n",
-				da9052_bat_irqs[i], irq, ret);
+				"DA9052 failed to request %s IRQ: %d\n",
+				da9052_bat_irqs[i], ret);
 			goto err;
 		}
 	}
@@ -623,23 +632,20 @@
 	return 0;
 
 err:
-	while (--i >= 0) {
-		irq = platform_get_irq_byname(pdev, da9052_bat_irqs[i]);
-		free_irq(bat->da9052->irq_base + irq, bat);
-	}
+	while (--i >= 0)
+		da9052_free_irq(bat->da9052, da9052_bat_irq_bits[i], bat);
+
 	kfree(bat);
 	return ret;
 }
 static int da9052_bat_remove(struct platform_device *pdev)
 {
 	int i;
-	int irq;
 	struct da9052_battery *bat = platform_get_drvdata(pdev);
 
-	for (i = 0; i < ARRAY_SIZE(da9052_bat_irqs); i++) {
-		irq = platform_get_irq_byname(pdev, da9052_bat_irqs[i]);
-		free_irq(bat->da9052->irq_base + irq, bat);
-	}
+	for (i = 0; i < ARRAY_SIZE(da9052_bat_irqs); i++)
+		da9052_free_irq(bat->da9052, da9052_bat_irq_bits[i], bat);
+
 	power_supply_unregister(&bat->psy);
 	kfree(bat);
 
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index ed81720..e513cd9 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -112,6 +112,17 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-samsung.
 
+config PWM_SPEAR
+	tristate "STMicroelectronics SPEAr PWM support"
+	depends on PLAT_SPEAR
+	depends on OF
+	help
+	  Generic PWM framework driver for the PWM controller on ST
+	  SPEAr SoCs.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-spear.
+
 config PWM_TEGRA
 	tristate "NVIDIA Tegra PWM support"
 	depends on ARCH_TEGRA
@@ -125,6 +136,7 @@
 config  PWM_TIECAP
 	tristate "ECAP PWM support"
 	depends on SOC_AM33XX
+	select PWM_TIPWMSS
 	help
 	  PWM driver support for the ECAP APWM controller found on AM33XX
 	  TI SOC
@@ -135,6 +147,7 @@
 config  PWM_TIEHRPWM
 	tristate "EHRPWM PWM support"
 	depends on SOC_AM33XX
+	select PWM_TIPWMSS
 	help
 	  PWM driver support for the EHRPWM controller found on AM33XX
 	  TI SOC
@@ -142,14 +155,32 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-tiehrpwm.
 
-config PWM_TWL6030
-	tristate "TWL6030 PWM support"
+config  PWM_TIPWMSS
+	bool
+	depends on SOC_AM33XX && (PWM_TIEHRPWM || PWM_TIECAP)
+	help
+	  PWM Subsystem driver support for AM33xx SOC.
+
+	  PWM submodules require PWM config space access from submodule
+	  drivers and require common parent driver support.
+
+config PWM_TWL
+	tristate "TWL4030/6030 PWM support"
 	depends on TWL4030_CORE
 	help
-	  Generic PWM framework driver for TWL6030.
+	  Generic PWM framework driver for TWL4030/6030.
 
 	  To compile this driver as a module, choose M here: the module
-	  will be called pwm-twl6030.
+	  will be called pwm-twl.
+
+config PWM_TWL_LED
+	tristate "TWL4030/6030 PWM support for LED drivers"
+	depends on TWL4030_CORE
+	help
+	  Generic PWM framework driver for TWL4030/6030 LED terminals.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-twl-led.
 
 config PWM_VT8500
 	tristate "vt8500 pwm support"
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index acfe482..62a2963 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -8,8 +8,11 @@
 obj-$(CONFIG_PWM_PUV3)		+= pwm-puv3.o
 obj-$(CONFIG_PWM_PXA)		+= pwm-pxa.o
 obj-$(CONFIG_PWM_SAMSUNG)	+= pwm-samsung.o
+obj-$(CONFIG_PWM_SPEAR)		+= pwm-spear.o
 obj-$(CONFIG_PWM_TEGRA)		+= pwm-tegra.o
 obj-$(CONFIG_PWM_TIECAP)	+= pwm-tiecap.o
 obj-$(CONFIG_PWM_TIEHRPWM)	+= pwm-tiehrpwm.o
-obj-$(CONFIG_PWM_TWL6030)	+= pwm-twl6030.o
+obj-$(CONFIG_PWM_TIPWMSS)	+= pwm-tipwmss.o
+obj-$(CONFIG_PWM_TWL)		+= pwm-twl.o
+obj-$(CONFIG_PWM_TWL_LED)	+= pwm-twl-led.o
 obj-$(CONFIG_PWM_VT8500)	+= pwm-vt8500.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index f5acdaa..903138b 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -32,6 +32,9 @@
 
 #define MAX_PWMS 1024
 
+/* flags in the third cell of the DT PWM specifier */
+#define PWM_SPEC_POLARITY	(1 << 0)
+
 static DEFINE_MUTEX(pwm_lookup_lock);
 static LIST_HEAD(pwm_lookup_list);
 static DEFINE_MUTEX(pwm_lock);
@@ -129,6 +132,32 @@
 	return 0;
 }
 
+struct pwm_device *
+of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
+{
+	struct pwm_device *pwm;
+
+	if (pc->of_pwm_n_cells < 3)
+		return ERR_PTR(-EINVAL);
+
+	if (args->args[0] >= pc->npwm)
+		return ERR_PTR(-EINVAL);
+
+	pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+	if (IS_ERR(pwm))
+		return pwm;
+
+	pwm_set_period(pwm, args->args[1]);
+
+	if (args->args[2] & PWM_SPEC_POLARITY)
+		pwm_set_polarity(pwm, PWM_POLARITY_INVERSED);
+	else
+		pwm_set_polarity(pwm, PWM_POLARITY_NORMAL);
+
+	return pwm;
+}
+EXPORT_SYMBOL_GPL(of_pwm_xlate_with_flags);
+
 static struct pwm_device *
 of_pwm_simple_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
 {
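
Chip drivers whose DT binding carries a flags cell opt in by pointing at the new translator; a sketch of the hookup (the "foo" chip is hypothetical):

	foo->chip.of_xlate = of_pwm_xlate_with_flags;
	foo->chip.of_pwm_n_cells = 3;	/* index, period in ns, flags */

A specifier such as <&pwm 0 5000000 1> then selects hwpwm 0 with a 5 ms period and, through PWM_SPEC_POLARITY, inverted polarity.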
diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c
index 8f26e9f..65a86bd 100644
--- a/drivers/pwm/pwm-imx.c
+++ b/drivers/pwm/pwm-imx.c
@@ -235,7 +235,7 @@
 {
 	const struct of_device_id *of_id =
 			of_match_device(imx_pwm_dt_ids, &pdev->dev);
-	struct imx_pwm_data *data;
+	const struct imx_pwm_data *data;
 	struct imx_chip *imx;
 	struct resource *r;
 	int ret = 0;
diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c
index 015a822..1410644 100644
--- a/drivers/pwm/pwm-lpc32xx.c
+++ b/drivers/pwm/pwm-lpc32xx.c
@@ -49,9 +49,24 @@
 		c = 0; /* 0 set division by 256 */
 	period_cycles = c;
 
+	/* The duty-cycle value is as follows:
+	 *
+	 *  DUTY-CYCLE     HIGH LEVEL
+	 *      1            99.9%
+	 *      25           90.0%
+	 *      128          50.0%
+	 *      220          10.0%
+	 *      255           0.1%
+	 *      0             0.0%
+	 *
+	 * In other words, the register value is duty-cycle % 256 with
+	 * duty-cycle in the range 1-256.
+	 */
 	c = 256 * duty_ns;
 	do_div(c, period_ns);
-	duty_cycles = c;
+	if (c > 255)
+		c = 255;
+	duty_cycles = 256 - c;
 
 	writel(PWM_ENABLE | PWM_RELOADV(period_cycles) | PWM_DUTY(duty_cycles),
 		lpc32xx->base + (pwm->hwpwm << 2));
@@ -106,6 +121,7 @@
 	lpc32xx->chip.dev = &pdev->dev;
 	lpc32xx->chip.ops = &lpc32xx_pwm_ops;
 	lpc32xx->chip.npwm = 2;
+	lpc32xx->chip.base = -1;
 
 	ret = pwmchip_add(&lpc32xx->chip);
 	if (ret < 0) {
@@ -121,8 +137,11 @@
 static int lpc32xx_pwm_remove(struct platform_device *pdev)
 {
 	struct lpc32xx_pwm_chip *lpc32xx = platform_get_drvdata(pdev);
+	unsigned int i;
 
-	clk_disable(lpc32xx->clk);
+	for (i = 0; i < lpc32xx->chip.npwm; i++)
+		pwm_disable(&lpc32xx->chip.pwms[i]);
+
 	return pwmchip_remove(&lpc32xx->chip);
 }
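
A worked instance of the inversion documented above: for duty_ns = period_ns / 2, c = 256 * duty_ns / period_ns = 128 and the register gets 256 - 128 = 128, i.e. 50.0% high per the table; for duty_ns = period_ns, c = 256 is clamped to 255 and the register gets 256 - 255 = 1, i.e. 99.9% high.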
 
diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c
index e9b15d0..5207e6c 100644
--- a/drivers/pwm/pwm-samsung.c
+++ b/drivers/pwm/pwm-samsung.c
@@ -222,6 +222,7 @@
 
 	/* calculate base of control bits in TCON */
 	s3c->tcon_base = id == 0 ? 0 : (id * 4) + 4;
+	s3c->pwm_id = id;
 	s3c->chip.dev = &pdev->dev;
 	s3c->chip.ops = &s3c_pwm_ops;
 	s3c->chip.base = -1;
diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c
new file mode 100644
index 0000000..83b21d9
--- /dev/null
+++ b/drivers/pwm/pwm-spear.c
@@ -0,0 +1,276 @@
+/*
+ * ST Microelectronics SPEAr Pulse Width Modulator driver
+ *
+ * Copyright (C) 2012 ST Microelectronics
+ * Shiraz Hashim <shiraz.hashim@st.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define NUM_PWM		4
+
+/* PWM registers and bits definitions */
+#define PWMCR			0x00	/* Control Register */
+#define PWMCR_PWM_ENABLE	0x1
+#define PWMCR_PRESCALE_SHIFT	2
+#define PWMCR_MIN_PRESCALE	0x00
+#define PWMCR_MAX_PRESCALE	0x3FFF
+
+#define PWMDCR			0x04	/* Duty Cycle Register */
+#define PWMDCR_MIN_DUTY		0x0001
+#define PWMDCR_MAX_DUTY		0xFFFF
+
+#define PWMPCR			0x08	/* Period Register */
+#define PWMPCR_MIN_PERIOD	0x0001
+#define PWMPCR_MAX_PERIOD	0xFFFF
+
+/* Following only available on 13xx SoCs */
+#define PWMMCR			0x3C	/* Master Control Register */
+#define PWMMCR_PWM_ENABLE	0x1
+
+/**
+ * struct spear_pwm_chip - struct representing pwm chip
+ *
+ * @mmio_base: base address of pwm chip
+ * @clk: pointer to clk structure of pwm chip
+ * @chip: linux pwm chip representation
+ * @dev: pointer to device structure of pwm chip
+ */
+struct spear_pwm_chip {
+	void __iomem *mmio_base;
+	struct clk *clk;
+	struct pwm_chip chip;
+	struct device *dev;
+};
+
+static inline struct spear_pwm_chip *to_spear_pwm_chip(struct pwm_chip *chip)
+{
+	return container_of(chip, struct spear_pwm_chip, chip);
+}
+
+static inline u32 spear_pwm_readl(struct spear_pwm_chip *chip, unsigned int num,
+				  unsigned long offset)
+{
+	return readl_relaxed(chip->mmio_base + (num << 4) + offset);
+}
+
+static inline void spear_pwm_writel(struct spear_pwm_chip *chip,
+				    unsigned int num, unsigned long offset,
+				    unsigned long val)
+{
+	writel_relaxed(val, chip->mmio_base + (num << 4) + offset);
+}
+
+static int spear_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			    int duty_ns, int period_ns)
+{
+	struct spear_pwm_chip *pc = to_spear_pwm_chip(chip);
+	u64 val, div, clk_rate;
+	unsigned long prescale = PWMCR_MIN_PRESCALE, pv, dc;
+	int ret;
+
+	/*
+	 * Find pv, dc and prescale to suit duty_ns and period_ns. This is done
+	 * according to formulas described below:
+	 *
+	 * period_ns = 10^9 * (PRESCALE + 1) * PV / PWM_CLK_RATE
+	 * duty_ns = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE
+	 *
+	 * PV = (PWM_CLK_RATE * period_ns) / (10^9 * (PRESCALE + 1))
+	 * DC = (PWM_CLK_RATE * duty_ns) / (10^9 * (PRESCALE + 1))
+	 */
+	clk_rate = clk_get_rate(pc->clk);
+	while (1) {
+		div = 1000000000;
+		div *= 1 + prescale;
+		val = clk_rate * period_ns;
+		pv = div64_u64(val, div);
+		val = clk_rate * duty_ns;
+		dc = div64_u64(val, div);
+
+		/* if duty_ns and period_ns are not achievable then return */
+		if (pv < PWMPCR_MIN_PERIOD || dc < PWMDCR_MIN_DUTY)
+			return -EINVAL;
+
+		/*
+		 * if pv and dc have crossed their upper limit, then increase
+		 * prescale and recalculate pv and dc.
+		 */
+		if (pv > PWMPCR_MAX_PERIOD || dc > PWMDCR_MAX_DUTY) {
+			if (++prescale > PWMCR_MAX_PRESCALE)
+				return -EINVAL;
+			continue;
+		}
+		break;
+	}
+
+	/*
+	 * NOTE: the clock to PWM has to be enabled first before writing to the
+	 * registers.
+	 */
+	ret = clk_enable(pc->clk);
+	if (ret)
+		return ret;
+
+	spear_pwm_writel(pc, pwm->hwpwm, PWMCR,
+			prescale << PWMCR_PRESCALE_SHIFT);
+	spear_pwm_writel(pc, pwm->hwpwm, PWMDCR, dc);
+	spear_pwm_writel(pc, pwm->hwpwm, PWMPCR, pv);
+	clk_disable(pc->clk);
+
+	return 0;
+}
+
+static int spear_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct spear_pwm_chip *pc = to_spear_pwm_chip(chip);
+	int rc = 0;
+	u32 val;
+
+	rc = clk_enable(pc->clk);
+	if (rc)
+		return rc;
+
+	val = spear_pwm_readl(pc, pwm->hwpwm, PWMCR);
+	val |= PWMCR_PWM_ENABLE;
+	spear_pwm_writel(pc, pwm->hwpwm, PWMCR, val);
+
+	return 0;
+}
+
+static void spear_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct spear_pwm_chip *pc = to_spear_pwm_chip(chip);
+	u32 val;
+
+	val = spear_pwm_readl(pc, pwm->hwpwm, PWMCR);
+	val &= ~PWMCR_PWM_ENABLE;
+	spear_pwm_writel(pc, pwm->hwpwm, PWMCR, val);
+
+	clk_disable(pc->clk);
+}
+
+static const struct pwm_ops spear_pwm_ops = {
+	.config = spear_pwm_config,
+	.enable = spear_pwm_enable,
+	.disable = spear_pwm_disable,
+	.owner = THIS_MODULE,
+};
+
+static int spear_pwm_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct spear_pwm_chip *pc;
+	struct resource *r;
+	int ret;
+	u32 val;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "no memory resources defined\n");
+		return -ENODEV;
+	}
+
+	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
+	if (!pc) {
+		dev_err(&pdev->dev, "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	pc->mmio_base = devm_request_and_ioremap(&pdev->dev, r);
+	if (!pc->mmio_base)
+		return -EADDRNOTAVAIL;
+
+	pc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(pc->clk))
+		return PTR_ERR(pc->clk);
+
+	pc->dev = &pdev->dev;
+	platform_set_drvdata(pdev, pc);
+
+	pc->chip.dev = &pdev->dev;
+	pc->chip.ops = &spear_pwm_ops;
+	pc->chip.base = -1;
+	pc->chip.npwm = NUM_PWM;
+
+	ret = clk_prepare(pc->clk);
+	if (ret)
+		return ret;
+
+	if (of_device_is_compatible(np, "st,spear1340-pwm")) {
+		ret = clk_enable(pc->clk);
+		if (ret) {
+			clk_unprepare(pc->clk);
+			return ret;
+		}
+		/*
+		 * The following enables the PWM chip; channels still have
+		 * to be enabled individually through their control register.
+		 */
+		val = readl_relaxed(pc->mmio_base + PWMMCR);
+		val |= PWMMCR_PWM_ENABLE;
+		writel_relaxed(val, pc->mmio_base + PWMMCR);
+
+		clk_disable(pc->clk);
+	}
+
+	ret = pwmchip_add(&pc->chip);
+	if (ret < 0) {
+		clk_unprepare(pc->clk);
+		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
+	}
+
+	return ret;
+}
+
+static int spear_pwm_remove(struct platform_device *pdev)
+{
+	struct spear_pwm_chip *pc = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < NUM_PWM; i++)
+		pwm_disable(&pc->chip.pwms[i]);
+
+	/* clk was prepared in probe, hence unprepare it here */
+	clk_unprepare(pc->clk);
+	return pwmchip_remove(&pc->chip);
+}
+
+static struct of_device_id spear_pwm_of_match[] = {
+	{ .compatible = "st,spear320-pwm" },
+	{ .compatible = "st,spear1340-pwm" },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(of, spear_pwm_of_match);
+
+static struct platform_driver spear_pwm_driver = {
+	.driver = {
+		.name = "spear-pwm",
+		.of_match_table = spear_pwm_of_match,
+	},
+	.probe = spear_pwm_probe,
+	.remove = spear_pwm_remove,
+};
+
+module_platform_driver(spear_pwm_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Shiraz Hashim <shiraz.hashim@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.com>");
+MODULE_ALIAS("platform:spear-pwm");
diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c
index 87c091b..5cf016d 100644
--- a/drivers/pwm/pwm-tiecap.c
+++ b/drivers/pwm/pwm-tiecap.c
@@ -25,6 +25,10 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+
+#include "pwm-tipwmss.h"
 
 /* ECAP registers and bits definitions */
 #define CAP1			0x08
@@ -184,12 +188,24 @@
 	.owner		= THIS_MODULE,
 };
 
+static const struct of_device_id ecap_of_match[] = {
+	{ .compatible	= "ti,am33xx-ecap" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ecap_of_match);
+
 static int ecap_pwm_probe(struct platform_device *pdev)
 {
 	int ret;
 	struct resource *r;
 	struct clk *clk;
 	struct ecap_pwm_chip *pc;
+	u16 status;
+	struct pinctrl *pinctrl;
+
+	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(pinctrl))
+		dev_warn(&pdev->dev, "unable to select pin group\n");
 
 	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
 	if (!pc) {
@@ -211,6 +227,8 @@
 
 	pc->chip.dev = &pdev->dev;
 	pc->chip.ops = &ecap_pwm_ops;
+	pc->chip.of_xlate = of_pwm_xlate_with_flags;
+	pc->chip.of_pwm_n_cells = 3;
 	pc->chip.base = -1;
 	pc->chip.npwm = 1;
 
@@ -231,14 +249,40 @@
 	}
 
 	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	status = pwmss_submodule_state_change(pdev->dev.parent,
+			PWMSS_ECAPCLK_EN);
+	if (!(status & PWMSS_ECAPCLK_EN_ACK)) {
+		dev_err(&pdev->dev, "PWMSS config space clock enable failed\n");
+		ret = -EINVAL;
+		goto pwmss_clk_failure;
+	}
+
+	pm_runtime_put_sync(&pdev->dev);
+
 	platform_set_drvdata(pdev, pc);
 	return 0;
+
+pwmss_clk_failure:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	pwmchip_remove(&pc->chip);
+	return ret;
 }
 
 static int ecap_pwm_remove(struct platform_device *pdev)
 {
 	struct ecap_pwm_chip *pc = platform_get_drvdata(pdev);
 
+	pm_runtime_get_sync(&pdev->dev);
+	/*
+	 * Due to hardware misbehaviour, the stop_req is never
+	 * acknowledged, so checking of the status bit is skipped.
+	 */
+	pwmss_submodule_state_change(pdev->dev.parent, PWMSS_ECAPCLK_STOP_REQ);
+	pm_runtime_put_sync(&pdev->dev);
+
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	return pwmchip_remove(&pc->chip);
@@ -246,7 +290,9 @@
 
 static struct platform_driver ecap_pwm_driver = {
 	.driver = {
-		.name = "ecap",
+		.name	= "ecap",
+		.owner	= THIS_MODULE,
+		.of_match_table = ecap_of_match,
 	},
 	.probe = ecap_pwm_probe,
 	.remove = ecap_pwm_remove,
diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c
index 9ffd389..72a6dd4 100644
--- a/drivers/pwm/pwm-tiehrpwm.c
+++ b/drivers/pwm/pwm-tiehrpwm.c
@@ -25,6 +25,10 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+
+#include "pwm-tipwmss.h"
 
 /* EHRPWM registers and bits definitions */
 
@@ -115,6 +119,7 @@
 	void __iomem	*mmio_base;
 	unsigned long period_cycles[NUM_PWM_CHANNEL];
 	enum pwm_polarity polarity[NUM_PWM_CHANNEL];
+	struct	clk	*tbclk;
 };
 
 static inline struct ehrpwm_pwm_chip *to_ehrpwm_pwm_chip(struct pwm_chip *chip)
@@ -335,6 +340,9 @@
 	/* Channels polarity can be configured from action qualifier module */
 	configure_polarity(pc, pwm->hwpwm);
 
+	/* Enable TBCLK before enabling PWM device */
+	clk_enable(pc->tbclk);
+
 	/* Enable time counter for free_run */
 	ehrpwm_modify(pc->mmio_base, TBCTL, TBCTL_RUN_MASK, TBCTL_FREE_RUN);
 	return 0;
@@ -363,6 +371,9 @@
 
 	ehrpwm_modify(pc->mmio_base, AQCSFRC, aqcsfrc_mask, aqcsfrc_val);
 
+	/* Disabling TBCLK on PWM disable */
+	clk_disable(pc->tbclk);
+
 	/* Stop Time base counter */
 	ehrpwm_modify(pc->mmio_base, TBCTL, TBCTL_RUN_MASK, TBCTL_STOP_NEXT);
 
@@ -392,12 +403,24 @@
 	.owner		= THIS_MODULE,
 };
 
+static const struct of_device_id ehrpwm_of_match[] = {
+	{ .compatible	= "ti,am33xx-ehrpwm" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ehrpwm_of_match);
+
 static int ehrpwm_pwm_probe(struct platform_device *pdev)
 {
 	int ret;
 	struct resource *r;
 	struct clk *clk;
 	struct ehrpwm_pwm_chip *pc;
+	u16 status;
+	struct pinctrl *pinctrl;
+
+	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(pinctrl))
+		dev_warn(&pdev->dev, "unable to select pin group\n");
 
 	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
 	if (!pc) {
@@ -419,6 +442,8 @@
 
 	pc->chip.dev = &pdev->dev;
 	pc->chip.ops = &ehrpwm_pwm_ops;
+	pc->chip.of_xlate = of_pwm_xlate_with_flags;
+	pc->chip.of_pwm_n_cells = 3;
 	pc->chip.base = -1;
 	pc->chip.npwm = NUM_PWM_CHANNEL;
 
@@ -432,6 +457,13 @@
 	if (!pc->mmio_base)
 		return  -EADDRNOTAVAIL;
 
+	/* Acquire tbclk for Time Base EHRPWM submodule */
+	pc->tbclk = devm_clk_get(&pdev->dev, "tbclk");
+	if (IS_ERR(pc->tbclk)) {
+		dev_err(&pdev->dev, "Failed to get tbclk\n");
+		return PTR_ERR(pc->tbclk);
+	}
+
 	ret = pwmchip_add(&pc->chip);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
@@ -439,14 +471,40 @@
 	}
 
 	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	status = pwmss_submodule_state_change(pdev->dev.parent,
+			PWMSS_EPWMCLK_EN);
+	if (!(status & PWMSS_EPWMCLK_EN_ACK)) {
+		dev_err(&pdev->dev, "PWMSS config space clock enable failed\n");
+		ret = -EINVAL;
+		goto pwmss_clk_failure;
+	}
+
+	pm_runtime_put_sync(&pdev->dev);
+
 	platform_set_drvdata(pdev, pc);
 	return 0;
+
+pwmss_clk_failure:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	pwmchip_remove(&pc->chip);
+	return ret;
 }
 
 static int ehrpwm_pwm_remove(struct platform_device *pdev)
 {
 	struct ehrpwm_pwm_chip *pc = platform_get_drvdata(pdev);
 
+	pm_runtime_get_sync(&pdev->dev);
+	/*
+	 * Due to hardware misbehaviour, the stop_req is never
+	 * acknowledged, so checking of the status bit is skipped.
+	 */
+	pwmss_submodule_state_change(pdev->dev.parent, PWMSS_EPWMCLK_STOP_REQ);
+	pm_runtime_put_sync(&pdev->dev);
+
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	return pwmchip_remove(&pc->chip);
@@ -454,7 +512,9 @@
 
 static struct platform_driver ehrpwm_pwm_driver = {
 	.driver = {
-		.name = "ehrpwm",
+		.name	= "ehrpwm",
+		.owner	= THIS_MODULE,
+		.of_match_table = ehrpwm_of_match,
 	},
 	.probe = ehrpwm_pwm_probe,
 	.remove = ehrpwm_pwm_remove,
diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/pwm/pwm-tipwmss.c
new file mode 100644
index 0000000..3448a1c
--- /dev/null
+++ b/drivers/pwm/pwm-tipwmss.c
@@ -0,0 +1,139 @@
+/*
+ * TI PWM Subsystem driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/of_device.h>
+
+#include "pwm-tipwmss.h"
+
+#define PWMSS_CLKCONFIG		0x8	/* Clock gating reg */
+#define PWMSS_CLKSTATUS		0xc	/* Clock gating status reg */
+
+struct pwmss_info {
+	void __iomem	*mmio_base;
+	struct mutex	pwmss_lock;
+	u16		pwmss_clkconfig;
+};
+
+u16 pwmss_submodule_state_change(struct device *dev, int set)
+{
+	struct pwmss_info *info = dev_get_drvdata(dev);
+	u16 val;
+
+	mutex_lock(&info->pwmss_lock);
+	val = readw(info->mmio_base + PWMSS_CLKCONFIG);
+	val |= set;
+	writew(val , info->mmio_base + PWMSS_CLKCONFIG);
+	mutex_unlock(&info->pwmss_lock);
+
+	return readw(info->mmio_base + PWMSS_CLKSTATUS);
+}
+EXPORT_SYMBOL(pwmss_submodule_state_change);
+
+static const struct of_device_id pwmss_of_match[] = {
+	{ .compatible	= "ti,am33xx-pwmss" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, pwmss_of_match);
+
+static int pwmss_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct resource *r;
+	struct pwmss_info *info;
+	struct device_node *node = pdev->dev.of_node;
+
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&pdev->dev, "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	mutex_init(&info->pwmss_lock);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "no memory resource defined\n");
+		return -ENODEV;
+	}
+
+	info->mmio_base = devm_request_and_ioremap(&pdev->dev, r);
+	if (!info->mmio_base)
+		return -EADDRNOTAVAIL;
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+	platform_set_drvdata(pdev, info);
+
+	/* Populate all the child nodes here... */
+	ret = of_platform_populate(node, NULL, NULL, &pdev->dev);
+	if (ret)
+		dev_err(&pdev->dev, "failed to populate child nodes\n");
+
+	return ret;
+}
+
+static int pwmss_remove(struct platform_device *pdev)
+{
+	struct pwmss_info *info = platform_get_drvdata(pdev);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	mutex_destroy(&info->pwmss_lock);
+	return 0;
+}
+
+static int pwmss_suspend(struct device *dev)
+{
+	struct pwmss_info *info = dev_get_drvdata(dev);
+
+	info->pwmss_clkconfig = readw(info->mmio_base + PWMSS_CLKCONFIG);
+	pm_runtime_put_sync(dev);
+	return 0;
+}
+
+static int pwmss_resume(struct device *dev)
+{
+	struct pwmss_info *info = dev_get_drvdata(dev);
+
+	pm_runtime_get_sync(dev);
+	writew(info->pwmss_clkconfig, info->mmio_base + PWMSS_CLKCONFIG);
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(pwmss_pm_ops, pwmss_suspend, pwmss_resume);
+
+static struct platform_driver pwmss_driver = {
+	.driver	= {
+		.name	= "pwmss",
+		.owner	= THIS_MODULE,
+		.pm	= &pwmss_pm_ops,
+		.of_match_table	= pwmss_of_match,
+	},
+	.probe	= pwmss_probe,
+	.remove	= pwmss_remove,
+};
+
+module_platform_driver(pwmss_driver);
+
+MODULE_DESCRIPTION("PWM Subsystem driver");
+MODULE_AUTHOR("Texas Instruments");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-tipwmss.h b/drivers/pwm/pwm-tipwmss.h
new file mode 100644
index 0000000..11f76a1
--- /dev/null
+++ b/drivers/pwm/pwm-tipwmss.h
@@ -0,0 +1,39 @@
+/*
+ * TI PWM Subsystem driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __TIPWMSS_H
+#define __TIPWMSS_H
+
+#ifdef CONFIG_PWM_TIPWMSS
+/* PWM subsystem clock gating */
+#define PWMSS_ECAPCLK_EN	BIT(0)
+#define PWMSS_ECAPCLK_STOP_REQ	BIT(1)
+#define PWMSS_EPWMCLK_EN	BIT(8)
+#define PWMSS_EPWMCLK_STOP_REQ	BIT(9)
+
+#define PWMSS_ECAPCLK_EN_ACK	BIT(0)
+#define PWMSS_EPWMCLK_EN_ACK	BIT(8)
+
+extern u16 pwmss_submodule_state_change(struct device *dev, int set);
+#else
+static inline u16 pwmss_submodule_state_change(struct device *dev, int set)
+{
+	/* return all ACK bits set, i.e. unconditional success */
+	return 0xFFFF;
+}
+#endif
+#endif	/* __TIPWMSS_H */
diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c
new file mode 100644
index 0000000..9dfa0f3
--- /dev/null
+++ b/drivers/pwm/pwm-twl-led.c
@@ -0,0 +1,344 @@
+/*
+ * Driver for TWL4030/6030 Pulse Width Modulator used as LED driver
+ *
+ * Copyright (C) 2012 Texas Instruments
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This driver is a complete rewrite of the former pwm-twl6030.c authored by:
+ * Hemanth V <hemanthv@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/i2c/twl.h>
+#include <linux/slab.h>
+
+/*
+ * This driver handles the PWM driven LED terminals of TWL4030 and TWL6030.
+ * To generate the signal on TWL4030:
+ *  - LEDA uses PWMA
+ *  - LEDB uses PWMB
+ * TWL6030 has one LED pin with dedicated LEDPWM
+ */
+
+#define TWL4030_LED_MAX		0x7f
+#define TWL6030_LED_MAX		0xff
+
+/* Registers, bits and macro for TWL4030 */
+#define TWL4030_LEDEN_REG	0x00
+#define TWL4030_PWMA_REG	0x01
+
+#define TWL4030_LEDXON		(1 << 0)
+#define TWL4030_LEDXPWM		(1 << 4)
+#define TWL4030_LED_PINS	(TWL4030_LEDXON | TWL4030_LEDXPWM)
+#define TWL4030_LED_TOGGLE(led, x)	((x) << (led))
+
+/* Register, bits and macro for TWL6030 */
+#define TWL6030_LED_PWM_CTRL1	0xf4
+#define TWL6030_LED_PWM_CTRL2	0xf5
+
+#define TWL6040_LED_MODE_HW	0x00
+#define TWL6040_LED_MODE_ON	0x01
+#define TWL6040_LED_MODE_OFF	0x02
+#define TWL6040_LED_MODE_MASK	0x03
+
+struct twl_pwmled_chip {
+	struct pwm_chip chip;
+	struct mutex mutex;
+};
+
+static inline struct twl_pwmled_chip *to_twl(struct pwm_chip *chip)
+{
+	return container_of(chip, struct twl_pwmled_chip, chip);
+}
+
+static int twl4030_pwmled_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			      int duty_ns, int period_ns)
+{
+	int duty_cycle = DIV_ROUND_UP(duty_ns * TWL4030_LED_MAX, period_ns) + 1;
+	u8 pwm_config[2] = { 1, 0 };
+	int base, ret;
+
+	/*
+	 * To configure the duty period:
+	 * On-cycle is set to 1 (the minimum allowed value)
+	 * The off time of 0 is not configurable, so the mapping is:
+	 * 0 -> off cycle = 2,
+	 * 1 -> off cycle = 2,
+	 * 2 -> off cycle = 3,
+ * 126 -> off cycle 127,
+ * 127 -> off cycle 1
+ * When on cycle == off cycle the PWM will always be on
+	 */
+	if (duty_cycle == 1)
+		duty_cycle = 2;
+	else if (duty_cycle > TWL4030_LED_MAX)
+		duty_cycle = 1;
+
+	base = pwm->hwpwm * 2 + TWL4030_PWMA_REG;
+
+	pwm_config[1] = duty_cycle;
+
+	ret = twl_i2c_write(TWL4030_MODULE_LED, pwm_config, base, 2);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to configure PWM\n", pwm->label);
+
+	return ret;
+}
+
+static int twl4030_pwmled_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwmled_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_LED, &val, TWL4030_LEDEN_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read LEDEN\n", pwm->label);
+		goto out;
+	}
+
+	val |= TWL4030_LED_TOGGLE(pwm->hwpwm, TWL4030_LED_PINS);
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_LED, val, TWL4030_LEDEN_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to enable PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+	return ret;
+}
+
+static void twl4030_pwmled_disable(struct pwm_chip *chip,
+				   struct pwm_device *pwm)
+{
+	struct twl_pwmled_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_LED, &val, TWL4030_LEDEN_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read LEDEN\n", pwm->label);
+		goto out;
+	}
+
+	val &= ~TWL4030_LED_TOGGLE(pwm->hwpwm, TWL4030_LED_PINS);
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_LED, val, TWL4030_LEDEN_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+}
+
+static int twl6030_pwmled_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			      int duty_ns, int period_ns)
+{
+	int duty_cycle = (duty_ns * TWL6030_LED_MAX) / period_ns;
+	u8 on_time;
+	int ret;
+
+	on_time = duty_cycle & 0xff;
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, on_time,
+			       TWL6030_LED_PWM_CTRL1);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to configure PWM\n", pwm->label);
+
+	return ret;
+}
+
+static int twl6030_pwmled_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwmled_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL6030_MODULE_ID1, &val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read PWM_CTRL2\n",
+			pwm->label);
+		goto out;
+	}
+
+	val &= ~TWL6040_LED_MODE_MASK;
+	val |= TWL6040_LED_MODE_ON;
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to enable PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+	return ret;
+}
+
+static void twl6030_pwmled_disable(struct pwm_chip *chip,
+				   struct pwm_device *pwm)
+{
+	struct twl_pwmled_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL6030_MODULE_ID1, &val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read PWM_CTRL2\n",
+			pwm->label);
+		goto out;
+	}
+
+	val &= ~TWL6040_LED_MODE_MASK;
+	val |= TWL6040_LED_MODE_OFF;
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+}
+
+static int twl6030_pwmled_request(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwmled_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL6030_MODULE_ID1, &val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read PWM_CTRL2\n",
+			pwm->label);
+		goto out;
+	}
+
+	val &= ~TWL6040_LED_MODE_MASK;
+	val |= TWL6040_LED_MODE_OFF;
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to request PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+	return ret;
+}
+
+static void twl6030_pwmled_free(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwmled_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL6030_MODULE_ID1, &val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read PWM_CTRL2\n",
+			pwm->label);
+		goto out;
+	}
+
+	val &= ~TWL6040_LED_MODE_MASK;
+	val |= TWL6040_LED_MODE_HW;
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_LED_PWM_CTRL2);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to free PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+}
+
+static const struct pwm_ops twl4030_pwmled_ops = {
+	.enable = twl4030_pwmled_enable,
+	.disable = twl4030_pwmled_disable,
+	.config = twl4030_pwmled_config,
+};
+
+static const struct pwm_ops twl6030_pwmled_ops = {
+	.enable = twl6030_pwmled_enable,
+	.disable = twl6030_pwmled_disable,
+	.config = twl6030_pwmled_config,
+	.request = twl6030_pwmled_request,
+	.free = twl6030_pwmled_free,
+};
+
+static int twl_pwmled_probe(struct platform_device *pdev)
+{
+	struct twl_pwmled_chip *twl;
+	int ret;
+
+	twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL);
+	if (!twl)
+		return -ENOMEM;
+
+	if (twl_class_is_4030()) {
+		twl->chip.ops = &twl4030_pwmled_ops;
+		twl->chip.npwm = 2;
+	} else {
+		twl->chip.ops = &twl6030_pwmled_ops;
+		twl->chip.npwm = 1;
+	}
+
+	twl->chip.dev = &pdev->dev;
+	twl->chip.base = -1;
+
+	mutex_init(&twl->mutex);
+
+	ret = pwmchip_add(&twl->chip);
+	if (ret < 0)
+		return ret;
+
+	platform_set_drvdata(pdev, twl);
+
+	return 0;
+}
+
+static int twl_pwmled_remove(struct platform_device *pdev)
+{
+	struct twl_pwmled_chip *twl = platform_get_drvdata(pdev);
+
+	return pwmchip_remove(&twl->chip);
+}
+
+#ifdef CONFIG_OF
+static struct of_device_id twl_pwmled_of_match[] = {
+	{ .compatible = "ti,twl4030-pwmled" },
+	{ .compatible = "ti,twl6030-pwmled" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, twl_pwmled_of_match);
+#endif
+
+static struct platform_driver twl_pwmled_driver = {
+	.driver = {
+		.name = "twl-pwmled",
+		.of_match_table = of_match_ptr(twl_pwmled_of_match),
+	},
+	.probe = twl_pwmled_probe,
+	.remove = twl_pwmled_remove,
+};
+module_platform_driver(twl_pwmled_driver);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
+MODULE_DESCRIPTION("PWM driver for TWL4030 and TWL6030 LED outputs");
+MODULE_ALIAS("platform:twl-pwmled");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
new file mode 100644
index 0000000..e65db95
--- /dev/null
+++ b/drivers/pwm/pwm-twl.c
@@ -0,0 +1,359 @@
+/*
+ * Driver for TWL4030/6030 Generic Pulse Width Modulator
+ *
+ * Copyright (C) 2012 Texas Instruments
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/i2c/twl.h>
+#include <linux/slab.h>
+
+/*
+ * This driver handles the PWMs of TWL4030 and TWL6030.
+ * The TRM names for the PWMs on TWL4030 are: PWM0, PWM1
+ * TWL6030 also has two PWMs named in the TRM as PWM1, PWM2
+ */
+
+#define TWL_PWM_MAX		0x7f
+
+/* Registers, bits and macro for TWL4030 */
+#define TWL4030_GPBR1_REG	0x0c
+#define TWL4030_PMBR1_REG	0x0d
+
+/* GPBR1 register bits */
+#define TWL4030_PWMXCLK_ENABLE	(1 << 0)
+#define TWL4030_PWMX_ENABLE	(1 << 2)
+#define TWL4030_PWMX_BITS	(TWL4030_PWMX_ENABLE | TWL4030_PWMXCLK_ENABLE)
+#define TWL4030_PWM_TOGGLE(pwm, x)	((x) << (pwm))
+
+/* PMBR1 register bits */
+#define TWL4030_GPIO6_PWM0_MUTE_MASK		(0x03 << 2)
+#define TWL4030_GPIO6_PWM0_MUTE_PWM0		(0x01 << 2)
+#define TWL4030_GPIO7_VIBRASYNC_PWM1_MASK	(0x03 << 4)
+#define TWL4030_GPIO7_VIBRASYNC_PWM1_PWM1	(0x03 << 4)
+
+/* Register, bits and macro for TWL6030 */
+#define TWL6030_TOGGLE3_REG	0x92
+
+#define TWL6030_PWMXR		(1 << 0)
+#define TWL6030_PWMXS		(1 << 1)
+#define TWL6030_PWMXEN		(1 << 2)
+#define TWL6030_PWM_TOGGLE(pwm, x)	((x) << ((pwm) * 3))
+
+struct twl_pwm_chip {
+	struct pwm_chip chip;
+	struct mutex mutex;
+	u8 twl6030_toggle3;
+	u8 twl4030_pwm_mux;
+};
+
+static inline struct twl_pwm_chip *to_twl(struct pwm_chip *chip)
+{
+	return container_of(chip, struct twl_pwm_chip, chip);
+}
+
+static int twl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			      int duty_ns, int period_ns)
+{
+	int duty_cycle = DIV_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1;
+	u8 pwm_config[2] = { 1, 0 };
+	int base, ret;
+
+	/*
+	 * To configure the duty period:
+	 * On-cycle is set to 1 (the minimum allowed value)
+	 * The off time of 0 is not configurable, so the mapping is:
+	 * 0 -> off cycle = 2,
+	 * 1 -> off cycle = 2,
+	 * 2 -> off cycle = 3,
+ * 126 -> off cycle 127,
+ * 127 -> off cycle 1
+ * When on cycle == off cycle the PWM will always be on
+	 */
+	if (duty_cycle == 1)
+		duty_cycle = 2;
+	else if (duty_cycle > TWL_PWM_MAX)
+		duty_cycle = 1;
+
+	base = pwm->hwpwm * 3;
+
+	pwm_config[1] = duty_cycle;
+
+	ret = twl_i2c_write(TWL_MODULE_PWM, pwm_config, base, 2);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to configure PWM\n", pwm->label);
+
+	return ret;
+}
+
+static int twl4030_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwm_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_INTBR, &val, TWL4030_GPBR1_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read GPBR1\n", pwm->label);
+		goto out;
+	}
+
+	val |= TWL4030_PWM_TOGGLE(pwm->hwpwm, TWL4030_PWMXCLK_ENABLE);
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_INTBR, val, TWL4030_GPBR1_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to enable PWM\n", pwm->label);
+
+	val |= TWL4030_PWM_TOGGLE(pwm->hwpwm, TWL4030_PWMX_ENABLE);
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_INTBR, val, TWL4030_GPBR1_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to enable PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+	return ret;
+}
+
+static void twl4030_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwm_chip *twl = to_twl(chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_INTBR, &val, TWL4030_GPBR1_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read GPBR1\n", pwm->label);
+		goto out;
+	}
+
+	val &= ~TWL4030_PWM_TOGGLE(pwm->hwpwm, TWL4030_PWMX_ENABLE);
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_INTBR, val, TWL4030_GPBR1_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label);
+
+	val &= ~TWL4030_PWM_TOGGLE(pwm->hwpwm, TWL4030_PWMXCLK_ENABLE);
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_INTBR, val, TWL4030_GPBR1_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+}
+
+static int twl4030_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwm_chip *twl = to_twl(chip);
+	int ret;
+	u8 val, mask, bits;
+
+	if (pwm->hwpwm == 1) {
+		mask = TWL4030_GPIO7_VIBRASYNC_PWM1_MASK;
+		bits = TWL4030_GPIO7_VIBRASYNC_PWM1_PWM1;
+	} else {
+		mask = TWL4030_GPIO6_PWM0_MUTE_MASK;
+		bits = TWL4030_GPIO6_PWM0_MUTE_PWM0;
+	}
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_INTBR, &val, TWL4030_PMBR1_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read PMBR1\n", pwm->label);
+		goto out;
+	}
+
+	/* Save the current MUX configuration for the PWM */
+	twl->twl4030_pwm_mux &= ~mask;
+	twl->twl4030_pwm_mux |= (val & mask);
+
+	/* Select PWM functionality */
+	val &= ~mask;
+	val |= bits;
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_INTBR, val, TWL4030_PMBR1_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to request PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+	return ret;
+}
+
+static void twl4030_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwm_chip *twl = container_of(chip, struct twl_pwm_chip,
+						chip);
+	int ret;
+	u8 val, mask;
+
+	if (pwm->hwpwm == 1)
+		mask = TWL4030_GPIO7_VIBRASYNC_PWM1_MASK;
+	else
+		mask = TWL4030_GPIO6_PWM0_MUTE_MASK;
+
+	mutex_lock(&twl->mutex);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_INTBR, &val, TWL4030_PMBR1_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to read PMBR1\n", pwm->label);
+		goto out;
+	}
+
+	/* Restore the MUX configuration for the PWM */
+	val &= ~mask;
+	val |= (twl->twl4030_pwm_mux & mask);
+
+	ret = twl_i2c_write_u8(TWL4030_MODULE_INTBR, val, TWL4030_PMBR1_REG);
+	if (ret < 0)
+		dev_err(chip->dev, "%s: Failed to free PWM\n", pwm->label);
+
+out:
+	mutex_unlock(&twl->mutex);
+}
+
+static int twl6030_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwm_chip *twl = container_of(chip, struct twl_pwm_chip,
+						chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	val = twl->twl6030_toggle3;
+	val |= TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXS | TWL6030_PWMXEN);
+	val &= ~TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXR);
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_TOGGLE3_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to enable PWM\n", pwm->label);
+		goto out;
+	}
+
+	twl->twl6030_toggle3 = val;
+out:
+	mutex_unlock(&twl->mutex);
+	return ret;
+}
+
+static void twl6030_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct twl_pwm_chip *twl = container_of(chip, struct twl_pwm_chip,
+						chip);
+	int ret;
+	u8 val;
+
+	mutex_lock(&twl->mutex);
+	val = twl->twl6030_toggle3;
+	val |= TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXR);
+	val &= ~TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXS | TWL6030_PWMXEN);
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_TOGGLE3_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to write TOGGLE3\n", pwm->label);
+		goto out;
+	}
+
+	val |= TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXS | TWL6030_PWMXEN);
+
+	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_TOGGLE3_REG);
+	if (ret < 0) {
+		dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label);
+		goto out;
+	}
+
+	twl->twl6030_toggle3 = val;
+out:
+	mutex_unlock(&twl->mutex);
+}
+
+static const struct pwm_ops twl4030_pwm_ops = {
+	.config = twl_pwm_config,
+	.enable = twl4030_pwm_enable,
+	.disable = twl4030_pwm_disable,
+	.request = twl4030_pwm_request,
+	.free = twl4030_pwm_free,
+};
+
+static const struct pwm_ops twl6030_pwm_ops = {
+	.config = twl_pwm_config,
+	.enable = twl6030_pwm_enable,
+	.disable = twl6030_pwm_disable,
+};
+
+static int twl_pwm_probe(struct platform_device *pdev)
+{
+	struct twl_pwm_chip *twl;
+	int ret;
+
+	twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL);
+	if (!twl)
+		return -ENOMEM;
+
+	if (twl_class_is_4030())
+		twl->chip.ops = &twl4030_pwm_ops;
+	else
+		twl->chip.ops = &twl6030_pwm_ops;
+
+	twl->chip.dev = &pdev->dev;
+	twl->chip.base = -1;
+	twl->chip.npwm = 2;
+
+	mutex_init(&twl->mutex);
+
+	ret = pwmchip_add(&twl->chip);
+	if (ret < 0)
+		return ret;
+
+	platform_set_drvdata(pdev, twl);
+
+	return 0;
+}
+
+static int twl_pwm_remove(struct platform_device *pdev)
+{
+	struct twl_pwm_chip *twl = platform_get_drvdata(pdev);
+
+	return pwmchip_remove(&twl->chip);
+}
+
+#ifdef CONFIG_OF
+static struct of_device_id twl_pwm_of_match[] = {
+	{ .compatible = "ti,twl4030-pwm" },
+	{ .compatible = "ti,twl6030-pwm" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, twl_pwm_of_match);
+#endif
+
+static struct platform_driver twl_pwm_driver = {
+	.driver = {
+		.name = "twl-pwm",
+		.of_match_table = of_match_ptr(twl_pwm_of_match),
+	},
+	.probe = twl_pwm_probe,
+	.remove = twl_pwm_remove,
+};
+module_platform_driver(twl_pwm_driver);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
+MODULE_DESCRIPTION("PWM driver for TWL4030 and TWL6030");
+MODULE_ALIAS("platform:twl-pwm");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-twl6030.c b/drivers/pwm/pwm-twl6030.c
deleted file mode 100644
index 378a7e2..0000000
--- a/drivers/pwm/pwm-twl6030.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * twl6030_pwm.c
- * Driver for PHOENIX (TWL6030) Pulse Width Modulator
- *
- * Copyright (C) 2010 Texas Instruments
- * Author: Hemanth V <hemanthv@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/pwm.h>
-#include <linux/i2c/twl.h>
-#include <linux/slab.h>
-
-#define LED_PWM_CTRL1	0xF4
-#define LED_PWM_CTRL2	0xF5
-
-/* Max value for CTRL1 register */
-#define PWM_CTRL1_MAX	255
-
-/* Pull down disable */
-#define PWM_CTRL2_DIS_PD	(1 << 6)
-
-/* Current control 2.5 milli Amps */
-#define PWM_CTRL2_CURR_02	(2 << 4)
-
-/* LED supply source */
-#define PWM_CTRL2_SRC_VAC	(1 << 2)
-
-/* LED modes */
-#define PWM_CTRL2_MODE_HW	(0 << 0)
-#define PWM_CTRL2_MODE_SW	(1 << 0)
-#define PWM_CTRL2_MODE_DIS	(2 << 0)
-
-#define PWM_CTRL2_MODE_MASK	0x3
-
-struct twl6030_pwm_chip {
-	struct pwm_chip chip;
-};
-
-static int twl6030_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	int ret;
-	u8 val;
-
-	/* Configure PWM */
-	val = PWM_CTRL2_DIS_PD | PWM_CTRL2_CURR_02 | PWM_CTRL2_SRC_VAC |
-	      PWM_CTRL2_MODE_HW;
-
-	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, LED_PWM_CTRL2);
-	if (ret < 0) {
-		dev_err(chip->dev, "%s: Failed to configure PWM, Error %d\n",
-			pwm->label, ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int twl6030_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-			      int duty_ns, int period_ns)
-{
-	u8 duty_cycle = (duty_ns * PWM_CTRL1_MAX) / period_ns;
-	int ret;
-
-	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, duty_cycle, LED_PWM_CTRL1);
-	if (ret < 0) {
-		pr_err("%s: Failed to configure PWM, Error %d\n",
-			pwm->label, ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int twl6030_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	int ret;
-	u8 val;
-
-	ret = twl_i2c_read_u8(TWL6030_MODULE_ID1, &val, LED_PWM_CTRL2);
-	if (ret < 0) {
-		dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n",
-			pwm->label, ret);
-		return ret;
-	}
-
-	/* Change mode to software control */
-	val &= ~PWM_CTRL2_MODE_MASK;
-	val |= PWM_CTRL2_MODE_SW;
-
-	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, LED_PWM_CTRL2);
-	if (ret < 0) {
-		dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n",
-			pwm->label, ret);
-		return ret;
-	}
-
-	twl_i2c_read_u8(TWL6030_MODULE_ID1, &val, LED_PWM_CTRL2);
-	return 0;
-}
-
-static void twl6030_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	int ret;
-	u8 val;
-
-	ret = twl_i2c_read_u8(TWL6030_MODULE_ID1, &val, LED_PWM_CTRL2);
-	if (ret < 0) {
-		dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
-			pwm->label, ret);
-		return;
-	}
-
-	val &= ~PWM_CTRL2_MODE_MASK;
-	val |= PWM_CTRL2_MODE_HW;
-
-	ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, LED_PWM_CTRL2);
-	if (ret < 0) {
-		dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
-			pwm->label, ret);
-	}
-}
-
-static const struct pwm_ops twl6030_pwm_ops = {
-	.request = twl6030_pwm_request,
-	.config = twl6030_pwm_config,
-	.enable = twl6030_pwm_enable,
-	.disable = twl6030_pwm_disable,
-};
-
-static int twl6030_pwm_probe(struct platform_device *pdev)
-{
-	struct twl6030_pwm_chip *twl6030;
-	int ret;
-
-	twl6030 = devm_kzalloc(&pdev->dev, sizeof(*twl6030), GFP_KERNEL);
-	if (!twl6030)
-		return -ENOMEM;
-
-	twl6030->chip.dev = &pdev->dev;
-	twl6030->chip.ops = &twl6030_pwm_ops;
-	twl6030->chip.base = -1;
-	twl6030->chip.npwm = 1;
-
-	ret = pwmchip_add(&twl6030->chip);
-	if (ret < 0)
-		return ret;
-
-	platform_set_drvdata(pdev, twl6030);
-
-	return 0;
-}
-
-static int twl6030_pwm_remove(struct platform_device *pdev)
-{
-	struct twl6030_pwm_chip *twl6030 = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&twl6030->chip);
-}
-
-static struct platform_driver twl6030_pwm_driver = {
-	.driver = {
-		.name = "twl6030-pwm",
-	},
-	.probe = twl6030_pwm_probe,
-	.remove = twl6030_pwm_remove,
-};
-module_platform_driver(twl6030_pwm_driver);
-
-MODULE_ALIAS("platform:twl6030-pwm");
-MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c
index ad14389..b0ba2d4 100644
--- a/drivers/pwm/pwm-vt8500.c
+++ b/drivers/pwm/pwm-vt8500.c
@@ -1,7 +1,8 @@
 /*
  * drivers/pwm/pwm-vt8500.c
  *
- *  Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
+ * Copyright (C) 2012 Tony Prisk <linux@prisktech.co.nz>
+ * Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -21,14 +22,24 @@
 #include <linux/io.h>
 #include <linux/pwm.h>
 #include <linux/delay.h>
+#include <linux/clk.h>
 
 #include <asm/div64.h>
 
-#define VT8500_NR_PWMS 4
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+
+/*
+ * SoC architecture allocates register space for 4 PWMs but only
+ * 2 are currently implemented.
+ */
+#define VT8500_NR_PWMS	2
 
 struct vt8500_chip {
 	struct pwm_chip chip;
 	void __iomem *base;
+	struct clk *clk;
 };
 
 #define to_vt8500_chip(chip)	container_of(chip, struct vt8500_chip, chip)
@@ -51,8 +62,15 @@
 	struct vt8500_chip *vt8500 = to_vt8500_chip(chip);
 	unsigned long long c;
 	unsigned long period_cycles, prescale, pv, dc;
+	int err;
 
-	c = 25000000/2; /* wild guess --- need to implement clocks */
+	err = clk_enable(vt8500->clk);
+	if (err < 0) {
+		dev_err(chip->dev, "failed to enable clock\n");
+		return err;
+	}
+
+	c = clk_get_rate(vt8500->clk);
 	c = c * period_ns;
 	do_div(c, 1000000000);
 	period_cycles = c;
@@ -64,8 +82,10 @@
 	if (pv > 4095)
 		pv = 4095;
 
-	if (prescale > 1023)
+	if (prescale > 1023) {
+		clk_disable(vt8500->clk);
 		return -EINVAL;
+	}
 
 	c = (unsigned long long)pv * duty_ns;
 	do_div(c, period_ns);
@@ -80,13 +100,21 @@
 	pwm_busy_wait(vt8500->base + 0x40 + pwm->hwpwm, (1 << 3));
 	writel(dc, vt8500->base + 0xc + (pwm->hwpwm << 4));
 
+	clk_disable(vt8500->clk);
 	return 0;
 }
 
 static int vt8500_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
+	int err;
 	struct vt8500_chip *vt8500 = to_vt8500_chip(chip);
 
+	err = clk_enable(vt8500->clk);
+	if (err < 0) {
+		dev_err(chip->dev, "failed to enable clock\n");
+		return err;
+	}
+
 	pwm_busy_wait(vt8500->base + 0x40 + pwm->hwpwm, (1 << 0));
 	writel(5, vt8500->base + (pwm->hwpwm << 4));
 	return 0;
@@ -98,6 +126,8 @@
 
 	pwm_busy_wait(vt8500->base + 0x40 + pwm->hwpwm, (1 << 0));
 	writel(0, vt8500->base + (pwm->hwpwm << 4));
+
+	clk_disable(vt8500->clk);
 }
 
 static struct pwm_ops vt8500_pwm_ops = {
@@ -107,12 +137,24 @@
 	.owner = THIS_MODULE,
 };
 
-static int __devinit pwm_probe(struct platform_device *pdev)
+static const struct of_device_id vt8500_pwm_dt_ids[] = {
+	{ .compatible = "via,vt8500-pwm", },
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, vt8500_pwm_dt_ids);
+
+static int vt8500_pwm_probe(struct platform_device *pdev)
 {
 	struct vt8500_chip *chip;
 	struct resource *r;
+	struct device_node *np = pdev->dev.of_node;
 	int ret;
 
+	if (!np) {
+		dev_err(&pdev->dev, "invalid devicetree node\n");
+		return -EINVAL;
+	}
+
 	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
 	if (chip == NULL) {
 		dev_err(&pdev->dev, "failed to allocate memory\n");
@@ -124,6 +166,12 @@
 	chip->chip.base = -1;
 	chip->chip.npwm = VT8500_NR_PWMS;
 
+	chip->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(chip->clk)) {
+		dev_err(&pdev->dev, "clock source not specified\n");
+		return PTR_ERR(chip->clk);
+	}
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (r == NULL) {
 		dev_err(&pdev->dev, "no memory resource defined\n");
@@ -131,18 +179,26 @@
 	}
 
 	chip->base = devm_request_and_ioremap(&pdev->dev, r);
-	if (chip->base == NULL)
+	if (!chip->base)
 		return -EADDRNOTAVAIL;
 
-	ret = pwmchip_add(&chip->chip);
-	if (ret < 0)
+	ret = clk_prepare(chip->clk);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to prepare clock\n");
 		return ret;
+	}
+
+	ret = pwmchip_add(&chip->chip);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to add PWM chip\n");
+		return ret;
+	}
 
 	platform_set_drvdata(pdev, chip);
 	return ret;
 }
 
-static int __devexit pwm_remove(struct platform_device *pdev)
+static int vt8500_pwm_remove(struct platform_device *pdev)
 {
 	struct vt8500_chip *chip;
 
@@ -150,28 +206,22 @@
 	if (chip == NULL)
 		return -ENODEV;
 
+	clk_unprepare(chip->clk);
+
 	return pwmchip_remove(&chip->chip);
 }
 
-static struct platform_driver pwm_driver = {
+static struct platform_driver vt8500_pwm_driver = {
+	.probe		= vt8500_pwm_probe,
+	.remove		= vt8500_pwm_remove,
 	.driver		= {
 		.name	= "vt8500-pwm",
 		.owner	= THIS_MODULE,
+		.of_match_table = vt8500_pwm_dt_ids,
 	},
-	.probe		= pwm_probe,
-	.remove		= __devexit_p(pwm_remove),
 };
+module_platform_driver(vt8500_pwm_driver);
 
-static int __init pwm_init(void)
-{
-	return platform_driver_register(&pwm_driver);
-}
-arch_initcall(pwm_init);
-
-static void __exit pwm_exit(void)
-{
-	platform_driver_unregister(&pwm_driver);
-}
-module_exit(pwm_exit);
-
-MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("VT8500 PWM Driver");
+MODULE_AUTHOR("Tony Prisk <linux@prisktech.co.nz>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 1859f71..027096f 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -764,7 +764,7 @@
 
 	/* add message to the remote processor's virtqueue */
 	err = virtqueue_add_buf(vrp->svq, &sg, 1, 0, msg, GFP_KERNEL);
-	if (err < 0) {
+	if (err) {
 		/*
 		 * need to reclaim the buffer here, otherwise it's lost
 		 * (memory won't leak, but rpmsg won't use it again for TX).
@@ -776,8 +776,6 @@
 
 	/* tell the remote processor it has a pending message to read */
 	virtqueue_kick(vrp->svq);
-
-	err = 0;
 out:
 	mutex_unlock(&vrp->tx_lock);
 	return err;
@@ -980,7 +978,7 @@
 
 		err = virtqueue_add_buf(vrp->rvq, &sg, 0, 1, cpu_addr,
 								GFP_KERNEL);
-		WARN_ON(err < 0); /* sanity check; this can't really happen */
+		WARN_ON(err); /* sanity check; this can't really happen */
 	}
 
 	/* suppress "tx-complete" interrupts */
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 19c03ab..d0cea02 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -269,6 +269,15 @@
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-x1205.
 
+config RTC_DRV_PCF8523
+	tristate "NXP PCF8523"
+	help
+	  If you say yes here you get support for the NXP PCF8523 RTC
+	  chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-pcf8523.
+
 config RTC_DRV_PCF8563
 	tristate "Philips PCF8563/Epson RTC8564"
 	help
@@ -600,6 +609,16 @@
 	  Say y here to support the RTC driver for Dialog Semiconductor
 	  DA9052-BC and DA9053-AA/Bx PMICs.
 
+config RTC_DRV_DA9055
+	tristate "Dialog Semiconductor DA9055 RTC"
+	depends on MFD_DA9055
+	help
+	  If you say yes here you will get support for the
+	  RTC of the Dialog DA9055 PMIC.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-da9055.
+
 config RTC_DRV_EFI
 	tristate "EFI RTC"
 	depends on IA64
@@ -768,7 +787,7 @@
 
 config RTC_DRV_IMXDI
 	tristate "Freescale IMX DryIce Real Time Clock"
-	depends on SOC_IMX25
+	depends on ARCH_MXC
 	help
 	   Support for Freescale IMX DryIce RTC
 
@@ -777,11 +796,13 @@
 
 config RTC_DRV_OMAP
 	tristate "TI OMAP1"
-	depends on ARCH_OMAP15XX || ARCH_OMAP16XX || ARCH_OMAP730 || ARCH_DAVINCI_DA8XX
+	depends on ARCH_OMAP15XX || ARCH_OMAP16XX || ARCH_OMAP730 || ARCH_DAVINCI_DA8XX || SOC_AM33XX
 	help
-	  Say "yes" here to support the real time clock on TI OMAP1 and
-	  DA8xx/OMAP-L13x chips.  This driver can also be built as a
-	  module called rtc-omap.
+	  Say "yes" here to support the on-chip real time clock
+	  present on TI OMAP1, AM33xx and DA8xx/OMAP-L13x.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-omap.
 
 config HAVE_S3C_RTC
 	bool
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 56297f0..c3f62c8 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -29,6 +29,7 @@
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
 obj-$(CONFIG_RTC_DRV_DA9052)	+= rtc-da9052.o
+obj-$(CONFIG_RTC_DRV_DA9055)	+= rtc-da9055.o
 obj-$(CONFIG_RTC_DRV_DAVINCI)	+= rtc-davinci.o
 obj-$(CONFIG_RTC_DRV_DM355EVM)	+= rtc-dm355evm.o
 obj-$(CONFIG_RTC_DRV_VRTC)	+= rtc-mrst.o
@@ -76,6 +77,7 @@
 obj-$(CONFIG_RTC_DRV_NUC900)	+= rtc-nuc900.o
 obj-$(CONFIG_RTC_DRV_OMAP)	+= rtc-omap.o
 obj-$(CONFIG_RTC_DRV_PCAP)	+= rtc-pcap.o
+obj-$(CONFIG_RTC_DRV_PCF8523)	+= rtc-pcf8523.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PCF2123)	+= rtc-pcf2123.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index f8a0aab..5143629 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -244,7 +244,6 @@
 		rtc_proc_del_device(rtc);
 		device_unregister(&rtc->dev);
 		rtc->ops = NULL;
-		ida_simple_remove(&rtc_ida, rtc->id);
 		mutex_unlock(&rtc->ops_lock);
 		put_device(&rtc->dev);
 	}
diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c
new file mode 100644
index 0000000..96bafc5
--- /dev/null
+++ b/drivers/rtc/rtc-da9055.c
@@ -0,0 +1,413 @@
+/*
+ * Real time clock driver for DA9055
+ *
+ * Copyright(c) 2012 Dialog Semiconductor Ltd.
+ *
+ * Author: David Dajun Chen <dajun.chen@diasemi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#include <linux/mfd/da9055/core.h>
+#include <linux/mfd/da9055/reg.h>
+#include <linux/mfd/da9055/pdata.h>
+
+struct da9055_rtc {
+	struct rtc_device *rtc;
+	struct da9055 *da9055;
+	int alarm_enable;
+};
+
+static int da9055_rtc_enable_alarm(struct da9055_rtc *rtc, bool enable)
+{
+	int ret;
+	if (enable) {
+		ret = da9055_reg_update(rtc->da9055, DA9055_REG_ALARM_Y,
+					DA9055_RTC_ALM_EN,
+					DA9055_RTC_ALM_EN);
+		if (ret != 0)
+			dev_err(rtc->da9055->dev, "Failed to enable ALM: %d\n",
+				ret);
+		rtc->alarm_enable = 1;
+	} else {
+		ret = da9055_reg_update(rtc->da9055, DA9055_REG_ALARM_Y,
+					DA9055_RTC_ALM_EN, 0);
+		if (ret != 0)
+			dev_err(rtc->da9055->dev,
+				"Failed to disable ALM: %d\n", ret);
+		rtc->alarm_enable = 0;
+	}
+	return ret;
+}
+
+static irqreturn_t da9055_rtc_alm_irq(int irq, void *data)
+{
+	struct da9055_rtc *rtc = data;
+
+	da9055_rtc_enable_alarm(rtc, 0);
+	rtc_update_irq(rtc->rtc, 1, RTC_IRQF | RTC_AF);
+
+	return IRQ_HANDLED;
+}
+
+static int da9055_read_alarm(struct da9055 *da9055, struct rtc_time *rtc_tm)
+{
+	int ret;
+	uint8_t v[5];
+
+	ret = da9055_group_read(da9055, DA9055_REG_ALARM_MI, 5, v);
+	if (ret != 0) {
+		dev_err(da9055->dev, "Failed to group read ALM: %d\n", ret);
+		return ret;
+	}
+
+	rtc_tm->tm_year = (v[4] & DA9055_RTC_ALM_YEAR) + 100;
+	rtc_tm->tm_mon  = (v[3] & DA9055_RTC_ALM_MONTH) - 1;
+	rtc_tm->tm_mday = v[2] & DA9055_RTC_ALM_DAY;
+	rtc_tm->tm_hour = v[1] & DA9055_RTC_ALM_HOUR;
+	rtc_tm->tm_min  = v[0] & DA9055_RTC_ALM_MIN;
+
+	return rtc_valid_tm(rtc_tm);
+}
+
+static int da9055_set_alarm(struct da9055 *da9055, struct rtc_time *rtc_tm)
+{
+	int ret;
+	uint8_t v[2];
+
+	rtc_tm->tm_year -= 100;
+	rtc_tm->tm_mon += 1;
+
+	ret = da9055_reg_update(da9055, DA9055_REG_ALARM_MI,
+				DA9055_RTC_ALM_MIN, rtc_tm->tm_min);
+	if (ret != 0) {
+		dev_err(da9055->dev, "Failed to write ALRM MIN: %d\n", ret);
+		return ret;
+	}
+
+	v[0] = rtc_tm->tm_hour;
+	v[1] = rtc_tm->tm_mday;
+
+	ret = da9055_group_write(da9055, DA9055_REG_ALARM_H, 2, v);
+	if (ret < 0)
+		return ret;
+
+	ret = da9055_reg_update(da9055, DA9055_REG_ALARM_MO,
+				DA9055_RTC_ALM_MONTH, rtc_tm->tm_mon);
+	if (ret < 0)
+		dev_err(da9055->dev, "Failed to write ALM Month:%d\n", ret);
+
+	ret = da9055_reg_update(da9055, DA9055_REG_ALARM_Y,
+				DA9055_RTC_ALM_YEAR, rtc_tm->tm_year);
+	if (ret < 0)
+		dev_err(da9055->dev, "Failed to write ALM Year:%d\n", ret);
+
+	return ret;
+}
+
+static int da9055_rtc_get_alarm_status(struct da9055 *da9055)
+{
+	int ret;
+
+	ret = da9055_reg_read(da9055, DA9055_REG_ALARM_Y);
+	if (ret < 0) {
+		dev_err(da9055->dev, "Failed to read ALM: %d\n", ret);
+		return ret;
+	}
+	ret &= DA9055_RTC_ALM_EN;
+	return (ret > 0) ? 1 : 0;
+}
+
+static int da9055_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
+{
+	struct da9055_rtc *rtc = dev_get_drvdata(dev);
+	uint8_t v[6];
+	int ret;
+
+	ret = da9055_reg_read(rtc->da9055, DA9055_REG_COUNT_S);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Registers are only valid when RTC_READ
+	 * status bit is asserted
+	 */
+	if (!(ret & DA9055_RTC_READ))
+		return -EBUSY;
+
+	ret = da9055_group_read(rtc->da9055, DA9055_REG_COUNT_S, 6, v);
+	if (ret < 0) {
+		dev_err(rtc->da9055->dev, "Failed to read RTC time : %d\n",
+			ret);
+		return ret;
+	}
+
+	rtc_tm->tm_year = (v[5] & DA9055_RTC_YEAR) + 100;
+	rtc_tm->tm_mon  = (v[4] & DA9055_RTC_MONTH) - 1;
+	rtc_tm->tm_mday = v[3] & DA9055_RTC_DAY;
+	rtc_tm->tm_hour = v[2] & DA9055_RTC_HOUR;
+	rtc_tm->tm_min  = v[1] & DA9055_RTC_MIN;
+	rtc_tm->tm_sec  = v[0] & DA9055_RTC_SEC;
+
+	return rtc_valid_tm(rtc_tm);
+}
+
+static int da9055_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct da9055_rtc *rtc;
+	uint8_t v[6];
+
+	rtc = dev_get_drvdata(dev);
+
+	v[0] = tm->tm_sec;
+	v[1] = tm->tm_min;
+	v[2] = tm->tm_hour;
+	v[3] = tm->tm_mday;
+	v[4] = tm->tm_mon + 1;
+	v[5] = tm->tm_year - 100;
+
+	return da9055_group_write(rtc->da9055, DA9055_REG_COUNT_S, 6, v);
+}
+
+static int da9055_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	int ret;
+	struct rtc_time *tm = &alrm->time;
+	struct da9055_rtc *rtc = dev_get_drvdata(dev);
+
+	ret = da9055_read_alarm(rtc->da9055, tm);
+
+	if (ret)
+		return ret;
+
+	alrm->enabled = da9055_rtc_get_alarm_status(rtc->da9055);
+
+	return 0;
+}
+
+static int da9055_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	int ret;
+	struct rtc_time *tm = &alrm->time;
+	struct da9055_rtc *rtc = dev_get_drvdata(dev);
+
+	ret = da9055_rtc_enable_alarm(rtc, 0);
+	if (ret < 0)
+		return ret;
+
+	ret = da9055_set_alarm(rtc->da9055, tm);
+	if (ret)
+		return ret;
+
+	ret = da9055_rtc_enable_alarm(rtc, 1);
+
+	return ret;
+}
+
+static int da9055_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct da9055_rtc *rtc = dev_get_drvdata(dev);
+
+	return da9055_rtc_enable_alarm(rtc, enabled);
+}
+
+static const struct rtc_class_ops da9055_rtc_ops = {
+	.read_time	= da9055_rtc_read_time,
+	.set_time	= da9055_rtc_set_time,
+	.read_alarm	= da9055_rtc_read_alarm,
+	.set_alarm	= da9055_rtc_set_alarm,
+	.alarm_irq_enable = da9055_rtc_alarm_irq_enable,
+};
+
+static int da9055_rtc_device_init(struct da9055 *da9055,
+				  struct da9055_pdata *pdata)
+{
+	int ret;
+
+	/* Enable RTC and the internal Crystal */
+	ret = da9055_reg_update(da9055, DA9055_REG_CONTROL_B,
+				DA9055_RTC_EN, DA9055_RTC_EN);
+	if (ret < 0)
+		return ret;
+	ret = da9055_reg_update(da9055, DA9055_REG_EN_32K,
+				DA9055_CRYSTAL_EN, DA9055_CRYSTAL_EN);
+	if (ret < 0)
+		return ret;
+
+	/* Enable RTC in Power Down mode */
+	ret = da9055_reg_update(da9055, DA9055_REG_CONTROL_B,
+				DA9055_RTC_MODE_PD, DA9055_RTC_MODE_PD);
+	if (ret < 0)
+		return ret;
+
+	/* Enable RTC in Reset mode */
+	if (pdata && pdata->reset_enable) {
+		ret = da9055_reg_update(da9055, DA9055_REG_CONTROL_B,
+					DA9055_RTC_MODE_SD,
+					DA9055_RTC_MODE_SD <<
+					DA9055_RTC_MODE_SD_SHIFT);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Disable the RTC TICK ALM */
+	ret = da9055_reg_update(da9055, DA9055_REG_ALARM_MO,
+				DA9055_RTC_TICK_WAKE_MASK, 0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int da9055_rtc_probe(struct platform_device *pdev)
+{
+	struct da9055_rtc *rtc;
+	struct da9055_pdata *pdata = NULL;
+	int ret, alm_irq;
+
+	rtc = devm_kzalloc(&pdev->dev, sizeof(struct da9055_rtc), GFP_KERNEL);
+	if (!rtc)
+		return -ENOMEM;
+
+	rtc->da9055 = dev_get_drvdata(pdev->dev.parent);
+	pdata = rtc->da9055->dev->platform_data;
+	platform_set_drvdata(pdev, rtc);
+
+	ret = da9055_rtc_device_init(rtc->da9055, pdata);
+	if (ret < 0)
+		goto err_rtc;
+
+	ret = da9055_reg_read(rtc->da9055, DA9055_REG_ALARM_Y);
+	if (ret < 0)
+		goto err_rtc;
+
+	if (ret & DA9055_RTC_ALM_EN)
+		rtc->alarm_enable = 1;
+
+	device_init_wakeup(&pdev->dev, 1);
+
+	rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+					&da9055_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc->rtc)) {
+		ret = PTR_ERR(rtc->rtc);
+		goto err_rtc;
+	}
+
+	alm_irq = platform_get_irq_byname(pdev, "ALM");
+	alm_irq = regmap_irq_get_virq(rtc->da9055->irq_data, alm_irq);
+	ret = devm_request_threaded_irq(&pdev->dev, alm_irq, NULL,
+					da9055_rtc_alm_irq,
+					IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+					"ALM", rtc);
+	if (ret != 0)
+		dev_err(rtc->da9055->dev, "irq registration failed: %d\n", ret);
+
+err_rtc:
+	return ret;
+}
+
+static int da9055_rtc_remove(struct platform_device *pdev)
+{
+	struct da9055_rtc *rtc = platform_get_drvdata(pdev);
+
+	rtc_device_unregister(rtc->rtc);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/* Turn off the alarm if it should not be a wake source. */
+static int da9055_rtc_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct da9055_rtc *rtc = dev_get_drvdata(&pdev->dev);
+	int ret;
+
+	if (!device_may_wakeup(&pdev->dev)) {
+		/* Disable the ALM IRQ */
+		ret = da9055_rtc_enable_alarm(rtc, 0);
+		if (ret < 0)
+			dev_err(&pdev->dev, "Failed to disable RTC ALM\n");
+	}
+
+	return 0;
+}
+
+/* Enable the alarm if it should be enabled (in case it was disabled to
+ * prevent use as a wake source).
+ */
+static int da9055_rtc_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct da9055_rtc *rtc = dev_get_drvdata(&pdev->dev);
+	int ret;
+
+	if (!device_may_wakeup(&pdev->dev)) {
+		if (rtc->alarm_enable) {
+			ret = da9055_rtc_enable_alarm(rtc, 1);
+			if (ret < 0)
+				dev_err(&pdev->dev,
+					"Failed to restart RTC ALM\n");
+		}
+	}
+
+	return 0;
+}
+
+/* Unconditionally disable the alarm */
+static int da9055_rtc_freeze(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct da9055_rtc *rtc = dev_get_drvdata(&pdev->dev);
+	int ret;
+
+	ret = da9055_rtc_enable_alarm(rtc, 0);
+	if (ret < 0)
+		dev_err(&pdev->dev, "Failed to freeze RTC ALMs\n");
+
+	return 0;
+}
+#else
+#define da9055_rtc_suspend NULL
+#define da9055_rtc_resume NULL
+#define da9055_rtc_freeze NULL
+#endif
+
+static const struct dev_pm_ops da9055_rtc_pm_ops = {
+	.suspend = da9055_rtc_suspend,
+	.resume = da9055_rtc_resume,
+
+	.freeze = da9055_rtc_freeze,
+	.thaw = da9055_rtc_resume,
+	.restore = da9055_rtc_resume,
+
+	.poweroff = da9055_rtc_suspend,
+};
+
+static struct platform_driver da9055_rtc_driver = {
+	.probe  = da9055_rtc_probe,
+	.remove = da9055_rtc_remove,
+	.driver = {
+		.name   = "da9055-rtc",
+		.owner  = THIS_MODULE,
+		.pm = &da9055_rtc_pm_ops,
+	},
+};
+
+module_platform_driver(da9055_rtc_driver);
+
+MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
+MODULE_DESCRIPTION("RTC driver for Dialog DA9055 PMIC");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:da9055-rtc");
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
index 14c2109..07cd03e 100644
--- a/drivers/rtc/rtc-davinci.c
+++ b/drivers/rtc/rtc-davinci.c
@@ -485,7 +485,7 @@
 	struct resource *res, *mem;
 	int ret = 0;
 
-	davinci_rtc = kzalloc(sizeof(struct davinci_rtc), GFP_KERNEL);
+	davinci_rtc = devm_kzalloc(&pdev->dev, sizeof(struct davinci_rtc),
+				   GFP_KERNEL);
 	if (!davinci_rtc) {
 		dev_dbg(dev, "could not allocate memory for private data\n");
 		return -ENOMEM;
@@ -494,15 +494,13 @@
 	davinci_rtc->irq = platform_get_irq(pdev, 0);
 	if (davinci_rtc->irq < 0) {
 		dev_err(dev, "no RTC irq\n");
-		ret = davinci_rtc->irq;
-		goto fail1;
+		return davinci_rtc->irq;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		dev_err(dev, "no mem resource\n");
-		ret = -EINVAL;
-		goto fail1;
+		return -EINVAL;
 	}
 
 	davinci_rtc->pbase = res->start;
@@ -513,8 +511,7 @@
 	if (!mem) {
 		dev_err(dev, "RTC registers at %08x are not free\n",
 			davinci_rtc->pbase);
-		ret = -EBUSY;
-		goto fail1;
+		return -EBUSY;
 	}
 
 	davinci_rtc->base = ioremap(davinci_rtc->pbase, davinci_rtc->base_size);
@@ -529,8 +526,9 @@
 	davinci_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
 				    &davinci_rtc_ops, THIS_MODULE);
 	if (IS_ERR(davinci_rtc->rtc)) {
-		dev_err(dev, "unable to register RTC device, err %ld\n",
-				PTR_ERR(davinci_rtc->rtc));
+		ret = PTR_ERR(davinci_rtc->rtc);
+		dev_err(dev, "unable to register RTC device, err %d\n",
+				ret);
 		goto fail3;
 	}
 
@@ -566,9 +564,6 @@
 	iounmap(davinci_rtc->base);
 fail2:
 	release_mem_region(davinci_rtc->pbase, davinci_rtc->base_size);
-fail1:
-	kfree(davinci_rtc);
-
 	return ret;
 }
 
@@ -589,8 +584,6 @@
 
 	platform_set_drvdata(pdev, NULL);
 
-	kfree(davinci_rtc);
-
 	return 0;
 }
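
The davinci hunks above are the standard devm_* refactor: once the private
struct comes from devm_kzalloc(), it is freed automatically when the device
unbinds, so both kfree() calls and the fail1 unwind label simply disappear.
Reduced to its skeleton (hypothetical driver, not davinci itself):

#include <linux/platform_device.h>
#include <linux/slab.h>

struct foo_priv {
	void __iomem *base;
	int irq;
};

static int foo_probe(struct platform_device *pdev)
{
	struct foo_priv *priv;

	/* Lifetime tied to the device: freed on unbind, no kfree() needed. */
	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	platform_set_drvdata(pdev, priv);
	return 0;	/* later error paths need no kfree() either */
}
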
 
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index cace6d3..9a86b4b 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -379,25 +379,6 @@
 		err = put_user(rtc->irq_freq, (unsigned long __user *)uarg);
 		break;
 
-#if 0
-	case RTC_EPOCH_SET:
-#ifndef rtc_epoch
-		/*
-		 * There were no RTC clocks before 1900.
-		 */
-		if (arg < 1900) {
-			err = -EINVAL;
-			break;
-		}
-		rtc_epoch = arg;
-		err = 0;
-#endif
-		break;
-
-	case RTC_EPOCH_READ:
-		err = put_user(rtc_epoch, (unsigned long __user *)uarg);
-		break;
-#endif
 	case RTC_WKALM_SET:
 		mutex_unlock(&rtc->ops_lock);
 		if (copy_from_user(&alarm, uarg, sizeof(alarm)))
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 4eed510..8da7a5c 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -36,7 +36,9 @@
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/sched.h>
+#include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <linux/of.h>
 
 /* DryIce Register Definitions */
 
@@ -495,10 +497,20 @@
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id dryice_dt_ids[] = {
+	{ .compatible = "fsl,imx25-rtc" },
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, dryice_dt_ids);
+#endif
+
 static struct platform_driver dryice_rtc_driver = {
 	.driver = {
 		   .name = "imxdi_rtc",
 		   .owner = THIS_MODULE,
+		   .of_match_table = of_match_ptr(dryice_dt_ids),
 		   },
 	.remove = __devexit_p(dryice_rtc_remove),
 };
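
The imxdi hunk is the usual recipe for adding device-tree probing to a
platform driver: an of_device_id table guarded by CONFIG_OF, exported with
MODULE_DEVICE_TABLE() for module autoloading, and referenced through
of_match_ptr() so non-OF builds see NULL. A generic sketch of the same
pattern (compatible string made up for illustration):

#include <linux/mod_devicetable.h>
#include <linux/of.h>
#include <linux/platform_device.h>

#ifdef CONFIG_OF
static const struct of_device_id foo_dt_ids[] = {
	{ .compatible = "vendor,foo-block" },	/* illustrative only */
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, foo_dt_ids);
#endif

static struct platform_driver foo_driver = {
	.driver = {
		.name = "foo",
		/* of_match_ptr() compiles to NULL when CONFIG_OF is unset */
		.of_match_table = of_match_ptr(foo_dt_ids),
	},
};
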
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 0b614e3..6009714 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -20,6 +20,9 @@
 #include <linux/rtc.h>
 #include <linux/bcd.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
 
 #include <asm/io.h>
 
@@ -38,6 +41,8 @@
  * the SoC). See the BOARD-SPECIFIC CUSTOMIZATION comment.
  */
 
+#define	DRIVER_NAME			"omap_rtc"
+
 #define OMAP_RTC_BASE			0xfffb4800
 
 /* RTC registers */
@@ -64,6 +69,9 @@
 #define OMAP_RTC_COMP_MSB_REG		0x50
 #define OMAP_RTC_OSC_REG		0x54
 
+#define OMAP_RTC_KICK0_REG		0x6c
+#define OMAP_RTC_KICK1_REG		0x70
+
 /* OMAP_RTC_CTRL_REG bit fields: */
 #define OMAP_RTC_CTRL_SPLIT		(1<<7)
 #define OMAP_RTC_CTRL_DISABLE		(1<<6)
@@ -88,10 +96,18 @@
 #define OMAP_RTC_INTERRUPTS_IT_ALARM    (1<<3)
 #define OMAP_RTC_INTERRUPTS_IT_TIMER    (1<<2)
 
+/* OMAP_RTC_KICKER values */
+#define	KICK0_VALUE			0x83e70b13
+#define	KICK1_VALUE			0x95a4f1e0
+
+#define	OMAP_RTC_HAS_KICKER		0x1
+
 static void __iomem	*rtc_base;
 
-#define rtc_read(addr)		__raw_readb(rtc_base + (addr))
-#define rtc_write(val, addr)	__raw_writeb(val, rtc_base + (addr))
+#define rtc_read(addr)		readb(rtc_base + (addr))
+#define rtc_write(val, addr)	writeb(val, rtc_base + (addr))
+
+#define rtc_writel(val, addr)	writel(val, rtc_base + (addr))
 
 
 /* we rely on the rtc framework to handle locking (rtc->ops_lock),
@@ -285,11 +301,38 @@
 static int omap_rtc_alarm;
 static int omap_rtc_timer;
 
+#define	OMAP_RTC_DATA_DA830_IDX	1
+
+static struct platform_device_id omap_rtc_devtype[] = {
+	{
+		.name	= DRIVER_NAME,
+	}, {
+		.name	= "da830-rtc",
+		.driver_data = OMAP_RTC_HAS_KICKER,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(platform, omap_rtc_devtype);
+
+static const struct of_device_id omap_rtc_of_match[] = {
+	{	.compatible	= "ti,da830-rtc",
+		.data		= &omap_rtc_devtype[OMAP_RTC_DATA_DA830_IDX],
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, omap_rtc_of_match);
+
 static int __init omap_rtc_probe(struct platform_device *pdev)
 {
 	struct resource		*res, *mem;
 	struct rtc_device	*rtc;
 	u8			reg, new_ctrl;
+	const struct platform_device_id *id_entry;
+	const struct of_device_id *of_id;
+
+	of_id = of_match_device(omap_rtc_of_match, &pdev->dev);
+	if (of_id)
+		pdev->id_entry = of_id->data;
 
 	omap_rtc_timer = platform_get_irq(pdev, 0);
 	if (omap_rtc_timer <= 0) {
@@ -322,6 +365,16 @@
 		goto fail;
 	}
 
+	/* Enable the clock/module so that we can access the registers */
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	id_entry = platform_get_device_id(pdev);
+	if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER)) {
+		rtc_writel(KICK0_VALUE, OMAP_RTC_KICK0_REG);
+		rtc_writel(KICK1_VALUE, OMAP_RTC_KICK1_REG);
+	}
+
 	rtc = rtc_device_register(pdev->name, &pdev->dev,
 			&omap_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc)) {
@@ -398,6 +451,10 @@
 fail1:
 	rtc_device_unregister(rtc);
 fail0:
+	if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER))
+		rtc_writel(0, OMAP_RTC_KICK0_REG);
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
 	iounmap(rtc_base);
 fail:
 	release_mem_region(mem->start, resource_size(mem));
@@ -408,6 +465,8 @@
 {
 	struct rtc_device	*rtc = platform_get_drvdata(pdev);
 	struct resource		*mem = dev_get_drvdata(&rtc->dev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(pdev);
 
 	device_init_wakeup(&pdev->dev, 0);
 
@@ -420,6 +479,13 @@
 		free_irq(omap_rtc_alarm, rtc);
 
 	rtc_device_unregister(rtc);
+	if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER))
+		rtc_writel(0, OMAP_RTC_KICK0_REG);
+
+	/* Disable the clock/module */
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
 	iounmap(rtc_base);
 	release_mem_region(mem->start, resource_size(mem));
 	return 0;
@@ -442,11 +508,17 @@
 	else
 		rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
 
+	/* Disable the clock/module */
+	pm_runtime_put_sync(&pdev->dev);
+
 	return 0;
 }
 
 static int omap_rtc_resume(struct platform_device *pdev)
 {
+	/* Enable the clock/module so that we can access the registers */
+	pm_runtime_get_sync(&pdev->dev);
+
 	if (device_may_wakeup(&pdev->dev))
 		disable_irq_wake(omap_rtc_alarm);
 	else
@@ -471,9 +543,11 @@
 	.resume		= omap_rtc_resume,
 	.shutdown	= omap_rtc_shutdown,
 	.driver		= {
-		.name	= "omap_rtc",
+		.name	= DRIVER_NAME,
 		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(omap_rtc_of_match),
 	},
+	.id_table	= omap_rtc_devtype,
 };
 
 static int __init rtc_init(void)
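
The omap changes combine two mechanisms: a platform_device_id table whose
driver_data carries a quirk flag (OMAP_RTC_HAS_KICKER), and an of_device_id
table whose .data points back into that id table so DT probing picks up the
same flag. The kicker itself is a write protect: on da830-class parts the
two magic values must be written to the KICK registers before the RTC
registers accept writes, and writing anything else relocks them. The
quirk-flag idiom, reduced to a sketch with made-up names:

#include <linux/platform_device.h>

#define FOO_HAS_KICKER	0x1

static const struct platform_device_id foo_devtype[] = {
	{ .name = "foo" },				/* no quirks */
	{ .name = "foo-locked", .driver_data = FOO_HAS_KICKER },
	{ }
};

static int foo_probe(struct platform_device *pdev)
{
	const struct platform_device_id *id = platform_get_device_id(pdev);

	if (id && (id->driver_data & FOO_HAS_KICKER)) {
		/* unlock write-protected registers before any access */
	}

	return 0;
}
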
diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c
new file mode 100644
index 0000000..be05a64
--- /dev/null
+++ b/drivers/rtc/rtc-pcf8523.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bcd.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/of.h>
+
+#define DRIVER_NAME "rtc-pcf8523"
+
+#define REG_CONTROL1 0x00
+#define REG_CONTROL1_CAP_SEL (1 << 7)
+#define REG_CONTROL1_STOP    (1 << 5)
+
+#define REG_CONTROL3 0x02
+#define REG_CONTROL3_PM_BLD (1 << 7) /* battery low detection disabled */
+#define REG_CONTROL3_PM_VDD (1 << 6) /* switch-over disabled */
+#define REG_CONTROL3_PM_DSM (1 << 5) /* direct switching mode */
+#define REG_CONTROL3_PM_MASK 0xe0
+
+#define REG_SECONDS  0x03
+#define REG_SECONDS_OS (1 << 7)
+
+#define REG_MINUTES  0x04
+#define REG_HOURS    0x05
+#define REG_DAYS     0x06
+#define REG_WEEKDAYS 0x07
+#define REG_MONTHS   0x08
+#define REG_YEARS    0x09
+
+struct pcf8523 {
+	struct rtc_device *rtc;
+};
+
+static int pcf8523_read(struct i2c_client *client, u8 reg, u8 *valuep)
+{
+	struct i2c_msg msgs[2];
+	u8 value = 0;
+	int err;
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = 0;
+	msgs[0].len = sizeof(reg);
+	msgs[0].buf = &reg;
+
+	msgs[1].addr = client->addr;
+	msgs[1].flags = I2C_M_RD;
+	msgs[1].len = sizeof(value);
+	msgs[1].buf = &value;
+
+	err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (err < 0)
+		return err;
+
+	*valuep = value;
+
+	return 0;
+}
+
+static int pcf8523_write(struct i2c_client *client, u8 reg, u8 value)
+{
+	u8 buffer[2] = { reg, value };
+	struct i2c_msg msg;
+	int err;
+
+	msg.addr = client->addr;
+	msg.flags = 0;
+	msg.len = sizeof(buffer);
+	msg.buf = buffer;
+
+	err = i2c_transfer(client->adapter, &msg, 1);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int pcf8523_select_capacitance(struct i2c_client *client, bool high)
+{
+	u8 value;
+	int err;
+
+	err = pcf8523_read(client, REG_CONTROL1, &value);
+	if (err < 0)
+		return err;
+
+	if (!high)
+		value &= ~REG_CONTROL1_CAP_SEL;
+	else
+		value |= REG_CONTROL1_CAP_SEL;
+
+	err = pcf8523_write(client, REG_CONTROL1, value);
+	if (err < 0)
+		return err;
+
+	return err;
+}
+
+static int pcf8523_set_pm(struct i2c_client *client, u8 pm)
+{
+	u8 value;
+	int err;
+
+	err = pcf8523_read(client, REG_CONTROL3, &value);
+	if (err < 0)
+		return err;
+
+	value = (value & ~REG_CONTROL3_PM_MASK) | pm;
+
+	err = pcf8523_write(client, REG_CONTROL3, value);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int pcf8523_stop_rtc(struct i2c_client *client)
+{
+	u8 value;
+	int err;
+
+	err = pcf8523_read(client, REG_CONTROL1, &value);
+	if (err < 0)
+		return err;
+
+	value |= REG_CONTROL1_STOP;
+
+	err = pcf8523_write(client, REG_CONTROL1, value);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int pcf8523_start_rtc(struct i2c_client *client)
+{
+	u8 value;
+	int err;
+
+	err = pcf8523_read(client, REG_CONTROL1, &value);
+	if (err < 0)
+		return err;
+
+	value &= ~REG_CONTROL1_STOP;
+
+	err = pcf8523_write(client, REG_CONTROL1, value);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	u8 start = REG_SECONDS, regs[7];
+	struct i2c_msg msgs[2];
+	int err;
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = 0;
+	msgs[0].len = 1;
+	msgs[0].buf = &start;
+
+	msgs[1].addr = client->addr;
+	msgs[1].flags = I2C_M_RD;
+	msgs[1].len = sizeof(regs);
+	msgs[1].buf = regs;
+
+	err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (err < 0)
+		return err;
+
+	if (regs[0] & REG_SECONDS_OS) {
+		/*
+		 * If the oscillator was stopped, try to clear the flag. Upon
+		 * power-up the flag is always set, but if we cannot clear it
+		 * the oscillator isn't running properly for some reason. The
+		 * sensible thing therefore is to return an error, signalling
+		 * that the clock cannot be assumed to be correct.
+		 */
+
+		regs[0] &= ~REG_SECONDS_OS;
+
+		err = pcf8523_write(client, REG_SECONDS, regs[0]);
+		if (err < 0)
+			return err;
+
+		err = pcf8523_read(client, REG_SECONDS, &regs[0]);
+		if (err < 0)
+			return err;
+
+		if (regs[0] & REG_SECONDS_OS)
+			return -EAGAIN;
+	}
+
+	tm->tm_sec = bcd2bin(regs[0] & 0x7f);
+	tm->tm_min = bcd2bin(regs[1] & 0x7f);
+	tm->tm_hour = bcd2bin(regs[2] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[3] & 0x3f);
+	tm->tm_wday = regs[4] & 0x7;
+	tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1; /* chip months are 1-12 */
+	tm->tm_year = bcd2bin(regs[6]) + 100;
+
+	return rtc_valid_tm(tm);
+}
+
+static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct i2c_msg msg;
+	u8 regs[8];
+	int err;
+
+	err = pcf8523_stop_rtc(client);
+	if (err < 0)
+		return err;
+
+	regs[0] = REG_SECONDS;
+	regs[1] = bin2bcd(tm->tm_sec);
+	regs[2] = bin2bcd(tm->tm_min);
+	regs[3] = bin2bcd(tm->tm_hour);
+	regs[4] = bin2bcd(tm->tm_mday);
+	regs[5] = tm->tm_wday;
+	regs[6] = bin2bcd(tm->tm_mon + 1); /* tm_mon counts from 0 */
+	regs[7] = bin2bcd(tm->tm_year - 100);
+
+	msg.addr = client->addr;
+	msg.flags = 0;
+	msg.len = sizeof(regs);
+	msg.buf = regs;
+
+	err = i2c_transfer(client->adapter, &msg, 1);
+	if (err < 0) {
+		/*
+		 * If the time cannot be set, restart the RTC anyway. Note
+		 * that errors are ignored if the RTC cannot be started so
+		 * that we have a chance to propagate the original error.
+		 */
+		pcf8523_start_rtc(client);
+		return err;
+	}
+
+	return pcf8523_start_rtc(client);
+}
+
+static const struct rtc_class_ops pcf8523_rtc_ops = {
+	.read_time = pcf8523_rtc_read_time,
+	.set_time = pcf8523_rtc_set_time,
+};
+
+static int pcf8523_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct pcf8523 *pcf;
+	int err;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -ENODEV;
+
+	pcf = devm_kzalloc(&client->dev, sizeof(*pcf), GFP_KERNEL);
+	if (!pcf)
+		return -ENOMEM;
+
+	err = pcf8523_select_capacitance(client, true);
+	if (err < 0)
+		return err;
+
+	err = pcf8523_set_pm(client, 0);
+	if (err < 0)
+		return err;
+
+	pcf->rtc = rtc_device_register(DRIVER_NAME, &client->dev,
+				       &pcf8523_rtc_ops, THIS_MODULE);
+	if (IS_ERR(pcf->rtc))
+		return PTR_ERR(pcf->rtc);
+
+	i2c_set_clientdata(client, pcf);
+
+	return 0;
+}
+
+static int pcf8523_remove(struct i2c_client *client)
+{
+	struct pcf8523 *pcf = i2c_get_clientdata(client);
+
+	rtc_device_unregister(pcf->rtc);
+
+	return 0;
+}
+
+static const struct i2c_device_id pcf8523_id[] = {
+	{ "pcf8523", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pcf8523_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id pcf8523_of_match[] = {
+	{ .compatible = "nxp,pcf8523" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, pcf8523_of_match);
+#endif
+
+static struct i2c_driver pcf8523_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(pcf8523_of_match),
+	},
+	.probe = pcf8523_probe,
+	.remove = pcf8523_remove,
+	.id_table = pcf8523_id,
+};
+module_i2c_driver(pcf8523_driver);
+
+MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
+MODULE_DESCRIPTION("NXP PCF8523 RTC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index a7a2a99..4bd9414 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -47,8 +47,6 @@
 /* I have yet to find an S3C implementation with more than one
  * of these rtc blocks in */
 
-static struct resource *s3c_rtc_mem;
-
 static struct clk *rtc_clk;
 static void __iomem *s3c_rtc_base;
 static int s3c_rtc_alarmno = NO_IRQ;
@@ -427,21 +425,13 @@
 {
 	struct rtc_device *rtc = platform_get_drvdata(dev);
 
-	free_irq(s3c_rtc_alarmno, rtc);
-	free_irq(s3c_rtc_tickno, rtc);
-
 	platform_set_drvdata(dev, NULL);
 	rtc_device_unregister(rtc);
 
 	s3c_rtc_setaie(&dev->dev, 0);
 
-	clk_put(rtc_clk);
 	rtc_clk = NULL;
 
-	iounmap(s3c_rtc_base);
-	release_resource(s3c_rtc_mem);
-	kfree(s3c_rtc_mem);
-
 	return 0;
 }
 
@@ -496,28 +486,18 @@
 		return -ENOENT;
 	}
 
-	s3c_rtc_mem = request_mem_region(res->start, resource_size(res),
-					 pdev->name);
-
-	if (s3c_rtc_mem == NULL) {
-		dev_err(&pdev->dev, "failed to reserve memory region\n");
-		ret = -ENOENT;
-		goto err_nores;
-	}
-
-	s3c_rtc_base = ioremap(res->start, resource_size(res));
+	s3c_rtc_base = devm_request_and_ioremap(&pdev->dev, res);
 	if (s3c_rtc_base == NULL) {
-		dev_err(&pdev->dev, "failed ioremap()\n");
-		ret = -EINVAL;
-		goto err_nomap;
+		dev_err(&pdev->dev, "failed to ioremap memory region\n");
+		return -EINVAL;
 	}
 
-	rtc_clk = clk_get(&pdev->dev, "rtc");
+	rtc_clk = devm_clk_get(&pdev->dev, "rtc");
 	if (IS_ERR(rtc_clk)) {
 		dev_err(&pdev->dev, "failed to find rtc clock source\n");
 		ret = PTR_ERR(rtc_clk);
 		rtc_clk = NULL;
-		goto err_clk;
+		return ret;
 	}
 
 	clk_enable(rtc_clk);
@@ -576,28 +556,24 @@
 
 	s3c_rtc_setfreq(&pdev->dev, 1);
 
-	ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
+	ret = devm_request_irq(&pdev->dev, s3c_rtc_alarmno, s3c_rtc_alarmirq,
 			  0,  "s3c2410-rtc alarm", rtc);
 	if (ret) {
 		dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
 		goto err_alarm_irq;
 	}
 
-	ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
+	ret = devm_request_irq(&pdev->dev, s3c_rtc_tickno, s3c_rtc_tickirq,
 			  0,  "s3c2410-rtc tick", rtc);
 	if (ret) {
 		dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
-		free_irq(s3c_rtc_alarmno, rtc);
-		goto err_tick_irq;
+		goto err_alarm_irq;
 	}
 
 	clk_disable(rtc_clk);
 
 	return 0;
 
- err_tick_irq:
-	free_irq(s3c_rtc_alarmno, rtc);
-
  err_alarm_irq:
 	platform_set_drvdata(pdev, NULL);
 	rtc_device_unregister(rtc);
@@ -605,15 +581,7 @@
  err_nortc:
 	s3c_rtc_enable(pdev, 0);
 	clk_disable(rtc_clk);
-	clk_put(rtc_clk);
 
- err_clk:
-	iounmap(s3c_rtc_base);
-
- err_nomap:
-	release_resource(s3c_rtc_mem);
-
- err_nores:
 	return ret;
 }
 
@@ -695,8 +663,6 @@
 	{},
 };
 MODULE_DEVICE_TABLE(of, s3c_rtc_dt_match);
-#else
-#define s3c_rtc_dt_match NULL
 #endif
 
 static struct platform_device_id s3c_rtc_driver_ids[] = {
@@ -727,7 +693,7 @@
 	.driver		= {
 		.name	= "s3c-rtc",
 		.owner	= THIS_MODULE,
-		.of_match_table	= s3c_rtc_dt_match,
+		.of_match_table	= of_match_ptr(s3c_rtc_dt_match),
 	},
 };
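
The s3c probe now leans on devm_request_and_ioremap(), which (as of this
series) rolls devm_request_mem_region() and devm_ioremap() into one call
that returns NULL on any failure, with both steps undone automatically on
unbind. Schematically:

#include <linux/io.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENOENT;

	/* claim + map in one call; NULL on failure, auto-released */
	base = devm_request_and_ioremap(&pdev->dev, res);
	if (!base)
		return -EINVAL;

	return 0;
}
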
 
diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index bb507d2..141fc94 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -363,35 +363,42 @@
 		dev_err(&pdev->dev, "no resource defined\n");
 		return -EBUSY;
 	}
-	if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "rtc region already claimed\n");
-		return -EBUSY;
-	}
 
-	config = kzalloc(sizeof(*config), GFP_KERNEL);
+	config = devm_kzalloc(&pdev->dev, sizeof(*config), GFP_KERNEL);
 	if (!config) {
 		dev_err(&pdev->dev, "out of memory\n");
-		status = -ENOMEM;
-		goto err_release_region;
+		return -ENOMEM;
 	}
 
-	config->clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(config->clk)) {
-		status = PTR_ERR(config->clk);
-		goto err_kfree;
+	/* alarm irqs */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no update irq?\n");
+		return irq;
 	}
 
-	status = clk_enable(config->clk);
-	if (status < 0)
-		goto err_clk_put;
+	status = devm_request_irq(&pdev->dev, irq, spear_rtc_irq, 0, pdev->name,
+			config);
+	if (status) {
+		dev_err(&pdev->dev, "Alarm interrupt IRQ%d already claimed\n",
+				irq);
+		return status;
+	}
 
-	config->ioaddr = ioremap(res->start, resource_size(res));
+	config->ioaddr = devm_request_and_ioremap(&pdev->dev, res);
 	if (!config->ioaddr) {
-		dev_err(&pdev->dev, "ioremap fail\n");
-		status = -ENOMEM;
-		goto err_disable_clock;
+		dev_err(&pdev->dev, "request-ioremap fail\n");
+		return -ENOMEM;
 	}
 
+	config->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(config->clk))
+		return PTR_ERR(config->clk);
+
+	status = clk_prepare_enable(config->clk);
+	if (status < 0)
+		return status;
+
 	spin_lock_init(&config->lock);
 	platform_set_drvdata(pdev, config);
 
@@ -401,42 +408,19 @@
 		dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
 				PTR_ERR(config->rtc));
 		status = PTR_ERR(config->rtc);
-		goto err_iounmap;
+		goto err_disable_clock;
 	}
 
-	/* alarm irqs */
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no update irq?\n");
-		status = irq;
-		goto err_clear_platdata;
-	}
-
-	status = request_irq(irq, spear_rtc_irq, 0, pdev->name, config);
-	if (status) {
-		dev_err(&pdev->dev, "Alarm interrupt IRQ%d already \
-				claimed\n", irq);
-		goto err_clear_platdata;
-	}
+	config->rtc->uie_unsupported = 1;
 
 	if (!device_can_wakeup(&pdev->dev))
 		device_init_wakeup(&pdev->dev, 1);
 
 	return 0;
 
-err_clear_platdata:
-	platform_set_drvdata(pdev, NULL);
-	rtc_device_unregister(config->rtc);
-err_iounmap:
-	iounmap(config->ioaddr);
 err_disable_clock:
-	clk_disable(config->clk);
-err_clk_put:
-	clk_put(config->clk);
-err_kfree:
-	kfree(config);
-err_release_region:
-	release_mem_region(res->start, resource_size(res));
+	platform_set_drvdata(pdev, NULL);
+	clk_disable_unprepare(config->clk);
 
 	return status;
 }
@@ -444,24 +428,11 @@
 static int __devexit spear_rtc_remove(struct platform_device *pdev)
 {
 	struct spear_rtc_config *config = platform_get_drvdata(pdev);
-	int irq;
-	struct resource *res;
 
-	/* leave rtc running, but disable irqs */
-	spear_rtc_disable_interrupt(config);
-	device_init_wakeup(&pdev->dev, 0);
-	irq = platform_get_irq(pdev, 0);
-	if (irq)
-		free_irq(irq, pdev);
-	clk_disable(config->clk);
-	clk_put(config->clk);
-	iounmap(config->ioaddr);
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-	platform_set_drvdata(pdev, NULL);
 	rtc_device_unregister(config->rtc);
-	kfree(config);
+	spear_rtc_disable_interrupt(config);
+	clk_disable_unprepare(config->clk);
+	device_init_wakeup(&pdev->dev, 0);
 
 	return 0;
 }
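
Note the spear driver switching from clk_enable() to clk_prepare_enable():
under the common clock framework a clock must first be prepared (a step
that may sleep) and then enabled (atomic), and the combined helpers keep
the pairing balanced with clk_disable_unprepare() on the way down. In
outline, using the names from the patch:

#include <linux/clk.h>

/* clk_prepare_enable() == clk_prepare() + clk_enable() */
status = clk_prepare_enable(config->clk);
if (status < 0)
	return status;

/* ... hardware is clocked; do the work ... */

/* teardown reverses both steps */
clk_disable_unprepare(config->clk);
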
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index 7e96254b..974b9ae 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -152,24 +152,24 @@
 
 	if ((test1 = platform_device_alloc("rtc-test", 1)) == NULL) {
 		err = -ENOMEM;
-		goto exit_free_test0;
+		goto exit_put_test0;
 	}
 
 	if ((err = platform_device_add(test0)))
-		goto exit_free_test1;
+		goto exit_put_test1;
 
 	if ((err = platform_device_add(test1)))
-		goto exit_device_unregister;
+		goto exit_del_test0;
 
 	return 0;
 
-exit_device_unregister:
-	platform_device_unregister(test0);
+exit_del_test0:
+	platform_device_del(test0);
 
-exit_free_test1:
+exit_put_test1:
 	platform_device_put(test1);
 
-exit_free_test0:
+exit_put_test0:
 	platform_device_put(test0);
 
 exit_driver_unregister:
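
The rtc-test fix is entirely about matching teardown to how far setup got:
a device that was only allocated must be dropped with platform_device_put(),
one that was also added is taken down with platform_device_del()
(platform_device_unregister() is del + put, so calling it on a half-set-up
device over-releases). The safe shape:

#include <linux/platform_device.h>

static int __init example_init(void)
{
	struct platform_device *pdev;
	int err;

	pdev = platform_device_alloc("example", -1);
	if (!pdev)
		return -ENOMEM;

	err = platform_device_add(pdev);
	if (err) {
		platform_device_put(pdev);	/* alloc'd, never added */
		return err;
	}

	/* later: platform_device_unregister(pdev) == del + put */
	return 0;
}
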
diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c
index 073108d..22eb4eb 100644
--- a/drivers/rtc/rtc-tps65910.c
+++ b/drivers/rtc/rtc-tps65910.c
@@ -247,6 +247,13 @@
 		return ret;
 
 	dev_dbg(&pdev->dev, "Enabling rtc-tps65910.\n");
+
+	/* Enable RTC digital power domain */
+	ret = regmap_update_bits(tps65910->regmap, TPS65910_DEVCTRL,
+		DEVCTRL_RTC_PWDN_MASK, 0 << DEVCTRL_RTC_PWDN_SHIFT);
+	if (ret < 0)
+		return ret;
+
 	rtc_reg = TPS65910_RTC_CTRL_STOP_RTC;
 	ret = regmap_write(tps65910->regmap, TPS65910_RTC_CTRL, rtc_reg);
 	if (ret < 0)
@@ -261,7 +268,7 @@
 
 	ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
 		tps65910_rtc_interrupt, IRQF_TRIGGER_LOW,
-		"rtc-tps65910", &pdev->dev);
+		dev_name(&pdev->dev), &pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "IRQ is not free.\n");
 		return ret;
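
The tps65910 addition powers the RTC domain up via regmap_update_bits(), a
locked read-modify-write that only touches bits inside the mask; passing 0
as the value (here spelled 0 << DEVCTRL_RTC_PWDN_SHIFT) clears the masked
bits. The general shape:

#include <linux/regmap.h>

/* set bits in the mask:   regmap_update_bits(map, reg, mask, mask); */
/* clear bits in the mask: regmap_update_bits(map, reg, mask, 0);    */
ret = regmap_update_bits(map, reg, mask, val);
if (ret < 0)
	return ret;
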
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 9277d94..8b7464c 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -233,7 +233,7 @@
  */
 static int twl_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	unsigned char rtc_data[ALL_TIME_REGS];
 	int ret;
 	u8 save_control;
 	u8 rtc_control;
@@ -300,15 +300,15 @@
 static int twl_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	unsigned char save_control;
-	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	unsigned char rtc_data[ALL_TIME_REGS];
 	int ret;
 
-	rtc_data[1] = bin2bcd(tm->tm_sec);
-	rtc_data[2] = bin2bcd(tm->tm_min);
-	rtc_data[3] = bin2bcd(tm->tm_hour);
-	rtc_data[4] = bin2bcd(tm->tm_mday);
-	rtc_data[5] = bin2bcd(tm->tm_mon + 1);
-	rtc_data[6] = bin2bcd(tm->tm_year - 100);
+	rtc_data[0] = bin2bcd(tm->tm_sec);
+	rtc_data[1] = bin2bcd(tm->tm_min);
+	rtc_data[2] = bin2bcd(tm->tm_hour);
+	rtc_data[3] = bin2bcd(tm->tm_mday);
+	rtc_data[4] = bin2bcd(tm->tm_mon + 1);
+	rtc_data[5] = bin2bcd(tm->tm_year - 100);
 
 	/* Stop RTC while updating the TC registers */
 	ret = twl_rtc_read_u8(&save_control, REG_RTC_CTRL_REG);
@@ -341,7 +341,7 @@
  */
 static int twl_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
-	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	unsigned char rtc_data[ALL_TIME_REGS];
 	int ret;
 
 	ret = twl_i2c_read(TWL_MODULE_RTC, rtc_data,
@@ -368,19 +368,19 @@
 
 static int twl_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
-	unsigned char alarm_data[ALL_TIME_REGS + 1];
+	unsigned char alarm_data[ALL_TIME_REGS];
 	int ret;
 
 	ret = twl_rtc_alarm_irq_enable(dev, 0);
 	if (ret)
 		goto out;
 
-	alarm_data[1] = bin2bcd(alm->time.tm_sec);
-	alarm_data[2] = bin2bcd(alm->time.tm_min);
-	alarm_data[3] = bin2bcd(alm->time.tm_hour);
-	alarm_data[4] = bin2bcd(alm->time.tm_mday);
-	alarm_data[5] = bin2bcd(alm->time.tm_mon + 1);
-	alarm_data[6] = bin2bcd(alm->time.tm_year - 100);
+	alarm_data[0] = bin2bcd(alm->time.tm_sec);
+	alarm_data[1] = bin2bcd(alm->time.tm_min);
+	alarm_data[2] = bin2bcd(alm->time.tm_hour);
+	alarm_data[3] = bin2bcd(alm->time.tm_mday);
+	alarm_data[4] = bin2bcd(alm->time.tm_mon + 1);
+	alarm_data[5] = bin2bcd(alm->time.tm_year - 100);
 
 	/* update all the alarm registers in one shot */
 	ret = twl_i2c_write(TWL_MODULE_RTC, alarm_data,
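
The twl resizing appears to be cleanup of an older API artifact: as best
this reviewer can tell, twl_i2c_write() once required callers to leave
buffer slot 0 free for the register address, hence the "+ 1" sizing and
1-based indexing. With the address handled inside the I2C helper, the six
timekeeping registers pack from index 0, exactly as the patch now does:

/* Six TC registers, seconds..years, filled for a single burst write. */
u8 buf[6];

buf[0] = bin2bcd(tm->tm_sec);
buf[1] = bin2bcd(tm->tm_min);
buf[2] = bin2bcd(tm->tm_hour);
buf[3] = bin2bcd(tm->tm_mday);
buf[4] = bin2bcd(tm->tm_mon + 1);	/* tm_mon counts from 0 */
buf[5] = bin2bcd(tm->tm_year - 100);	/* tm_year counts from 1900 */
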
diff --git a/drivers/rtc/rtc-vt8500.c b/drivers/rtc/rtc-vt8500.c
index 07bf193..14e2d8c 100644
--- a/drivers/rtc/rtc-vt8500.c
+++ b/drivers/rtc/rtc-vt8500.c
@@ -210,7 +210,8 @@
 	struct vt8500_rtc *vt8500_rtc;
 	int ret;
 
-	vt8500_rtc = kzalloc(sizeof(struct vt8500_rtc), GFP_KERNEL);
+	vt8500_rtc = devm_kzalloc(&pdev->dev,
+			   sizeof(struct vt8500_rtc), GFP_KERNEL);
 	if (!vt8500_rtc)
 		return -ENOMEM;
 
@@ -220,15 +221,13 @@
 	vt8500_rtc->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!vt8500_rtc->res) {
 		dev_err(&pdev->dev, "No I/O memory resource defined\n");
-		ret = -ENXIO;
-		goto err_free;
+		return -ENXIO;
 	}
 
 	vt8500_rtc->irq_alarm = platform_get_irq(pdev, 0);
 	if (vt8500_rtc->irq_alarm < 0) {
 		dev_err(&pdev->dev, "No alarm IRQ resource defined\n");
-		ret = -ENXIO;
-		goto err_free;
+		return -ENXIO;
 	}
 
 	vt8500_rtc->res = request_mem_region(vt8500_rtc->res->start,
@@ -236,8 +235,7 @@
 					     "vt8500-rtc");
 	if (vt8500_rtc->res == NULL) {
 		dev_err(&pdev->dev, "failed to request I/O memory\n");
-		ret = -EBUSY;
-		goto err_free;
+		return -EBUSY;
 	}
 
 	vt8500_rtc->regbase = ioremap(vt8500_rtc->res->start,
@@ -278,8 +276,6 @@
 err_release:
 	release_mem_region(vt8500_rtc->res->start,
 			   resource_size(vt8500_rtc->res));
-err_free:
-	kfree(vt8500_rtc);
 	return ret;
 }
 
@@ -297,7 +293,6 @@
 	release_mem_region(vt8500_rtc->res->start,
 			   resource_size(vt8500_rtc->res));
 
-	kfree(vt8500_rtc);
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 2ebe03a..4a909d7 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2144,7 +2144,7 @@
 	 */
 	port_id = fip->port_id;
 	if (fip->probe_tries)
-		port_id = prandom32(&fip->rnd_state) & 0xffff;
+		port_id = prandom_u32_state(&fip->rnd_state) & 0xffff;
 	else if (!port_id)
 		port_id = fip->lp->wwpn & 0xffff;
 	if (!port_id || port_id == 0xffff)
@@ -2169,7 +2169,7 @@
 static void fcoe_ctlr_vn_start(struct fcoe_ctlr *fip)
 {
 	fip->probe_tries = 0;
-	prandom32_seed(&fip->rnd_state, fip->lp->wwpn);
+	prandom_seed_state(&fip->rnd_state, fip->lp->wwpn);
 	fcoe_ctlr_vn_restart(fip);
 }
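
Only a rename on the fcoe side: prandom32()/prandom32_seed() became
prandom_u32_state()/prandom_seed_state(), the variants of prandom_u32()
that draw from caller-owned state and therefore produce a reproducible
sequence per seed. Usage, sketched:

#include <linux/random.h>

static struct rnd_state rnd;

static u16 example_port_id(u64 seed)
{
	prandom_seed_state(&rnd, seed);			/* deterministic */
	return prandom_u32_state(&rnd) & 0xffff;	/* reproducible */
}
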
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9032e91..f1bf5af 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1418,7 +1418,7 @@
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost;
 
-	if (blk_queue_dead(q))
+	if (blk_queue_dying(q))
 		return 0;
 
 	shost = sdev->host;
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index dd8dc27..74ab67a 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -215,7 +215,7 @@
 static int virtscsi_kick_event(struct virtio_scsi *vscsi,
 			       struct virtio_scsi_event_node *event_node)
 {
-	int ret;
+	int err;
 	struct scatterlist sg;
 	unsigned long flags;
 
@@ -223,13 +223,14 @@
 
 	spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
 
-	ret = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1, event_node, GFP_ATOMIC);
-	if (ret >= 0)
+	err = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1, event_node,
+				GFP_ATOMIC);
+	if (!err)
 		virtqueue_kick(vscsi->event_vq.vq);
 
 	spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags);
 
-	return ret;
+	return err;
 }
 
 static int virtscsi_kick_event_all(struct virtio_scsi *vscsi)
@@ -410,22 +411,23 @@
 {
 	unsigned int out_num, in_num;
 	unsigned long flags;
-	int ret;
+	int err;
+	bool needs_kick = false;
 
 	spin_lock_irqsave(&tgt->tgt_lock, flags);
 	virtscsi_map_cmd(tgt, cmd, &out_num, &in_num, req_size, resp_size);
 
 	spin_lock(&vq->vq_lock);
-	ret = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp);
+	err = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp);
 	spin_unlock(&tgt->tgt_lock);
-	if (ret >= 0)
-		ret = virtqueue_kick_prepare(vq->vq);
+	if (!err)
+		needs_kick = virtqueue_kick_prepare(vq->vq);
 
 	spin_unlock_irqrestore(&vq->vq_lock, flags);
 
-	if (ret > 0)
+	if (needs_kick)
 		virtqueue_notify(vq->vq);
-	return ret;
+	return err;
 }
 
 static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
@@ -467,7 +469,7 @@
 
 	if (virtscsi_kick_cmd(tgt, &vscsi->req_vq, cmd,
 			      sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
-			      GFP_ATOMIC) >= 0)
+			      GFP_ATOMIC) == 0)
 		ret = 0;
 	else
 		mempool_free(cmd, virtscsi_cmd_pool);
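
The virtio-scsi churn tracks an API convention change: virtqueue_add_buf()
used to return the remaining ring capacity (so success was "ret >= 0") and
now returns 0 on success or a negative errno. Every success check therefore
becomes "!err", and whether to notify the host is carried in a separate bool
from virtqueue_kick_prepare(). In outline:

bool needs_kick = false;
int err;

err = virtqueue_add_buf(vq, sg, out_num, in_num, data, GFP_ATOMIC);
if (!err)					/* 0 == queued */
	needs_kick = virtqueue_kick_prepare(vq);

/* ... drop locks first; notification can be slow ... */
if (needs_kick)
	virtqueue_notify(vq);
return err;
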
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 75c0c4f..ab34497 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -20,6 +20,7 @@
 #include <linux/spi/spi.h>
 #include <linux/slab.h>
 #include <linux/platform_data/atmel.h>
+#include <linux/of.h>
 
 #include <asm/io.h>
 #include <asm/gpio.h>
@@ -768,6 +769,10 @@
 
 	/* chipselect must have been muxed as GPIO (e.g. in board setup) */
 	npcs_pin = (unsigned int)spi->controller_data;
+
+	if (gpio_is_valid(spi->cs_gpio))
+		npcs_pin = spi->cs_gpio;
+
 	asd = spi->controller_state;
 	if (!asd) {
 		asd = kzalloc(sizeof(struct atmel_spi_device), GFP_KERNEL);
@@ -937,8 +942,9 @@
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 
+	master->dev.of_node = pdev->dev.of_node;
 	master->bus_num = pdev->id;
-	master->num_chipselect = 4;
+	master->num_chipselect = master->dev.of_node ? 0 : 4;
 	master->setup = atmel_spi_setup;
 	master->transfer = atmel_spi_transfer;
 	master->cleanup = atmel_spi_cleanup;
@@ -1064,11 +1070,20 @@
 #define	atmel_spi_resume	NULL
 #endif
 
+#if defined(CONFIG_OF)
+static const struct of_device_id atmel_spi_dt_ids[] = {
+	{ .compatible = "atmel,at91rm9200-spi" },
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, atmel_spi_dt_ids);
+#endif
 
 static struct platform_driver atmel_spi_driver = {
 	.driver		= {
 		.name	= "atmel_spi",
 		.owner	= THIS_MODULE,
+		.of_match_table	= of_match_ptr(atmel_spi_dt_ids),
 	},
 	.suspend	= atmel_spi_suspend,
 	.resume		= atmel_spi_resume,
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 4dd7b7c..ad93231 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -215,6 +215,10 @@
 	writel(0, regs + S3C64XX_SPI_PACKET_CNT);
 
 	val = readl(regs + S3C64XX_SPI_CH_CFG);
+	val &= ~(S3C64XX_SPI_CH_RXCH_ON | S3C64XX_SPI_CH_TXCH_ON);
+	writel(val, regs + S3C64XX_SPI_CH_CFG);
+
+	val = readl(regs + S3C64XX_SPI_CH_CFG);
 	val |= S3C64XX_SPI_CH_SW_RST;
 	val &= ~S3C64XX_SPI_CH_HS_EN;
 	writel(val, regs + S3C64XX_SPI_CH_CFG);
@@ -248,10 +252,6 @@
 	val = readl(regs + S3C64XX_SPI_MODE_CFG);
 	val &= ~(S3C64XX_SPI_MODE_TXDMA_ON | S3C64XX_SPI_MODE_RXDMA_ON);
 	writel(val, regs + S3C64XX_SPI_MODE_CFG);
-
-	val = readl(regs + S3C64XX_SPI_CH_CFG);
-	val &= ~(S3C64XX_SPI_CH_RXCH_ON | S3C64XX_SPI_CH_TXCH_ON);
-	writel(val, regs + S3C64XX_SPI_CH_CFG);
 }
 
 static void s3c64xx_spi_dmacb(void *data)
@@ -771,8 +771,6 @@
 			if (list_is_last(&xfer->transfer_list,
 						&msg->transfers))
 				cs_toggle = 1;
-			else
-				disable_cs(sdd, spi);
 		}
 
 		msg->actual_length += xfer->len;
diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c
index 32f7b55..60cfae5 100644
--- a/drivers/spi/spi-sh-hspi.c
+++ b/drivers/spi/spi-sh-hspi.c
@@ -290,7 +290,7 @@
 	}
 
 	clk = clk_get(NULL, "shyway_clk");
-	if (!clk) {
+	if (IS_ERR(clk)) {
 		dev_err(&pdev->dev, "shyway_clk is required\n");
 		ret = -EINVAL;
 		goto error0;
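
The one-liner in spi-sh-hspi fixes a classic mistake worth calling out:
clk_get() reports failure as an ERR_PTR-encoded pointer, never NULL, so
"if (!clk)" can never fire and the failure slipped through. The rule for
any ERR_PTR-returning API:

#include <linux/clk.h>
#include <linux/err.h>

clk = clk_get(dev, "shyway_clk");
if (IS_ERR(clk))		/* not "if (!clk)" */
	return PTR_ERR(clk);	/* recover the errno */
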
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ab095ac..19ee901 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -824,6 +824,7 @@
 	struct spi_device *spi;
 	struct device_node *nc;
 	const __be32 *prop;
+	char modalias[SPI_NAME_SIZE + 4];
 	int rc;
 	int len;
 
@@ -887,7 +888,9 @@
 		spi->dev.of_node = nc;
 
 		/* Register the new device */
-		request_module(spi->modalias);
+		snprintf(modalias, sizeof(modalias), "%s%s", SPI_MODULE_PREFIX,
+			 spi->modalias);
+		request_module(modalias);
 		rc = spi_add_device(spi);
 		if (rc) {
 			dev_err(&master->dev, "spi_device register error %s\n",
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 4a36e9a..2d12e8a 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -35,6 +35,7 @@
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/pid_namespace.h>
 
 #include "binder.h"
 #include "binder_trace.h"
@@ -2320,7 +2321,7 @@
 		if (t->from) {
 			struct task_struct *sender = t->from->proc->tsk;
 			tr.sender_pid = task_tgid_nr_ns(sender,
-							current->nsproxy->pid_ns);
+							task_active_pid_ns(current));
 		} else {
 			tr.sender_pid = 0;
 		}
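
The binder fix replaces current->nsproxy->pid_ns with
task_active_pid_ns(current). The latter derives the namespace from the
task's own struct pid, i.e. the namespace the caller actually lives in,
which is what a PID being reported back to that caller must be translated
against; it also avoids dereferencing nsproxy, which goes away while a task
exits. Shape of the call:

#include <linux/pid_namespace.h>
#include <linux/sched.h>

/* translate sender's TGID into the receiving task's pid namespace */
pid_t pid = task_tgid_nr_ns(sender, task_active_pid_ns(current));
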
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 23f797e..57d6b29 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -41,8 +41,7 @@
 #include <linux/of.h>
 #include <linux/gpio.h>
 #include <linux/pinctrl/consumer.h>
-
-#include <plat/omap-serial.h>
+#include <linux/platform_data/serial-omap.h>
 
 #define OMAP_MAX_HSUART_PORTS	6
 
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 57cc9c6..f1c6c54 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -251,7 +251,7 @@
 		/* best case is 32bit-aligned source address */
 		if ((0x02 & (unsigned long) src) == 0) {
 			if (len >= 4) {
-				writesl(fifo, src + index, len >> 2);
+				iowrite32_rep(fifo, src + index, len >> 2);
 				index += len & ~0x03;
 			}
 			if (len & 0x02) {
@@ -260,7 +260,7 @@
 			}
 		} else {
 			if (len >= 2) {
-				writesw(fifo, src + index, len >> 1);
+				iowrite16_rep(fifo, src + index, len >> 1);
 				index += len & ~0x01;
 			}
 		}
@@ -268,7 +268,7 @@
 			musb_writeb(fifo, 0, src[index]);
 	} else  {
 		/* byte aligned */
-		writesb(fifo, src, len);
+		iowrite8_rep(fifo, src, len);
 	}
 }
 
@@ -294,7 +294,7 @@
 		/* best case is 32bit-aligned destination address */
 		if ((0x02 & (unsigned long) dst) == 0) {
 			if (len >= 4) {
-				readsl(fifo, dst, len >> 2);
+				ioread32_rep(fifo, dst, len >> 2);
 				index = len & ~0x03;
 			}
 			if (len & 0x02) {
@@ -303,7 +303,7 @@
 			}
 		} else {
 			if (len >= 2) {
-				readsw(fifo, dst, len >> 1);
+				ioread16_rep(fifo, dst, len >> 1);
 				index = len & ~0x01;
 			}
 		}
@@ -311,7 +311,7 @@
 			dst[index] = musb_readb(fifo, 0);
 	} else  {
 		/* byte aligned */
-		readsb(fifo, dst, len);
+		ioread8_rep(fifo, dst, len);
 	}
 }
 #endif
diff --git a/drivers/usb/musb/musb_io.h b/drivers/usb/musb/musb_io.h
index 565ad16..eebeed7 100644
--- a/drivers/usb/musb/musb_io.h
+++ b/drivers/usb/musb/musb_io.h
@@ -37,27 +37,6 @@
 
 #include <linux/io.h>
 
-#if !defined(CONFIG_ARM) && !defined(CONFIG_SUPERH) \
-	&& !defined(CONFIG_AVR32) && !defined(CONFIG_PPC32) \
-	&& !defined(CONFIG_PPC64) && !defined(CONFIG_BLACKFIN) \
-	&& !defined(CONFIG_MIPS) && !defined(CONFIG_M68K) \
-	&& !defined(CONFIG_XTENSA)
-static inline void readsl(const void __iomem *addr, void *buf, int len)
-	{ insl((unsigned long)addr, buf, len); }
-static inline void readsw(const void __iomem *addr, void *buf, int len)
-	{ insw((unsigned long)addr, buf, len); }
-static inline void readsb(const void __iomem *addr, void *buf, int len)
-	{ insb((unsigned long)addr, buf, len); }
-
-static inline void writesl(const void __iomem *addr, const void *buf, int len)
-	{ outsl((unsigned long)addr, buf, len); }
-static inline void writesw(const void __iomem *addr, const void *buf, int len)
-	{ outsw((unsigned long)addr, buf, len); }
-static inline void writesb(const void __iomem *addr, const void *buf, int len)
-	{ outsb((unsigned long)addr, buf, len); }
-
-#endif
-
 #ifndef CONFIG_BLACKFIN
 
 /* NOTE:  these offsets are all in bytes */
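
The musb diffs replace the driver's homegrown readsl()/writesl() fallbacks
with the generic ioread32_rep()/iowrite32_rep() family from <linux/io.h>,
which every architecture provides; that is why the whole #if !defined(...)
shim block in musb_io.h can be deleted. For reference, the repeated-access
prototypes are:

#include <linux/io.h>

/* write 'count' 32-bit words from 'buf' to one FIFO register */
void iowrite32_rep(void __iomem *addr, const void *buf, unsigned long count);

/* read 'count' 32-bit words from the register into 'buf' */
void ioread32_rep(void __iomem *addr, void *buf, unsigned long count);
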
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
index 8bde6fc..3969813 100644
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -22,6 +22,7 @@
 #include <linux/prefetch.h>
 #include <linux/usb.h>
 #include <linux/irq.h>
+#include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb/nop-usb-xceiv.h>
@@ -198,7 +199,7 @@
 		/* Best case is 32bit-aligned destination address */
 		if ((0x02 & (unsigned long) buf) == 0) {
 			if (len >= 4) {
-				writesl(fifo, buf, len >> 2);
+				iowrite32_rep(fifo, buf, len >> 2);
 				buf += (len & ~0x03);
 				len &= 0x03;
 			}
@@ -245,7 +246,7 @@
 		/* Best case is 32bit-aligned destination address */
 		if ((0x02 & (unsigned long) buf) == 0) {
 			if (len >= 4) {
-				readsl(fifo, buf, len >> 2);
+				ioread32_rep(fifo, buf, len >> 2);
 				buf += (len & ~0x03);
 				len &= 0x03;
 			}
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 7eb73c5..5de6e7f 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -6,6 +6,7 @@
 
 config OMAP_USB2
 	tristate "OMAP USB2 PHY Driver"
+	depends on ARCH_OMAP2PLUS
 	select USB_OTG_UTILS
 	help
 	  Enable this to support the transceiver that is part of SOC. This
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 6c11994..b28e66c4 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -43,6 +43,10 @@
 	u16 cmd;
 	u8 msix_pos;
 
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
 	vdev->reset_works = (pci_reset_function(pdev) == 0);
 	pci_save_state(pdev);
 	vdev->pci_saved_state = pci_store_saved_state(pdev);
@@ -51,8 +55,11 @@
 			 __func__, dev_name(&pdev->dev));
 
 	ret = vfio_config_init(vdev);
-	if (ret)
-		goto out;
+	if (ret) {
+		pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state);
+		pci_disable_device(pdev);
+		return ret;
+	}
 
 	if (likely(!nointxmask))
 		vdev->pci_2_3 = pci_intx_mask_supported(pdev);
@@ -77,24 +84,15 @@
 	} else
 		vdev->msix_bar = 0xFF;
 
-	ret = pci_enable_device(pdev);
-	if (ret)
-		goto out;
-
-	return ret;
-
-out:
-	kfree(vdev->pci_saved_state);
-	vdev->pci_saved_state = NULL;
-	vfio_config_free(vdev);
-	return ret;
+	return 0;
 }
 
 static void vfio_pci_disable(struct vfio_pci_device *vdev)
 {
+	struct pci_dev *pdev = vdev->pdev;
 	int bar;
 
-	pci_disable_device(vdev->pdev);
+	pci_disable_device(pdev);
 
 	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
 				VFIO_IRQ_SET_ACTION_TRIGGER,
@@ -104,22 +102,40 @@
 
 	vfio_config_free(vdev);
 
-	pci_reset_function(vdev->pdev);
-
-	if (pci_load_and_free_saved_state(vdev->pdev,
-					  &vdev->pci_saved_state) == 0)
-		pci_restore_state(vdev->pdev);
-	else
-		pr_info("%s: Couldn't reload %s saved state\n",
-			__func__, dev_name(&vdev->pdev->dev));
-
 	for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
 		if (!vdev->barmap[bar])
 			continue;
-		pci_iounmap(vdev->pdev, vdev->barmap[bar]);
-		pci_release_selected_regions(vdev->pdev, 1 << bar);
+		pci_iounmap(pdev, vdev->barmap[bar]);
+		pci_release_selected_regions(pdev, 1 << bar);
 		vdev->barmap[bar] = NULL;
 	}
+
+	/*
+	 * If we have saved state, restore it.  If we can reset the device,
+	 * even better.  Resetting with current state seems better than
+	 * nothing, but saving and restoring current state without reset
+	 * is just busy work.
+	 */
+	if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
+		pr_info("%s: Couldn't reload %s saved state\n",
+			__func__, dev_name(&pdev->dev));
+
+		if (!vdev->reset_works)
+			return;
+
+		pci_save_state(pdev);
+	}
+
+	/*
+	 * Disable INTx and MSI, presumably to avoid spurious interrupts
+	 * during reset.  Stolen from pci_reset_function()
+	 */
+	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+
+	if (vdev->reset_works)
+		__pci_reset_function(pdev);
+
+	pci_restore_state(pdev);
 }
 
 static void vfio_pci_release(void *device_data)
@@ -327,15 +343,10 @@
 			    hdr.count > vfio_pci_get_irq_count(vdev, hdr.index))
 				return -EINVAL;
 
-			data = kmalloc(hdr.count * size, GFP_KERNEL);
-			if (!data)
-				return -ENOMEM;
-
-			if (copy_from_user(data, (void __user *)(arg + minsz),
-					   hdr.count * size)) {
-				kfree(data);
-				return -EFAULT;
-			}
+			data = memdup_user((void __user *)(arg + minsz),
+					   hdr.count * size);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
 		}
 
 		mutex_lock(&vdev->igate);
@@ -562,9 +573,9 @@
 
 	return 0;
 
-out_virqfd:
-	vfio_pci_virqfd_exit();
 out_driver:
+	vfio_pci_virqfd_exit();
+out_virqfd:
 	vfio_pci_uninit_perm_bits();
 	return ret;
 }
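
The ioctl change in vfio_pci is a textbook memdup_user() conversion: the
allocate-then-copy_from_user pair collapses into one call that returns
either the kernel copy or an ERR_PTR, eliminating the kfree() on the fault
path. The pattern:

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>

void *data;

data = memdup_user(uarg, len);
if (IS_ERR(data))
	return PTR_ERR(data);	/* -EFAULT or -ENOMEM */

/* ... consume data ... */
kfree(data);
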
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 56097c6..12c264d 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -191,6 +191,17 @@
 	kref_put(&container->kref, vfio_container_release);
 }
 
+static void vfio_group_unlock_and_free(struct vfio_group *group)
+{
+	mutex_unlock(&vfio.group_lock);
+	/*
+	 * Unregister outside of lock.  A spurious callback is harmless now
+	 * that the group is no longer in vfio.group_list.
+	 */
+	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
+	kfree(group);
+}
+
 /**
  * Group objects - create, release, get, put, search
  */
@@ -229,8 +240,7 @@
 
 	minor = vfio_alloc_group_minor(group);
 	if (minor < 0) {
-		mutex_unlock(&vfio.group_lock);
-		kfree(group);
+		vfio_group_unlock_and_free(group);
 		return ERR_PTR(minor);
 	}
 
@@ -239,8 +249,7 @@
 		if (tmp->iommu_group == iommu_group) {
 			vfio_group_get(tmp);
 			vfio_free_group_minor(minor);
-			mutex_unlock(&vfio.group_lock);
-			kfree(group);
+			vfio_group_unlock_and_free(group);
 			return tmp;
 		}
 	}
@@ -249,8 +258,7 @@
 			    group, "%d", iommu_group_id(iommu_group));
 	if (IS_ERR(dev)) {
 		vfio_free_group_minor(minor);
-		mutex_unlock(&vfio.group_lock);
-		kfree(group);
+		vfio_group_unlock_and_free(group);
 		return (struct vfio_group *)dev; /* ERR_PTR */
 	}
 
@@ -274,16 +282,7 @@
 	device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor));
 	list_del(&group->vfio_next);
 	vfio_free_group_minor(group->minor);
-
-	mutex_unlock(&vfio.group_lock);
-
-	/*
-	 * Unregister outside of lock.  A spurious callback is harmless now
-	 * that the group is no longer in vfio.group_list.
-	 */
-	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
-
-	kfree(group);
+	vfio_group_unlock_and_free(group);
 }
 
 static void vfio_group_put(struct vfio_group *group)
@@ -466,8 +465,9 @@
 {
 	struct vfio_group *group = data;
 	struct vfio_device *device;
+	struct device_driver *drv = ACCESS_ONCE(dev->driver);
 
-	if (!dev->driver || vfio_whitelisted_driver(dev->driver))
+	if (!drv || vfio_whitelisted_driver(drv))
 		return 0;
 
 	device = vfio_group_get_device(group, dev);
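
The final vfio hunk hardens a lockless read: dev->driver can be cleared
concurrently during unbind, so it is sampled exactly once with
ACCESS_ONCE() and every later check uses that snapshot. Without it, the
compiler is free to reload dev->driver between the NULL test and the use:

#include <linux/compiler.h>

/* racy: the compiler may load dev->driver twice */
if (!dev->driver || vfio_whitelisted_driver(dev->driver))
	return 0;

/* safe: one load, one consistent view */
struct device_driver *drv = ACCESS_ONCE(dev->driver);

if (!drv || vfio_whitelisted_driver(drv))
	return 0;
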
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 9c31277..e7068c5 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2140,14 +2140,16 @@
 	  To compile as a module, choose M here: the module name is udlfb.
 
 config FB_IBM_GXT4500
-	tristate "Framebuffer support for IBM GXT4500P adaptor"
+	tristate "Framebuffer support for IBM GXT4000P/4500P/6000P/6500P adaptors"
 	depends on FB && PPC
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
 	---help---
-	  Say Y here to enable support for the IBM GXT4500P display
-	  adaptor, found on some IBM System P (pSeries) machines.
+	  Say Y here to enable support for the IBM GXT4000P/6000P and
+	  GXT4500P/6500P display adaptor based on Raster Engine RC1000,
+	  found on some IBM System P (pSeries) machines. This driver
+	  doesn't use Geometry Engine GT1000.
 
 config FB_PS3
 	tristate "PS3 GPU framebuffer driver"
diff --git a/drivers/video/backlight/88pm860x_bl.c b/drivers/video/backlight/88pm860x_bl.c
index b7ec34c..c072ed9 100644
--- a/drivers/video/backlight/88pm860x_bl.c
+++ b/drivers/video/backlight/88pm860x_bl.c
@@ -117,8 +117,8 @@
 	data->current_brightness = value;
 	return 0;
 out:
-	dev_dbg(chip->dev, "set brightness %d failure with return "
-		"value:%d\n", value, ret);
+	dev_dbg(chip->dev, "set brightness %d failure with return value: %d\n",
+		value, ret);
 	return ret;
 }
 
@@ -208,22 +208,19 @@
 	res = platform_get_resource_byname(pdev, IORESOURCE_REG, "duty cycle");
 	if (!res) {
 		dev_err(&pdev->dev, "No REG resource for duty cycle\n");
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 	data->reg_duty_cycle = res->start;
 	res = platform_get_resource_byname(pdev, IORESOURCE_REG, "always on");
 	if (!res) {
 		dev_err(&pdev->dev, "No REG resorce for always on\n");
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 	data->reg_always_on = res->start;
 	res = platform_get_resource_byname(pdev, IORESOURCE_REG, "current");
 	if (!res) {
 		dev_err(&pdev->dev, "No REG resource for current\n");
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 	data->reg_current = res->start;
 
@@ -231,8 +228,7 @@
 	sprintf(name, "backlight-%d", pdev->id);
 	data->port = pdev->id;
 	data->chip = chip;
-	data->i2c = (chip->id == CHIP_PM8606) ? chip->client	\
-			: chip->companion;
+	data->i2c = (chip->id == CHIP_PM8606) ? chip->client : chip->companion;
 	data->current_brightness = MAX_BRIGHTNESS;
 	if (pm860x_backlight_dt_init(pdev, data, name)) {
 		if (pdata) {
@@ -263,8 +259,6 @@
 	return 0;
 out_brt:
 	backlight_device_unregister(bl);
-out:
-	devm_kfree(&pdev->dev, data);
 	return ret;
 }
 
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
index df1cbb7..de5e5e7 100644
--- a/drivers/video/backlight/atmel-pwm-bl.c
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -106,10 +106,9 @@
 	pwm_channel_writel(&pwmbl->pwmc, PWM_CPRD,
 			pwmbl->pdata->pwm_compare_max);
 
-	dev_info(&pwmbl->pdev->dev, "Atmel PWM backlight driver "
-			"(%lu Hz)\n", pwmbl->pwmc.mck /
-			pwmbl->pdata->pwm_compare_max /
-			(1 << prescale));
+	dev_info(&pwmbl->pdev->dev, "Atmel PWM backlight driver (%lu Hz)\n",
+		pwmbl->pwmc.mck / pwmbl->pdata->pwm_compare_max /
+		(1 << prescale));
 
 	return pwm_channel_enable(&pwmbl->pwmc);
 }
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 297db2f..345f666 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -370,6 +370,35 @@
 }
 EXPORT_SYMBOL(backlight_device_unregister);
 
+#ifdef CONFIG_OF
+static int of_parent_match(struct device *dev, void *data)
+{
+	return dev->parent && dev->parent->of_node == data;
+}
+
+/**
+ * of_find_backlight_by_node() - find backlight device by device-tree node
+ * @node: device-tree node of the backlight device
+ *
+ * Returns a pointer to the backlight device corresponding to the given DT
+ * node or NULL if no such backlight device exists or if the device hasn't
+ * been probed yet.
+ *
+ * This function obtains a reference on the backlight device and it is the
+ * caller's responsibility to drop the reference by calling put_device() on
+ * the backlight device's .dev field.
+ */
+struct backlight_device *of_find_backlight_by_node(struct device_node *node)
+{
+	struct device *dev;
+
+	dev = class_find_device(backlight_class, NULL, node, of_parent_match);
+
+	return dev ? to_backlight_device(dev) : NULL;
+}
+EXPORT_SYMBOL(of_find_backlight_by_node);
+#endif
+
 static void __exit backlight_class_exit(void)
 {
 	class_destroy(backlight_class);
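
of_find_backlight_by_node() builds on class_find_device(), which walks the
class and returns the first device whose match callback succeeds, with a
reference already taken; hence the put_device() obligation spelled out in
the kerneldoc above. A typical consumer resolves a phandle and drops the
reference when done; a hypothetical sketch (property name illustrative):

#include <linux/backlight.h>
#include <linux/of.h>

struct device_node *np;
struct backlight_device *bl;

np = of_parse_phandle(dev->of_node, "backlight", 0);
if (np) {
	bl = of_find_backlight_by_node(np);
	of_node_put(np);
	if (!bl)
		return -EPROBE_DEFER;	/* backlight not probed yet */

	/* ... use bl ... */
	put_device(&bl->dev);		/* balance class_find_device() */
}
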
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index eaaebf2..e323fcb 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -6,8 +6,8 @@
  *  Based on Sharp's 2.4 Backlight Driver
  *
  *  Copyright (c) 2008 Marvell International Ltd.
- *  	Converted to SPI device based LCD/Backlight device driver
- *  	by Eric Miao <eric.miao@marvell.com>
+ *	Converted to SPI device based LCD/Backlight device driver
+ *	by Eric Miao <eric.miao@marvell.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
@@ -192,7 +192,7 @@
 {
 	int adj;
 
-	switch(mode) {
+	switch (mode) {
 	case CORGI_LCD_MODE_VGA:
 		/* Setting for VGA */
 		adj = sharpsl_param.phadadj;
@@ -409,10 +409,10 @@
 	cont = !!(intensity & 0x20) ^ lcd->gpio_backlight_cont_inverted;
 
 	if (gpio_is_valid(lcd->gpio_backlight_cont))
-		gpio_set_value(lcd->gpio_backlight_cont, cont);
+		gpio_set_value_cansleep(lcd->gpio_backlight_cont, cont);
 
 	if (gpio_is_valid(lcd->gpio_backlight_on))
-		gpio_set_value(lcd->gpio_backlight_on, intensity);
+		gpio_set_value_cansleep(lcd->gpio_backlight_on, intensity);
 
 	if (lcd->kick_battery)
 		lcd->kick_battery();
@@ -495,8 +495,9 @@
 		err = devm_gpio_request(&spi->dev, pdata->gpio_backlight_on,
 					"BL_ON");
 		if (err) {
-			dev_err(&spi->dev, "failed to request GPIO%d for "
-				"backlight_on\n", pdata->gpio_backlight_on);
+			dev_err(&spi->dev,
+				"failed to request GPIO%d for backlight_on\n",
+				pdata->gpio_backlight_on);
 			return err;
 		}
 
@@ -508,8 +509,9 @@
 		err = devm_gpio_request(&spi->dev, pdata->gpio_backlight_cont,
 					"BL_CONT");
 		if (err) {
-			dev_err(&spi->dev, "failed to request GPIO%d for "
-				"backlight_cont\n", pdata->gpio_backlight_cont);
+			dev_err(&spi->dev,
+				"failed to request GPIO%d for backlight_cont\n",
+				pdata->gpio_backlight_cont);
 			return err;
 		}
 
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 573c7ec..8179cef 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -2,10 +2,10 @@
  * Backlight driver for Dialog Semiconductor DA9030/DA9034
  *
  * Copyright (C) 2008 Compulab, Ltd.
- * 	Mike Rapoport <mike@compulab.co.il>
+ *	Mike Rapoport <mike@compulab.co.il>
  *
  * Copyright (C) 2006-2008 Marvell International Ltd.
- * 	Eric Miao <eric.miao@marvell.com>
+ *	Eric Miao <eric.miao@marvell.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -164,15 +164,14 @@
 #ifdef CONFIG_PM
 static int da903x_backlight_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct backlight_device *bl = platform_get_drvdata(pdev);
+	struct backlight_device *bl = dev_get_drvdata(dev);
+
 	return da903x_backlight_set(bl, 0);
 }
 
 static int da903x_backlight_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct backlight_device *bl = platform_get_drvdata(pdev);
+	struct backlight_device *bl = dev_get_drvdata(dev);
 
 	backlight_update_status(bl);
 	return 0;
@@ -199,7 +198,7 @@
 module_platform_driver(da903x_backlight_driver);
 
 MODULE_DESCRIPTION("Backlight Driver for Dialog Semiconductor DA9030/DA9034");
-MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>"
-	      "Mike Rapoport <mike@compulab.co.il>");
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
+MODULE_AUTHOR("Mike Rapoport <mike@compulab.co.il>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:da903x-backlight");
diff --git a/drivers/video/backlight/da9052_bl.c b/drivers/video/backlight/da9052_bl.c
index ac19618..842da5a 100644
--- a/drivers/video/backlight/da9052_bl.c
+++ b/drivers/video/backlight/da9052_bl.c
@@ -34,7 +34,7 @@
 	DA9052_TYPE_WLED3,
 };
 
-static unsigned char wled_bank[] = {
+static const unsigned char wled_bank[] = {
 	DA9052_LED1_CONF_REG,
 	DA9052_LED2_CONF_REG,
 	DA9052_LED3_CONF_REG,
diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c
index 8c660fc..0ae155b 100644
--- a/drivers/video/backlight/generic_bl.c
+++ b/drivers/video/backlight/generic_bl.c
@@ -97,8 +97,8 @@
 	props.max_brightness = machinfo->max_intensity;
 	bd = backlight_device_register(name, &pdev->dev, NULL, &genericbl_ops,
 				       &props);
-	if (IS_ERR (bd))
-		return PTR_ERR (bd);
+	if (IS_ERR(bd))
+		return PTR_ERR(bd);
 
 	platform_set_drvdata(pdev, bd);
 
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index c999663..5cefd73 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -26,7 +26,7 @@
 #define HP680_DEFAULT_INTENSITY 10
 
 static int hp680bl_suspended;
-static int current_intensity = 0;
+static int current_intensity;
 static DEFINE_SPINLOCK(bl_lock);
 
 static void hp680bl_send_intensity(struct backlight_device *bd)
@@ -168,7 +168,7 @@
 static void __exit hp680bl_exit(void)
 {
 	platform_device_unregister(hp680bl_device);
- 	platform_driver_unregister(&hp680bl_driver);
+	platform_driver_unregister(&hp680bl_driver);
 }
 
 module_init(hp680bl_init);
diff --git a/drivers/video/backlight/ili9320.c b/drivers/video/backlight/ili9320.c
index 66cc313..1235bf9 100644
--- a/drivers/video/backlight/ili9320.c
+++ b/drivers/video/backlight/ili9320.c
@@ -45,7 +45,7 @@
 	/* second message is the data to transfer */
 
 	data[0] = spi->id | ILI9320_SPI_DATA  | ILI9320_SPI_WRITE;
- 	data[1] = value >> 8;
+	data[1] = value >> 8;
 	data[2] = value;
 
 	return spi_sync(spi->dev, &spi->message);
@@ -56,11 +56,10 @@
 	dev_dbg(ili->dev, "write: reg=%02x, val=%04x\n", reg, value);
 	return ili->write(ili, reg, value);
 }
-
 EXPORT_SYMBOL_GPL(ili9320_write);
 
 int ili9320_write_regs(struct ili9320 *ili,
-		       struct ili9320_reg *values,
+		       const struct ili9320_reg *values,
 		       int nr_values)
 {
 	int index;
@@ -74,7 +73,6 @@
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(ili9320_write_regs);
 
 static void ili9320_reset(struct ili9320 *lcd)
@@ -260,7 +258,6 @@
 
 	return ret;
 }
-
 EXPORT_SYMBOL_GPL(ili9320_probe_spi);
 
 int ili9320_remove(struct ili9320 *ili)
@@ -271,7 +268,6 @@
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(ili9320_remove);
 
 #ifdef CONFIG_PM
@@ -296,20 +292,17 @@
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(ili9320_suspend);
 
 int ili9320_resume(struct ili9320 *lcd)
 {
 	dev_info(lcd->dev, "resuming from power state %d\n", lcd->power);
 
-	if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP) {
+	if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP)
 		ili9320_write(lcd, ILI9320_POWER1, 0x00);
-	}
 
 	return ili9320_power(lcd, FB_BLANK_UNBLANK);
 }
-
 EXPORT_SYMBOL_GPL(ili9320_resume);
 #endif
 
@@ -318,7 +311,6 @@
 {
 	ili9320_power(lcd, FB_BLANK_POWERDOWN);
 }
-
 EXPORT_SYMBOL_GPL(ili9320_shutdown);
 
 MODULE_AUTHOR("Ben Dooks <ben-linux@fluff.org>");
diff --git a/drivers/video/backlight/ili9320.h b/drivers/video/backlight/ili9320.h
index e388eca..e0db738 100644
--- a/drivers/video/backlight/ili9320.h
+++ b/drivers/video/backlight/ili9320.h
@@ -63,7 +63,7 @@
 			 unsigned int reg, unsigned int value);
 
 extern int ili9320_write_regs(struct ili9320 *ili,
-			      struct ili9320_reg *values,
+			      const struct ili9320_reg *values,
 			      int nr_values);
 
 /* Device probe */
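
Constifying the register tables only compiles cleanly because `ili9320_write_regs()` is changed to take `const struct ili9320_reg *` in the same patch: const-ness has to propagate through every function that consumes the table. A standalone sketch of the same idea, with hypothetical names:

#include <stdio.h>
#include <stdint.h>

struct reg { uint16_t address, value; };

/* the table now lives in rodata; writing through it is undefined */
static const struct reg init_seq[] = {
        { 0x07, 0x0000 },
        { 0x13, 0x0000 },
};

/* parameter must be pointer-to-const, or passing init_seq warns */
static void write_regs(const struct reg *r, size_t n)
{
        while (n--) {
                printf("reg %#x <- %#x\n",
                       (unsigned)r->address, (unsigned)r->value);
                r++;
        }
}

int main(void)
{
        write_regs(init_seq, sizeof(init_seq) / sizeof(init_seq[0]));
        return 0;
}
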
diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c
index 16f593b..fef6ce4 100644
--- a/drivers/video/backlight/jornada720_bl.c
+++ b/drivers/video/backlight/jornada720_bl.c
@@ -48,7 +48,7 @@
 
 	jornada_ssp_end();
 
-	return (BL_MAX_BRIGHT - ret);
+	return BL_MAX_BRIGHT - ret;
 }
 
 static int jornada_bl_update_status(struct backlight_device *bd)
@@ -77,18 +77,23 @@
 			goto out;
 		}
 
-		/* at this point we expect that the mcu has accepted
-		   our command and is waiting for our new value
-		   please note that maximum brightness is 255,
-		   but due to physical layout it is equal to 0, so we simply
-		   invert the value (MAX VALUE - NEW VALUE). */
-		if (jornada_ssp_byte(BL_MAX_BRIGHT - bd->props.brightness) != TXDUMMY) {
+		/*
+		 * At this point we expect that the MCU has accepted
+		 * our command and is waiting for our new value.
+		 * Note that maximum brightness is 255, but due to the
+		 * physical layout it is equal to 0, so we simply
+		 * invert the value (MAX VALUE - NEW VALUE).
+		 */
+		if (jornada_ssp_byte(BL_MAX_BRIGHT - bd->props.brightness)
+			!= TXDUMMY) {
 			pr_err("set brightness failed\n");
 			ret = -ETIMEDOUT;
 		}
 
-		/* If infact we get an TXDUMMY as output we are happy and dont
-		   make any further comments about it */
+		/*
+		 * If in fact we get a TXDUMMY as output, we are happy and
+		 * don't make any further comment about it.
+		 */
 out:
 	jornada_ssp_end();
 
@@ -121,9 +126,11 @@
 
 	bd->props.power = FB_BLANK_UNBLANK;
 	bd->props.brightness = BL_DEF_BRIGHT;
-	/* note. make sure max brightness is set otherwise
-	   you will get seemingly non-related errors when
-	   trying to change brightness */
+	/*
+	 * Note: make sure max brightness is set, otherwise
+	 * you will get seemingly unrelated errors when
+	 * trying to change brightness.
+	 */
 	jornada_bl_update_status(bd);
 
 	platform_set_drvdata(pdev, bd);
diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c
index f5aa0a5..9a35196 100644
--- a/drivers/video/backlight/l4f00242t03.c
+++ b/drivers/video/backlight/l4f00242t03.c
@@ -4,7 +4,7 @@
  * Copyright 2007-2009 Freescale Semiconductor, Inc. All Rights Reserved.
  *
  * Copyright (c) 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>
- * 	Inspired by Marek Vasut work in l4f00242t03.c
+ *	Inspired by Marek Vasut work in l4f00242t03.c
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -33,7 +33,6 @@
 	struct regulator *core_reg;
 };
 
-
 static void l4f00242t03_reset(unsigned int gpio)
 {
 	pr_debug("l4f00242t03_reset.\n");
diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index a5d0d02..34fb6bd 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -108,7 +108,7 @@
 static ssize_t lcd_store_power(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	int rc = -ENXIO;
+	int rc;
 	struct lcd_device *ld = to_lcd_device(dev);
 	unsigned long power;
 
@@ -116,6 +116,8 @@
 	if (rc)
 		return rc;
 
+	rc = -ENXIO;
+
 	mutex_lock(&ld->ops_lock);
 	if (ld->ops && ld->ops->set_power) {
 		pr_debug("set power to %lu\n", power);
@@ -144,7 +146,7 @@
 static ssize_t lcd_store_contrast(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	int rc = -ENXIO;
+	int rc;
 	struct lcd_device *ld = to_lcd_device(dev);
 	unsigned long contrast;
 
@@ -152,6 +154,8 @@
 	if (rc)
 		return rc;
 
+	rc = -ENXIO;
+
 	mutex_lock(&ld->ops_lock);
 	if (ld->ops && ld->ops->set_contrast) {
 		pr_debug("set contrast to %lu\n", contrast);
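
The lcd.c change is subtle: `rc` used to be pre-set to -ENXIO and then immediately overwritten by the parse, so the initializer was dead; moving the assignment after the parse makes the intent explicit — parse errors win, and -ENXIO is the default only once the input is valid. A sketch of the resulting sysfs-store pattern (all names hypothetical):

struct example_ops {
        int (*set_power)(unsigned long value);
};

static DEFINE_MUTEX(example_lock);
static struct example_ops *example_ops;        /* may be NULL */

static ssize_t example_store_power(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t count)
{
        unsigned long power;
        int rc;

        rc = kstrtoul(buf, 0, &power);  /* malformed input reported as-is */
        if (rc)
                return rc;

        rc = -ENXIO;                    /* default: no handler bound */
        mutex_lock(&example_lock);
        if (example_ops && example_ops->set_power) {
                example_ops->set_power(power);
                rc = count;             /* success: whole buffer consumed */
        }
        mutex_unlock(&example_lock);

        return rc;
}
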
diff --git a/drivers/video/backlight/lm3630_bl.c b/drivers/video/backlight/lm3630_bl.c
index 0207bc0..a6d637b 100644
--- a/drivers/video/backlight/lm3630_bl.c
+++ b/drivers/video/backlight/lm3630_bl.c
@@ -37,7 +37,7 @@
 	BLED_2
 };
 
-static const char *bled_name[] = {
+static const char * const bled_name[] = {
 	[BLED_ALL] = "lm3630_bled",	/*Bank1 controls all string */
 	[BLED_1] = "lm3630_bled1",	/*Bank1 controls bled1 */
 	[BLED_2] = "lm3630_bled2",	/*Bank1 or 2 controls bled2 */
diff --git a/drivers/video/backlight/lm3639_bl.c b/drivers/video/backlight/lm3639_bl.c
index b0e1e8b..7ab2d2a 100644
--- a/drivers/video/backlight/lm3639_bl.c
+++ b/drivers/video/backlight/lm3639_bl.c
@@ -214,7 +214,7 @@
 
 }
 
-static DEVICE_ATTR(bled_mode, 0666, NULL, lm3639_bled_mode_store);
+static DEVICE_ATTR(bled_mode, S_IWUSR, NULL, lm3639_bled_mode_store);
 
 /* torch */
 static void lm3639_torch_brightness_set(struct led_classdev *cdev,
diff --git a/drivers/video/backlight/lms283gf05.c b/drivers/video/backlight/lms283gf05.c
index b29c707..55819b3 100644
--- a/drivers/video/backlight/lms283gf05.c
+++ b/drivers/video/backlight/lms283gf05.c
@@ -31,7 +31,7 @@
 };
 
 /* Magic sequences supplied by manufacturer, for details refer to datasheet */
-static struct lms283gf05_seq disp_initseq[] = {
+static const struct lms283gf05_seq disp_initseq[] = {
 	/* REG, VALUE, DELAY */
 	{ 0x07, 0x0000, 0 },
 	{ 0x13, 0x0000, 10 },
@@ -78,7 +78,7 @@
 	{ 0x22, 0x0000, 0 }
 };
 
-static struct lms283gf05_seq disp_pdwnseq[] = {
+static const struct lms283gf05_seq disp_pdwnseq[] = {
 	{ 0x07, 0x0016, 30 },
 
 	{ 0x07, 0x0004, 0 },
@@ -104,7 +104,7 @@
 }
 
 static void lms283gf05_toggle(struct spi_device *spi,
-			struct lms283gf05_seq *seq, int sz)
+				const struct lms283gf05_seq *seq, int sz)
 {
 	char buf[3];
 	int i;
@@ -158,13 +158,10 @@
 	int ret = 0;
 
 	if (pdata != NULL) {
-		ret = devm_gpio_request(&spi->dev, pdata->reset_gpio,
-					"LMS285GF05 RESET");
-		if (ret)
-			return ret;
-
-		ret = gpio_direction_output(pdata->reset_gpio,
-						!pdata->reset_inverted);
+		ret = devm_gpio_request_one(&spi->dev, pdata->reset_gpio,
+				GPIOF_DIR_OUT | (!pdata->reset_inverted ?
+				GPIOF_INIT_HIGH : GPIOF_INIT_LOW),
+				"LMS285GF05 RESET");
 		if (ret)
 			return ret;
 	}
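
This hunk, like the tosa ones later in the series, folds a `devm_gpio_request()` plus `gpio_direction_output()` pair into one `devm_gpio_request_one()` call, which takes the direction and initial level as `GPIOF_*` flags and halves the error handling. The generic shape of the conversion, names hypothetical:

/* before: two calls, two failure paths */
ret = devm_gpio_request(dev, gpio, "example-reset");
if (ret)
        return ret;
ret = gpio_direction_output(gpio, 1);
if (ret)
        return ret;

/* after: ownership, direction and initial level in one call */
ret = devm_gpio_request_one(dev, gpio, GPIOF_OUT_INIT_HIGH,
                            "example-reset");
if (ret)
        return ret;
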
diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c
index 3a6d541..146fea8 100644
--- a/drivers/video/backlight/locomolcd.c
+++ b/drivers/video/backlight/locomolcd.c
@@ -107,7 +107,6 @@
 }
 EXPORT_SYMBOL(locomolcd_power);
 
-
 static int current_intensity;
 
 static int locomolcd_set_intensity(struct backlight_device *bd)
@@ -122,13 +121,25 @@
 		intensity = 0;
 
 	switch (intensity) {
-	/* AC and non-AC are handled differently, but produce same results in sharp code? */
-	case 0: locomo_frontlight_set(locomolcd_dev, 0, 0, 161); break;
-	case 1: locomo_frontlight_set(locomolcd_dev, 117, 0, 161); break;
-	case 2: locomo_frontlight_set(locomolcd_dev, 163, 0, 148); break;
-	case 3: locomo_frontlight_set(locomolcd_dev, 194, 0, 161); break;
-	case 4: locomo_frontlight_set(locomolcd_dev, 194, 1, 161); break;
-
+	/*
+	 * AC and non-AC are handled differently,
+	 * but produce the same results in the Sharp code?
+	 */
+	case 0:
+		locomo_frontlight_set(locomolcd_dev, 0, 0, 161);
+		break;
+	case 1:
+		locomo_frontlight_set(locomolcd_dev, 117, 0, 161);
+		break;
+	case 2:
+		locomo_frontlight_set(locomolcd_dev, 163, 0, 148);
+		break;
+	case 3:
+		locomo_frontlight_set(locomolcd_dev, 194, 0, 161);
+		break;
+	case 4:
+		locomo_frontlight_set(locomolcd_dev, 194, 1, 161);
+		break;
 	default:
 		return -ENODEV;
 	}
@@ -175,9 +186,11 @@
 
 	locomo_gpio_set_dir(ldev->dev.parent, LOCOMO_GPIO_FL_VR, 0);
 
-	/* the poodle_lcd_power function is called for the first time
+	/*
+	 * the poodle_lcd_power function is called for the first time
 	 * from fs_initcall, which is before locomo is activated.
-	 * We need to recall poodle_lcd_power here*/
+	 * We need to recall poodle_lcd_power here
+	 */
 	if (machine_is_poodle())
 		locomolcd_power(1);
 
@@ -190,8 +203,8 @@
 							&ldev->dev, NULL,
 							&locomobl_data, &props);
 
-	if (IS_ERR (locomolcd_bl_device))
-		return PTR_ERR (locomolcd_bl_device);
+	if (IS_ERR(locomolcd_bl_device))
+		return PTR_ERR(locomolcd_bl_device);
 
 	/* Set up frontlight so that screen is readable */
 	locomolcd_bl_device->props.brightness = 2;
@@ -226,7 +239,6 @@
 	.resume = locomolcd_resume,
 };
 
-
 static int __init locomolcd_init(void)
 {
 	return locomo_driver_register(&poodle_lcd_driver);
diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
index fd985e0..6e4db0c 100644
--- a/drivers/video/backlight/lp855x_bl.c
+++ b/drivers/video/backlight/lp855x_bl.c
@@ -15,6 +15,7 @@
 #include <linux/backlight.h>
 #include <linux/err.h>
 #include <linux/platform_data/lp855x.h>
+#include <linux/pwm.h>
 
 /* Registers */
 #define BRIGHTNESS_CTRL		0x00
@@ -34,22 +35,19 @@
 	struct i2c_client *client;
 	struct backlight_device *bl;
 	struct device *dev;
-	struct mutex xfer_lock;
 	struct lp855x_platform_data *pdata;
+	struct pwm_device *pwm;
 };
 
 static int lp855x_read_byte(struct lp855x *lp, u8 reg, u8 *data)
 {
 	int ret;
 
-	mutex_lock(&lp->xfer_lock);
 	ret = i2c_smbus_read_byte_data(lp->client, reg);
 	if (ret < 0) {
-		mutex_unlock(&lp->xfer_lock);
 		dev_err(lp->dev, "failed to read 0x%.2x\n", reg);
 		return ret;
 	}
-	mutex_unlock(&lp->xfer_lock);
 
 	*data = (u8)ret;
 	return 0;
@@ -57,13 +55,7 @@
 
 static int lp855x_write_byte(struct lp855x *lp, u8 reg, u8 data)
 {
-	int ret;
-
-	mutex_lock(&lp->xfer_lock);
-	ret = i2c_smbus_write_byte_data(lp->client, reg, data);
-	mutex_unlock(&lp->xfer_lock);
-
-	return ret;
+	return i2c_smbus_write_byte_data(lp->client, reg, data);
 }
 
 static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr)
@@ -121,6 +113,28 @@
 	return ret;
 }
 
+static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
+{
+	unsigned int period = lp->pdata->period_ns;
+	unsigned int duty = br * period / max_br;
+	struct pwm_device *pwm;
+
+	/* request pwm device with the consumer name */
+	if (!lp->pwm) {
+		pwm = devm_pwm_get(lp->dev, lp->chipname);
+		if (IS_ERR(pwm))
+			return;
+
+		lp->pwm = pwm;
+	}
+
+	pwm_config(lp->pwm, duty, period);
+	if (duty)
+		pwm_enable(lp->pwm);
+	else
+		pwm_disable(lp->pwm);
+}
+
 static int lp855x_bl_update_status(struct backlight_device *bl)
 {
 	struct lp855x *lp = bl_get_data(bl);
@@ -130,12 +144,10 @@
 		bl->props.brightness = 0;
 
 	if (mode == PWM_BASED) {
-		struct lp855x_pwm_data *pd = &lp->pdata->pwm_data;
 		int br = bl->props.brightness;
 		int max_br = bl->props.max_brightness;
 
-		if (pd->pwm_set_intensity)
-			pd->pwm_set_intensity(br, max_br);
+		lp855x_pwm_ctrl(lp, br, max_br);
 
 	} else if (mode == REGISTER_BASED) {
 		u8 val = bl->props.brightness;
@@ -150,14 +162,7 @@
 	struct lp855x *lp = bl_get_data(bl);
 	enum lp855x_brightness_ctrl_mode mode = lp->pdata->mode;
 
-	if (mode == PWM_BASED) {
-		struct lp855x_pwm_data *pd = &lp->pdata->pwm_data;
-		int max_br = bl->props.max_brightness;
-
-		if (pd->pwm_get_intensity)
-			bl->props.brightness = pd->pwm_get_intensity(max_br);
-
-	} else if (mode == REGISTER_BASED) {
+	if (mode == REGISTER_BASED) {
 		u8 val = 0;
 
 		lp855x_read_byte(lp, BRIGHTNESS_CTRL, &val);
@@ -266,8 +271,6 @@
 	lp->chip_id = id->driver_data;
 	i2c_set_clientdata(cl, lp);
 
-	mutex_init(&lp->xfer_lock);
-
 	ret = lp855x_init_registers(lp);
 	if (ret) {
 		dev_err(lp->dev, "i2c communication err: %d", ret);
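
The lp855x rework replaces platform-supplied `pwm_set_intensity()`/`pwm_get_intensity()` callbacks with the generic PWM API: the driver lazily acquires a `pwm_device` by consumer name and converts brightness to a duty cycle. The dropped `xfer_lock` is redundant because the I2C core already serializes individual SMBus transfers. A condensed sketch of the control path, assuming a `period_ns` supplied by platform data and a hypothetical consumer name:

static void example_pwm_ctrl(struct device *dev, struct pwm_device **pwmp,
                             int brightness, int max, unsigned int period_ns)
{
        unsigned int duty = brightness * period_ns / max;

        if (!*pwmp) {
                /* bound once; devm_ ties the reference to the device */
                struct pwm_device *pwm = devm_pwm_get(dev, "example-bl");

                if (IS_ERR(pwm))
                        return;         /* silently skip, as the driver does */
                *pwmp = pwm;
        }

        pwm_config(*pwmp, duty, period_ns);
        if (duty)
                pwm_enable(*pwmp);
        else
                pwm_disable(*pwmp);
}
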
diff --git a/drivers/video/backlight/max8925_bl.c b/drivers/video/backlight/max8925_bl.c
index c6bec7a..2c9bce0 100644
--- a/drivers/video/backlight/max8925_bl.c
+++ b/drivers/video/backlight/max8925_bl.c
@@ -120,15 +120,13 @@
 	res = platform_get_resource(pdev, IORESOURCE_REG, 0);
 	if (!res) {
 		dev_err(&pdev->dev, "No REG resource for mode control!\n");
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 	data->reg_mode_cntl = res->start;
 	res = platform_get_resource(pdev, IORESOURCE_REG, 1);
 	if (!res) {
 		dev_err(&pdev->dev, "No REG resource for control!\n");
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 	data->reg_cntl = res->start;
 
@@ -142,8 +140,7 @@
 					&max8925_backlight_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
-		ret = PTR_ERR(bl);
-		goto out;
+		return PTR_ERR(bl);
 	}
 	bl->props.brightness = MAX_BRIGHTNESS;
 
@@ -166,8 +163,6 @@
 	return 0;
 out_brt:
 	backlight_device_unregister(bl);
-out:
-	devm_kfree(&pdev->dev, data);
 	return ret;
 }
 
diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c
index 9a046a4..af31c26 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -42,12 +42,12 @@
 	struct omap_backlight_config *pdata;
 };
 
-static void inline omapbl_send_intensity(int intensity)
+static inline void omapbl_send_intensity(int intensity)
 {
 	omap_writeb(intensity, OMAP_PWL_ENABLE);
 }
 
-static void inline omapbl_send_enable(int enable)
+static inline void omapbl_send_enable(int enable)
 {
 	omap_writeb(enable, OMAP_PWL_CLK_ENABLE);
 }
diff --git a/drivers/video/backlight/pandora_bl.c b/drivers/video/backlight/pandora_bl.c
index 4ec3074..633b0a2 100644
--- a/drivers/video/backlight/pandora_bl.c
+++ b/drivers/video/backlight/pandora_bl.c
@@ -71,8 +71,7 @@
 		 * set PWM duty cycle to max. TPS61161 seems to use this
 		 * to calibrate it's PWM sensitivity when it starts.
 		 */
-		twl_i2c_write_u8(TWL4030_MODULE_PWM0, MAX_VALUE,
-					TWL_PWM0_OFF);
+		twl_i2c_write_u8(TWL_MODULE_PWM, MAX_VALUE, TWL_PWM0_OFF);
 
 		/* first enable clock, then PWM0 out */
 		twl_i2c_read_u8(TWL4030_MODULE_INTBR, &r, TWL_INTBR_GPBR1);
@@ -90,8 +89,7 @@
 		usleep_range(2000, 10000);
 	}
 
-	twl_i2c_write_u8(TWL4030_MODULE_PWM0, MIN_VALUE + brightness,
-				TWL_PWM0_OFF);
+	twl_i2c_write_u8(TWL_MODULE_PWM, MIN_VALUE + brightness, TWL_PWM0_OFF);
 
 done:
 	if (brightness != 0)
@@ -132,7 +130,7 @@
 	platform_set_drvdata(pdev, bl);
 
 	/* 64 cycle period, ON position 0 */
-	twl_i2c_write_u8(TWL4030_MODULE_PWM0, 0x80, TWL_PWM0_ON);
+	twl_i2c_write_u8(TWL_MODULE_PWM, 0x80, TWL_PWM0_ON);
 
 	bl->props.state |= PANDORABL_WAS_OFF;
 	bl->props.brightness = MAX_USER_VALUE;
diff --git a/drivers/video/backlight/pcf50633-backlight.c b/drivers/video/backlight/pcf50633-backlight.c
index 0087396..e87c7a3 100644
--- a/drivers/video/backlight/pcf50633-backlight.c
+++ b/drivers/video/backlight/pcf50633-backlight.c
@@ -52,7 +52,7 @@
 	pcf_bl->brightness_limit = limit & 0x3f;
 	backlight_update_status(pcf_bl->bl);
 
-    return 0;
+	return 0;
 }
 
 static int pcf50633_bl_update_status(struct backlight_device *bl)
@@ -136,8 +136,10 @@
 
 	pcf50633_reg_write(pcf_bl->pcf, PCF50633_REG_LEDDIM, pdata->ramp_time);
 
-	/* Should be different from bl_props.brightness, so we do not exit
-	 * update_status early the first time it's called */
+	/*
+	 * Should be different from bl_props.brightness, so we do not exit
+	 * update_status early the first time it's called
+	 */
 	pcf_bl->brightness = pcf_bl->bl->props.brightness + 1;
 
 	backlight_update_status(pcf_bl->bl);
diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c
index 894bfc5..17a6b83 100644
--- a/drivers/video/backlight/platform_lcd.c
+++ b/drivers/video/backlight/platform_lcd.c
@@ -27,7 +27,7 @@
 	struct plat_lcd_data	*pdata;
 
 	unsigned int		 power;
-	unsigned int		 suspended : 1;
+	unsigned int		 suspended:1;
 };
 
 static inline struct platform_lcd *to_our_lcd(struct lcd_device *lcd)
diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c
index 484e10d..3e1c113 100644
--- a/drivers/video/backlight/s6e63m0.c
+++ b/drivers/video/backlight/s6e63m0.c
@@ -757,7 +757,7 @@
 	lcd->spi = spi;
 	lcd->dev = &spi->dev;
 
-	lcd->lcd_pd = (struct lcd_platform_data *)spi->dev.platform_data;
+	lcd->lcd_pd = spi->dev.platform_data;
 	if (!lcd->lcd_pd) {
 		dev_err(&spi->dev, "platform data is NULL.\n");
 		return -EFAULT;
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index 146ffb9..ad2325f 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -2,7 +2,7 @@
  * tdo24m - SPI-based drivers for Toppoly TDO24M series LCD panels
  *
  * Copyright (C) 2008 Marvell International Ltd.
- * 	Eric Miao <eric.miao@marvell.com>
+ *	Eric Miao <eric.miao@marvell.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
@@ -47,7 +47,7 @@
 			((x1) << 9) | 0x100 | (x2))
 #define CMD_NULL	(-1)
 
-static uint32_t lcd_panel_reset[] = {
+static const uint32_t lcd_panel_reset[] = {
 	CMD0(0x1), /* reset */
 	CMD0(0x0), /* nop */
 	CMD0(0x0), /* nop */
@@ -55,7 +55,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_panel_on[] = {
+static const uint32_t lcd_panel_on[] = {
 	CMD0(0x29),		/* Display ON */
 	CMD2(0xB8, 0xFF, 0xF9),	/* Output Control */
 	CMD0(0x11),		/* Sleep out */
@@ -63,7 +63,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_panel_off[] = {
+static const uint32_t lcd_panel_off[] = {
 	CMD0(0x28),		/* Display OFF */
 	CMD2(0xB8, 0x80, 0x02),	/* Output Control */
 	CMD0(0x10),		/* Sleep in */
@@ -71,7 +71,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_vga_pass_through_tdo24m[] = {
+static const uint32_t lcd_vga_pass_through_tdo24m[] = {
 	CMD1(0xB0, 0x16),
 	CMD1(0xBC, 0x80),
 	CMD1(0xE1, 0x00),
@@ -80,7 +80,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_qvga_pass_through_tdo24m[] = {
+static const uint32_t lcd_qvga_pass_through_tdo24m[] = {
 	CMD1(0xB0, 0x16),
 	CMD1(0xBC, 0x81),
 	CMD1(0xE1, 0x00),
@@ -89,8 +89,8 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_vga_transfer_tdo24m[] = {
-	CMD1(0xcf, 0x02), 	/* Blanking period control (1) */
+static const uint32_t lcd_vga_transfer_tdo24m[] = {
+	CMD1(0xcf, 0x02),	/* Blanking period control (1) */
 	CMD2(0xd0, 0x08, 0x04),	/* Blanking period control (2) */
 	CMD1(0xd1, 0x01),	/* CKV timing control on/off */
 	CMD2(0xd2, 0x14, 0x00),	/* CKV 1,2 timing control */
@@ -102,7 +102,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_qvga_transfer[] = {
+static const uint32_t lcd_qvga_transfer[] = {
 	CMD1(0xd6, 0x02),	/* Blanking period control (1) */
 	CMD2(0xd7, 0x08, 0x04),	/* Blanking period control (2) */
 	CMD1(0xd8, 0x01),	/* CKV timing control on/off */
@@ -115,7 +115,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_vga_pass_through_tdo35s[] = {
+static const uint32_t lcd_vga_pass_through_tdo35s[] = {
 	CMD1(0xB0, 0x16),
 	CMD1(0xBC, 0x80),
 	CMD1(0xE1, 0x00),
@@ -123,7 +123,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_qvga_pass_through_tdo35s[] = {
+static const uint32_t lcd_qvga_pass_through_tdo35s[] = {
 	CMD1(0xB0, 0x16),
 	CMD1(0xBC, 0x81),
 	CMD1(0xE1, 0x00),
@@ -131,8 +131,8 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_vga_transfer_tdo35s[] = {
-	CMD1(0xcf, 0x02), 	/* Blanking period control (1) */
+static const uint32_t lcd_vga_transfer_tdo35s[] = {
+	CMD1(0xcf, 0x02),	/* Blanking period control (1) */
 	CMD2(0xd0, 0x08, 0x04),	/* Blanking period control (2) */
 	CMD1(0xd1, 0x01),	/* CKV timing control on/off */
 	CMD2(0xd2, 0x00, 0x1e),	/* CKV 1,2 timing control */
@@ -144,7 +144,7 @@
 	CMD_NULL,
 };
 
-static uint32_t lcd_panel_config[] = {
+static const uint32_t lcd_panel_config[] = {
 	CMD2(0xb8, 0xff, 0xf9),	/* Output control */
 	CMD0(0x11),		/* sleep out */
 	CMD1(0xba, 0x01),	/* Display mode (1) */
@@ -175,10 +175,11 @@
 	CMD_NULL,
 };
 
-static int tdo24m_writes(struct tdo24m *lcd, uint32_t *array)
+static int tdo24m_writes(struct tdo24m *lcd, const uint32_t *array)
 {
 	struct spi_transfer *x = &lcd->xfer;
-	uint32_t data, *p = array;
+	const uint32_t *p = array;
+	uint32_t data;
 	int nparams, err = 0;
 
 	for (; *p != CMD_NULL; p++) {
diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c
index a0521ab..588682c 100644
--- a/drivers/video/backlight/tosa_bl.c
+++ b/drivers/video/backlight/tosa_bl.c
@@ -92,14 +92,12 @@
 
 	data->comadj = sharpsl_param.comadj == -1 ? COMADJ_DEFAULT : sharpsl_param.comadj;
 
-	ret = devm_gpio_request(&client->dev, TOSA_GPIO_BL_C20MA, "backlight");
+	ret = devm_gpio_request_one(&client->dev, TOSA_GPIO_BL_C20MA,
+				GPIOF_OUT_INIT_LOW, "backlight");
 	if (ret) {
 		dev_dbg(&data->bl->dev, "Unable to request gpio!\n");
 		return ret;
 	}
-	ret = gpio_direction_output(TOSA_GPIO_BL_C20MA, 0);
-	if (ret)
-		return ret;
 
 	i2c_set_clientdata(client, data);
 	data->i2c = client;
@@ -163,7 +161,6 @@
 	{ },
 };
 
-
 static struct i2c_driver tosa_bl_driver = {
 	.driver = {
 		.name		= "tosa-bl",
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index 86fff88..96bae94 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -63,7 +63,7 @@
 int tosa_bl_enable(struct spi_device *spi, int enable)
 {
 	/* bl_enable GP04=1 otherwise GP04=0*/
-	return tosa_tg_send(spi, TG_GPODR2, enable? 0x01 : 0x00);
+	return tosa_tg_send(spi, TG_GPODR2, enable ? 0x01 : 0x00);
 }
 EXPORT_SYMBOL(tosa_bl_enable);
 
@@ -91,15 +91,17 @@
 	tosa_tg_send(spi, TG_PNLCTL, value);
 
 	/* TG LCD pannel power up */
-	tosa_tg_send(spi, TG_PINICTL,0x4);
+	tosa_tg_send(spi, TG_PINICTL, 0x4);
 	mdelay(50);
 
 	/* TG LCD GVSS */
-	tosa_tg_send(spi, TG_PINICTL,0x0);
+	tosa_tg_send(spi, TG_PINICTL, 0x0);
 
 	if (!data->i2c) {
-		/* after the pannel is powered up the first time, we can access the i2c bus */
-		/* so probe for the DAC */
+		/*
+		 * After the panel is powered up the first time,
+		 * we can access the I2C bus, so probe for the DAC.
+		 */
 		struct i2c_adapter *adap = i2c_get_adapter(0);
 		struct i2c_board_info info = {
 			.type	= "tosa-bl",
@@ -115,11 +117,11 @@
 	struct spi_device *spi = data->spi;
 
 	/* TG LCD VHSA off */
-	tosa_tg_send(spi, TG_PINICTL,0x4);
+	tosa_tg_send(spi, TG_PINICTL, 0x4);
 	mdelay(50);
 
 	/* TG LCD signal off */
-	tosa_tg_send(spi, TG_PINICTL,0x6);
+	tosa_tg_send(spi, TG_PINICTL, 0x6);
 	mdelay(50);
 
 	/* TG Off */
@@ -193,17 +195,13 @@
 	data->spi = spi;
 	dev_set_drvdata(&spi->dev, data);
 
-	ret = devm_gpio_request(&spi->dev, TOSA_GPIO_TG_ON, "tg #pwr");
+	ret = devm_gpio_request_one(&spi->dev, TOSA_GPIO_TG_ON,
+				GPIOF_OUT_INIT_LOW, "tg #pwr");
 	if (ret < 0)
 		goto err_gpio_tg;
 
 	mdelay(60);
 
-	ret = gpio_direction_output(TOSA_GPIO_TG_ON, 0);
-	if (ret < 0)
-		goto err_gpio_tg;
-
-	mdelay(60);
 	tosa_lcd_tg_init(data);
 
 	tosa_lcd_tg_on(data);
diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c
index 712b0ac..45e81b4 100644
--- a/drivers/video/backlight/vgg2432a4.c
+++ b/drivers/video/backlight/vgg2432a4.c
@@ -26,7 +26,7 @@
 
 /* Device initialisation sequences */
 
-static struct ili9320_reg vgg_init1[] = {
+static const struct ili9320_reg vgg_init1[] = {
 	{
 		.address = ILI9320_POWER1,
 		.value	 = ILI9320_POWER1_AP(0) | ILI9320_POWER1_BT(0),
@@ -43,7 +43,7 @@
 	},
 };
 
-static struct ili9320_reg vgg_init2[] = {
+static const struct ili9320_reg vgg_init2[] = {
 	{
 		.address = ILI9320_POWER1,
 		.value   = (ILI9320_POWER1_AP(3) | ILI9320_POWER1_APE |
@@ -54,7 +54,7 @@
 	}
 };
 
-static struct ili9320_reg vgg_gamma[] = {
+static const struct ili9320_reg vgg_gamma[] = {
 	{
 		.address = ILI9320_GAMMA1,
 		.value	 = 0x0000,
@@ -89,7 +89,7 @@
 
 };
 
-static struct ili9320_reg vgg_init0[] = {
+static const struct ili9320_reg vgg_init0[] = {
 	[0]	= {
 		/* set direction and scan mode gate */
 		.address = ILI9320_DRIVER,
@@ -217,7 +217,7 @@
 }
 #else
 #define vgg2432a4_suspend	NULL
-#define vgg2432a4_resume 	NULL
+#define vgg2432a4_resume	NULL
 #endif
 
 static struct ili9320_client vgg2432a4_client = {
diff --git a/drivers/video/gxt4500.c b/drivers/video/gxt4500.c
index 0e9afa4..4bdea6e 100644
--- a/drivers/video/gxt4500.c
+++ b/drivers/video/gxt4500.c
@@ -1,5 +1,6 @@
 /*
- * Frame buffer device for IBM GXT4500P and GXT6000P display adaptors
+ * Frame buffer device for IBM GXT4500P/6500P and GXT4000P/6000P
+ * display adaptors
  *
  * Copyright (C) 2006 Paul Mackerras, IBM Corp. <paulus@samba.org>
  */
@@ -14,6 +15,8 @@
 #include <linux/string.h>
 
 #define PCI_DEVICE_ID_IBM_GXT4500P	0x21c
+#define PCI_DEVICE_ID_IBM_GXT6500P	0x21b
+#define PCI_DEVICE_ID_IBM_GXT4000P	0x16e
 #define PCI_DEVICE_ID_IBM_GXT6000P	0x170
 
 /* GXT4500P registers */
@@ -173,6 +176,8 @@
 /* List of supported cards */
 enum gxt_cards {
 	GXT4500P,
+	GXT6500P,
+	GXT4000P,
 	GXT6000P
 };
 
@@ -182,6 +187,8 @@
 	const char *cardname;
 } cardinfo[] = {
 	[GXT4500P] = { .refclk_ps = 9259, .cardname = "IBM GXT4500P" },
+	[GXT6500P] = { .refclk_ps = 9259, .cardname = "IBM GXT6500P" },
+	[GXT4000P] = { .refclk_ps = 40000, .cardname = "IBM GXT4000P" },
 	[GXT6000P] = { .refclk_ps = 40000, .cardname = "IBM GXT6000P" },
 };
 
@@ -736,6 +743,10 @@
 static const struct pci_device_id gxt4500_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_GXT4500P),
 	  .driver_data = GXT4500P },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_GXT6500P),
+	  .driver_data = GXT6500P },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_GXT4000P),
+	  .driver_data = GXT4000P },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_GXT6000P),
 	  .driver_data = GXT6000P },
 	{ 0 }
@@ -768,7 +779,7 @@
 module_exit(gxt4500_exit);
 
 MODULE_AUTHOR("Paul Mackerras <paulus@samba.org>");
-MODULE_DESCRIPTION("FBDev driver for IBM GXT4500P/6000P");
+MODULE_DESCRIPTION("FBDev driver for IBM GXT4500P/6500P and GXT4000P/6000P");
 MODULE_LICENSE("GPL");
 module_param(mode_option, charp, 0);
 MODULE_PARM_DESC(mode_option, "Specify resolution as \"<xres>x<yres>[-<bpp>][@<refresh>]\"");
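
Adding the two device IDs is enough because the driver indexes `cardinfo[]` with `driver_data`: probe receives the matching table entry, and the enum value selects the per-card reference clock. A sketch of how the two tables connect, abridged and with a hypothetical probe:

static int example_probe(struct pci_dev *pdev,
                         const struct pci_device_id *ent)
{
        /* ent is the gxt4500_pci_tbl[] row that matched this device */
        const char *name = cardinfo[ent->driver_data].cardname;

        dev_info(&pdev->dev, "%s, refclk %d ps\n", name,
                 cardinfo[ent->driver_data].refclk_ps);
        return 0;
}
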
diff --git a/drivers/video/omap2/Kconfig b/drivers/video/omap2/Kconfig
index 346d67d..b07b2b0 100644
--- a/drivers/video/omap2/Kconfig
+++ b/drivers/video/omap2/Kconfig
@@ -1,6 +1,10 @@
 config OMAP2_VRFB
 	bool
 
+if ARCH_OMAP2PLUS
+
 source "drivers/video/omap2/dss/Kconfig"
 source "drivers/video/omap2/omapfb/Kconfig"
 source "drivers/video/omap2/displays/Kconfig"
+
+endif
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 4939e0c..d294f67 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -796,9 +796,6 @@
 	struct device_node *node;
 	int ret;
 
-	if (!(mfmsr() & MSR_GS))
-		return 0;
-
 	node = of_find_node_by_path("/hypervisor");
 	if (!node)
 		return 0;
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 809b0de..ee59b74 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -10,33 +10,32 @@
 static ssize_t device_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%04x\n", dev->id.device);
 }
 static ssize_t vendor_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%04x\n", dev->id.vendor);
 }
 static ssize_t status_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
 }
 static ssize_t modalias_show(struct device *_d,
 			     struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "virtio:d%08Xv%08X\n",
 		       dev->id.device, dev->id.vendor);
 }
 static ssize_t features_show(struct device *_d,
 			     struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	unsigned int i;
 	ssize_t len = 0;
 
@@ -71,10 +70,10 @@
 static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
 {
 	unsigned int i;
-	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_dv);
 	const struct virtio_device_id *ids;
 
-	ids = container_of(_dr, struct virtio_driver, driver)->id_table;
+	ids = drv_to_virtio(_dr)->id_table;
 	for (i = 0; ids[i].device; i++)
 		if (virtio_id_match(dev, &ids[i]))
 			return 1;
@@ -83,7 +82,7 @@
 
 static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
 {
-	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_dv);
 
 	return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
 			      dev->id.device, dev->id.vendor);
@@ -98,8 +97,7 @@
 					 unsigned int fbit)
 {
 	unsigned int i;
-	struct virtio_driver *drv = container_of(vdev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 
 	for (i = 0; i < drv->feature_table_size; i++)
 		if (drv->feature_table[i] == fbit)
@@ -111,9 +109,8 @@
 static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-	struct virtio_driver *drv = container_of(dev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_device *dev = dev_to_virtio(_d);
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 	u32 device_features;
 
 	/* We have a driver! */
@@ -152,9 +149,8 @@
 
 static int virtio_dev_remove(struct device *_d)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-	struct virtio_driver *drv = container_of(dev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_device *dev = dev_to_virtio(_d);
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 
 	drv->remove(dev);
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 2a70558..d19fe3e 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -139,10 +139,9 @@
 		struct page *page = balloon_page_enqueue(vb_dev_info);
 
 		if (!page) {
-			if (printk_ratelimit())
-				dev_printk(KERN_INFO, &vb->vdev->dev,
-					   "Out of puff! Can't get %u pages\n",
-					   VIRTIO_BALLOON_PAGES_PER_PAGE);
+			dev_info_ratelimited(&vb->vdev->dev,
+					     "Out of puff! Can't get %u pages\n",
+					     VIRTIO_BALLOON_PAGES_PER_PAGE);
 			/* Sleep for at least 1/5 of a second before retry. */
 			msleep(200);
 			break;
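
The `dev_to_virtio()`/`drv_to_virtio()` conversions in the virtio core hunks above are just named wrappers around `container_of()`; the win is one place to get the pointer arithmetic right instead of repeating the incantation at every call site. A self-contained userspace model, with a simplified `container_of`:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct device { int id; };
struct virtio_device { int vendor; struct device dev; };

static inline struct virtio_device *dev_to_virtio(struct device *d)
{
        return container_of(d, struct virtio_device, dev);
}

int main(void)
{
        struct virtio_device vdev = { .vendor = 0x1af4 };

        /* recover the containing object from the embedded member */
        printf("vendor 0x%04x\n", dev_to_virtio(&vdev.dev)->vendor);
        return 0;
}
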
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 6b1b7e1..634f80b 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -225,7 +225,7 @@
 
 	/* We write the queue's selector into the notification register to
 	 * signal the other end */
-	writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+	writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
 }
 
 /* Notify all virtqueues on an interrupt. */
@@ -266,7 +266,7 @@
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
 	struct virtio_mmio_vq_info *info = vq->priv;
 	unsigned long flags, size;
-	unsigned int index = virtqueue_get_queue_index(vq);
+	unsigned int index = vq->index;
 
 	spin_lock_irqsave(&vm_dev->lock, flags);
 	list_del(&info->node);
@@ -521,25 +521,33 @@
 	int err;
 	struct resource resources[2] = {};
 	char *str;
-	long long int base;
+	long long int base, size;
+	unsigned int irq;
 	int processed, consumed = 0;
 	struct platform_device *pdev;
 
-	resources[0].flags = IORESOURCE_MEM;
-	resources[1].flags = IORESOURCE_IRQ;
+	/* Consume "size" part of the command line parameter */
+	size = memparse(device, &str);
 
-	resources[0].end = memparse(device, &str) - 1;
-
+	/* Get "@<base>:<irq>[:<id>]" chunks */
 	processed = sscanf(str, "@%lli:%u%n:%d%n",
-			&base, &resources[1].start, &consumed,
+			&base, &irq, &consumed,
 			&vm_cmdline_id, &consumed);
 
-	if (processed < 2 || processed > 3 || str[consumed])
+	/*
+	 * sscanf() must process at least 2 chunks; also there
+	 * must be no extra characters after the last chunk, so
+	 * str[consumed] must be '\0'
+	 */
+	if (processed < 2 || str[consumed])
 		return -EINVAL;
 
+	resources[0].flags = IORESOURCE_MEM;
 	resources[0].start = base;
-	resources[0].end += base;
-	resources[1].end = resources[1].start;
+	resources[0].end = base + size - 1;
+
+	resources[1].flags = IORESOURCE_IRQ;
+	resources[1].start = resources[1].end = irq;
 
 	if (!vm_cmdline_parent_registered) {
 		err = device_register(&vm_cmdline_parent);
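
The virtio_mmio command-line rework separates the three chunks — `memparse()` for the size (so `K`/`M`/`G` suffixes work), then `sscanf()` for `@<base>:<irq>[:<id>]` — and fills in both resources only after parsing succeeds. A userspace approximation of the parse, using `strtoull()`, which unlike `memparse()` does not handle size suffixes:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *device = "0x200@0x10001000:42:3";
        char *str;
        long long base;
        unsigned int irq;
        int id = -1, processed, consumed = 0;

        unsigned long long size = strtoull(device, &str, 0);

        processed = sscanf(str, "@%lli:%u%n:%d%n",
                           &base, &irq, &consumed, &id, &consumed);
        /* at least "@base:irq", and nothing may follow the last chunk */
        if (processed < 2 || str[consumed] != '\0')
                return 1;

        printf("size=%llu base=%#llx irq=%u id=%d\n",
               size, (unsigned long long)base, irq, id);
        return 0;
}
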
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index c33aea3..e3ecc94 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -203,8 +203,7 @@
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
-	iowrite16(virtqueue_get_queue_index(vq),
-		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -479,8 +478,7 @@
 	list_del(&info->node);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	iowrite16(virtqueue_get_queue_index(vq),
-		vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
 	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -830,16 +828,4 @@
 #endif
 };
 
-static int __init virtio_pci_init(void)
-{
-	return pci_register_driver(&virtio_pci_driver);
-}
-
-module_init(virtio_pci_init);
-
-static void __exit virtio_pci_exit(void)
-{
-	pci_unregister_driver(&virtio_pci_driver);
-}
-
-module_exit(virtio_pci_exit);
+module_pci_driver(virtio_pci_driver);
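
`module_pci_driver()` generates exactly the init/exit boilerplate it replaces; its definition in <linux/pci.h> is essentially:

#define module_pci_driver(__pci_driver) \
        module_driver(__pci_driver, pci_register_driver, \
                      pci_unregister_driver)
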
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e639584..ffd7e7d 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -93,8 +93,6 @@
 	/* Host publishes avail event idx */
 	bool event;
 
-	/* Number of free buffers */
-	unsigned int num_free;
 	/* Head of free buffer list. */
 	unsigned int free_head;
 	/* Number we've added since last sync. */
@@ -106,9 +104,6 @@
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
-	/* Index of the queue */
-	int queue_index;
-
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -135,6 +130,13 @@
 	unsigned head;
 	int i;
 
+	/*
+	 * We require lowmem mappings for the descriptors because
+	 * otherwise virt_to_phys will give us bogus addresses in the
+	 * virtqueue.
+	 */
+	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
+
 	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
 	if (!desc)
 		return -ENOMEM;
@@ -160,7 +162,7 @@
 	desc[i-1].next = 0;
 
 	/* We're about to use a buffer */
-	vq->num_free--;
+	vq->vq.num_free--;
 
 	/* Use a single buffer which doesn't continue */
 	head = vq->free_head;
@@ -174,13 +176,6 @@
 	return head;
 }
 
-int virtqueue_get_queue_index(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	return vq->queue_index;
-}
-EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);
-
 /**
  * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
@@ -193,10 +188,7 @@
  * Caller must ensure we don't call this with other virtqueue operations
  * at the same time (except where noted).
  *
- * Returns remaining capacity of queue or a negative error
- * (ie. ENOSPC).  Note that it only really makes sense to treat all
- * positive return values as "available": indirect buffers mean that
- * we can put an entire sg[] array inside a single queue entry.
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
  */
 int virtqueue_add_buf(struct virtqueue *_vq,
 		      struct scatterlist sg[],
@@ -228,7 +220,7 @@
 
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && (out + in) > 1 && vq->num_free) {
+	if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
 		head = vring_add_indirect(vq, sg, out, in, gfp);
 		if (likely(head >= 0))
 			goto add_head;
@@ -237,9 +229,9 @@
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
-	if (vq->num_free < out + in) {
+	if (vq->vq.num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
-			 out + in, vq->num_free);
+			 out + in, vq->vq.num_free);
 		/* FIXME: for historical reasons, we force a notify here if
 		 * there are outgoing parts to the buffer.  Presumably the
 		 * host should service the ring ASAP. */
@@ -250,7 +242,7 @@
 	}
 
 	/* We're about to use some buffers from the free list. */
-	vq->num_free -= out + in;
+	vq->vq.num_free -= out + in;
 
 	head = vq->free_head;
 	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
@@ -296,7 +288,7 @@
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
 
-	return vq->num_free;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
@@ -393,13 +385,13 @@
 
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
-		vq->num_free++;
+		vq->vq.num_free++;
 	}
 
 	vq->vring.desc[i].next = vq->free_head;
 	vq->free_head = head;
 	/* Plus final descriptor */
-	vq->num_free++;
+	vq->vq.num_free++;
 }
 
 static inline bool more_used(const struct vring_virtqueue *vq)
@@ -599,7 +591,7 @@
 		return buf;
 	}
 	/* That should have freed everything. */
-	BUG_ON(vq->num_free != vq->vring.num);
+	BUG_ON(vq->vq.num_free != vq->vring.num);
 
 	END_USE(vq);
 	return NULL;
@@ -653,12 +645,13 @@
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
+	vq->vq.num_free = num;
+	vq->vq.index = index;
 	vq->notify = notify;
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
-	vq->queue_index = index;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
@@ -673,7 +666,6 @@
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 
 	/* Put everything in free lists. */
-	vq->num_free = num;
 	vq->free_head = 0;
 	for (i = 0; i < num-1; i++) {
 		vq->vring.desc[i].next = i+1;
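
Two virtio_ring cleanups land together here: the queue index and free-buffer count move from the private `vring_virtqueue` into the public `struct virtqueue` (so drivers read `vq->index` and `vq->num_free` directly instead of calling accessors), and `virtqueue_add_buf()` stops returning remaining capacity — misleading with indirect descriptors anyway — in favour of plain zero/negative-errno. A caller-side sketch, with `watermark` and `stop_queue()` hypothetical:

err = virtqueue_add_buf(vq, sg, out, in, data, GFP_ATOMIC);
if (err < 0)            /* -ENOSPC: ring full, -ENOMEM: indirect alloc failed */
        return err;

/* fullness hint, formerly the positive return value */
if (vq->num_free < watermark)
        stop_queue();
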
diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig
index c433a74..e8ca63a 100644
--- a/drivers/w1/masters/Kconfig
+++ b/drivers/w1/masters/Kconfig
@@ -60,6 +60,7 @@
 
 config HDQ_MASTER_OMAP
 	tristate "OMAP HDQ driver"
+	depends on ARCH_OMAP
 	help
 	  Say Y here if you want support for the 1-wire or HDQ Interface
 	  on an OMAP processor.
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 58db6df..af47e75 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -338,9 +338,8 @@
 				enum dma_data_direction dir,
 				struct dma_attrs *attrs)
 {
-	phys_addr_t phys = page_to_phys(page) + offset;
+	phys_addr_t map, phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = xen_phys_to_bus(phys);
-	void *map;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
@@ -356,10 +355,10 @@
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
 	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
-	if (!map)
+	if (map == SWIOTLB_MAP_ERROR)
 		return DMA_ERROR_CODE;
 
-	dev_addr = xen_virt_to_bus(map);
+	dev_addr = xen_phys_to_bus(map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
@@ -389,7 +388,7 @@
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
-		swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
 		return;
 	}
 
@@ -434,8 +433,7 @@
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
-		swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
-				       target);
+		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
 		return;
 	}
 
@@ -494,11 +492,12 @@
 		if (swiotlb_force ||
 		    !dma_capable(hwdev, dev_addr, sg->length) ||
 		    range_straddles_page_boundary(paddr, sg->length)) {
-			void *map = swiotlb_tbl_map_single(hwdev,
-							   start_dma_addr,
-							   sg_phys(sg),
-							   sg->length, dir);
-			if (!map) {
+			phys_addr_t map = swiotlb_tbl_map_single(hwdev,
+								 start_dma_addr,
+								 sg_phys(sg),
+								 sg->length,
+								 dir);
+			if (map == SWIOTLB_MAP_ERROR) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
 				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
@@ -506,7 +505,7 @@
 				sgl[0].dma_length = 0;
 				return DMA_ERROR_CODE;
 			}
-			sg->dma_address = xen_virt_to_bus(map);
+			sg->dma_address = xen_phys_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
diff --git a/fs/Kconfig b/fs/Kconfig
index f95ae3a..cfe512f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -28,8 +28,8 @@
 	tristate
 	default y if EXT2_FS=y && EXT2_FS_XATTR
 	default y if EXT3_FS=y && EXT3_FS_XATTR
-	default y if EXT4_FS=y && EXT4_FS_XATTR
-	default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR
+	default y if EXT4_FS=y
+	default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS
 
 source "fs/reiserfs/Kconfig"
 source "fs/jfs/Kconfig"
@@ -220,6 +220,7 @@
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
+source "fs/f2fs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index 1d7af79..9d53192 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -123,6 +123,7 @@
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_BTRFS_FS)		+= btrfs/
 obj-$(CONFIG_GFS2_FS)           += gfs2/
+obj-$(CONFIG_F2FS_FS)		+= f2fs/
 obj-y				+= exofs/ # Multiple modules
 obj-$(CONFIG_CEPH_FS)		+= ceph/
 obj-$(CONFIG_PSTORE)		+= pstore/
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index e9bad50..5f95d1e 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -45,6 +45,14 @@
 	return block_read_full_page(page, adfs_get_block);
 }
 
+static void adfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size)
+		truncate_pagecache(inode, to, inode->i_size);
+}
+
 static int adfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -55,11 +63,8 @@
 	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				adfs_get_block,
 				&ADFS_I(mapping->host)->mmu_private);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		adfs_write_failed(mapping, pos + len);
 
 	return ret;
 }
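
This is the first of several instances (affs and bfs follow the same shape below) of open-coding what `vmtruncate()` used to do after a failed `->write_begin()`: if the write was extending the file, pages may have been instantiated beyond `i_size` and must be dropped again, or stale pagecache survives past EOF. The pattern, inlined and with hypothetical names:

static int example_write_begin(struct file *file,
                struct address_space *mapping, loff_t pos, unsigned len,
                unsigned flags, struct page **pagep, void **fsdata)
{
        int ret = block_write_begin(mapping, pos, len, flags, pagep,
                                    example_get_block);

        /* on failure, drop any pagecache instantiated past i_size */
        if (unlikely(ret) && pos + len > mapping->host->i_size)
                truncate_pagecache(mapping->host, pos + len,
                                   mapping->host->i_size);
        return ret;
}
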
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 2f4c935..af3261b 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -39,7 +39,6 @@
 };
 
 const struct inode_operations affs_file_inode_operations = {
-	.truncate	= affs_truncate,
 	.setattr	= affs_notify_change,
 };
 
@@ -402,6 +401,16 @@
 	return block_read_full_page(page, affs_get_block);
 }
 
+static void affs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		affs_truncate(inode);
+	}
+}
+
 static int affs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -412,11 +421,8 @@
 	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				affs_get_block,
 				&AFFS_I(mapping->host)->mmu_private);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		affs_write_failed(mapping, pos + len);
 
 	return ret;
 }
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 15c4842..0e092d0 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -237,9 +237,12 @@
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
+		error = inode_newsize_ok(inode, attr->ia_size);
 		if (error)
 			return error;
+
+		truncate_setsize(inode, attr->ia_size);
+		affs_truncate(inode);
 	}
 
 	setattr_copy(inode, attr);
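
On the `->setattr()` side, `vmtruncate()` decomposes into three explicit steps: validate the new size, update `i_size` plus pagecache, then let the filesystem free its blocks. In outline, with `example_fs_truncate()` standing in for the per-filesystem hook:

if ((attr->ia_valid & ATTR_SIZE) &&
    attr->ia_size != i_size_read(inode)) {
        error = inode_newsize_ok(inode, attr->ia_size); /* rlimits etc. */
        if (error)
                return error;

        truncate_setsize(inode, attr->ia_size); /* i_size + pagecache */
        example_fs_truncate(inode);             /* fs-specific block freeing */
}
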
diff --git a/fs/attr.c b/fs/attr.c
index cce7df5..1449adb 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -49,14 +49,15 @@
 	/* Make sure a caller can chown. */
 	if ((ia_valid & ATTR_UID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
-	     !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN))
+	     !uid_eq(attr->ia_uid, inode->i_uid)) &&
+	    !inode_capable(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure caller can chgrp. */
 	if ((ia_valid & ATTR_GID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
 	    (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-	    !capable(CAP_CHOWN))
+	    !inode_capable(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure a caller can chmod. */
@@ -65,7 +66,8 @@
 			return -EPERM;
 		/* Also check the setgid bit! */
 		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
-				inode->i_gid) && !capable(CAP_FSETID))
+				inode->i_gid) &&
+		    !inode_capable(inode, CAP_FSETID))
 			attr->ia_mode &= ~S_ISGID;
 	}
 
@@ -157,7 +159,8 @@
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
 
-		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+		if (!in_group_p(inode->i_gid) &&
+		    !inode_capable(inode, CAP_FSETID))
 			mode &= ~S_ISGID;
 		inode->i_mode = mode;
 	}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 908e184..b785e77 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@
 	unsigned long last_used;
 	atomic_t count;
 
-	uid_t uid;
-	gid_t gid;
+	kuid_t uid;
+	kgid_t gid;
 };
 
 #define AUTOFS_INF_EXPIRING	(1<<0) /* dentry is in the process of expiring */
@@ -89,8 +89,8 @@
 	struct qstr name;
 	u32 dev;
 	u64 ino;
-	uid_t uid;
-	gid_t gid;
+	kuid_t uid;
+	kgid_t gid;
 	pid_t pid;
 	pid_t tgid;
 	/* This is for status reporting upon return */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index a162141..9f68a37 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -437,8 +437,8 @@
 		err = 0;
 		autofs4_expire_wait(path.dentry);
 		spin_lock(&sbi->fs_lock);
-		param->requester.uid = ino->uid;
-		param->requester.gid = ino->gid;
+		param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
+		param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
 		spin_unlock(&sbi->fs_lock);
 	}
 	path_put(&path);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8a4fed8..b104726 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -36,8 +36,8 @@
 
 void autofs4_clean_ino(struct autofs_info *ino)
 {
-	ino->uid = 0;
-	ino->gid = 0;
+	ino->uid = GLOBAL_ROOT_UID;
+	ino->gid = GLOBAL_ROOT_GID;
 	ino->last_used = jiffies;
 }
 
@@ -79,10 +79,12 @@
 		return 0;
 
 	seq_printf(m, ",fd=%d", sbi->pipefd);
-	if (root_inode->i_uid != 0)
-		seq_printf(m, ",uid=%u", root_inode->i_uid);
-	if (root_inode->i_gid != 0)
-		seq_printf(m, ",gid=%u", root_inode->i_gid);
+	if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			from_kuid_munged(&init_user_ns, root_inode->i_uid));
+	if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			from_kgid_munged(&init_user_ns, root_inode->i_gid));
 	seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
 	seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
@@ -126,7 +128,7 @@
 	{Opt_err, NULL}
 };
 
-static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
+static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
 		pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
 {
 	char *p;
@@ -159,12 +161,16 @@
 		case Opt_uid:
 			if (match_int(args, &option))
 				return 1;
-			*uid = option;
+			*uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(*uid))
+				return 1;
 			break;
 		case Opt_gid:
 			if (match_int(args, &option))
 				return 1;
-			*gid = option;
+			*gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(*gid))
+				return 1;
 			break;
 		case Opt_pgrp:
 			if (match_int(args, &option))
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index dce436e..03bc1d3 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -154,6 +154,7 @@
 	case autofs_ptype_expire_direct:
 	{
 		struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
+		struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns;
 
 		pktsz = sizeof(*packet);
 
@@ -163,8 +164,8 @@
 		packet->name[wq->name.len] = '\0';
 		packet->dev = wq->dev;
 		packet->ino = wq->ino;
-		packet->uid = wq->uid;
-		packet->gid = wq->gid;
+		packet->uid = from_kuid_munged(user_ns, wq->uid);
+		packet->gid = from_kgid_munged(user_ns, wq->gid);
 		packet->pid = wq->pid;
 		packet->tgid = wq->tgid;
 		break;
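
The autofs4 conversion follows the standard user-namespace recipe: ids are stored internally as `kuid_t`/`kgid_t`, converted from userspace values with `make_kuid()` (and validity-checked), and converted back at each reporting boundary with `from_kuid_munged()` against the namespace of whoever is reading. In outline:

/* option parsing: userspace value -> kernel id */
kuid_t uid = make_kuid(current_user_ns(), option);
if (!uid_valid(uid))
        return 1;       /* reject ids with no mapping here */

/* reporting: kernel id -> value in the reader's namespace */
packet->uid = from_kuid_munged(user_ns, wq->uid);
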
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index b1342ff..922ad46 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -16,7 +16,7 @@
 #include <linux/poll.h>
 
 
-static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t bad_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	return -EIO;
 }
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index f20e8a7..ad3ea14 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -161,6 +161,14 @@
 	return block_read_full_page(page, bfs_get_block);
 }
 
+static void bfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size)
+		truncate_pagecache(inode, to, inode->i_size);
+}
+
 static int bfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -169,11 +177,8 @@
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep,
 				bfs_get_block);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		bfs_write_failed(mapping, pos + len);
 
 	return ret;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6d7d164..0c42cdb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1601,8 +1601,10 @@
 	info->thread = NULL;
 
 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
-	if (psinfo == NULL)
+	if (psinfo == NULL) {
+		info->psinfo.data = NULL; /* So we don't free this wrongly */
 		return 0;
+	}
 
 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
 
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 4e6cce5..037a3e2 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -42,7 +42,6 @@
 			return -ENOEXEC;
 	}
 
-	bprm->recursion_depth++; /* Well, the bang-shell is implicit... */
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index b0b70fb..0c8869f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -117,10 +117,6 @@
 	if (!enabled)
 		goto _ret;
 
-	retval = -ENOEXEC;
-	if (bprm->recursion_depth > BINPRM_MAX_RECURSION)
-		goto _ret;
-
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
 	fmt = check_file(bprm);
@@ -176,7 +172,10 @@
 		goto _error;
 	bprm->argc ++;
 
-	bprm->interp = iname;	/* for binfmt_script */
+	/* Update interp in case binfmt_script needs it. */
+	retval = bprm_change_interp(iname, bprm);
+	if (retval < 0)
+		goto _error;
 
 	interp_file = open_exec (iname);
 	retval = PTR_ERR (interp_file);
@@ -197,8 +196,6 @@
 	if (retval < 0)
 		goto _error;
 
-	bprm->recursion_depth++;
-
 	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto _error;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 8c95499..5027a3e 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -22,15 +22,13 @@
 	char interp[BINPRM_BUF_SIZE];
 	int retval;
 
-	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') ||
-	    (bprm->recursion_depth > BINPRM_MAX_RECURSION))
+	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
 		return -ENOEXEC;
 	/*
 	 * This section does the #! interpretation.
 	 * Sorta complicated, but hopefully it will work.  -TYT
 	 */
 
-	bprm->recursion_depth++;
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
@@ -82,7 +80,9 @@
 	retval = copy_strings_kernel(1, &i_name, bprm);
 	if (retval) return retval; 
 	bprm->argc++;
-	bprm->interp = interp;
+	retval = bprm_change_interp(interp, bprm);
+	if (retval < 0)
+		return retval;
 
 	/*
 	 * OK, now restart the process with the interpreter's dentry.
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ab3a456..172f849 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -321,7 +321,7 @@
  * for a block special file file->f_path.dentry->d_inode->i_size is zero
  * so we compute the size by hand (just as in block_read/write above)
  */
-static loff_t block_llseek(struct file *file, loff_t offset, int origin)
+static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *bd_inode = file->f_mapping->host;
 	loff_t size;
@@ -331,7 +331,7 @@
 	size = i_size_read(bd_inode);
 
 	retval = -EINVAL;
-	switch (origin) {
+	switch (whence) {
 		case SEEK_END:
 			offset += size;
 			break;
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d7fcdba..7df3e0f 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
-	   reada.o backref.o ulist.o qgroup.o send.o
+	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 0c16e3d..e15d2b0 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -121,6 +121,8 @@
 			ret = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (ret < 0)
 				return ret;
+			if (ret == 0)
+				acl = NULL;
 		}
 		ret = 0;
 		break;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 208d8aa..04edf69 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -461,6 +461,7 @@
 		     pos2 = n2, n2 = pos2->next) {
 			struct __prelim_ref *ref2;
 			struct __prelim_ref *xchg;
+			struct extent_inode_elem *eie;
 
 			ref2 = list_entry(pos2, struct __prelim_ref, list);
 
@@ -472,12 +473,20 @@
 					ref1 = ref2;
 					ref2 = xchg;
 				}
-				ref1->count += ref2->count;
 			} else {
 				if (ref1->parent != ref2->parent)
 					continue;
-				ref1->count += ref2->count;
 			}
+
+			eie = ref1->inode_list;
+			while (eie && eie->next)
+				eie = eie->next;
+			if (eie)
+				eie->next = ref2->inode_list;
+			else
+				ref1->inode_list = ref2->inode_list;
+			ref1->count += ref2->count;
+
 			list_del(&ref2->list);
 			kfree(ref2);
 		}
@@ -890,8 +899,7 @@
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
 		list_del(&ref->list);
-		if (ref->count < 0)
-			WARN_ON(1);
+		WARN_ON(ref->count < 0);
 		if (ref->count && ref->root_id && ref->parent == 0) {
 			/* no parent == root of tree */
 			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ed8ca7c..2a8c242 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -39,6 +39,7 @@
 #define BTRFS_INODE_HAS_ORPHAN_ITEM		5
 #define BTRFS_INODE_HAS_ASYNC_EXTENT		6
 #define BTRFS_INODE_NEEDS_FULL_SYNC		7
+#define BTRFS_INODE_COPY_EVERYTHING		8
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -90,6 +91,9 @@
 
 	unsigned long runtime_flags;
 
+	/* Keep track of who's O_SYNC/fsyncing currently */
+	atomic_t sync_writers;
+
 	/* full 64 bit generation number, struct vfs_inode doesn't have a big
 	 * enough field for this.
 	 */
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 5a3e45d..11d47bf 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -137,7 +137,7 @@
 	unsigned int never_written:1;	/* block was added because it was
 					 * referenced, not because it was
 					 * written */
-	unsigned int mirror_num:2;	/* large enough to hold
+	unsigned int mirror_num;	/* large enough to hold
 					 * BTRFS_SUPER_MIRROR_MAX */
 	struct btrfsic_dev_state *dev_state;
 	u64 dev_bytenr;		/* key, physical byte num on disk */
@@ -723,7 +723,7 @@
 		}
 
 		num_copies =
-		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
+		    btrfs_num_copies(state->root->fs_info,
 				     next_bytenr, state->metablock_size);
 		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
 			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
@@ -903,7 +903,7 @@
 		}
 
 		num_copies =
-		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
+		    btrfs_num_copies(state->root->fs_info,
 				     next_bytenr, state->metablock_size);
 		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
 			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
@@ -1287,7 +1287,7 @@
 	*next_blockp = NULL;
 	if (0 == *num_copiesp) {
 		*num_copiesp =
-		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
+		    btrfs_num_copies(state->root->fs_info,
 				     next_bytenr, state->metablock_size);
 		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
 			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
@@ -1489,7 +1489,7 @@
 			chunk_len = num_bytes;
 
 		num_copies =
-		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
+		    btrfs_num_copies(state->root->fs_info,
 				     next_bytenr, state->datablock_size);
 		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
 			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
@@ -1582,9 +1582,21 @@
 	struct btrfs_device *device;
 
 	length = len;
-	ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
+	ret = btrfs_map_block(state->root->fs_info, READ,
 			      bytenr, &length, &multi, mirror_num);
 
+	if (ret) {
+		block_ctx_out->start = 0;
+		block_ctx_out->dev_bytenr = 0;
+		block_ctx_out->len = 0;
+		block_ctx_out->dev = NULL;
+		block_ctx_out->datav = NULL;
+		block_ctx_out->pagev = NULL;
+		block_ctx_out->mem_to_free = NULL;
+
+		return ret;
+	}
+
 	device = multi->stripes[0].dev;
 	block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
 	block_ctx_out->dev_bytenr = multi->stripes[0].physical;
@@ -1594,8 +1606,7 @@
 	block_ctx_out->pagev = NULL;
 	block_ctx_out->mem_to_free = NULL;
 
-	if (0 == ret)
-		kfree(multi);
+	kfree(multi);
 	if (NULL == block_ctx_out->dev) {
 		ret = -ENXIO;
 		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
@@ -2463,7 +2474,7 @@
 		}
 
 		num_copies =
-		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
+		    btrfs_num_copies(state->root->fs_info,
 				     next_bytenr, BTRFS_SUPER_INFO_SIZE);
 		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
 			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
@@ -2960,7 +2971,7 @@
 	struct btrfsic_block_data_ctx block_ctx;
 	int match = 0;
 
-	num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
+	num_copies = btrfs_num_copies(state->root->fs_info,
 				      bytenr, state->metablock_size);
 
 	for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c6467aa..94ab2f8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -687,7 +687,8 @@
 
 			ret = btrfs_map_bio(root, READ, comp_bio,
 					    mirror_num, 0);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				bio_endio(comp_bio, ret);
 
 			bio_put(comp_bio);
 
@@ -712,7 +713,8 @@
 	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
-	BUG_ON(ret); /* -ENOMEM */
+	if (ret)
+		bio_endio(comp_bio, ret);
 
 	bio_put(comp_bio);
 	return 0;
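
Both hunks replace a crash-on-error BUG_ON with proper completion: if
btrfs_map_bio() fails, the bio is ended with the error so the failure
propagates to whoever waits on it, and the local reference is still dropped.
The shape of that pattern in a runnable user-space analog (the two-argument
bio_endio() matches the 3.8-era kernel API; everything else here is a
stand-in):

    #include <errno.h>
    #include <stdio.h>

    struct bio {
        void (*end_io)(struct bio *bio, int err);
        int refcount;
    };

    static void bio_endio(struct bio *bio, int err)
    {
        bio->end_io(bio, err);      /* report the result to the waiter */
    }

    static void bio_put(struct bio *bio)
    {
        bio->refcount--;            /* last reference would free the bio */
    }

    static int map_and_submit(struct bio *bio)
    {
        (void)bio;
        return -ENOMEM;             /* pretend the mapping layer failed */
    }

    static void waiter(struct bio *bio, int err)
    {
        (void)bio;
        printf("bio completed, err=%d\n", err);
    }

    int main(void)
    {
        struct bio bio = { .end_io = waiter, .refcount = 1 };
        int ret = map_and_submit(&bio);

        if (ret)
            bio_endio(&bio, ret);   /* was BUG_ON(ret) before this patch */
        bio_put(&bio);
        return 0;
    }
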
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cdfb4c4..eea5da7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,8 +38,7 @@
 			      struct extent_buffer *dst_buf,
 			      struct extent_buffer *src_buf);
 static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct btrfs_path *path, int level, int slot,
-		    int tree_mod_log);
+		    struct btrfs_path *path, int level, int slot);
 static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
 				 struct extent_buffer *eb);
 struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
@@ -776,8 +775,7 @@
 
 static noinline void
 tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
-			  struct extent_buffer *eb,
-			  struct btrfs_disk_key *disk_key, int slot, int atomic)
+			  struct extent_buffer *eb, int slot, int atomic)
 {
 	int ret;
 
@@ -1361,19 +1359,16 @@
 	u64 search_start;
 	int ret;
 
-	if (trans->transaction != root->fs_info->running_transaction) {
-		printk(KERN_CRIT "trans %llu running %llu\n",
+	if (trans->transaction != root->fs_info->running_transaction)
+		WARN(1, KERN_CRIT "trans %llu running %llu\n",
 		       (unsigned long long)trans->transid,
 		       (unsigned long long)
 		       root->fs_info->running_transaction->transid);
-		WARN_ON(1);
-	}
-	if (trans->transid != root->fs_info->generation) {
-		printk(KERN_CRIT "trans %llu running %llu\n",
+
+	if (trans->transid != root->fs_info->generation)
+		WARN(1, KERN_CRIT "trans %llu running %llu\n",
 		       (unsigned long long)trans->transid,
 		       (unsigned long long)root->fs_info->generation);
-		WARN_ON(1);
-	}
 
 	if (!should_cow_block(trans, root, buf)) {
 		*cow_ret = buf;
@@ -1469,10 +1464,8 @@
 	if (cache_only && parent_level != 1)
 		return 0;
 
-	if (trans->transaction != root->fs_info->running_transaction)
-		WARN_ON(1);
-	if (trans->transid != root->fs_info->generation)
-		WARN_ON(1);
+	WARN_ON(trans->transaction != root->fs_info->running_transaction);
+	WARN_ON(trans->transid != root->fs_info->generation);
 
 	parent_nritems = btrfs_header_nritems(parent);
 	blocksize = btrfs_level_size(root, parent_level - 1);
@@ -1827,7 +1820,7 @@
 		if (btrfs_header_nritems(right) == 0) {
 			clean_tree_block(trans, root, right);
 			btrfs_tree_unlock(right);
-			del_ptr(trans, root, path, level + 1, pslot + 1, 1);
+			del_ptr(trans, root, path, level + 1, pslot + 1);
 			root_sub_used(root, right->len);
 			btrfs_free_tree_block(trans, root, right, 0, 1);
 			free_extent_buffer_stale(right);
@@ -1836,7 +1829,7 @@
 			struct btrfs_disk_key right_key;
 			btrfs_node_key(right, &right_key, 0);
 			tree_mod_log_set_node_key(root->fs_info, parent,
-						  &right_key, pslot + 1, 0);
+						  pslot + 1, 0);
 			btrfs_set_node_key(parent, &right_key, pslot + 1);
 			btrfs_mark_buffer_dirty(parent);
 		}
@@ -1871,7 +1864,7 @@
 	if (btrfs_header_nritems(mid) == 0) {
 		clean_tree_block(trans, root, mid);
 		btrfs_tree_unlock(mid);
-		del_ptr(trans, root, path, level + 1, pslot, 1);
+		del_ptr(trans, root, path, level + 1, pslot);
 		root_sub_used(root, mid->len);
 		btrfs_free_tree_block(trans, root, mid, 0, 1);
 		free_extent_buffer_stale(mid);
@@ -1880,7 +1873,7 @@
 		/* update the parent key to reflect our changes */
 		struct btrfs_disk_key mid_key;
 		btrfs_node_key(mid, &mid_key, 0);
-		tree_mod_log_set_node_key(root->fs_info, parent, &mid_key,
+		tree_mod_log_set_node_key(root->fs_info, parent,
 					  pslot, 0);
 		btrfs_set_node_key(parent, &mid_key, pslot);
 		btrfs_mark_buffer_dirty(parent);
@@ -1980,7 +1973,7 @@
 			orig_slot += left_nr;
 			btrfs_node_key(mid, &disk_key, 0);
 			tree_mod_log_set_node_key(root->fs_info, parent,
-						  &disk_key, pslot, 0);
+						  pslot, 0);
 			btrfs_set_node_key(parent, &disk_key, pslot);
 			btrfs_mark_buffer_dirty(parent);
 			if (btrfs_header_nritems(left) > orig_slot) {
@@ -2033,7 +2026,7 @@
 
 			btrfs_node_key(right, &disk_key, 0);
 			tree_mod_log_set_node_key(root->fs_info, parent,
-						  &disk_key, pslot + 1, 0);
+						  pslot + 1, 0);
 			btrfs_set_node_key(parent, &disk_key, pslot + 1);
 			btrfs_mark_buffer_dirty(parent);
 
@@ -2219,6 +2212,9 @@
 	int no_skips = 0;
 	struct extent_buffer *t;
 
+	if (path->really_keep_locks)
+		return;
+
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
 		if (!path->nodes[i])
 			break;
@@ -2266,7 +2262,7 @@
 {
 	int i;
 
-	if (path->keep_locks)
+	if (path->keep_locks || path->really_keep_locks)
 		return;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -2499,7 +2495,7 @@
 	if (!cow)
 		write_lock_level = -1;
 
-	if (cow && (p->keep_locks || p->lowest_level))
+	if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level))
 		write_lock_level = BTRFS_MAX_LEVEL;
 
 	min_write_lock_level = write_lock_level;
@@ -2568,7 +2564,10 @@
 			 * must have write locks on this node and the
 			 * parent
 			 */
-			if (level + 1 > write_lock_level) {
+			if (level > write_lock_level ||
+			    (level + 1 > write_lock_level &&
+			    level + 1 < BTRFS_MAX_LEVEL &&
+			    p->nodes[level + 1])) {
 				write_lock_level = level + 1;
 				btrfs_release_path(p);
 				goto again;
@@ -2917,7 +2916,7 @@
 		if (!path->nodes[i])
 			break;
 		t = path->nodes[i];
-		tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1);
+		tree_mod_log_set_node_key(root->fs_info, t, tslot, 1);
 		btrfs_set_node_key(t, key, tslot);
 		btrfs_mark_buffer_dirty(path->nodes[i]);
 		if (tslot != 0)
@@ -3302,14 +3301,21 @@
  */
 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
 {
+	struct btrfs_item *start_item;
+	struct btrfs_item *end_item;
+	struct btrfs_map_token token;
 	int data_len;
 	int nritems = btrfs_header_nritems(l);
 	int end = min(nritems, start + nr) - 1;
 
 	if (!nr)
 		return 0;
-	data_len = btrfs_item_end_nr(l, start);
-	data_len = data_len - btrfs_item_offset_nr(l, end);
+	btrfs_init_map_token(&token);
+	start_item = btrfs_item_nr(l, start);
+	end_item = btrfs_item_nr(l, end);
+	data_len = btrfs_token_item_offset(l, start_item, &token) +
+		btrfs_token_item_size(l, start_item, &token);
+	data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
 	data_len += sizeof(struct btrfs_item) * nr;
 	WARN_ON(data_len < 0);
 	return data_len;
@@ -3403,8 +3409,7 @@
 	if (push_items == 0)
 		goto out_unlock;
 
-	if (!empty && push_items == left_nritems)
-		WARN_ON(1);
+	WARN_ON(!empty && push_items == left_nritems);
 
 	/* push left to right */
 	right_nritems = btrfs_header_nritems(right);
@@ -3642,11 +3647,9 @@
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
 
 	/* fixup right node */
-	if (push_items > right_nritems) {
-		printk(KERN_CRIT "push items %d nr %u\n", push_items,
+	if (push_items > right_nritems)
+		WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
 		       right_nritems);
-		WARN_ON(1);
-	}
 
 	if (push_items < right_nritems) {
 		push_space = btrfs_item_offset_nr(right, push_items - 1) -
@@ -4602,8 +4605,7 @@
  * empty a node.
  */
 static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct btrfs_path *path, int level, int slot,
-		    int tree_mod_log)
+		    struct btrfs_path *path, int level, int slot)
 {
 	struct extent_buffer *parent = path->nodes[level];
 	u32 nritems;
@@ -4611,7 +4613,7 @@
 
 	nritems = btrfs_header_nritems(parent);
 	if (slot != nritems - 1) {
-		if (tree_mod_log && level)
+		if (level)
 			tree_mod_log_eb_move(root->fs_info, parent, slot,
 					     slot + 1, nritems - slot - 1);
 		memmove_extent_buffer(parent,
@@ -4619,7 +4621,7 @@
 			      btrfs_node_key_ptr_offset(slot + 1),
 			      sizeof(struct btrfs_key_ptr) *
 			      (nritems - slot - 1));
-	} else if (tree_mod_log && level) {
+	} else if (level) {
 		ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
 					      MOD_LOG_KEY_REMOVE);
 		BUG_ON(ret < 0);
@@ -4656,7 +4658,7 @@
 				    struct extent_buffer *leaf)
 {
 	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
-	del_ptr(trans, root, path, 1, path->slots[1], 1);
+	del_ptr(trans, root, path, 1, path->slots[1]);
 
 	/*
 	 * btrfs_free_extent is expensive, we want to make sure we
@@ -5123,13 +5125,13 @@
 	right_path->search_commit_root = 1;
 	right_path->skip_locking = 1;
 
-	spin_lock(&left_root->root_times_lock);
+	spin_lock(&left_root->root_item_lock);
 	left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
-	spin_unlock(&left_root->root_times_lock);
+	spin_unlock(&left_root->root_item_lock);
 
-	spin_lock(&right_root->root_times_lock);
+	spin_lock(&right_root->root_item_lock);
 	right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
-	spin_unlock(&right_root->root_times_lock);
+	spin_unlock(&right_root->root_item_lock);
 
 	trans = btrfs_join_transaction(left_root);
 	if (IS_ERR(trans)) {
@@ -5224,15 +5226,15 @@
 				goto out;
 			}
 
-			spin_lock(&left_root->root_times_lock);
+			spin_lock(&left_root->root_item_lock);
 			ctransid = btrfs_root_ctransid(&left_root->root_item);
-			spin_unlock(&left_root->root_times_lock);
+			spin_unlock(&left_root->root_item_lock);
 			if (ctransid != left_start_ctransid)
 				left_start_ctransid = 0;
 
-			spin_lock(&right_root->root_times_lock);
+			spin_lock(&right_root->root_item_lock);
 			ctransid = btrfs_root_ctransid(&right_root->root_item);
-			spin_unlock(&right_root->root_times_lock);
+			spin_unlock(&right_root->root_item_lock);
 			if (ctransid != right_start_ctransid)
 				right_start_ctransid = 0;
 
@@ -5496,6 +5498,139 @@
 	return btrfs_next_old_leaf(root, path, 0);
 }
 
+/* Release the path up to but not including the given level */
+static void btrfs_release_level(struct btrfs_path *path, int level)
+{
+	int i;
+
+	for (i = 0; i < level; i++) {
+		path->slots[i] = 0;
+		if (!path->nodes[i])
+			continue;
+		if (path->locks[i]) {
+			btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
+			path->locks[i] = 0;
+		}
+		free_extent_buffer(path->nodes[i]);
+		path->nodes[i] = NULL;
+	}
+}
+
+/*
+ * This function assumes two things:
+ *
+ * 1) You are using path->keep_locks
+ * 2) You are not inserting items.
+ *
+ * If either of these is not true, do not use this function. If you need a
+ * next leaf with either of these not being true, this function can easily be
+ * adapted to do that, but at the moment these are the limitations.
+ */
+int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct btrfs_path *path,
+			  int del)
+{
+	struct extent_buffer *b;
+	struct btrfs_key key;
+	u32 nritems;
+	int level = 1;
+	int slot;
+	int ret = 1;
+	int write_lock_level = BTRFS_MAX_LEVEL;
+	int ins_len = del ? -1 : 0;
+
+	WARN_ON(!(path->keep_locks || path->really_keep_locks));
+
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
+
+	while (path->nodes[level]) {
+		nritems = btrfs_header_nritems(path->nodes[level]);
+		if (!(path->locks[level] & BTRFS_WRITE_LOCK)) {
+search:
+			btrfs_release_path(path);
+			ret = btrfs_search_slot(trans, root, &key, path,
+						ins_len, 1);
+			if (ret < 0)
+				goto out;
+			level = 1;
+			continue;
+		}
+
+		if (path->slots[level] >= nritems - 1) {
+			level++;
+			continue;
+		}
+
+		btrfs_release_level(path, level);
+		break;
+	}
+
+	if (!path->nodes[level]) {
+		ret = 1;
+		goto out;
+	}
+
+	path->slots[level]++;
+	b = path->nodes[level];
+
+	while (b) {
+		level = btrfs_header_level(b);
+
+		if (!should_cow_block(trans, root, b))
+			goto cow_done;
+
+		btrfs_set_path_blocking(path);
+		ret = btrfs_cow_block(trans, root, b,
+				      path->nodes[level + 1],
+				      path->slots[level + 1], &b);
+		if (ret)
+			goto out;
+cow_done:
+		path->nodes[level] = b;
+		btrfs_clear_path_blocking(path, NULL, 0);
+		if (level != 0) {
+			ret = setup_nodes_for_search(trans, root, path, b,
+						     level, ins_len,
+						     &write_lock_level);
+			if (ret == -EAGAIN)
+				goto search;
+			if (ret)
+				goto out;
+
+			b = path->nodes[level];
+			slot = path->slots[level];
+
+			ret = read_block_for_search(trans, root, path,
+						    &b, level, slot, &key, 0);
+			if (ret == -EAGAIN)
+				goto search;
+			if (ret)
+				goto out;
+			level = btrfs_header_level(b);
+			if (!btrfs_try_tree_write_lock(b)) {
+				btrfs_set_path_blocking(path);
+				btrfs_tree_lock(b);
+				btrfs_clear_path_blocking(path, b,
+							  BTRFS_WRITE_LOCK);
+			}
+			path->locks[level] = BTRFS_WRITE_LOCK;
+			path->nodes[level] = b;
+			path->slots[level] = 0;
+		} else {
+			path->slots[level] = 0;
+			ret = 0;
+			break;
+		}
+	}
+
+out:
+	if (ret)
+		btrfs_release_path(path);
+
+	return ret;
+}
+
 int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
 			u64 time_seq)
 {
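
btrfs_next_leaf_write() above follows the classic next-leaf traversal: climb
until some ancestor still has a slot to its right, step into that slot, then
descend leftmost back to leaf level, re-taking write locks and CoWing blocks
on the way down; the new really_keep_locks flag keeps the whole path locked
so this is safe. The same traversal on a toy in-memory tree, with locking and
CoW elided (a simplified sketch, not the kernel code):

    #include <stddef.h>

    struct node {
        struct node *parent;
        struct node *children[4];
        int nr;                     /* number of populated child slots */
    };

    /* index of 'child' within its parent, -1 at the root */
    static int slot_in_parent(const struct node *child)
    {
        const struct node *p = child->parent;
        int i;

        if (!p)
            return -1;
        for (i = 0; i < p->nr; i++)
            if (p->children[i] == child)
                return i;
        return -1;
    }

    static struct node *next_leaf(struct node *leaf)
    {
        struct node *n = leaf;
        int slot;

        /* climb until an ancestor still has a slot to the right */
        for (;;) {
            slot = slot_in_parent(n);
            if (slot < 0)
                return NULL;        /* already past the last leaf */
            n = n->parent;
            if (slot + 1 < n->nr) {
                n = n->children[slot + 1];
                break;
            }
        }
        /* descend leftmost back down to leaf level */
        while (n->nr && n->children[0])
            n = n->children[0];
        return n;
    }

    int main(void)
    {
        struct node root = { .nr = 2 };
        struct node l0 = { .parent = &root };
        struct node l1 = { .parent = &root };

        root.children[0] = &l0;
        root.children[1] = &l1;
        return next_leaf(&l0) == &l1 && next_leaf(&l1) == NULL ? 0 : 1;
    }
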
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 596617e..547b7b0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -48,7 +48,7 @@
 
 #define BTRFS_MAGIC "_BHRfS_M"
 
-#define BTRFS_MAX_MIRRORS 2
+#define BTRFS_MAX_MIRRORS 3
 
 #define BTRFS_MAX_LEVEL 8
 
@@ -142,6 +142,8 @@
 
 #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
 
+#define BTRFS_DEV_REPLACE_DEVID 0
+
 /*
  * the max metadata block size.  This limit is somewhat artificial,
  * but the memmove costs go through the roof for larger blocks.
@@ -172,6 +174,9 @@
 /* four bytes for CRC32 */
 #define BTRFS_EMPTY_DIR_SIZE 0
 
+/* specific to btrfs_map_block(), therefore not in include/linux/blk_types.h */
+#define REQ_GET_READ_MIRRORS	(1 << 30)
+
 #define BTRFS_FT_UNKNOWN	0
 #define BTRFS_FT_REG_FILE	1
 #define BTRFS_FT_DIR		2
@@ -571,6 +576,7 @@
 	unsigned int skip_locking:1;
 	unsigned int leave_spinning:1;
 	unsigned int search_commit_root:1;
+	unsigned int really_keep_locks:1;
 };
 
 /*
@@ -885,6 +891,59 @@
 	__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
 } __attribute__ ((__packed__));
 
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
+#define BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED	0
+#define BTRFS_DEV_REPLACE_ITEM_STATE_STARTED		1
+#define BTRFS_DEV_REPLACE_ITEM_STATE_SUSPENDED		2
+#define BTRFS_DEV_REPLACE_ITEM_STATE_FINISHED		3
+#define BTRFS_DEV_REPLACE_ITEM_STATE_CANCELED		4
+
+struct btrfs_dev_replace {
+	u64 replace_state;	/* see #define above */
+	u64 time_started;	/* seconds since 1-Jan-1970 */
+	u64 time_stopped;	/* seconds since 1-Jan-1970 */
+	atomic64_t num_write_errors;
+	atomic64_t num_uncorrectable_read_errors;
+
+	u64 cursor_left;
+	u64 committed_cursor_left;
+	u64 cursor_left_last_write_of_item;
+	u64 cursor_right;
+
+	u64 cont_reading_from_srcdev_mode;	/* see #define above */
+
+	int is_valid;
+	int item_needs_writeback;
+	struct btrfs_device *srcdev;
+	struct btrfs_device *tgtdev;
+
+	pid_t lock_owner;
+	atomic_t nesting_level;
+	struct mutex lock_finishing_cancel_unmount;
+	struct mutex lock_management_lock;
+	struct mutex lock;
+
+	struct btrfs_scrub_progress scrub_progress;
+};
+
+struct btrfs_dev_replace_item {
+	/*
+	 * grow this item struct at the end for future enhancements and keep
+	 * the existing values unchanged
+	 */
+	__le64 src_devid;
+	__le64 cursor_left;
+	__le64 cursor_right;
+	__le64 cont_reading_from_srcdev_mode;
+
+	__le64 replace_state;
+	__le64 time_started;
+	__le64 time_stopped;
+	__le64 num_write_errors;
+	__le64 num_uncorrectable_read_errors;
+} __attribute__ ((__packed__));
+
 /* different types of block groups (and chunks) */
 #define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
 #define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
@@ -1333,6 +1392,7 @@
 	struct btrfs_workers generic_worker;
 	struct btrfs_workers workers;
 	struct btrfs_workers delalloc_workers;
+	struct btrfs_workers flush_workers;
 	struct btrfs_workers endio_workers;
 	struct btrfs_workers endio_meta_workers;
 	struct btrfs_workers endio_meta_write_workers;
@@ -1429,6 +1489,8 @@
 	struct rw_semaphore scrub_super_lock;
 	int scrub_workers_refcnt;
 	struct btrfs_workers scrub_workers;
+	struct btrfs_workers scrub_wr_completion_workers;
+	struct btrfs_workers scrub_nocow_workers;
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	u32 check_integrity_print_mask;
@@ -1470,6 +1532,11 @@
 	int backup_root_index;
 
 	int num_tolerated_disk_barrier_failures;
+
+	/* device replace state */
+	struct btrfs_dev_replace dev_replace;
+
+	atomic_t mutually_exclusive_operation_running;
 };
 
 /*
@@ -1579,7 +1646,7 @@
 
 	int force_cow;
 
-	spinlock_t root_times_lock;
+	spinlock_t root_item_lock;
 };
 
 struct btrfs_ioctl_defrag_range_args {
@@ -1723,6 +1790,12 @@
 #define BTRFS_DEV_STATS_KEY	249
 
 /*
+ * Persistently stores the device replace state in the device tree.
+ * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
+ */
+#define BTRFS_DEV_REPLACE_KEY	250
+
+/*
  * string items are for debugging.  They just store a short string of
  * data in the FS
  */
@@ -1787,7 +1860,7 @@
 
 static inline void btrfs_init_map_token (struct btrfs_map_token *token)
 {
-	memset(token, 0, sizeof(*token));
+	token->kaddr = NULL;
 }
 
 /* some macros to generate set/get funcs for the struct fields.  This
@@ -2755,6 +2828,49 @@
 BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item,
 		   rsv_excl, 64);
 
+/* btrfs_dev_replace_item */
+BTRFS_SETGET_FUNCS(dev_replace_src_devid,
+		   struct btrfs_dev_replace_item, src_devid, 64);
+BTRFS_SETGET_FUNCS(dev_replace_cont_reading_from_srcdev_mode,
+		   struct btrfs_dev_replace_item, cont_reading_from_srcdev_mode,
+		   64);
+BTRFS_SETGET_FUNCS(dev_replace_replace_state, struct btrfs_dev_replace_item,
+		   replace_state, 64);
+BTRFS_SETGET_FUNCS(dev_replace_time_started, struct btrfs_dev_replace_item,
+		   time_started, 64);
+BTRFS_SETGET_FUNCS(dev_replace_time_stopped, struct btrfs_dev_replace_item,
+		   time_stopped, 64);
+BTRFS_SETGET_FUNCS(dev_replace_num_write_errors, struct btrfs_dev_replace_item,
+		   num_write_errors, 64);
+BTRFS_SETGET_FUNCS(dev_replace_num_uncorrectable_read_errors,
+		   struct btrfs_dev_replace_item, num_uncorrectable_read_errors,
+		   64);
+BTRFS_SETGET_FUNCS(dev_replace_cursor_left, struct btrfs_dev_replace_item,
+		   cursor_left, 64);
+BTRFS_SETGET_FUNCS(dev_replace_cursor_right, struct btrfs_dev_replace_item,
+		   cursor_right, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_src_devid,
+			 struct btrfs_dev_replace_item, src_devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cont_reading_from_srcdev_mode,
+			 struct btrfs_dev_replace_item,
+			 cont_reading_from_srcdev_mode, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_replace_state,
+			 struct btrfs_dev_replace_item, replace_state, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_started,
+			 struct btrfs_dev_replace_item, time_started, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_stopped,
+			 struct btrfs_dev_replace_item, time_stopped, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_write_errors,
+			 struct btrfs_dev_replace_item, num_write_errors, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_uncorrectable_read_errors,
+			 struct btrfs_dev_replace_item,
+			 num_uncorrectable_read_errors, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_left,
+			 struct btrfs_dev_replace_item, cursor_left, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
+			 struct btrfs_dev_replace_item, cursor_right, 64);
+
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -2900,6 +3016,18 @@
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+
+enum btrfs_reserve_flush_enum {
+	/* If we are in the transaction, we can't flush anything. */
+	BTRFS_RESERVE_NO_FLUSH,
+	/*
+	 * Flushing delalloc may cause a deadlock somewhere; in that
+	 * case, use FLUSH_LIMIT
+	 */
+	BTRFS_RESERVE_FLUSH_LIMIT,
+	BTRFS_RESERVE_FLUSH_ALL,
+};
+
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -2919,19 +3047,13 @@
 void btrfs_free_block_rsv(struct btrfs_root *root,
 			  struct btrfs_block_rsv *rsv);
 int btrfs_block_rsv_add(struct btrfs_root *root,
-			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes);
-int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes);
+			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
+			enum btrfs_reserve_flush_enum flush);
 int btrfs_block_rsv_check(struct btrfs_root *root,
 			  struct btrfs_block_rsv *block_rsv, int min_factor);
 int btrfs_block_rsv_refill(struct btrfs_root *root,
-			  struct btrfs_block_rsv *block_rsv,
-			  u64 min_reserved);
-int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
-				   struct btrfs_block_rsv *block_rsv,
-				   u64 min_reserved);
+			   struct btrfs_block_rsv *block_rsv, u64 min_reserved,
+			   enum btrfs_reserve_flush_enum flush);
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 			    struct btrfs_block_rsv *dst_rsv,
 			    u64 num_bytes);
@@ -2955,6 +3077,7 @@
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info);
+int __get_raid_index(u64 flags);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
@@ -3065,6 +3188,9 @@
 }
 
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct btrfs_path *path,
+			  int del);
 int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
 			u64 time_seq);
 static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3157,6 +3283,8 @@
 			     struct btrfs_root *root);
 
 /* dir-item.c */
+int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+			  const char *name, int name_len);
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
 			  int name_len, struct inode *dir,
@@ -3256,6 +3384,7 @@
 			     struct btrfs_root *root,
 			     struct btrfs_path *path, u64 objectid,
 			     u64 bytenr, int mod);
+u64 btrfs_file_extent_length(struct btrfs_path *path);
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
@@ -3271,6 +3400,19 @@
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
 /* inode.c */
+struct btrfs_delalloc_work {
+	struct inode *inode;
+	int wait;
+	int delay_iput;
+	struct completion completion;
+	struct list_head list;
+	struct btrfs_work work;
+};
+
+struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
+						    int wait, int delay_iput);
+void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
+
 struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
 					   size_t pg_offset, u64 start, u64 len,
 					   int create);
@@ -3370,9 +3512,12 @@
 				struct btrfs_ioctl_space_info *space);
 
 /* file.c */
+int btrfs_auto_defrag_init(void);
+void btrfs_auto_defrag_exit(void);
 int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 			   struct inode *inode);
 int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
+void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
 int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			     int skip_pinned);
@@ -3519,15 +3664,16 @@
 			      struct btrfs_pending_snapshot *pending);
 
 /* scrub.c */
-int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
-		    struct btrfs_scrub_progress *progress, int readonly);
+int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+		    u64 end, struct btrfs_scrub_progress *progress,
+		    int readonly, int is_dev_replace);
 void btrfs_scrub_pause(struct btrfs_root *root);
 void btrfs_scrub_pause_super(struct btrfs_root *root);
 void btrfs_scrub_continue(struct btrfs_root *root);
 void btrfs_scrub_continue_super(struct btrfs_root *root);
-int __btrfs_scrub_cancel(struct btrfs_fs_info *info);
-int btrfs_scrub_cancel(struct btrfs_root *root);
-int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
+int btrfs_scrub_cancel(struct btrfs_fs_info *info);
+int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
+			   struct btrfs_device *dev);
 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
 			 struct btrfs_scrub_progress *progress);
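
The _noflush entry points are folded into btrfs_block_rsv_add()/_refill() by
making the flushing strategy an explicit enum argument; callers inside a
transaction pass BTRFS_RESERVE_NO_FLUSH (see the delayed-inode hunks below),
others choose the limited or full flush. The API shape in miniature, with toy
accounting standing in for the real reservation logic:

    #include <errno.h>
    #include <stdio.h>

    enum reserve_flush {
        RESERVE_NO_FLUSH,       /* inside a transaction: never flush */
        RESERVE_FLUSH_LIMIT,    /* flush a bounded amount of delalloc */
        RESERVE_FLUSH_ALL,      /* flush as much as it takes */
    };

    static unsigned long long free_bytes = 100;

    static int block_rsv_add(unsigned long long bytes,
                             enum reserve_flush flush)
    {
        if (bytes <= free_bytes) {
            free_bytes -= bytes;
            return 0;
        }
        if (flush == RESERVE_NO_FLUSH)
            return -ENOSPC;     /* caller must cope without any flushing */
        /* a real implementation would write back delalloc here, with
         * RESERVE_FLUSH_LIMIT capping how much work may be attempted */
        return -ENOSPC;
    }

    int main(void)
    {
        printf("%d\n", block_rsv_add(50, RESERVE_NO_FLUSH));    /* 0 */
        printf("%d\n", block_rsv_add(500, RESERVE_NO_FLUSH));   /* -ENOSPC */
        return 0;
    }
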
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 478f66b..3483603 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -651,7 +651,8 @@
 	 */
 	if (!src_rsv || (!trans->bytes_reserved &&
 			 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
-		ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
+		ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
+					  BTRFS_RESERVE_NO_FLUSH);
 		/*
 		 * Since we're under a transaction reserve_metadata_bytes could
 		 * try to commit the transaction which will make it return
@@ -686,7 +687,8 @@
 		 * reserve something strictly for us.  If not be a pain and try
 		 * to steal from the delalloc block rsv.
 		 */
-		ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
+		ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
+					  BTRFS_RESERVE_NO_FLUSH);
 		if (!ret)
 			goto out;
 
@@ -1255,7 +1257,6 @@
 	struct btrfs_delayed_node *delayed_node = NULL;
 	struct btrfs_root *root;
 	struct btrfs_block_rsv *block_rsv;
-	unsigned long nr = 0;
 	int need_requeue = 0;
 	int ret;
 
@@ -1316,11 +1317,9 @@
 					   delayed_node);
 	mutex_unlock(&delayed_node->mutex);
 
-	nr = trans->blocks_used;
-
 	trans->block_rsv = block_rsv;
 	btrfs_end_transaction_dmeta(trans, root);
-	__btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty_nodelay(root);
 free_path:
 	btrfs_free_path(path);
 out:
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
new file mode 100644
index 0000000..66dbc8d
--- /dev/null
+++ b/fs/btrfs/dev-replace.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright (C) STRATO AG 2012.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/sched.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
+#include <linux/random.h>
+#include <linux/iocontext.h>
+#include <linux/capability.h>
+#include <linux/kthread.h>
+#include <linux/math64.h>
+#include <asm/div64.h>
+#include "compat.h"
+#include "ctree.h"
+#include "extent_map.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "volumes.h"
+#include "async-thread.h"
+#include "check-integrity.h"
+#include "rcu-string.h"
+#include "dev-replace.h"
+
+static u64 btrfs_get_seconds_since_1970(void);
+static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
+				       int scrub_ret);
+static void btrfs_dev_replace_update_device_in_mapping_tree(
+						struct btrfs_fs_info *fs_info,
+						struct btrfs_device *srcdev,
+						struct btrfs_device *tgtdev);
+static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
+					 char *srcdev_name,
+					 struct btrfs_device **device);
+static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
+static int btrfs_dev_replace_kthread(void *data);
+static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
+
+
+int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_key key;
+	struct btrfs_root *dev_root = fs_info->dev_root;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	struct extent_buffer *eb;
+	int slot;
+	int ret = 0;
+	struct btrfs_path *path = NULL;
+	int item_size;
+	struct btrfs_dev_replace_item *ptr;
+	u64 src_devid;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	key.objectid = 0;
+	key.type = BTRFS_DEV_REPLACE_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
+	if (ret) {
+no_valid_dev_replace_entry_found:
+		ret = 0;
+		dev_replace->replace_state =
+			BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED;
+		dev_replace->cont_reading_from_srcdev_mode =
+		    BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS;
+		dev_replace->time_started = 0;
+		dev_replace->time_stopped = 0;
+		atomic64_set(&dev_replace->num_write_errors, 0);
+		atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
+		dev_replace->cursor_left = 0;
+		dev_replace->committed_cursor_left = 0;
+		dev_replace->cursor_left_last_write_of_item = 0;
+		dev_replace->cursor_right = 0;
+		dev_replace->srcdev = NULL;
+		dev_replace->tgtdev = NULL;
+		dev_replace->is_valid = 0;
+		dev_replace->item_needs_writeback = 0;
+		goto out;
+	}
+	slot = path->slots[0];
+	eb = path->nodes[0];
+	item_size = btrfs_item_size_nr(eb, slot);
+	ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item);
+
+	if (item_size != sizeof(struct btrfs_dev_replace_item)) {
+		pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n");
+		goto no_valid_dev_replace_entry_found;
+	}
+
+	src_devid = btrfs_dev_replace_src_devid(eb, ptr);
+	dev_replace->cont_reading_from_srcdev_mode =
+		btrfs_dev_replace_cont_reading_from_srcdev_mode(eb, ptr);
+	dev_replace->replace_state = btrfs_dev_replace_replace_state(eb, ptr);
+	dev_replace->time_started = btrfs_dev_replace_time_started(eb, ptr);
+	dev_replace->time_stopped =
+		btrfs_dev_replace_time_stopped(eb, ptr);
+	atomic64_set(&dev_replace->num_write_errors,
+		     btrfs_dev_replace_num_write_errors(eb, ptr));
+	atomic64_set(&dev_replace->num_uncorrectable_read_errors,
+		     btrfs_dev_replace_num_uncorrectable_read_errors(eb, ptr));
+	dev_replace->cursor_left = btrfs_dev_replace_cursor_left(eb, ptr);
+	dev_replace->committed_cursor_left = dev_replace->cursor_left;
+	dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left;
+	dev_replace->cursor_right = btrfs_dev_replace_cursor_right(eb, ptr);
+	dev_replace->is_valid = 1;
+
+	dev_replace->item_needs_writeback = 0;
+	switch (dev_replace->replace_state) {
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+		dev_replace->srcdev = NULL;
+		dev_replace->tgtdev = NULL;
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+		dev_replace->srcdev = btrfs_find_device(fs_info, src_devid,
+							NULL, NULL);
+		dev_replace->tgtdev = btrfs_find_device(fs_info,
+							BTRFS_DEV_REPLACE_DEVID,
+							NULL, NULL);
+		/*
+		 * allow 'btrfs dev replace_cancel' if src/tgt device is
+		 * missing
+		 */
+		if (!dev_replace->srcdev &&
+		    !btrfs_test_opt(dev_root, DEGRADED)) {
+			ret = -EIO;
+			pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
+				(unsigned long long)src_devid);
+		}
+		if (!dev_replace->tgtdev &&
+		    !btrfs_test_opt(dev_root, DEGRADED)) {
+			ret = -EIO;
+			pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
+				(unsigned long long)BTRFS_DEV_REPLACE_DEVID);
+		}
+		if (dev_replace->tgtdev) {
+			if (dev_replace->srcdev) {
+				dev_replace->tgtdev->total_bytes =
+					dev_replace->srcdev->total_bytes;
+				dev_replace->tgtdev->disk_total_bytes =
+					dev_replace->srcdev->disk_total_bytes;
+				dev_replace->tgtdev->bytes_used =
+					dev_replace->srcdev->bytes_used;
+			}
+			dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1;
+			btrfs_init_dev_replace_tgtdev_for_resume(fs_info,
+				dev_replace->tgtdev);
+		}
+		break;
+	}
+
+out:
+	if (path)
+		btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * called from commit_transaction. Writes changed device replace state to
+ * disk.
+ */
+int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
+			  struct btrfs_fs_info *fs_info)
+{
+	int ret;
+	struct btrfs_root *dev_root = fs_info->dev_root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct extent_buffer *eb;
+	struct btrfs_dev_replace_item *ptr;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+
+	btrfs_dev_replace_lock(dev_replace);
+	if (!dev_replace->is_valid ||
+	    !dev_replace->item_needs_writeback) {
+		btrfs_dev_replace_unlock(dev_replace);
+		return 0;
+	}
+	btrfs_dev_replace_unlock(dev_replace);
+
+	key.objectid = 0;
+	key.type = BTRFS_DEV_REPLACE_KEY;
+	key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
+	if (ret < 0) {
+		pr_warn("btrfs: error %d while searching for dev_replace item!\n",
+			ret);
+		goto out;
+	}
+
+	if (ret == 0 &&
+	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
+		/*
+		 * need to delete old one and insert a new one.
+		 * Since no attempt is made to recover any old state, if the
+		 * dev_replace state is 'running', the data on the target
+		 * drive is lost.
+		 * It would be possible to recover the state: just make sure
+		 * that the beginning of the item is never changed and always
+		 * contains all the essential information. Then read this
+		 * minimal set of information and use it as a base for the
+		 * new state.
+		 */
+		ret = btrfs_del_item(trans, dev_root, path);
+		if (ret != 0) {
+			pr_warn("btrfs: delete too small dev_replace item failed %d!\n",
+				ret);
+			goto out;
+		}
+		ret = 1;
+	}
+
+	if (ret == 1) {
+		/* need to insert a new item */
+		btrfs_release_path(path);
+		ret = btrfs_insert_empty_item(trans, dev_root, path,
+					      &key, sizeof(*ptr));
+		if (ret < 0) {
+			pr_warn("btrfs: insert dev_replace item failed %d!\n",
+				ret);
+			goto out;
+		}
+	}
+
+	eb = path->nodes[0];
+	ptr = btrfs_item_ptr(eb, path->slots[0],
+			     struct btrfs_dev_replace_item);
+
+	btrfs_dev_replace_lock(dev_replace);
+	if (dev_replace->srcdev)
+		btrfs_set_dev_replace_src_devid(eb, ptr,
+			dev_replace->srcdev->devid);
+	else
+		btrfs_set_dev_replace_src_devid(eb, ptr, (u64)-1);
+	btrfs_set_dev_replace_cont_reading_from_srcdev_mode(eb, ptr,
+		dev_replace->cont_reading_from_srcdev_mode);
+	btrfs_set_dev_replace_replace_state(eb, ptr,
+		dev_replace->replace_state);
+	btrfs_set_dev_replace_time_started(eb, ptr, dev_replace->time_started);
+	btrfs_set_dev_replace_time_stopped(eb, ptr, dev_replace->time_stopped);
+	btrfs_set_dev_replace_num_write_errors(eb, ptr,
+		atomic64_read(&dev_replace->num_write_errors));
+	btrfs_set_dev_replace_num_uncorrectable_read_errors(eb, ptr,
+		atomic64_read(&dev_replace->num_uncorrectable_read_errors));
+	dev_replace->cursor_left_last_write_of_item =
+		dev_replace->cursor_left;
+	btrfs_set_dev_replace_cursor_left(eb, ptr,
+		dev_replace->cursor_left_last_write_of_item);
+	btrfs_set_dev_replace_cursor_right(eb, ptr,
+		dev_replace->cursor_right);
+	dev_replace->item_needs_writeback = 0;
+	btrfs_dev_replace_unlock(dev_replace);
+
+	btrfs_mark_buffer_dirty(eb);
+
+out:
+	btrfs_free_path(path);
+
+	return ret;
+}
+
+void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+
+	dev_replace->committed_cursor_left =
+		dev_replace->cursor_left_last_write_of_item;
+}
+
+static u64 btrfs_get_seconds_since_1970(void)
+{
+	struct timespec t = CURRENT_TIME_SEC;
+
+	return t.tv_sec;
+}
+
+int btrfs_dev_replace_start(struct btrfs_root *root,
+			    struct btrfs_ioctl_dev_replace_args *args)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	int ret;
+	struct btrfs_device *tgt_device = NULL;
+	struct btrfs_device *src_device = NULL;
+
+	switch (args->start.cont_reading_from_srcdev_mode) {
+	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
+	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
+	    args->start.tgtdev_name[0] == '\0')
+		return -EINVAL;
+
+	mutex_lock(&fs_info->volume_mutex);
+	ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
+					    &tgt_device);
+	if (ret) {
+		pr_err("btrfs: target device %s is invalid!\n",
+		       args->start.tgtdev_name);
+		mutex_unlock(&fs_info->volume_mutex);
+		return -EINVAL;
+	}
+
+	ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid,
+					    args->start.srcdev_name,
+					    &src_device);
+	mutex_unlock(&fs_info->volume_mutex);
+	if (ret) {
+		ret = -EINVAL;
+		goto leave_no_lock;
+	}
+
+	if (tgt_device->total_bytes < src_device->total_bytes) {
+		pr_err("btrfs: target device is smaller than source device!\n");
+		ret = -EINVAL;
+		goto leave_no_lock;
+	}
+
+	btrfs_dev_replace_lock(dev_replace);
+	switch (dev_replace->replace_state) {
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+		args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
+		goto leave;
+	}
+
+	dev_replace->cont_reading_from_srcdev_mode =
+		args->start.cont_reading_from_srcdev_mode;
+	WARN_ON(!src_device);
+	dev_replace->srcdev = src_device;
+	WARN_ON(!tgt_device);
+	dev_replace->tgtdev = tgt_device;
+
+	printk_in_rcu(KERN_INFO
+		      "btrfs: dev_replace from %s (devid %llu) to %s started\n",
+		      src_device->missing ? "<missing disk>" :
+		        rcu_str_deref(src_device->name),
+		      src_device->devid,
+		      rcu_str_deref(tgt_device->name));
+
+	tgt_device->total_bytes = src_device->total_bytes;
+	tgt_device->disk_total_bytes = src_device->disk_total_bytes;
+	tgt_device->bytes_used = src_device->bytes_used;
+
+	/*
+	 * from now on, the writes to the srcdev are all duplicated to
+	 * go to the tgtdev as well (refer to btrfs_map_block()).
+	 */
+	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
+	dev_replace->time_started = btrfs_get_seconds_since_1970();
+	dev_replace->cursor_left = 0;
+	dev_replace->committed_cursor_left = 0;
+	dev_replace->cursor_left_last_write_of_item = 0;
+	dev_replace->cursor_right = 0;
+	dev_replace->is_valid = 1;
+	dev_replace->item_needs_writeback = 1;
+	args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
+	btrfs_dev_replace_unlock(dev_replace);
+
+	btrfs_wait_ordered_extents(root, 0);
+
+	/* force writing the updated state information to disk */
+	trans = btrfs_start_transaction(root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		btrfs_dev_replace_lock(dev_replace);
+		goto leave;
+	}
+
+	ret = btrfs_commit_transaction(trans, root);
+	WARN_ON(ret);
+
+	/* the disk copy procedure reuses the scrub code */
+	ret = btrfs_scrub_dev(fs_info, src_device->devid, 0,
+			      src_device->total_bytes,
+			      &dev_replace->scrub_progress, 0, 1);
+
+	ret = btrfs_dev_replace_finishing(root->fs_info, ret);
+	WARN_ON(ret);
+
+	return 0;
+
+leave:
+	dev_replace->srcdev = NULL;
+	dev_replace->tgtdev = NULL;
+	btrfs_dev_replace_unlock(dev_replace);
+leave_no_lock:
+	if (tgt_device)
+		btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+	return ret;
+}
+
+static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
+				       int scrub_ret)
+{
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	struct btrfs_device *tgt_device;
+	struct btrfs_device *src_device;
+	struct btrfs_root *root = fs_info->tree_root;
+	u8 uuid_tmp[BTRFS_UUID_SIZE];
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+
+	/* don't allow cancel or unmount to disturb the finishing procedure */
+	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
+
+	btrfs_dev_replace_lock(dev_replace);
+	/* was the operation canceled, or is it finished? */
+	if (dev_replace->replace_state !=
+	    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
+		btrfs_dev_replace_unlock(dev_replace);
+		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+		return 0;
+	}
+
+	tgt_device = dev_replace->tgtdev;
+	src_device = dev_replace->srcdev;
+	btrfs_dev_replace_unlock(dev_replace);
+
+	/* replace old device with new one in mapping tree */
+	if (!scrub_ret)
+		btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
+								src_device,
+								tgt_device);
+
+	/*
+	 * flush all outstanding I/O and inode extent mappings before the
+	 * copy operation is declared as being finished
+	 */
+	btrfs_start_delalloc_inodes(root, 0);
+	btrfs_wait_ordered_extents(root, 0);
+
+	trans = btrfs_start_transaction(root, 0);
+	if (IS_ERR(trans)) {
+		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+		return PTR_ERR(trans);
+	}
+	ret = btrfs_commit_transaction(trans, root);
+	WARN_ON(ret);
+
+	/* keep away write_all_supers() during the finishing procedure */
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+	btrfs_dev_replace_lock(dev_replace);
+	dev_replace->replace_state =
+		scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
+			  : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
+	dev_replace->tgtdev = NULL;
+	dev_replace->srcdev = NULL;
+	dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+	dev_replace->item_needs_writeback = 1;
+
+	if (scrub_ret) {
+		printk_in_rcu(KERN_ERR
+			      "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
+			      src_device->missing ? "<missing disk>" :
+			        rcu_str_deref(src_device->name),
+			      src_device->devid,
+			      rcu_str_deref(tgt_device->name), scrub_ret);
+		btrfs_dev_replace_unlock(dev_replace);
+		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+		if (tgt_device)
+			btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+
+		return 0;
+	}
+
+	printk_in_rcu(KERN_INFO
+		      "btrfs: dev_replace from %s (devid %llu) to %s finished\n",
+		      src_device->missing ? "<missing disk>" :
+		        rcu_str_deref(src_device->name),
+		      src_device->devid,
+		      rcu_str_deref(tgt_device->name));
+	tgt_device->is_tgtdev_for_dev_replace = 0;
+	tgt_device->devid = src_device->devid;
+	src_device->devid = BTRFS_DEV_REPLACE_DEVID;
+	tgt_device->bytes_used = src_device->bytes_used;
+	memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
+	memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid));
+	memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid));
+	tgt_device->total_bytes = src_device->total_bytes;
+	tgt_device->disk_total_bytes = src_device->disk_total_bytes;
+	tgt_device->bytes_used = src_device->bytes_used;
+	if (fs_info->sb->s_bdev == src_device->bdev)
+		fs_info->sb->s_bdev = tgt_device->bdev;
+	if (fs_info->fs_devices->latest_bdev == src_device->bdev)
+		fs_info->fs_devices->latest_bdev = tgt_device->bdev;
+	list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
+
+	btrfs_rm_dev_replace_srcdev(fs_info, src_device);
+	if (src_device->bdev) {
+		/* zero out the old super */
+		btrfs_scratch_superblock(src_device);
+	}
+	/*
+	 * this is again a consistent state where no dev_replace procedure
+	 * is running, the target device is part of the filesystem, the
+	 * source device is not part of the filesystem anymore and its 1st
+	 * superblock is scratched out so that it is no longer marked to
+	 * belong to this filesystem.
+	 */
+	btrfs_dev_replace_unlock(dev_replace);
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
+	/* write back the superblocks */
+	trans = btrfs_start_transaction(root, 0);
+	if (!IS_ERR(trans))
+		btrfs_commit_transaction(trans, root);
+
+	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+
+	return 0;
+}
+
+static void btrfs_dev_replace_update_device_in_mapping_tree(
+						struct btrfs_fs_info *fs_info,
+						struct btrfs_device *srcdev,
+						struct btrfs_device *tgtdev)
+{
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+	struct extent_map *em;
+	struct map_lookup *map;
+	u64 start = 0;
+	int i;
+
+	write_lock(&em_tree->lock);
+	do {
+		em = lookup_extent_mapping(em_tree, start, (u64)-1);
+		if (!em)
+			break;
+		map = (struct map_lookup *)em->bdev;
+		for (i = 0; i < map->num_stripes; i++)
+			if (srcdev == map->stripes[i].dev)
+				map->stripes[i].dev = tgtdev;
+		start = em->start + em->len;
+		free_extent_map(em);
+	} while (start);
+	write_unlock(&em_tree->lock);
+}
+
+static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
+					 char *srcdev_name,
+					 struct btrfs_device **device)
+{
+	int ret;
+
+	if (srcdevid) {
+		ret = 0;
+		*device = btrfs_find_device(root->fs_info, srcdevid, NULL,
+					    NULL);
+		if (!*device)
+			ret = -ENOENT;
+	} else {
+		ret = btrfs_find_device_missing_or_by_path(root, srcdev_name,
+							   device);
+	}
+	return ret;
+}
+
+void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
+			      struct btrfs_ioctl_dev_replace_args *args)
+{
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+
+	btrfs_dev_replace_lock(dev_replace);
+	/* even if !dev_replace_is_valid, the values are good enough for
+	 * the replace_status ioctl */
+	args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
+	args->status.replace_state = dev_replace->replace_state;
+	args->status.time_started = dev_replace->time_started;
+	args->status.time_stopped = dev_replace->time_stopped;
+	args->status.num_write_errors =
+		atomic64_read(&dev_replace->num_write_errors);
+	args->status.num_uncorrectable_read_errors =
+		atomic64_read(&dev_replace->num_uncorrectable_read_errors);
+	switch (dev_replace->replace_state) {
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+		args->status.progress_1000 = 0;
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+		args->status.progress_1000 = 1000;
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+		args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
+			div64_u64(dev_replace->srcdev->total_bytes, 1000));
+		break;
+	}
+	btrfs_dev_replace_unlock(dev_replace);
+}
+
+int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
+			     struct btrfs_ioctl_dev_replace_args *args)
+{
+	args->result = __btrfs_dev_replace_cancel(fs_info);
+	return 0;
+}
+
+static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	struct btrfs_device *tgt_device = NULL;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = fs_info->tree_root;
+	u64 result;
+	int ret;
+
+	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
+	btrfs_dev_replace_lock(dev_replace);
+	switch (dev_replace->replace_state) {
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
+		btrfs_dev_replace_unlock(dev_replace);
+		goto leave;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
+		tgt_device = dev_replace->tgtdev;
+		dev_replace->tgtdev = NULL;
+		dev_replace->srcdev = NULL;
+		break;
+	}
+	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
+	dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+	dev_replace->item_needs_writeback = 1;
+	btrfs_dev_replace_unlock(dev_replace);
+	btrfs_scrub_cancel(fs_info);
+
+	trans = btrfs_start_transaction(root, 0);
+	if (IS_ERR(trans)) {
+		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+		return PTR_ERR(trans);
+	}
+	ret = btrfs_commit_transaction(trans, root);
+	WARN_ON(ret);
+	if (tgt_device)
+		btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+
+leave:
+	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+	return result;
+}
+
+void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+
+	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
+	btrfs_dev_replace_lock(dev_replace);
+	switch (dev_replace->replace_state) {
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+		dev_replace->replace_state =
+			BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
+		dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+		dev_replace->item_needs_writeback = 1;
+		pr_info("btrfs: suspending dev_replace for unmount\n");
+		break;
+	}
+
+	btrfs_dev_replace_unlock(dev_replace);
+	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+}
+
+/* resume dev_replace procedure that was interrupted by unmount */
+int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
+{
+	struct task_struct *task;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+
+	btrfs_dev_replace_lock(dev_replace);
+	switch (dev_replace->replace_state) {
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+		btrfs_dev_replace_unlock(dev_replace);
+		return 0;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+		dev_replace->replace_state =
+			BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
+		break;
+	}
+	if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
+		pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n"
+			"btrfs: you may cancel the operation after 'mount -o degraded'\n");
+		btrfs_dev_replace_unlock(dev_replace);
+		return 0;
+	}
+	btrfs_dev_replace_unlock(dev_replace);
+
+	WARN_ON(atomic_xchg(
+		&fs_info->mutually_exclusive_operation_running, 1));
+	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
+	return PTR_RET(task);
+}
+
+static int btrfs_dev_replace_kthread(void *data)
+{
+	struct btrfs_fs_info *fs_info = data;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	struct btrfs_ioctl_dev_replace_args *status_args;
+	u64 progress;
+
+	status_args = kzalloc(sizeof(*status_args), GFP_NOFS);
+	if (status_args) {
+		btrfs_dev_replace_status(fs_info, status_args);
+		progress = status_args->status.progress_1000;
+		kfree(status_args);
+		do_div(progress, 10);
+		printk_in_rcu(KERN_INFO
+			      "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
+			      dev_replace->srcdev->missing ? "<missing disk>" :
+				rcu_str_deref(dev_replace->srcdev->name),
+			      dev_replace->srcdev->devid,
+			      dev_replace->tgtdev ?
+				rcu_str_deref(dev_replace->tgtdev->name) :
+				"<missing target disk>",
+			      (unsigned int)progress);
+	}
+	btrfs_dev_replace_continue_on_mount(fs_info);
+	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+
+	return 0;
+}
+
+static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	int ret;
+
+	ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid,
+			      dev_replace->committed_cursor_left,
+			      dev_replace->srcdev->total_bytes,
+			      &dev_replace->scrub_progress, 0, 1);
+	ret = btrfs_dev_replace_finishing(fs_info, ret);
+	WARN_ON(ret);
+	return 0;
+}
+
+int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
+{
+	if (!dev_replace->is_valid)
+		return 0;
+
+	switch (dev_replace->replace_state) {
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
+		return 0;
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+		/*
+		 * return true even if tgtdev is missing: this can
+		 * happen if the dev_replace procedure is suspended
+		 * by an umount, "btrfs dev scan" is not run for the
+		 * tgtdev, and the filesystem is then remounted in
+		 * degraded state. This does not stop the dev_replace
+		 * procedure. It needs to be canceled manually if the
+		 * cancellation is wanted.
+		 */
+		break;
+	}
+	return 1;
+}
+
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace)
+{
+	/* the beginning is just an optimization for the typical case */
+	if (atomic_read(&dev_replace->nesting_level) == 0) {
+acquire_lock:
+		/* this is not a nested case where the same thread
+		 * is trying to acquire the same lock twice */
+		mutex_lock(&dev_replace->lock);
+		mutex_lock(&dev_replace->lock_management_lock);
+		dev_replace->lock_owner = current->pid;
+		atomic_inc(&dev_replace->nesting_level);
+		mutex_unlock(&dev_replace->lock_management_lock);
+		return;
+	}
+
+	mutex_lock(&dev_replace->lock_management_lock);
+	if (atomic_read(&dev_replace->nesting_level) > 0 &&
+	    dev_replace->lock_owner == current->pid) {
+		WARN_ON(!mutex_is_locked(&dev_replace->lock));
+		atomic_inc(&dev_replace->nesting_level);
+		mutex_unlock(&dev_replace->lock_management_lock);
+		return;
+	}
+
+	mutex_unlock(&dev_replace->lock_management_lock);
+	goto acquire_lock;
+}
+
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
+{
+	WARN_ON(!mutex_is_locked(&dev_replace->lock));
+	mutex_lock(&dev_replace->lock_management_lock);
+	WARN_ON(atomic_read(&dev_replace->nesting_level) < 1);
+	WARN_ON(dev_replace->lock_owner != current->pid);
+	atomic_dec(&dev_replace->nesting_level);
+	if (atomic_read(&dev_replace->nesting_level) == 0) {
+		dev_replace->lock_owner = 0;
+		mutex_unlock(&dev_replace->lock_management_lock);
+		mutex_unlock(&dev_replace->lock);
+	} else {
+		mutex_unlock(&dev_replace->lock_management_lock);
+	}
+}
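
btrfs_dev_replace_lock()/unlock() hand-roll a recursive mutex: a small
management mutex protects an (owner pid, nesting depth) pair, and only the
outermost acquire and release touch the real lock. A stripped-down sketch of
the same pattern, with hypothetical names and the fast-path optimization
omitted (kernel context assumed):

struct recursive_lock {
	struct mutex lock;	/* the lock callers actually hold */
	struct mutex mgmt;	/* protects owner and depth */
	pid_t owner;
	atomic_t depth;
};

static void recursive_lock_acquire(struct recursive_lock *r)
{
	mutex_lock(&r->mgmt);
	if (atomic_read(&r->depth) > 0 && r->owner == current->pid) {
		/* nested acquire by the owner: just bump the depth */
		atomic_inc(&r->depth);
		mutex_unlock(&r->mgmt);
		return;
	}
	mutex_unlock(&r->mgmt);

	mutex_lock(&r->lock);	/* outermost acquire */
	mutex_lock(&r->mgmt);
	r->owner = current->pid;
	atomic_inc(&r->depth);
	mutex_unlock(&r->mgmt);
}
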
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
new file mode 100644
index 0000000..20035cb
--- /dev/null
+++ b/fs/btrfs/dev-replace.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) STRATO AG 2012.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#if !defined(__BTRFS_DEV_REPLACE__)
+#define __BTRFS_DEV_REPLACE__
+
+struct btrfs_ioctl_dev_replace_args;
+
+int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
+int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
+			  struct btrfs_fs_info *fs_info);
+void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info);
+int btrfs_dev_replace_start(struct btrfs_root *root,
+			    struct btrfs_ioctl_dev_replace_args *args);
+void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
+			      struct btrfs_ioctl_dev_replace_args *args);
+int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
+			     struct btrfs_ioctl_dev_replace_args *args);
+void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
+int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
+int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
+
+static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
+{
+	atomic64_inc(stat_value);
+}
+#endif
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c1a074d..502c215 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -213,6 +213,65 @@
 	return btrfs_match_dir_item_name(root, path, name, name_len);
 }
 
+int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+				   const char *name, int name_len)
+{
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_dir_item *di;
+	int data_size;
+	struct extent_buffer *leaf;
+	int slot;
+	struct btrfs_path *path;
+
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+	key.offset = btrfs_name_hash(name, name_len);
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+
+	/* return back any errors */
+	if (ret < 0)
+		goto out;
+
+	/* nothing found, we're safe */
+	if (ret > 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/* we found an item, look for our name in the item */
+	di = btrfs_match_dir_item_name(root, path, name, name_len);
+	if (di) {
+		/* our exact name was found */
+		ret = -EEXIST;
+		goto out;
+	}
+
+	/*
+	 * see if there is room in the item to insert this
+	 * name
+	 */
+	data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item);
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	if (data_size + btrfs_item_size_nr(leaf, slot) +
+	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) {
+		ret = -EOVERFLOW;
+	} else {
+		/* plenty of insertion room */
+		ret = 0;
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
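
Directory items are keyed by (dir objectid, BTRFS_DIR_ITEM_KEY, name hash),
so two different names can collide on the same key and get packed into one
leaf item. The helper therefore distinguishes three outcomes: the exact name
exists (-EEXIST), the hash exists but the leaf item has no room for another
entry (-EOVERFLOW), or the name can be inserted (0). A hypothetical caller
(dir_ino, name and name_len are illustrative), checking before it starts a
transaction:

	ret = btrfs_check_dir_item_collision(root, dir_ino, name, name_len);
	if (ret == -EEXIST)
		return ret;	/* exact name already present */
	if (ret == -EOVERFLOW)
		return ret;	/* same hash, no room left in the leaf item */
	if (ret < 0)
		return ret;	/* tree search failed */
	/* ret == 0: safe to insert */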
+
 /*
  * lookup a directory item based on index.  'dir' is the objectid
  * we're searching in, and 'mod' tells us if you plan on deleting the
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 22a0439..a8f652d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -45,6 +45,7 @@
 #include "inode-map.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "dev-replace.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -387,7 +388,7 @@
 		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
 			break;
 
-		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+		num_copies = btrfs_num_copies(root->fs_info,
 					      eb->start, eb->len);
 		if (num_copies == 1)
 			break;
@@ -852,11 +853,16 @@
 				 int mirror_num, unsigned long bio_flags,
 				 u64 bio_offset)
 {
+	int ret;
+
 	/*
 	 * when we're called for a write, we're already in the async
 	 * submission context.  Just jump into btrfs_map_bio
 	 */
-	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+	ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+	if (ret)
+		bio_endio(bio, ret);
+	return ret;
 }
 
 static int check_async_write(struct inode *inode, unsigned long bio_flags)
@@ -878,7 +884,6 @@
 	int ret;
 
 	if (!(rw & REQ_WRITE)) {
-
 		/*
 		 * called for a read, do the setup so that checksum validation
 		 * can happen in the async kernel threads
@@ -886,26 +891,32 @@
 		ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
 					  bio, 1);
 		if (ret)
-			return ret;
-		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-				     mirror_num, 0);
+			goto out_w_error;
+		ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				    mirror_num, 0);
 	} else if (!async) {
 		ret = btree_csum_one_bio(bio);
 		if (ret)
-			return ret;
-		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-				     mirror_num, 0);
+			goto out_w_error;
+		ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				    mirror_num, 0);
+	} else {
+		/*
+		 * kthread helpers are used to submit writes so that
+		 * checksumming can happen in parallel across all CPUs
+		 */
+		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+					  inode, rw, bio, mirror_num, 0,
+					  bio_offset,
+					  __btree_submit_bio_start,
+					  __btree_submit_bio_done);
 	}
 
-	/*
-	 * kthread helpers are used to submit writes so that checksumming
-	 * can happen in parallel across all CPUs
-	 */
-	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-				   inode, rw, bio, mirror_num, 0,
-				   bio_offset,
-				   __btree_submit_bio_start,
-				   __btree_submit_bio_done);
+	if (ret) {
+out_w_error:
+		bio_endio(bio, ret);
+	}
+	return ret;
 }
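
The rework above makes sure a btree bio that fails at any stage is completed
with bio_endio(), instead of returning an error the submitter might never
translate into an I/O completion, which would leave waiters stuck. The
out_w_error label sitting inside the if body is legal C and funnels the early
failures to the same completion call; a generic sketch of the shape, with
prepare() and submit() as stand-ins:

static int submit_or_complete(struct bio *bio)
{
	int ret;

	ret = prepare(bio);
	if (ret)
		goto out_error;

	ret = submit(bio);
	if (ret) {
out_error:
		/* always complete the bio on failure */
		bio_endio(bio, ret);
	}
	return ret;
}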
 
 #ifdef CONFIG_MIGRATION
@@ -990,6 +1001,7 @@
 
 static int btree_set_page_dirty(struct page *page)
 {
+#ifdef DEBUG
 	struct extent_buffer *eb;
 
 	BUG_ON(!PagePrivate(page));
@@ -998,6 +1010,7 @@
 	BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
 	BUG_ON(!atomic_read(&eb->refs));
 	btrfs_assert_tree_locked(eb);
+#endif
 	return __set_page_dirty_nobuffers(page);
 }
 
@@ -1129,11 +1142,11 @@
 					  root->fs_info->dirty_metadata_bytes);
 			}
 			spin_unlock(&root->fs_info->delalloc_lock);
-		}
 
-		/* ugh, clear_extent_buffer_dirty needs to lock the page */
-		btrfs_set_lock_blocking(buf);
-		clear_extent_buffer_dirty(buf);
+			/* ugh, clear_extent_buffer_dirty needs to lock the page */
+			btrfs_set_lock_blocking(buf);
+			clear_extent_buffer_dirty(buf);
+		}
 	}
 }
 
@@ -1193,7 +1206,7 @@
 	root->root_key.objectid = objectid;
 	root->anon_dev = 0;
 
-	spin_lock_init(&root->root_times_lock);
+	spin_lock_init(&root->root_item_lock);
 }
 
 static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
@@ -2131,6 +2144,11 @@
 	init_rwsem(&fs_info->extent_commit_sem);
 	init_rwsem(&fs_info->cleanup_work_sem);
 	init_rwsem(&fs_info->subvol_sem);
+	fs_info->dev_replace.lock_owner = 0;
+	atomic_set(&fs_info->dev_replace.nesting_level, 0);
+	mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+	mutex_init(&fs_info->dev_replace.lock_management_lock);
+	mutex_init(&fs_info->dev_replace.lock);
 
 	spin_lock_init(&fs_info->qgroup_lock);
 	fs_info->qgroup_tree = RB_ROOT;
@@ -2279,6 +2297,10 @@
 			   fs_info->thread_pool_size,
 			   &fs_info->generic_worker);
 
+	btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
+			   fs_info->thread_pool_size,
+			   &fs_info->generic_worker);
+
 	btrfs_init_workers(&fs_info->submit_workers, "submit",
 			   min_t(u64, fs_devices->num_devices,
 			   fs_info->thread_pool_size),
@@ -2350,6 +2372,7 @@
 	ret |= btrfs_start_workers(&fs_info->delayed_workers);
 	ret |= btrfs_start_workers(&fs_info->caching_workers);
 	ret |= btrfs_start_workers(&fs_info->readahead_workers);
+	ret |= btrfs_start_workers(&fs_info->flush_workers);
 	if (ret) {
 		err = -ENOMEM;
 		goto fail_sb_buffer;
@@ -2418,7 +2441,11 @@
 		goto fail_tree_roots;
 	}
 
-	btrfs_close_extra_devices(fs_devices);
+	/*
+	 * keep the device that is marked to be the target device for the
+	 * dev_replace procedure
+	 */
+	btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
 	if (!fs_devices->latest_bdev) {
 		printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
@@ -2490,6 +2517,14 @@
 		goto fail_block_groups;
 	}
 
+	ret = btrfs_init_dev_replace(fs_info);
+	if (ret) {
+		pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+		goto fail_block_groups;
+	}
+
+	btrfs_close_extra_devices(fs_info, fs_devices, 1);
+
 	ret = btrfs_init_space_info(fs_info);
 	if (ret) {
 		printk(KERN_ERR "Failed to initialize space info: %d\n", ret);
@@ -2503,6 +2538,13 @@
 	}
 	fs_info->num_tolerated_disk_barrier_failures =
 		btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
+	if (fs_info->fs_devices->missing_devices >
+	     fs_info->num_tolerated_disk_barrier_failures &&
+	    !(sb->s_flags & MS_RDONLY)) {
+		printk(KERN_WARNING
+		       "Btrfs: too many missing devices, writeable mount is not allowed\n");
+		goto fail_block_groups;
+	}
 
 	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
 					       "btrfs-cleaner");
@@ -2631,6 +2673,13 @@
 		return ret;
 	}
 
+	ret = btrfs_resume_dev_replace_async(fs_info);
+	if (ret) {
+		pr_warn("btrfs: failed to resume dev_replace\n");
+		close_ctree(tree_root);
+		return ret;
+	}
+
 	return 0;
 
 fail_qgroup:
@@ -2667,6 +2716,7 @@
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
 	btrfs_stop_workers(&fs_info->caching_workers);
+	btrfs_stop_workers(&fs_info->flush_workers);
 fail_alloc:
 fail_iput:
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3270,16 +3320,18 @@
 	smp_mb();
 
 	/* pause restriper - we want to resume on mount */
-	btrfs_pause_balance(root->fs_info);
+	btrfs_pause_balance(fs_info);
 
-	btrfs_scrub_cancel(root);
+	btrfs_dev_replace_suspend_for_unmount(fs_info);
+
+	btrfs_scrub_cancel(fs_info);
 
 	/* wait for any defraggers to finish */
 	wait_event(fs_info->transaction_wait,
 		   (atomic_read(&fs_info->defrag_running) == 0));
 
 	/* clear out the rbtree of defraggable inodes */
-	btrfs_run_defrag_inodes(fs_info);
+	btrfs_cleanup_defrag_inodes(fs_info);
 
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_commit_super(root);
@@ -3339,6 +3391,7 @@
 	btrfs_stop_workers(&fs_info->delayed_workers);
 	btrfs_stop_workers(&fs_info->caching_workers);
 	btrfs_stop_workers(&fs_info->readahead_workers);
+	btrfs_stop_workers(&fs_info->flush_workers);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3383,14 +3436,12 @@
 	int was_dirty;
 
 	btrfs_assert_tree_locked(buf);
-	if (transid != root->fs_info->generation) {
-		printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
+	if (transid != root->fs_info->generation)
+		WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
 		       "found %llu running %llu\n",
 			(unsigned long long)buf->start,
 			(unsigned long long)transid,
 			(unsigned long long)root->fs_info->generation);
-		WARN_ON(1);
-	}
 	was_dirty = set_extent_buffer_dirty(buf);
 	if (!was_dirty) {
 		spin_lock(&root->fs_info->delalloc_lock);
@@ -3399,7 +3450,8 @@
 	}
 }
 
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
+					int flush_delayed)
 {
 	/*
 	 * looks as though older kernels can get into trouble with
@@ -3411,7 +3463,8 @@
 	if (current->flags & PF_MEMALLOC)
 		return;
 
-	btrfs_balance_delayed_items(root);
+	if (flush_delayed)
+		btrfs_balance_delayed_items(root);
 
 	num_dirty = root->fs_info->dirty_metadata_bytes;
 
@@ -3422,25 +3475,14 @@
 	return;
 }
 
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+void btrfs_btree_balance_dirty(struct btrfs_root *root)
 {
-	/*
-	 * looks as though older kernels can get into trouble with
-	 * this code, they end up stuck in balance_dirty_pages forever
-	 */
-	u64 num_dirty;
-	unsigned long thresh = 32 * 1024 * 1024;
+	__btrfs_btree_balance_dirty(root, 1);
+}
 
-	if (current->flags & PF_MEMALLOC)
-		return;
-
-	num_dirty = root->fs_info->dirty_metadata_bytes;
-
-	if (num_dirty > thresh) {
-		balance_dirty_pages_ratelimited(
-				   root->fs_info->btree_inode->i_mapping);
-	}
-	return;
+void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
+{
+	__btrfs_btree_balance_dirty(root, 0);
 }
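
The two exported balance helpers above are now thin wrappers around one
implementation: the removed body never referenced its unsigned long argument,
and the only real difference between callers is whether delayed items get
flushed first. Call sites elsewhere in this series change roughly like this:

	/* old: btrfs_btree_balance_dirty(root, nr);  nr was ignored */
	btrfs_btree_balance_dirty(root);	 /* flush delayed items, then balance */
	btrfs_btree_balance_dirty_nodelay(root); /* balance only */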
 
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 2025a91..305c33e 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -62,8 +62,8 @@
 struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
 					      struct btrfs_key *location);
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
+void btrfs_btree_balance_dirty(struct btrfs_root *root);
+void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
 void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 06b2635..521e9d4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,7 @@
 #include "volumes.h"
 #include "locking.h"
 #include "free-space-cache.h"
+#include "math.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -649,24 +650,6 @@
 	rcu_read_unlock();
 }
 
-static u64 div_factor(u64 num, int factor)
-{
-	if (factor == 10)
-		return num;
-	num *= factor;
-	do_div(num, 10);
-	return num;
-}
-
-static u64 div_factor_fine(u64 num, int factor)
-{
-	if (factor == 100)
-		return num;
-	num *= factor;
-	do_div(num, 100);
-	return num;
-}
-
 u64 btrfs_find_block_group(struct btrfs_root *root,
 			   u64 search_start, u64 search_hint, int owner)
 {
@@ -1835,7 +1818,7 @@
 
 
 	/* Tell the block device(s) that the sectors can be discarded */
-	ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
+	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
 			      bytenr, &num_bytes, &bbio, 0);
 	/* Error condition is -ENOMEM */
 	if (!ret) {
@@ -2314,6 +2297,9 @@
 				kfree(extent_op);
 
 				if (ret) {
+					list_del_init(&locked_ref->cluster);
+					mutex_unlock(&locked_ref->mutex);
+
 					printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
 					spin_lock(&delayed_refs->lock);
 					return ret;
@@ -2356,6 +2342,10 @@
 		count++;
 
 		if (ret) {
+			if (locked_ref) {
+				list_del_init(&locked_ref->cluster);
+				mutex_unlock(&locked_ref->mutex);
+			}
 			printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
 			spin_lock(&delayed_refs->lock);
 			return ret;
@@ -3661,7 +3651,7 @@
 
 static int can_overcommit(struct btrfs_root *root,
 			  struct btrfs_space_info *space_info, u64 bytes,
-			  int flush)
+			  enum btrfs_reserve_flush_enum flush)
 {
 	u64 profile = btrfs_get_alloc_profile(root, 0);
 	u64 avail;
@@ -3685,11 +3675,11 @@
 		avail >>= 1;
 
 	/*
-	 * If we aren't flushing don't let us overcommit too much, say
-	 * 1/8th of the space.  If we can flush, let it overcommit up to
-	 * 1/2 of the space.
+	 * If we aren't flushing all things, let us overcommit up to
+	 * 1/2 of the space. If we can flush everything, don't let us
+	 * overcommit too much, only up to 1/8 of the space.
 	 */
-	if (flush)
+	if (flush == BTRFS_RESERVE_FLUSH_ALL)
 		avail >>= 3;
 	else
 		avail >>= 1;
@@ -3699,6 +3689,20 @@
 	return 0;
 }
 
+static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
+					       unsigned long nr_pages,
+					       enum wb_reason reason)
+{
+	if (!writeback_in_progress(sb->s_bdi) &&
+	    down_read_trylock(&sb->s_umount)) {
+		writeback_inodes_sb_nr(sb, nr_pages, reason);
+		up_read(&sb->s_umount);
+		return 1;
+	}
+
+	return 0;
+}
+
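
Compared to writeback_inodes_sb_nr_if_idle(), this _safe variant only takes
s_umount with down_read_trylock(), so a flush that races with umount (or with
writeback already in flight) degrades to a no-op rather than deadlocking; the
return value tells the caller whether writeback was actually started. A
hypothetical caller:

	/* best-effort flush that never sleeps on s_umount */
	if (!writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
						 WB_REASON_FS_FREE_SPACE)) {
		/* contended or already running: fall through, retry later */
	}
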
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3713,6 +3717,7 @@
 	long time_left;
 	unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 	int loops = 0;
+	enum btrfs_reserve_flush_enum flush;
 
 	trans = (struct btrfs_trans_handle *)current->journal_info;
 	block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -3730,8 +3735,9 @@
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
-					       WB_REASON_FS_FREE_SPACE);
+		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
+						    nr_pages,
+						    WB_REASON_FS_FREE_SPACE);
 
 		/*
 		 * We need to wait for the async pages to actually start before
@@ -3740,8 +3746,12 @@
 		wait_event(root->fs_info->async_submit_wait,
 			   !atomic_read(&root->fs_info->async_delalloc_pages));
 
+		if (!trans)
+			flush = BTRFS_RESERVE_FLUSH_ALL;
+		else
+			flush = BTRFS_RESERVE_NO_FLUSH;
 		spin_lock(&space_info->lock);
-		if (can_overcommit(root, space_info, orig, !trans)) {
+		if (can_overcommit(root, space_info, orig, flush)) {
 			spin_unlock(&space_info->lock);
 			break;
 		}
@@ -3899,7 +3909,8 @@
  */
 static int reserve_metadata_bytes(struct btrfs_root *root,
 				  struct btrfs_block_rsv *block_rsv,
-				  u64 orig_bytes, int flush)
+				  u64 orig_bytes,
+				  enum btrfs_reserve_flush_enum flush)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
 	u64 used;
@@ -3912,10 +3923,11 @@
 	ret = 0;
 	spin_lock(&space_info->lock);
 	/*
-	 * We only want to wait if somebody other than us is flushing and we are
-	 * actually alloed to flush.
+	 * We only want to wait if somebody other than us is flushing and we
+	 * are actually allowed to flush all things.
 	 */
-	while (flush && !flushing && space_info->flush) {
+	while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
+	       space_info->flush) {
 		spin_unlock(&space_info->lock);
 		/*
 		 * If we have a trans handle we can't wait because the flusher
@@ -3981,23 +3993,40 @@
 	 * Couldn't make our reservation, save our place so while we're trying
 	 * to reclaim space we can actually use it instead of somebody else
 	 * stealing it from us.
+	 *
+	 * We make the other tasks wait for the flush only when we can flush
+	 * all things.
 	 */
-	if (ret && flush) {
+	if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) {
 		flushing = true;
 		space_info->flush = 1;
 	}
 
 	spin_unlock(&space_info->lock);
 
-	if (!ret || !flush)
+	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
 		goto out;
 
 	ret = flush_space(root, space_info, num_bytes, orig_bytes,
 			  flush_state);
 	flush_state++;
+
+	/*
+	 * If we are FLUSH_LIMIT, we can not flush delalloc, or a deadlock
+	 * would happen. So skip the delalloc flush states.
+	 */
+	if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
+	    (flush_state == FLUSH_DELALLOC ||
+	     flush_state == FLUSH_DELALLOC_WAIT))
+		flush_state = ALLOC_CHUNK;
+
 	if (!ret)
 		goto again;
-	else if (flush_state <= COMMIT_TRANS)
+	else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
+		 flush_state < COMMIT_TRANS)
+		goto again;
+	else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+		 flush_state <= COMMIT_TRANS)
 		goto again;
 
 out:
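
reserve_metadata_bytes() now escalates through the flush states instead of a
simple on/off flag. BTRFS_RESERVE_FLUSH_LIMIT skips the delalloc states
(flushing delalloc can re-enter the reservation path and deadlock) and gives
up before committing the transaction, while BTRFS_RESERVE_FLUSH_ALL runs every
state including COMMIT_TRANS. A rough sketch of the loop, with try_reserve()
and run_flush() as stand-ins and the states assumed ordered FLUSH_DELALLOC <
FLUSH_DELALLOC_WAIT < ALLOC_CHUNK < COMMIT_TRANS:

	int state = FLUSH_DELALLOC;

	for (;;) {
		if (try_reserve(space_info, bytes) == 0)
			return 0;
		if (flush == BTRFS_RESERVE_NO_FLUSH)
			return -ENOSPC;
		if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
		    (state == FLUSH_DELALLOC || state == FLUSH_DELALLOC_WAIT))
			state = ALLOC_CHUNK;	/* never flush delalloc here */
		if (state > COMMIT_TRANS ||
		    (flush == BTRFS_RESERVE_FLUSH_LIMIT && state == COMMIT_TRANS))
			return -ENOSPC;
		run_flush(state++);
	}
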
@@ -4148,9 +4177,9 @@
 	kfree(rsv);
 }
 
-static inline int __block_rsv_add(struct btrfs_root *root,
-				  struct btrfs_block_rsv *block_rsv,
-				  u64 num_bytes, int flush)
+int btrfs_block_rsv_add(struct btrfs_root *root,
+			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
+			enum btrfs_reserve_flush_enum flush)
 {
 	int ret;
 
@@ -4166,20 +4195,6 @@
 	return ret;
 }
 
-int btrfs_block_rsv_add(struct btrfs_root *root,
-			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes)
-{
-	return __block_rsv_add(root, block_rsv, num_bytes, 1);
-}
-
-int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes)
-{
-	return __block_rsv_add(root, block_rsv, num_bytes, 0);
-}
-
 int btrfs_block_rsv_check(struct btrfs_root *root,
 			  struct btrfs_block_rsv *block_rsv, int min_factor)
 {
@@ -4198,9 +4213,9 @@
 	return ret;
 }
 
-static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
-					   struct btrfs_block_rsv *block_rsv,
-					   u64 min_reserved, int flush)
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+			   struct btrfs_block_rsv *block_rsv, u64 min_reserved,
+			   enum btrfs_reserve_flush_enum flush)
 {
 	u64 num_bytes = 0;
 	int ret = -ENOSPC;
@@ -4228,20 +4243,6 @@
 	return ret;
 }
 
-int btrfs_block_rsv_refill(struct btrfs_root *root,
-			   struct btrfs_block_rsv *block_rsv,
-			   u64 min_reserved)
-{
-	return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
-}
-
-int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
-				   struct btrfs_block_rsv *block_rsv,
-				   u64 min_reserved)
-{
-	return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
-}
-
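
With the *_noflush variants gone, every reservation call site states its
flushing policy explicitly, which also makes the new FLUSH_LIMIT mode
expressible. Hypothetical call sites:

	/* old: btrfs_block_rsv_add_noflush(root, rsv, num_bytes); */
	ret = btrfs_block_rsv_add(root, rsv, num_bytes,
				  BTRFS_RESERVE_NO_FLUSH);

	/* old: btrfs_block_rsv_refill(root, rsv, min_reserved); */
	ret = btrfs_block_rsv_refill(root, rsv, min_reserved,
				     BTRFS_RESERVE_FLUSH_ALL);
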
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 			    struct btrfs_block_rsv *dst_rsv,
 			    u64 num_bytes)
@@ -4532,17 +4533,27 @@
 	u64 csum_bytes;
 	unsigned nr_extents = 0;
 	int extra_reserve = 0;
-	int flush = 1;
+	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret;
+	bool delalloc_lock = true;
 
-	/* Need to be holding the i_mutex here if we aren't free space cache */
-	if (btrfs_is_free_space_inode(inode))
-		flush = 0;
+	/*
+	 * If we are a free space inode we need to not flush since we will be in
+	 * the middle of a transaction commit.  We also don't need the delalloc
+	 * mutex since we won't race with anybody.  We need this mostly to make
+	 * lockdep shut its filthy mouth.
+	 */
+	if (btrfs_is_free_space_inode(inode)) {
+		flush = BTRFS_RESERVE_NO_FLUSH;
+		delalloc_lock = false;
+	}
 
-	if (flush && btrfs_transaction_in_commit(root->fs_info))
+	if (flush != BTRFS_RESERVE_NO_FLUSH &&
+	    btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);
 
-	mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
+	if (delalloc_lock)
+		mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
+
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
 
 	spin_lock(&BTRFS_I(inode)->lock);
@@ -4572,7 +4583,11 @@
 		ret = btrfs_qgroup_reserve(root, num_bytes +
 					   nr_extents * root->leafsize);
 		if (ret) {
-			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+			spin_lock(&BTRFS_I(inode)->lock);
+			calc_csum_metadata_size(inode, num_bytes, 0);
+			spin_unlock(&BTRFS_I(inode)->lock);
+			if (delalloc_lock)
+				mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 			return ret;
 		}
 	}
@@ -4607,7 +4622,12 @@
 						      btrfs_ino(inode),
 						      to_free, 0);
 		}
-		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+		if (root->fs_info->quota_enabled) {
+			btrfs_qgroup_free(root, num_bytes +
+						nr_extents * root->leafsize);
+		}
+		if (delalloc_lock)
+			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 		return ret;
 	}
 
@@ -4619,7 +4639,9 @@
 	}
 	BTRFS_I(inode)->reserved_extents += nr_extents;
 	spin_unlock(&BTRFS_I(inode)->lock);
-	mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+
+	if (delalloc_lock)
+		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 
 	if (to_reserve)
 		trace_btrfs_space_reservation(root->fs_info,"delalloc",
@@ -4969,9 +4991,13 @@
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_block_group_cache *cache = NULL;
+	struct btrfs_space_info *space_info;
+	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
 	u64 len;
+	bool readonly;
 
 	while (start <= end) {
+		readonly = false;
 		if (!cache ||
 		    start >= cache->key.objectid + cache->key.offset) {
 			if (cache)
@@ -4989,15 +5015,30 @@
 		}
 
 		start += len;
+		space_info = cache->space_info;
 
-		spin_lock(&cache->space_info->lock);
+		spin_lock(&space_info->lock);
 		spin_lock(&cache->lock);
 		cache->pinned -= len;
-		cache->space_info->bytes_pinned -= len;
-		if (cache->ro)
-			cache->space_info->bytes_readonly += len;
+		space_info->bytes_pinned -= len;
+		if (cache->ro) {
+			space_info->bytes_readonly += len;
+			readonly = true;
+		}
 		spin_unlock(&cache->lock);
-		spin_unlock(&cache->space_info->lock);
+		if (!readonly && global_rsv->space_info == space_info) {
+			spin_lock(&global_rsv->lock);
+			if (!global_rsv->full) {
+				len = min(len, global_rsv->size -
+					  global_rsv->reserved);
+				global_rsv->reserved += len;
+				space_info->bytes_may_use += len;
+				if (global_rsv->reserved >= global_rsv->size)
+					global_rsv->full = 1;
+			}
+			spin_unlock(&global_rsv->lock);
+		}
+		spin_unlock(&space_info->lock);
 	}
 
 	if (cache)
@@ -5466,7 +5507,7 @@
 	return 0;
 }
 
-static int __get_block_group_index(u64 flags)
+int __get_raid_index(u64 flags)
 {
 	int index;
 
@@ -5486,7 +5527,7 @@
 
 static int get_block_group_index(struct btrfs_block_group_cache *cache)
 {
-	return __get_block_group_index(cache->flags);
+	return __get_raid_index(cache->flags);
 }
 
 enum btrfs_loop_type {
@@ -6269,7 +6310,8 @@
 	block_rsv = get_block_rsv(trans, root);
 
 	if (block_rsv->size == 0) {
-		ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
+		ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+					     BTRFS_RESERVE_NO_FLUSH);
 		/*
 		 * If we couldn't reserve metadata bytes try and use some from
 		 * the global reserve.
@@ -6292,11 +6334,11 @@
 		static DEFINE_RATELIMIT_STATE(_rs,
 				DEFAULT_RATELIMIT_INTERVAL,
 				/*DEFAULT_RATELIMIT_BURST*/ 2);
-		if (__ratelimit(&_rs)) {
-			printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
-			WARN_ON(1);
-		}
-		ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
+		if (__ratelimit(&_rs))
+			WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n",
+			     ret);
+		ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+					     BTRFS_RESERVE_NO_FLUSH);
 		if (!ret) {
 			return block_rsv;
 		} else if (ret && block_rsv != global_rsv) {
@@ -7427,7 +7469,7 @@
 	 */
 	target = get_restripe_target(root->fs_info, block_group->flags);
 	if (target) {
-		index = __get_block_group_index(extended_to_chunk(target));
+		index = __get_raid_index(extended_to_chunk(target));
 	} else {
 		/*
 		 * this is just a balance, so if we were marked as full
@@ -7461,7 +7503,8 @@
 		 * check to make sure we can actually find a chunk with enough
 		 * space to fit our block group in.
 		 */
-		if (device->total_bytes > device->bytes_used + min_free) {
+		if (device->total_bytes > device->bytes_used + min_free &&
+		    !device->is_tgtdev_for_dev_replace) {
 			ret = find_free_dev_extent(device, min_free,
 						   &dev_offset, NULL);
 			if (!ret)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 472873a..1b319df 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -341,12 +341,10 @@
 {
 	struct rb_node *node;
 
-	if (end < start) {
-		printk(KERN_ERR "btrfs end < start %llu %llu\n",
+	if (end < start)
+		WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
 		       (unsigned long long)end,
 		       (unsigned long long)start);
-		WARN_ON(1);
-	}
 	state->start = start;
 	state->end = end;
 
@@ -1919,12 +1917,12 @@
  * the standard behavior is to write all copies in a raid setup. here we only
  * want to write the one bad copy. so we do the mapping for ourselves and issue
  * submit_bio directly.
- * to avoid any synchonization issues, wait for the data after writing, which
+ * to avoid any synchronization issues, wait for the data after writing, which
  * actually prevents the read that triggered the error from finishing.
  * currently, there can be no more than two copies of every data bit. thus,
  * exactly one rewrite is required.
  */
-int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
+int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 			u64 length, u64 logical, struct page *page,
 			int mirror_num)
 {
@@ -1946,7 +1944,7 @@
 	bio->bi_size = 0;
 	map_length = length;
 
-	ret = btrfs_map_block(map_tree, WRITE, logical,
+	ret = btrfs_map_block(fs_info, WRITE, logical,
 			      &map_length, &bbio, mirror_num);
 	if (ret) {
 		bio_put(bio);
@@ -1984,14 +1982,13 @@
 int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
 			 int mirror_num)
 {
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
 	u64 start = eb->start;
 	unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
 	int ret = 0;
 
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = extent_buffer_page(eb, i);
-		ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
+		ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
 					start, p, mirror_num);
 		if (ret)
 			break;
@@ -2010,7 +2007,7 @@
 	u64 private;
 	u64 private_failure;
 	struct io_failure_record *failrec;
-	struct btrfs_mapping_tree *map_tree;
+	struct btrfs_fs_info *fs_info;
 	struct extent_state *state;
 	int num_copies;
 	int did_repair = 0;
@@ -2046,11 +2043,11 @@
 	spin_unlock(&BTRFS_I(inode)->io_tree.lock);
 
 	if (state && state->start == failrec->start) {
-		map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
-		num_copies = btrfs_num_copies(map_tree, failrec->logical,
-						failrec->len);
+		fs_info = BTRFS_I(inode)->root->fs_info;
+		num_copies = btrfs_num_copies(fs_info, failrec->logical,
+					      failrec->len);
 		if (num_copies > 1)  {
-			ret = repair_io_failure(map_tree, start, failrec->len,
+			ret = repair_io_failure(fs_info, start, failrec->len,
 						failrec->logical, page,
 						failrec->failed_mirror);
 			did_repair = !ret;
@@ -2159,9 +2156,8 @@
 		 * clean_io_failure() clean all those errors at once.
 		 */
 	}
-	num_copies = btrfs_num_copies(
-			      &BTRFS_I(inode)->root->fs_info->mapping_tree,
-			      failrec->logical, failrec->len);
+	num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
+				      failrec->logical, failrec->len);
 	if (num_copies == 1) {
 		/*
 		 * we only have a single copy of the data, so don't bother with
@@ -2466,10 +2462,6 @@
 	return bio;
 }
 
-/*
- * Since writes are async, they will only return -ENOMEM.
- * Reads can return the full range of I/O error conditions.
- */
 static int __must_check submit_one_bio(int rw, struct bio *bio,
 				       int mirror_num, unsigned long bio_flags)
 {
@@ -4721,10 +4713,9 @@
 	}
 
 	if (start + min_len > eb->len) {
-		printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
+		WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
 		       "wanted %lu %lu\n", (unsigned long long)eb->start,
 		       eb->len, start, min_len);
-		WARN_ON(1);
 		return -EINVAL;
 	}
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 711d12b..2eacfab 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -337,9 +337,9 @@
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		gfp_t gfp_flags);
 
-struct btrfs_mapping_tree;
+struct btrfs_fs_info;
 
-int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
+int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 			u64 length, u64 logical, struct page *page,
 			int mirror_num);
 int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index ce9f792..f169d6b 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -49,7 +49,7 @@
 struct extent_map *alloc_extent_map(void)
 {
 	struct extent_map *em;
-	em = kmem_cache_alloc(extent_map_cache, GFP_NOFS);
+	em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
 	if (!em)
 		return NULL;
 	em->in_tree = 0;
@@ -198,16 +198,15 @@
 			merge = rb_entry(rb, struct extent_map, rb_node);
 		if (rb && mergable_maps(merge, em)) {
 			em->start = merge->start;
+			em->orig_start = merge->orig_start;
 			em->len += merge->len;
 			em->block_len += merge->block_len;
 			em->block_start = merge->block_start;
 			merge->in_tree = 0;
-			if (merge->generation > em->generation) {
-				em->mod_start = em->start;
-				em->mod_len = em->len;
-				em->generation = merge->generation;
-				list_move(&em->list, &tree->modified_extents);
-			}
+			em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
+			em->mod_start = merge->mod_start;
+			em->generation = max(em->generation, merge->generation);
+			list_move(&em->list, &tree->modified_extents);
 
 			list_del_init(&merge->list);
 			rb_erase(&merge->rb_node, &tree->map);
@@ -223,11 +222,8 @@
 		em->block_len += merge->len;
 		rb_erase(&merge->rb_node, &tree->map);
 		merge->in_tree = 0;
-		if (merge->generation > em->generation) {
-			em->mod_len = em->len;
-			em->generation = merge->generation;
-			list_move(&em->list, &tree->modified_extents);
-		}
+		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
+		em->generation = max(em->generation, merge->generation);
 		list_del_init(&merge->list);
 		free_extent_map(merge);
 	}
@@ -265,9 +261,9 @@
 	em->mod_start = em->start;
 	em->mod_len = em->len;
 
-	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+	if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
 		prealloc = true;
-		clear_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+		clear_bit(EXTENT_FLAG_FILLING, &em->flags);
 	}
 
 	try_merge_map(tree, em);
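
The merge now always extends the modified range to cover both extents instead
of only inheriting it from the newer generation. A worked example for the
"merge precedes em" case, in byte units:

	/*
	 * merge: mod_start = 0,    mod_len = 4096   covers [0, 4096)
	 * em:    mod_start = 4096, mod_len = 8192   covers [4096, 12288)
	 *
	 * em->mod_len   = (em->mod_len + em->mod_start) - merge->mod_start
	 *               = (8192 + 4096) - 0 = 12288
	 * em->mod_start = merge->mod_start = 0      covers [0, 12288)
	 */
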
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 6792255..922943c 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -14,6 +14,7 @@
 #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
 #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
 #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
+#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
 
 struct extent_map {
 	struct rb_node rb_node;
@@ -24,6 +25,7 @@
 	u64 mod_start;
 	u64 mod_len;
 	u64 orig_start;
+	u64 orig_block_len;
 	u64 block_start;
 	u64 block_len;
 	u64 generation;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 1ad08e4e4..bd38cef 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -133,7 +133,6 @@
 	return ERR_PTR(ret);
 }
 
-
 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct btrfs_path *path, u64 objectid,
@@ -151,6 +150,26 @@
 	return ret;
 }
 
+u64 btrfs_file_extent_length(struct btrfs_path *path)
+{
+	int extent_type;
+	struct btrfs_file_extent_item *fi;
+	u64 len;
+
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	extent_type = btrfs_file_extent_type(path->nodes[0], fi);
+
+	if (extent_type == BTRFS_FILE_EXTENT_REG ||
+	    extent_type == BTRFS_FILE_EXTENT_PREALLOC)
+		len = btrfs_file_extent_num_bytes(path->nodes[0], fi);
+	else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+		len = btrfs_file_extent_inline_len(path->nodes[0], fi);
+	else
+		BUG();
+
+	return len;
+}
 
 static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 				   struct inode *inode, struct bio *bio,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a8ee75c..77061bf 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -41,6 +41,7 @@
 #include "compat.h"
 #include "volumes.h"
 
+static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
  * when auto defrag is enabled we
  * queue up these defrag structs to remember which
@@ -90,7 +91,7 @@
  * If an existing record is found the defrag item you
  * pass in is freed
  */
-static void __btrfs_add_inode_defrag(struct inode *inode,
+static int __btrfs_add_inode_defrag(struct inode *inode,
 				    struct inode_defrag *defrag)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -118,18 +119,24 @@
 				entry->transid = defrag->transid;
 			if (defrag->last_offset > entry->last_offset)
 				entry->last_offset = defrag->last_offset;
-			goto exists;
+			return -EEXIST;
 		}
 	}
 	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	rb_link_node(&defrag->rb_node, parent, p);
 	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
-	return;
+	return 0;
+}
 
-exists:
-	kfree(defrag);
-	return;
+static inline int __need_auto_defrag(struct btrfs_root *root)
+{
+	if (!btrfs_test_opt(root, AUTO_DEFRAG))
+		return 0;
 
+	if (btrfs_fs_closing(root->fs_info))
+		return 0;
+
+	return 1;
 }
 
 /*
@@ -142,11 +149,9 @@
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct inode_defrag *defrag;
 	u64 transid;
+	int ret;
 
-	if (!btrfs_test_opt(root, AUTO_DEFRAG))
-		return 0;
-
-	if (btrfs_fs_closing(root->fs_info))
+	if (!__need_auto_defrag(root))
 		return 0;
 
 	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
@@ -157,7 +162,7 @@
 	else
 		transid = BTRFS_I(inode)->root->last_trans;
 
-	defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
+	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
 	if (!defrag)
 		return -ENOMEM;
 
@@ -166,20 +171,56 @@
 	defrag->root = root->root_key.objectid;
 
 	spin_lock(&root->fs_info->defrag_inodes_lock);
-	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
-		__btrfs_add_inode_defrag(inode, defrag);
-	else
-		kfree(defrag);
+	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
+		/*
+		 * If we set IN_DEFRAG flag and evict the inode from memory,
+		 * and then re-read this inode, the new inode doesn't have the
+		 * IN_DEFRAG flag. In that case, we may find the existing defrag.
+		 */
+		ret = __btrfs_add_inode_defrag(inode, defrag);
+		if (ret)
+			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
 	spin_unlock(&root->fs_info->defrag_inodes_lock);
 	return 0;
 }
 
 /*
- * must be called with the defrag_inodes lock held
+ * Requeue the defrag object. If there is a defrag object that points to
+ * the same inode in the tree, we will merge them together (by
+ * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
  */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
-					     u64 root, u64 ino,
-					     struct rb_node **next)
+void btrfs_requeue_inode_defrag(struct inode *inode,
+				struct inode_defrag *defrag)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	if (!__need_auto_defrag(root))
+		goto out;
+
+	/*
+	 * Here we don't check the IN_DEFRAG flag, because we need to
+	 * merge them together.
+	 */
+	spin_lock(&root->fs_info->defrag_inodes_lock);
+	ret = __btrfs_add_inode_defrag(inode, defrag);
+	spin_unlock(&root->fs_info->defrag_inodes_lock);
+	if (ret)
+		goto out;
+	return;
+out:
+	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+}
+
+/*
+ * pick the defraggable inode that we want; if it doesn't exist, we will get
+ * the next one.
+ */
+static struct inode_defrag *
+btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
 {
 	struct inode_defrag *entry = NULL;
 	struct inode_defrag tmp;
@@ -190,7 +231,8 @@
 	tmp.ino = ino;
 	tmp.root = root;
 
-	p = info->defrag_inodes.rb_node;
+	spin_lock(&fs_info->defrag_inodes_lock);
+	p = fs_info->defrag_inodes.rb_node;
 	while (p) {
 		parent = p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
@@ -201,17 +243,109 @@
 		else if (ret > 0)
 			p = parent->rb_right;
 		else
-			return entry;
+			goto out;
 	}
 
-	if (next) {
-		while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
-			parent = rb_next(parent);
+	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
+		parent = rb_next(parent);
+		if (parent)
 			entry = rb_entry(parent, struct inode_defrag, rb_node);
-		}
-		*next = parent;
+		else
+			entry = NULL;
 	}
-	return NULL;
+out:
+	if (entry)
+		rb_erase(parent, &fs_info->defrag_inodes);
+	spin_unlock(&fs_info->defrag_inodes_lock);
+	return entry;
+}
+
+void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
+{
+	struct inode_defrag *defrag;
+	struct rb_node *node;
+
+	spin_lock(&fs_info->defrag_inodes_lock);
+	node = rb_first(&fs_info->defrag_inodes);
+	while (node) {
+		rb_erase(node, &fs_info->defrag_inodes);
+		defrag = rb_entry(node, struct inode_defrag, rb_node);
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+
+		if (need_resched()) {
+			spin_unlock(&fs_info->defrag_inodes_lock);
+			cond_resched();
+			spin_lock(&fs_info->defrag_inodes_lock);
+		}
+
+		node = rb_first(&fs_info->defrag_inodes);
+	}
+	spin_unlock(&fs_info->defrag_inodes_lock);
+}
+
+#define BTRFS_DEFRAG_BATCH	1024
+
+static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
+				    struct inode_defrag *defrag)
+{
+	struct btrfs_root *inode_root;
+	struct inode *inode;
+	struct btrfs_key key;
+	struct btrfs_ioctl_defrag_range_args range;
+	int num_defrag;
+
+	/* get the inode */
+	key.objectid = defrag->root;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	key.offset = (u64)-1;
+	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(inode_root)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode_root);
+	}
+
+	key.objectid = defrag->ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+	if (IS_ERR(inode)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode);
+	}
+
+	/* do a chunk of defrag */
+	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
+	memset(&range, 0, sizeof(range));
+	range.len = (u64)-1;
+	range.start = defrag->last_offset;
+
+	sb_start_write(fs_info->sb);
+	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+				       BTRFS_DEFRAG_BATCH);
+	sb_end_write(fs_info->sb);
+	/*
+	 * if we filled the whole defrag batch, there
+	 * must be more work to do.  Queue this defrag
+	 * again
+	 */
+	if (num_defrag == BTRFS_DEFRAG_BATCH) {
+		defrag->last_offset = range.start;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else if (defrag->last_offset && !defrag->cycled) {
+		/*
+		 * we didn't fill our defrag batch, but
+		 * we didn't start at zero.  Make sure we loop
+		 * around to the start of the file.
+		 */
+		defrag->last_offset = 0;
+		defrag->cycled = 1;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
+
+	iput(inode);
+	return 0;
 }
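
The requeue policy condensed (stand-in helpers, same logic as above): a full
batch of BTRFS_DEFRAG_BATCH means more work remains, so the record is requeued
at the new offset; a short batch that did not start at offset zero wraps
around exactly once so the head of the file is also covered; otherwise the
record is freed.

	num = defrag_chunk(inode, &range, BTRFS_DEFRAG_BATCH);
	if (num == BTRFS_DEFRAG_BATCH) {
		defrag->last_offset = range.start;	/* continue here */
		requeue(defrag);
	} else if (defrag->last_offset && !defrag->cycled) {
		defrag->last_offset = 0;		/* wrap to start */
		defrag->cycled = 1;			/* but only once */
		requeue(defrag);
	} else {
		free_record(defrag);			/* done */
	}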
 
 /*
@@ -221,32 +355,19 @@
 int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 {
 	struct inode_defrag *defrag;
-	struct btrfs_root *inode_root;
-	struct inode *inode;
-	struct rb_node *n;
-	struct btrfs_key key;
-	struct btrfs_ioctl_defrag_range_args range;
 	u64 first_ino = 0;
 	u64 root_objectid = 0;
-	int num_defrag;
-	int defrag_batch = 1024;
-
-	memset(&range, 0, sizeof(range));
-	range.len = (u64)-1;
 
 	atomic_inc(&fs_info->defrag_running);
-	spin_lock(&fs_info->defrag_inodes_lock);
 	while(1) {
-		n = NULL;
+		if (!__need_auto_defrag(fs_info->tree_root))
+			break;
 
 		/* find an inode to defrag */
-		defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
-						 first_ino, &n);
+		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
+						 first_ino);
 		if (!defrag) {
-			if (n) {
-				defrag = rb_entry(n, struct inode_defrag,
-						  rb_node);
-			} else if (root_objectid || first_ino) {
+			if (root_objectid || first_ino) {
 				root_objectid = 0;
 				first_ino = 0;
 				continue;
@@ -255,70 +376,11 @@
 			}
 		}
 
-		/* remove it from the rbtree */
 		first_ino = defrag->ino + 1;
 		root_objectid = defrag->root;
-		rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
 
-		if (btrfs_fs_closing(fs_info))
-			goto next_free;
-
-		spin_unlock(&fs_info->defrag_inodes_lock);
-
-		/* get the inode */
-		key.objectid = defrag->root;
-		btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-		key.offset = (u64)-1;
-		inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
-		if (IS_ERR(inode_root))
-			goto next;
-
-		key.objectid = defrag->ino;
-		btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
-		key.offset = 0;
-
-		inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
-		if (IS_ERR(inode))
-			goto next;
-
-		/* do a chunk of defrag */
-		clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
-		range.start = defrag->last_offset;
-		num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
-					       defrag_batch);
-		/*
-		 * if we filled the whole defrag batch, there
-		 * must be more work to do.  Queue this defrag
-		 * again
-		 */
-		if (num_defrag == defrag_batch) {
-			defrag->last_offset = range.start;
-			__btrfs_add_inode_defrag(inode, defrag);
-			/*
-			 * we don't want to kfree defrag, we added it back to
-			 * the rbtree
-			 */
-			defrag = NULL;
-		} else if (defrag->last_offset && !defrag->cycled) {
-			/*
-			 * we didn't fill our defrag batch, but
-			 * we didn't start at zero.  Make sure we loop
-			 * around to the start of the file.
-			 */
-			defrag->last_offset = 0;
-			defrag->cycled = 1;
-			__btrfs_add_inode_defrag(inode, defrag);
-			defrag = NULL;
-		}
-
-		iput(inode);
-next:
-		spin_lock(&fs_info->defrag_inodes_lock);
-next_free:
-		kfree(defrag);
+		__btrfs_run_defrag_inode(fs_info, defrag);
 	}
-	spin_unlock(&fs_info->defrag_inodes_lock);
-
 	atomic_dec(&fs_info->defrag_running);
 
 	/*
@@ -526,6 +588,8 @@
 				split->block_len = em->block_len;
 			else
 				split->block_len = split->len;
+			split->orig_block_len = max(split->block_len,
+						    em->orig_block_len);
 			split->generation = gen;
 			split->bdev = em->bdev;
 			split->flags = flags;
@@ -547,6 +611,8 @@
 			split->flags = flags;
 			split->compress_type = em->compress_type;
 			split->generation = gen;
+			split->orig_block_len = max(em->block_len,
+						    em->orig_block_len);
 
 			if (compressed) {
 				split->block_len = em->block_len;
@@ -555,7 +621,7 @@
 			} else {
 				split->block_len = split->len;
 				split->block_start = em->block_start + diff;
-				split->orig_start = split->start;
+				split->orig_start = em->orig_start;
 			}
 
 			ret = add_extent_mapping(em_tree, split);
@@ -1348,7 +1414,7 @@
 
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 		if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-			btrfs_btree_balance_dirty(root, 1);
+			btrfs_btree_balance_dirty(root);
 
 		pos += copied;
 		num_written += copied;
@@ -1397,6 +1463,24 @@
 	return written ? written : err;
 }
 
+static void update_time_for_write(struct inode *inode)
+{
+	struct timespec now;
+
+	if (IS_NOCMTIME(inode))
+		return;
+
+	now = current_fs_time(inode->i_sb);
+	if (!timespec_equal(&inode->i_mtime, &now))
+		inode->i_mtime = now;
+
+	if (!timespec_equal(&inode->i_ctime, &now))
+		inode->i_ctime = now;
+
+	if (IS_I_VERSION(inode))
+		inode_inc_iversion(inode);
+}
+
 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    const struct iovec *iov,
 				    unsigned long nr_segs, loff_t pos)
@@ -1409,6 +1493,7 @@
 	ssize_t num_written = 0;
 	ssize_t err = 0;
 	size_t count, ocount;
+	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 
 	sb_start_write(inode->i_sb);
 
@@ -1451,11 +1536,13 @@
 		goto out;
 	}
 
-	err = file_update_time(file);
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
-		goto out;
-	}
+	/*
+	 * We reserve space for updating the inode when we reserve space for the
+	 * extent we are going to write, so we will enospc out there.  We don't
+	 * need to start yet another transaction to update the inode as we will
+	 * update the inode when we finish writing whatever data we write.
+	 */
+	update_time_for_write(inode);
 
 	start_pos = round_down(pos, root->sectorsize);
 	if (start_pos > i_size_read(inode)) {
@@ -1466,6 +1553,9 @@
 		}
 	}
 
+	if (sync)
+		atomic_inc(&BTRFS_I(inode)->sync_writers);
+
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
 						   pos, ppos, count, ocount);
@@ -1492,14 +1582,21 @@
 	 * this will either be one more than the running transaction
 	 * or the generation used for the next transaction if there isn't
 	 * one running right now.
+	 *
+	 * We also have to set last_sub_trans to the current log transid,
+	 * otherwise subsequent syncs to a file that's been synced in this
+	 * transaction will appear to have already occurred.
 	 */
 	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+	BTRFS_I(inode)->last_sub_trans = root->log_transid;
 	if (num_written > 0 || num_written == -EIOCBQUEUED) {
 		err = generic_write_sync(file, pos, num_written);
 		if (err < 0 && num_written > 0)
 			num_written = err;
 	}
 out:
+	if (sync)
+		atomic_dec(&BTRFS_I(inode)->sync_writers);
 	sb_end_write(inode->i_sb);
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
@@ -1550,7 +1647,9 @@
 	 * out of the ->i_mutex. If so, we can flush the dirty pages by
 	 * multi-task, and make the performance up.
 	 */
+	atomic_inc(&BTRFS_I(inode)->sync_writers);
 	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	atomic_dec(&BTRFS_I(inode)->sync_writers);
 	if (ret)
 		return ret;
 
@@ -1561,7 +1660,7 @@
 	 * range being left.
 	 */
 	atomic_inc(&root->log_batch);
-	btrfs_wait_ordered_range(inode, start, end);
+	btrfs_wait_ordered_range(inode, start, end - start + 1);
 	atomic_inc(&root->log_batch);
 
 	/*
@@ -1767,6 +1866,7 @@
 
 		hole_em->block_start = EXTENT_MAP_HOLE;
 		hole_em->block_len = 0;
+		hole_em->orig_block_len = 0;
 		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
 		hole_em->compress_type = BTRFS_COMPRESS_NONE;
 		hole_em->generation = trans->transid;
@@ -1796,48 +1896,51 @@
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *rsv;
 	struct btrfs_trans_handle *trans;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
-	u64 lockstart = (offset + mask) & ~mask;
-	u64 lockend = ((offset + len) & ~mask) - 1;
+	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
+	u64 lockend = round_down(offset + len,
+				 BTRFS_I(inode)->root->sectorsize) - 1;
 	u64 cur_offset = lockstart;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 	u64 drop_end;
-	unsigned long nr;
 	int ret = 0;
 	int err = 0;
-	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
-		((offset + len) >> PAGE_CACHE_SHIFT);
+	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
 
 	btrfs_wait_ordered_range(inode, offset, len);
 
 	mutex_lock(&inode->i_mutex);
-	if (offset >= inode->i_size) {
-		mutex_unlock(&inode->i_mutex);
-		return 0;
-	}
-
+	/*
+	 * We needn't truncate any page which is beyond the end of the file
+	 * because we are sure there is no data there.
+	 */
 	/*
 	 * Only do this if we are in the same page and we aren't doing the
 	 * entire page.
 	 */
 	if (same_page && len < PAGE_CACHE_SIZE) {
-		ret = btrfs_truncate_page(inode, offset, len, 0);
+		if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE))
+			ret = btrfs_truncate_page(inode, offset, len, 0);
 		mutex_unlock(&inode->i_mutex);
 		return ret;
 	}
 
 	/* zero back part of the first page */
-	ret = btrfs_truncate_page(inode, offset, 0, 0);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset, 0, 0);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	/* zero the front end of the last page */
-	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	if (lockend < lockstart) {
@@ -1930,9 +2033,8 @@
 			break;
 		}
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 
 		trans = btrfs_start_transaction(root, 3);
 		if (IS_ERR(trans)) {
@@ -1963,11 +2065,13 @@
 	if (!trans)
 		goto out_free;
 
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
 	trans->block_rsv = &root->fs_info->trans_block_rsv;
 	ret = btrfs_update_inode(trans, root, inode);
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 out_free:
 	btrfs_free_path(path);
 	btrfs_free_block_rsv(root, rsv);
@@ -1991,12 +2095,12 @@
 	u64 alloc_end;
 	u64 alloc_hint = 0;
 	u64 locked_end;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	struct extent_map *em;
+	int blocksize = BTRFS_I(inode)->root->sectorsize;
 	int ret;
 
-	alloc_start = offset & ~mask;
-	alloc_end =  (offset + len + mask) & ~mask;
+	alloc_start = round_down(offset, blocksize);
+	alloc_end = round_up(offset + len, blocksize);
 
 	/* Make sure we aren't being given some crap mode */
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2009,7 +2113,7 @@
 	 * Make sure we have enough space before we do the
 	 * allocation.
 	 */
-	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1);
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
 	if (ret)
 		return ret;
 
@@ -2077,7 +2181,7 @@
 		}
 		last_byte = min(extent_map_end(em), alloc_end);
 		actual_end = min_t(u64, extent_map_end(em), offset + len);
-		last_byte = (last_byte + mask) & ~mask;
+		last_byte = ALIGN(last_byte, blocksize);
 
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
@@ -2116,11 +2220,11 @@
 out:
 	mutex_unlock(&inode->i_mutex);
 	/* Let go of our reservation. */
-	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1);
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
 	return ret;
 }
 
-static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_map *em;
@@ -2154,7 +2258,7 @@
 	 * before the position we want in case there is outstanding delalloc
 	 * going on here.
 	 */
-	if (origin == SEEK_HOLE && start != 0) {
+	if (whence == SEEK_HOLE && start != 0) {
 		if (start <= root->sectorsize)
 			em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
 						     root->sectorsize, 0);
@@ -2188,13 +2292,13 @@
 				}
 			}
 
-			if (origin == SEEK_HOLE) {
+			if (whence == SEEK_HOLE) {
 				*offset = start;
 				free_extent_map(em);
 				break;
 			}
 		} else {
-			if (origin == SEEK_DATA) {
+			if (whence == SEEK_DATA) {
 				if (em->block_start == EXTENT_MAP_DELALLOC) {
 					if (start >= inode->i_size) {
 						free_extent_map(em);
@@ -2231,16 +2335,16 @@
 	return ret;
 }
 
-static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 	case SEEK_CUR:
-		offset = generic_file_llseek(file, offset, origin);
+		offset = generic_file_llseek(file, offset, whence);
 		goto out;
 	case SEEK_DATA:
 	case SEEK_HOLE:
@@ -2249,7 +2353,7 @@
 			return -ENXIO;
 		}
 
-		ret = find_desired_extent(inode, &offset, origin);
+		ret = find_desired_extent(inode, &offset, whence);
 		if (ret) {
 			mutex_unlock(&inode->i_mutex);
 			return ret;
@@ -2292,3 +2396,21 @@
 	.compat_ioctl	= btrfs_ioctl,
 #endif
 };
+
+void btrfs_auto_defrag_exit(void)
+{
+	if (btrfs_inode_defrag_cachep)
+		kmem_cache_destroy(btrfs_inode_defrag_cachep);
+}
+
+int btrfs_auto_defrag_init(void)
+{
+	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
+					sizeof(struct inode_defrag), 0,
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					NULL);
+	if (!btrfs_inode_defrag_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
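
btrfs_auto_defrag_init()/btrfs_auto_defrag_exit() give inode_defrag
records their own slab cache instead of plain kmalloc. A sketch of how
the allocation sites (outside this hunk) would pair with it, assuming
the usual kmem_cache pattern:

    struct inode_defrag *defrag;

    defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
    if (!defrag)
            return -ENOMEM;
    /* ... fill in and queue the defrag record ... */
    kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
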
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 1027b85..59ea2e4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -307,7 +307,6 @@
 
 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
 {
-	WARN_ON(io_ctl->cur);
 	BUG_ON(io_ctl->index >= io_ctl->num_pages);
 	io_ctl->page = io_ctl->pages[io_ctl->index++];
 	io_ctl->cur = kmap(io_ctl->page);
@@ -1250,18 +1249,13 @@
 			 * if previous extent entry covers the offset,
 			 * we should return it instead of the bitmap entry
 			 */
-			n = &entry->offset_index;
-			while (1) {
-				n = rb_prev(n);
-				if (!n)
-					break;
+			n = rb_prev(&entry->offset_index);
+			if (n) {
 				prev = rb_entry(n, struct btrfs_free_space,
 						offset_index);
-				if (!prev->bitmap) {
-					if (prev->offset + prev->bytes > offset)
-						entry = prev;
-					break;
-				}
+				if (!prev->bitmap &&
+				    prev->offset + prev->bytes > offset)
+					entry = prev;
 			}
 		}
 		return entry;
@@ -1287,18 +1281,13 @@
 	}
 
 	if (entry->bitmap) {
-		n = &entry->offset_index;
-		while (1) {
-			n = rb_prev(n);
-			if (!n)
-				break;
+		n = rb_prev(&entry->offset_index);
+		if (n) {
 			prev = rb_entry(n, struct btrfs_free_space,
 					offset_index);
-			if (!prev->bitmap) {
-				if (prev->offset + prev->bytes > offset)
-					return prev;
-				break;
-			}
+			if (!prev->bitmap &&
+			    prev->offset + prev->bytes > offset)
+				return prev;
 		}
 		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
 			return entry;
@@ -1364,7 +1353,7 @@
 	u64 bitmap_bytes;
 	u64 extent_bytes;
 	u64 size = block_group->key.offset;
-	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
+	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
 	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
 
 	BUG_ON(ctl->total_bitmaps > max_bitmaps);
@@ -1650,8 +1639,7 @@
 	 * some block groups are so tiny they can't be enveloped by a bitmap, so
 	 * don't even bother to create a bitmap for this
 	 */
-	if (BITS_PER_BITMAP * block_group->sectorsize >
-	    block_group->key.offset)
+	if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset)
 		return false;
 
 	return true;
@@ -2298,10 +2286,10 @@
 	unsigned long total_found = 0;
 	int ret;
 
-	i = offset_to_bit(entry->offset, block_group->sectorsize,
+	i = offset_to_bit(entry->offset, ctl->unit,
 			  max_t(u64, offset, entry->offset));
-	want_bits = bytes_to_bits(bytes, block_group->sectorsize);
-	min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
+	want_bits = bytes_to_bits(bytes, ctl->unit);
+	min_bits = bytes_to_bits(min_bytes, ctl->unit);
 
 again:
 	found_bits = 0;
@@ -2325,23 +2313,22 @@
 
 	total_found += found_bits;
 
-	if (cluster->max_size < found_bits * block_group->sectorsize)
-		cluster->max_size = found_bits * block_group->sectorsize;
+	if (cluster->max_size < found_bits * ctl->unit)
+		cluster->max_size = found_bits * ctl->unit;
 
 	if (total_found < want_bits || cluster->max_size < cont1_bytes) {
 		i = next_zero + 1;
 		goto again;
 	}
 
-	cluster->window_start = start * block_group->sectorsize +
-		entry->offset;
+	cluster->window_start = start * ctl->unit + entry->offset;
 	rb_erase(&entry->offset_index, &ctl->free_space_offset);
 	ret = tree_insert_offset(&cluster->root, entry->offset,
 				 &entry->offset_index, 1);
 	BUG_ON(ret); /* -EEXIST; Logic error */
 
 	trace_btrfs_setup_cluster(block_group, cluster,
-				  total_found * block_group->sectorsize, 1);
+				  total_found * ctl->unit, 1);
 	return 0;
 }
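
Every block_group->sectorsize reference in these hunks becomes
ctl->unit, so the bitmap arithmetic also holds for free-space ctls
whose unit differs from the sector size (e.g. the inode cache). For
scale, assuming 4 KiB pages and a 4 KiB unit:

    /* BITS_PER_BITMAP = PAGE_CACHE_SIZE * 8       = 32768 bits
     * bytes_per_bg    = BITS_PER_BITMAP * unit    = 128 MiB per bitmap
     * so a 1 GiB block group needs at most 8 bitmaps */
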
 
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index b1a1c92..d26f67a 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -434,8 +434,9 @@
 	 * 3 items for pre-allocation
 	 */
 	trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
-	ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv,
-					  trans->bytes_reserved);
+	ret = btrfs_block_rsv_add(root, trans->block_rsv,
+				  trans->bytes_reserved,
+				  BTRFS_RESERVE_NO_FLUSH);
 	if (ret)
 		goto out;
 	trace_btrfs_space_reservation(root->fs_info, "ino_cache",
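
btrfs_block_rsv_add_noflush() is folded into btrfs_block_rsv_add()
with an explicit flush-mode argument. Reconstructed from the call
sites in this series (any members beyond the two modes actually used
here are an assumption):

    enum btrfs_reserve_flush_enum {
            /* don't flush anything to satisfy the reservation */
            BTRFS_RESERVE_NO_FLUSH,
            /* flush only in ways that cannot deadlock the caller
             * (used from evict in the inode.c hunks below) */
            BTRFS_RESERVE_FLUSH_LIMIT,
            /* a full-flush mode presumably rounds out the enum */
    };
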
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 95542a1..16d9e8e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -71,6 +71,7 @@
 static struct extent_io_ops btrfs_extent_io_ops;
 
 static struct kmem_cache *btrfs_inode_cachep;
+static struct kmem_cache *btrfs_delalloc_work_cachep;
 struct kmem_cache *btrfs_trans_handle_cachep;
 struct kmem_cache *btrfs_transaction_cachep;
 struct kmem_cache *btrfs_path_cachep;
@@ -94,6 +95,10 @@
 				   struct page *locked_page,
 				   u64 start, u64 end, int *page_started,
 				   unsigned long *nr_written, int unlock);
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+					   u64 len, u64 orig_start,
+					   u64 block_start, u64 block_len,
+					   u64 orig_block_len, int type);
 
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 				     struct inode *inode,  struct inode *dir,
@@ -698,14 +703,19 @@
 
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
+		em->orig_block_len = ins.offset;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
 		em->compress_type = async_extent->compress_type;
 		set_bit(EXTENT_FLAG_PINNED, &em->flags);
 		set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+		em->generation = -1;
 
 		while (1) {
 			write_lock(&em_tree->lock);
 			ret = add_extent_mapping(em_tree, em);
+			if (!ret)
+				list_move(&em->list,
+					  &em_tree->modified_extents);
 			write_unlock(&em_tree->lock);
 			if (ret != -EEXIST) {
 				free_extent_map(em);
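
This insert-then-list_move sequence recurs throughout the patch (here,
in __cow_file_range(), run_delalloc_nocow() and create_pinned_em()):
once add_extent_mapping() succeeds, the map is hooked onto the tree's
modified_extents list while the write lock is still held, and
generation = -1 marks it as not yet logged, so fsync's tree logging
can find extents touched in the running transaction. The shared idiom:

    write_lock(&em_tree->lock);
    ret = add_extent_mapping(em_tree, em);
    if (!ret)   /* only track maps that actually made it into the tree */
            list_move(&em->list, &em_tree->modified_extents);
    write_unlock(&em_tree->lock);
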
@@ -803,14 +813,14 @@
  * required to start IO on it.  It may be clean and already done with
  * IO when we return.
  */
-static noinline int cow_file_range(struct inode *inode,
-				   struct page *locked_page,
-				   u64 start, u64 end, int *page_started,
-				   unsigned long *nr_written,
-				   int unlock)
+static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
+				     struct inode *inode,
+				     struct btrfs_root *root,
+				     struct page *locked_page,
+				     u64 start, u64 end, int *page_started,
+				     unsigned long *nr_written,
+				     int unlock)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_trans_handle *trans;
 	u64 alloc_hint = 0;
 	u64 num_bytes;
 	unsigned long ram_size;
@@ -823,25 +833,10 @@
 	int ret = 0;
 
 	BUG_ON(btrfs_is_free_space_inode(inode));
-	trans = btrfs_join_transaction(root);
-	if (IS_ERR(trans)) {
-		extent_clear_unlock_delalloc(inode,
-			     &BTRFS_I(inode)->io_tree,
-			     start, end, locked_page,
-			     EXTENT_CLEAR_UNLOCK_PAGE |
-			     EXTENT_CLEAR_UNLOCK |
-			     EXTENT_CLEAR_DELALLOC |
-			     EXTENT_CLEAR_DIRTY |
-			     EXTENT_SET_WRITEBACK |
-			     EXTENT_END_WRITEBACK);
-		return PTR_ERR(trans);
-	}
-	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
 	num_bytes = max(blocksize,  num_bytes);
 	disk_num_bytes = num_bytes;
-	ret = 0;
 
 	/* if this is a small write inside eof, kick off defrag */
 	if (num_bytes < 64 * 1024 &&
@@ -900,12 +895,17 @@
 
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
+		em->orig_block_len = ins.offset;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
 		set_bit(EXTENT_FLAG_PINNED, &em->flags);
+		em->generation = -1;
 
 		while (1) {
 			write_lock(&em_tree->lock);
 			ret = add_extent_mapping(em_tree, em);
+			if (!ret)
+				list_move(&em->list,
+					  &em_tree->modified_extents);
 			write_unlock(&em_tree->lock);
 			if (ret != -EEXIST) {
 				free_extent_map(em);
@@ -952,11 +952,9 @@
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
 	}
-	ret = 0;
 out:
-	btrfs_end_transaction(trans, root);
-
 	return ret;
+
 out_unlock:
 	extent_clear_unlock_delalloc(inode,
 		     &BTRFS_I(inode)->io_tree,
@@ -971,6 +969,39 @@
 	goto out;
 }
 
+static noinline int cow_file_range(struct inode *inode,
+				   struct page *locked_page,
+				   u64 start, u64 end, int *page_started,
+				   unsigned long *nr_written,
+				   int unlock)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	trans = btrfs_join_transaction(root);
+	if (IS_ERR(trans)) {
+		extent_clear_unlock_delalloc(inode,
+			     &BTRFS_I(inode)->io_tree,
+			     start, end, locked_page,
+			     EXTENT_CLEAR_UNLOCK_PAGE |
+			     EXTENT_CLEAR_UNLOCK |
+			     EXTENT_CLEAR_DELALLOC |
+			     EXTENT_CLEAR_DIRTY |
+			     EXTENT_SET_WRITEBACK |
+			     EXTENT_END_WRITEBACK);
+		return PTR_ERR(trans);
+	}
+	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
+	ret = __cow_file_range(trans, inode, root, locked_page, start, end,
+			       page_started, nr_written, unlock);
+
+	btrfs_end_transaction(trans, root);
+
+	return ret;
+}
+
 /*
  * work queue callback to start compression on a file and pages
  */
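
The split makes __cow_file_range() transaction-agnostic so that
run_delalloc_nocow() (further down) can invoke it under its own
transaction, while the cow_file_range() wrapper keeps the join/end
bracket for callers that have none:

    /* caller already inside a transaction */
    ret = __cow_file_range(trans, inode, root, locked_page,
                           start, end, page_started, nr_written, 1);

    /* no transaction in hand: the wrapper joins and ends one */
    ret = cow_file_range(inode, locked_page, start, end,
                         page_started, nr_written, 1);
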
@@ -1126,6 +1157,7 @@
 	u64 extent_offset;
 	u64 disk_bytenr;
 	u64 num_bytes;
+	u64 disk_num_bytes;
 	int extent_type;
 	int ret, err;
 	int type;
@@ -1228,6 +1260,8 @@
 			extent_offset = btrfs_file_extent_offset(leaf, fi);
 			extent_end = found_key.offset +
 				btrfs_file_extent_num_bytes(leaf, fi);
+			disk_num_bytes =
+				btrfs_file_extent_disk_num_bytes(leaf, fi);
 			if (extent_end <= start) {
 				path->slots[0]++;
 				goto next_slot;
@@ -1281,9 +1315,9 @@
 
 		btrfs_release_path(path);
 		if (cow_start != (u64)-1) {
-			ret = cow_file_range(inode, locked_page, cow_start,
-					found_key.offset - 1, page_started,
-					nr_written, 1);
+			ret = __cow_file_range(trans, inode, root, locked_page,
+					       cow_start, found_key.offset - 1,
+					       page_started, nr_written, 1);
 			if (ret) {
 				btrfs_abort_transaction(trans, root, ret);
 				goto error;
@@ -1298,16 +1332,21 @@
 			em = alloc_extent_map();
 			BUG_ON(!em); /* -ENOMEM */
 			em->start = cur_offset;
-			em->orig_start = em->start;
+			em->orig_start = found_key.offset - extent_offset;
 			em->len = num_bytes;
 			em->block_len = num_bytes;
 			em->block_start = disk_bytenr;
+			em->orig_block_len = disk_num_bytes;
 			em->bdev = root->fs_info->fs_devices->latest_bdev;
 			set_bit(EXTENT_FLAG_PINNED, &em->flags);
-			set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+			set_bit(EXTENT_FLAG_FILLING, &em->flags);
+			em->generation = -1;
 			while (1) {
 				write_lock(&em_tree->lock);
 				ret = add_extent_mapping(em_tree, em);
+				if (!ret)
+					list_move(&em->list,
+						  &em_tree->modified_extents);
 				write_unlock(&em_tree->lock);
 				if (ret != -EEXIST) {
 					free_extent_map(em);
@@ -1352,8 +1391,9 @@
 	}
 
 	if (cow_start != (u64)-1) {
-		ret = cow_file_range(inode, locked_page, cow_start, end,
-				     page_started, nr_written, 1);
+		ret = __cow_file_range(trans, inode, root, locked_page,
+				       cow_start, end,
+				       page_started, nr_written, 1);
 		if (ret) {
 			btrfs_abort_transaction(trans, root, ret);
 			goto error;
@@ -1531,7 +1571,6 @@
 			 unsigned long bio_flags)
 {
 	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-	struct btrfs_mapping_tree *map_tree;
 	u64 logical = (u64)bio->bi_sector << 9;
 	u64 length = 0;
 	u64 map_length;
@@ -1541,11 +1580,10 @@
 		return 0;
 
 	length = bio->bi_size;
-	map_tree = &root->fs_info->mapping_tree;
 	map_length = length;
-	ret = btrfs_map_block(map_tree, READ, logical,
+	ret = btrfs_map_block(root->fs_info, READ, logical,
 			      &map_length, NULL, 0);
-	/* Will always return 0 or 1 with map_multi == NULL */
+	/* Will always return 0 with map_multi == NULL */
 	BUG_ON(ret < 0);
 	if (map_length < length + size)
 		return 1;
@@ -1586,7 +1624,12 @@
 			  u64 bio_offset)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	return btrfs_map_bio(root, rw, bio, mirror_num, 1);
+	int ret;
+
+	ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
+	if (ret)
+		bio_endio(bio, ret);
+	return ret;
 }
 
 /*
@@ -1601,6 +1644,7 @@
 	int ret = 0;
 	int skip_sum;
 	int metadata = 0;
+	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -1610,31 +1654,43 @@
 	if (!(rw & REQ_WRITE)) {
 		ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
 		if (ret)
-			return ret;
+			goto out;
 
 		if (bio_flags & EXTENT_BIO_COMPRESSED) {
-			return btrfs_submit_compressed_read(inode, bio,
-						    mirror_num, bio_flags);
+			ret = btrfs_submit_compressed_read(inode, bio,
+							   mirror_num,
+							   bio_flags);
+			goto out;
 		} else if (!skip_sum) {
 			ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
 			if (ret)
-				return ret;
+				goto out;
 		}
 		goto mapit;
-	} else if (!skip_sum) {
+	} else if (async && !skip_sum) {
 		/* csum items have already been cloned */
 		if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
 			goto mapit;
 		/* we're doing a write, do the async checksumming */
-		return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 				   inode, rw, bio, mirror_num,
 				   bio_flags, bio_offset,
 				   __btrfs_submit_bio_start,
 				   __btrfs_submit_bio_done);
+		goto out;
+	} else if (!skip_sum) {
+		ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
+		if (ret)
+			goto out;
 	}
 
 mapit:
-	return btrfs_map_bio(root, rw, bio, mirror_num, 0);
+	ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+
+out:
+	if (ret < 0)
+		bio_endio(bio, ret);
+	return ret;
 }
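
The new sync_writers counter lets the write path choose between inline
and worker-offloaded checksumming, and every failure now completes the
bio via bio_endio() instead of leaving it to the caller. The resulting
dispatch, summarized:

    /* read                -> wq end_io, csum lookup, then map       */
    /* write, no syncers   -> btrfs_wq_submit_bio(): csum in a
     *                        worker, keeps the submitter fast       */
    /* write, sync_writers -> btrfs_csum_one_bio() inline, skipping
     *                        the queue round-trip that hurts fsync  */
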
 
 /*
@@ -1657,8 +1713,7 @@
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state)
 {
-	if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
-		WARN_ON(1);
+	WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
 	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
 				   cached_state, GFP_NOFS);
 }
@@ -1867,22 +1922,20 @@
 
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 		BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
-		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-		if (!ret) {
-			if (nolock)
-				trans = btrfs_join_transaction_nolock(root);
-			else
-				trans = btrfs_join_transaction(root);
-			if (IS_ERR(trans)) {
-				ret = PTR_ERR(trans);
-				trans = NULL;
-				goto out;
-			}
-			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-			ret = btrfs_update_inode_fallback(trans, root, inode);
-			if (ret) /* -ENOMEM or corruption */
-				btrfs_abort_transaction(trans, root, ret);
+		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+		if (nolock)
+			trans = btrfs_join_transaction_nolock(root);
+		else
+			trans = btrfs_join_transaction(root);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			trans = NULL;
+			goto out;
 		}
+		trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+		ret = btrfs_update_inode_fallback(trans, root, inode);
+		if (ret) /* -ENOMEM or corruption */
+			btrfs_abort_transaction(trans, root, ret);
 		goto out;
 	}
 
@@ -1931,15 +1984,11 @@
 	add_pending_csums(trans, inode, ordered_extent->file_offset,
 			  &ordered_extent->list);
 
-	ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-	if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-		ret = btrfs_update_inode_fallback(trans, root, inode);
-		if (ret) { /* -ENOMEM or corruption */
-			btrfs_abort_transaction(trans, root, ret);
-			goto out_unlock;
-		}
-	} else {
-		btrfs_set_inode_last_trans(trans, inode);
+	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+	ret = btrfs_update_inode_fallback(trans, root, inode);
+	if (ret) { /* -ENOMEM or corruption */
+		btrfs_abort_transaction(trans, root, ret);
+		goto out_unlock;
 	}
 	ret = 0;
 out_unlock:
@@ -3074,7 +3123,6 @@
 	struct btrfs_trans_handle *trans;
 	struct inode *inode = dentry->d_inode;
 	int ret;
-	unsigned long nr = 0;
 
 	trans = __unlink_start_trans(dir, dentry);
 	if (IS_ERR(trans))
@@ -3094,9 +3142,8 @@
 	}
 
 out:
-	nr = trans->blocks_used;
 	__unlink_end_trans(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 	return ret;
 }
 
@@ -3186,7 +3233,6 @@
 	int err = 0;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_trans_handle *trans;
-	unsigned long nr = 0;
 
 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
 		return -ENOTEMPTY;
@@ -3215,9 +3261,8 @@
 	if (!err)
 		btrfs_i_size_write(inode, 0);
 out:
-	nr = trans->blocks_used;
 	__unlink_end_trans(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 
 	return err;
 }
@@ -3497,11 +3542,11 @@
 	if (ret)
 		goto out;
 
-	ret = -ENOMEM;
 again:
 	page = find_or_create_page(mapping, index, mask);
 	if (!page) {
 		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -3550,7 +3595,6 @@
 		goto out_unlock;
 	}
 
-	ret = 0;
 	if (offset != PAGE_CACHE_SIZE) {
 		if (!len)
 			len = PAGE_CACHE_SIZE - offset;
@@ -3668,6 +3712,7 @@
 
 			hole_em->block_start = EXTENT_MAP_HOLE;
 			hole_em->block_len = 0;
+			hole_em->orig_block_len = 0;
 			hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
 			hole_em->compress_type = BTRFS_COMPRESS_NONE;
 			hole_em->generation = trans->transid;
@@ -3783,7 +3828,6 @@
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_block_rsv *rsv, *global_rsv;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
-	unsigned long nr;
 	int ret;
 
 	trace_btrfs_inode_evict(inode);
@@ -3829,7 +3873,8 @@
 	 * inode item when doing the truncate.
 	 */
 	while (1) {
-		ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
+		ret = btrfs_block_rsv_refill(root, rsv, min_size,
+					     BTRFS_RESERVE_FLUSH_LIMIT);
 
 		/*
 		 * Try and steal from the global reserve since we will
@@ -3847,7 +3892,7 @@
 			goto no_delete;
 		}
 
-		trans = btrfs_start_transaction_noflush(root, 1);
+		trans = btrfs_start_transaction_lflush(root, 1);
 		if (IS_ERR(trans)) {
 			btrfs_orphan_del(NULL, inode);
 			btrfs_free_block_rsv(root, rsv);
@@ -3864,10 +3909,9 @@
 		ret = btrfs_update_inode(trans, root, inode);
 		BUG_ON(ret);
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
 		trans = NULL;
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 	}
 
 	btrfs_free_block_rsv(root, rsv);
@@ -3883,9 +3927,8 @@
 	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
 		btrfs_return_ino(root, btrfs_ino(inode));
 
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 no_delete:
 	clear_inode(inode);
 	return;
@@ -4219,16 +4262,7 @@
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	if (unlikely(d_need_lookup(dentry))) {
-		memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
-		kfree(dentry->d_fsdata);
-		dentry->d_fsdata = NULL;
-		/* This thing is hashed, drop it for now */
-		d_drop(dentry);
-	} else {
-		ret = btrfs_inode_by_name(dir, dentry, &location);
-	}
-
+	ret = btrfs_inode_by_name(dir, dentry, &location);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
@@ -4298,11 +4332,6 @@
 	struct dentry *ret;
 
 	ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
-	if (unlikely(d_need_lookup(dentry))) {
-		spin_lock(&dentry->d_lock);
-		dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
-		spin_unlock(&dentry->d_lock);
-	}
 	return ret;
 }
 
@@ -4775,8 +4804,7 @@
 	if (S_ISREG(mode)) {
 		if (btrfs_test_opt(root, NODATASUM))
 			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-		if (btrfs_test_opt(root, NODATACOW) ||
-		    (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
+		if (btrfs_test_opt(root, NODATACOW))
 			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
 	}
 
@@ -4842,7 +4870,7 @@
 	ret = btrfs_insert_dir_item(trans, root, name, name_len,
 				    parent_inode, &key,
 				    btrfs_inode_type(inode), index);
-	if (ret == -EEXIST)
+	if (ret == -EEXIST || ret == -EOVERFLOW)
 		goto fail_dir_item;
 	else if (ret) {
 		btrfs_abort_transaction(trans, root, ret);
@@ -4897,7 +4925,6 @@
 	int err;
 	int drop_inode = 0;
 	u64 objectid;
-	unsigned long nr = 0;
 	u64 index = 0;
 
 	if (!new_valid_dev(rdev))
@@ -4930,6 +4957,12 @@
 		goto out_unlock;
 	}
 
+	err = btrfs_update_inode(trans, root, inode);
+	if (err) {
+		drop_inode = 1;
+		goto out_unlock;
+	}
+
 	/*
 	* If the active LSM wants to access the inode during
 	* d_instantiate it needs these. Smack checks to see
@@ -4947,9 +4980,8 @@
 		d_instantiate(dentry, inode);
 	}
 out_unlock:
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
@@ -4963,9 +4995,8 @@
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct inode *inode = NULL;
-	int drop_inode = 0;
+	int drop_inode_on_err = 0;
 	int err;
-	unsigned long nr = 0;
 	u64 objectid;
 	u64 index = 0;
 
@@ -4989,12 +5020,15 @@
 		err = PTR_ERR(inode);
 		goto out_unlock;
 	}
+	drop_inode_on_err = 1;
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-	if (err) {
-		drop_inode = 1;
+	if (err)
 		goto out_unlock;
-	}
+
+	err = btrfs_update_inode(trans, root, inode);
+	if (err)
+		goto out_unlock;
 
 	/*
 	* If the active LSM wants to access the inode during
@@ -5007,21 +5041,20 @@
 
 	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
 	if (err)
-		drop_inode = 1;
-	else {
-		inode->i_mapping->a_ops = &btrfs_aops;
-		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
-		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-		d_instantiate(dentry, inode);
-	}
+		goto out_unlock;
+
+	inode->i_mapping->a_ops = &btrfs_aops;
+	inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+	d_instantiate(dentry, inode);
+
 out_unlock:
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	if (drop_inode) {
+	if (err && drop_inode_on_err) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 	return err;
 }
 
@@ -5032,7 +5065,6 @@
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct inode *inode = old_dentry->d_inode;
 	u64 index;
-	unsigned long nr = 0;
 	int err;
 	int drop_inode = 0;
 
@@ -5062,6 +5094,7 @@
 	inode_inc_iversion(inode);
 	inode->i_ctime = CURRENT_TIME;
 	ihold(inode);
+	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 
 	err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
 
@@ -5076,14 +5109,13 @@
 		btrfs_log_new_name(trans, inode, NULL, parent);
 	}
 
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
 fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 	return err;
 }
 
@@ -5096,7 +5128,6 @@
 	int drop_on_err = 0;
 	u64 objectid = 0;
 	u64 index = 0;
-	unsigned long nr = 1;
 
 	/*
 	 * 2 items for inode and ref
@@ -5142,11 +5173,10 @@
 	drop_on_err = 0;
 
 out_fail:
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
 	if (drop_on_err)
 		iput(inode);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 	return err;
 }
 
@@ -5340,6 +5370,7 @@
 		if (start + len <= found_key.offset)
 			goto not_found;
 		em->start = start;
+		em->orig_start = start;
 		em->len = found_key.offset - start;
 		goto not_found_em;
 	}
@@ -5350,6 +5381,8 @@
 		em->len = extent_end - extent_start;
 		em->orig_start = extent_start -
 				 btrfs_file_extent_offset(leaf, item);
+		em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
+								      item);
 		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
 		if (bytenr == 0) {
 			em->block_start = EXTENT_MAP_HOLE;
@@ -5359,8 +5392,7 @@
 			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 			em->compress_type = compress_type;
 			em->block_start = bytenr;
-			em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
-									 item);
+			em->block_len = em->orig_block_len;
 		} else {
 			bytenr += btrfs_file_extent_offset(leaf, item);
 			em->block_start = bytenr;
@@ -5390,7 +5422,8 @@
 		em->start = extent_start + extent_offset;
 		em->len = (copy_size + root->sectorsize - 1) &
 			~((u64)root->sectorsize - 1);
-		em->orig_start = EXTENT_MAP_INLINE;
+		em->orig_block_len = em->len;
+		em->orig_start = em->start;
 		if (compress_type) {
 			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 			em->compress_type = compress_type;
@@ -5439,11 +5472,11 @@
 				    extent_map_end(em) - 1, NULL, GFP_NOFS);
 		goto insert;
 	} else {
-		printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
-		WARN_ON(1);
+		WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type);
 	}
 not_found:
 	em->start = start;
+	em->orig_start = start;
 	em->len = len;
 not_found_em:
 	em->block_start = EXTENT_MAP_HOLE;
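
Folding the printk() + WARN_ON(1) pair into WARN() keeps the message
and the backtrace in a single report. The general shape (unlike the
unconditional use above, WARN() normally takes a real condition;
"expected" below is a made-up stand-in):

    /* prints the message plus a stack trace only when the
     * condition is true */
    WARN(found_type != expected,
         "btrfs unknown found_type %d\n", found_type);
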
@@ -5645,38 +5678,19 @@
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
-						  struct extent_map *em,
 						  u64 start, u64 len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
-	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
 	struct btrfs_key ins;
 	u64 alloc_hint;
 	int ret;
-	bool insert = false;
-
-	/*
-	 * Ok if the extent map we looked up is a hole and is for the exact
-	 * range we want, there is no reason to allocate a new one, however if
-	 * it is not right then we need to free this one and drop the cache for
-	 * our range.
-	 */
-	if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
-	    em->len != len) {
-		free_extent_map(em);
-		em = NULL;
-		insert = true;
-		btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
-	}
 
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		return ERR_CAST(trans);
 
-	if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
-		btrfs_add_inode_defrag(trans, inode);
-
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5687,37 +5701,10 @@
 		goto out;
 	}
 
-	if (!em) {
-		em = alloc_extent_map();
-		if (!em) {
-			em = ERR_PTR(-ENOMEM);
-			goto out;
-		}
-	}
-
-	em->start = start;
-	em->orig_start = em->start;
-	em->len = ins.offset;
-
-	em->block_start = ins.objectid;
-	em->block_len = ins.offset;
-	em->bdev = root->fs_info->fs_devices->latest_bdev;
-
-	/*
-	 * We need to do this because if we're using the original em we searched
-	 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
-	 */
-	em->flags = 0;
-	set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
-	while (insert) {
-		write_lock(&em_tree->lock);
-		ret = add_extent_mapping(em_tree, em);
-		write_unlock(&em_tree->lock);
-		if (ret != -EEXIST)
-			break;
-		btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
-	}
+	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+			      ins.offset, ins.offset, 0);
+	if (IS_ERR(em))
+		goto out;
 
 	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
 					   ins.offset, ins.offset, 0);
@@ -5894,7 +5881,7 @@
 static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 					   u64 len, u64 orig_start,
 					   u64 block_start, u64 block_len,
-					   int type)
+					   u64 orig_block_len, int type)
 {
 	struct extent_map_tree *em_tree;
 	struct extent_map *em;
@@ -5912,15 +5899,20 @@
 	em->block_len = block_len;
 	em->block_start = block_start;
 	em->bdev = root->fs_info->fs_devices->latest_bdev;
+	em->orig_block_len = orig_block_len;
+	em->generation = -1;
 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
 	if (type == BTRFS_ORDERED_PREALLOC)
-		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+		set_bit(EXTENT_FLAG_FILLING, &em->flags);
 
 	do {
 		btrfs_drop_extent_cache(inode, em->start,
 				em->start + em->len - 1, 0);
 		write_lock(&em_tree->lock);
 		ret = add_extent_mapping(em_tree, em);
+		if (!ret)
+			list_move(&em->list,
+				  &em_tree->modified_extents);
 		write_unlock(&em_tree->lock);
 	} while (ret == -EEXIST);
 
@@ -6047,13 +6039,15 @@
 			goto must_cow;
 
 		if (can_nocow_odirect(trans, inode, start, len) == 1) {
-			u64 orig_start = em->start;
+			u64 orig_start = em->orig_start;
+			u64 orig_block_len = em->orig_block_len;
 
 			if (type == BTRFS_ORDERED_PREALLOC) {
 				free_extent_map(em);
 				em = create_pinned_em(inode, start, len,
 						       orig_start,
-						       block_start, len, type);
+						       block_start, len,
+						       orig_block_len, type);
 				if (IS_ERR(em)) {
 					btrfs_end_transaction(trans, root);
 					goto unlock_err;
@@ -6077,7 +6071,8 @@
 	 * it above
 	 */
 	len = bh_result->b_size;
-	em = btrfs_new_extent_direct(inode, em, start, len);
+	free_extent_map(em);
+	em = btrfs_new_extent_direct(inode, start, len);
 	if (IS_ERR(em)) {
 		ret = PTR_ERR(em);
 		goto unlock_err;
@@ -6318,6 +6313,9 @@
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
 
+	if (async_submit)
+		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+
 	bio_get(bio);
 
 	if (!write) {
@@ -6362,7 +6360,6 @@
 {
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
 	struct bio *bio;
 	struct bio *orig_bio = dip->orig_bio;
 	struct bio_vec *bvec = orig_bio->bi_io_vec;
@@ -6375,7 +6372,7 @@
 	int async_submit = 0;
 
 	map_length = orig_bio->bi_size;
-	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+	ret = btrfs_map_block(root->fs_info, READ, start_sector << 9,
 			      &map_length, NULL, 0);
 	if (ret) {
 		bio_put(orig_bio);
@@ -6429,7 +6426,8 @@
 			bio->bi_end_io = btrfs_end_dio_bio;
 
 			map_length = orig_bio->bi_size;
-			ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+			ret = btrfs_map_block(root->fs_info, READ,
+					      start_sector << 9,
 					      &map_length, NULL, 0);
 			if (ret) {
 				bio_put(bio);
@@ -6582,9 +6580,17 @@
 		   btrfs_submit_direct, 0);
 }
 
+#define BTRFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)
+
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {
+	int	ret;
+
+	ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
+	if (ret)
+		return ret;
+
 	return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
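
The fiemap_check_flags() call makes btrfs reject any fiemap flag other
than FIEMAP_FLAG_SYNC up front rather than silently ignoring it. A
minimal userspace probe of the interface (illustrative; fd is an open
descriptor for the file):

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>
    #include <linux/fiemap.h>

    struct fiemap fm = {
            .fm_length       = FIEMAP_MAX_OFFSET,
            .fm_flags        = FIEMAP_FLAG_SYNC,  /* the one accepted flag */
            .fm_extent_count = 0,                 /* just count extents */
    };

    if (ioctl(fd, FS_IOC_FIEMAP, &fm) == 0)
            printf("%u extents\n", fm.fm_mapped_extents);
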
 
@@ -6855,7 +6861,6 @@
 	int ret;
 	int err = 0;
 	struct btrfs_trans_handle *trans;
-	unsigned long nr;
 	u64 mask = root->sectorsize - 1;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 
@@ -6978,9 +6983,8 @@
 			break;
 		}
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 
 		trans = btrfs_start_transaction(root, 2);
 		if (IS_ERR(trans)) {
@@ -7014,9 +7018,8 @@
 		if (ret && !err)
 			err = ret;
 
-		nr = trans->blocks_used;
 		ret = btrfs_end_transaction(trans, root);
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 	}
 
 out:
@@ -7093,6 +7096,7 @@
 	extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
 	ei->io_tree.track_uptodate = 1;
 	ei->io_failure_tree.track_uptodate = 1;
+	atomic_set(&ei->sync_writers, 0);
 	mutex_init(&ei->log_mutex);
 	mutex_init(&ei->delalloc_mutex);
 	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7203,6 +7207,8 @@
 		kmem_cache_destroy(btrfs_path_cachep);
 	if (btrfs_free_space_cachep)
 		kmem_cache_destroy(btrfs_free_space_cachep);
+	if (btrfs_delalloc_work_cachep)
+		kmem_cache_destroy(btrfs_delalloc_work_cachep);
 }
 
 int btrfs_init_cachep(void)
@@ -7237,6 +7243,13 @@
 	if (!btrfs_free_space_cachep)
 		goto fail;
 
+	btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
+			sizeof(struct btrfs_delalloc_work), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+			NULL);
+	if (!btrfs_delalloc_work_cachep)
+		goto fail;
+
 	return 0;
 fail:
 	btrfs_destroy_cachep();
@@ -7308,6 +7321,28 @@
 	if (S_ISDIR(old_inode->i_mode) && new_inode &&
 	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
 		return -ENOTEMPTY;
+
+
+	/* check for collisions, even if the name isn't there */
+	ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+			     new_dentry->d_name.name,
+			     new_dentry->d_name.len);
+
+	if (ret) {
+		if (ret == -EEXIST) {
+			/* we shouldn't get -EEXIST
+			 * without a new_inode */
+			if (!new_inode) {
+				WARN_ON(1);
+				return ret;
+			}
+		} else {
+			/* maybe -EOVERFLOW */
+			return ret;
+		}
+	}
+	ret = 0;
+
 	/*
 	 * we're using rename to replace one file with another.
 	 * and the replacement file is large.  Start IO on it now so
@@ -7447,6 +7482,49 @@
 	return ret;
 }
 
+static void btrfs_run_delalloc_work(struct btrfs_work *work)
+{
+	struct btrfs_delalloc_work *delalloc_work;
+
+	delalloc_work = container_of(work, struct btrfs_delalloc_work,
+				     work);
+	if (delalloc_work->wait)
+		btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
+	else
+		filemap_flush(delalloc_work->inode->i_mapping);
+
+	if (delalloc_work->delay_iput)
+		btrfs_add_delayed_iput(delalloc_work->inode);
+	else
+		iput(delalloc_work->inode);
+	complete(&delalloc_work->completion);
+}
+
+struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
+						    int wait, int delay_iput)
+{
+	struct btrfs_delalloc_work *work;
+
+	work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+	if (!work)
+		return NULL;
+
+	init_completion(&work->completion);
+	INIT_LIST_HEAD(&work->list);
+	work->inode = inode;
+	work->wait = wait;
+	work->delay_iput = delay_iput;
+	work->work.func = btrfs_run_delalloc_work;
+
+	return work;
+}
+
+void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
+{
+	wait_for_completion(&work->completion);
+	kmem_cache_free(btrfs_delalloc_work_cachep, work);
+}
+
 /*
  * some fairly slow code that needs optimization. This walks the list
  * of all the inodes with pending delalloc and forces them to disk.
@@ -7456,10 +7534,15 @@
 	struct list_head *head = &root->fs_info->delalloc_inodes;
 	struct btrfs_inode *binode;
 	struct inode *inode;
+	struct btrfs_delalloc_work *work, *next;
+	struct list_head works;
+	int ret = 0;
 
 	if (root->fs_info->sb->s_flags & MS_RDONLY)
 		return -EROFS;
 
+	INIT_LIST_HEAD(&works);
+
 	spin_lock(&root->fs_info->delalloc_lock);
 	while (!list_empty(head)) {
 		binode = list_entry(head->next, struct btrfs_inode,
@@ -7469,11 +7552,14 @@
 			list_del_init(&binode->delalloc_inodes);
 		spin_unlock(&root->fs_info->delalloc_lock);
 		if (inode) {
-			filemap_flush(inode->i_mapping);
-			if (delay_iput)
-				btrfs_add_delayed_iput(inode);
-			else
-				iput(inode);
+			work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+			if (!work) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			list_add_tail(&work->list, &works);
+			btrfs_queue_worker(&root->fs_info->flush_workers,
+					   &work->work);
 		}
 		cond_resched();
 		spin_lock(&root->fs_info->delalloc_lock);
@@ -7492,7 +7578,12 @@
 		    atomic_read(&root->fs_info->async_delalloc_pages) == 0));
 	}
 	atomic_dec(&root->fs_info->async_submit_draining);
-	return 0;
+out:
+	list_for_each_entry_safe(work, next, &works, list) {
+		list_del_init(&work->list);
+		btrfs_wait_and_free_delalloc_work(work);
+	}
+	return ret;
 }
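
Rather than flushing each delalloc inode serially inside the loop, the
rewrite queues one btrfs_delalloc_work per inode on the flush_workers
pool and then waits on every completion: a fan-out/fan-in built from
struct completion. Reduced to its core:

    /* fan out: one asynchronous flush per inode (loop above) */
    btrfs_queue_worker(&root->fs_info->flush_workers, &work->work);

    /* fan in: barrier on everything that was queued */
    list_for_each_entry_safe(work, next, &works, list) {
            list_del_init(&work->list);
            btrfs_wait_and_free_delalloc_work(work); /* wait_for_completion */
    }
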
 
 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -7512,7 +7603,6 @@
 	unsigned long ptr;
 	struct btrfs_file_extent_item *ei;
 	struct extent_buffer *leaf;
-	unsigned long nr = 0;
 
 	name_len = strlen(symname) + 1;
 	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
@@ -7610,13 +7700,12 @@
 out_unlock:
 	if (!err)
 		d_instantiate(dentry, inode);
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 	return err;
 }
 
@@ -7679,6 +7768,7 @@
 		em->len = ins.offset;
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
+		em->orig_block_len = ins.offset;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
 		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
 		em->generation = trans->transid;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5b3429a..4b45167 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -55,6 +55,7 @@
 #include "backref.h"
 #include "rcu-string.h"
 #include "send.h"
+#include "dev-replace.h"
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -140,8 +141,11 @@
 		BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
 	}
 
-	if (flags & BTRFS_INODE_NODATACOW)
+	if (flags & BTRFS_INODE_NODATACOW) {
 		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+		if (S_ISREG(inode->i_mode))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
+	}
 
 	btrfs_update_iflags(inode);
 }
@@ -571,8 +575,12 @@
 		ret = btrfs_commit_transaction(trans,
 					       root->fs_info->extent_root);
 	}
-	if (ret)
+	if (ret) {
+		/* cleanup_transaction has freed this for us */
+		if (trans->aborted)
+			pending_snapshot = NULL;
 		goto fail;
+	}
 
 	ret = pending_snapshot->error;
 	if (ret)
@@ -705,6 +713,16 @@
 	if (error)
 		goto out_dput;
 
+	/*
+	 * even if this name doesn't exist, we may get hash collisions.
+	 * check for them now when we can safely fail
+	 */
+	error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
+					       dir->i_ino, name,
+					       namelen);
+	if (error)
+		goto out_dput;
+
 	down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
 
 	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
@@ -1293,12 +1311,13 @@
 	return ret;
 }
 
-static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
+static noinline int btrfs_ioctl_resize(struct file *file,
 					void __user *arg)
 {
 	u64 new_size;
 	u64 old_size;
 	u64 devid = 1;
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct btrfs_ioctl_vol_args *vol_args;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device = NULL;
@@ -1313,13 +1332,17 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	mutex_lock(&root->fs_info->volume_mutex);
-	if (root->fs_info->balance_ctl) {
-		printk(KERN_INFO "btrfs: balance in progress\n");
-		ret = -EINVAL;
-		goto out;
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
+			1)) {
+		pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+		return -EINPROGRESS;
 	}
 
+	mutex_lock(&root->fs_info->volume_mutex);
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args)) {
 		ret = PTR_ERR(vol_args);
@@ -1339,7 +1362,7 @@
 		printk(KERN_INFO "btrfs: resizing devid %llu\n",
 		       (unsigned long long)devid);
 	}
-	device = btrfs_find_device(root, devid, NULL, NULL);
+	device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
 	if (!device) {
 		printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
 		       (unsigned long long)devid);
@@ -1371,6 +1394,11 @@
 		}
 	}
 
+	if (device->is_tgtdev_for_dev_replace) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
 	old_size = device->total_bytes;
 
 	if (mod < 0) {
@@ -1409,12 +1437,14 @@
 		btrfs_commit_transaction(trans, root);
 	} else if (new_size < old_size) {
 		ret = btrfs_shrink_device(device, new_size);
-	}
+	} /* equal, nothing to do */
 
 out_free:
 	kfree(vol_args);
 out:
 	mutex_unlock(&root->fs_info->volume_mutex);
+	mnt_drop_write_file(file);
+	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
 	return ret;
 }
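
The old per-ioctl balance_ctl test becomes a single fs-wide try-lock
built on atomic_xchg(): the exchange returns the previous value, so a
nonzero result means another exclusive operation (add/delete/balance/
replace/resize) already owns it. The core pattern:

    if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1))
            return -EINPROGRESS;  /* old value was 1: already running */

    /* ... the exclusive device operation ... */

    atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
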
 
@@ -2156,9 +2186,17 @@
 	if (btrfs_root_readonly(root))
 		return -EROFS;
 
+	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
+			1)) {
+		pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+		return -EINPROGRESS;
+	}
 	ret = mnt_want_write_file(file);
-	if (ret)
+	if (ret) {
+		atomic_set(&root->fs_info->mutually_exclusive_operation_running,
+			   0);
 		return ret;
+	}
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFDIR:
@@ -2210,6 +2248,7 @@
 	}
 out:
 	mnt_drop_write_file(file);
+	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
 	return ret;
 }
 
@@ -2221,13 +2260,13 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	mutex_lock(&root->fs_info->volume_mutex);
-	if (root->fs_info->balance_ctl) {
-		printk(KERN_INFO "btrfs: balance in progress\n");
-		ret = -EINVAL;
-		goto out;
+	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
+			1)) {
+		pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+		return -EINPROGRESS;
 	}
 
+	mutex_lock(&root->fs_info->volume_mutex);
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args)) {
 		ret = PTR_ERR(vol_args);
@@ -2240,27 +2279,31 @@
 	kfree(vol_args);
 out:
 	mutex_unlock(&root->fs_info->volume_mutex);
+	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
 	return ret;
 }
 
-static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 {
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct btrfs_ioctl_vol_args *vol_args;
 	int ret;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (root->fs_info->sb->s_flags & MS_RDONLY)
-		return -EROFS;
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
 
-	mutex_lock(&root->fs_info->volume_mutex);
-	if (root->fs_info->balance_ctl) {
-		printk(KERN_INFO "btrfs: balance in progress\n");
-		ret = -EINVAL;
-		goto out;
+	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
+			1)) {
+		pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+		mnt_drop_write_file(file);
+		return -EINPROGRESS;
 	}
 
+	mutex_lock(&root->fs_info->volume_mutex);
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args)) {
 		ret = PTR_ERR(vol_args);
@@ -2273,6 +2316,8 @@
 	kfree(vol_args);
 out:
 	mutex_unlock(&root->fs_info->volume_mutex);
+	mnt_drop_write_file(file);
+	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
 	return ret;
 }
 
@@ -2328,7 +2373,7 @@
 		s_uuid = di_args->uuid;
 
 	mutex_lock(&fs_devices->device_list_mutex);
-	dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
+	dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL);
 	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (!dev) {
@@ -2821,12 +2866,19 @@
 	struct btrfs_disk_key disk_key;
 	u64 objectid = 0;
 	u64 dir_id;
+	int ret;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (copy_from_user(&objectid, argp, sizeof(objectid)))
-		return -EFAULT;
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	if (copy_from_user(&objectid, argp, sizeof(objectid))) {
+		ret = -EFAULT;
+		goto out;
+	}
 
 	if (!objectid)
 		objectid = root->root_key.objectid;
@@ -2836,21 +2888,28 @@
 	location.offset = (u64)-1;
 
 	new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
-	if (IS_ERR(new_root))
-		return PTR_ERR(new_root);
+	if (IS_ERR(new_root)) {
+		ret = PTR_ERR(new_root);
+		goto out;
+	}
 
-	if (btrfs_root_refs(&new_root->root_item) == 0)
-		return -ENOENT;
+	if (btrfs_root_refs(&new_root->root_item) == 0) {
+		ret = -ENOENT;
+		goto out;
+	}
 
 	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	path->leave_spinning = 1;
 
 	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		btrfs_free_path(path);
-		return PTR_ERR(trans);
+		ret = PTR_ERR(trans);
+		goto out;
 	}
 
 	dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
@@ -2861,7 +2920,8 @@
 		btrfs_end_transaction(trans, root);
 		printk(KERN_ERR "Umm, you don't have the default dir item, "
 		       "this isn't going to work\n");
-		return -ENOENT;
+		ret = -ENOENT;
+		goto out;
 	}
 
 	btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
@@ -2871,8 +2931,9 @@
 
 	btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
 	btrfs_end_transaction(trans, root);
-
-	return 0;
+out:
+	mnt_drop_write_file(file);
+	return ret;
 }
 
 void btrfs_get_block_group_info(struct list_head *groups_list,
@@ -3036,32 +3097,38 @@
 	return 0;
 }
 
-static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
+					    void __user *argp)
 {
-	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
 	struct btrfs_trans_handle *trans;
 	u64 transid;
 	int ret;
 
-	trans = btrfs_start_transaction(root, 0);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
+	trans = btrfs_attach_transaction(root);
+	if (IS_ERR(trans)) {
+		if (PTR_ERR(trans) != -ENOENT)
+			return PTR_ERR(trans);
+
+		/* No running transaction, don't bother */
+		transid = root->fs_info->last_trans_committed;
+		goto out;
+	}
 	transid = trans->transid;
 	ret = btrfs_commit_transaction_async(trans, root, 0);
 	if (ret) {
 		btrfs_end_transaction(trans, root);
 		return ret;
 	}
-
+out:
 	if (argp)
 		if (copy_to_user(argp, &transid, sizeof(transid)))
 			return -EFAULT;
 	return 0;
 }
 
-static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root,
+					   void __user *argp)
 {
-	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
 	u64 transid;
 
 	if (argp) {
@@ -3073,10 +3140,11 @@
 	return btrfs_wait_for_commit(root, transid);
 }
 
-static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
 {
-	int ret;
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct btrfs_ioctl_scrub_args *sa;
+	int ret;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -3085,12 +3153,22 @@
 	if (IS_ERR(sa))
 		return PTR_ERR(sa);
 
-	ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
-			      &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
+	if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
+		ret = mnt_want_write_file(file);
+		if (ret)
+			goto out;
+	}
+
+	ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end,
+			      &sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
+			      0);
 
 	if (copy_to_user(arg, sa, sizeof(*sa)))
 		ret = -EFAULT;
 
+	if (!(sa->flags & BTRFS_SCRUB_READONLY))
+		mnt_drop_write_file(file);
+out:
 	kfree(sa);
 	return ret;
 }
@@ -3100,7 +3178,7 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	return btrfs_scrub_cancel(root);
+	return btrfs_scrub_cancel(root->fs_info);
 }
 
 static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
@@ -3149,6 +3227,51 @@
 	return ret;
 }
 
+static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
+{
+	struct btrfs_ioctl_dev_replace_args *p;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	p = memdup_user(arg, sizeof(*p));
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	switch (p->cmd) {
+	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
+		if (atomic_xchg(
+			&root->fs_info->mutually_exclusive_operation_running,
+			1)) {
+			pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+			ret = -EINPROGRESS;
+		} else {
+			ret = btrfs_dev_replace_start(root, p);
+			atomic_set(
+			 &root->fs_info->mutually_exclusive_operation_running,
+			 0);
+		}
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
+		btrfs_dev_replace_status(root->fs_info, p);
+		ret = 0;
+		break;
+	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
+		ret = btrfs_dev_replace_cancel(root->fs_info, p);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (copy_to_user(arg, p, sizeof(*p)))
+		ret = -EFAULT;
+
+	kfree(p);
+	return ret;
+}
+
 static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
 {
 	int ret = 0;
@@ -3315,6 +3438,7 @@
 	struct btrfs_ioctl_balance_args *bargs;
 	struct btrfs_balance_control *bctl;
 	int ret;
+	int need_to_clear_lock = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -3350,10 +3474,13 @@
 		bargs = NULL;
 	}
 
-	if (fs_info->balance_ctl) {
+	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
+			1)) {
+		pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
 		ret = -EINPROGRESS;
 		goto out_bargs;
 	}
+	need_to_clear_lock = 1;
 
 	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
 	if (!bctl) {
@@ -3387,6 +3514,9 @@
 out_bargs:
 	kfree(bargs);
 out:
+	if (need_to_clear_lock)
+		atomic_set(&root->fs_info->mutually_exclusive_operation_running,
+			   0);
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
 	mnt_drop_write_file(file);
@@ -3441,8 +3571,9 @@
 	return ret;
 }
 
-static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 {
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct btrfs_ioctl_quota_ctl_args *sa;
 	struct btrfs_trans_handle *trans = NULL;
 	int ret;
@@ -3451,12 +3582,15 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (root->fs_info->sb->s_flags & MS_RDONLY)
-		return -EROFS;
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
 
 	sa = memdup_user(arg, sizeof(*sa));
-	if (IS_ERR(sa))
-		return PTR_ERR(sa);
+	if (IS_ERR(sa)) {
+		ret = PTR_ERR(sa);
+		goto drop_write;
+	}
 
 	if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
 		trans = btrfs_start_transaction(root, 2);
@@ -3489,14 +3623,16 @@
 		if (err && !ret)
 			ret = err;
 	}
-
 out:
 	kfree(sa);
+drop_write:
+	mnt_drop_write_file(file);
 	return ret;
 }
 
-static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
 {
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct btrfs_ioctl_qgroup_assign_args *sa;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -3505,12 +3641,15 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (root->fs_info->sb->s_flags & MS_RDONLY)
-		return -EROFS;
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
 
 	sa = memdup_user(arg, sizeof(*sa));
-	if (IS_ERR(sa))
-		return PTR_ERR(sa);
+	if (IS_ERR(sa)) {
+		ret = PTR_ERR(sa);
+		goto drop_write;
+	}
 
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans)) {
@@ -3533,11 +3672,14 @@
 
 out:
 	kfree(sa);
+drop_write:
+	mnt_drop_write_file(file);
 	return ret;
 }
 
-static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
 {
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct btrfs_ioctl_qgroup_create_args *sa;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -3546,12 +3688,15 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (root->fs_info->sb->s_flags & MS_RDONLY)
-		return -EROFS;
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
 
 	sa = memdup_user(arg, sizeof(*sa));
-	if (IS_ERR(sa))
-		return PTR_ERR(sa);
+	if (IS_ERR(sa)) {
+		ret = PTR_ERR(sa);
+		goto drop_write;
+	}
 
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans)) {
@@ -3573,11 +3718,14 @@
 
 out:
 	kfree(sa);
+drop_write:
+	mnt_drop_write_file(file);
 	return ret;
 }
 
-static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
 {
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct btrfs_ioctl_qgroup_limit_args *sa;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -3587,12 +3735,15 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (root->fs_info->sb->s_flags & MS_RDONLY)
-		return -EROFS;
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
 
 	sa = memdup_user(arg, sizeof(*sa));
-	if (IS_ERR(sa))
-		return PTR_ERR(sa);
+	if (IS_ERR(sa)) {
+		ret = PTR_ERR(sa);
+		goto drop_write;
+	}
 
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans)) {
@@ -3615,6 +3766,8 @@
 
 out:
 	kfree(sa);
+drop_write:
+	mnt_drop_write_file(file);
 	return ret;
 }
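
The quota and qgroup ioctls all trade the bare MS_RDONLY test for
mnt_want_write_file(), which both fails on read-only mounts and holds
off a concurrent remount-read-only until the matching drop. The
invariant every exit path now honors:

    ret = mnt_want_write_file(file);
    if (ret)
            return ret;           /* typically -EROFS on a ro mount */

    /* ... ioctl body; every error goto funnels to the drop ... */

    mnt_drop_write_file(file);    /* must balance the want on all paths */
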
 
@@ -3735,11 +3888,11 @@
 	case BTRFS_IOC_DEFRAG_RANGE:
 		return btrfs_ioctl_defrag(file, argp);
 	case BTRFS_IOC_RESIZE:
-		return btrfs_ioctl_resize(root, argp);
+		return btrfs_ioctl_resize(file, argp);
 	case BTRFS_IOC_ADD_DEV:
 		return btrfs_ioctl_add_dev(root, argp);
 	case BTRFS_IOC_RM_DEV:
-		return btrfs_ioctl_rm_dev(root, argp);
+		return btrfs_ioctl_rm_dev(file, argp);
 	case BTRFS_IOC_FS_INFO:
 		return btrfs_ioctl_fs_info(root, argp);
 	case BTRFS_IOC_DEV_INFO:
@@ -3768,11 +3921,11 @@
 		btrfs_sync_fs(file->f_dentry->d_sb, 1);
 		return 0;
 	case BTRFS_IOC_START_SYNC:
-		return btrfs_ioctl_start_sync(file, argp);
+		return btrfs_ioctl_start_sync(root, argp);
 	case BTRFS_IOC_WAIT_SYNC:
-		return btrfs_ioctl_wait_sync(file, argp);
+		return btrfs_ioctl_wait_sync(root, argp);
 	case BTRFS_IOC_SCRUB:
-		return btrfs_ioctl_scrub(root, argp);
+		return btrfs_ioctl_scrub(file, argp);
 	case BTRFS_IOC_SCRUB_CANCEL:
 		return btrfs_ioctl_scrub_cancel(root, argp);
 	case BTRFS_IOC_SCRUB_PROGRESS:
@@ -3790,13 +3943,15 @@
 	case BTRFS_IOC_GET_DEV_STATS:
 		return btrfs_ioctl_get_dev_stats(root, argp);
 	case BTRFS_IOC_QUOTA_CTL:
-		return btrfs_ioctl_quota_ctl(root, argp);
+		return btrfs_ioctl_quota_ctl(file, argp);
 	case BTRFS_IOC_QGROUP_ASSIGN:
-		return btrfs_ioctl_qgroup_assign(root, argp);
+		return btrfs_ioctl_qgroup_assign(file, argp);
 	case BTRFS_IOC_QGROUP_CREATE:
-		return btrfs_ioctl_qgroup_create(root, argp);
+		return btrfs_ioctl_qgroup_create(file, argp);
 	case BTRFS_IOC_QGROUP_LIMIT:
-		return btrfs_ioctl_qgroup_limit(root, argp);
+		return btrfs_ioctl_qgroup_limit(file, argp);
+	case BTRFS_IOC_DEV_REPLACE:
+		return btrfs_ioctl_dev_replace(root, argp);
 	}
 
 	return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 731e287..dabca9c 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -30,6 +30,8 @@
 	char name[BTRFS_PATH_NAME_MAX + 1];
 };
 
+#define BTRFS_DEVICE_PATH_NAME_MAX 1024
+
 #define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
 #define BTRFS_SUBVOL_RDONLY		(1ULL << 1)
 #define BTRFS_SUBVOL_QGROUP_INHERIT	(1ULL << 2)
@@ -123,7 +125,48 @@
 	__u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
 };
 
-#define BTRFS_DEVICE_PATH_NAME_MAX 1024
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
+struct btrfs_ioctl_dev_replace_start_params {
+	__u64 srcdevid;	/* in, if 0, use srcdev_name instead */
+	__u64 cont_reading_from_srcdev_mode;	/* in, see #define
+						 * above */
+	__u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1];	/* in */
+	__u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1];	/* in */
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED	0
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED		1
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED		2
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED		3
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED		4
+struct btrfs_ioctl_dev_replace_status_params {
+	__u64 replace_state;	/* out, see #define above */
+	__u64 progress_1000;	/* out, 0 <= x <= 1000 */
+	__u64 time_started;	/* out, seconds since 1-Jan-1970 */
+	__u64 time_stopped;	/* out, seconds since 1-Jan-1970 */
+	__u64 num_write_errors;	/* out */
+	__u64 num_uncorrectable_read_errors;	/* out */
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_START			0
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS			1
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL			2
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR			0
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED		1
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED		2
+struct btrfs_ioctl_dev_replace_args {
+	__u64 cmd;	/* in */
+	__u64 result;	/* out */
+
+	union {
+		struct btrfs_ioctl_dev_replace_start_params start;
+		struct btrfs_ioctl_dev_replace_status_params status;
+	};	/* in/out */
+
+	__u64 spare[64];
+};
+
 struct btrfs_ioctl_dev_info_args {
 	__u64 devid;				/* in/out */
 	__u8 uuid[BTRFS_UUID_SIZE];		/* in/out */
@@ -453,4 +496,7 @@
 			       struct btrfs_ioctl_qgroup_limit_args)
 #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
 				      struct btrfs_ioctl_get_dev_stats)
+#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
+				    struct btrfs_ioctl_dev_replace_args)
+
 #endif
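
To show how the new structures fit together, here is a minimal userspace sketch that starts a replace operation and polls its progress. The header include is an assumption (a local copy of this ioctl.h), the mount path and device names are caller-supplied, and error handling is trimmed:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include "btrfs_ioctl.h"	/* assumed local copy of the header above */

	int replace_device(const char *mnt, const char *src, const char *tgt)
	{
		struct btrfs_ioctl_dev_replace_args args;
		int fd = open(mnt, O_RDONLY);	/* any fd on the filesystem */

		if (fd < 0)
			return -1;

		memset(&args, 0, sizeof(args));
		args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_START;
		args.start.srcdevid = 0;	/* 0: identify the source by name */
		args.start.cont_reading_from_srcdev_mode =
			BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID;
		strncpy((char *)args.start.srcdev_name, src,
			BTRFS_DEVICE_PATH_NAME_MAX);
		strncpy((char *)args.start.tgtdev_name, tgt,
			BTRFS_DEVICE_PATH_NAME_MAX);
		if (ioctl(fd, BTRFS_IOC_DEV_REPLACE, &args) < 0 ||
		    args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) {
			close(fd);
			return -1;
		}

		/* the same args struct doubles as the status query */
		do {
			sleep(1);
			memset(&args, 0, sizeof(args));
			args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS;
			if (ioctl(fd, BTRFS_IOC_DEV_REPLACE, &args) < 0)
				break;
			printf("progress: %llu/1000\n",
			       (unsigned long long)args.status.progress_1000);
		} while (args.status.replace_state ==
			 BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED);

		close(fd);
		return 0;
	}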
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h
new file mode 100644
index 0000000..b7816ce
--- /dev/null
+++ b/fs/btrfs/math.h
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (C) 2012 Fujitsu.  All rights reserved.
+ * Written by Miao Xie <miaox@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_MATH_H
+#define __BTRFS_MATH_H
+
+#include <asm/div64.h>
+
+static inline u64 div_factor(u64 num, int factor)
+{
+	if (factor == 10)
+		return num;
+	num *= factor;
+	do_div(num, 10);
+	return num;
+}
+
+static inline u64 div_factor_fine(u64 num, int factor)
+{
+	if (factor == 100)
+		return num;
+	num *= factor;
+	do_div(num, 100);
+	return num;
+}
+
+#endif
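
These helpers exist because a straight 64-by-64 division is not available on every 32-bit target; do_div() performs the u64-by-u32 split portably. A small usage sketch (the function and values are illustrative):

	/* thresholds at 70% (tenths) and 95% (hundredths) of a chunk size;
	 * note that num * factor must still fit in a u64 */
	static u64 example_thresholds(u64 chunk_size)
	{
		u64 soft = div_factor(chunk_size, 7);		/* * 7 / 10   */
		u64 hard = div_factor_fine(chunk_size, 95);	/* * 95 / 100 */

		return hard - soft;
	}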
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7772f02..f107312 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -211,6 +211,8 @@
 	init_waitqueue_head(&entry->wait);
 	INIT_LIST_HEAD(&entry->list);
 	INIT_LIST_HEAD(&entry->root_extent_list);
+	INIT_LIST_HEAD(&entry->work_list);
+	init_completion(&entry->completion);
 
 	trace_btrfs_ordered_extent_add(inode, entry);
 
@@ -464,18 +466,28 @@
 	wake_up(&entry->wait);
 }
 
+static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
+{
+	struct btrfs_ordered_extent *ordered;
+
+	ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
+	btrfs_start_ordered_extent(ordered->inode, ordered, 1);
+	complete(&ordered->completion);
+}
+
 /*
  * wait for all the ordered extents in a root.  This is done when balancing
  * space between drives.
  */
 void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
 {
-	struct list_head splice;
+	struct list_head splice, works;
 	struct list_head *cur;
-	struct btrfs_ordered_extent *ordered;
+	struct btrfs_ordered_extent *ordered, *next;
 	struct inode *inode;
 
 	INIT_LIST_HEAD(&splice);
+	INIT_LIST_HEAD(&works);
 
 	spin_lock(&root->fs_info->ordered_extent_lock);
 	list_splice_init(&root->fs_info->ordered_extents, &splice);
@@ -494,19 +506,32 @@
 		spin_unlock(&root->fs_info->ordered_extent_lock);
 
 		if (inode) {
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-			if (delay_iput)
-				btrfs_add_delayed_iput(inode);
-			else
-				iput(inode);
+			ordered->flush_work.func = btrfs_run_ordered_extent_work;
+			list_add_tail(&ordered->work_list, &works);
+			btrfs_queue_worker(&root->fs_info->flush_workers,
+					   &ordered->flush_work);
 		} else {
 			btrfs_put_ordered_extent(ordered);
 		}
 
+		cond_resched();
 		spin_lock(&root->fs_info->ordered_extent_lock);
 	}
 	spin_unlock(&root->fs_info->ordered_extent_lock);
+
+	list_for_each_entry_safe(ordered, next, &works, work_list) {
+		list_del_init(&ordered->work_list);
+		wait_for_completion(&ordered->completion);
+
+		inode = ordered->inode;
+		btrfs_put_ordered_extent(ordered);
+		if (delay_iput)
+			btrfs_add_delayed_iput(inode);
+		else
+			iput(inode);
+
+		cond_resched();
+	}
 }
 
 /*
@@ -519,13 +544,17 @@
  * extra check to make sure the ordered operation list really is empty
  * before we return
  */
-void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
+int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
 {
 	struct btrfs_inode *btrfs_inode;
 	struct inode *inode;
 	struct list_head splice;
+	struct list_head works;
+	struct btrfs_delalloc_work *work, *next;
+	int ret = 0;
 
 	INIT_LIST_HEAD(&splice);
+	INIT_LIST_HEAD(&works);
 
 	mutex_lock(&root->fs_info->ordered_operations_mutex);
 	spin_lock(&root->fs_info->ordered_extent_lock);
@@ -533,6 +562,7 @@
 	list_splice_init(&root->fs_info->ordered_operations, &splice);
 
 	while (!list_empty(&splice)) {
+
 		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
 				   ordered_operations);
 
@@ -549,15 +579,26 @@
 			list_add_tail(&BTRFS_I(inode)->ordered_operations,
 			      &root->fs_info->ordered_operations);
 		}
+
+		if (!inode)
+			continue;
 		spin_unlock(&root->fs_info->ordered_extent_lock);
 
-		if (inode) {
-			if (wait)
-				btrfs_wait_ordered_range(inode, 0, (u64)-1);
-			else
-				filemap_flush(inode->i_mapping);
-			btrfs_add_delayed_iput(inode);
+		work = btrfs_alloc_delalloc_work(inode, wait, 1);
+		if (!work) {
+			if (list_empty(&BTRFS_I(inode)->ordered_operations))
+				list_add_tail(&btrfs_inode->ordered_operations,
+					      &splice);
+			spin_lock(&root->fs_info->ordered_extent_lock);
+			list_splice_tail(&splice,
+					 &root->fs_info->ordered_operations);
+			spin_unlock(&root->fs_info->ordered_extent_lock);
+			ret = -ENOMEM;
+			goto out;
 		}
+		list_add_tail(&work->list, &works);
+		btrfs_queue_worker(&root->fs_info->flush_workers,
+				   &work->work);
 
 		cond_resched();
 		spin_lock(&root->fs_info->ordered_extent_lock);
@@ -566,7 +607,13 @@
 		goto again;
 
 	spin_unlock(&root->fs_info->ordered_extent_lock);
+out:
+	list_for_each_entry_safe(work, next, &works, list) {
+		list_del_init(&work->list);
+		btrfs_wait_and_free_delalloc_work(work);
+	}
 	mutex_unlock(&root->fs_info->ordered_operations_mutex);
+	return ret;
 }
 
 /*
@@ -606,7 +653,6 @@
 	u64 end;
 	u64 orig_end;
 	struct btrfs_ordered_extent *ordered;
-	int found;
 
 	if (start + len < start) {
 		orig_end = INT_LIMIT(loff_t);
@@ -642,7 +688,6 @@
 	filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
 	end = orig_end;
-	found = 0;
 	while (1) {
 		ordered = btrfs_lookup_first_ordered_extent(inode, end);
 		if (!ordered)
@@ -655,7 +700,6 @@
 			btrfs_put_ordered_extent(ordered);
 			break;
 		}
-		found++;
 		btrfs_start_ordered_extent(inode, ordered, 1);
 		end = ordered->file_offset;
 		btrfs_put_ordered_extent(ordered);
@@ -934,15 +978,6 @@
 	if (last_mod < root->fs_info->last_trans_committed)
 		return;
 
-	/*
-	 * the transaction is already committing.  Just start the IO and
-	 * don't bother with all of this list nonsense
-	 */
-	if (trans && root->fs_info->running_transaction->blocked) {
-		btrfs_wait_ordered_range(inode, 0, (u64)-1);
-		return;
-	}
-
 	spin_lock(&root->fs_info->ordered_extent_lock);
 	if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
 		list_add_tail(&BTRFS_I(inode)->ordered_operations,
@@ -959,6 +994,7 @@
 				     NULL);
 	if (!btrfs_ordered_extent_cache)
 		return -ENOMEM;
+
 	return 0;
 }
 
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 853fc7b..f29d4bf5 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -128,8 +128,11 @@
 	struct list_head root_extent_list;
 
 	struct btrfs_work work;
-};
 
+	struct completion completion;
+	struct btrfs_work flush_work;
+	struct list_head work_list;
+};
 
 /*
  * calculates the total size you need to allocate for an ordered sum
@@ -186,7 +189,7 @@
 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 				struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
-void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
+int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
 void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct inode *inode);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 5e23684..50d95fd 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -297,6 +297,9 @@
 		case BTRFS_DEV_STATS_KEY:
 			printk(KERN_INFO "\t\tdevice stats\n");
 			break;
+		case BTRFS_DEV_REPLACE_KEY:
+			printk(KERN_INFO "\t\tdev replace\n");
+			break;
 		};
 	}
 }
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index a955669..96b93da 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -27,6 +27,7 @@
 #include "volumes.h"
 #include "disk-io.h"
 #include "transaction.h"
+#include "dev-replace.h"
 
 #undef DEBUG
 
@@ -323,7 +324,6 @@
 	struct reada_extent *re = NULL;
 	struct reada_extent *re_exist = NULL;
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct btrfs_bio *bbio = NULL;
 	struct btrfs_device *dev;
 	struct btrfs_device *prev_dev;
@@ -332,6 +332,7 @@
 	int nzones = 0;
 	int i;
 	unsigned long index = logical >> PAGE_CACHE_SHIFT;
+	int dev_replace_is_ongoing;
 
 	spin_lock(&fs_info->reada_lock);
 	re = radix_tree_lookup(&fs_info->reada_tree, index);
@@ -358,7 +359,8 @@
 	 * map block
 	 */
 	length = blocksize;
-	ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0);
+	ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
+			      &bbio, 0);
 	if (ret || !bbio || length < blocksize)
 		goto error;
 
@@ -393,6 +395,7 @@
 	}
 
 	/* insert extent in reada_tree + all per-device trees, all or nothing */
+	btrfs_dev_replace_lock(&fs_info->dev_replace);
 	spin_lock(&fs_info->reada_lock);
 	ret = radix_tree_insert(&fs_info->reada_tree, index, re);
 	if (ret == -EEXIST) {
@@ -400,13 +403,17 @@
 		BUG_ON(!re_exist);
 		re_exist->refcnt++;
 		spin_unlock(&fs_info->reada_lock);
+		btrfs_dev_replace_unlock(&fs_info->dev_replace);
 		goto error;
 	}
 	if (ret) {
 		spin_unlock(&fs_info->reada_lock);
+		btrfs_dev_replace_unlock(&fs_info->dev_replace);
 		goto error;
 	}
 	prev_dev = NULL;
+	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
+			&fs_info->dev_replace);
 	for (i = 0; i < nzones; ++i) {
 		dev = bbio->stripes[i].dev;
 		if (dev == prev_dev) {
@@ -419,21 +426,36 @@
 			 */
 			continue;
 		}
+		if (!dev->bdev) {
+			/* cannot read ahead on missing device */
+			continue;
+		}
+		if (dev_replace_is_ongoing &&
+		    dev == fs_info->dev_replace.tgtdev) {
+			/*
+			 * as this device is selected for reading only as
+			 * a last resort, skip it for read ahead.
+			 */
+			continue;
+		}
 		prev_dev = dev;
 		ret = radix_tree_insert(&dev->reada_extents, index, re);
 		if (ret) {
 			while (--i >= 0) {
 				dev = bbio->stripes[i].dev;
 				BUG_ON(dev == NULL);
+				/* ignore whether the entry was inserted */
 				radix_tree_delete(&dev->reada_extents, index);
 			}
 			BUG_ON(fs_info == NULL);
 			radix_tree_delete(&fs_info->reada_tree, index);
 			spin_unlock(&fs_info->reada_lock);
+			btrfs_dev_replace_unlock(&fs_info->dev_replace);
 			goto error;
 		}
 	}
 	spin_unlock(&fs_info->reada_lock);
+	btrfs_dev_replace_unlock(&fs_info->dev_replace);
 
 	kfree(bbio);
 	return re;
@@ -915,7 +937,10 @@
 	generation = btrfs_header_generation(node);
 	free_extent_buffer(node);
 
-	reada_add_block(rc, start, &max_key, level, generation);
+	if (reada_add_block(rc, start, &max_key, level, generation)) {
+		kfree(rc);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	reada_start_machine(root->fs_info);
 
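The insertion protocol above can be condensed as follows; this restates the loop for clarity rather than adding new code. The dev-replace lock is taken outside the reada spinlock so that fs_info->dev_replace.tgtdev cannot change while the per-device radix trees are updated all-or-nothing:

	btrfs_dev_replace_lock(&fs_info->dev_replace);
	spin_lock(&fs_info->reada_lock);
	for (i = 0; i < nzones; ++i) {
		struct btrfs_device *dev = bbio->stripes[i].dev;

		if (!dev->bdev)		/* missing device: nothing to read */
			continue;
		if (dev_replace_is_ongoing &&
		    dev == fs_info->dev_replace.tgtdev)
			continue;	/* last-resort read source: skip */
		/* radix_tree_insert(&dev->reada_extents, index, re), with
		 * rollback of all earlier insertions on failure */
	}
	spin_unlock(&fs_info->reada_lock);
	btrfs_dev_replace_unlock(&fs_info->dev_replace);
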
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 776f0aa..300e09a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2025,7 +2025,6 @@
 	struct btrfs_root_item *root_item;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
-	unsigned long nr;
 	int level;
 	int max_level;
 	int replaced = 0;
@@ -2074,7 +2073,8 @@
 		BUG_ON(IS_ERR(trans));
 		trans->block_rsv = rc->block_rsv;
 
-		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved);
+		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
+					     BTRFS_RESERVE_FLUSH_ALL);
 		if (ret) {
 			BUG_ON(ret != -EAGAIN);
 			ret = btrfs_commit_transaction(trans, root);
@@ -2125,10 +2125,9 @@
 			       path->slots[level]);
 		root_item->drop_level = level;
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction_throttle(trans, root);
 
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 
 		if (replaced && rc->stage == UPDATE_DATA_PTRS)
 			invalidate_extent_cache(root, &key, &next_key);
@@ -2155,10 +2154,9 @@
 		btrfs_update_reloc_root(trans, root);
 	}
 
-	nr = trans->blocks_used;
 	btrfs_end_transaction_throttle(trans, root);
 
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 
 	if (replaced && rc->stage == UPDATE_DATA_PTRS)
 		invalidate_extent_cache(root, &key, &next_key);
@@ -2184,7 +2182,8 @@
 again:
 	if (!err) {
 		num_bytes = rc->merging_rsv_size;
-		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
+		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
+					  BTRFS_RESERVE_FLUSH_ALL);
 		if (ret)
 			err = ret;
 	}
@@ -2459,7 +2458,8 @@
 	num_bytes = calcu_metadata_size(rc, node, 1) * 2;
 
 	trans->block_rsv = rc->block_rsv;
-	ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
+	ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
+				  BTRFS_RESERVE_FLUSH_ALL);
 	if (ret) {
 		if (ret == -EAGAIN)
 			rc->commit_transaction = 1;
@@ -3259,7 +3259,6 @@
 	struct btrfs_path *path;
 	struct btrfs_root *root = fs_info->tree_root;
 	struct btrfs_trans_handle *trans;
-	unsigned long nr;
 	int ret = 0;
 
 	if (inode)
@@ -3293,9 +3292,8 @@
 	ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
 
 	btrfs_free_path(path);
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 out:
 	iput(inode);
 	return ret;
@@ -3685,7 +3683,8 @@
 	 * is no reservation in transaction handle.
 	 */
 	ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
-				  rc->extent_root->nodesize * 256);
+				  rc->extent_root->nodesize * 256,
+				  BTRFS_RESERVE_FLUSH_ALL);
 	if (ret)
 		return ret;
 
@@ -3711,7 +3710,6 @@
 	struct btrfs_trans_handle *trans = NULL;
 	struct btrfs_path *path;
 	struct btrfs_extent_item *ei;
-	unsigned long nr;
 	u64 flags;
 	u32 item_size;
 	int ret;
@@ -3828,9 +3826,8 @@
 			ret = btrfs_commit_transaction(trans, rc->extent_root);
 			BUG_ON(ret);
 		} else {
-			nr = trans->blocks_used;
 			btrfs_end_transaction_throttle(trans, rc->extent_root);
-			btrfs_btree_balance_dirty(rc->extent_root, nr);
+			btrfs_btree_balance_dirty(rc->extent_root);
 		}
 		trans = NULL;
 
@@ -3860,9 +3857,8 @@
 			  GFP_NOFS);
 
 	if (trans) {
-		nr = trans->blocks_used;
 		btrfs_end_transaction_throttle(trans, rc->extent_root);
-		btrfs_btree_balance_dirty(rc->extent_root, nr);
+		btrfs_btree_balance_dirty(rc->extent_root);
 	}
 
 	if (!err) {
@@ -3941,7 +3937,6 @@
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root;
 	struct btrfs_key key;
-	unsigned long nr;
 	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
 	int err = 0;
 
@@ -3969,9 +3964,8 @@
 
 	err = btrfs_orphan_add(trans, inode);
 out:
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 	if (err) {
 		if (inode)
 			iput(inode);
@@ -4057,7 +4051,11 @@
 	       (unsigned long long)rc->block_group->key.objectid,
 	       (unsigned long long)rc->block_group->flags);
 
-	btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
+	ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
 	btrfs_wait_ordered_extents(fs_info->tree_root, 0);
 
 	while (1) {
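
Every btrfs_block_rsv_add()/btrfs_block_rsv_refill() call in this file now names its flush policy explicitly instead of relying on an implied default. Assuming the three-level enum introduced by the reservation rework this series builds on, the policy choice looks roughly like this:

	/* sketch; the enum is quoted for reference, not redefined here */
	enum btrfs_reserve_flush_enum {
		BTRFS_RESERVE_NO_FLUSH,		/* must not touch the fs */
		BTRFS_RESERVE_FLUSH_LIMIT,	/* flush a bounded amount */
		BTRFS_RESERVE_FLUSH_ALL,	/* may flush and block freely */
	};

	ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);
	if (ret == -EAGAIN)	/* could not reserve; a commit may free space */
		rc->commit_transaction = 1;

Relocation can afford to block on flushing, hence BTRFS_RESERVE_FLUSH_ALL throughout this file.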
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index eb923d0..668af53 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -548,9 +548,9 @@
 	struct btrfs_root_item *item = &root->root_item;
 	struct timespec ct = CURRENT_TIME;
 
-	spin_lock(&root->root_times_lock);
+	spin_lock(&root->root_item_lock);
 	item->ctransid = cpu_to_le64(trans->transid);
 	item->ctime.sec = cpu_to_le64(ct.tv_sec);
 	item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
-	spin_unlock(&root->root_times_lock);
+	spin_unlock(&root->root_item_lock);
 }
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 27892f6..bdbb94f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 STRATO.  All rights reserved.
+ * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -25,6 +25,7 @@
 #include "transaction.h"
 #include "backref.h"
 #include "extent_io.h"
+#include "dev-replace.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
 
@@ -42,10 +43,23 @@
  */
 
 struct scrub_block;
-struct scrub_dev;
+struct scrub_ctx;
 
-#define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
-#define SCRUB_BIOS_PER_DEV	16	/* 1 MB per device in flight */
+/*
+ * the following three values influence only performance.
+ * The last one configures the number of parallel and outstanding I/O
+ * operations. The first two values configure an upper limit for the number
+ * of (dynamically allocated) pages that are added to a bio.
+ */
+#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
+#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
+#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */
+
+/*
+ * the following value times PAGE_SIZE needs to be large enough to match the
+ * largest node/leaf/sector size that shall be supported.
+ * Values larger than BTRFS_STRIPE_LEN are not supported.
+ */
 #define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
 
 struct scrub_page {
@@ -56,6 +70,8 @@
 	u64			generation;
 	u64			logical;
 	u64			physical;
+	u64			physical_for_dev_replace;
+	atomic_t		ref_count;
 	struct {
 		unsigned int	mirror_num:8;
 		unsigned int	have_csum:1;
@@ -66,23 +82,28 @@
 
 struct scrub_bio {
 	int			index;
-	struct scrub_dev	*sdev;
+	struct scrub_ctx	*sctx;
+	struct btrfs_device	*dev;
 	struct bio		*bio;
 	int			err;
 	u64			logical;
 	u64			physical;
-	struct scrub_page	*pagev[SCRUB_PAGES_PER_BIO];
+#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
+	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
+#else
+	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
+#endif
 	int			page_count;
 	int			next_free;
 	struct btrfs_work	work;
 };
 
 struct scrub_block {
-	struct scrub_page	pagev[SCRUB_MAX_PAGES_PER_BLOCK];
+	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
 	int			page_count;
 	atomic_t		outstanding_pages;
 	atomic_t		ref_count; /* free mem on transition to zero */
-	struct scrub_dev	*sdev;
+	struct scrub_ctx	*sctx;
 	struct {
 		unsigned int	header_error:1;
 		unsigned int	checksum_error:1;
@@ -91,23 +112,35 @@
 	};
 };
 
-struct scrub_dev {
-	struct scrub_bio	*bios[SCRUB_BIOS_PER_DEV];
-	struct btrfs_device	*dev;
+struct scrub_wr_ctx {
+	struct scrub_bio *wr_curr_bio;
+	struct btrfs_device *tgtdev;
+	int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
+	atomic_t flush_all_writes;
+	struct mutex wr_lock;
+};
+
+struct scrub_ctx {
+	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
+	struct btrfs_root	*dev_root;
 	int			first_free;
 	int			curr;
-	atomic_t		in_flight;
-	atomic_t		fixup_cnt;
+	atomic_t		bios_in_flight;
+	atomic_t		workers_pending;
 	spinlock_t		list_lock;
 	wait_queue_head_t	list_wait;
 	u16			csum_size;
 	struct list_head	csum_list;
 	atomic_t		cancel_req;
 	int			readonly;
-	int			pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */
+	int			pages_per_rd_bio;
 	u32			sectorsize;
 	u32			nodesize;
 	u32			leafsize;
+
+	int			is_dev_replace;
+	struct scrub_wr_ctx	wr_ctx;
+
 	/*
 	 * statistics
 	 */
@@ -116,13 +149,23 @@
 };
 
 struct scrub_fixup_nodatasum {
-	struct scrub_dev	*sdev;
+	struct scrub_ctx	*sctx;
+	struct btrfs_device	*dev;
 	u64			logical;
 	struct btrfs_root	*root;
 	struct btrfs_work	work;
 	int			mirror_num;
 };
 
+struct scrub_copy_nocow_ctx {
+	struct scrub_ctx	*sctx;
+	u64			logical;
+	u64			len;
+	int			mirror_num;
+	u64			physical_for_dev_replace;
+	struct btrfs_work	work;
+};
+
 struct scrub_warning {
 	struct btrfs_path	*path;
 	u64			extent_item_size;
@@ -137,15 +180,20 @@
 };
 
 
+static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
+static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
+static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
+static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
-static int scrub_setup_recheck_block(struct scrub_dev *sdev,
-				     struct btrfs_mapping_tree *map_tree,
+static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
+				     struct btrfs_fs_info *fs_info,
+				     struct scrub_block *original_sblock,
 				     u64 length, u64 logical,
-				     struct scrub_block *sblock);
-static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
-			       struct scrub_block *sblock, int is_metadata,
-			       int have_csum, u8 *csum, u64 generation,
-			       u16 csum_size);
+				     struct scrub_block *sblocks_for_recheck);
+static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
+				struct scrub_block *sblock, int is_metadata,
+				int have_csum, u8 *csum, u64 generation,
+				u16 csum_size);
 static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 					 struct scrub_block *sblock,
 					 int is_metadata, int have_csum,
@@ -158,118 +206,221 @@
 static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 					    struct scrub_block *sblock_good,
 					    int page_num, int force_write);
+static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
+static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
+					   int page_num);
 static int scrub_checksum_data(struct scrub_block *sblock);
 static int scrub_checksum_tree_block(struct scrub_block *sblock);
 static int scrub_checksum_super(struct scrub_block *sblock);
 static void scrub_block_get(struct scrub_block *sblock);
 static void scrub_block_put(struct scrub_block *sblock);
-static int scrub_add_page_to_bio(struct scrub_dev *sdev,
-				 struct scrub_page *spage);
-static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
-		       u64 physical, u64 flags, u64 gen, int mirror_num,
-		       u8 *csum, int force);
+static void scrub_page_get(struct scrub_page *spage);
+static void scrub_page_put(struct scrub_page *spage);
+static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
+				    struct scrub_page *spage);
+static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
+		       u64 physical, struct btrfs_device *dev, u64 flags,
+		       u64 gen, int mirror_num, u8 *csum, int force,
+		       u64 physical_for_dev_replace);
 static void scrub_bio_end_io(struct bio *bio, int err);
 static void scrub_bio_end_io_worker(struct btrfs_work *work);
 static void scrub_block_complete(struct scrub_block *sblock);
+static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
+			       u64 extent_logical, u64 extent_len,
+			       u64 *extent_physical,
+			       struct btrfs_device **extent_dev,
+			       int *extent_mirror_num);
+static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
+			      struct scrub_wr_ctx *wr_ctx,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_device *dev,
+			      int is_dev_replace);
+static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
+static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
+				    struct scrub_page *spage);
+static void scrub_wr_submit(struct scrub_ctx *sctx);
+static void scrub_wr_bio_end_io(struct bio *bio, int err);
+static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
+static int write_page_nocow(struct scrub_ctx *sctx,
+			    u64 physical_for_dev_replace, struct page *page);
+static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
+				      void *ctx);
+static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
+			    int mirror_num, u64 physical_for_dev_replace);
+static void copy_nocow_pages_worker(struct btrfs_work *work);
 
 
-static void scrub_free_csums(struct scrub_dev *sdev)
+static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
-	while (!list_empty(&sdev->csum_list)) {
+	atomic_inc(&sctx->bios_in_flight);
+}
+
+static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
+{
+	atomic_dec(&sctx->bios_in_flight);
+	wake_up(&sctx->list_wait);
+}
+
+/*
+ * used for workers that require transaction commits (i.e., for the
+ * NOCOW case)
+ */
+static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
+{
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+
+	/*
+	 * increment scrubs_running to prevent cancel requests from
+	 * completing as long as a worker is running. we must also
+	 * increment scrubs_paused to prevent deadlocking on pause
+	 * requests used for transaction commits (as the worker uses a
+	 * transaction context). it is safe to regard the worker
+	 * as paused for all practical matters. effectively, we only
+	 * prevent cancellation requests from completing.
+	 */
+	mutex_lock(&fs_info->scrub_lock);
+	atomic_inc(&fs_info->scrubs_running);
+	atomic_inc(&fs_info->scrubs_paused);
+	mutex_unlock(&fs_info->scrub_lock);
+	atomic_inc(&sctx->workers_pending);
+}
+
+/* used for workers that require transaction commits */
+static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
+{
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+
+	/*
+	 * see scrub_pending_trans_workers_inc() for why we're pretending
+	 * to be paused in the scrub counters
+	 */
+	mutex_lock(&fs_info->scrub_lock);
+	atomic_dec(&fs_info->scrubs_running);
+	atomic_dec(&fs_info->scrubs_paused);
+	mutex_unlock(&fs_info->scrub_lock);
+	atomic_dec(&sctx->workers_pending);
+	wake_up(&fs_info->scrub_pause_wait);
+	wake_up(&sctx->list_wait);
+}
+
+static void scrub_free_csums(struct scrub_ctx *sctx)
+{
+	while (!list_empty(&sctx->csum_list)) {
 		struct btrfs_ordered_sum *sum;
-		sum = list_first_entry(&sdev->csum_list,
+		sum = list_first_entry(&sctx->csum_list,
 				       struct btrfs_ordered_sum, list);
 		list_del(&sum->list);
 		kfree(sum);
 	}
 }
 
-static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
+static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 {
 	int i;
 
-	if (!sdev)
+	if (!sctx)
 		return;
 
+	scrub_free_wr_ctx(&sctx->wr_ctx);
+
 	/* this can happen when scrub is cancelled */
-	if (sdev->curr != -1) {
-		struct scrub_bio *sbio = sdev->bios[sdev->curr];
+	if (sctx->curr != -1) {
+		struct scrub_bio *sbio = sctx->bios[sctx->curr];
 
 		for (i = 0; i < sbio->page_count; i++) {
-			BUG_ON(!sbio->pagev[i]);
-			BUG_ON(!sbio->pagev[i]->page);
+			WARN_ON(!sbio->pagev[i]->page);
 			scrub_block_put(sbio->pagev[i]->sblock);
 		}
 		bio_put(sbio->bio);
 	}
 
-	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
-		struct scrub_bio *sbio = sdev->bios[i];
+	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
+		struct scrub_bio *sbio = sctx->bios[i];
 
 		if (!sbio)
 			break;
 		kfree(sbio);
 	}
 
-	scrub_free_csums(sdev);
-	kfree(sdev);
+	scrub_free_csums(sctx);
+	kfree(sctx);
 }
 
 static noinline_for_stack
-struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
+struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 {
-	struct scrub_dev *sdev;
+	struct scrub_ctx *sctx;
 	int		i;
 	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
-	int pages_per_bio;
+	int pages_per_rd_bio;
+	int ret;
 
-	pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
-			      bio_get_nr_vecs(dev->bdev));
-	sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
-	if (!sdev)
+	/*
+	 * the setting of pages_per_rd_bio is correct for scrub but might
+	 * be wrong for the dev_replace code where we might read from
+	 * different devices in the initial huge bios. However, that
+	 * code is able to correctly handle the case when adding a page
+	 * to a bio fails.
+	 */
+	if (dev->bdev)
+		pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
+					 bio_get_nr_vecs(dev->bdev));
+	else
+		pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
+	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
+	if (!sctx)
 		goto nomem;
-	sdev->dev = dev;
-	sdev->pages_per_bio = pages_per_bio;
-	sdev->curr = -1;
-	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
+	sctx->is_dev_replace = is_dev_replace;
+	sctx->pages_per_rd_bio = pages_per_rd_bio;
+	sctx->curr = -1;
+	sctx->dev_root = dev->dev_root;
+	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
 		struct scrub_bio *sbio;
 
 		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
 		if (!sbio)
 			goto nomem;
-		sdev->bios[i] = sbio;
+		sctx->bios[i] = sbio;
 
 		sbio->index = i;
-		sbio->sdev = sdev;
+		sbio->sctx = sctx;
 		sbio->page_count = 0;
 		sbio->work.func = scrub_bio_end_io_worker;
 
-		if (i != SCRUB_BIOS_PER_DEV-1)
-			sdev->bios[i]->next_free = i + 1;
+		if (i != SCRUB_BIOS_PER_SCTX - 1)
+			sctx->bios[i]->next_free = i + 1;
 		else
-			sdev->bios[i]->next_free = -1;
+			sctx->bios[i]->next_free = -1;
 	}
-	sdev->first_free = 0;
-	sdev->nodesize = dev->dev_root->nodesize;
-	sdev->leafsize = dev->dev_root->leafsize;
-	sdev->sectorsize = dev->dev_root->sectorsize;
-	atomic_set(&sdev->in_flight, 0);
-	atomic_set(&sdev->fixup_cnt, 0);
-	atomic_set(&sdev->cancel_req, 0);
-	sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy);
-	INIT_LIST_HEAD(&sdev->csum_list);
+	sctx->first_free = 0;
+	sctx->nodesize = dev->dev_root->nodesize;
+	sctx->leafsize = dev->dev_root->leafsize;
+	sctx->sectorsize = dev->dev_root->sectorsize;
+	atomic_set(&sctx->bios_in_flight, 0);
+	atomic_set(&sctx->workers_pending, 0);
+	atomic_set(&sctx->cancel_req, 0);
+	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	INIT_LIST_HEAD(&sctx->csum_list);
 
-	spin_lock_init(&sdev->list_lock);
-	spin_lock_init(&sdev->stat_lock);
-	init_waitqueue_head(&sdev->list_wait);
-	return sdev;
+	spin_lock_init(&sctx->list_lock);
+	spin_lock_init(&sctx->stat_lock);
+	init_waitqueue_head(&sctx->list_wait);
+
+	ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
+				 fs_info->dev_replace.tgtdev, is_dev_replace);
+	if (ret) {
+		scrub_free_ctx(sctx);
+		return ERR_PTR(ret);
+	}
+	return sctx;
 
 nomem:
-	scrub_free_dev(sdev);
+	scrub_free_ctx(sctx);
 	return ERR_PTR(-ENOMEM);
 }
 
-static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
+static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
+				     void *warn_ctx)
 {
 	u64 isize;
 	u32 nlink;
@@ -277,7 +428,7 @@
 	int i;
 	struct extent_buffer *eb;
 	struct btrfs_inode_item *inode_item;
-	struct scrub_warning *swarn = ctx;
+	struct scrub_warning *swarn = warn_ctx;
 	struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
 	struct inode_fs_paths *ipath = NULL;
 	struct btrfs_root *local_root;
@@ -345,8 +496,8 @@
 
 static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 {
-	struct btrfs_device *dev = sblock->sdev->dev;
-	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
+	struct btrfs_device *dev;
+	struct btrfs_fs_info *fs_info;
 	struct btrfs_path *path;
 	struct btrfs_key found_key;
 	struct extent_buffer *eb;
@@ -361,15 +512,18 @@
 	const int bufsize = 4096;
 	int ret;
 
+	WARN_ON(sblock->page_count < 1);
+	dev = sblock->pagev[0]->dev;
+	fs_info = sblock->sctx->dev_root->fs_info;
+
 	path = btrfs_alloc_path();
 
 	swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
 	swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
-	BUG_ON(sblock->page_count < 1);
-	swarn.sector = (sblock->pagev[0].physical) >> 9;
-	swarn.logical = sblock->pagev[0].logical;
+	swarn.sector = (sblock->pagev[0]->physical) >> 9;
+	swarn.logical = sblock->pagev[0]->logical;
 	swarn.errstr = errstr;
-	swarn.dev = dev;
+	swarn.dev = NULL;
 	swarn.msg_bufsize = bufsize;
 	swarn.scratch_bufsize = bufsize;
 
@@ -405,6 +559,7 @@
 		} while (ret != 1);
 	} else {
 		swarn.path = path;
+		swarn.dev = dev;
 		iterate_extent_inodes(fs_info, found_key.objectid,
 					extent_item_pos, 1,
 					scrub_print_warning_inode, &swarn);
@@ -416,11 +571,11 @@
 	kfree(swarn.msg_buf);
 }
 
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
+static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
 {
 	struct page *page = NULL;
 	unsigned long index;
-	struct scrub_fixup_nodatasum *fixup = ctx;
+	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
 	int ret;
 	int corrected = 0;
 	struct btrfs_key key;
@@ -451,7 +606,7 @@
 	}
 
 	if (PageUptodate(page)) {
-		struct btrfs_mapping_tree *map_tree;
+		struct btrfs_fs_info *fs_info;
 		if (PageDirty(page)) {
 			/*
 			 * we need to write the data to the defect sector. the
@@ -472,8 +627,8 @@
 			ret = -EIO;
 			goto out;
 		}
-		map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
-		ret = repair_io_failure(map_tree, offset, PAGE_SIZE,
+		fs_info = BTRFS_I(inode)->root->fs_info;
+		ret = repair_io_failure(fs_info, offset, PAGE_SIZE,
 					fixup->logical, page,
 					fixup->mirror_num);
 		unlock_page(page);
@@ -530,21 +685,21 @@
 {
 	int ret;
 	struct scrub_fixup_nodatasum *fixup;
-	struct scrub_dev *sdev;
+	struct scrub_ctx *sctx;
 	struct btrfs_trans_handle *trans = NULL;
 	struct btrfs_fs_info *fs_info;
 	struct btrfs_path *path;
 	int uncorrectable = 0;
 
 	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
-	sdev = fixup->sdev;
+	sctx = fixup->sctx;
 	fs_info = fixup->root->fs_info;
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		spin_lock(&sdev->stat_lock);
-		++sdev->stat.malloc_errors;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		++sctx->stat.malloc_errors;
+		spin_unlock(&sctx->stat_lock);
 		uncorrectable = 1;
 		goto out;
 	}
@@ -573,35 +728,30 @@
 	}
 	WARN_ON(ret != 1);
 
-	spin_lock(&sdev->stat_lock);
-	++sdev->stat.corrected_errors;
-	spin_unlock(&sdev->stat_lock);
+	spin_lock(&sctx->stat_lock);
+	++sctx->stat.corrected_errors;
+	spin_unlock(&sctx->stat_lock);
 
 out:
 	if (trans && !IS_ERR(trans))
 		btrfs_end_transaction(trans, fixup->root);
 	if (uncorrectable) {
-		spin_lock(&sdev->stat_lock);
-		++sdev->stat.uncorrectable_errors;
-		spin_unlock(&sdev->stat_lock);
-
+		spin_lock(&sctx->stat_lock);
+		++sctx->stat.uncorrectable_errors;
+		spin_unlock(&sctx->stat_lock);
+		btrfs_dev_replace_stats_inc(
+			&sctx->dev_root->fs_info->dev_replace.
+			num_uncorrectable_read_errors);
 		printk_ratelimited_in_rcu(KERN_ERR
 			"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
 			(unsigned long long)fixup->logical,
-			rcu_str_deref(sdev->dev->name));
+			rcu_str_deref(fixup->dev->name));
 	}
 
 	btrfs_free_path(path);
 	kfree(fixup);
 
-	/* see caller why we're pretending to be paused in the scrub counters */
-	mutex_lock(&fs_info->scrub_lock);
-	atomic_dec(&fs_info->scrubs_running);
-	atomic_dec(&fs_info->scrubs_paused);
-	mutex_unlock(&fs_info->scrub_lock);
-	atomic_dec(&sdev->fixup_cnt);
-	wake_up(&fs_info->scrub_pause_wait);
-	wake_up(&sdev->list_wait);
+	scrub_pending_trans_workers_dec(sctx);
 }
 
 /*
@@ -614,7 +764,8 @@
  */
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 {
-	struct scrub_dev *sdev = sblock_to_check->sdev;
+	struct scrub_ctx *sctx = sblock_to_check->sctx;
+	struct btrfs_device *dev;
 	struct btrfs_fs_info *fs_info;
 	u64 length;
 	u64 logical;
@@ -633,16 +784,33 @@
 				      DEFAULT_RATELIMIT_BURST);
 
 	BUG_ON(sblock_to_check->page_count < 1);
-	fs_info = sdev->dev->dev_root->fs_info;
+	fs_info = sctx->dev_root->fs_info;
+	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
+		/*
+		 * if we find an error in a super block, we just report it.
+		 * Super blocks get rewritten with the next transaction
+		 * commit anyway
+		 */
+		spin_lock(&sctx->stat_lock);
+		++sctx->stat.super_errors;
+		spin_unlock(&sctx->stat_lock);
+		return 0;
+	}
 	length = sblock_to_check->page_count * PAGE_SIZE;
-	logical = sblock_to_check->pagev[0].logical;
-	generation = sblock_to_check->pagev[0].generation;
-	BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
-	failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
-	is_metadata = !(sblock_to_check->pagev[0].flags &
+	logical = sblock_to_check->pagev[0]->logical;
+	generation = sblock_to_check->pagev[0]->generation;
+	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
+	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
+	is_metadata = !(sblock_to_check->pagev[0]->flags &
 			BTRFS_EXTENT_FLAG_DATA);
-	have_csum = sblock_to_check->pagev[0].have_csum;
-	csum = sblock_to_check->pagev[0].csum;
+	have_csum = sblock_to_check->pagev[0]->have_csum;
+	csum = sblock_to_check->pagev[0]->csum;
+	dev = sblock_to_check->pagev[0]->dev;
+
+	if (sctx->is_dev_replace && !is_metadata && !have_csum) {
+		sblocks_for_recheck = NULL;
+		goto nodatasum_case;
+	}
 
 	/*
 	 * read all mirrors one after the other. This includes to
@@ -677,43 +845,32 @@
 				     sizeof(*sblocks_for_recheck),
 				     GFP_NOFS);
 	if (!sblocks_for_recheck) {
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.malloc_errors++;
-		sdev->stat.read_errors++;
-		sdev->stat.uncorrectable_errors++;
-		spin_unlock(&sdev->stat_lock);
-		btrfs_dev_stat_inc_and_print(sdev->dev,
-					     BTRFS_DEV_STAT_READ_ERRS);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		sctx->stat.read_errors++;
+		sctx->stat.uncorrectable_errors++;
+		spin_unlock(&sctx->stat_lock);
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
 		goto out;
 	}
 
 	/* setup the context, map the logical blocks and alloc the pages */
-	ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
+	ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
 					logical, sblocks_for_recheck);
 	if (ret) {
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.read_errors++;
-		sdev->stat.uncorrectable_errors++;
-		spin_unlock(&sdev->stat_lock);
-		btrfs_dev_stat_inc_and_print(sdev->dev,
-					     BTRFS_DEV_STAT_READ_ERRS);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.read_errors++;
+		sctx->stat.uncorrectable_errors++;
+		spin_unlock(&sctx->stat_lock);
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
 		goto out;
 	}
 	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
 	sblock_bad = sblocks_for_recheck + failed_mirror_index;
 
 	/* build and submit the bios for the failed mirror, check checksums */
-	ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
-				  csum, generation, sdev->csum_size);
-	if (ret) {
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.read_errors++;
-		sdev->stat.uncorrectable_errors++;
-		spin_unlock(&sdev->stat_lock);
-		btrfs_dev_stat_inc_and_print(sdev->dev,
-					     BTRFS_DEV_STAT_READ_ERRS);
-		goto out;
-	}
+	scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
+			    csum, generation, sctx->csum_size);
 
 	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
 	    sblock_bad->no_io_error_seen) {
@@ -725,50 +882,54 @@
 		 * different bio (usually one of the two latter cases is
 		 * the cause)
 		 */
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.unverified_errors++;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.unverified_errors++;
+		spin_unlock(&sctx->stat_lock);
 
+		if (sctx->is_dev_replace)
+			scrub_write_block_to_dev_replace(sblock_bad);
 		goto out;
 	}
 
 	if (!sblock_bad->no_io_error_seen) {
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.read_errors++;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.read_errors++;
+		spin_unlock(&sctx->stat_lock);
 		if (__ratelimit(&_rs))
 			scrub_print_warning("i/o error", sblock_to_check);
-		btrfs_dev_stat_inc_and_print(sdev->dev,
-					     BTRFS_DEV_STAT_READ_ERRS);
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
 	} else if (sblock_bad->checksum_error) {
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.csum_errors++;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.csum_errors++;
+		spin_unlock(&sctx->stat_lock);
 		if (__ratelimit(&_rs))
 			scrub_print_warning("checksum error", sblock_to_check);
-		btrfs_dev_stat_inc_and_print(sdev->dev,
+		btrfs_dev_stat_inc_and_print(dev,
 					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
 	} else if (sblock_bad->header_error) {
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.verify_errors++;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.verify_errors++;
+		spin_unlock(&sctx->stat_lock);
 		if (__ratelimit(&_rs))
 			scrub_print_warning("checksum/header error",
 					    sblock_to_check);
 		if (sblock_bad->generation_error)
-			btrfs_dev_stat_inc_and_print(sdev->dev,
+			btrfs_dev_stat_inc_and_print(dev,
 				BTRFS_DEV_STAT_GENERATION_ERRS);
 		else
-			btrfs_dev_stat_inc_and_print(sdev->dev,
+			btrfs_dev_stat_inc_and_print(dev,
 				BTRFS_DEV_STAT_CORRUPTION_ERRS);
 	}
 
-	if (sdev->readonly)
+	if (sctx->readonly && !sctx->is_dev_replace)
 		goto did_not_correct_error;
 
 	if (!is_metadata && !have_csum) {
 		struct scrub_fixup_nodatasum *fixup_nodatasum;
 
+nodatasum_case:
+		WARN_ON(sctx->is_dev_replace);
+
 		/*
 		 * !is_metadata and !have_csum, this means that the data
 		 * might not be COW'ed, that it might be modified
@@ -779,24 +940,12 @@
 		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
 		if (!fixup_nodatasum)
 			goto did_not_correct_error;
-		fixup_nodatasum->sdev = sdev;
+		fixup_nodatasum->sctx = sctx;
+		fixup_nodatasum->dev = dev;
 		fixup_nodatasum->logical = logical;
 		fixup_nodatasum->root = fs_info->extent_root;
 		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
-		/*
-		 * increment scrubs_running to prevent cancel requests from
-		 * completing as long as a fixup worker is running. we must also
-		 * increment scrubs_paused to prevent deadlocking on pause
-		 * requests used for transactions commits (as the worker uses a
-		 * transaction context). it is safe to regard the fixup worker
-		 * as paused for all matters practical. effectively, we only
-		 * avoid cancellation requests from completing.
-		 */
-		mutex_lock(&fs_info->scrub_lock);
-		atomic_inc(&fs_info->scrubs_running);
-		atomic_inc(&fs_info->scrubs_paused);
-		mutex_unlock(&fs_info->scrub_lock);
-		atomic_inc(&sdev->fixup_cnt);
+		scrub_pending_trans_workers_inc(sctx);
 		fixup_nodatasum->work.func = scrub_fixup_nodatasum;
 		btrfs_queue_worker(&fs_info->scrub_workers,
 				   &fixup_nodatasum->work);
@@ -805,26 +954,8 @@
 
 	/*
 	 * now build and submit the bios for the other mirrors, check
-	 * checksums
-	 */
-	for (mirror_index = 0;
-	     mirror_index < BTRFS_MAX_MIRRORS &&
-	     sblocks_for_recheck[mirror_index].page_count > 0;
-	     mirror_index++) {
-		if (mirror_index == failed_mirror_index)
-			continue;
-
-		/* build and submit the bios, check checksums */
-		ret = scrub_recheck_block(fs_info,
-					  sblocks_for_recheck + mirror_index,
-					  is_metadata, have_csum, csum,
-					  generation, sdev->csum_size);
-		if (ret)
-			goto did_not_correct_error;
-	}
-
-	/*
-	 * first try to pick the mirror which is completely without I/O
+	 * checksums.
+	 * First try to pick the mirror which is completely without I/O
 	 * errors and also does not have a checksum error.
 	 * If one is found, and if a checksum is present, the full block
 	 * that is known to contain an error is rewritten. Afterwards
@@ -840,24 +971,93 @@
 	     mirror_index < BTRFS_MAX_MIRRORS &&
 	     sblocks_for_recheck[mirror_index].page_count > 0;
 	     mirror_index++) {
-		struct scrub_block *sblock_other = sblocks_for_recheck +
-						   mirror_index;
+		struct scrub_block *sblock_other;
+
+		if (mirror_index == failed_mirror_index)
+			continue;
+		sblock_other = sblocks_for_recheck + mirror_index;
+
+		/* build and submit the bios, check checksums */
+		scrub_recheck_block(fs_info, sblock_other, is_metadata,
+				    have_csum, csum, generation,
+				    sctx->csum_size);
 
 		if (!sblock_other->header_error &&
 		    !sblock_other->checksum_error &&
 		    sblock_other->no_io_error_seen) {
-			int force_write = is_metadata || have_csum;
+			if (sctx->is_dev_replace) {
+				scrub_write_block_to_dev_replace(sblock_other);
+			} else {
+				int force_write = is_metadata || have_csum;
 
-			ret = scrub_repair_block_from_good_copy(sblock_bad,
-								sblock_other,
-								force_write);
+				ret = scrub_repair_block_from_good_copy(
+						sblock_bad, sblock_other,
+						force_write);
+			}
 			if (0 == ret)
 				goto corrected_error;
 		}
 	}
 
 	/*
-	 * in case of I/O errors in the area that is supposed to be
+	 * for dev_replace, pick good pages and write to the target device.
+	 */
+	if (sctx->is_dev_replace) {
+		success = 1;
+		for (page_num = 0; page_num < sblock_bad->page_count;
+		     page_num++) {
+			int sub_success;
+
+			sub_success = 0;
+			for (mirror_index = 0;
+			     mirror_index < BTRFS_MAX_MIRRORS &&
+			     sblocks_for_recheck[mirror_index].page_count > 0;
+			     mirror_index++) {
+				struct scrub_block *sblock_other =
+					sblocks_for_recheck + mirror_index;
+				struct scrub_page *page_other =
+					sblock_other->pagev[page_num];
+
+				if (!page_other->io_error) {
+					ret = scrub_write_page_to_dev_replace(
+							sblock_other, page_num);
+					if (ret == 0) {
+						/* succeeded for this page */
+						sub_success = 1;
+						break;
+					} else {
+						btrfs_dev_replace_stats_inc(
+							&sctx->dev_root->
+							fs_info->dev_replace.
+							num_write_errors);
+					}
+				}
+			}
+
+			if (!sub_success) {
+				/*
+				 * did not find a mirror to fetch the page
+				 * from. scrub_write_page_to_dev_replace()
+				 * handles this case (page->io_error) by
+				 * filling the block with zeros before
+				 * submitting the write request
+				 */
+				success = 0;
+				ret = scrub_write_page_to_dev_replace(
+						sblock_bad, page_num);
+				if (ret)
+					btrfs_dev_replace_stats_inc(
+						&sctx->dev_root->fs_info->
+						dev_replace.num_write_errors);
+			}
+		}
+
+		goto out;
+	}
+
+	/*
+	 * for regular scrub, repair those pages that are errored.
+	 * In case of I/O errors in the area that is supposed to be
 	 * repaired, continue by picking good copies of those pages.
 	 * Select the good pages from mirrors to rewrite bad pages from
 	 * the area to fix. Afterwards verify the checksum of the block
@@ -887,7 +1087,7 @@
 
 	success = 1;
 	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
-		struct scrub_page *page_bad = sblock_bad->pagev + page_num;
+		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
 
 		if (!page_bad->io_error)
 			continue;
@@ -898,8 +1098,8 @@
 		     mirror_index++) {
 			struct scrub_block *sblock_other = sblocks_for_recheck +
 							   mirror_index;
-			struct scrub_page *page_other = sblock_other->pagev +
-							page_num;
+			struct scrub_page *page_other = sblock_other->pagev[
+							page_num];
 
 			if (!page_other->io_error) {
 				ret = scrub_repair_page_from_good_copy(
@@ -928,10 +1128,10 @@
 			 * is verified, but most likely the data comes out
 			 * of the page cache.
 			 */
-			ret = scrub_recheck_block(fs_info, sblock_bad,
-						  is_metadata, have_csum, csum,
-						  generation, sdev->csum_size);
-			if (!ret && !sblock_bad->header_error &&
+			scrub_recheck_block(fs_info, sblock_bad,
+					    is_metadata, have_csum, csum,
+					    generation, sctx->csum_size);
+			if (!sblock_bad->header_error &&
 			    !sblock_bad->checksum_error &&
 			    sblock_bad->no_io_error_seen)
 				goto corrected_error;
@@ -939,23 +1139,23 @@
 				goto did_not_correct_error;
 		} else {
 corrected_error:
-			spin_lock(&sdev->stat_lock);
-			sdev->stat.corrected_errors++;
-			spin_unlock(&sdev->stat_lock);
+			spin_lock(&sctx->stat_lock);
+			sctx->stat.corrected_errors++;
+			spin_unlock(&sctx->stat_lock);
 			printk_ratelimited_in_rcu(KERN_ERR
 				"btrfs: fixed up error at logical %llu on dev %s\n",
 				(unsigned long long)logical,
-				rcu_str_deref(sdev->dev->name));
+				rcu_str_deref(dev->name));
 		}
 	} else {
 did_not_correct_error:
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.uncorrectable_errors++;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.uncorrectable_errors++;
+		spin_unlock(&sctx->stat_lock);
 		printk_ratelimited_in_rcu(KERN_ERR
 			"btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
 			(unsigned long long)logical,
-			rcu_str_deref(sdev->dev->name));
+			rcu_str_deref(dev->name));
 	}
 
 out:
@@ -966,11 +1166,11 @@
 						     mirror_index;
 			int page_index;
 
-			for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
-			     page_index++)
-				if (sblock->pagev[page_index].page)
-					__free_page(
-						sblock->pagev[page_index].page);
+			for (page_index = 0; page_index < sblock->page_count;
+			     page_index++) {
+				sblock->pagev[page_index]->sblock = NULL;
+				scrub_page_put(sblock->pagev[page_index]);
+			}
 		}
 		kfree(sblocks_for_recheck);
 	}
@@ -978,8 +1178,9 @@
 	return 0;
 }
 
-static int scrub_setup_recheck_block(struct scrub_dev *sdev,
-				     struct btrfs_mapping_tree *map_tree,
+static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
+				     struct btrfs_fs_info *fs_info,
+				     struct scrub_block *original_sblock,
 				     u64 length, u64 logical,
 				     struct scrub_block *sblocks_for_recheck)
 {
@@ -988,7 +1189,7 @@
 	int ret;
 
 	/*
-	 * note: the three members sdev, ref_count and outstanding_pages
+	 * note: the two members ref_count and outstanding_pages
 	 * are not used (and not set) in the blocks that are used for
 	 * the recheck procedure
 	 */
@@ -1003,14 +1204,14 @@
 		 * with a length of PAGE_SIZE, each returned stripe
 		 * represents one mirror
 		 */
-		ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length,
-				      &bbio, 0);
+		ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
+				      &mapped_length, &bbio, 0);
 		if (ret || !bbio || mapped_length < sublen) {
 			kfree(bbio);
 			return -EIO;
 		}
 
-		BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
+		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
 		for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
 		     mirror_index++) {
 			struct scrub_block *sblock;
@@ -1020,21 +1221,31 @@
 				continue;
 
 			sblock = sblocks_for_recheck + mirror_index;
-			page = sblock->pagev + page_index;
-			page->logical = logical;
-			page->physical = bbio->stripes[mirror_index].physical;
-			/* for missing devices, dev->bdev is NULL */
-			page->dev = bbio->stripes[mirror_index].dev;
-			page->mirror_num = mirror_index + 1;
-			page->page = alloc_page(GFP_NOFS);
-			if (!page->page) {
-				spin_lock(&sdev->stat_lock);
-				sdev->stat.malloc_errors++;
-				spin_unlock(&sdev->stat_lock);
+			sblock->sctx = sctx;
+			page = kzalloc(sizeof(*page), GFP_NOFS);
+			if (!page) {
+leave_nomem:
+				spin_lock(&sctx->stat_lock);
+				sctx->stat.malloc_errors++;
+				spin_unlock(&sctx->stat_lock);
 				kfree(bbio);
 				return -ENOMEM;
 			}
+			scrub_page_get(page);
+			sblock->pagev[page_index] = page;
+			page->logical = logical;
+			page->physical = bbio->stripes[mirror_index].physical;
+			BUG_ON(page_index >= original_sblock->page_count);
+			page->physical_for_dev_replace =
+				original_sblock->pagev[page_index]->
+				physical_for_dev_replace;
+			/* for missing devices, dev->bdev is NULL */
+			page->dev = bbio->stripes[mirror_index].dev;
+			page->mirror_num = mirror_index + 1;
 			sblock->page_count++;
+			page->page = alloc_page(GFP_NOFS);
+			if (!page->page)
+				goto leave_nomem;
 		}
 		kfree(bbio);
 		length -= sublen;
@@ -1052,10 +1263,10 @@
  * to take those pages that are not errored from all the mirrors so that
  * the pages that are errored in the just handled mirror can be repaired.
  */
-static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
-			       struct scrub_block *sblock, int is_metadata,
-			       int have_csum, u8 *csum, u64 generation,
-			       u16 csum_size)
+static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
+				struct scrub_block *sblock, int is_metadata,
+				int have_csum, u8 *csum, u64 generation,
+				u16 csum_size)
 {
 	int page_num;
 
@@ -1065,8 +1276,7 @@
 
 	for (page_num = 0; page_num < sblock->page_count; page_num++) {
 		struct bio *bio;
-		int ret;
-		struct scrub_page *page = sblock->pagev + page_num;
+		struct scrub_page *page = sblock->pagev[page_num];
 		DECLARE_COMPLETION_ONSTACK(complete);
 
 		if (page->dev->bdev == NULL) {
@@ -1075,20 +1285,19 @@
 			continue;
 		}
 
-		BUG_ON(!page->page);
+		WARN_ON(!page->page);
 		bio = bio_alloc(GFP_NOFS, 1);
-		if (!bio)
-			return -EIO;
+		if (!bio) {
+			page->io_error = 1;
+			sblock->no_io_error_seen = 0;
+			continue;
+		}
 		bio->bi_bdev = page->dev->bdev;
 		bio->bi_sector = page->physical >> 9;
 		bio->bi_end_io = scrub_complete_bio_end_io;
 		bio->bi_private = &complete;
 
-		ret = bio_add_page(bio, page->page, PAGE_SIZE, 0);
-		if (PAGE_SIZE != ret) {
-			bio_put(bio);
-			return -EIO;
-		}
+		bio_add_page(bio, page->page, PAGE_SIZE, 0);
 		btrfsic_submit_bio(READ, bio);
 
 		/* this will also unplug the queue */
@@ -1105,7 +1314,7 @@
 					     have_csum, csum, generation,
 					     csum_size);
 
-	return 0;
+	return;
 }
 
 static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
@@ -1120,14 +1329,14 @@
 	struct btrfs_root *root = fs_info->extent_root;
 	void *mapped_buffer;
 
-	BUG_ON(!sblock->pagev[0].page);
+	WARN_ON(!sblock->pagev[0]->page);
 	if (is_metadata) {
 		struct btrfs_header *h;
 
-		mapped_buffer = kmap_atomic(sblock->pagev[0].page);
+		mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
 		h = (struct btrfs_header *)mapped_buffer;
 
-		if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
+		if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
 		    memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
 		    memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
 			   BTRFS_UUID_SIZE)) {
@@ -1141,7 +1350,7 @@
 		if (!have_csum)
 			return;
 
-		mapped_buffer = kmap_atomic(sblock->pagev[0].page);
+		mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
 	}
 
 	for (page_num = 0;;) {
@@ -1157,9 +1366,9 @@
 		page_num++;
 		if (page_num >= sblock->page_count)
 			break;
-		BUG_ON(!sblock->pagev[page_num].page);
+		WARN_ON(!sblock->pagev[page_num]->page);
 
-		mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
+		mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
 	}
 
 	btrfs_csum_final(crc, calculated_csum);
@@ -1197,17 +1406,23 @@
 					    struct scrub_block *sblock_good,
 					    int page_num, int force_write)
 {
-	struct scrub_page *page_bad = sblock_bad->pagev + page_num;
-	struct scrub_page *page_good = sblock_good->pagev + page_num;
+	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
+	struct scrub_page *page_good = sblock_good->pagev[page_num];
 
-	BUG_ON(sblock_bad->pagev[page_num].page == NULL);
-	BUG_ON(sblock_good->pagev[page_num].page == NULL);
+	BUG_ON(page_bad->page == NULL);
+	BUG_ON(page_good->page == NULL);
 	if (force_write || sblock_bad->header_error ||
 	    sblock_bad->checksum_error || page_bad->io_error) {
 		struct bio *bio;
 		int ret;
 		DECLARE_COMPLETION_ONSTACK(complete);
 
+		if (!page_bad->dev->bdev) {
+			printk_ratelimited(KERN_WARNING
+				"btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
+			return -EIO;
+		}
+
 		bio = bio_alloc(GFP_NOFS, 1);
 		if (!bio)
 			return -EIO;
@@ -1228,6 +1443,9 @@
 		if (!bio_flagged(bio, BIO_UPTODATE)) {
 			btrfs_dev_stat_inc_and_print(page_bad->dev,
 				BTRFS_DEV_STAT_WRITE_ERRS);
+			btrfs_dev_replace_stats_inc(
+				&sblock_bad->sctx->dev_root->fs_info->
+				dev_replace.num_write_errors);
 			bio_put(bio);
 			return -EIO;
 		}
@@ -1237,13 +1455,174 @@
 	return 0;
 }
 
-static void scrub_checksum(struct scrub_block *sblock)
+static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
+{
+	int page_num;
+
+	for (page_num = 0; page_num < sblock->page_count; page_num++) {
+		int ret;
+
+		ret = scrub_write_page_to_dev_replace(sblock, page_num);
+		if (ret)
+			btrfs_dev_replace_stats_inc(
+				&sblock->sctx->dev_root->fs_info->dev_replace.
+				num_write_errors);
+	}
+}
+
+static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
+					   int page_num)
+{
+	struct scrub_page *spage = sblock->pagev[page_num];
+
+	BUG_ON(spage->page == NULL);
+	if (spage->io_error) {
+		void *mapped_buffer = kmap_atomic(spage->page);
+
+		memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
+		flush_dcache_page(spage->page);
+		kunmap_atomic(mapped_buffer);
+	}
+	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
+}
+
+static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
+				    struct scrub_page *spage)
+{
+	struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
+	struct scrub_bio *sbio;
+	int ret;
+
+	mutex_lock(&wr_ctx->wr_lock);
+again:
+	if (!wr_ctx->wr_curr_bio) {
+		wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
+					      GFP_NOFS);
+		if (!wr_ctx->wr_curr_bio) {
+			mutex_unlock(&wr_ctx->wr_lock);
+			return -ENOMEM;
+		}
+		wr_ctx->wr_curr_bio->sctx = sctx;
+		wr_ctx->wr_curr_bio->page_count = 0;
+	}
+	sbio = wr_ctx->wr_curr_bio;
+	if (sbio->page_count == 0) {
+		struct bio *bio;
+
+		sbio->physical = spage->physical_for_dev_replace;
+		sbio->logical = spage->logical;
+		sbio->dev = wr_ctx->tgtdev;
+		bio = sbio->bio;
+		if (!bio) {
+			bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+			if (!bio) {
+				mutex_unlock(&wr_ctx->wr_lock);
+				return -ENOMEM;
+			}
+			sbio->bio = bio;
+		}
+
+		bio->bi_private = sbio;
+		bio->bi_end_io = scrub_wr_bio_end_io;
+		bio->bi_bdev = sbio->dev->bdev;
+		bio->bi_sector = sbio->physical >> 9;
+		sbio->err = 0;
+	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
+		   spage->physical_for_dev_replace ||
+		   sbio->logical + sbio->page_count * PAGE_SIZE !=
+		   spage->logical) {
+		scrub_wr_submit(sctx);
+		goto again;
+	}
+
+	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
+	if (ret != PAGE_SIZE) {
+		if (sbio->page_count < 1) {
+			bio_put(sbio->bio);
+			sbio->bio = NULL;
+			mutex_unlock(&wr_ctx->wr_lock);
+			return -EIO;
+		}
+		scrub_wr_submit(sctx);
+		goto again;
+	}
+
+	sbio->pagev[sbio->page_count] = spage;
+	scrub_page_get(spage);
+	sbio->page_count++;
+	if (sbio->page_count == wr_ctx->pages_per_wr_bio)
+		scrub_wr_submit(sctx);
+	mutex_unlock(&wr_ctx->wr_lock);
+
+	return 0;
+}
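
The contiguity test in scrub_add_page_to_wr_bio() above is the heart of the write batching: a page joins the current write bio only if it extends the bio both physically and logically, and a full bio is submitted eagerly; anything else forces a submit and a fresh bio. A minimal standalone sketch of the same accumulate-or-flush pattern, with nothing kernel-specific (all names hypothetical, not part of the patch):

/* Illustrative sketch only -- hypothetical names, not part of the patch. */
#include <stdio.h>

#define MAX_BATCH 4
#define PAGE_SZ   4096

struct batch {
	unsigned long long physical;	/* start of the contiguous run */
	int count;			/* pages accumulated so far */
};

static void submit(struct batch *b)
{
	if (b->count)
		printf("submit %d page(s) at %llu\n", b->count, b->physical);
	b->count = 0;
}

/* Append one page, flushing first when it would break contiguity. */
static void add_page(struct batch *b, unsigned long long physical)
{
	if (b->count &&
	    b->physical + (unsigned long long)b->count * PAGE_SZ != physical)
		submit(b);			/* not contiguous: flush */
	if (b->count == 0)
		b->physical = physical;
	b->count++;
	if (b->count == MAX_BATCH)		/* batch full: flush eagerly */
		submit(b);
}

int main(void)
{
	struct batch b = { 0, 0 };

	add_page(&b, 0);
	add_page(&b, PAGE_SZ);		/* contiguous: joins the batch */
	add_page(&b, 10 * PAGE_SZ);	/* gap: forces a submit first */
	submit(&b);			/* final flush */
	return 0;
}
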
+
+static void scrub_wr_submit(struct scrub_ctx *sctx)
+{
+	struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
+	struct scrub_bio *sbio;
+
+	if (!wr_ctx->wr_curr_bio)
+		return;
+
+	sbio = wr_ctx->wr_curr_bio;
+	wr_ctx->wr_curr_bio = NULL;
+	WARN_ON(!sbio->bio->bi_bdev);
+	scrub_pending_bio_inc(sctx);
+	/* process all writes in a single worker thread. Then the block layer
+	 * orders the requests before sending them to the driver which
+	 * doubled the write performance on spinning disks when measured
+	 * with Linux 3.5 */
+	btrfsic_submit_bio(WRITE, sbio->bio);
+}
+
+static void scrub_wr_bio_end_io(struct bio *bio, int err)
+{
+	struct scrub_bio *sbio = bio->bi_private;
+	struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
+
+	sbio->err = err;
+	sbio->bio = bio;
+
+	sbio->work.func = scrub_wr_bio_end_io_worker;
+	btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work);
+}
+
+static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
+{
+	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
+	struct scrub_ctx *sctx = sbio->sctx;
+	int i;
+
+	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
+	if (sbio->err) {
+		struct btrfs_dev_replace *dev_replace =
+			&sbio->sctx->dev_root->fs_info->dev_replace;
+
+		for (i = 0; i < sbio->page_count; i++) {
+			struct scrub_page *spage = sbio->pagev[i];
+
+			spage->io_error = 1;
+			btrfs_dev_replace_stats_inc(&dev_replace->
+						    num_write_errors);
+		}
+	}
+
+	for (i = 0; i < sbio->page_count; i++)
+		scrub_page_put(sbio->pagev[i]);
+
+	bio_put(sbio->bio);
+	kfree(sbio);
+	scrub_pending_bio_dec(sctx);
+}
+
+static int scrub_checksum(struct scrub_block *sblock)
 {
 	u64 flags;
 	int ret;
 
-	BUG_ON(sblock->page_count < 1);
-	flags = sblock->pagev[0].flags;
+	WARN_ON(sblock->page_count < 1);
+	flags = sblock->pagev[0]->flags;
 	ret = 0;
 	if (flags & BTRFS_EXTENT_FLAG_DATA)
 		ret = scrub_checksum_data(sblock);
@@ -1255,30 +1634,32 @@
 		WARN_ON(1);
 	if (ret)
 		scrub_handle_errored_block(sblock);
+
+	return ret;
 }
 
 static int scrub_checksum_data(struct scrub_block *sblock)
 {
-	struct scrub_dev *sdev = sblock->sdev;
+	struct scrub_ctx *sctx = sblock->sctx;
 	u8 csum[BTRFS_CSUM_SIZE];
 	u8 *on_disk_csum;
 	struct page *page;
 	void *buffer;
 	u32 crc = ~(u32)0;
 	int fail = 0;
-	struct btrfs_root *root = sdev->dev->dev_root;
+	struct btrfs_root *root = sctx->dev_root;
 	u64 len;
 	int index;
 
 	BUG_ON(sblock->page_count < 1);
-	if (!sblock->pagev[0].have_csum)
+	if (!sblock->pagev[0]->have_csum)
 		return 0;
 
-	on_disk_csum = sblock->pagev[0].csum;
-	page = sblock->pagev[0].page;
+	on_disk_csum = sblock->pagev[0]->csum;
+	page = sblock->pagev[0]->page;
 	buffer = kmap_atomic(page);
 
-	len = sdev->sectorsize;
+	len = sctx->sectorsize;
 	index = 0;
 	for (;;) {
 		u64 l = min_t(u64, len, PAGE_SIZE);
@@ -1290,13 +1671,13 @@
 			break;
 		index++;
 		BUG_ON(index >= sblock->page_count);
-		BUG_ON(!sblock->pagev[index].page);
-		page = sblock->pagev[index].page;
+		BUG_ON(!sblock->pagev[index]->page);
+		page = sblock->pagev[index]->page;
 		buffer = kmap_atomic(page);
 	}
 
 	btrfs_csum_final(crc, csum);
-	if (memcmp(csum, on_disk_csum, sdev->csum_size))
+	if (memcmp(csum, on_disk_csum, sctx->csum_size))
 		fail = 1;
 
 	return fail;
@@ -1304,9 +1685,9 @@
 
 static int scrub_checksum_tree_block(struct scrub_block *sblock)
 {
-	struct scrub_dev *sdev = sblock->sdev;
+	struct scrub_ctx *sctx = sblock->sctx;
 	struct btrfs_header *h;
-	struct btrfs_root *root = sdev->dev->dev_root;
+	struct btrfs_root *root = sctx->dev_root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u8 calculated_csum[BTRFS_CSUM_SIZE];
 	u8 on_disk_csum[BTRFS_CSUM_SIZE];
@@ -1321,10 +1702,10 @@
 	int index;
 
 	BUG_ON(sblock->page_count < 1);
-	page = sblock->pagev[0].page;
+	page = sblock->pagev[0]->page;
 	mapped_buffer = kmap_atomic(page);
 	h = (struct btrfs_header *)mapped_buffer;
-	memcpy(on_disk_csum, h->csum, sdev->csum_size);
+	memcpy(on_disk_csum, h->csum, sctx->csum_size);
 
 	/*
 	 * we don't use the getter functions here, as we
@@ -1332,10 +1713,10 @@
 	 * b) the page is already kmapped
 	 */
 
-	if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
+	if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
 		++fail;
 
-	if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
+	if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
 		++fail;
 
 	if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1345,8 +1726,8 @@
 		   BTRFS_UUID_SIZE))
 		++fail;
 
-	BUG_ON(sdev->nodesize != sdev->leafsize);
-	len = sdev->nodesize - BTRFS_CSUM_SIZE;
+	WARN_ON(sctx->nodesize != sctx->leafsize);
+	len = sctx->nodesize - BTRFS_CSUM_SIZE;
 	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
 	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
 	index = 0;
@@ -1360,15 +1741,15 @@
 			break;
 		index++;
 		BUG_ON(index >= sblock->page_count);
-		BUG_ON(!sblock->pagev[index].page);
-		page = sblock->pagev[index].page;
+		BUG_ON(!sblock->pagev[index]->page);
+		page = sblock->pagev[index]->page;
 		mapped_buffer = kmap_atomic(page);
 		mapped_size = PAGE_SIZE;
 		p = mapped_buffer;
 	}
 
 	btrfs_csum_final(crc, calculated_csum);
-	if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
+	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
 		++crc_fail;
 
 	return fail || crc_fail;
@@ -1377,8 +1758,8 @@
 static int scrub_checksum_super(struct scrub_block *sblock)
 {
 	struct btrfs_super_block *s;
-	struct scrub_dev *sdev = sblock->sdev;
-	struct btrfs_root *root = sdev->dev->dev_root;
+	struct scrub_ctx *sctx = sblock->sctx;
+	struct btrfs_root *root = sctx->dev_root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u8 calculated_csum[BTRFS_CSUM_SIZE];
 	u8 on_disk_csum[BTRFS_CSUM_SIZE];
@@ -1393,15 +1774,15 @@
 	int index;
 
 	BUG_ON(sblock->page_count < 1);
-	page = sblock->pagev[0].page;
+	page = sblock->pagev[0]->page;
 	mapped_buffer = kmap_atomic(page);
 	s = (struct btrfs_super_block *)mapped_buffer;
-	memcpy(on_disk_csum, s->csum, sdev->csum_size);
+	memcpy(on_disk_csum, s->csum, sctx->csum_size);
 
-	if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
+	if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
 		++fail_cor;
 
-	if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
+	if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
 		++fail_gen;
 
 	if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1421,15 +1802,15 @@
 			break;
 		index++;
 		BUG_ON(index >= sblock->page_count);
-		BUG_ON(!sblock->pagev[index].page);
-		page = sblock->pagev[index].page;
+		BUG_ON(!sblock->pagev[index]->page);
+		page = sblock->pagev[index]->page;
 		mapped_buffer = kmap_atomic(page);
 		mapped_size = PAGE_SIZE;
 		p = mapped_buffer;
 	}
 
 	btrfs_csum_final(crc, calculated_csum);
-	if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
+	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
 		++fail_cor;
 
 	if (fail_cor + fail_gen) {
@@ -1438,14 +1819,14 @@
 		 * They will get written with the next transaction commit
 		 * anyway
 		 */
-		spin_lock(&sdev->stat_lock);
-		++sdev->stat.super_errors;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		++sctx->stat.super_errors;
+		spin_unlock(&sctx->stat_lock);
 		if (fail_cor)
-			btrfs_dev_stat_inc_and_print(sdev->dev,
+			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
 				BTRFS_DEV_STAT_CORRUPTION_ERRS);
 		else
-			btrfs_dev_stat_inc_and_print(sdev->dev,
+			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
 				BTRFS_DEV_STAT_GENERATION_ERRS);
 	}
 
@@ -1463,28 +1844,54 @@
 		int i;
 
 		for (i = 0; i < sblock->page_count; i++)
-			if (sblock->pagev[i].page)
-				__free_page(sblock->pagev[i].page);
+			scrub_page_put(sblock->pagev[i]);
 		kfree(sblock);
 	}
 }
 
-static void scrub_submit(struct scrub_dev *sdev)
+static void scrub_page_get(struct scrub_page *spage)
+{
+	atomic_inc(&spage->ref_count);
+}
+
+static void scrub_page_put(struct scrub_page *spage)
+{
+	if (atomic_dec_and_test(&spage->ref_count)) {
+		if (spage->page)
+			__free_page(spage->page);
+		kfree(spage);
+	}
+}
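
scrub_page_get()/scrub_page_put() above follow the usual last-put-frees refcounting idiom, and scrub_block_get()/scrub_block_put() work the same way one level up. For illustration, a small userspace analogue using C11 atomics (hypothetical object, not the kernel struct):

/* Illustrative sketch only -- not the kernel implementation. */
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int ref_count;
	void *payload;
};

static struct obj *obj_alloc(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o)
		atomic_init(&o->ref_count, 1);	/* caller owns one reference */
	return o;
}

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->ref_count, 1);
}

static void obj_put(struct obj *o)
{
	/* fetch_sub returns the old value; 1 means this was the last ref */
	if (atomic_fetch_sub(&o->ref_count, 1) == 1) {
		free(o->payload);
		free(o);
	}
}

int main(void)
{
	struct obj *o = obj_alloc();

	if (!o)
		return 1;
	obj_get(o);	/* a second user takes a reference */
	obj_put(o);	/* first put: object survives */
	obj_put(o);	/* last put: object is freed */
	return 0;
}
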
+
+static void scrub_submit(struct scrub_ctx *sctx)
 {
 	struct scrub_bio *sbio;
 
-	if (sdev->curr == -1)
+	if (sctx->curr == -1)
 		return;
 
-	sbio = sdev->bios[sdev->curr];
-	sdev->curr = -1;
-	atomic_inc(&sdev->in_flight);
+	sbio = sctx->bios[sctx->curr];
+	sctx->curr = -1;
+	scrub_pending_bio_inc(sctx);
 
-	btrfsic_submit_bio(READ, sbio->bio);
+	if (!sbio->bio->bi_bdev) {
+		/*
+		 * This case should not happen. If btrfs_map_block() is
+		 * wrong, it could happen for dev-replace operations on
+		 * missing devices when no mirrors are available, but in
+		 * that case the mount should already have failed.
+		 * This case is handled correctly (but _very_ slowly).
+		 */
+		printk_ratelimited(KERN_WARNING
+			"btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
+		bio_endio(sbio->bio, -EIO);
+	} else {
+		btrfsic_submit_bio(READ, sbio->bio);
+	}
 }
 
-static int scrub_add_page_to_bio(struct scrub_dev *sdev,
-				 struct scrub_page *spage)
+static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
+				    struct scrub_page *spage)
 {
 	struct scrub_block *sblock = spage->sblock;
 	struct scrub_bio *sbio;
@@ -1494,28 +1901,29 @@
 	/*
 	 * grab a fresh bio or wait for one to become available
 	 */
-	while (sdev->curr == -1) {
-		spin_lock(&sdev->list_lock);
-		sdev->curr = sdev->first_free;
-		if (sdev->curr != -1) {
-			sdev->first_free = sdev->bios[sdev->curr]->next_free;
-			sdev->bios[sdev->curr]->next_free = -1;
-			sdev->bios[sdev->curr]->page_count = 0;
-			spin_unlock(&sdev->list_lock);
+	while (sctx->curr == -1) {
+		spin_lock(&sctx->list_lock);
+		sctx->curr = sctx->first_free;
+		if (sctx->curr != -1) {
+			sctx->first_free = sctx->bios[sctx->curr]->next_free;
+			sctx->bios[sctx->curr]->next_free = -1;
+			sctx->bios[sctx->curr]->page_count = 0;
+			spin_unlock(&sctx->list_lock);
 		} else {
-			spin_unlock(&sdev->list_lock);
-			wait_event(sdev->list_wait, sdev->first_free != -1);
+			spin_unlock(&sctx->list_lock);
+			wait_event(sctx->list_wait, sctx->first_free != -1);
 		}
 	}
-	sbio = sdev->bios[sdev->curr];
+	sbio = sctx->bios[sctx->curr];
 	if (sbio->page_count == 0) {
 		struct bio *bio;
 
 		sbio->physical = spage->physical;
 		sbio->logical = spage->logical;
+		sbio->dev = spage->dev;
 		bio = sbio->bio;
 		if (!bio) {
-			bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio);
+			bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
 			if (!bio)
 				return -ENOMEM;
 			sbio->bio = bio;
@@ -1523,14 +1931,15 @@
 
 		bio->bi_private = sbio;
 		bio->bi_end_io = scrub_bio_end_io;
-		bio->bi_bdev = sdev->dev->bdev;
-		bio->bi_sector = spage->physical >> 9;
+		bio->bi_bdev = sbio->dev->bdev;
+		bio->bi_sector = sbio->physical >> 9;
 		sbio->err = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical ||
 		   sbio->logical + sbio->page_count * PAGE_SIZE !=
-		   spage->logical) {
-		scrub_submit(sdev);
+		   spage->logical ||
+		   sbio->dev != spage->dev) {
+		scrub_submit(sctx);
 		goto again;
 	}
 
@@ -1542,81 +1951,87 @@
 			sbio->bio = NULL;
 			return -EIO;
 		}
-		scrub_submit(sdev);
+		scrub_submit(sctx);
 		goto again;
 	}
 
-	scrub_block_get(sblock); /* one for the added page */
+	scrub_block_get(sblock); /* one for the page added to the bio */
 	atomic_inc(&sblock->outstanding_pages);
 	sbio->page_count++;
-	if (sbio->page_count == sdev->pages_per_bio)
-		scrub_submit(sdev);
+	if (sbio->page_count == sctx->pages_per_rd_bio)
+		scrub_submit(sctx);
 
 	return 0;
 }
 
-static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
-		       u64 physical, u64 flags, u64 gen, int mirror_num,
-		       u8 *csum, int force)
+static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
+		       u64 physical, struct btrfs_device *dev, u64 flags,
+		       u64 gen, int mirror_num, u8 *csum, int force,
+		       u64 physical_for_dev_replace)
 {
 	struct scrub_block *sblock;
 	int index;
 
 	sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
 	if (!sblock) {
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.malloc_errors++;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
 		return -ENOMEM;
 	}
 
-	/* one ref inside this function, plus one for each page later on */
+	/* one ref inside this function, plus one for each page added to
+	 * a bio later on */
 	atomic_set(&sblock->ref_count, 1);
-	sblock->sdev = sdev;
+	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 
 	for (index = 0; len > 0; index++) {
-		struct scrub_page *spage = sblock->pagev + index;
+		struct scrub_page *spage;
 		u64 l = min_t(u64, len, PAGE_SIZE);
 
-		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
-		spage->page = alloc_page(GFP_NOFS);
-		if (!spage->page) {
-			spin_lock(&sdev->stat_lock);
-			sdev->stat.malloc_errors++;
-			spin_unlock(&sdev->stat_lock);
-			while (index > 0) {
-				index--;
-				__free_page(sblock->pagev[index].page);
-			}
-			kfree(sblock);
+		spage = kzalloc(sizeof(*spage), GFP_NOFS);
+		if (!spage) {
+leave_nomem:
+			spin_lock(&sctx->stat_lock);
+			sctx->stat.malloc_errors++;
+			spin_unlock(&sctx->stat_lock);
+			scrub_block_put(sblock);
 			return -ENOMEM;
 		}
+		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		scrub_page_get(spage);
+		sblock->pagev[index] = spage;
 		spage->sblock = sblock;
-		spage->dev = sdev->dev;
+		spage->dev = dev;
 		spage->flags = flags;
 		spage->generation = gen;
 		spage->logical = logical;
 		spage->physical = physical;
+		spage->physical_for_dev_replace = physical_for_dev_replace;
 		spage->mirror_num = mirror_num;
 		if (csum) {
 			spage->have_csum = 1;
-			memcpy(spage->csum, csum, sdev->csum_size);
+			memcpy(spage->csum, csum, sctx->csum_size);
 		} else {
 			spage->have_csum = 0;
 		}
 		sblock->page_count++;
+		spage->page = alloc_page(GFP_NOFS);
+		if (!spage->page)
+			goto leave_nomem;
 		len -= l;
 		logical += l;
 		physical += l;
+		physical_for_dev_replace += l;
 	}
 
-	BUG_ON(sblock->page_count == 0);
+	WARN_ON(sblock->page_count == 0);
 	for (index = 0; index < sblock->page_count; index++) {
-		struct scrub_page *spage = sblock->pagev + index;
+		struct scrub_page *spage = sblock->pagev[index];
 		int ret;
 
-		ret = scrub_add_page_to_bio(sdev, spage);
+		ret = scrub_add_page_to_rd_bio(sctx, spage);
 		if (ret) {
 			scrub_block_put(sblock);
 			return ret;
@@ -1624,7 +2039,7 @@
 	}
 
 	if (force)
-		scrub_submit(sdev);
+		scrub_submit(sctx);
 
 	/* last one frees, either here or in bio completion for last page */
 	scrub_block_put(sblock);
@@ -1634,8 +2049,7 @@
 static void scrub_bio_end_io(struct bio *bio, int err)
 {
 	struct scrub_bio *sbio = bio->bi_private;
-	struct scrub_dev *sdev = sbio->sdev;
-	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+	struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
 
 	sbio->err = err;
 	sbio->bio = bio;
@@ -1646,10 +2060,10 @@
 static void scrub_bio_end_io_worker(struct btrfs_work *work)
 {
 	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
-	struct scrub_dev *sdev = sbio->sdev;
+	struct scrub_ctx *sctx = sbio->sctx;
 	int i;
 
-	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO);
+	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
 	if (sbio->err) {
 		for (i = 0; i < sbio->page_count; i++) {
 			struct scrub_page *spage = sbio->pagev[i];
@@ -1671,23 +2085,37 @@
 
 	bio_put(sbio->bio);
 	sbio->bio = NULL;
-	spin_lock(&sdev->list_lock);
-	sbio->next_free = sdev->first_free;
-	sdev->first_free = sbio->index;
-	spin_unlock(&sdev->list_lock);
-	atomic_dec(&sdev->in_flight);
-	wake_up(&sdev->list_wait);
+	spin_lock(&sctx->list_lock);
+	sbio->next_free = sctx->first_free;
+	sctx->first_free = sbio->index;
+	spin_unlock(&sctx->list_lock);
+
+	if (sctx->is_dev_replace &&
+	    atomic_read(&sctx->wr_ctx.flush_all_writes)) {
+		mutex_lock(&sctx->wr_ctx.wr_lock);
+		scrub_wr_submit(sctx);
+		mutex_unlock(&sctx->wr_ctx.wr_lock);
+	}
+
+	scrub_pending_bio_dec(sctx);
 }
 
 static void scrub_block_complete(struct scrub_block *sblock)
 {
-	if (!sblock->no_io_error_seen)
+	if (!sblock->no_io_error_seen) {
 		scrub_handle_errored_block(sblock);
-	else
-		scrub_checksum(sblock);
+	} else {
+		/*
+		 * In the dev replace case, a block with a checksum error
+		 * is written out via the repair mechanism, while a clean
+		 * block is written to the replace target here.
+		 */
+		if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
+			scrub_write_block_to_dev_replace(sblock);
+	}
 }
 
-static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
+static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
 			   u8 *csum)
 {
 	struct btrfs_ordered_sum *sum = NULL;
@@ -1695,15 +2123,15 @@
 	unsigned long i;
 	unsigned long num_sectors;
 
-	while (!list_empty(&sdev->csum_list)) {
-		sum = list_first_entry(&sdev->csum_list,
+	while (!list_empty(&sctx->csum_list)) {
+		sum = list_first_entry(&sctx->csum_list,
 				       struct btrfs_ordered_sum, list);
 		if (sum->bytenr > logical)
 			return 0;
 		if (sum->bytenr + sum->len > logical)
 			break;
 
-		++sdev->stat.csum_discards;
+		++sctx->stat.csum_discards;
 		list_del(&sum->list);
 		kfree(sum);
 		sum = NULL;
@@ -1711,10 +2139,10 @@
 	if (!sum)
 		return 0;
 
-	num_sectors = sum->len / sdev->sectorsize;
+	num_sectors = sum->len / sctx->sectorsize;
 	for (i = 0; i < num_sectors; ++i) {
 		if (sum->sums[i].bytenr == logical) {
-			memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
+			memcpy(csum, &sum->sums[i].sum, sctx->csum_size);
 			ret = 1;
 			break;
 		}
@@ -1727,29 +2155,30 @@
 }
 
 /* scrub extent tries to collect up to 64 kB for each bio */
-static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
-			u64 physical, u64 flags, u64 gen, int mirror_num)
+static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
+			u64 physical, struct btrfs_device *dev, u64 flags,
+			u64 gen, int mirror_num, u64 physical_for_dev_replace)
 {
 	int ret;
 	u8 csum[BTRFS_CSUM_SIZE];
 	u32 blocksize;
 
 	if (flags & BTRFS_EXTENT_FLAG_DATA) {
-		blocksize = sdev->sectorsize;
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.data_extents_scrubbed++;
-		sdev->stat.data_bytes_scrubbed += len;
-		spin_unlock(&sdev->stat_lock);
+		blocksize = sctx->sectorsize;
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.data_extents_scrubbed++;
+		sctx->stat.data_bytes_scrubbed += len;
+		spin_unlock(&sctx->stat_lock);
 	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
-		BUG_ON(sdev->nodesize != sdev->leafsize);
-		blocksize = sdev->nodesize;
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.tree_extents_scrubbed++;
-		sdev->stat.tree_bytes_scrubbed += len;
-		spin_unlock(&sdev->stat_lock);
+		WARN_ON(sctx->nodesize != sctx->leafsize);
+		blocksize = sctx->nodesize;
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.tree_extents_scrubbed++;
+		sctx->stat.tree_bytes_scrubbed += len;
+		spin_unlock(&sctx->stat_lock);
 	} else {
-		blocksize = sdev->sectorsize;
-		BUG_ON(1);
+		blocksize = sctx->sectorsize;
+		WARN_ON(1);
 	}
 
 	while (len) {
@@ -1758,26 +2187,38 @@
 
 		if (flags & BTRFS_EXTENT_FLAG_DATA) {
 			/* push csums to sbio */
-			have_csum = scrub_find_csum(sdev, logical, l, csum);
+			have_csum = scrub_find_csum(sctx, logical, l, csum);
 			if (have_csum == 0)
-				++sdev->stat.no_csum;
+				++sctx->stat.no_csum;
+			if (sctx->is_dev_replace && !have_csum) {
+				ret = copy_nocow_pages(sctx, logical, l,
+						       mirror_num,
+						      physical_for_dev_replace);
+				goto behind_scrub_pages;
+			}
 		}
-		ret = scrub_pages(sdev, logical, l, physical, flags, gen,
-				  mirror_num, have_csum ? csum : NULL, 0);
+		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
+				  mirror_num, have_csum ? csum : NULL, 0,
+				  physical_for_dev_replace);
+behind_scrub_pages:
 		if (ret)
 			return ret;
 		len -= l;
 		logical += l;
 		physical += l;
+		physical_for_dev_replace += l;
 	}
 	return 0;
 }
 
-static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
-	struct map_lookup *map, int num, u64 base, u64 length)
+static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
+					   struct map_lookup *map,
+					   struct btrfs_device *scrub_dev,
+					   int num, u64 base, u64 length,
+					   int is_dev_replace)
 {
 	struct btrfs_path *path;
-	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
 	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_root *csum_root = fs_info->csum_root;
 	struct btrfs_extent_item *extent;
@@ -1797,9 +2238,13 @@
 	struct reada_control *reada2;
 	struct btrfs_key key_start;
 	struct btrfs_key key_end;
-
 	u64 increment = map->stripe_len;
 	u64 offset;
+	u64 extent_logical;
+	u64 extent_physical;
+	u64 extent_len;
+	struct btrfs_device *extent_dev;
+	int extent_mirror_num;
 
 	nstripes = length;
 	offset = 0;
@@ -1843,8 +2288,8 @@
 	 */
 	logical = base + offset;
 
-	wait_event(sdev->list_wait,
-		   atomic_read(&sdev->in_flight) == 0);
+	wait_event(sctx->list_wait,
+		   atomic_read(&sctx->bios_in_flight) == 0);
 	atomic_inc(&fs_info->scrubs_paused);
 	wake_up(&fs_info->scrub_pause_wait);
 
@@ -1898,7 +2343,7 @@
 		 * canceled?
 		 */
 		if (atomic_read(&fs_info->scrub_cancel_req) ||
-		    atomic_read(&sdev->cancel_req)) {
+		    atomic_read(&sctx->cancel_req)) {
 			ret = -ECANCELED;
 			goto out;
 		}
@@ -1907,9 +2352,14 @@
 		 */
 		if (atomic_read(&fs_info->scrub_pause_req)) {
 			/* push queued extents */
-			scrub_submit(sdev);
-			wait_event(sdev->list_wait,
-				   atomic_read(&sdev->in_flight) == 0);
+			atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
+			scrub_submit(sctx);
+			mutex_lock(&sctx->wr_ctx.wr_lock);
+			scrub_wr_submit(sctx);
+			mutex_unlock(&sctx->wr_ctx.wr_lock);
+			wait_event(sctx->list_wait,
+				   atomic_read(&sctx->bios_in_flight) == 0);
+			atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
 			atomic_inc(&fs_info->scrubs_paused);
 			wake_up(&fs_info->scrub_pause_wait);
 			mutex_lock(&fs_info->scrub_lock);
@@ -1926,7 +2376,7 @@
 
 		ret = btrfs_lookup_csums_range(csum_root, logical,
 					       logical + map->stripe_len - 1,
-					       &sdev->csum_list, 1);
+					       &sctx->csum_list, 1);
 		if (ret)
 			goto out;
 
@@ -2004,9 +2454,20 @@
 					     key.objectid;
 			}
 
-			ret = scrub_extent(sdev, key.objectid, key.offset,
-					   key.objectid - logical + physical,
-					   flags, generation, mirror_num);
+			extent_logical = key.objectid;
+			extent_physical = key.objectid - logical + physical;
+			extent_len = key.offset;
+			extent_dev = scrub_dev;
+			extent_mirror_num = mirror_num;
+			if (is_dev_replace)
+				scrub_remap_extent(fs_info, extent_logical,
+						   extent_len, &extent_physical,
+						   &extent_dev,
+						   &extent_mirror_num);
+			ret = scrub_extent(sctx, extent_logical, extent_len,
+					   extent_physical, extent_dev, flags,
+					   generation, extent_mirror_num,
+					   key.objectid - logical + physical);
 			if (ret)
 				goto out;
 
@@ -2016,29 +2477,34 @@
 		btrfs_release_path(path);
 		logical += increment;
 		physical += map->stripe_len;
-		spin_lock(&sdev->stat_lock);
-		sdev->stat.last_physical = physical;
-		spin_unlock(&sdev->stat_lock);
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.last_physical = physical;
+		spin_unlock(&sctx->stat_lock);
 	}
-	/* push queued extents */
-	scrub_submit(sdev);
-
 out:
+	/* push queued extents */
+	scrub_submit(sctx);
+	mutex_lock(&sctx->wr_ctx.wr_lock);
+	scrub_wr_submit(sctx);
+	mutex_unlock(&sctx->wr_ctx.wr_lock);
+
 	blk_finish_plug(&plug);
 	btrfs_free_path(path);
 	return ret < 0 ? ret : 0;
 }
 
-static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
-	u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
-	u64 dev_offset)
+static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
+					  struct btrfs_device *scrub_dev,
+					  u64 chunk_tree, u64 chunk_objectid,
+					  u64 chunk_offset, u64 length,
+					  u64 dev_offset, int is_dev_replace)
 {
 	struct btrfs_mapping_tree *map_tree =
-		&sdev->dev->dev_root->fs_info->mapping_tree;
+		&sctx->dev_root->fs_info->mapping_tree;
 	struct map_lookup *map;
 	struct extent_map *em;
 	int i;
-	int ret = -EINVAL;
+	int ret = 0;
 
 	read_lock(&map_tree->map_tree.lock);
 	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
@@ -2055,9 +2521,11 @@
 		goto out;
 
 	for (i = 0; i < map->num_stripes; ++i) {
-		if (map->stripes[i].dev == sdev->dev &&
+		if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
 		    map->stripes[i].physical == dev_offset) {
-			ret = scrub_stripe(sdev, map, i, chunk_offset, length);
+			ret = scrub_stripe(sctx, map, scrub_dev, i,
+					   chunk_offset, length,
+					   is_dev_replace);
 			if (ret)
 				goto out;
 		}
@@ -2069,11 +2537,13 @@
 }
 
 static noinline_for_stack
-int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
+int scrub_enumerate_chunks(struct scrub_ctx *sctx,
+			   struct btrfs_device *scrub_dev, u64 start, u64 end,
+			   int is_dev_replace)
 {
 	struct btrfs_dev_extent *dev_extent = NULL;
 	struct btrfs_path *path;
-	struct btrfs_root *root = sdev->dev->dev_root;
+	struct btrfs_root *root = sctx->dev_root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 length;
 	u64 chunk_tree;
@@ -2085,6 +2555,7 @@
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct btrfs_block_group_cache *cache;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -2094,11 +2565,10 @@
 	path->search_commit_root = 1;
 	path->skip_locking = 1;
 
-	key.objectid = sdev->dev->devid;
+	key.objectid = scrub_dev->devid;
 	key.offset = 0ull;
 	key.type = BTRFS_DEV_EXTENT_KEY;
 
-
 	while (1) {
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		if (ret < 0)
@@ -2117,7 +2587,7 @@
 
 		btrfs_item_key_to_cpu(l, &found_key, slot);
 
-		if (found_key.objectid != sdev->dev->devid)
+		if (found_key.objectid != scrub_dev->devid)
 			break;
 
 		if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
@@ -2151,11 +2621,62 @@
 			ret = -ENOENT;
 			break;
 		}
-		ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
-				  chunk_offset, length, found_key.offset);
+		dev_replace->cursor_right = found_key.offset + length;
+		dev_replace->cursor_left = found_key.offset;
+		dev_replace->item_needs_writeback = 1;
+		ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
+				  chunk_offset, length, found_key.offset,
+				  is_dev_replace);
+
+		/*
+		 * Flush and submit all pending read and write bios, then
+		 * wait for them to complete.
+		 * Note that in the dev replace case, a read request causes
+		 * write requests that are submitted in the read completion
+		 * worker. Therefore in the current situation, it is required
+		 * that all write requests are flushed, so that all read and
+		 * write requests are really completed when bios_in_flight
+		 * changes to 0.
+		 */
+		atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
+		scrub_submit(sctx);
+		mutex_lock(&sctx->wr_ctx.wr_lock);
+		scrub_wr_submit(sctx);
+		mutex_unlock(&sctx->wr_ctx.wr_lock);
+
+		wait_event(sctx->list_wait,
+			   atomic_read(&sctx->bios_in_flight) == 0);
+		atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
+		atomic_inc(&fs_info->scrubs_paused);
+		wake_up(&fs_info->scrub_pause_wait);
+		wait_event(sctx->list_wait,
+			   atomic_read(&sctx->workers_pending) == 0);
+
+		mutex_lock(&fs_info->scrub_lock);
+		while (atomic_read(&fs_info->scrub_pause_req)) {
+			mutex_unlock(&fs_info->scrub_lock);
+			wait_event(fs_info->scrub_pause_wait,
+			   atomic_read(&fs_info->scrub_pause_req) == 0);
+			mutex_lock(&fs_info->scrub_lock);
+		}
+		atomic_dec(&fs_info->scrubs_paused);
+		mutex_unlock(&fs_info->scrub_lock);
+		wake_up(&fs_info->scrub_pause_wait);
+
+		dev_replace->cursor_left = dev_replace->cursor_right;
+		dev_replace->item_needs_writeback = 1;
 		btrfs_put_block_group(cache);
 		if (ret)
 			break;
+		if (is_dev_replace &&
+		    atomic64_read(&dev_replace->num_write_errors) > 0) {
+			ret = -EIO;
+			break;
+		}
+		if (sctx->stat.malloc_errors > 0) {
+			ret = -ENOMEM;
+			break;
+		}
 
 		key.offset = found_key.offset + length;
 		btrfs_release_path(path);
@@ -2170,14 +2691,14 @@
 	return ret < 0 ? ret : 0;
 }
 
-static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
+static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
+					   struct btrfs_device *scrub_dev)
 {
 	int	i;
 	u64	bytenr;
 	u64	gen;
 	int	ret;
-	struct btrfs_device *device = sdev->dev;
-	struct btrfs_root *root = device->dev_root;
+	struct btrfs_root *root = sctx->dev_root;
 
 	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
 		return -EIO;
@@ -2186,15 +2707,16 @@
 
 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
 		bytenr = btrfs_sb_offset(i);
-		if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
+		if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
 			break;
 
-		ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
-				     BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
+		ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
+				  scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
+				  NULL, 1, bytenr);
 		if (ret)
 			return ret;
 	}
-	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
+	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
 
 	return 0;
 }
@@ -2202,19 +2724,38 @@
 /*
  * get a reference count on fs_info->scrub_workers. start worker if necessary
  */
-static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
+static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
+						int is_dev_replace)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret = 0;
 
 	mutex_lock(&fs_info->scrub_lock);
 	if (fs_info->scrub_workers_refcnt == 0) {
-		btrfs_init_workers(&fs_info->scrub_workers, "scrub",
-			   fs_info->thread_pool_size, &fs_info->generic_worker);
+		if (is_dev_replace)
+			btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
+					&fs_info->generic_worker);
+		else
+			btrfs_init_workers(&fs_info->scrub_workers, "scrub",
+					fs_info->thread_pool_size,
+					&fs_info->generic_worker);
 		fs_info->scrub_workers.idle_thresh = 4;
 		ret = btrfs_start_workers(&fs_info->scrub_workers);
 		if (ret)
 			goto out;
+		btrfs_init_workers(&fs_info->scrub_wr_completion_workers,
+				   "scrubwrc",
+				   fs_info->thread_pool_size,
+				   &fs_info->generic_worker);
+		fs_info->scrub_wr_completion_workers.idle_thresh = 2;
+		ret = btrfs_start_workers(
+				&fs_info->scrub_wr_completion_workers);
+		if (ret)
+			goto out;
+		btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1,
+				   &fs_info->generic_worker);
+		ret = btrfs_start_workers(&fs_info->scrub_nocow_workers);
+		if (ret)
+			goto out;
 	}
 	++fs_info->scrub_workers_refcnt;
 out:
@@ -2223,40 +2764,41 @@
 	return ret;
 }
 
-static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
+static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
-
 	mutex_lock(&fs_info->scrub_lock);
-	if (--fs_info->scrub_workers_refcnt == 0)
+	if (--fs_info->scrub_workers_refcnt == 0) {
 		btrfs_stop_workers(&fs_info->scrub_workers);
+		btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
+		btrfs_stop_workers(&fs_info->scrub_nocow_workers);
+	}
 	WARN_ON(fs_info->scrub_workers_refcnt < 0);
 	mutex_unlock(&fs_info->scrub_lock);
 }
 
-
-int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
-		    struct btrfs_scrub_progress *progress, int readonly)
+int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+		    u64 end, struct btrfs_scrub_progress *progress,
+		    int readonly, int is_dev_replace)
 {
-	struct scrub_dev *sdev;
-	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct scrub_ctx *sctx;
 	int ret;
 	struct btrfs_device *dev;
 
-	if (btrfs_fs_closing(root->fs_info))
+	if (btrfs_fs_closing(fs_info))
 		return -EINVAL;
 
 	/*
 	 * check some assumptions
 	 */
-	if (root->nodesize != root->leafsize) {
+	if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
 		printk(KERN_ERR
 		       "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
-		       root->nodesize, root->leafsize);
+		       fs_info->chunk_root->nodesize,
+		       fs_info->chunk_root->leafsize);
 		return -EINVAL;
 	}
 
-	if (root->nodesize > BTRFS_STRIPE_LEN) {
+	if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
 		/*
 		 * in this case scrub is unable to calculate the checksum
 		 * the way scrub is implemented. Do not handle this
@@ -2264,80 +2806,105 @@
 		 */
 		printk(KERN_ERR
 		       "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
-		       root->nodesize, BTRFS_STRIPE_LEN);
+		       fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
 		return -EINVAL;
 	}
 
-	if (root->sectorsize != PAGE_SIZE) {
+	if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
 		/* not supported for data w/o checksums */
 		printk(KERN_ERR
 		       "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
-		       root->sectorsize, (unsigned long long)PAGE_SIZE);
+		       fs_info->chunk_root->sectorsize,
+		       (unsigned long long)PAGE_SIZE);
 		return -EINVAL;
 	}
 
-	ret = scrub_workers_get(root);
+	if (fs_info->chunk_root->nodesize >
+	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
+	    fs_info->chunk_root->sectorsize >
+	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
+		/*
+		 * would exhaust the array bounds of pagev member in
+		 * struct scrub_block
+		 */
+		pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
+		       fs_info->chunk_root->nodesize,
+		       SCRUB_MAX_PAGES_PER_BLOCK,
+		       fs_info->chunk_root->sectorsize,
+		       SCRUB_MAX_PAGES_PER_BLOCK);
+		return -EINVAL;
+	}
+
+	ret = scrub_workers_get(fs_info, is_dev_replace);
 	if (ret)
 		return ret;
 
-	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
-	dev = btrfs_find_device(root, devid, NULL, NULL);
-	if (!dev || dev->missing) {
-		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-		scrub_workers_put(root);
+	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	dev = btrfs_find_device(fs_info, devid, NULL, NULL);
+	if (!dev || (dev->missing && !is_dev_replace)) {
+		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+		scrub_workers_put(fs_info);
 		return -ENODEV;
 	}
 	mutex_lock(&fs_info->scrub_lock);
 
-	if (!dev->in_fs_metadata) {
+	if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
 		mutex_unlock(&fs_info->scrub_lock);
-		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-		scrub_workers_put(root);
-		return -ENODEV;
+		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+		scrub_workers_put(fs_info);
+		return -EIO;
 	}
 
-	if (dev->scrub_device) {
+	btrfs_dev_replace_lock(&fs_info->dev_replace);
+	if (dev->scrub_device ||
+	    (!is_dev_replace &&
+	     btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
+		btrfs_dev_replace_unlock(&fs_info->dev_replace);
 		mutex_unlock(&fs_info->scrub_lock);
-		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-		scrub_workers_put(root);
+		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+		scrub_workers_put(fs_info);
 		return -EINPROGRESS;
 	}
-	sdev = scrub_setup_dev(dev);
-	if (IS_ERR(sdev)) {
+	btrfs_dev_replace_unlock(&fs_info->dev_replace);
+	sctx = scrub_setup_ctx(dev, is_dev_replace);
+	if (IS_ERR(sctx)) {
 		mutex_unlock(&fs_info->scrub_lock);
-		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-		scrub_workers_put(root);
-		return PTR_ERR(sdev);
+		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+		scrub_workers_put(fs_info);
+		return PTR_ERR(sctx);
 	}
-	sdev->readonly = readonly;
-	dev->scrub_device = sdev;
+	sctx->readonly = readonly;
+	dev->scrub_device = sctx;
 
 	atomic_inc(&fs_info->scrubs_running);
 	mutex_unlock(&fs_info->scrub_lock);
-	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
-	down_read(&fs_info->scrub_super_lock);
-	ret = scrub_supers(sdev);
-	up_read(&fs_info->scrub_super_lock);
+	if (!is_dev_replace) {
+		down_read(&fs_info->scrub_super_lock);
+		ret = scrub_supers(sctx, dev);
+		up_read(&fs_info->scrub_super_lock);
+	}
 
 	if (!ret)
-		ret = scrub_enumerate_chunks(sdev, start, end);
+		ret = scrub_enumerate_chunks(sctx, dev, start, end,
+					     is_dev_replace);
 
-	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
+	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
 	atomic_dec(&fs_info->scrubs_running);
 	wake_up(&fs_info->scrub_pause_wait);
 
-	wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0);
+	wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
 
 	if (progress)
-		memcpy(progress, &sdev->stat, sizeof(*progress));
+		memcpy(progress, &sctx->stat, sizeof(*progress));
 
 	mutex_lock(&fs_info->scrub_lock);
 	dev->scrub_device = NULL;
 	mutex_unlock(&fs_info->scrub_lock);
 
-	scrub_free_dev(sdev);
-	scrub_workers_put(root);
+	scrub_free_ctx(sctx);
+	scrub_workers_put(fs_info);
 
 	return ret;
 }
@@ -2377,9 +2944,8 @@
 	up_write(&root->fs_info->scrub_super_lock);
 }
 
-int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
+int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
 {
-
 	mutex_lock(&fs_info->scrub_lock);
 	if (!atomic_read(&fs_info->scrubs_running)) {
 		mutex_unlock(&fs_info->scrub_lock);
@@ -2399,23 +2965,18 @@
 	return 0;
 }
 
-int btrfs_scrub_cancel(struct btrfs_root *root)
+int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
+			   struct btrfs_device *dev)
 {
-	return __btrfs_scrub_cancel(root->fs_info);
-}
-
-int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct scrub_dev *sdev;
+	struct scrub_ctx *sctx;
 
 	mutex_lock(&fs_info->scrub_lock);
-	sdev = dev->scrub_device;
-	if (!sdev) {
+	sctx = dev->scrub_device;
+	if (!sctx) {
 		mutex_unlock(&fs_info->scrub_lock);
 		return -ENOTCONN;
 	}
-	atomic_inc(&sdev->cancel_req);
+	atomic_inc(&sctx->cancel_req);
 	while (dev->scrub_device) {
 		mutex_unlock(&fs_info->scrub_lock);
 		wait_event(fs_info->scrub_pause_wait,
@@ -2438,12 +2999,12 @@
 	 * does not go away in cancel_dev. FIXME: find a better solution
 	 */
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	dev = btrfs_find_device(root, devid, NULL, NULL);
+	dev = btrfs_find_device(fs_info, devid, NULL, NULL);
 	if (!dev) {
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 		return -ENODEV;
 	}
-	ret = btrfs_scrub_cancel_dev(root, dev);
+	ret = btrfs_scrub_cancel_dev(fs_info, dev);
 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
 	return ret;
@@ -2453,15 +3014,284 @@
 			 struct btrfs_scrub_progress *progress)
 {
 	struct btrfs_device *dev;
-	struct scrub_dev *sdev = NULL;
+	struct scrub_ctx *sctx = NULL;
 
 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
-	dev = btrfs_find_device(root, devid, NULL, NULL);
+	dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
 	if (dev)
-		sdev = dev->scrub_device;
-	if (sdev)
-		memcpy(progress, &sdev->stat, sizeof(*progress));
+		sctx = dev->scrub_device;
+	if (sctx)
+		memcpy(progress, &sctx->stat, sizeof(*progress));
 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
-	return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
+	return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
+}
+
+static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
+			       u64 extent_logical, u64 extent_len,
+			       u64 *extent_physical,
+			       struct btrfs_device **extent_dev,
+			       int *extent_mirror_num)
+{
+	u64 mapped_length;
+	struct btrfs_bio *bbio = NULL;
+	int ret;
+
+	mapped_length = extent_len;
+	ret = btrfs_map_block(fs_info, READ, extent_logical,
+			      &mapped_length, &bbio, 0);
+	if (ret || !bbio || mapped_length < extent_len ||
+	    !bbio->stripes[0].dev->bdev) {
+		kfree(bbio);
+		return;
+	}
+
+	*extent_physical = bbio->stripes[0].physical;
+	*extent_mirror_num = bbio->mirror_num;
+	*extent_dev = bbio->stripes[0].dev;
+	kfree(bbio);
+}
+
+static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
+			      struct scrub_wr_ctx *wr_ctx,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_device *dev,
+			      int is_dev_replace)
+{
+	WARN_ON(wr_ctx->wr_curr_bio != NULL);
+
+	mutex_init(&wr_ctx->wr_lock);
+	wr_ctx->wr_curr_bio = NULL;
+	if (!is_dev_replace)
+		return 0;
+
+	WARN_ON(!dev->bdev);
+	wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
+					 bio_get_nr_vecs(dev->bdev));
+	wr_ctx->tgtdev = dev;
+	atomic_set(&wr_ctx->flush_all_writes, 0);
+	return 0;
+}
+
+static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
+{
+	mutex_lock(&wr_ctx->wr_lock);
+	kfree(wr_ctx->wr_curr_bio);
+	wr_ctx->wr_curr_bio = NULL;
+	mutex_unlock(&wr_ctx->wr_lock);
+}
+
+static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
+			    int mirror_num, u64 physical_for_dev_replace)
+{
+	struct scrub_copy_nocow_ctx *nocow_ctx;
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+
+	nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
+	if (!nocow_ctx) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return -ENOMEM;
+	}
+
+	scrub_pending_trans_workers_inc(sctx);
+
+	nocow_ctx->sctx = sctx;
+	nocow_ctx->logical = logical;
+	nocow_ctx->len = len;
+	nocow_ctx->mirror_num = mirror_num;
+	nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
+	nocow_ctx->work.func = copy_nocow_pages_worker;
+	btrfs_queue_worker(&fs_info->scrub_nocow_workers,
+			   &nocow_ctx->work);
+
+	return 0;
+}
+
+static void copy_nocow_pages_worker(struct btrfs_work *work)
+{
+	struct scrub_copy_nocow_ctx *nocow_ctx =
+		container_of(work, struct scrub_copy_nocow_ctx, work);
+	struct scrub_ctx *sctx = nocow_ctx->sctx;
+	u64 logical = nocow_ctx->logical;
+	u64 len = nocow_ctx->len;
+	int mirror_num = nocow_ctx->mirror_num;
+	u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
+	int ret;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_path *path;
+	struct btrfs_root *root;
+	int not_written = 0;
+
+	fs_info = sctx->dev_root->fs_info;
+	root = fs_info->extent_root;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		not_written = 1;
+		goto out;
+	}
+
+	trans = btrfs_join_transaction(root);
+	if (IS_ERR(trans)) {
+		not_written = 1;
+		goto out;
+	}
+
+	ret = iterate_inodes_from_logical(logical, fs_info, path,
+					  copy_nocow_pages_for_inode,
+					  nocow_ctx);
+	if (ret != 0 && ret != -ENOENT) {
+		pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n",
+			(unsigned long long)logical,
+			(unsigned long long)physical_for_dev_replace,
+			(unsigned long long)len,
+			(unsigned long long)mirror_num, ret);
+		not_written = 1;
+		goto out;
+	}
+
+out:
+	if (trans && !IS_ERR(trans))
+		btrfs_end_transaction(trans, root);
+	if (not_written)
+		btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
+					    num_uncorrectable_read_errors);
+
+	btrfs_free_path(path);
+	kfree(nocow_ctx);
+
+	scrub_pending_trans_workers_dec(sctx);
+}
+
+static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
+{
+	unsigned long index;
+	struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
+	int ret = 0;
+	struct btrfs_key key;
+	struct inode *inode = NULL;
+	struct btrfs_root *local_root;
+	u64 physical_for_dev_replace;
+	u64 len;
+	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
+
+	key.objectid = root;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(local_root))
+		return PTR_ERR(local_root);
+
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.objectid = inum;
+	key.offset = 0;
+	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
+	len = nocow_ctx->len;
+	while (len >= PAGE_CACHE_SIZE) {
+		struct page *page = NULL;
+		int ret_sub;
+
+		index = offset >> PAGE_CACHE_SHIFT;
+
+		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+		if (!page) {
+			pr_err("find_or_create_page() failed\n");
+			ret = -ENOMEM;
+			goto next_page;
+		}
+
+		if (PageUptodate(page)) {
+			if (PageDirty(page))
+				goto next_page;
+		} else {
+			ClearPageError(page);
+			ret_sub = extent_read_full_page(&BTRFS_I(inode)->
+							 io_tree,
+							page, btrfs_get_extent,
+							nocow_ctx->mirror_num);
+			if (ret_sub) {
+				ret = ret_sub;
+				goto next_page;
+			}
+			wait_on_page_locked(page);
+			if (!PageUptodate(page)) {
+				ret = -EIO;
+				goto next_page;
+			}
+		}
+		ret_sub = write_page_nocow(nocow_ctx->sctx,
+					   physical_for_dev_replace, page);
+		if (ret_sub) {
+			ret = ret_sub;
+			goto next_page;
+		}
+
+next_page:
+		if (page) {
+			unlock_page(page);
+			put_page(page);
+		}
+		offset += PAGE_CACHE_SIZE;
+		physical_for_dev_replace += PAGE_CACHE_SIZE;
+		len -= PAGE_CACHE_SIZE;
+	}
+
+	if (inode)
+		iput(inode);
+	return ret;
+}
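
copy_nocow_pages_for_inode() above walks the range one page at a time: each page is taken from (or read into) the page cache, then written to the matching physical offset on the replace target. Stripped of the page-cache machinery, the data movement is an offset-preserving copy loop, roughly like this userspace sketch (hypothetical file descriptors, not the kernel path):

/* Illustrative sketch only -- page-sized, offset-preserving copy. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define PAGE_SZ 4096

static int copy_range(int src_fd, off_t src_off,
		      int dst_fd, off_t dst_off, size_t len)
{
	char buf[PAGE_SZ];

	while (len >= PAGE_SZ) {
		if (pread(src_fd, buf, PAGE_SZ, src_off) != PAGE_SZ)
			return -1;
		if (pwrite(dst_fd, buf, PAGE_SZ, dst_off) != PAGE_SZ)
			return -1;
		src_off += PAGE_SZ;	/* source and target offsets */
		dst_off += PAGE_SZ;	/* advance in lockstep */
		len -= PAGE_SZ;
	}
	return 0;
}

int main(void)
{
	int src = open("/dev/zero", O_RDONLY);
	int dst = open("/dev/null", O_WRONLY);

	if (src < 0 || dst < 0)
		return 1;
	printf("copy_range() = %d\n", copy_range(src, 0, dst, 0, 4 * PAGE_SZ));
	return 0;
}
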
+
+static int write_page_nocow(struct scrub_ctx *sctx,
+			    u64 physical_for_dev_replace, struct page *page)
+{
+	struct bio *bio;
+	struct btrfs_device *dev;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(compl);
+
+	dev = sctx->wr_ctx.tgtdev;
+	if (!dev)
+		return -EIO;
+	if (!dev->bdev) {
+		printk_ratelimited(KERN_WARNING
+			"btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
+		return -EIO;
+	}
+	bio = bio_alloc(GFP_NOFS, 1);
+	if (!bio) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return -ENOMEM;
+	}
+	bio->bi_private = &compl;
+	bio->bi_end_io = scrub_complete_bio_end_io;
+	bio->bi_size = 0;
+	bio->bi_sector = physical_for_dev_replace >> 9;
+	bio->bi_bdev = dev->bdev;
+	ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+	if (ret != PAGE_CACHE_SIZE) {
+leave_with_eio:
+		bio_put(bio);
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
+		return -EIO;
+	}
+	btrfsic_submit_bio(WRITE_SYNC, bio);
+	wait_for_completion(&compl);
+
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		goto leave_with_eio;
+
+	bio_put(bio);
+	return 0;
 }
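
Several of the scrub.c hunks above implement the same pause/flush protocol: set flush_all_writes so that completion workers push out even partially filled write bios, submit whatever is still queued, then wait until bios_in_flight drops to zero. The waiting half of that protocol looks roughly like this pthreads sketch (hypothetical names; the kernel uses wait_event()/wake_up() instead of a condition variable):

/* Illustrative sketch only -- in-flight counter with idle wait. */
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int bios_in_flight;

static void pending_inc(void)		/* before submitting a bio */
{
	pthread_mutex_lock(&lock);
	bios_in_flight++;
	pthread_mutex_unlock(&lock);
}

static void pending_dec(void)		/* from the completion handler */
{
	pthread_mutex_lock(&lock);
	if (--bios_in_flight == 0)
		pthread_cond_broadcast(&cond);	/* cf. wake_up() */
	pthread_mutex_unlock(&lock);
}

static void wait_until_idle(void)	/* cf. wait_event(..., == 0) */
{
	pthread_mutex_lock(&lock);
	while (bios_in_flight != 0)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pending_inc();
	pending_dec();
	wait_until_idle();	/* returns at once: nothing in flight */
	return 0;
}
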
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e78b297..5445454 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4397,9 +4397,9 @@
 	if (!path)
 		return -ENOMEM;
 
-	spin_lock(&send_root->root_times_lock);
+	spin_lock(&send_root->root_item_lock);
 	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
-	spin_unlock(&send_root->root_times_lock);
+	spin_unlock(&send_root->root_item_lock);
 
 	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
 	key.type = BTRFS_INODE_ITEM_KEY;
@@ -4422,9 +4422,9 @@
 	 * Make sure the tree has not changed after re-joining. We detect this
 	 * by comparing start_ctransid and ctransid. They should always match.
 	 */
-	spin_lock(&send_root->root_times_lock);
+	spin_lock(&send_root->root_item_lock);
 	ctransid = btrfs_root_ctransid(&send_root->root_item);
-	spin_unlock(&send_root->root_times_lock);
+	spin_unlock(&send_root->root_item_lock);
 
 	if (ctransid != start_ctransid) {
 		WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
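
The send.c hunks only rename the lock; the surrounding logic is an optimistic consistency check: sample a generation counter (ctransid) under the lock before the long-running work, re-sample it afterwards, and bail out on a mismatch. A compact sketch of that sample-and-recheck pattern (hypothetical names; the kernel takes a spinlock rather than a mutex):

/* Illustrative sketch only -- detect concurrent modification. */
#include <pthread.h>

static pthread_mutex_t item_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long ctransid;	/* bumped on every modification */

static unsigned long long sample_gen(void)
{
	unsigned long long v;

	pthread_mutex_lock(&item_lock);
	v = ctransid;
	pthread_mutex_unlock(&item_lock);
	return v;
}

static int do_checked_work(void)
{
	unsigned long long start = sample_gen();

	/* ... long-running read-only traversal would run here ... */

	if (sample_gen() != start)
		return -1;	/* tree changed underneath: give up */
	return 0;
}

int main(void)
{
	return do_checked_work() ? 1 : 0;
}
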
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 915ac14..99545df 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -55,6 +55,7 @@
 #include "export.h"
 #include "compression.h"
 #include "rcu-string.h"
+#include "dev-replace.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/btrfs.h>
@@ -116,7 +117,16 @@
 	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
 		sb->s_flags |= MS_RDONLY;
 		printk(KERN_INFO "btrfs is forced readonly\n");
-		__btrfs_scrub_cancel(fs_info);
+		/*
+		 * Note that a running device replace operation is not
+		 * canceled here although there is no way to update
+		 * the progress. Canceling would add the risk of a
+		 * deadlock, therefore it is omitted. The only penalty
+		 * is that some I/O remains active until the procedure
+		 * completes. The next time the filesystem is mounted
+		 * writable again, the device replace operation
+		 * continues.
+		 */
 //		WARN_ON(1);
 	}
 }
@@ -1186,7 +1196,8 @@
 	btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size);
 	btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size);
 	btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size);
-	btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size);
+	btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers,
+			      new_pool_size);
 }
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -1215,8 +1226,15 @@
 		return 0;
 
 	if (*flags & MS_RDONLY) {
+		/*
+		 * this also happens on 'umount -rf' or on shutdown, when
+		 * the filesystem is busy.
+		 */
 		sb->s_flags |= MS_RDONLY;
 
+		btrfs_dev_replace_suspend_for_unmount(fs_info);
+		btrfs_scrub_cancel(fs_info);
+
 		ret = btrfs_commit_super(root);
 		if (ret)
 			goto restore;
@@ -1226,6 +1244,15 @@
 			goto restore;
 		}
 
+		if (fs_info->fs_devices->missing_devices >
+		     fs_info->num_tolerated_disk_barrier_failures &&
+		    !(*flags & MS_RDONLY)) {
+			printk(KERN_WARNING
+			       "Btrfs: too many missing devices, writeable remount is not allowed\n");
+			ret = -EACCES;
+			goto restore;
+		}
+
 		if (btrfs_super_log_root(fs_info->super_copy) != 0) {
 			ret = -EINVAL;
 			goto restore;
@@ -1244,6 +1271,11 @@
 		if (ret)
 			goto restore;
 
+		ret = btrfs_resume_dev_replace_async(fs_info);
+		if (ret) {
+			pr_warn("btrfs: failed to resume dev_replace\n");
+			goto restore;
+		}
 		sb->s_flags &= ~MS_RDONLY;
 	}
 
@@ -1336,7 +1368,8 @@
 		min_stripe_size = BTRFS_STRIPE_LEN;
 
 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
-		if (!device->in_fs_metadata || !device->bdev)
+		if (!device->in_fs_metadata || !device->bdev ||
+		    device->is_tgtdev_for_dev_replace)
 			continue;
 
 		avail_space = device->total_bytes - device->bytes_used;
@@ -1647,10 +1680,14 @@
 	if (err)
 		goto free_ordered_data;
 
-	err = btrfs_interface_init();
+	err = btrfs_auto_defrag_init();
 	if (err)
 		goto free_delayed_inode;
 
+	err = btrfs_interface_init();
+	if (err)
+		goto free_auto_defrag;
+
 	err = register_filesystem(&btrfs_fs_type);
 	if (err)
 		goto unregister_ioctl;
@@ -1662,6 +1699,8 @@
 
 unregister_ioctl:
 	btrfs_interface_exit();
+free_auto_defrag:
+	btrfs_auto_defrag_exit();
 free_delayed_inode:
 	btrfs_delayed_inode_exit();
 free_ordered_data:
@@ -1681,6 +1720,7 @@
 static void __exit exit_btrfs_fs(void)
 {
 	btrfs_destroy_cachep();
+	btrfs_auto_defrag_exit();
 	btrfs_delayed_inode_exit();
 	ordered_data_exit();
 	extent_map_exit();
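
The init_btrfs_fs() hunk above threads btrfs_auto_defrag_init() into the existing goto-based unwind chain: every successfully initialized subsystem gains a label that later failures fall through in reverse order, so a partial initialization is always undone. A minimal standalone sketch of the idiom (subsystem names hypothetical):

/* Illustrative sketch only -- goto-based error unwinding. */
#include <stdio.h>

static int init_a(void) { return 0; }
static void exit_a(void) { }
static int init_b(void) { return 0; }
static void exit_b(void) { }
static int init_c(void) { return -1; }	/* pretend this step fails */

static int init_all(void)
{
	int err;

	err = init_a();
	if (err)
		goto out;
	err = init_b();
	if (err)
		goto undo_a;
	err = init_c();
	if (err)
		goto undo_b;	/* later failures unwind earlier successes */
	return 0;

undo_b:
	exit_b();
undo_a:
	exit_a();
out:
	return err;
}

int main(void)
{
	printf("init_all() = %d\n", init_all());	/* prints -1 */
	return 0;
}
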
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 04bbfb1..87fac9a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -30,6 +30,7 @@
 #include "tree-log.h"
 #include "inode-map.h"
 #include "volumes.h"
+#include "dev-replace.h"
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
@@ -145,16 +146,12 @@
 	 * the log must never go across transaction boundaries.
 	 */
 	smp_mb();
-	if (!list_empty(&fs_info->tree_mod_seq_list)) {
-		printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when "
+	if (!list_empty(&fs_info->tree_mod_seq_list))
+		WARN(1, KERN_ERR "btrfs: tree_mod_seq_list not empty when "
 			"creating a fresh transaction\n");
-		WARN_ON(1);
-	}
-	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) {
-		printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
+	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
+		WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
 			"creating a fresh transaction\n");
-		WARN_ON(1);
-	}
 	atomic_set(&fs_info->tree_mod_seq, 0);
 
 	spin_lock_init(&cur_trans->commit_lock);
@@ -295,9 +292,9 @@
 	return 0;
 }
 
-static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
-						    u64 num_items, int type,
-						    int noflush)
+static struct btrfs_trans_handle *
+start_transaction(struct btrfs_root *root, u64 num_items, int type,
+		  enum btrfs_reserve_flush_enum flush)
 {
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
@@ -312,6 +309,7 @@
 		WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
 		h = current->journal_info;
 		h->use_count++;
+		WARN_ON(h->use_count > 2);
 		h->orig_rsv = h->block_rsv;
 		h->block_rsv = NULL;
 		goto got_it;
@@ -331,14 +329,9 @@
 		}
 
 		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-		if (noflush)
-			ret = btrfs_block_rsv_add_noflush(root,
-						&root->fs_info->trans_block_rsv,
-						num_bytes);
-		else
-			ret = btrfs_block_rsv_add(root,
-						&root->fs_info->trans_block_rsv,
-						num_bytes);
+		ret = btrfs_block_rsv_add(root,
+					  &root->fs_info->trans_block_rsv,
+					  num_bytes, flush);
 		if (ret)
 			return ERR_PTR(ret);
 	}
@@ -422,13 +415,15 @@
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 						   int num_items)
 {
-	return start_transaction(root, num_items, TRANS_START, 0);
+	return start_transaction(root, num_items, TRANS_START,
+				 BTRFS_RESERVE_FLUSH_ALL);
 }
 
-struct btrfs_trans_handle *btrfs_start_transaction_noflush(
+struct btrfs_trans_handle *btrfs_start_transaction_lflush(
 					struct btrfs_root *root, int num_items)
 {
-	return start_transaction(root, num_items, TRANS_START, 1);
+	return start_transaction(root, num_items, TRANS_START,
+				 BTRFS_RESERVE_FLUSH_LIMIT);
 }
 
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
@@ -461,28 +456,31 @@
 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 {
 	struct btrfs_transaction *cur_trans = NULL, *t;
-	int ret;
+	int ret = 0;
 
-	ret = 0;
 	if (transid) {
 		if (transid <= root->fs_info->last_trans_committed)
 			goto out;
 
+		ret = -EINVAL;
 		/* find specified transaction */
 		spin_lock(&root->fs_info->trans_lock);
 		list_for_each_entry(t, &root->fs_info->trans_list, list) {
 			if (t->transid == transid) {
 				cur_trans = t;
 				atomic_inc(&cur_trans->use_count);
+				ret = 0;
 				break;
 			}
-			if (t->transid > transid)
+			if (t->transid > transid) {
+				ret = 0;
 				break;
+			}
 		}
 		spin_unlock(&root->fs_info->trans_lock);
-		ret = -EINVAL;
+		/* The specified transaction doesn't exist */
 		if (!cur_trans)
-			goto out;  /* bad transid */
+			goto out;
 	} else {
 		/* find newest transaction that is committing | committed */
 		spin_lock(&root->fs_info->trans_lock);
@@ -502,9 +500,7 @@
 	}
 
 	wait_for_commit(root, cur_trans);
-
 	put_transaction(cur_trans);
-	ret = 0;
 out:
 	return ret;
 }
@@ -851,7 +847,9 @@
 		return ret;
 
 	ret = btrfs_run_dev_stats(trans, root->fs_info);
-	BUG_ON(ret);
+	WARN_ON(ret);
+	ret = btrfs_run_dev_replace(trans, root->fs_info);
+	WARN_ON(ret);
 
 	ret = btrfs_run_qgroups(trans, root->fs_info);
 	BUG_ON(ret);
@@ -874,6 +872,8 @@
 	switch_commit_root(fs_info->extent_root);
 	up_write(&fs_info->extent_commit_sem);
 
+	btrfs_after_dev_replace_commit(fs_info);
+
 	return 0;
 }
 
@@ -958,7 +958,6 @@
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_trans_handle *trans;
 	int ret;
-	unsigned long nr;
 
 	if (xchg(&root->defrag_running, 1))
 		return 0;
@@ -970,9 +969,8 @@
 
 		ret = btrfs_defrag_leaves(trans, root, cacheonly);
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
-		btrfs_btree_balance_dirty(info->tree_root, nr);
+		btrfs_btree_balance_dirty(info->tree_root);
 		cond_resched();
 
 		if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
@@ -1032,8 +1030,9 @@
 	btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
 
 	if (to_reserve > 0) {
-		ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv,
-						  to_reserve);
+		ret = btrfs_block_rsv_add(root, &pending->block_rsv,
+					  to_reserve,
+					  BTRFS_RESERVE_NO_FLUSH);
 		if (ret) {
 			pending->error = ret;
 			goto no_free_objectid;
@@ -1191,7 +1190,7 @@
 				    parent_inode, &key,
 				    BTRFS_FT_DIR, index);
 	/* We have checked the name at the beginning, so it is impossible. */
-	BUG_ON(ret == -EEXIST);
+	BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
 	if (ret) {
 		btrfs_abort_transaction(trans, root, ret);
 		goto fail;
@@ -1309,9 +1308,10 @@
 	 * We've got freeze protection passed with the transaction.
 	 * Tell lockdep about it.
 	 */
-	rwsem_acquire_read(
-		&ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-		0, 1, _THIS_IP_);
+	if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
+		rwsem_acquire_read(
+		     &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
+		     0, 1, _THIS_IP_);
 
 	current->journal_info = ac->newtrans;
 
@@ -1349,8 +1349,10 @@
 	 * Tell lockdep we've released the freeze rwsem, since the
 	 * async commit thread will be the one to unlock it.
 	 */
-	rwsem_release(&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-		      1, _THIS_IP_);
+	if (trans->type < TRANS_JOIN_NOLOCK)
+		rwsem_release(
+			&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
+			1, _THIS_IP_);
 
 	schedule_delayed_work(&ac->work, 0);
 
@@ -1400,6 +1402,48 @@
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 }
 
+static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root)
+{
+	int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
+	int snap_pending = 0;
+	int ret;
+
+	if (!flush_on_commit) {
+		spin_lock(&root->fs_info->trans_lock);
+		if (!list_empty(&trans->transaction->pending_snapshots))
+			snap_pending = 1;
+		spin_unlock(&root->fs_info->trans_lock);
+	}
+
+	if (flush_on_commit || snap_pending) {
+		btrfs_start_delalloc_inodes(root, 1);
+		btrfs_wait_ordered_extents(root, 1);
+	}
+
+	ret = btrfs_run_delayed_items(trans, root);
+	if (ret)
+		return ret;
+
+	/*
+	 * running the delayed items may have added new refs. account
+	 * them now so that they hinder processing of more delayed refs
+	 * as little as possible.
+	 */
+	btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+
+	/*
+	 * renames don't use btrfs_join_transaction, so, once we
+	 * set the transaction to blocked above, we aren't going
+	 * to get any new ordered operations.  We can safely run
+	 * it here and know for sure that nothing new will be added
+	 * to the list
+	 */
+	btrfs_run_ordered_operations(root, 1);
+
+	return 0;
+}
+
 /*
  * btrfs_transaction state sequence:
  *    in_commit = 0, blocked = 0  (initial)
@@ -1414,15 +1458,20 @@
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	struct btrfs_transaction *prev_trans = NULL;
 	DEFINE_WAIT(wait);
-	int ret = -EIO;
+	int ret;
 	int should_grow = 0;
 	unsigned long now = get_seconds();
-	int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
 
-	btrfs_run_ordered_operations(root, 0);
-
-	if (cur_trans->aborted)
+	ret = btrfs_run_ordered_operations(root, 0);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
 		goto cleanup_transaction;
+	}
+
+	if (cur_trans->aborted) {
+		ret = cur_trans->aborted;
+		goto cleanup_transaction;
+	}
 
 	/* make a pass through all the delayed refs we have so far
 	 * any runnings procs may add more while we are here
@@ -1490,39 +1539,14 @@
 		should_grow = 1;
 
 	do {
-		int snap_pending = 0;
-
 		joined = cur_trans->num_joined;
-		if (!list_empty(&trans->transaction->pending_snapshots))
-			snap_pending = 1;
 
 		WARN_ON(cur_trans != trans->transaction);
 
-		if (flush_on_commit || snap_pending) {
-			btrfs_start_delalloc_inodes(root, 1);
-			btrfs_wait_ordered_extents(root, 1);
-		}
-
-		ret = btrfs_run_delayed_items(trans, root);
+		ret = btrfs_flush_all_pending_stuffs(trans, root);
 		if (ret)
 			goto cleanup_transaction;
 
-		/*
-		 * running the delayed items may have added new refs. account
-		 * them now so that they hinder processing of more delayed refs
-		 * as little as possible.
-		 */
-		btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
-
-		/*
-		 * rename don't use btrfs_join_transaction, so, once we
-		 * set the transaction to blocked above, we aren't going
-		 * to get any new ordered operations.  We can safely run
-		 * it here and no for sure that nothing new will be added
-		 * to the list
-		 */
-		btrfs_run_ordered_operations(root, 1);
-
 		prepare_to_wait(&cur_trans->writer_wait, &wait,
 				TASK_UNINTERRUPTIBLE);
 
@@ -1535,6 +1559,10 @@
 	} while (atomic_read(&cur_trans->num_writers) > 1 ||
 		 (should_grow && cur_trans->num_joined != joined));
 
+	ret = btrfs_flush_all_pending_stuffs(trans, root);
+	if (ret)
+		goto cleanup_transaction;
+
 	/*
 	 * Ok now we need to make sure to block out any other joins while we
 	 * commit the transaction.  We could have started a join before setting
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 8096194..0e8aa1e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -105,7 +105,7 @@
 			  struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 						   int num_items);
-struct btrfs_trans_handle *btrfs_start_transaction_noflush(
+struct btrfs_trans_handle *btrfs_start_transaction_lflush(
 					struct btrfs_root *root, int num_items);
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 81e407d..83186c7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2952,33 +2952,9 @@
 			    struct btrfs_inode_item *item,
 			    struct inode *inode, int log_inode_only)
 {
-	btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
-	btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
-	btrfs_set_inode_mode(leaf, item, inode->i_mode);
-	btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+	struct btrfs_map_token token;
 
-	btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
-			       inode->i_atime.tv_sec);
-	btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
-				inode->i_atime.tv_nsec);
-
-	btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
-			       inode->i_mtime.tv_sec);
-	btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
-				inode->i_mtime.tv_nsec);
-
-	btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
-			       inode->i_ctime.tv_sec);
-	btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
-				inode->i_ctime.tv_nsec);
-
-	btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
-
-	btrfs_set_inode_sequence(leaf, item, inode->i_version);
-	btrfs_set_inode_transid(leaf, item, trans->transid);
-	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
-	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
-	btrfs_set_inode_block_group(leaf, item, 0);
+	btrfs_init_map_token(&token);
 
 	if (log_inode_only) {
 		/* set the generation to zero so the recover code
@@ -2986,14 +2962,63 @@
 		 * just to say 'this inode exists' and a logging
 		 * to say 'update this inode with these values'
 		 */
-		btrfs_set_inode_generation(leaf, item, 0);
-		btrfs_set_inode_size(leaf, item, 0);
+		btrfs_set_token_inode_generation(leaf, item, 0, &token);
+		btrfs_set_token_inode_size(leaf, item, 0, &token);
 	} else {
-		btrfs_set_inode_generation(leaf, item,
-					   BTRFS_I(inode)->generation);
-		btrfs_set_inode_size(leaf, item, inode->i_size);
+		btrfs_set_token_inode_generation(leaf, item,
+						 BTRFS_I(inode)->generation,
+						 &token);
+		btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
 	}
 
+	btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
+	btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
+	btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
+	btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
+
+	btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+				     inode->i_atime.tv_sec, &token);
+	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+				      inode->i_atime.tv_nsec, &token);
+
+	btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+				     inode->i_mtime.tv_sec, &token);
+	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+				      inode->i_mtime.tv_nsec, &token);
+
+	btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+				     inode->i_ctime.tv_sec, &token);
+	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+				      inode->i_ctime.tv_nsec, &token);
+
+	btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
+				     &token);
+
+	btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
+	btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
+	btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
+	btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
+	btrfs_set_token_inode_block_group(leaf, item, 0, &token);
+}
+
+static int log_inode_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *log, struct btrfs_path *path,
+			  struct inode *inode)
+{
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_key key;
+	int ret;
+
+	memcpy(&key, &BTRFS_I(inode)->location, sizeof(key));
+	ret = btrfs_insert_empty_item(trans, log, path, &key,
+				      sizeof(*inode_item));
+	if (ret && ret != -EEXIST)
+		return ret;
+	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_inode_item);
+	fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
+	btrfs_release_path(path);
+	return 0;
 }
 
 static noinline int copy_items(struct btrfs_trans_handle *trans,
@@ -3130,151 +3155,234 @@
 	return 0;
 }
 
-struct log_args {
-	struct extent_buffer *src;
-	u64 next_offset;
-	int start_slot;
-	int nr;
-};
+static int drop_adjacent_extents(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root, struct inode *inode,
+				 struct extent_map *em,
+				 struct btrfs_path *path)
+{
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	struct btrfs_key key, new_key;
+	struct btrfs_map_token token;
+	u64 extent_end;
+	u64 extent_offset = 0;
+	int extent_type;
+	int del_slot = 0;
+	int del_nr = 0;
+	int ret = 0;
+
+	while (1) {
+		btrfs_init_map_token(&token);
+		leaf = path->nodes[0];
+		path->slots[0]++;
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			if (del_nr) {
+				ret = btrfs_del_items(trans, root, path,
+						      del_slot, del_nr);
+				if (ret)
+					return ret;
+				del_nr = 0;
+			}
+
+			ret = btrfs_next_leaf_write(trans, root, path, 1);
+			if (ret < 0)
+				return ret;
+			if (ret > 0)
+				return 0;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != btrfs_ino(inode) ||
+		    key.type != BTRFS_EXTENT_DATA_KEY ||
+		    key.offset >= em->start + em->len)
+			break;
+
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		extent_type = btrfs_token_file_extent_type(leaf, fi, &token);
+		if (extent_type == BTRFS_FILE_EXTENT_REG ||
+		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+			extent_offset = btrfs_token_file_extent_offset(leaf,
+								fi, &token);
+			extent_end = key.offset +
+				btrfs_token_file_extent_num_bytes(leaf, fi,
+								  &token);
+		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+			extent_end = key.offset +
+				btrfs_file_extent_inline_len(leaf, fi);
+		} else {
+			BUG();
+		}
+
+		if (extent_end <= em->len + em->start) {
+			if (!del_nr)
+				del_slot = path->slots[0];
+			del_nr++;
+			continue;
+		}
+
+		/*
+		 * Ok so we'll ignore previous items if we log a new extent,
+		 * which can lead to overlapping extents, so if we have an
+		 * existing extent we want to adjust, we _have_ to check the
+		 * next one to make sure we even need this extent anymore;
+		 * this keeps us from panicking in set_item_key_safe.
+		 */
+		if (path->slots[0] < btrfs_header_nritems(leaf) - 1) {
+			struct btrfs_key tmp_key;
+
+			btrfs_item_key_to_cpu(leaf, &tmp_key,
+					      path->slots[0] + 1);
+			if (tmp_key.objectid == btrfs_ino(inode) &&
+			    tmp_key.type == BTRFS_EXTENT_DATA_KEY &&
+			    tmp_key.offset <= em->start + em->len) {
+				if (!del_nr)
+					del_slot = path->slots[0];
+				del_nr++;
+				continue;
+			}
+		}
+
+		BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
+		memcpy(&new_key, &key, sizeof(new_key));
+		new_key.offset = em->start + em->len;
+		btrfs_set_item_key_safe(trans, root, path, &new_key);
+		extent_offset += em->start + em->len - key.offset;
+		btrfs_set_token_file_extent_offset(leaf, fi, extent_offset,
+						   &token);
+		btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end -
+						      (em->start + em->len),
+						      &token);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+
+	if (del_nr)
+		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+
+	return ret;
+}
 
 static int log_one_extent(struct btrfs_trans_handle *trans,
 			  struct inode *inode, struct btrfs_root *root,
-			  struct extent_map *em, struct btrfs_path *path,
-			  struct btrfs_path *dst_path, struct log_args *args)
+			  struct extent_map *em, struct btrfs_path *path)
 {
 	struct btrfs_root *log = root->log_root;
 	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	struct list_head ordered_sums;
+	struct btrfs_map_token token;
 	struct btrfs_key key;
-	u64 start = em->mod_start;
-	u64 search_start = start;
-	u64 len = em->mod_len;
-	u64 num_bytes;
-	int nritems;
+	u64 csum_offset = em->mod_start - em->start;
+	u64 csum_len = em->mod_len;
+	u64 extent_offset = em->start - em->orig_start;
+	u64 block_len;
 	int ret;
+	bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-	if (BTRFS_I(inode)->logged_trans == trans->transid) {
-		ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
-					   start + len, NULL, 0);
-		if (ret)
-			return ret;
+	INIT_LIST_HEAD(&ordered_sums);
+	btrfs_init_map_token(&token);
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = em->start;
+	path->really_keep_locks = 1;
+
+	ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
+	if (ret && ret != -EEXIST) {
+		path->really_keep_locks = 0;
+		return ret;
+	}
+	leaf = path->nodes[0];
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
+					       &token);
+	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+		skip_csum = true;
+		btrfs_set_token_file_extent_type(leaf, fi,
+						 BTRFS_FILE_EXTENT_PREALLOC,
+						 &token);
+	} else {
+		btrfs_set_token_file_extent_type(leaf, fi,
+						 BTRFS_FILE_EXTENT_REG,
+						 &token);
+		if (em->block_start == 0)
+			skip_csum = true;
 	}
 
-	while (len) {
-		if (args->nr)
-			goto next_slot;
-again:
-		key.objectid = btrfs_ino(inode);
-		key.type = BTRFS_EXTENT_DATA_KEY;
-		key.offset = search_start;
-
-		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		if (ret < 0)
-			return ret;
-
-		if (ret) {
-			/*
-			 * A rare case were we can have an em for a section of a
-			 * larger extent so we need to make sure that this em
-			 * falls within the extent we've found.  If not we just
-			 * bail and go back to ye-olde way of doing things but
-			 * it happens often enough in testing that we need to do
-			 * this dance to make sure.
-			 */
-			do {
-				if (path->slots[0] == 0) {
-					btrfs_release_path(path);
-					if (search_start == 0)
-						return -ENOENT;
-					search_start--;
-					goto again;
-				}
-
-				path->slots[0]--;
-				btrfs_item_key_to_cpu(path->nodes[0], &key,
-						      path->slots[0]);
-				if (key.objectid != btrfs_ino(inode) ||
-				    key.type != BTRFS_EXTENT_DATA_KEY) {
-					btrfs_release_path(path);
-					return -ENOENT;
-				}
-			} while (key.offset > start);
-
-			fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
-					    struct btrfs_file_extent_item);
-			num_bytes = btrfs_file_extent_num_bytes(path->nodes[0],
-								fi);
-			if (key.offset + num_bytes <= start) {
-				btrfs_release_path(path);
-				return -ENOENT;
-			}
-		}
-		args->src = path->nodes[0];
-next_slot:
-		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-		fi = btrfs_item_ptr(args->src, path->slots[0],
-				    struct btrfs_file_extent_item);
-		if (args->nr &&
-		    args->start_slot + args->nr == path->slots[0]) {
-			args->nr++;
-		} else if (args->nr) {
-			ret = copy_items(trans, inode, dst_path, args->src,
-					 args->start_slot, args->nr,
-					 LOG_INODE_ALL);
-			if (ret)
-				return ret;
-			args->nr = 1;
-			args->start_slot = path->slots[0];
-		} else if (!args->nr) {
-			args->nr = 1;
-			args->start_slot = path->slots[0];
-		}
-		nritems = btrfs_header_nritems(path->nodes[0]);
-		path->slots[0]++;
-		num_bytes = btrfs_file_extent_num_bytes(args->src, fi);
-		if (len < num_bytes) {
-			/* I _think_ this is ok, envision we write to a
-			 * preallocated space that is adjacent to a previously
-			 * written preallocated space that gets merged when we
-			 * mark this preallocated space written.  If we do not
-			 * have the adjacent extent in cache then when we copy
-			 * this extent it could end up being larger than our EM
-			 * thinks it is, which is a-ok, so just set len to 0.
-			 */
-			len = 0;
-		} else {
-			len -= num_bytes;
-		}
-		start = key.offset + num_bytes;
-		args->next_offset = start;
-		search_start = start;
-
-		if (path->slots[0] < nritems) {
-			if (len)
-				goto next_slot;
-			break;
-		}
-
-		if (args->nr) {
-			ret = copy_items(trans, inode, dst_path, args->src,
-					 args->start_slot, args->nr,
-					 LOG_INODE_ALL);
-			if (ret)
-				return ret;
-			args->nr = 0;
-			btrfs_release_path(path);
-		}
+	block_len = max(em->block_len, em->orig_block_len);
+	if (em->compress_type != BTRFS_COMPRESS_NONE) {
+		btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
+							em->block_start,
+							&token);
+		btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
+							   &token);
+	} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+		btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
+							em->block_start -
+							extent_offset, &token);
+		btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
+							   &token);
+	} else {
+		btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
+		btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
+							   &token);
 	}
 
-	return 0;
+	btrfs_set_token_file_extent_offset(leaf, fi,
+					   em->start - em->orig_start,
+					   &token);
+	btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
+	btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->len, &token);
+	btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
+						&token);
+	btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
+	btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
+	btrfs_mark_buffer_dirty(leaf);
+
+	/*
+	 * Have to check the extent to the right of us to make sure it doesn't
+	 * fall in our current range.  We're ok if the previous extent is in our
+	 * range since the recovery stuff will run us in key order and thus just
+	 * drop the part we overwrote.
+	 */
+	ret = drop_adjacent_extents(trans, log, inode, em, path);
+	btrfs_release_path(path);
+	path->really_keep_locks = 0;
+	if (ret)
+		return ret;
+
+	if (skip_csum)
+		return 0;
+
+	/* block start is already adjusted for the file extent offset. */
+	ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
+				       em->block_start + csum_offset,
+				       em->block_start + csum_offset +
+				       csum_len - 1, &ordered_sums, 0);
+	if (ret)
+		return ret;
+
+	while (!list_empty(&ordered_sums)) {
+		struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
+						   struct btrfs_ordered_sum,
+						   list);
+		if (!ret)
+			ret = btrfs_csum_file_blocks(trans, log, sums);
+		list_del(&sums->list);
+		kfree(sums);
+	}
+
+	return ret;
 }
 
 static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     struct inode *inode,
-				     struct btrfs_path *path,
-				     struct btrfs_path *dst_path)
+				     struct btrfs_path *path)
 {
-	struct log_args args;
 	struct extent_map *em, *n;
 	struct list_head extents;
 	struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
@@ -3283,8 +3391,6 @@
 
 	INIT_LIST_HEAD(&extents);
 
-	memset(&args, 0, sizeof(args));
-
 	write_lock(&tree->lock);
 	test_gen = root->fs_info->last_trans_committed;
 
@@ -3317,34 +3423,13 @@
 
 		write_unlock(&tree->lock);
 
-		/*
-		 * If the previous EM and the last extent we left off on aren't
-		 * sequential then we need to copy the items we have and redo
-		 * our search
-		 */
-		if (args.nr && em->mod_start != args.next_offset) {
-			ret = copy_items(trans, inode, dst_path, args.src,
-					 args.start_slot, args.nr,
-					 LOG_INODE_ALL);
-			if (ret) {
-				free_extent_map(em);
-				write_lock(&tree->lock);
-				continue;
-			}
-			btrfs_release_path(path);
-			args.nr = 0;
-		}
-
-		ret = log_one_extent(trans, inode, root, em, path, dst_path, &args);
+		ret = log_one_extent(trans, inode, root, em, path);
 		free_extent_map(em);
 		write_lock(&tree->lock);
 	}
 	WARN_ON(!list_empty(&extents));
 	write_unlock(&tree->lock);
 
-	if (!ret && args.nr)
-		ret = copy_items(trans, inode, dst_path, args.src,
-				 args.start_slot, args.nr, LOG_INODE_ALL);
 	btrfs_release_path(path);
 	return ret;
 }
@@ -3400,7 +3485,10 @@
 
 
 	/* today the code can only do partial logging of directories */
-	if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
+	if (S_ISDIR(inode->i_mode) ||
+	    (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+		       &BTRFS_I(inode)->runtime_flags) &&
+	     inode_only == LOG_INODE_EXISTS))
 		max_key.type = BTRFS_XATTR_ITEM_KEY;
 	else
 		max_key.type = (u8)-1;
@@ -3432,14 +3520,28 @@
 	} else {
 		if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 				       &BTRFS_I(inode)->runtime_flags)) {
+			clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+				  &BTRFS_I(inode)->runtime_flags);
 			ret = btrfs_truncate_inode_items(trans, log,
 							 inode, 0, 0);
-		} else {
-			fast_search = true;
+		} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+					      &BTRFS_I(inode)->runtime_flags)) {
+			if (inode_only == LOG_INODE_ALL)
+				fast_search = true;
 			max_key.type = BTRFS_XATTR_ITEM_KEY;
 			ret = drop_objectid_items(trans, log, path, ino,
-						  BTRFS_XATTR_ITEM_KEY);
+						  max_key.type);
+		} else {
+			if (inode_only == LOG_INODE_ALL)
+				fast_search = true;
+			ret = log_inode_item(trans, log, dst_path, inode);
+			if (ret) {
+				err = ret;
+				goto out_unlock;
+			}
+			goto log_extents;
 		}
+
 	}
 	if (ret) {
 		err = ret;
@@ -3518,11 +3620,10 @@
 		ins_nr = 0;
 	}
 
+log_extents:
 	if (fast_search) {
-		btrfs_release_path(path);
 		btrfs_release_path(dst_path);
-		ret = btrfs_log_changed_extents(trans, root, inode, path,
-						dst_path);
+		ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
 		if (ret) {
 			err = ret;
 			goto out_unlock;
@@ -3531,8 +3632,10 @@
 		struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
 		struct extent_map *em, *n;
 
+		write_lock(&tree->lock);
 		list_for_each_entry_safe(em, n, &tree->modified_extents, list)
 			list_del_init(&em->list);
+		write_unlock(&tree->lock);
 	}
 
 	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e3c6ee3..5cce6aa 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -25,7 +25,6 @@
 #include <linux/capability.h>
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
-#include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
 #include "extent_map.h"
@@ -36,6 +35,8 @@
 #include "async-thread.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "math.h"
+#include "dev-replace.h"
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
@@ -71,6 +72,19 @@
 	kfree(fs_devices);
 }
 
+static void btrfs_kobject_uevent(struct block_device *bdev,
+				 enum kobject_action action)
+{
+	int ret;
+
+	ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
+	if (ret)
+		pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n",
+			action,
+			kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
+			&disk_to_dev(bdev->bd_disk)->kobj);
+}
+
 void btrfs_cleanup_fs_uuids(void)
 {
 	struct btrfs_fs_devices *fs_devices;
@@ -108,6 +122,44 @@
 	return NULL;
 }
 
+static int
+btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
+		      int flush, struct block_device **bdev,
+		      struct buffer_head **bh)
+{
+	int ret;
+
+	*bdev = blkdev_get_by_path(device_path, flags, holder);
+
+	if (IS_ERR(*bdev)) {
+		ret = PTR_ERR(*bdev);
+		printk(KERN_INFO "btrfs: open %s failed\n", device_path);
+		goto error;
+	}
+
+	if (flush)
+		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
+	ret = set_blocksize(*bdev, 4096);
+	if (ret) {
+		blkdev_put(*bdev, flags);
+		goto error;
+	}
+	invalidate_bdev(*bdev);
+	*bh = btrfs_read_dev_super(*bdev);
+	if (!*bh) {
+		ret = -EINVAL;
+		blkdev_put(*bdev, flags);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	*bdev = NULL;
+	*bh = NULL;
+	return ret;
+}
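+
Note: the open-coded blkdev_get_by_path()/btrfs_read_dev_super() sequences below are all converted to this helper; the resulting caller pattern, assembled from those call sites as a sketch (not an additional hunk):

	struct block_device *bdev;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	int ret;

	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ | FMODE_EXCL,
				    holder, 0 /* no flush */, &bdev, &bh);
	if (ret)
		return ret;
	disk_super = (struct btrfs_super_block *)bh->b_data;
	/* ... read fields from the superblock ... */
	brelse(bh);
	blkdev_put(bdev, FMODE_READ | FMODE_EXCL);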
+
 static void requeue_list(struct btrfs_pending_bios *pending_bios,
 			struct bio *head, struct bio *tail)
 {
@@ -467,7 +519,8 @@
 	return ERR_PTR(-ENOMEM);
 }
 
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
+void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
+			       struct btrfs_fs_devices *fs_devices, int step)
 {
 	struct btrfs_device *device, *next;
 
@@ -480,8 +533,9 @@
 	/* This is the initialized path, it is safe to release the devices. */
 	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
 		if (device->in_fs_metadata) {
-			if (!latest_transid ||
-			    device->generation > latest_transid) {
+			if (!device->is_tgtdev_for_dev_replace &&
+			    (!latest_transid ||
+			     device->generation > latest_transid)) {
 				latest_devid = device->devid;
 				latest_transid = device->generation;
 				latest_bdev = device->bdev;
@@ -489,6 +543,21 @@
 			continue;
 		}
 
+		if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
+			/*
+			 * In the first step, keep the device which has
+			 * the correct fsid and the devid that is used
+			 * for the dev_replace procedure.
+			 * In the second step, the dev_replace state is
+			 * read from the device tree and it is known
+			 * whether the procedure is really active or
+			 * not, which means whether this device is
+			 * used or whether it should be removed.
+			 */
+			if (step == 0 || device->is_tgtdev_for_dev_replace)
+				continue;
+		}
 		if (device->bdev) {
 			blkdev_put(device->bdev, device->mode);
 			device->bdev = NULL;
@@ -497,7 +566,8 @@
 		if (device->writeable) {
 			list_del_init(&device->dev_alloc_list);
 			device->writeable = 0;
-			fs_devices->rw_devices--;
+			if (!device->is_tgtdev_for_dev_replace)
+				fs_devices->rw_devices--;
 		}
 		list_del_init(&device->dev_list);
 		fs_devices->num_devices--;
@@ -555,7 +625,7 @@
 		if (device->bdev)
 			fs_devices->open_devices--;
 
-		if (device->writeable) {
+		if (device->writeable && !device->is_tgtdev_for_dev_replace) {
 			list_del_init(&device->dev_alloc_list);
 			fs_devices->rw_devices--;
 		}
@@ -637,18 +707,10 @@
 		if (!device->name)
 			continue;
 
-		bdev = blkdev_get_by_path(device->name->str, flags, holder);
-		if (IS_ERR(bdev)) {
-			printk(KERN_INFO "btrfs: open %s failed\n", device->name->str);
-			goto error;
-		}
-		filemap_write_and_wait(bdev->bd_inode->i_mapping);
-		invalidate_bdev(bdev);
-		set_blocksize(bdev, 4096);
-
-		bh = btrfs_read_dev_super(bdev);
-		if (!bh)
-			goto error_close;
+		ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+					    &bdev, &bh);
+		if (ret)
+			continue;
 
 		disk_super = (struct btrfs_super_block *)bh->b_data;
 		devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -687,7 +749,7 @@
 			fs_devices->rotating = 1;
 
 		fs_devices->open_devices++;
-		if (device->writeable) {
+		if (device->writeable && !device->is_tgtdev_for_dev_replace) {
 			fs_devices->rw_devices++;
 			list_add(&device->dev_alloc_list,
 				 &fs_devices->alloc_list);
@@ -697,9 +759,7 @@
 
 error_brelse:
 		brelse(bh);
-error_close:
 		blkdev_put(bdev, flags);
-error:
 		continue;
 	}
 	if (fs_devices->open_devices == 0) {
@@ -744,40 +804,30 @@
 	u64 total_devices;
 
 	flags |= FMODE_EXCL;
-	bdev = blkdev_get_by_path(path, flags, holder);
-
-	if (IS_ERR(bdev)) {
-		ret = PTR_ERR(bdev);
-		goto error;
-	}
-
 	mutex_lock(&uuid_mutex);
-	ret = set_blocksize(bdev, 4096);
+	ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh);
 	if (ret)
-		goto error_close;
-	bh = btrfs_read_dev_super(bdev);
-	if (!bh) {
-		ret = -EINVAL;
-		goto error_close;
-	}
+		goto error;
 	disk_super = (struct btrfs_super_block *)bh->b_data;
 	devid = btrfs_stack_device_id(&disk_super->dev_item);
 	transid = btrfs_super_generation(disk_super);
 	total_devices = btrfs_super_num_devices(disk_super);
-	if (disk_super->label[0])
+	if (disk_super->label[0]) {
+		if (disk_super->label[BTRFS_LABEL_SIZE - 1])
+			disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
 		printk(KERN_INFO "device label %s ", disk_super->label);
-	else
+	} else {
 		printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
+	}
 	printk(KERN_CONT "devid %llu transid %llu %s\n",
 	       (unsigned long long)devid, (unsigned long long)transid, path);
 	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
 	if (!ret && fs_devices_ret)
 		(*fs_devices_ret)->total_devices = total_devices;
 	brelse(bh);
-error_close:
-	mutex_unlock(&uuid_mutex);
 	blkdev_put(bdev, flags);
 error:
+	mutex_unlock(&uuid_mutex);
 	return ret;
 }
 
@@ -796,7 +846,7 @@
 
 	*length = 0;
 
-	if (start >= device->total_bytes)
+	if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace)
 		return 0;
 
 	path = btrfs_alloc_path();
@@ -913,7 +963,7 @@
 	max_hole_size = 0;
 	hole_size = 0;
 
-	if (search_start >= search_end) {
+	if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
 		ret = -ENOSPC;
 		goto error;
 	}
@@ -1096,6 +1146,7 @@
 	struct btrfs_key key;
 
 	WARN_ON(!device->in_fs_metadata);
+	WARN_ON(device->is_tgtdev_for_dev_replace);
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -1330,16 +1381,22 @@
 		root->fs_info->avail_system_alloc_bits |
 		root->fs_info->avail_metadata_alloc_bits;
 
-	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
-	    root->fs_info->fs_devices->num_devices <= 4) {
+	num_devices = root->fs_info->fs_devices->num_devices;
+	btrfs_dev_replace_lock(&root->fs_info->dev_replace);
+	if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
+		WARN_ON(num_devices < 1);
+		num_devices--;
+	}
+	btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
+
+	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
 		printk(KERN_ERR "btrfs: unable to go below four devices "
 		       "on raid10\n");
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
-	    root->fs_info->fs_devices->num_devices <= 2) {
+	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
 		printk(KERN_ERR "btrfs: unable to go below two "
 		       "devices on raid1\n");
 		ret = -EINVAL;
@@ -1357,7 +1414,9 @@
 		 * is held.
 		 */
 		list_for_each_entry(tmp, devices, dev_list) {
-			if (tmp->in_fs_metadata && !tmp->bdev) {
+			if (tmp->in_fs_metadata &&
+			    !tmp->is_tgtdev_for_dev_replace &&
+			    !tmp->bdev) {
 				device = tmp;
 				break;
 			}
@@ -1371,24 +1430,16 @@
 			goto out;
 		}
 	} else {
-		bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
-					  root->fs_info->bdev_holder);
-		if (IS_ERR(bdev)) {
-			ret = PTR_ERR(bdev);
+		ret = btrfs_get_bdev_and_sb(device_path,
+					    FMODE_READ | FMODE_EXCL,
+					    root->fs_info->bdev_holder, 0,
+					    &bdev, &bh);
+		if (ret)
 			goto out;
-		}
-
-		set_blocksize(bdev, 4096);
-		invalidate_bdev(bdev);
-		bh = btrfs_read_dev_super(bdev);
-		if (!bh) {
-			ret = -EINVAL;
-			goto error_close;
-		}
 		disk_super = (struct btrfs_super_block *)bh->b_data;
 		devid = btrfs_stack_device_id(&disk_super->dev_item);
 		dev_uuid = disk_super->dev_item.uuid;
-		device = btrfs_find_device(root, devid, dev_uuid,
+		device = btrfs_find_device(root->fs_info, devid, dev_uuid,
 					   disk_super->fsid);
 		if (!device) {
 			ret = -ENOENT;
@@ -1396,6 +1447,12 @@
 		}
 	}
 
+	if (device->is_tgtdev_for_dev_replace) {
+		pr_err("btrfs: unable to remove the dev_replace target dev\n");
+		ret = -EINVAL;
+		goto error_brelse;
+	}
+
 	if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
 		printk(KERN_ERR "btrfs: unable to remove the only writeable "
 		       "device\n");
@@ -1415,6 +1472,11 @@
 	if (ret)
 		goto error_undo;
 
+	/*
+	 * TODO: the superblock still includes this device in its num_devices
+	 * counter although write_all_supers() is not locked out. This
+	 * could give a filesystem state which requires a degraded mount.
+	 */
 	ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
 	if (ret)
 		goto error_undo;
@@ -1425,7 +1487,7 @@
 	spin_unlock(&root->fs_info->free_chunk_lock);
 
 	device->in_fs_metadata = 0;
-	btrfs_scrub_cancel_dev(root, device);
+	btrfs_scrub_cancel_dev(root->fs_info, device);
 
 	/*
 	 * the device list mutex makes sure that we don't change
@@ -1482,7 +1544,7 @@
 	 * at this point, the device is zero sized.  We want to
 	 * remove it from the devices list and zero out the old super
 	 */
-	if (clear_super) {
+	if (clear_super && disk_super) {
 		/* make sure this device isn't detected as part of
 		 * the FS anymore
 		 */
@@ -1493,9 +1555,11 @@
 
 	ret = 0;
 
+	/* Notify udev that device has changed */
+	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+
 error_brelse:
 	brelse(bh);
-error_close:
 	if (bdev)
 		blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
 out:
@@ -1512,6 +1576,112 @@
 	goto error_brelse;
 }
 
+void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
+				 struct btrfs_device *srcdev)
+{
+	WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
+	list_del_rcu(&srcdev->dev_list);
+	list_del_rcu(&srcdev->dev_alloc_list);
+	fs_info->fs_devices->num_devices--;
+	if (srcdev->missing) {
+		fs_info->fs_devices->missing_devices--;
+		fs_info->fs_devices->rw_devices++;
+	}
+	if (srcdev->can_discard)
+		fs_info->fs_devices->num_can_discard--;
+	if (srcdev->bdev)
+		fs_info->fs_devices->open_devices--;
+
+	call_rcu(&srcdev->rcu, free_device);
+}
+
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
+				      struct btrfs_device *tgtdev)
+{
+	struct btrfs_device *next_device;
+
+	WARN_ON(!tgtdev);
+	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	if (tgtdev->bdev) {
+		btrfs_scratch_superblock(tgtdev);
+		fs_info->fs_devices->open_devices--;
+	}
+	fs_info->fs_devices->num_devices--;
+	if (tgtdev->can_discard)
+		fs_info->fs_devices->num_can_discard++;
+
+	next_device = list_entry(fs_info->fs_devices->devices.next,
+				 struct btrfs_device, dev_list);
+	if (tgtdev->bdev == fs_info->sb->s_bdev)
+		fs_info->sb->s_bdev = next_device->bdev;
+	if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
+		fs_info->fs_devices->latest_bdev = next_device->bdev;
+	list_del_rcu(&tgtdev->dev_list);
+
+	call_rcu(&tgtdev->rcu, free_device);
+
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+}
+
+int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
+			      struct btrfs_device **device)
+{
+	int ret = 0;
+	struct btrfs_super_block *disk_super;
+	u64 devid;
+	u8 *dev_uuid;
+	struct block_device *bdev;
+	struct buffer_head *bh;
+
+	*device = NULL;
+	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
+				    root->fs_info->bdev_holder, 0, &bdev, &bh);
+	if (ret)
+		return ret;
+	disk_super = (struct btrfs_super_block *)bh->b_data;
+	devid = btrfs_stack_device_id(&disk_super->dev_item);
+	dev_uuid = disk_super->dev_item.uuid;
+	*device = btrfs_find_device(root->fs_info, devid, dev_uuid,
+				    disk_super->fsid);
+	brelse(bh);
+	if (!*device)
+		ret = -ENOENT;
+	blkdev_put(bdev, FMODE_READ);
+	return ret;
+}
+
+int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
+					 char *device_path,
+					 struct btrfs_device **device)
+{
+	*device = NULL;
+	if (strcmp(device_path, "missing") == 0) {
+		struct list_head *devices;
+		struct btrfs_device *tmp;
+
+		devices = &root->fs_info->fs_devices->devices;
+		/*
+		 * It is safe to read the devices since the volume_mutex
+		 * is held by the caller.
+		 */
+		list_for_each_entry(tmp, devices, dev_list) {
+			if (tmp->in_fs_metadata && !tmp->bdev) {
+				*device = tmp;
+				break;
+			}
+		}
+
+		if (!*device) {
+			pr_err("btrfs: no missing device found\n");
+			return -ENOENT;
+		}
+
+		return 0;
+	} else {
+		return btrfs_find_device_by_path(root, device_path, device);
+	}
+}
+
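Note: a hypothetical ioctl-level caller of the helper above (variable names assumed); passing the literal string "missing" selects a device that is recorded in the metadata but has no backing bdev:

	struct btrfs_device *device;
	int ret;

	ret = btrfs_find_device_missing_or_by_path(root, dev_path, &device);
	if (ret)
		return ret;	/* -ENOENT if nothing matched */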
 /*
  * does all the dirty work required for changing file system's UUID.
  */
@@ -1630,7 +1800,8 @@
 		read_extent_buffer(leaf, fs_uuid,
 				   (unsigned long)btrfs_device_fsid(dev_item),
 				   BTRFS_UUID_SIZE);
-		device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
+		device = btrfs_find_device(root->fs_info, devid, dev_uuid,
+					   fs_uuid);
 		BUG_ON(!device); /* Logic error */
 
 		if (device->fs_devices->seeding) {
@@ -1678,16 +1849,17 @@
 	filemap_write_and_wait(bdev->bd_inode->i_mapping);
 
 	devices = &root->fs_info->fs_devices->devices;
-	/*
-	 * we have the volume lock, so we don't need the extra
-	 * device list mutex while reading the list here.
-	 */
+
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	list_for_each_entry(device, devices, dev_list) {
 		if (device->bdev == bdev) {
 			ret = -EEXIST;
+			mutex_unlock(
+				&root->fs_info->fs_devices->device_list_mutex);
 			goto error;
 		}
 	}
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
 	device = kzalloc(sizeof(*device), GFP_NOFS);
 	if (!device) {
@@ -1737,6 +1909,7 @@
 	device->dev_root = root->fs_info->dev_root;
 	device->bdev = bdev;
 	device->in_fs_metadata = 1;
+	device->is_tgtdev_for_dev_replace = 0;
 	device->mode = FMODE_EXCL;
 	set_blocksize(device->bdev, 4096);
 
@@ -1844,6 +2017,98 @@
 	return ret;
 }
 
+int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
+				  struct btrfs_device **device_out)
+{
+	struct request_queue *q;
+	struct btrfs_device *device;
+	struct block_device *bdev;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct list_head *devices;
+	struct rcu_string *name;
+	int ret = 0;
+
+	*device_out = NULL;
+	if (fs_info->fs_devices->seeding)
+		return -EINVAL;
+
+	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
+				  fs_info->bdev_holder);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+
+	filemap_write_and_wait(bdev->bd_inode->i_mapping);
+
+	devices = &fs_info->fs_devices->devices;
+	list_for_each_entry(device, devices, dev_list) {
+		if (device->bdev == bdev) {
+			ret = -EEXIST;
+			goto error;
+		}
+	}
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	name = rcu_string_strdup(device_path, GFP_NOFS);
+	if (!name) {
+		kfree(device);
+		ret = -ENOMEM;
+		goto error;
+	}
+	rcu_assign_pointer(device->name, name);
+
+	q = bdev_get_queue(bdev);
+	if (blk_queue_discard(q))
+		device->can_discard = 1;
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+	device->writeable = 1;
+	device->work.func = pending_bios_fn;
+	generate_random_uuid(device->uuid);
+	device->devid = BTRFS_DEV_REPLACE_DEVID;
+	spin_lock_init(&device->io_lock);
+	device->generation = 0;
+	device->io_width = root->sectorsize;
+	device->io_align = root->sectorsize;
+	device->sector_size = root->sectorsize;
+	device->total_bytes = i_size_read(bdev->bd_inode);
+	device->disk_total_bytes = device->total_bytes;
+	device->dev_root = fs_info->dev_root;
+	device->bdev = bdev;
+	device->in_fs_metadata = 1;
+	device->is_tgtdev_for_dev_replace = 1;
+	device->mode = FMODE_EXCL;
+	set_blocksize(device->bdev, 4096);
+	device->fs_devices = fs_info->fs_devices;
+	list_add(&device->dev_list, &fs_info->fs_devices->devices);
+	fs_info->fs_devices->num_devices++;
+	fs_info->fs_devices->open_devices++;
+	if (device->can_discard)
+		fs_info->fs_devices->num_can_discard++;
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
+	*device_out = device;
+	return ret;
+
+error:
+	blkdev_put(bdev, FMODE_EXCL);
+	return ret;
+}
+
+void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
+					      struct btrfs_device *tgtdev)
+{
+	WARN_ON(fs_info->fs_devices->rw_devices == 0);
+	tgtdev->io_width = fs_info->dev_root->sectorsize;
+	tgtdev->io_align = fs_info->dev_root->sectorsize;
+	tgtdev->sector_size = fs_info->dev_root->sectorsize;
+	tgtdev->dev_root = fs_info->dev_root;
+	tgtdev->in_fs_metadata = 1;
+}
+
 static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
 					struct btrfs_device *device)
 {
@@ -1900,7 +2165,8 @@
 
 	if (!device->writeable)
 		return -EACCES;
-	if (new_size <= device->total_bytes)
+	if (new_size <= device->total_bytes ||
+	    device->is_tgtdev_for_dev_replace)
 		return -EINVAL;
 
 	btrfs_set_super_total_bytes(super_copy, old_total + diff);
@@ -2338,18 +2604,6 @@
 	return 1;
 }
 
-static u64 div_factor_fine(u64 num, int factor)
-{
-	if (factor <= 0)
-		return 0;
-	if (factor >= 100)
-		return num;
-
-	num *= factor;
-	do_div(num, 100);
-	return num;
-}
-
 static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
 			      struct btrfs_balance_args *bargs)
 {
@@ -2514,15 +2768,6 @@
 	return 1;
 }
 
-static u64 div_factor(u64 num, int factor)
-{
-	if (factor == 10)
-		return num;
-	num *= factor;
-	do_div(num, 10);
-	return num;
-}
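
Note: the two div_factor helpers removed above are only moved, not changed: they now live in the new fs/btrfs/math.h pulled in near the top of this file (which also explains the dropped <asm/div64.h> include). A sketch of that header, reconstructed from the removed bodies (the static-inline wrapping is an assumption):

	/* fs/btrfs/math.h */
	#include <asm/div64.h>

	static inline u64 div_factor(u64 num, int factor)
	{
		if (factor == 10)
			return num;
		num *= factor;
		do_div(num, 10);
		return num;
	}

	static inline u64 div_factor_fine(u64 num, int factor)
	{
		if (factor <= 0)
			return 0;
		if (factor >= 100)
			return num;
		num *= factor;
		do_div(num, 100);
		return num;
	}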
-
 static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
@@ -2550,7 +2795,8 @@
 		size_to_free = div_factor(old_size, 1);
 		size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
 		if (!device->writeable ||
-		    device->total_bytes - device->bytes_used > size_to_free)
+		    device->total_bytes - device->bytes_used > size_to_free ||
+		    device->is_tgtdev_for_dev_replace)
 			continue;
 
 		ret = btrfs_shrink_device(device, old_size - size_to_free);
@@ -2728,6 +2974,7 @@
 	u64 allowed;
 	int mixed = 0;
 	int ret;
+	u64 num_devices;
 
 	if (btrfs_fs_closing(fs_info) ||
 	    atomic_read(&fs_info->balance_pause_req) ||
@@ -2756,10 +3003,17 @@
 		}
 	}
 
+	num_devices = fs_info->fs_devices->num_devices;
+	btrfs_dev_replace_lock(&fs_info->dev_replace);
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
+		BUG_ON(num_devices < 1);
+		num_devices--;
+	}
+	btrfs_dev_replace_unlock(&fs_info->dev_replace);
 	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-	if (fs_info->fs_devices->num_devices == 1)
+	if (num_devices == 1)
 		allowed |= BTRFS_BLOCK_GROUP_DUP;
-	else if (fs_info->fs_devices->num_devices < 4)
+	else if (num_devices < 4)
 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
 	else
 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
@@ -2902,6 +3156,7 @@
 		ret = btrfs_balance(fs_info->balance_ctl, NULL);
 	}
 
+	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
 
@@ -2924,6 +3179,7 @@
 		return 0;
 	}
 
+	WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
 	tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
 	if (IS_ERR(tsk))
 		return PTR_ERR(tsk);
@@ -3080,7 +3336,7 @@
 	u64 old_size = device->total_bytes;
 	u64 diff = device->total_bytes - new_size;
 
-	if (new_size >= device->total_bytes)
+	if (device->is_tgtdev_for_dev_replace)
 		return -EINVAL;
 
 	path = btrfs_alloc_path();
@@ -3235,6 +3491,14 @@
 	return 0;
 }
 
+struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+	{ 2, 1, 0, 4, 2, 2 /* raid10 */ },
+	{ 1, 1, 2, 2, 2, 2 /* raid1 */ },
+	{ 1, 2, 1, 1, 1, 2 /* dup */ },
+	{ 1, 1, 0, 2, 1, 1 /* raid0 */ },
+	{ 1, 1, 0, 1, 1, 1 /* single */ },
+};
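
Note: the positional initializers above are decoded by the reads in __btrfs_alloc_chunk below; the implied field order of struct btrfs_raid_attr (defined elsewhere in the series; the comments here are inferred from the replaced if/else chain, not quoted):

	struct btrfs_raid_attr {
		int sub_stripes;	/* sub_stripes for the chunk map */
		int dev_stripes;	/* stripes per device */
		int devs_max;		/* max devices to use, 0 == as many as possible */
		int devs_min;		/* min devices needed */
		int devs_increment;	/* ndevs must be a multiple of this */
		int ncopies;		/* copies of the data */
	};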
+
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *extent_root,
 			       struct map_lookup **map_ret,
@@ -3264,43 +3528,21 @@
 	int ndevs;
 	int i;
 	int j;
+	int index;
 
 	BUG_ON(!alloc_profile_is_valid(type, 0));
 
 	if (list_empty(&fs_devices->alloc_list))
 		return -ENOSPC;
 
-	sub_stripes = 1;
-	dev_stripes = 1;
-	devs_increment = 1;
-	ncopies = 1;
-	devs_max = 0;	/* 0 == as many as possible */
-	devs_min = 1;
+	index = __get_raid_index(type);
 
-	/*
-	 * define the properties of each RAID type.
-	 * FIXME: move this to a global table and use it in all RAID
-	 * calculation code
-	 */
-	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
-		dev_stripes = 2;
-		ncopies = 2;
-		devs_max = 1;
-	} else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
-		devs_min = 2;
-	} else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
-		devs_increment = 2;
-		ncopies = 2;
-		devs_max = 2;
-		devs_min = 2;
-	} else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-		sub_stripes = 2;
-		devs_increment = 2;
-		ncopies = 2;
-		devs_min = 4;
-	} else {
-		devs_max = 1;
-	}
+	sub_stripes = btrfs_raid_array[index].sub_stripes;
+	dev_stripes = btrfs_raid_array[index].dev_stripes;
+	devs_max = btrfs_raid_array[index].devs_max;
+	devs_min = btrfs_raid_array[index].devs_min;
+	devs_increment = btrfs_raid_array[index].devs_increment;
+	ncopies = btrfs_raid_array[index].ncopies;
 
 	if (type & BTRFS_BLOCK_GROUP_DATA) {
 		max_stripe_size = 1024 * 1024 * 1024;
@@ -3347,13 +3589,13 @@
 		cur = cur->next;
 
 		if (!device->writeable) {
-			printk(KERN_ERR
+			WARN(1, KERN_ERR
 			       "btrfs: read-only device in alloc_list\n");
-			WARN_ON(1);
 			continue;
 		}
 
-		if (!device->in_fs_metadata)
+		if (!device->in_fs_metadata ||
+		    device->is_tgtdev_for_dev_replace)
 			continue;
 
 		if (device->total_bytes > device->bytes_used)
@@ -3382,6 +3624,7 @@
 		devices_info[ndevs].total_avail = total_avail;
 		devices_info[ndevs].dev = device;
 		++ndevs;
+		WARN_ON(ndevs > fs_devices->rw_devices);
 	}
 
 	/*
@@ -3740,8 +3983,9 @@
 	}
 }
 
-int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 {
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	struct extent_map_tree *em_tree = &map_tree->map_tree;
@@ -3761,32 +4005,60 @@
 	else
 		ret = 1;
 	free_extent_map(em);
+
+	btrfs_dev_replace_lock(&fs_info->dev_replace);
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
+		ret++;
+	btrfs_dev_replace_unlock(&fs_info->dev_replace);
+
 	return ret;
 }
 
-static int find_live_mirror(struct map_lookup *map, int first, int num,
-			    int optimal)
+static int find_live_mirror(struct btrfs_fs_info *fs_info,
+			    struct map_lookup *map, int first, int num,
+			    int optimal, int dev_replace_is_ongoing)
 {
 	int i;
-	if (map->stripes[optimal].dev->bdev)
-		return optimal;
-	for (i = first; i < first + num; i++) {
-		if (map->stripes[i].dev->bdev)
-			return i;
+	int tolerance;
+	struct btrfs_device *srcdev;
+
+	if (dev_replace_is_ongoing &&
+	    fs_info->dev_replace.cont_reading_from_srcdev_mode ==
+	     BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
+		srcdev = fs_info->dev_replace.srcdev;
+	else
+		srcdev = NULL;
+
+	/*
+	 * try to avoid the drive that is the source drive for a
+	 * dev-replace procedure; only choose it if no other non-missing
+	 * mirror is available
+	 */
+	for (tolerance = 0; tolerance < 2; tolerance++) {
+		if (map->stripes[optimal].dev->bdev &&
+		    (tolerance || map->stripes[optimal].dev != srcdev))
+			return optimal;
+		for (i = first; i < first + num; i++) {
+			if (map->stripes[i].dev->bdev &&
+			    (tolerance || map->stripes[i].dev != srcdev))
+				return i;
+		}
 	}
+
 	/* we couldn't find one that doesn't fail.  Just return something
 	 * and the io error handling code will clean up eventually
 	 */
 	return optimal;
 }
 
-static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 			     u64 logical, u64 *length,
 			     struct btrfs_bio **bbio_ret,
 			     int mirror_num)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	u64 offset;
 	u64 stripe_offset;
@@ -3800,6 +4072,11 @@
 	int num_stripes;
 	int max_errors = 0;
 	struct btrfs_bio *bbio = NULL;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	int dev_replace_is_ongoing = 0;
+	int num_alloc_stripes;
+	int patch_the_first_stripe_for_dev_replace = 0;
+	u64 physical_to_patch_in_first_stripe = 0;
 
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, logical, *length);
@@ -3816,9 +4093,6 @@
 	map = (struct map_lookup *)em->bdev;
 	offset = logical - em->start;
 
-	if (mirror_num > map->num_stripes)
-		mirror_num = 0;
-
 	stripe_nr = offset;
 	/*
 	 * stripe_nr counts the total number of stripes we have to stride
@@ -3845,6 +4119,93 @@
 	if (!bbio_ret)
 		goto out;
 
+	btrfs_dev_replace_lock(dev_replace);
+	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+	if (!dev_replace_is_ongoing)
+		btrfs_dev_replace_unlock(dev_replace);
+
+	if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
+	    !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
+	    dev_replace->tgtdev != NULL) {
+		/*
+		 * in dev-replace case, for repair case (that's the only
+		 * case where the mirror is selected explicitly when
+		 * calling btrfs_map_block), blocks left of the left cursor
+		 * can also be read from the target drive.
+		 * For REQ_GET_READ_MIRRORS, the target drive is added as
+		 * the last one to the array of stripes. For READ, it also
+		 * needs to be supported using the same mirror number.
+		 * If the requested block is not left of the left cursor,
+		 * EIO is returned. This can happen because btrfs_num_copies()
+		 * returns one more in the dev-replace case.
+		 */
+		u64 tmp_length = *length;
+		struct btrfs_bio *tmp_bbio = NULL;
+		int tmp_num_stripes;
+		u64 srcdev_devid = dev_replace->srcdev->devid;
+		int index_srcdev = 0;
+		int found = 0;
+		u64 physical_of_found = 0;
+
+		ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
+			     logical, &tmp_length, &tmp_bbio, 0);
+		if (ret) {
+			WARN_ON(tmp_bbio != NULL);
+			goto out;
+		}
+
+		tmp_num_stripes = tmp_bbio->num_stripes;
+		if (mirror_num > tmp_num_stripes) {
+			/*
+			 * REQ_GET_READ_MIRRORS does not contain this
+			 * mirror, that means that the requested area
+			 * is not left of the left cursor
+			 */
+			ret = -EIO;
+			kfree(tmp_bbio);
+			goto out;
+		}
+
+		/*
+		 * process the rest of the function using the mirror_num
+		 * of the source drive. Therefore look it up first.
+		 * At the end, patch the device pointer to the one of the
+		 * target drive.
+		 */
+		for (i = 0; i < tmp_num_stripes; i++) {
+			if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
+				/*
+				 * In case of DUP, in order to keep it
+				 * simple, only add the mirror with the
+				 * lowest physical address
+				 */
+				if (found &&
+				    physical_of_found <=
+				     tmp_bbio->stripes[i].physical)
+					continue;
+				index_srcdev = i;
+				found = 1;
+				physical_of_found =
+					tmp_bbio->stripes[i].physical;
+			}
+		}
+
+		if (found) {
+			mirror_num = index_srcdev + 1;
+			patch_the_first_stripe_for_dev_replace = 1;
+			physical_to_patch_in_first_stripe = physical_of_found;
+		} else {
+			WARN_ON(1);
+			ret = -EIO;
+			kfree(tmp_bbio);
+			goto out;
+		}
+
+		kfree(tmp_bbio);
+	} else if (mirror_num > map->num_stripes) {
+		mirror_num = 0;
+	}
+
 	num_stripes = 1;
 	stripe_index = 0;
 	stripe_nr_orig = stripe_nr;
@@ -3859,19 +4220,20 @@
 					    stripe_nr_end - stripe_nr_orig);
 		stripe_index = do_div(stripe_nr, map->num_stripes);
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (rw & (REQ_WRITE | REQ_DISCARD))
+		if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
 		else {
-			stripe_index = find_live_mirror(map, 0,
+			stripe_index = find_live_mirror(fs_info, map, 0,
 					    map->num_stripes,
-					    current->pid % map->num_stripes);
+					    current->pid % map->num_stripes,
+					    dev_replace_is_ongoing);
 			mirror_num = stripe_index + 1;
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (rw & (REQ_WRITE | REQ_DISCARD)) {
+		if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) {
 			num_stripes = map->num_stripes;
 		} else if (mirror_num) {
 			stripe_index = mirror_num - 1;
@@ -3885,7 +4247,7 @@
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (rw & REQ_WRITE)
+		if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
 			num_stripes = map->sub_stripes;
 		else if (rw & REQ_DISCARD)
 			num_stripes = min_t(u64, map->sub_stripes *
@@ -3895,9 +4257,11 @@
 			stripe_index += mirror_num - 1;
 		else {
 			int old_stripe_index = stripe_index;
-			stripe_index = find_live_mirror(map, stripe_index,
+			stripe_index = find_live_mirror(fs_info, map,
+					      stripe_index,
 					      map->sub_stripes, stripe_index +
-					      current->pid % map->sub_stripes);
+					      current->pid % map->sub_stripes,
+					      dev_replace_is_ongoing);
 			mirror_num = stripe_index - old_stripe_index + 1;
 		}
 	} else {
@@ -3911,7 +4275,14 @@
 	}
 	BUG_ON(stripe_index >= map->num_stripes);
 
-	bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS);
+	num_alloc_stripes = num_stripes;
+	if (dev_replace_is_ongoing) {
+		if (rw & (REQ_WRITE | REQ_DISCARD))
+			num_alloc_stripes <<= 1;
+		if (rw & REQ_GET_READ_MIRRORS)
+			num_alloc_stripes++;
+	}
+	bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
 	if (!bbio) {
 		ret = -ENOMEM;
 		goto out;
@@ -3998,7 +4369,7 @@
 		}
 	}
 
-	if (rw & REQ_WRITE) {
+	if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) {
 		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
 				 BTRFS_BLOCK_GROUP_RAID10 |
 				 BTRFS_BLOCK_GROUP_DUP)) {
@@ -4006,20 +4377,115 @@
 		}
 	}
 
+	if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
+	    dev_replace->tgtdev != NULL) {
+		int index_where_to_add;
+		u64 srcdev_devid = dev_replace->srcdev->devid;
+
+		/*
+		 * duplicate the write operations while the dev replace
+		 * procedure is running. Since the copying of the old disk
+		 * to the new disk takes place at run time while the
+		 * filesystem is mounted writable, the regular write
+		 * operations to the old disk have to be duplicated to go
+		 * to the new disk as well.
+		 * Note that device->missing is handled by the caller, and
+		 * that the write to the old disk is already set up in the
+		 * stripes array.
+		 */
+		index_where_to_add = num_stripes;
+		for (i = 0; i < num_stripes; i++) {
+			if (bbio->stripes[i].dev->devid == srcdev_devid) {
+				/* write to new disk, too */
+				struct btrfs_bio_stripe *new =
+					bbio->stripes + index_where_to_add;
+				struct btrfs_bio_stripe *old =
+					bbio->stripes + i;
+
+				new->physical = old->physical;
+				new->length = old->length;
+				new->dev = dev_replace->tgtdev;
+				index_where_to_add++;
+				max_errors++;
+			}
+		}
+		num_stripes = index_where_to_add;
+	} else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
+		   dev_replace->tgtdev != NULL) {
+		u64 srcdev_devid = dev_replace->srcdev->devid;
+		int index_srcdev = 0;
+		int found = 0;
+		u64 physical_of_found = 0;
+
+		/*
+		 * During the dev-replace procedure, the target drive can
+		 * also be used to read data in case it is needed to repair
+		 * a corrupt block elsewhere. This is possible if the
+		 * requested area is to the left of the left cursor. In this area,
+		 * the target drive is a full copy of the source drive.
+		 */
+		for (i = 0; i < num_stripes; i++) {
+			if (bbio->stripes[i].dev->devid == srcdev_devid) {
+				/*
+				 * In case of DUP, in order to keep it
+				 * simple, only add the mirror with the
+				 * lowest physical address
+				 */
+				if (found &&
+				    physical_of_found <=
+				     bbio->stripes[i].physical)
+					continue;
+				index_srcdev = i;
+				found = 1;
+				physical_of_found = bbio->stripes[i].physical;
+			}
+		}
+		if (found) {
+			u64 length = map->stripe_len;
+
+			if (physical_of_found + length <=
+			    dev_replace->cursor_left) {
+				struct btrfs_bio_stripe *tgtdev_stripe =
+					bbio->stripes + num_stripes;
+
+				tgtdev_stripe->physical = physical_of_found;
+				tgtdev_stripe->length =
+					bbio->stripes[index_srcdev].length;
+				tgtdev_stripe->dev = dev_replace->tgtdev;
+
+				num_stripes++;
+			}
+		}
+	}
+
 	*bbio_ret = bbio;
 	bbio->num_stripes = num_stripes;
 	bbio->max_errors = max_errors;
 	bbio->mirror_num = mirror_num;
+
+	/*
+	 * this is the case where REQ_READ && dev_replace_is_ongoing &&
+	 * mirror_num == num_stripes + 1 && the dev-replace target drive
+	 * is available as a mirror
+	 */
+	if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
+		WARN_ON(num_stripes > 1);
+		bbio->stripes[0].dev = dev_replace->tgtdev;
+		bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
+		bbio->mirror_num = map->num_stripes + 1;
+	}
 out:
+	if (dev_replace_is_ongoing)
+		btrfs_dev_replace_unlock(dev_replace);
 	free_extent_map(em);
 	return ret;
 }
 
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		      u64 logical, u64 *length,
 		      struct btrfs_bio **bbio_ret, int mirror_num)
 {
-	return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret,
+	return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
 				 mirror_num);
 }
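
An aside for readers tracing the dev-replace write duplication above: while a
replace runs, every stripe aimed at the source device gets a second stripe
appended that points at the target, which is why the bbio was allocated with
room for twice the stripes (num_alloc_stripes <<= 1). A minimal user-space
model of that append loop; the struct and names are illustrative, not the
kernel types:

#include <stdio.h>

struct stripe { int devid; unsigned long long physical; };

/* Append, after the existing stripes, a copy of every stripe that
 * targets src_devid, redirected at tgt_devid. Returns the new count. */
static int duplicate_for_target(struct stripe *s, int n, int cap,
				int src_devid, int tgt_devid)
{
	int add = n;
	int i;

	for (i = 0; i < n && add < cap; i++) {
		if (s[i].devid == src_devid) {
			s[add] = s[i];
			s[add].devid = tgt_devid;
			add++;
		}
	}
	return add;
}

int main(void)
{
	struct stripe s[4] = { { 1, 0 }, { 2, 4096 } };

	/* device 1 is being replaced by device 3 */
	printf("stripes after duplication: %d\n",
	       duplicate_for_target(s, 2, 4, 1, 3));	/* prints 3 */
	return 0;
}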
 
@@ -4238,10 +4704,116 @@
 				   &device->work);
 }
 
+static int bio_size_ok(struct block_device *bdev, struct bio *bio,
+		       sector_t sector)
+{
+	struct bio_vec *prev;
+	struct request_queue *q = bdev_get_queue(bdev);
+	unsigned short max_sectors = queue_max_sectors(q);
+	struct bvec_merge_data bvm = {
+		.bi_bdev = bdev,
+		.bi_sector = sector,
+		.bi_rw = bio->bi_rw,
+	};
+
+	if (bio->bi_vcnt == 0) {
+		WARN_ON(1);
+		return 1;
+	}
+
+	prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
+	if ((bio->bi_size >> 9) > max_sectors)
+		return 0;
+
+	if (!q->merge_bvec_fn)
+		return 1;
+
+	bvm.bi_size = bio->bi_size - prev->bv_len;
+	if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
+		return 0;
+	return 1;
+}
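
The fits-or-not test above is mostly a unit conversion: bi_size is in bytes,
queue_max_sectors() reports 512-byte sectors, hence the >> 9. A toy model,
with the limit taken as an assumed input rather than a real queue query:

#include <stdio.h>
#include <stdbool.h>

static bool bio_fits(unsigned int bi_size_bytes, unsigned short max_sectors)
{
	return (bi_size_bytes >> 9) <= max_sectors;
}

int main(void)
{
	printf("%d\n", bio_fits(1 << 20, 2048));  /* 1: 2048 sectors fit */
	printf("%d\n", bio_fits(1 << 21, 2048));  /* 0: 4096 sectors do not */
	return 0;
}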
+
+static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
+			      struct bio *bio, u64 physical, int dev_nr,
+			      int rw, int async)
+{
+	struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
+
+	bio->bi_private = bbio;
+	bio->bi_private = merge_stripe_index_into_bio_private(
+			bio->bi_private, (unsigned int)dev_nr);
+	bio->bi_end_io = btrfs_end_bio;
+	bio->bi_sector = physical >> 9;
+#ifdef DEBUG
+	{
+		struct rcu_string *name;
+
+		rcu_read_lock();
+		name = rcu_dereference(dev->name);
+		pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
+			 "(%s id %llu), size=%u\n", rw,
+			 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
+			 name->str, dev->devid, bio->bi_size);
+		rcu_read_unlock();
+	}
+#endif
+	bio->bi_bdev = dev->bdev;
+	if (async)
+		schedule_bio(root, dev, rw, bio);
+	else
+		btrfsic_submit_bio(rw, bio);
+}
+
+static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
+			      struct bio *first_bio, struct btrfs_device *dev,
+			      int dev_nr, int rw, int async)
+{
+	struct bio_vec *bvec = first_bio->bi_io_vec;
+	struct bio *bio;
+	int nr_vecs = bio_get_nr_vecs(dev->bdev);
+	u64 physical = bbio->stripes[dev_nr].physical;
+
+again:
+	bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
+	if (!bio)
+		return -ENOMEM;
+
+	while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
+		if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+				 bvec->bv_offset) < bvec->bv_len) {
+			u64 len = bio->bi_size;
+
+			atomic_inc(&bbio->stripes_pending);
+			submit_stripe_bio(root, bbio, bio, physical, dev_nr,
+					  rw, async);
+			physical += len;
+			goto again;
+		}
+		bvec++;
+	}
+
+	submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
+	return 0;
+}
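
breakup_stripe_bio() above walks the pages of the oversized bio and, whenever
bio_add_page() refuses the next page, submits what has accumulated and starts
a fresh bio at the advanced physical offset. A self-contained sketch of that
loop, with the page size and per-bio capacity as assumed constants:

#include <stdio.h>

#define PAGE_SZ	4096u
#define BIO_CAP	(2 * PAGE_SZ)	/* assumed per-bio capacity */

static void submit(unsigned long long phys, unsigned int len)
{
	printf("submit: phys=%llu len=%u\n", phys, len);
}

int main(void)
{
	unsigned int pages = 5, in_bio = 0, i;
	unsigned long long phys = 0;

	for (i = 0; i < pages; i++) {
		if (in_bio + PAGE_SZ > BIO_CAP) {	/* add would fail */
			submit(phys, in_bio);
			phys += in_bio;			/* physical += len */
			in_bio = 0;
		}
		in_bio += PAGE_SZ;
	}
	if (in_bio)
		submit(phys, in_bio);			/* final partial bio */
	return 0;
}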
+
+static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
+{
+	atomic_inc(&bbio->error);
+	if (atomic_dec_and_test(&bbio->stripes_pending)) {
+		bio->bi_private = bbio->private;
+		bio->bi_end_io = bbio->end_io;
+		bio->bi_bdev = (struct block_device *)
+			(unsigned long)bbio->mirror_num;
+		bio->bi_sector = logical >> 9;
+		kfree(bbio);
+		bio_endio(bio, -EIO);
+	}
+}
+
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 		  int mirror_num, int async_submit)
 {
-	struct btrfs_mapping_tree *map_tree;
 	struct btrfs_device *dev;
 	struct bio *first_bio = bio;
 	u64 logical = (u64)bio->bi_sector << 9;
@@ -4253,12 +4825,11 @@
 	struct btrfs_bio *bbio = NULL;
 
 	length = bio->bi_size;
-	map_tree = &root->fs_info->mapping_tree;
 	map_length = length;
 
-	ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio,
+	ret = btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
 			      mirror_num);
-	if (ret) /* -ENOMEM */
+	if (ret)
 		return ret;
 
 	total_devs = bbio->num_stripes;
@@ -4276,52 +4847,48 @@
 	atomic_set(&bbio->stripes_pending, bbio->num_stripes);
 
 	while (dev_nr < total_devs) {
+		dev = bbio->stripes[dev_nr].dev;
+		if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
+			bbio_error(bbio, first_bio, logical);
+			dev_nr++;
+			continue;
+		}
+
+		/*
+		 * Check and see if we're ok with this bio based on its size
+		 * and offset for the given device.
+		 */
+		if (!bio_size_ok(dev->bdev, first_bio,
+				 bbio->stripes[dev_nr].physical >> 9)) {
+			ret = breakup_stripe_bio(root, bbio, first_bio, dev,
+						 dev_nr, rw, async_submit);
+			BUG_ON(ret);
+			dev_nr++;
+			continue;
+		}
+
 		if (dev_nr < total_devs - 1) {
 			bio = bio_clone(first_bio, GFP_NOFS);
 			BUG_ON(!bio); /* -ENOMEM */
 		} else {
 			bio = first_bio;
 		}
-		bio->bi_private = bbio;
-		bio->bi_private = merge_stripe_index_into_bio_private(
-				bio->bi_private, (unsigned int)dev_nr);
-		bio->bi_end_io = btrfs_end_bio;
-		bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
-		dev = bbio->stripes[dev_nr].dev;
-		if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
-#ifdef DEBUG
-			struct rcu_string *name;
 
-			rcu_read_lock();
-			name = rcu_dereference(dev->name);
-			pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
-				 "(%s id %llu), size=%u\n", rw,
-				 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
-				 name->str, dev->devid, bio->bi_size);
-			rcu_read_unlock();
-#endif
-			bio->bi_bdev = dev->bdev;
-			if (async_submit)
-				schedule_bio(root, dev, rw, bio);
-			else
-				btrfsic_submit_bio(rw, bio);
-		} else {
-			bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
-			bio->bi_sector = logical >> 9;
-			bio_endio(bio, -EIO);
-		}
+		submit_stripe_bio(root, bbio, bio,
+				  bbio->stripes[dev_nr].physical, dev_nr, rw,
+				  async_submit);
 		dev_nr++;
 	}
 	return 0;
 }
 
-struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
 				       u8 *uuid, u8 *fsid)
 {
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *cur_devices;
 
-	cur_devices = root->fs_info->fs_devices;
+	cur_devices = fs_info->fs_devices;
 	while (cur_devices) {
 		if (!fsid ||
 		    !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
@@ -4402,6 +4969,7 @@
 	em->bdev = (struct block_device *)map;
 	em->start = logical;
 	em->len = length;
+	em->orig_start = 0;
 	em->block_start = 0;
 	em->block_len = em->len;
 
@@ -4419,8 +4987,8 @@
 		read_extent_buffer(leaf, uuid, (unsigned long)
 				   btrfs_stripe_dev_uuid_nr(chunk, i),
 				   BTRFS_UUID_SIZE);
-		map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
-							NULL);
+		map->stripes[i].dev = btrfs_find_device(root->fs_info, devid,
+							uuid, NULL);
 		if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
 			kfree(map);
 			free_extent_map(em);
@@ -4461,6 +5029,8 @@
 	device->io_align = btrfs_device_io_align(leaf, dev_item);
 	device->io_width = btrfs_device_io_width(leaf, dev_item);
 	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+	WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
+	device->is_tgtdev_for_dev_replace = 0;
 
 	ptr = (unsigned long)btrfs_device_uuid(dev_item);
 	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
@@ -4538,7 +5108,7 @@
 			return ret;
 	}
 
-	device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
+	device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid);
 	if (!device || !device->bdev) {
 		if (!btrfs_test_opt(root, DEGRADED))
 			return -EIO;
@@ -4571,7 +5141,7 @@
 	fill_device_from_item(leaf, dev_item, device);
 	device->dev_root = root->fs_info->dev_root;
 	device->in_fs_metadata = 1;
-	if (device->writeable) {
+	if (device->writeable && !device->is_tgtdev_for_dev_replace) {
 		device->fs_devices->total_rw_bytes += device->total_bytes;
 		spin_lock(&root->fs_info->free_chunk_lock);
 		root->fs_info->free_chunk_space += device->total_bytes -
@@ -4930,7 +5500,7 @@
 	int i;
 
 	mutex_lock(&fs_devices->device_list_mutex);
-	dev = btrfs_find_device(root, stats->devid, NULL, NULL);
+	dev = btrfs_find_device(root->fs_info, stats->devid, NULL, NULL);
 	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (!dev) {
@@ -4958,3 +5528,21 @@
 		stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
 	return 0;
 }
+
+int btrfs_scratch_superblock(struct btrfs_device *device)
+{
+	struct buffer_head *bh;
+	struct btrfs_super_block *disk_super;
+
+	bh = btrfs_read_dev_super(device->bdev);
+	if (!bh)
+		return -EINVAL;
+	disk_super = (struct btrfs_super_block *)bh->b_data;
+
+	memset(&disk_super->magic, 0, sizeof(disk_super->magic));
+	set_buffer_dirty(bh);
+	sync_dirty_buffer(bh);
+	brelse(bh);
+
+	return 0;
+}
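
btrfs_scratch_superblock() above only wipes the magic field, which is enough
to stop the device being probed as btrfs while leaving the rest of the
superblock bytes in place. The same idea in a stand-alone sketch; the struct
layout is illustrative, not the on-disk format:

#include <stdio.h>
#include <string.h>

struct sb_model { char magic[8]; char rest[56]; };

int main(void)
{
	struct sb_model sb;

	memcpy(sb.magic, "_BHRfS_M", 8);	/* btrfs magic string */
	memset(sb.magic, 0, sizeof(sb.magic));	/* the "scratch" */
	printf("magic %s\n", sb.magic[0] ? "present" : "cleared");
	return 0;
}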
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 53c06af..d3c3939 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -50,6 +50,7 @@
 	int in_fs_metadata;
 	int missing;
 	int can_discard;
+	int is_tgtdev_for_dev_replace;
 
 	spinlock_t io_lock;
 
@@ -88,7 +89,7 @@
 	u8 uuid[BTRFS_UUID_SIZE];
 
 	/* per-device scrub information */
-	struct scrub_dev *scrub_device;
+	struct scrub_ctx *scrub_device;
 
 	struct btrfs_work work;
 	struct rcu_head rcu;
@@ -179,6 +180,15 @@
 	u64 total_avail;
 };
 
+struct btrfs_raid_attr {
+	int sub_stripes;	/* sub_stripes info for map */
+	int dev_stripes;	/* stripes per dev */
+	int devs_max;		/* max devs to use */
+	int devs_min;		/* min devs needed */
+	int devs_increment;	/* ndevs has to be a multiple of this */
+	int ncopies;		/* how many copies of the data */
+};
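
For a sense of how the new struct might be filled in, here is a hypothetical
RAID1 entry: two devices, one stripe each, two copies of the data. The field
values are an assumption for illustration, not quoted from the kernel table:

static const struct btrfs_raid_attr raid1_attr = {
	.sub_stripes	= 1,	/* no striping inside a mirror */
	.dev_stripes	= 1,	/* one stripe per device */
	.devs_max	= 2,	/* classic two-way mirror */
	.devs_min	= 2,
	.devs_increment	= 2,	/* devices come in pairs */
	.ncopies	= 2,	/* every block exists twice */
};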
+
 struct map_lookup {
 	u64 type;
 	int io_align;
@@ -248,7 +258,7 @@
 			   struct btrfs_device *device,
 			   u64 chunk_tree, u64 chunk_objectid,
 			   u64 chunk_offset, u64 start, u64 num_bytes);
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		    u64 logical, u64 *length,
 		    struct btrfs_bio **bbio_ret, int mirror_num);
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -267,19 +277,27 @@
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 			  struct btrfs_fs_devices **fs_devices_ret);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
+void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
+			       struct btrfs_fs_devices *fs_devices, int step);
+int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
+					 char *device_path,
+					 struct btrfs_device **device);
+int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
+			      struct btrfs_device **device);
 int btrfs_add_device(struct btrfs_trans_handle *trans,
 		     struct btrfs_root *root,
 		     struct btrfs_device *device);
 int btrfs_rm_device(struct btrfs_root *root, char *device_path);
 void btrfs_cleanup_fs_uuids(void);
-int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,
 		      struct btrfs_device *device, u64 new_size);
-struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
 				       u8 *uuid, u8 *fsid);
 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_root *root, char *path);
+int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
+				  struct btrfs_device **device_out);
 int btrfs_balance(struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs);
 int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
@@ -296,6 +314,13 @@
 int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
 int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
+void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
+				 struct btrfs_device *srcdev);
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
+				      struct btrfs_device *tgtdev);
+void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
+					      struct btrfs_device *tgtdev);
+int btrfs_scratch_superblock(struct btrfs_device *device);
 
 static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
 				      int index)
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 3f4e2d6..446a684 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -122,6 +122,16 @@
 		 */
 		if (!value)
 			goto out;
+	} else {
+		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
+					name, name_len, 0);
+		if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		}
+		if (!di && !value)
+			goto out;
+		btrfs_release_path(path);
 	}
 
 again:
@@ -198,6 +208,7 @@
 
 	inode_inc_iversion(inode);
 	inode->i_ctime = CURRENT_TIME;
+	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 	ret = btrfs_update_inode(trans, root, inode);
 	BUG_ON(ret);
 out:
@@ -265,7 +276,7 @@
 
 		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 		if (verify_dir_item(root, leaf, di))
-			continue;
+			goto next;
 
 		name_len = btrfs_dir_name_len(leaf, di);
 		total_size += name_len + 1;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 67bef6d..746ce53 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -41,12 +41,12 @@
 
 	_enter("{%s},%p,", cache->cache.identifier, cookie);
 
-	lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL);
+	lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp);
 	if (!lookup_data)
 		goto nomem_lookup_data;
 
 	/* create a new object record and a temporary leaf image */
-	object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL);
+	object = kmem_cache_alloc(cachefiles_object_jar, cachefiles_gfp);
 	if (!object)
 		goto nomem_object;
 
@@ -63,7 +63,7 @@
 	 * - stick the length on the front and leave space on the back for the
 	 *   encoder
 	 */
-	buffer = kmalloc((2 + 512) + 3, GFP_KERNEL);
+	buffer = kmalloc((2 + 512) + 3, cachefiles_gfp);
 	if (!buffer)
 		goto nomem_buffer;
 
@@ -219,7 +219,7 @@
 		return;
 	}
 
-	auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL);
+	auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp);
 	if (!auxdata) {
 		_leave(" [nomem]");
 		return;
@@ -441,6 +441,54 @@
 }
 
 /*
+ * Invalidate an object
+ */
+static void cachefiles_invalidate_object(struct fscache_operation *op)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+	const struct cred *saved_cred;
+	struct path path;
+	uint64_t ni_size;
+	int ret;
+
+	object = container_of(op->object, struct cachefiles_object, fscache);
+	cache = container_of(object->fscache.cache,
+			     struct cachefiles_cache, cache);
+
+	op->object->cookie->def->get_attr(op->object->cookie->netfs_data,
+					  &ni_size);
+
+	_enter("{OBJ%x},[%llu]",
+	       op->object->debug_id, (unsigned long long)ni_size);
+
+	if (object->backer) {
+		ASSERT(S_ISREG(object->backer->d_inode->i_mode));
+
+		fscache_set_store_limit(&object->fscache, ni_size);
+
+		path.dentry = object->backer;
+		path.mnt = cache->mnt;
+
+		cachefiles_begin_secure(cache, &saved_cred);
+		ret = vfs_truncate(&path, 0);
+		if (ret == 0)
+			ret = vfs_truncate(&path, ni_size);
+		cachefiles_end_secure(cache, saved_cred);
+
+		if (ret != 0) {
+			fscache_set_store_limit(&object->fscache, 0);
+			if (ret == -EIO)
+				cachefiles_io_error_obj(object,
+							"Invalidate failed");
+		}
+	}
+
+	fscache_op_complete(op, true);
+	_leave("");
+}
+
+/*
  * dissociate a cache from all the pages it was backing
  */
 static void cachefiles_dissociate_pages(struct fscache_cache *cache)
@@ -455,6 +503,7 @@
 	.lookup_complete	= cachefiles_lookup_complete,
 	.grab_object		= cachefiles_grab_object,
 	.update_object		= cachefiles_update_object,
+	.invalidate_object	= cachefiles_invalidate_object,
 	.drop_object		= cachefiles_drop_object,
 	.put_object		= cachefiles_put_object,
 	.sync_cache		= cachefiles_sync_cache,
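
cachefiles_invalidate_object() above discards a cached object by truncating
the backing file to zero and then back out to the netfs-reported size, which
leaves a sparse file of the right length. The same two-step shape in plain
user-space terms; the path and size are illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/cache-obj", O_CREAT | O_RDWR, 0600);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	ftruncate(fd, 0);	/* drop the stale contents */
	ftruncate(fd, 1 << 20);	/* restore a 1 MiB object size, sparse */
	close(fd);
	return 0;
}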
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index bd6bc1b..4938251 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -23,6 +23,8 @@
 #define CACHEFILES_DEBUG_KLEAVE	2
 #define CACHEFILES_DEBUG_KDEBUG	4
 
+#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC)
+
 /*
  * node records
  */
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c
index 81b8b2b..33b58c6 100644
--- a/fs/cachefiles/key.c
+++ b/fs/cachefiles/key.c
@@ -78,7 +78,7 @@
 
 	_debug("max: %d", max);
 
-	key = kmalloc(max, GFP_KERNEL);
+	key = kmalloc(max, cachefiles_gfp);
 	if (!key)
 		return NULL;
 
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index b0b5f7c..8c01c5fc 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -40,8 +40,7 @@
 	printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
 	       prefix, fscache_object_states[object->fscache.state],
 	       object->fscache.flags, work_busy(&object->fscache.work),
-	       object->fscache.events,
-	       object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK);
+	       object->fscache.events, object->fscache.event_mask);
 	printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",
 	       prefix, object->fscache.n_ops, object->fscache.n_in_progress,
 	       object->fscache.n_exclusive);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c994691..4809922 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -77,25 +77,25 @@
 	struct page *backpage = monitor->back_page, *backpage2;
 	int ret;
 
-	kenter("{ino=%lx},{%lx,%lx}",
+	_enter("{ino=%lx},{%lx,%lx}",
 	       object->backer->d_inode->i_ino,
 	       backpage->index, backpage->flags);
 
 	/* skip if the page was truncated away completely */
 	if (backpage->mapping != bmapping) {
-		kleave(" = -ENODATA [mapping]");
+		_leave(" = -ENODATA [mapping]");
 		return -ENODATA;
 	}
 
 	backpage2 = find_get_page(bmapping, backpage->index);
 	if (!backpage2) {
-		kleave(" = -ENODATA [gone]");
+		_leave(" = -ENODATA [gone]");
 		return -ENODATA;
 	}
 
 	if (backpage != backpage2) {
 		put_page(backpage2);
-		kleave(" = -ENODATA [different]");
+		_leave(" = -ENODATA [different]");
 		return -ENODATA;
 	}
 
@@ -114,7 +114,7 @@
 		if (PageUptodate(backpage))
 			goto unlock_discard;
 
-		kdebug("reissue read");
+		_debug("reissue read");
 		ret = bmapping->a_ops->readpage(NULL, backpage);
 		if (ret < 0)
 			goto unlock_discard;
@@ -129,7 +129,7 @@
 	}
 
 	/* it'll reappear on the todo list */
-	kleave(" = -EINPROGRESS");
+	_leave(" = -EINPROGRESS");
 	return -EINPROGRESS;
 
 unlock_discard:
@@ -137,7 +137,7 @@
 	spin_lock_irq(&object->work_lock);
 	list_del(&monitor->op_link);
 	spin_unlock_irq(&object->work_lock);
-	kleave(" = %d", ret);
+	_leave(" = %d", ret);
 	return ret;
 }
 
@@ -174,11 +174,13 @@
 		_debug("- copy {%lu}", monitor->back_page->index);
 
 	recheck:
-		if (PageUptodate(monitor->back_page)) {
+		if (test_bit(FSCACHE_COOKIE_INVALIDATING,
+			     &object->fscache.cookie->flags)) {
+			error = -ESTALE;
+		} else if (PageUptodate(monitor->back_page)) {
 			copy_highpage(monitor->netfs_page, monitor->back_page);
-
-			pagevec_add(&pagevec, monitor->netfs_page);
-			fscache_mark_pages_cached(monitor->op, &pagevec);
+			fscache_mark_page_cached(monitor->op,
+						 monitor->netfs_page);
 			error = 0;
 		} else if (!PageError(monitor->back_page)) {
 			/* the page has probably been truncated */
@@ -198,6 +200,7 @@
 
 		fscache_end_io(op, monitor->netfs_page, error);
 		page_cache_release(monitor->netfs_page);
+		fscache_retrieval_complete(op, 1);
 		fscache_put_retrieval(op);
 		kfree(monitor);
 
@@ -239,7 +242,7 @@
 	_debug("read back %p{%lu,%d}",
 	       netpage, netpage->index, page_count(netpage));
 
-	monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
+	monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
 	if (!monitor)
 		goto nomem;
 
@@ -258,13 +261,14 @@
 			goto backing_page_already_present;
 
 		if (!newpage) {
-			newpage = page_cache_alloc_cold(bmapping);
+			newpage = __page_cache_alloc(cachefiles_gfp |
+						     __GFP_COLD);
 			if (!newpage)
 				goto nomem_monitor;
 		}
 
 		ret = add_to_page_cache(newpage, bmapping,
-					netpage->index, GFP_KERNEL);
+					netpage->index, cachefiles_gfp);
 		if (ret == 0)
 			goto installed_new_backing_page;
 		if (ret != -EEXIST)
@@ -335,11 +339,11 @@
 backing_page_already_uptodate:
 	_debug("- uptodate");
 
-	pagevec_add(pagevec, netpage);
-	fscache_mark_pages_cached(op, pagevec);
+	fscache_mark_page_cached(op, netpage);
 
 	copy_highpage(netpage, backpage);
 	fscache_end_io(op, netpage, 0);
+	fscache_retrieval_complete(op, 1);
 
 success:
 	_debug("success");
@@ -357,10 +361,13 @@
 
 read_error:
 	_debug("read error %d", ret);
-	if (ret == -ENOMEM)
+	if (ret == -ENOMEM) {
+		fscache_retrieval_complete(op, 1);
 		goto out;
+	}
 io_error:
 	cachefiles_io_error_obj(object, "Page read error on backing file");
+	fscache_retrieval_complete(op, 1);
 	ret = -ENOBUFS;
 	goto out;
 
@@ -370,6 +377,7 @@
 	fscache_put_retrieval(monitor->op);
 	kfree(monitor);
 nomem:
+	fscache_retrieval_complete(op, 1);
 	_leave(" = -ENOMEM");
 	return -ENOMEM;
 }
@@ -408,7 +416,7 @@
 	_enter("{%p},{%lx},,,", object, page->index);
 
 	if (!object->backer)
-		return -ENOBUFS;
+		goto enobufs;
 
 	inode = object->backer->d_inode;
 	ASSERT(S_ISREG(inode->i_mode));
@@ -417,7 +425,7 @@
 
 	/* calculate the shift required to use bmap */
 	if (inode->i_sb->s_blocksize > PAGE_SIZE)
-		return -ENOBUFS;
+		goto enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
@@ -448,15 +456,20 @@
 						       &pagevec);
 	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
 		/* there's space in the cache we can use */
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
+		fscache_mark_page_cached(op, page);
+		fscache_retrieval_complete(op, 1);
 		ret = -ENODATA;
 	} else {
-		ret = -ENOBUFS;
+		goto enobufs;
 	}
 
 	_leave(" = %d", ret);
 	return ret;
+
+enobufs:
+	fscache_retrieval_complete(op, 1);
+	_leave(" = -ENOBUFS");
+	return -ENOBUFS;
 }
 
 /*
@@ -465,8 +478,7 @@
  */
 static int cachefiles_read_backing_file(struct cachefiles_object *object,
 					struct fscache_retrieval *op,
-					struct list_head *list,
-					struct pagevec *mark_pvec)
+					struct list_head *list)
 {
 	struct cachefiles_one_read *monitor = NULL;
 	struct address_space *bmapping = object->backer->d_inode->i_mapping;
@@ -485,7 +497,7 @@
 		       netpage, netpage->index, page_count(netpage));
 
 		if (!monitor) {
-			monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
+			monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
 			if (!monitor)
 				goto nomem;
 
@@ -500,13 +512,14 @@
 				goto backing_page_already_present;
 
 			if (!newpage) {
-				newpage = page_cache_alloc_cold(bmapping);
+				newpage = __page_cache_alloc(cachefiles_gfp |
+							     __GFP_COLD);
 				if (!newpage)
 					goto nomem;
 			}
 
 			ret = add_to_page_cache(newpage, bmapping,
-						netpage->index, GFP_KERNEL);
+						netpage->index, cachefiles_gfp);
 			if (ret == 0)
 				goto installed_new_backing_page;
 			if (ret != -EEXIST)
@@ -536,10 +549,11 @@
 		_debug("- monitor add");
 
 		ret = add_to_page_cache(netpage, op->mapping, netpage->index,
-					GFP_KERNEL);
+					cachefiles_gfp);
 		if (ret < 0) {
 			if (ret == -EEXIST) {
 				page_cache_release(netpage);
+				fscache_retrieval_complete(op, 1);
 				continue;
 			}
 			goto nomem;
@@ -612,10 +626,11 @@
 		_debug("- uptodate");
 
 		ret = add_to_page_cache(netpage, op->mapping, netpage->index,
-					GFP_KERNEL);
+					cachefiles_gfp);
 		if (ret < 0) {
 			if (ret == -EEXIST) {
 				page_cache_release(netpage);
+				fscache_retrieval_complete(op, 1);
 				continue;
 			}
 			goto nomem;
@@ -626,16 +641,17 @@
 		page_cache_release(backpage);
 		backpage = NULL;
 
-		if (!pagevec_add(mark_pvec, netpage))
-			fscache_mark_pages_cached(op, mark_pvec);
+		fscache_mark_page_cached(op, netpage);
 
 		page_cache_get(netpage);
 		if (!pagevec_add(&lru_pvec, netpage))
 			__pagevec_lru_add_file(&lru_pvec);
 
+		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
 		page_cache_release(netpage);
 		netpage = NULL;
+		fscache_retrieval_complete(op, 1);
 		continue;
 	}
 
@@ -661,6 +677,7 @@
 	list_for_each_entry_safe(netpage, _n, list, lru) {
 		list_del(&netpage->lru);
 		page_cache_release(netpage);
+		fscache_retrieval_complete(op, 1);
 	}
 
 	_leave(" = %d", ret);
@@ -669,15 +686,17 @@
 nomem:
 	_debug("nomem");
 	ret = -ENOMEM;
-	goto out;
+	goto record_page_complete;
 
 read_error:
 	_debug("read error %d", ret);
 	if (ret == -ENOMEM)
-		goto out;
+		goto record_page_complete;
 io_error:
 	cachefiles_io_error_obj(object, "Page read error on backing file");
 	ret = -ENOBUFS;
+record_page_complete:
+	fscache_retrieval_complete(op, 1);
 	goto out;
 }
 
@@ -709,7 +728,7 @@
 	       *nr_pages);
 
 	if (!object->backer)
-		return -ENOBUFS;
+		goto all_enobufs;
 
 	space = 1;
 	if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
@@ -722,7 +741,7 @@
 
 	/* calculate the shift required to use bmap */
 	if (inode->i_sb->s_blocksize > PAGE_SIZE)
-		return -ENOBUFS;
+		goto all_enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
@@ -762,7 +781,10 @@
 			nrbackpages++;
 		} else if (space && pagevec_add(&pagevec, page) == 0) {
 			fscache_mark_pages_cached(op, &pagevec);
+			fscache_retrieval_complete(op, 1);
 			ret = -ENODATA;
+		} else {
+			fscache_retrieval_complete(op, 1);
 		}
 	}
 
@@ -775,18 +797,18 @@
 	/* submit the apparently valid pages to the backing fs to be read from
 	 * disk */
 	if (nrbackpages > 0) {
-		ret2 = cachefiles_read_backing_file(object, op, &backpages,
-						    &pagevec);
+		ret2 = cachefiles_read_backing_file(object, op, &backpages);
 		if (ret2 == -ENOMEM || ret2 == -EINTR)
 			ret = ret2;
 	}
 
-	if (pagevec_count(&pagevec) > 0)
-		fscache_mark_pages_cached(op, &pagevec);
-
 	_leave(" = %d [nr=%u%s]",
 	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
 	return ret;
+
+all_enobufs:
+	fscache_retrieval_complete(op, *nr_pages);
+	return -ENOBUFS;
 }
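
A pattern recurs through the cachefiles read paths in this file:
fscache_retrieval_complete(op, 1) is now called on every exit, including the
-ENOMEM, -ENOBUFS, and enobufs-label paths. The invariant it enforces,
reduced to arithmetic:

#include <stdio.h>

int main(void)
{
	int outstanding = 3;	/* pages handed to the retrieval */

	outstanding -= 1;	/* page 0: read OK   -> complete(op, 1) */
	outstanding -= 1;	/* page 1: -ENOMEM   -> complete(op, 1) too */
	outstanding -= 1;	/* page 2: -ENOBUFS  -> complete(op, 1) too */

	printf("outstanding = %d\n", outstanding);	/* 0: op may finish */
	return 0;
}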
 
 /*
@@ -806,7 +828,6 @@
 {
 	struct cachefiles_object *object;
 	struct cachefiles_cache *cache;
-	struct pagevec pagevec;
 	int ret;
 
 	object = container_of(op->op.object,
@@ -817,14 +838,12 @@
 	_enter("%p,{%lx},", object, page->index);
 
 	ret = cachefiles_has_space(cache, 0, 1);
-	if (ret == 0) {
-		pagevec_init(&pagevec, 0);
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
-	} else {
+	if (ret == 0)
+		fscache_mark_page_cached(op, page);
+	else
 		ret = -ENOBUFS;
-	}
 
+	fscache_retrieval_complete(op, 1);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -874,6 +893,7 @@
 		ret = -ENOBUFS;
 	}
 
+	fscache_retrieval_complete(op, *nr_pages);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index e18b183..73b4628 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -174,7 +174,7 @@
 	ASSERT(dentry);
 	ASSERT(dentry->d_inode);
 
-	auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
+	auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp);
 	if (!auxbuf) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6690269..064d1a6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -267,6 +267,14 @@
 	kfree(req->r_pages);
 }
 
+static void ceph_unlock_page_vector(struct page **pages, int num_pages)
+{
+	int i;
+
+	for (i = 0; i < num_pages; i++)
+		unlock_page(pages[i]);
+}
+
 /*
  * start an async read(ahead) operation.  return nr_pages we submitted
  * a read for on success, or negative error code.
@@ -347,6 +355,7 @@
 	return nr_pages;
 
 out_pages:
+	ceph_unlock_page_vector(pages, nr_pages);
 	ceph_release_page_vector(pages, nr_pages);
 out:
 	ceph_osdc_put_request(req);
@@ -1078,23 +1087,51 @@
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = file->f_dentry->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_file_info *fi = file->private_data;
 	struct page *page;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	int r;
+	int r, want, got = 0;
+
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+	else
+		want = CEPH_CAP_FILE_BUFFER;
+
+	dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
+	     inode, ceph_vinop(inode), pos, len, inode->i_size);
+	r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len);
+	if (r < 0)
+		return r;
+	dout("write_begin %p %llx.%llx %llu~%u  got cap refs on %s\n",
+	     inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
+	if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) {
+		ceph_put_cap_refs(ci, got);
+		return -EAGAIN;
+	}
 
 	do {
 		/* get a page */
 		page = grab_cache_page_write_begin(mapping, index, 0);
-		if (!page)
-			return -ENOMEM;
-		*pagep = page;
+		if (!page) {
+			r = -ENOMEM;
+			break;
+		}
 
 		dout("write_begin file %p inode %p page %p %d~%d\n", file,
 		     inode, page, (int)pos, (int)len);
 
 		r = ceph_update_writeable_page(file, pos, len, page);
+		if (r)
+			page_cache_release(page);
 	} while (r == -EAGAIN);
 
+	if (r) {
+		ceph_put_cap_refs(ci, got);
+	} else {
+		*pagep = page;
+		*(int *)fsdata = got;
+	}
 	return r;
 }
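
The write_begin/write_end pairing above threads the acquired cap bits through
the opaque fsdata slot, so write_end knows exactly which references to drop.
A minimal model of that hand-off; the names and the fake cap value are
illustrative:

#include <stdio.h>

static int acquire_caps(void)	{ return 42; /* pretend cap bits */ }
static void release_caps(int got) { printf("released caps %d\n", got); }

static int write_begin_model(void **fsdata)
{
	int got = acquire_caps();

	*fsdata = (void *)(unsigned long)got;	/* stash in the slot */
	return 0;
}

static void write_end_model(void *fsdata)
{
	release_caps((int)(unsigned long)fsdata);	/* pull it back out */
}

int main(void)
{
	void *fsdata;

	write_begin_model(&fsdata);
	write_end_model(fsdata);
	return 0;
}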
 
@@ -1108,10 +1145,12 @@
 			  struct page *page, void *fsdata)
 {
 	struct inode *inode = file->f_dentry->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 	int check_cap = 0;
+	int got = (unsigned long)fsdata;
 
 	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
 	     inode, page, (int)pos, (int)copied, (int)len);
@@ -1134,6 +1173,19 @@
 	up_read(&mdsc->snap_rwsem);
 	page_cache_release(page);
 
+	if (copied > 0) {
+		int dirty;
+		spin_lock(&ci->i_ceph_lock);
+		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+		spin_unlock(&ci->i_ceph_lock);
+		if (dirty)
+			__mark_inode_dirty(inode, dirty);
+	}
+
+	dout("write_end %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
+	     inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
+	ceph_put_cap_refs(ci, got);
+
 	if (check_cap)
 		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 3251e9c..a1d9bb3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -236,8 +236,10 @@
 	if (!ctx) {
 		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
 		if (cap) {
+			spin_lock(&mdsc->caps_list_lock);
 			mdsc->caps_use_count++;
 			mdsc->caps_total_count++;
+			spin_unlock(&mdsc->caps_list_lock);
 		}
 		return cap;
 	}
@@ -1349,11 +1351,15 @@
 		if (!ci->i_head_snapc)
 			ci->i_head_snapc = ceph_get_snap_context(
 				ci->i_snap_realm->cached_context);
-		dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
-			ci->i_head_snapc);
+		dout(" inode %p now dirty snapc %p auth cap %p\n",
+		     &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
 		BUG_ON(!list_empty(&ci->i_dirty_item));
 		spin_lock(&mdsc->cap_dirty_lock);
-		list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
+		if (ci->i_auth_cap)
+			list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
+		else
+			list_add(&ci->i_dirty_item,
+				 &mdsc->cap_dirty_migrating);
 		spin_unlock(&mdsc->cap_dirty_lock);
 		if (ci->i_flushing_caps == 0) {
 			ihold(inode);
@@ -2388,7 +2394,7 @@
 			    &atime);
 
 	/* max size increase? */
-	if (max_size != ci->i_max_size) {
+	if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
 		dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
 		ci->i_max_size = max_size;
 		if (max_size >= ci->i_wanted_max_size) {
@@ -2745,6 +2751,7 @@
 
 	/* make sure we re-request max_size, if necessary */
 	spin_lock(&ci->i_ceph_lock);
+	ci->i_wanted_max_size = 0;  /* reset */
 	ci->i_requested_max_size = 0;
 	spin_unlock(&ci->i_ceph_lock);
 }
@@ -2840,8 +2847,6 @@
 	case CEPH_CAP_OP_IMPORT:
 		handle_cap_import(mdsc, inode, h, session,
 				  snaptrace, snaptrace_len);
-		ceph_check_caps(ceph_inode(inode), 0, session);
-		goto done_unlocked;
 	}
 
 	/* the rest require a cap */
@@ -2858,6 +2863,7 @@
 	switch (op) {
 	case CEPH_CAP_OP_REVOKE:
 	case CEPH_CAP_OP_GRANT:
+	case CEPH_CAP_OP_IMPORT:
 		handle_cap_grant(inode, h, session, cap, msg->middle);
 		goto done_unlocked;
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e5b7731..8c1aabe 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -454,7 +454,7 @@
 	fi->flags &= ~CEPH_F_ATEND;
 }
 
-static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
@@ -463,7 +463,7 @@
 
 	mutex_lock(&inode->i_mutex);
 	retval = -EINVAL;
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += inode->i_size + 2;   /* FIXME */
 		break;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 9349bb3..ca3ab3f 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -56,13 +56,15 @@
 	struct ceph_nfs_confh *cfh = (void *)rawfh;
 	int connected_handle_length = sizeof(*cfh)/4;
 	int handle_length = sizeof(*fh)/4;
-	struct dentry *dentry = d_find_alias(inode);
+	struct dentry *dentry;
 	struct dentry *parent;
 
 	/* don't re-export snaps */
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EINVAL;
 
+	dentry = d_find_alias(inode);
+
 	/* if we found an alias, generate a connectable fh */
 	if (*max_len >= connected_handle_length && dentry) {
 		dout("encode_fh %p connectable\n", dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5840d2a..e51558f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -712,63 +712,53 @@
 	struct ceph_osd_client *osdc =
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
 	loff_t endoff = pos + iov->iov_len;
-	int want, got = 0;
-	int ret, err;
+	int got = 0;
+	int ret, err, written;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
 retry_snap:
+	written = 0;
 	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
 		return -ENOSPC;
 	__ceph_do_pending_vmtruncate(inode);
-	dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
-	     inode->i_size);
-	if (fi->fmode & CEPH_FILE_MODE_LAZY)
-		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
-	else
-		want = CEPH_CAP_FILE_BUFFER;
-	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
-	if (ret < 0)
-		goto out_put;
 
-	dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
-	     ceph_cap_string(got));
-
-	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-	    (iocb->ki_filp->f_flags & O_DIRECT) ||
-	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
-	    (fi->flags & CEPH_F_SYNC)) {
-		ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
-			&iocb->ki_pos);
-	} else {
-		/*
-		 * buffered write; drop Fw early to avoid slow
-		 * revocation if we get stuck on balance_dirty_pages
-		 */
-		int dirty;
-
-		spin_lock(&ci->i_ceph_lock);
-		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-		spin_unlock(&ci->i_ceph_lock);
-		ceph_put_cap_refs(ci, got);
-
+	/*
+	 * try to do a buffered write.  if we don't have sufficient
+	 * caps, we'll get -EAGAIN from generic_file_aio_write, or a
+	 * short write if we only get caps for some pages.
+	 */
+	if (!(iocb->ki_filp->f_flags & O_DIRECT) &&
+	    !(inode->i_sb->s_flags & MS_SYNCHRONOUS) &&
+	    !(fi->flags & CEPH_F_SYNC)) {
 		ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+		if (ret >= 0)
+			written = ret;
+
 		if ((ret >= 0 || ret == -EIOCBQUEUED) &&
 		    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
 		     || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
-			err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
+			err = vfs_fsync_range(file, pos, pos + written - 1, 1);
 			if (err < 0)
 				ret = err;
 		}
-
-		if (dirty)
-			__mark_inode_dirty(inode, dirty);
-		goto out;
+		if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff)
+			goto out;
 	}
 
+	dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
+	     inode, ceph_vinop(inode), pos + written,
+	     (unsigned)iov->iov_len - written, inode->i_size);
+	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff);
+	if (ret < 0)
+		goto out;
+
+	dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
+	     inode, ceph_vinop(inode), pos + written,
+	     (unsigned)iov->iov_len - written, ceph_cap_string(got));
+	ret = ceph_sync_write(file, iov->iov_base + written,
+			      iov->iov_len - written, &iocb->ki_pos);
 	if (ret >= 0) {
 		int dirty;
 		spin_lock(&ci->i_ceph_lock);
@@ -777,13 +767,10 @@
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
 	}
-
-out_put:
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
-	     ceph_cap_string(got));
+	     inode, ceph_vinop(inode), pos + written,
+	     (unsigned)iov->iov_len - written, ceph_cap_string(got));
 	ceph_put_cap_refs(ci, got);
-
 out:
 	if (ret == -EOLDSNAPC) {
 		dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
@@ -797,7 +784,7 @@
 /*
  * llseek.  be sure to verify file size on SEEK_END.
  */
-static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
@@ -805,7 +792,7 @@
 	mutex_lock(&inode->i_mutex);
 	__ceph_do_pending_vmtruncate(inode);
 
-	if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
+	if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
 		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
 		if (ret < 0) {
 			offset = ret;
@@ -813,7 +800,7 @@
 		}
 	}
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += inode->i_size;
 		break;
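
The aio_write rework earlier in this file inverts the old order: try the
buffered path first, and fall back to a synchronous write only for whatever
portion did not complete (in the real code, because cap coverage ran out). A
toy model of the two-phase shape:

#include <stdio.h>

static long buffered_write(long len)	{ return len / 2; /* short write */ }
static long sync_write(long len)	{ return len; }

int main(void)
{
	long len = 8192, written = 0, ret;

	ret = buffered_write(len);
	if (ret > 0)
		written = ret;
	if (written < len)	/* finish the tail synchronously */
		written += sync_write(len - written);
	printf("wrote %ld of %ld\n", written, len);
	return 0;
}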
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ba95eea..2971eaa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1466,7 +1466,7 @@
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 to;
-	int wrbuffer_refs, wake = 0;
+	int wrbuffer_refs, finish = 0;
 
 retry:
 	spin_lock(&ci->i_ceph_lock);
@@ -1498,15 +1498,18 @@
 	truncate_inode_pages(inode->i_mapping, to);
 
 	spin_lock(&ci->i_ceph_lock);
-	ci->i_truncate_pending--;
-	if (ci->i_truncate_pending == 0)
-		wake = 1;
+	if (to == ci->i_truncate_size) {
+		ci->i_truncate_pending = 0;
+		finish = 1;
+	}
 	spin_unlock(&ci->i_ceph_lock);
+	if (!finish)
+		goto retry;
 
 	if (wrbuffer_refs == 0)
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
-	if (wake)
-		wake_up_all(&ci->i_cap_wq);
+
+	wake_up_all(&ci->i_cap_wq);
 }
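
The truncate path above now loops until the size it acted on is still the
pending truncate size, instead of decrementing a counter, so a size that
changes mid-truncate simply triggers another pass. A model of that
settle-until-stable loop, with the concurrent change simulated:

#include <stdio.h>

int main(void)
{
	long target = 4096, acted_on;
	int passes = 0;

	do {
		acted_on = target;
		/* ... truncate pages to acted_on, lock dropped ... */
		if (passes++ == 0)
			target = 2048;	/* simulate a concurrent change */
	} while (acted_on != target);

	printf("settled after %d passes at %ld\n", passes, target);
	return 0;
}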
 
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1bcf712..9165eb8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1590,7 +1590,7 @@
 	} else if (rpath || rino) {
 		*ino = rino;
 		*ppath = rpath;
-		*pathlen = strlen(rpath);
+		*pathlen = rpath ? strlen(rpath) : 0;
 		dout(" path %.*s\n", *pathlen, rpath);
 	}
 
@@ -1876,9 +1876,14 @@
 static void __wake_requests(struct ceph_mds_client *mdsc,
 			    struct list_head *head)
 {
-	struct ceph_mds_request *req, *nreq;
+	struct ceph_mds_request *req;
+	LIST_HEAD(tmp_list);
 
-	list_for_each_entry_safe(req, nreq, head, r_wait) {
+	list_splice_init(head, &tmp_list);
+
+	while (!list_empty(&tmp_list)) {
+		req = list_entry(tmp_list.next,
+				 struct ceph_mds_request, r_wait);
 		list_del_init(&req->r_wait);
 		__do_request(mdsc, req);
 	}
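
__wake_requests() above now splices the whole wait list onto a private head
before draining it, so a handler that immediately re-queues a request onto
the original list cannot disturb the walk. The idiom in miniature, with a
hand-rolled singly-linked list standing in for list_head:

#include <stdio.h>

struct node { int id; struct node *next; };

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *head = &a;
	struct node *tmp;

	tmp = head;		/* splice: steal the whole list */
	head = NULL;		/* re-queuers now see an empty list */

	while (tmp) {		/* drain the private copy */
		struct node *req = tmp;

		tmp = tmp->next;
		printf("waking request %d\n", req->id);
	}
	return 0;
}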
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 2eb43f2..e86aa994 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -403,8 +403,6 @@
 		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
 	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
 		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
-		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
 	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
 		seq_printf(m, ",osdkeepalivetimeout=%d",
 			   opt->osd_keepalive_timeout);
@@ -849,7 +847,7 @@
 		fsc->backing_dev_info.ra_pages =
 			default_backing_dev_info.ra_pages;
 
-	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
+	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
 			   atomic_long_inc_return(&bdi_seq));
 	if (!err)
 		sb->s_bdi = &fsc->backing_dev_info;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 75c1ee69..5cbd00e 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -346,19 +346,15 @@
 	if (!cred)
 		return -ENOMEM;
 
-	keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
-			    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-			    KEY_USR_VIEW | KEY_USR_READ,
-			    KEY_ALLOC_NOT_IN_QUOTA);
+	keyring = keyring_alloc(".cifs_idmap", 0, 0, cred,
+				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
+				KEY_USR_VIEW | KEY_USR_READ,
+				KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto failed_put_cred;
 	}
 
-	ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
-	if (ret < 0)
-		goto failed_put_key;
-
 	ret = register_key_type(&cifs_idmap_key_type);
 	if (ret < 0)
 		goto failed_put_key;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 210f0af..ce9f3c5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -695,13 +695,13 @@
 	return written;
 }
 
-static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
+static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 {
 	/*
-	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
+	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
 	 * the cached file length
 	 */
-	if (origin != SEEK_SET && origin != SEEK_CUR) {
+	if (whence != SEEK_SET && whence != SEEK_CUR) {
 		int rc;
 		struct inode *inode = file->f_path.dentry->d_inode;
 
@@ -728,7 +728,7 @@
 		if (rc < 0)
 			return (loff_t)rc;
 	}
-	return generic_file_llseek(file, offset, origin);
+	return generic_file_llseek(file, offset, whence);
 }
 
 static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7414ae2..712b10f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1613,12 +1613,12 @@
 	return 0;
 }
 
-static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
+static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry * dentry = file->f_path.dentry;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/dcache.c b/fs/dcache.c
index 3a463d0..19153a0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -455,24 +455,6 @@
 EXPORT_SYMBOL(d_drop);
 
 /*
- * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
- * @dentry: dentry to drop
- *
- * This is called when we do a lookup on a placeholder dentry that needed to be
- * looked up.  The dentry should have been hashed in order for it to be found by
- * the lookup code, but now needs to be unhashed while we do the actual lookup
- * and clear the DCACHE_NEED_LOOKUP flag.
- */
-void d_clear_need_lookup(struct dentry *dentry)
-{
-	spin_lock(&dentry->d_lock);
-	__d_drop(dentry);
-	dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
-	spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(d_clear_need_lookup);
-
-/*
  * Finish off a dentry we've decided to kill.
  * dentry->d_lock must be held, returns with it unlocked.
  * If ref is non-zero, then decrement the refcount too.
@@ -565,13 +547,7 @@
  	if (d_unhashed(dentry))
 		goto kill_it;
 
-	/*
-	 * If this dentry needs lookup, don't set the referenced flag so that it
-	 * is more likely to be cleaned up by the dcache shrinker in case of
-	 * memory pressure.
-	 */
-	if (!d_need_lookup(dentry))
-		dentry->d_flags |= DCACHE_REFERENCED;
+	dentry->d_flags |= DCACHE_REFERENCED;
 	dentry_lru_add(dentry);
 
 	dentry->d_count--;
@@ -1583,7 +1559,7 @@
  */
 struct dentry *d_obtain_alias(struct inode *inode)
 {
-	static const struct qstr anonstring = { .name = "" };
+	static const struct qstr anonstring = QSTR_INIT("/", 1);
 	struct dentry *tmp;
 	struct dentry *res;
 
@@ -1737,13 +1713,6 @@
 	}
 
 	/*
-	 * We are going to instantiate this dentry, unhash it and clear the
-	 * lookup flag so we can do that.
-	 */
-	if (unlikely(d_need_lookup(found)))
-		d_clear_need_lookup(found);
-
-	/*
 	 * Negative dentry: instantiate it unless the inode is a directory and
 	 * already has a dentry.
 	 */
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d81b9f6..35470d9 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -19,6 +19,8 @@
 #include <linux/export.h>
 #include <linux/kref.h>
 #include <linux/eventfd.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 struct eventfd_ctx {
 	struct kref kref;
@@ -284,7 +286,25 @@
 	return res;
 }
 
+#ifdef CONFIG_PROC_FS
+static int eventfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct eventfd_ctx *ctx = f->private_data;
+	int ret;
+
+	spin_lock_irq(&ctx->wqh.lock);
+	ret = seq_printf(m, "eventfd-count: %16llx\n",
+			 (unsigned long long)ctx->count);
+	spin_unlock_irq(&ctx->wqh.lock);
+
+	return ret;
+}
+#endif
+
 static const struct file_operations eventfd_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= eventfd_show_fdinfo,
+#endif
 	.release	= eventfd_release,
 	.poll		= eventfd_poll,
 	.read		= eventfd_read,
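
With the show_fdinfo hook above in place, an eventfd's current count becomes
visible through /proc/<pid>/fdinfo/<fd>. A small probe, assuming a Linux host
running a kernel with this patch:

#include <stdio.h>
#include <sys/eventfd.h>

int main(void)
{
	char path[64], line[128];
	int fd = eventfd(7, 0);
	FILE *f;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
	f = fopen(path, "r");
	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* pos, flags, eventfd-count */
	fclose(f);
	return 0;
}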
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cd96649..be56b21 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -38,6 +38,8 @@
 #include <asm/io.h>
 #include <asm/mman.h>
 #include <linux/atomic.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 /*
  * LOCKING:
@@ -783,8 +785,34 @@
 	return pollflags != -1 ? pollflags : 0;
 }
 
+#ifdef CONFIG_PROC_FS
+static int ep_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct eventpoll *ep = f->private_data;
+	struct rb_node *rbp;
+	int ret = 0;
+
+	mutex_lock(&ep->mtx);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+
+		ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+				 epi->ffd.fd, epi->event.events,
+				 (long long)epi->event.data);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&ep->mtx);
+
+	return ret;
+}
+#endif
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= ep_show_fdinfo,
+#endif
 	.release	= ep_eventpoll_release,
 	.poll		= ep_eventpoll_poll,
 	.llseek		= noop_llseek,
diff --git a/fs/exec.c b/fs/exec.c
index 721a299..18c45ca 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1175,9 +1175,24 @@
 		mutex_unlock(&current->signal->cred_guard_mutex);
 		abort_creds(bprm->cred);
 	}
+	/* If a binfmt changed the interp, free it. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
 	kfree(bprm);
 }
 
+int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+{
+	/* If a binfmt changed the interp, free it first. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
+	bprm->interp = kstrdup(interp, GFP_KERNEL);
+	if (!bprm->interp)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL(bprm_change_interp);
+
 /*
  * install the new credentials for this executable
  */
@@ -1266,14 +1281,13 @@
 	bprm->cred->egid = current_egid();
 
 	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-	    !current->no_new_privs) {
+	    !current->no_new_privs &&
+	    kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+	    kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
 		/* Set-uid? */
 		if (mode & S_ISUID) {
-			if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
-				return -EPERM;
 			bprm->per_clear |= PER_CLEAR_ON_SETID;
 			bprm->cred->euid = inode->i_uid;
-
 		}
 
 		/* Set-gid? */
@@ -1283,8 +1297,6 @@
 		 * executable.
 		 */
 		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-			if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
-				return -EPERM;
 			bprm->per_clear |= PER_CLEAR_ON_SETID;
 			bprm->cred->egid = inode->i_gid;
 		}
@@ -1356,6 +1368,10 @@
 	struct linux_binfmt *fmt;
 	pid_t old_pid, old_vpid;
 
+	/* This allows 4 levels of binfmt rewrites before failing hard. */
+	if (depth > 5)
+		return -ELOOP;
+
 	retval = security_bprm_check(bprm);
 	if (retval)
 		return retval;
@@ -1380,12 +1396,8 @@
 			if (!try_module_get(fmt->module))
 				continue;
 			read_unlock(&binfmt_lock);
+			bprm->recursion_depth = depth + 1;
 			retval = fn(bprm);
-			/*
-			 * Restore the depth counter to its starting value
-			 * in this call, so we don't have to rely on every
-			 * load_binary function to restore it on return.
-			 */
 			bprm->recursion_depth = depth;
 			if (retval >= 0) {
 				if (depth == 0) {
@@ -1657,7 +1669,6 @@
 	return __get_dumpable(mm->flags);
 }
 
-#ifdef __ARCH_WANT_SYS_EXECVE
 SYSCALL_DEFINE3(execve,
 		const char __user *, filename,
 		const char __user *const __user *, argv,
@@ -1685,23 +1696,3 @@
 	return error;
 }
 #endif
-#endif
-
-#ifdef __ARCH_WANT_KERNEL_EXECVE
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	int ret = do_execve(filename,
-			(const char __user *const __user *)argv,
-			(const char __user *const __user *)envp);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * We were successful.  We won't be returning to our caller, but
-	 * instead to user space by manipulating the kernel stack.
-	 */
-	ret_from_kernel_execve(current_pt_regs());
-}
-#endif
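
bprm_change_interp() above gives binfmt handlers such as binfmt_script and
binfmt_misc a safe way to swap the interpreter string: the new value is
duplicated, and any previously duplicated value is freed first, so chained
rewrites cannot leak. A user-space model of the same ownership rule; the
names are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct bprm_model { char *filename; char *interp; };

static int change_interp(struct bprm_model *b, const char *interp)
{
	if (b->interp != b->filename)	/* free only if already duplicated */
		free(b->interp);
	b->interp = strdup(interp);
	return b->interp ? 0 : -1;
}

int main(void)
{
	struct bprm_model b;

	b.filename = "script.sh";
	b.interp = b.filename;		/* starts out aliasing filename */

	change_interp(&b, "/bin/sh");		/* nothing to free yet */
	change_interp(&b, "/usr/bin/env");	/* frees the "/bin/sh" copy */
	printf("interp = %s\n", b.interp);
	free(b.interp);
	return 0;
}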
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index b561810..d1f80ab 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -361,12 +361,12 @@
 	return 0;
 
 err:
-	if (!pcol->read_4_write)
-		_unlock_pcol_pages(pcol, ret, READ);
-
-	pcol_free(pcol);
-
+	if (!pcol_copy) /* Failed before ownership transfer */
+		pcol_copy = pcol;
+	_unlock_pcol_pages(pcol_copy, ret, READ);
+	pcol_free(pcol_copy);
 	kfree(pcol_copy);
+
 	return ret;
 }
 
@@ -676,8 +676,10 @@
 	return 0;
 
 err:
-	_unlock_pcol_pages(pcol, ret, WRITE);
-	pcol_free(pcol);
+	if (!pcol_copy) /* Failed before ownership transfer */
+		pcol_copy = pcol;
+	_unlock_pcol_pages(pcol_copy, ret, WRITE);
+	pcol_free(pcol_copy);
 	kfree(pcol_copy);
 
 	return ret;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 29ab099..5df4bb4 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -322,10 +322,10 @@
 
 	if (parent && (len < 4)) {
 		*max_len = 4;
-		return 255;
+		return FILEID_INVALID;
 	} else if (len < 2) {
 		*max_len = 2;
-		return 255;
+		return FILEID_INVALID;
 	}
 
 	len = 2;
@@ -341,10 +341,21 @@
 	return type;
 }
 
+int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
+			     int *max_len, struct inode *parent)
+{
+	const struct export_operations *nop = inode->i_sb->s_export_op;
+
+	if (nop && nop->encode_fh)
+		return nop->encode_fh(inode, fid->raw, max_len, parent);
+
+	return export_encode_fh(inode, fid, max_len, parent);
+}
+EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
+
 int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
 		int connectable)
 {
-	const struct export_operations *nop = dentry->d_sb->s_export_op;
 	int error;
 	struct dentry *p = NULL;
 	struct inode *inode = dentry->d_inode, *parent = NULL;
@@ -357,10 +368,8 @@
 		 */
 		parent = p->d_inode;
 	}
-	if (nop->encode_fh)
-		error = nop->encode_fh(inode, fid->raw, max_len, parent);
-	else
-		error = export_encode_fh(inode, fid, max_len, parent);
+
+	error = exportfs_encode_inode_fh(inode, fid, max_len, parent);
 	dput(p);
 
 	return error;
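
Two related changes land in this file: the bare 255 becomes the named
FILEID_INVALID, and the new exportfs_encode_inode_fh() helper lets callers
encode from an inode without holding a dentry. The contract behind the
invalid return is that a too-small buffer gets the required length written
back instead of an encoding. A toy model of that contract, with 0xff standing
in for the real constant:

#include <stdio.h>
#include <string.h>

#define FILEID_INVALID_MODEL	0xff

static int encode_fh_model(unsigned int *buf, int *max_len, int need)
{
	if (*max_len < need) {
		*max_len = need;	/* tell the caller what to allocate */
		return FILEID_INVALID_MODEL;
	}
	memset(buf, 0, need * sizeof(*buf));
	*max_len = need;
	return 1;			/* some valid fid type */
}

int main(void)
{
	unsigned int buf[4];
	int len = 1;

	if (encode_fh_model(buf, &len, 2) == FILEID_INVALID_MODEL)
		printf("retry with %d words\n", len);	/* 2 */
	return 0;
}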
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index c8fff93..dd91264 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -296,17 +296,17 @@
  * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
 *       will be invalid once the directory has been converted into a dx directory
  */
-loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
 	loff_t htree_max = ext3_get_htree_eof(file);
 
 	if (likely(dx_dir))
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 					        htree_max, htree_max);
 	else
-		return generic_file_llseek(file, offset, origin);
+		return generic_file_llseek(file, offset, whence);
 }
 
 /*
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 7e87e37..b176d42 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1071,8 +1071,7 @@
 	 * mapped. 0 in case of a HOLE.
 	 */
 	if (err > 0) {
-		if (err > 1)
-			WARN_ON(1);
+		WARN_ON(err > 1);
 		err = 0;
 	}
 	*errp = err;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 5366393..6e50223 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1661,9 +1661,6 @@
 		return -ENOMEM;
 	}
 	sb->s_fs_info = sbi;
-	sbi->s_mount_opt = 0;
-	sbi->s_resuid = make_kuid(&init_user_ns, EXT3_DEF_RESUID);
-	sbi->s_resgid = make_kgid(&init_user_ns, EXT3_DEF_RESGID);
 	sbi->s_sb_block = sb_block;
 
 	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index c22f170..0a475c8 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -39,22 +39,8 @@
 	  compiled kernel size by using one file system driver for
 	  ext2, ext3, and ext4 file systems.
 
-config EXT4_FS_XATTR
-	bool "Ext4 extended attributes"
-	depends on EXT4_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-	  You need this for POSIX ACL support on ext4.
-
 config EXT4_FS_POSIX_ACL
 	bool "Ext4 POSIX Access Control Lists"
-	depends on EXT4_FS_XATTR
 	select FS_POSIX_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
@@ -67,7 +53,6 @@
 
 config EXT4_FS_SECURITY
 	bool "Ext4 Security Labels"
-	depends on EXT4_FS_XATTR
 	help
 	  Security labels support alternative access control models
 	  implemented by security modules like SELinux.  This option
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 56fd8f86..0310fec 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -7,8 +7,8 @@
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
 		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
-		mmp.o indirect.o
+		mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
+		xattr_trusted.o inline.o
 
-ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
 ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index d3c5b88..e6e0d98 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -423,8 +423,10 @@
 
 retry:
 	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		error = PTR_ERR(handle);
+		goto release_and_out;
+	}
 	error = ext4_set_acl(handle, inode, type, acl);
 	ext4_journal_stop(handle);
 	if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 8e07d2a..80a28b2 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -27,23 +27,11 @@
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include "ext4.h"
-
-static unsigned char ext4_filetype_table[] = {
-	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
+#include "xattr.h"
 
 static int ext4_dx_readdir(struct file *filp,
 			   void *dirent, filldir_t filldir);
 
-static unsigned char get_dtype(struct super_block *sb, int filetype)
-{
-	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
-	    (filetype >= EXT4_FT_MAX))
-		return DT_UNKNOWN;
-
-	return (ext4_filetype_table[filetype]);
-}
-
 /**
  * Check if the given dir-inode refers to an htree-indexed directory
 * (or a directory which could potentially get converted to use htree
@@ -68,11 +56,14 @@
  * Return 0 if the directory entry is OK, and 1 if there is a problem
  *
  * Note: this is the opposite of what ext2 and ext3 historically returned...
+ *
+ * bh passed here can be an inode block or a dir data block, depending
+ * on the inode inline data flag.
  */
 int __ext4_check_dir_entry(const char *function, unsigned int line,
 			   struct inode *dir, struct file *filp,
 			   struct ext4_dir_entry_2 *de,
-			   struct buffer_head *bh,
+			   struct buffer_head *bh, char *buf, int size,
 			   unsigned int offset)
 {
 	const char *error_msg = NULL;
@@ -85,9 +76,8 @@
 		error_msg = "rec_len % 4 != 0";
 	else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
 		error_msg = "rec_len is too small for name_len";
-	else if (unlikely(((char *) de - bh->b_data) + rlen >
-			  dir->i_sb->s_blocksize))
-		error_msg = "directory entry across blocks";
+	else if (unlikely(((char *) de - buf) + rlen > size))
+		error_msg = "directory entry across range";
 	else if (unlikely(le32_to_cpu(de->inode) >
 			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
 		error_msg = "inode out of bounds";
@@ -98,14 +88,14 @@
 		ext4_error_file(filp, function, line, bh->b_blocknr,
 				"bad entry in directory: %s - offset=%u(%u), "
 				"inode=%u, rec_len=%d, name_len=%d",
-				error_msg, (unsigned) (offset % bh->b_size),
+				error_msg, (unsigned) (offset % size),
 				offset, le32_to_cpu(de->inode),
 				rlen, de->name_len);
 	else
 		ext4_error_inode(dir, function, line, bh->b_blocknr,
 				"bad entry in directory: %s - offset=%u(%u), "
 				"inode=%u, rec_len=%d, name_len=%d",
-				error_msg, (unsigned) (offset % bh->b_size),
+				error_msg, (unsigned) (offset % size),
 				offset, le32_to_cpu(de->inode),
 				rlen, de->name_len);
 
@@ -125,6 +115,14 @@
 	int ret = 0;
 	int dir_has_error = 0;
 
+	if (ext4_has_inline_data(inode)) {
+		int has_inline_data = 1;
+		ret = ext4_read_inline_dir(filp, dirent, filldir,
+					   &has_inline_data);
+		if (has_inline_data)
+			return ret;
+	}
+
 	if (is_dx_dir(inode)) {
 		err = ext4_dx_readdir(filp, dirent, filldir);
 		if (err != ERR_BAD_DX_DIR) {
@@ -221,8 +219,9 @@
 		while (!error && filp->f_pos < inode->i_size
 		       && offset < sb->s_blocksize) {
 			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
-			if (ext4_check_dir_entry(inode, filp, de,
-						 bh, offset)) {
+			if (ext4_check_dir_entry(inode, filp, de, bh,
+						 bh->b_data, bh->b_size,
+						 offset)) {
 				/*
 				 * On error, skip the f_pos to the next block
 				 */
@@ -334,17 +333,17 @@
  *
  * For non-htree, ext4_llseek already chooses the proper max offset.
  */
-loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
 	loff_t htree_max = ext4_get_htree_eof(file);
 
 	if (likely(dx_dir))
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 						    htree_max, htree_max);
 	else
-		return ext4_llseek(file, offset, origin);
+		return ext4_llseek(file, offset, whence);
 }
 
 /*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index df163da..8462eb3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -57,6 +57,16 @@
 #define ext4_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
+/*
+ * Turn on EXT_DEBUG to get lots of info about extents operations.
+ */
+#define EXT_DEBUG__
+#ifdef EXT_DEBUG
+#define ext_debug(fmt, ...)	printk(fmt, ##__VA_ARGS__)
+#else
+#define ext_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
+#endif
+
 #define EXT4_ERROR_INODE(inode, fmt, a...) \
 	ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
 
@@ -392,6 +402,7 @@
 #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
 #define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
 #define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
+#define EXT4_INLINE_DATA_FL		0x10000000 /* Inode has inline data. */
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
 #define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
@@ -448,28 +459,26 @@
 	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
 	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
 	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
+	EXT4_INODE_INLINE_DATA	= 28,	/* Data in inode. */
 	EXT4_INODE_RESERVED	= 31,	/* reserved for ext4 lib */
 };
 
-#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
-#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \
-	printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \
-		EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); }
-
 /*
- * Since it's pretty easy to mix up bit numbers and hex values, and we
- * can't do a compile-time test for ENUM values, we use a run-time
- * test to make sure that EXT4_XXX_FL is consistent with respect to
- * EXT4_INODE_XXX.  If all is well the printk and BUG_ON will all drop
- * out so it won't cost any extra space in the compiled kernel image.
- * But it's important that these values are the same, since we are
- * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL
- * must be consistent with the values of FS_XXX_FL defined in
- * include/linux/fs.h and the on-disk values found in ext2, ext3, and
- * ext4 filesystems, and of course the values defined in e2fsprogs.
+ * Since it's pretty easy to mix up bit numbers and hex values, we use a
+ * build-time check to make sure that EXT4_XXX_FL is consistent with respect to
+ * EXT4_INODE_XXX. If all is well, the macros will be dropped, so they won't
+ * cost any extra space in the compiled kernel image; otherwise, the build
+ * will fail.
+ * It's important that these values are the same, since we are using
+ * EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent
+ * with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk
+ * values found in ext2, ext3 and ext4 filesystems, and of course the values
+ * defined in e2fsprogs.
  *
  * It's not paranoia if the Murphy's Law really *is* out to get you.  :-)
  */
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
+
 static inline void ext4_check_flag_values(void)
 {
 	CHECK_FLAG_VALUE(SECRM);
@@ -494,6 +503,7 @@
 	CHECK_FLAG_VALUE(EXTENTS);
 	CHECK_FLAG_VALUE(EA_INODE);
 	CHECK_FLAG_VALUE(EOFBLOCKS);
+	CHECK_FLAG_VALUE(INLINE_DATA);
 	CHECK_FLAG_VALUE(RESERVED);
 }
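
A minimal standalone sketch of the same build-time check, not part of
this patch: in plain C11, _Static_assert can play the role that
BUILD_BUG_ON plays here, and the DEMO_* names below are hypothetical.

#define DEMO_EXTENTS_FL		0x00080000	/* hex flag value */
#define DEMO_INODE_EXTENTS	19		/* corresponding bit number */

/* Compilation fails if the flag value and the bit number ever disagree. */
_Static_assert(DEMO_EXTENTS_FL == (1 << DEMO_INODE_EXTENTS),
	       "DEMO_EXTENTS_FL inconsistent with DEMO_INODE_EXTENTS");

int main(void)
{
	return 0;	/* nothing to do at run time; the check is static */
}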
 
@@ -811,6 +821,8 @@
 	__u32		ec_len; /* must be 32bit to return holes */
 };
 
+#include "extents_status.h"
+
 /*
  * fourth extended file system inode data in memory
  */
@@ -833,7 +845,6 @@
 #endif
 	unsigned long	i_flags;
 
-#ifdef CONFIG_EXT4_FS_XATTR
 	/*
 	 * Extended attributes can be read independently of the main file
 	 * data. Taking i_mutex even when reading would cause contention
@@ -842,7 +853,6 @@
 	 * EAs.
 	 */
 	struct rw_semaphore xattr_sem;
-#endif
 
 	struct list_head i_orphan;	/* unlinked but open inodes */
 
@@ -888,6 +898,10 @@
 	struct list_head i_prealloc_list;
 	spinlock_t i_prealloc_lock;
 
+	/* extents status tree */
+	struct ext4_es_tree i_es_tree;
+	rwlock_t i_es_lock;
+
 	/* ialloc */
 	ext4_group_t	i_last_alloc_group;
 
@@ -902,6 +916,10 @@
 	/* on-disk additional length */
 	__u16 i_extra_isize;
 
+	/* Indicate the inline data space. */
+	u16 i_inline_off;
+	u16 i_inline_size;
+
 #ifdef CONFIG_QUOTA
 	/* quota space reservation, managed internally by quota code */
 	qsize_t i_reserved_quota;
@@ -1360,6 +1378,7 @@
 	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
 	EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
 					   nolocking */
+	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
 };
 
 #define EXT4_INODE_BIT_FNS(name, field, offset)				\
@@ -1481,7 +1500,7 @@
 #define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */
 #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM	0x2000 /* use crc32c for bg */
 #define EXT4_FEATURE_INCOMPAT_LARGEDIR		0x4000 /* >2GB or 3-lvl htree */
-#define EXT4_FEATURE_INCOMPAT_INLINEDATA	0x8000 /* data in inode */
+#define EXT4_FEATURE_INCOMPAT_INLINE_DATA	0x8000 /* data in inode */
 
 #define EXT2_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
 #define EXT2_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1505,7 +1524,8 @@
 					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
 					 EXT4_FEATURE_INCOMPAT_64BIT| \
 					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
-					 EXT4_FEATURE_INCOMPAT_MMP)
+					 EXT4_FEATURE_INCOMPAT_MMP |	\
+					 EXT4_FEATURE_INCOMPAT_INLINE_DATA)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -1592,6 +1612,11 @@
 	__le32	det_checksum;		/* crc32c(uuid+inum+dirblock) */
 };
 
+#define EXT4_DIRENT_TAIL(block, blocksize) \
+	((struct ext4_dir_entry_tail *)(((void *)(block)) + \
+					((blocksize) - \
+					 sizeof(struct ext4_dir_entry_tail))))
+
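
A userspace sketch of the pointer arithmetic EXT4_DIRENT_TAIL performs,
not part of this patch: the tail structure simply occupies the last
sizeof(tail) bytes of a directory block. The demo_* struct below is a
stand-in for ext4_dir_entry_tail.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct demo_dirent_tail {	/* stand-in for ext4_dir_entry_tail */
	uint32_t det_reserved_zero1;
	uint16_t det_rec_len;
	uint8_t  det_reserved_zero2;
	uint8_t  det_reserved_ft;
	uint32_t det_checksum;
};

#define DEMO_DIRENT_TAIL(block, blocksize) \
	((struct demo_dirent_tail *)(((char *)(block)) + \
				     ((blocksize) - \
				      sizeof(struct demo_dirent_tail))))

int main(void)
{
	size_t blocksize = 4096;
	char *block = calloc(1, blocksize);
	struct demo_dirent_tail *t;

	if (!block)
		return 1;
	t = DEMO_DIRENT_TAIL(block, blocksize);
	/* 4096 - 12 = 4084: the tail sits at the very end of the block. */
	printf("tail offset = %zu\n", (size_t)((char *)t - block));
	free(block);
	return 0;
}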
 /*
  * Ext4 directory file types.  Only the low 3 bits are used.  The
  * other bits are reserved for now.
@@ -1936,14 +1961,42 @@
 extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
 				  struct file *,
 				  struct ext4_dir_entry_2 *,
-				  struct buffer_head *, unsigned int);
-#define ext4_check_dir_entry(dir, filp, de, bh, offset)			\
+				  struct buffer_head *, char *, int,
+				  unsigned int);
+#define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset)	\
 	unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
-					(de), (bh), (offset)))
+					(de), (bh), (buf), (size), (offset)))
 extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 				    __u32 minor_hash,
 				    struct ext4_dir_entry_2 *dirent);
 extern void ext4_htree_free_dir_info(struct dir_private_info *p);
+extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
+			     struct buffer_head *bh,
+			     void *buf, int buf_size,
+			     const char *name, int namelen,
+			     struct ext4_dir_entry_2 **dest_de);
+void ext4_insert_dentry(struct inode *inode,
+			struct ext4_dir_entry_2 *de,
+			int buf_size,
+			const char *name, int namelen);
+static inline void ext4_update_dx_flag(struct inode *inode)
+{
+	if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
+				     EXT4_FEATURE_COMPAT_DIR_INDEX))
+		ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
+}
+static unsigned char ext4_filetype_table[] = {
+	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+};
+
+static inline unsigned char get_dtype(struct super_block *sb, int filetype)
+{
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
+	    (filetype >= EXT4_FT_MAX))
+		return DT_UNKNOWN;
+
+	return ext4_filetype_table[filetype];
+}
 
 /* fsync.c */
 extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
@@ -1994,8 +2047,23 @@
 						ext4_lblk_t, int, int *);
 struct buffer_head *ext4_bread(handle_t *, struct inode *,
 						ext4_lblk_t, int, int *);
+int ext4_get_block_write(struct inode *inode, sector_t iblock,
+			 struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
 				struct buffer_head *bh_result, int create);
+int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
+			   struct buffer_head *bh, int create);
+int ext4_walk_page_buffers(handle_t *handle,
+			   struct buffer_head *head,
+			   unsigned from,
+			   unsigned to,
+			   int *partial,
+			   int (*fn)(handle_t *handle,
+				     struct buffer_head *bh));
+int do_journal_get_write_access(handle_t *handle,
+				struct buffer_head *bh);
+#define FALL_BACK_TO_NONDELALLOC 1
+#define CONVERT_INLINE_DATA	 2
 
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
 extern int  ext4_write_inode(struct inode *, struct writeback_control *);
@@ -2050,6 +2118,20 @@
 extern int ext4_orphan_del(handle_t *, struct inode *);
 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 				__u32 start_minor_hash, __u32 *next_hash);
+extern int search_dir(struct buffer_head *bh,
+		      char *search_buf,
+		      int buf_size,
+		      struct inode *dir,
+		      const struct qstr *d_name,
+		      unsigned int offset,
+		      struct ext4_dir_entry_2 **res_dir);
+extern int ext4_generic_delete_entry(handle_t *handle,
+				     struct inode *dir,
+				     struct ext4_dir_entry_2 *de_del,
+				     struct buffer_head *bh,
+				     void *entry_buf,
+				     int buf_size,
+				     int csum_size);
 
 /* resize.c */
 extern int ext4_group_add(struct super_block *sb,
@@ -2376,6 +2458,15 @@
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
 extern struct dentry *ext4_get_parent(struct dentry *child);
+extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
+				 struct ext4_dir_entry_2 *de,
+				 int blocksize, int csum_size,
+				 unsigned int parent_ino, int dotdot_real_len);
+extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
+				   unsigned int blocksize);
+extern int ext4_handle_dirty_dirent_node(handle_t *handle,
+					 struct inode *inode,
+					 struct buffer_head *bh);
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
@@ -2393,6 +2484,9 @@
 			       struct inode *, __le32 *, unsigned int);
 
 /* extents.c */
+struct ext4_ext_path;
+struct ext4_extent;
+
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
 extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
@@ -2410,8 +2504,27 @@
 			  ssize_t len);
 extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map, int flags);
+extern int ext4_ext_calc_metadata_amount(struct inode *inode,
+					 ext4_lblk_t lblocks);
+extern int ext4_extent_tree_init(handle_t *, struct inode *);
+extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
+						   int num,
+						   struct ext4_ext_path *path);
+extern int ext4_can_extents_be_merged(struct inode *inode,
+				      struct ext4_extent *ex1,
+				      struct ext4_extent *ex2);
+extern int ext4_ext_insert_extent(handle_t *, struct inode *,
+				  struct ext4_ext_path *,
+				  struct ext4_extent *, int);
+extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
+						  struct ext4_ext_path *);
+extern void ext4_ext_drop_refs(struct ext4_ext_path *);
+extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
+
+
 /* move_extent.c */
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 start_orig, __u64 start_donor,
@@ -2445,14 +2558,10 @@
 				 * never, ever appear in a buffer_head's state
 				 * flag. See EXT4_MAP_FROM_CLUSTER to see where
 				 * this is used. */
-	BH_Da_Mapped,	/* Delayed allocated block that now has a mapping. This
-			 * flag is set when ext4_map_blocks is called on a
-			 * delayed allocated block to get its real mapping. */
 };
 
 BUFFER_FNS(Uninit, uninit)
 TAS_BUFFER_FNS(Uninit, uninit)
-BUFFER_FNS(Da_Mapped, da_mapped)
 
 /*
  * Add new method to test whether block and inode bitmaps are properly
@@ -2503,6 +2612,4 @@
 
 #endif	/* __KERNEL__ */
 
-#include "ext4_extents.h"
-
 #endif	/* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index cb1b2c9..487fda1 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -43,16 +43,6 @@
 #define CHECK_BINSEARCH__
 
 /*
- * Turn on EXT_DEBUG to get lots of info about extents operations.
- */
-#define EXT_DEBUG__
-#ifdef EXT_DEBUG
-#define ext_debug(fmt, ...)	printk(fmt, ##__VA_ARGS__)
-#else
-#define ext_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
-#endif
-
-/*
  * If EXT_STATS is defined then stats numbers are collected.
  * These number will be displayed at umount time.
  */
@@ -144,20 +134,6 @@
  */
 
 /*
- * to be called by ext4_ext_walk_space()
- * negative retcode - error
- * positive retcode - signal for ext4_ext_walk_space(), see below
- * callback must return valid extent (passed or newly created)
- */
-typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t,
-					struct ext4_ext_cache *,
-					struct ext4_extent *, void *);
-
-#define EXT_CONTINUE   0
-#define EXT_BREAK      1
-#define EXT_REPEAT     2
-
-/*
  * Maximum number of logical blocks in a file; ext4_extent's ee_block is
  * __le32.
  */
@@ -300,21 +276,5 @@
 				     0xffff);
 }
 
-extern int ext4_ext_calc_metadata_amount(struct inode *inode,
-					 ext4_lblk_t lblocks);
-extern int ext4_extent_tree_init(handle_t *, struct inode *);
-extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
-						   int num,
-						   struct ext4_ext_path *path);
-extern int ext4_can_extents_be_merged(struct inode *inode,
-				      struct ext4_extent *ex1,
-				      struct ext4_extent *ex2);
-extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
-extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
-							struct ext4_ext_path *);
-extern void ext4_ext_drop_refs(struct ext4_ext_path *);
-extern int ext4_ext_check_inode(struct inode *inode);
-extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
-				      int search_hint_reverse);
 #endif /* _EXT4_EXTENTS */
 
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 56d258c..7177f9b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -254,13 +254,6 @@
 		handle->h_sync = 1;
 }
 
-static inline void ext4_handle_release_buffer(handle_t *handle,
-						struct buffer_head *bh)
-{
-	if (ext4_handle_valid(handle))
-		jbd2_journal_release_buffer(handle, bh);
-}
-
 static inline int ext4_handle_is_aborted(handle_t *handle)
 {
 	if (ext4_handle_valid(handle))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7011ac9..26af228 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -41,6 +41,8 @@
 #include <asm/uaccess.h>
 #include <linux/fiemap.h>
 #include "ext4_jbd2.h"
+#include "ext4_extents.h"
+#include "xattr.h"
 
 #include <trace/events/ext4.h>
 
@@ -109,6 +111,9 @@
 			     int split_flag,
 			     int flags);
 
+static int ext4_find_delayed_extent(struct inode *inode,
+				    struct ext4_ext_cache *newex);
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
 					    struct inode *inode,
 					    int needed)
@@ -1959,27 +1964,33 @@
 	return err;
 }
 
-static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
-			       ext4_lblk_t num, ext_prepare_callback func,
-			       void *cbdata)
+static int ext4_fill_fiemap_extents(struct inode *inode,
+				    ext4_lblk_t block, ext4_lblk_t num,
+				    struct fiemap_extent_info *fieinfo)
 {
 	struct ext4_ext_path *path = NULL;
-	struct ext4_ext_cache cbex;
+	struct ext4_ext_cache newex;
 	struct ext4_extent *ex;
-	ext4_lblk_t next, start = 0, end = 0;
+	ext4_lblk_t next, next_del, start = 0, end = 0;
 	ext4_lblk_t last = block + num;
-	int depth, exists, err = 0;
-
-	BUG_ON(func == NULL);
-	BUG_ON(inode == NULL);
+	int exists, depth = 0, err = 0;
+	unsigned int flags = 0;
+	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 
 	while (block < last && block != EXT_MAX_BLOCKS) {
 		num = last - block;
 		/* find extent for this block */
 		down_read(&EXT4_I(inode)->i_data_sem);
+
+		if (path && ext_depth(inode) != depth) {
+			/* depth was changed. we have to realloc path */
+			kfree(path);
+			path = NULL;
+		}
+
 		path = ext4_ext_find_extent(inode, block, path);
-		up_read(&EXT4_I(inode)->i_data_sem);
 		if (IS_ERR(path)) {
+			up_read(&EXT4_I(inode)->i_data_sem);
 			err = PTR_ERR(path);
 			path = NULL;
 			break;
@@ -1987,13 +1998,16 @@
 
 		depth = ext_depth(inode);
 		if (unlikely(path[depth].p_hdr == NULL)) {
+			up_read(&EXT4_I(inode)->i_data_sem);
 			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
 			err = -EIO;
 			break;
 		}
 		ex = path[depth].p_ext;
 		next = ext4_ext_next_allocated_block(path);
+		ext4_ext_drop_refs(path);
 
+		flags = 0;
 		exists = 0;
 		if (!ex) {
 			/* there is no extent yet, so try to allocate
@@ -2030,40 +2044,64 @@
 		BUG_ON(end <= start);
 
 		if (!exists) {
-			cbex.ec_block = start;
-			cbex.ec_len = end - start;
-			cbex.ec_start = 0;
+			newex.ec_block = start;
+			newex.ec_len = end - start;
+			newex.ec_start = 0;
 		} else {
-			cbex.ec_block = le32_to_cpu(ex->ee_block);
-			cbex.ec_len = ext4_ext_get_actual_len(ex);
-			cbex.ec_start = ext4_ext_pblock(ex);
+			newex.ec_block = le32_to_cpu(ex->ee_block);
+			newex.ec_len = ext4_ext_get_actual_len(ex);
+			newex.ec_start = ext4_ext_pblock(ex);
+			if (ext4_ext_is_uninitialized(ex))
+				flags |= FIEMAP_EXTENT_UNWRITTEN;
 		}
 
-		if (unlikely(cbex.ec_len == 0)) {
-			EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
+		/*
+		 * Find delayed extent and update newex accordingly. We call
+		 * it even in !exists case to find out whether newex is the
+		 * last existing extent or not.
+		 */
+		next_del = ext4_find_delayed_extent(inode, &newex);
+		if (!exists && next_del) {
+			exists = 1;
+			flags |= FIEMAP_EXTENT_DELALLOC;
+		}
+		up_read(&EXT4_I(inode)->i_data_sem);
+
+		if (unlikely(newex.ec_len == 0)) {
+			EXT4_ERROR_INODE(inode, "newex.ec_len == 0");
 			err = -EIO;
 			break;
 		}
-		err = func(inode, next, &cbex, ex, cbdata);
-		ext4_ext_drop_refs(path);
 
-		if (err < 0)
-			break;
-
-		if (err == EXT_REPEAT)
-			continue;
-		else if (err == EXT_BREAK) {
-			err = 0;
-			break;
+		/* This is possible iff next == next_del == EXT_MAX_BLOCKS */
+		if (next == next_del) {
+			flags |= FIEMAP_EXTENT_LAST;
+			if (unlikely(next_del != EXT_MAX_BLOCKS ||
+				     next != EXT_MAX_BLOCKS)) {
+				EXT4_ERROR_INODE(inode,
+						 "next extent == %u, next "
+						 "delalloc extent = %u",
+						 next, next_del);
+				err = -EIO;
+				break;
+			}
 		}
 
-		if (ext_depth(inode) != depth) {
-			/* depth was changed. we have to realloc path */
-			kfree(path);
-			path = NULL;
+		if (exists) {
+			err = fiemap_fill_next_extent(fieinfo,
+				(__u64)newex.ec_block << blksize_bits,
+				(__u64)newex.ec_start << blksize_bits,
+				(__u64)newex.ec_len << blksize_bits,
+				flags);
+			if (err < 0)
+				break;
+			if (err == 1) {
+				err = 0;
+				break;
+			}
 		}
 
-		block = cbex.ec_block + cbex.ec_len;
+		block = newex.ec_block + newex.ec_len;
 	}
 
 	if (path) {
@@ -2156,7 +2194,6 @@
 		  struct ext4_extent *ex)
 {
 	struct ext4_ext_cache *cex;
-	struct ext4_sb_info *sbi;
 	int ret = 0;
 
 	/*
@@ -2164,7 +2201,6 @@
 	 */
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	cex = &EXT4_I(inode)->i_cached_extent;
-	sbi = EXT4_SB(inode->i_sb);
 
 	/* has cache valid data? */
 	if (cex->ec_len == 0)
@@ -2273,7 +2309,13 @@
 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
 	int index;
-	int depth = ext_depth(inode);
+	int depth;
+
+	/* If we are converting the inline data, only one is needed here. */
+	if (ext4_has_inline_data(inode))
+		return 1;
+
+	depth = ext_depth(inode);
 
 	if (chunk)
 		index = depth * 2;
@@ -3461,115 +3503,34 @@
 /**
  * ext4_find_delalloc_range: find delayed allocated block in the given range.
  *
- * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
- * whether there are any buffers marked for delayed allocation. It returns '1'
- * on the first delalloc'ed buffer head found. If no buffer head in the given
- * range is marked for delalloc, it returns 0.
- * lblk_start should always be <= lblk_end.
- * search_hint_reverse is to indicate that searching in reverse from lblk_end to
- * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
- * block sooner). This is useful when blocks are truncated sequentially from
- * lblk_start towards lblk_end.
+ * Return 1 if there is a delalloc block in the range, otherwise 0.
  */
 static int ext4_find_delalloc_range(struct inode *inode,
 				    ext4_lblk_t lblk_start,
-				    ext4_lblk_t lblk_end,
-				    int search_hint_reverse)
+				    ext4_lblk_t lblk_end)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct buffer_head *head, *bh = NULL;
-	struct page *page;
-	ext4_lblk_t i, pg_lblk;
-	pgoff_t index;
+	struct extent_status es;
 
-	if (!test_opt(inode->i_sb, DELALLOC))
-		return 0;
-
-	/* reverse search wont work if fs block size is less than page size */
-	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
-		search_hint_reverse = 0;
-
-	if (search_hint_reverse)
-		i = lblk_end;
+	es.start = lblk_start;
+	ext4_es_find_extent(inode, &es);
+	if (es.len == 0)
+		return 0; /* there is no delay extent in this tree */
+	else if (es.start <= lblk_start && lblk_start < es.start + es.len)
+		return 1;
+	else if (lblk_start <= es.start && es.start <= lblk_end)
+		return 1;
 	else
-		i = lblk_start;
-
-	index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	while ((i >= lblk_start) && (i <= lblk_end)) {
-		page = find_get_page(mapping, index);
-		if (!page)
-			goto nextpage;
-
-		if (!page_has_buffers(page))
-			goto nextpage;
-
-		head = page_buffers(page);
-		if (!head)
-			goto nextpage;
-
-		bh = head;
-		pg_lblk = index << (PAGE_CACHE_SHIFT -
-						inode->i_blkbits);
-		do {
-			if (unlikely(pg_lblk < lblk_start)) {
-				/*
-				 * This is possible when fs block size is less
-				 * than page size and our cluster starts/ends in
-				 * middle of the page. So we need to skip the
-				 * initial few blocks till we reach the 'lblk'
-				 */
-				pg_lblk++;
-				continue;
-			}
-
-			/* Check if the buffer is delayed allocated and that it
-			 * is not yet mapped. (when da-buffers are mapped during
-			 * their writeout, their da_mapped bit is set.)
-			 */
-			if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
-				page_cache_release(page);
-				trace_ext4_find_delalloc_range(inode,
-						lblk_start, lblk_end,
-						search_hint_reverse,
-						1, i);
-				return 1;
-			}
-			if (search_hint_reverse)
-				i--;
-			else
-				i++;
-		} while ((i >= lblk_start) && (i <= lblk_end) &&
-				((bh = bh->b_this_page) != head));
-nextpage:
-		if (page)
-			page_cache_release(page);
-		/*
-		 * Move to next page. 'i' will be the first lblk in the next
-		 * page.
-		 */
-		if (search_hint_reverse)
-			index--;
-		else
-			index++;
-		i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	}
-
-	trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
-					search_hint_reverse, 0, 0);
-	return 0;
+		return 0;
 }
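
The rewritten test above reduces to a small interval predicate; here is
a standalone restatement with a few sanity checks (not part of this
patch, and the demo_* names are hypothetical):

#include <assert.h>

/*
 * es_start/es_len describe the first delayed extent found at or after
 * lblk_start; es_len == 0 means no such extent exists.
 */
static int demo_range_has_delalloc(unsigned es_start, unsigned es_len,
				   unsigned lblk_start, unsigned lblk_end)
{
	if (es_len == 0)
		return 0;	/* no delayed extent in the tree */
	if (es_start <= lblk_start && lblk_start < es_start + es_len)
		return 1;	/* extent covers the start of the range */
	if (lblk_start <= es_start && es_start <= lblk_end)
		return 1;	/* extent starts inside the range */
	return 0;
}

int main(void)
{
	assert(demo_range_has_delalloc(5, 10, 8, 12));	/* covers start  */
	assert(demo_range_has_delalloc(10, 4, 8, 12));	/* starts inside */
	assert(!demo_range_has_delalloc(20, 4, 8, 12));	/* past the end  */
	assert(!demo_range_has_delalloc(0, 0, 8, 12));	/* nothing found */
	return 0;
}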
 
-int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
-			       int search_hint_reverse)
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_lblk_t lblk_start, lblk_end;
 	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
 	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
 
-	return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
-					search_hint_reverse);
+	return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
 }
 
 /**
@@ -3630,7 +3591,7 @@
 		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
 		lblk_to = lblk_from + c_offset - 1;
 
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
 			allocated_clusters--;
 	}
 
@@ -3640,7 +3601,7 @@
 		lblk_from = lblk_start + num_blks;
 		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
 
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
 			allocated_clusters--;
 	}
 
@@ -3663,8 +3624,8 @@
 		  flags, allocated);
 	ext4_ext_show_leaf(inode, path);
 
-	trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
-						    newblock);
+	trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
+						    allocated, newblock);
 
 	/* get_block() before submit the IO, split the extent */
 	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
@@ -3911,7 +3872,7 @@
 	struct ext4_extent newex, *ex, *ex2;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_fsblk_t newblock = 0;
-	int free_on_err = 0, err = 0, depth, ret;
+	int free_on_err = 0, err = 0, depth;
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
 	struct ext4_allocation_request ar;
@@ -3927,7 +3888,7 @@
 	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
 			if ((sbi->s_cluster_ratio > 1) &&
-			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+			    ext4_find_delalloc_cluster(inode, map->m_lblk))
 				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -4007,15 +3968,15 @@
 					ee_len, ee_start);
 				goto out;
 			}
-			ret = ext4_ext_handle_uninitialized_extents(
+			allocated = ext4_ext_handle_uninitialized_extents(
 				handle, inode, map, path, flags,
 				allocated, newblock);
-			return ret;
+			goto out3;
 		}
 	}
 
 	if ((sbi->s_cluster_ratio > 1) &&
-	    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+	    ext4_find_delalloc_cluster(inode, map->m_lblk))
 		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 
 	/*
@@ -4284,8 +4245,8 @@
 		kfree(path);
 	}
 
-	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
-		newblock, map->m_len, err ? err : allocated);
+out3:
+	trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated);
 
 	return err ? err : allocated;
 }
@@ -4344,6 +4305,8 @@
 
 	last_block = (inode->i_size + sb->s_blocksize - 1)
 			>> EXT4_BLOCK_SIZE_BITS(sb);
+	err = ext4_es_remove_extent(inode, last_block,
+				    EXT_MAX_BLOCKS - last_block);
 	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
 
 	/* In a multi-transaction truncate, we only make the final
@@ -4434,6 +4397,10 @@
 	if (mode & FALLOC_FL_PUNCH_HOLE)
 		return ext4_punch_hole(file, offset, len);
 
+	ret = ext4_convert_inline_data(inode);
+	if (ret)
+		return ret;
+
 	trace_ext4_fallocate_enter(inode, offset, len, mode);
 	map.m_lblk = offset >> blkbits;
 	/*
@@ -4572,206 +4539,43 @@
 }
 
 /*
- * Callback function called for each extent to gather FIEMAP information.
+ * If newex is not an existing extent (newex->ec_start equals zero),
+ * find the delayed extent at the start of newex, update newex
+ * accordingly, and return the start of the next delayed extent.
+ *
+ * If newex is an existing extent (newex->ec_start is nonzero), return
+ * the start of the next delayed extent, or EXT_MAX_BLOCKS if no
+ * delayed extent is found.  Leave newex unmodified.
  */
-static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
-		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
-		       void *data)
+static int ext4_find_delayed_extent(struct inode *inode,
+				    struct ext4_ext_cache *newex)
 {
-	__u64	logical;
-	__u64	physical;
-	__u64	length;
-	__u32	flags = 0;
-	int		ret = 0;
-	struct fiemap_extent_info *fieinfo = data;
-	unsigned char blksize_bits;
+	struct extent_status es;
+	ext4_lblk_t next_del;
 
-	blksize_bits = inode->i_sb->s_blocksize_bits;
-	logical = (__u64)newex->ec_block << blksize_bits;
+	es.start = newex->ec_block;
+	next_del = ext4_es_find_extent(inode, &es);
 
 	if (newex->ec_start == 0) {
 		/*
 		 * No extent in extent-tree contains block @newex->ec_start,
 		 * then the block may stay in 1)a hole or 2)delayed-extent.
-		 *
-		 * Holes or delayed-extents are processed as follows.
-		 * 1. lookup dirty pages with specified range in pagecache.
-		 *    If no page is got, then there is no delayed-extent and
-		 *    return with EXT_CONTINUE.
-		 * 2. find the 1st mapped buffer,
-		 * 3. check if the mapped buffer is both in the request range
-		 *    and a delayed buffer. If not, there is no delayed-extent,
-		 *    then return.
-		 * 4. a delayed-extent is found, the extent will be collected.
 		 */
-		ext4_lblk_t	end = 0;
-		pgoff_t		last_offset;
-		pgoff_t		offset;
-		pgoff_t		index;
-		pgoff_t		start_index = 0;
-		struct page	**pages = NULL;
-		struct buffer_head *bh = NULL;
-		struct buffer_head *head = NULL;
-		unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+		if (es.len == 0)
+			/* A hole found. */
+			return 0;
 
-		pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (pages == NULL)
-			return -ENOMEM;
-
-		offset = logical >> PAGE_SHIFT;
-repeat:
-		last_offset = offset;
-		head = NULL;
-		ret = find_get_pages_tag(inode->i_mapping, &offset,
-					PAGECACHE_TAG_DIRTY, nr_pages, pages);
-
-		if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
-			/* First time, try to find a mapped buffer. */
-			if (ret == 0) {
-out:
-				for (index = 0; index < ret; index++)
-					page_cache_release(pages[index]);
-				/* just a hole. */
-				kfree(pages);
-				return EXT_CONTINUE;
-			}
-			index = 0;
-
-next_page:
-			/* Try to find the 1st mapped buffer. */
-			end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
-				  blksize_bits;
-			if (!page_has_buffers(pages[index]))
-				goto out;
-			head = page_buffers(pages[index]);
-			if (!head)
-				goto out;
-
-			index++;
-			bh = head;
-			do {
-				if (end >= newex->ec_block +
-					newex->ec_len)
-					/* The buffer is out of
-					 * the request range.
-					 */
-					goto out;
-
-				if (buffer_mapped(bh) &&
-				    end >= newex->ec_block) {
-					start_index = index - 1;
-					/* get the 1st mapped buffer. */
-					goto found_mapped_buffer;
-				}
-
-				bh = bh->b_this_page;
-				end++;
-			} while (bh != head);
-
-			/* No mapped buffer in the range found in this page,
-			 * We need to look up next page.
-			 */
-			if (index >= ret) {
-				/* There is no page left, but we need to limit
-				 * newex->ec_len.
-				 */
-				newex->ec_len = end - newex->ec_block;
-				goto out;
-			}
-			goto next_page;
-		} else {
-			/*Find contiguous delayed buffers. */
-			if (ret > 0 && pages[0]->index == last_offset)
-				head = page_buffers(pages[0]);
-			bh = head;
-			index = 1;
-			start_index = 0;
+		if (es.start > newex->ec_block) {
+			/* A hole found. */
+			newex->ec_len = min(es.start - newex->ec_block,
+					    newex->ec_len);
+			return 0;
 		}
 
-found_mapped_buffer:
-		if (bh != NULL && buffer_delay(bh)) {
-			/* 1st or contiguous delayed buffer found. */
-			if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
-				/*
-				 * 1st delayed buffer found, record
-				 * the start of extent.
-				 */
-				flags |= FIEMAP_EXTENT_DELALLOC;
-				newex->ec_block = end;
-				logical = (__u64)end << blksize_bits;
-			}
-			/* Find contiguous delayed buffers. */
-			do {
-				if (!buffer_delay(bh))
-					goto found_delayed_extent;
-				bh = bh->b_this_page;
-				end++;
-			} while (bh != head);
-
-			for (; index < ret; index++) {
-				if (!page_has_buffers(pages[index])) {
-					bh = NULL;
-					break;
-				}
-				head = page_buffers(pages[index]);
-				if (!head) {
-					bh = NULL;
-					break;
-				}
-
-				if (pages[index]->index !=
-				    pages[start_index]->index + index
-				    - start_index) {
-					/* Blocks are not contiguous. */
-					bh = NULL;
-					break;
-				}
-				bh = head;
-				do {
-					if (!buffer_delay(bh))
-						/* Delayed-extent ends. */
-						goto found_delayed_extent;
-					bh = bh->b_this_page;
-					end++;
-				} while (bh != head);
-			}
-		} else if (!(flags & FIEMAP_EXTENT_DELALLOC))
-			/* a hole found. */
-			goto out;
-
-found_delayed_extent:
-		newex->ec_len = min(end - newex->ec_block,
-						(ext4_lblk_t)EXT_INIT_MAX_LEN);
-		if (ret == nr_pages && bh != NULL &&
-			newex->ec_len < EXT_INIT_MAX_LEN &&
-			buffer_delay(bh)) {
-			/* Have not collected an extent and continue. */
-			for (index = 0; index < ret; index++)
-				page_cache_release(pages[index]);
-			goto repeat;
-		}
-
-		for (index = 0; index < ret; index++)
-			page_cache_release(pages[index]);
-		kfree(pages);
+		newex->ec_len = es.start + es.len - newex->ec_block;
 	}
 
-	physical = (__u64)newex->ec_start << blksize_bits;
-	length =   (__u64)newex->ec_len << blksize_bits;
-
-	if (ex && ext4_ext_is_uninitialized(ex))
-		flags |= FIEMAP_EXTENT_UNWRITTEN;
-
-	if (next == EXT_MAX_BLOCKS)
-		flags |= FIEMAP_EXTENT_LAST;
-
-	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, flags);
-	if (ret < 0)
-		return ret;
-	if (ret == 1)
-		return EXT_BREAK;
-	return EXT_CONTINUE;
+	return next_del;
 }
 /* fiemap flags we can handle specified here */
 #define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -4971,6 +4775,8 @@
 	ext4_ext_invalidate_cache(inode);
 	ext4_discard_preallocations(inode);
 
+	err = ext4_es_remove_extent(inode, first_block,
+				    stop_block - first_block);
 	err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
 
 	ext4_ext_invalidate_cache(inode);
@@ -4991,12 +4797,22 @@
 	mutex_unlock(&inode->i_mutex);
 	return err;
 }
+
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {
 	ext4_lblk_t start_blk;
 	int error = 0;
 
+	if (ext4_has_inline_data(inode)) {
+		int has_inline = 1;
+
+		error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline);
+
+		if (has_inline)
+			return error;
+	}
+
 	/* fallback to generic here if not in extents fmt */
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return generic_block_fiemap(inode, fieinfo, start, len,
@@ -5018,11 +4834,11 @@
 		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
 
 		/*
-		 * Walk the extent tree gathering extent information.
-		 * ext4_ext_fiemap_cb will push extents back to user.
+		 * Walk the extent tree gathering extent information
+		 * and pushing extents back to the user.
 		 */
-		error = ext4_ext_walk_space(inode, start_blk, len_blks,
-					  ext4_ext_fiemap_cb, fieinfo);
+		error = ext4_fill_fiemap_extents(inode, start_blk,
+						 len_blks, fieinfo);
 	}
 
 	return error;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
new file mode 100644
index 0000000..564d981
--- /dev/null
+++ b/fs/ext4/extents_status.c
@@ -0,0 +1,500 @@
+/*
+ *  fs/ext4/extents_status.c
+ *
+ * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
+ * Modified by
+ *	Allison Henderson <achender@linux.vnet.ibm.com>
+ *	Hugh Dickins <hughd@google.com>
+ *	Zheng Liu <wenqing.lz@taobao.com>
+ *
+ * Ext4 extents status tree core functions.
+ */
+#include <linux/rbtree.h>
+#include "ext4.h"
+#include "extents_status.h"
+#include "ext4_extents.h"
+
+#include <trace/events/ext4.h>
+
+/*
+ * Following earlier discussion at the Ext4 Developer Workshop, we
+ * will introduce a new structure, called an io tree, to track all
+ * extent status in order to solve some of the problems we have met
+ * (e.g. reservation space warnings) and to provide extent-level
+ * locking.  The delayed extent tree is the first step towards this
+ * goal.  It was originally built by Yongqiang Yang.  At that time it
+ * was called the delay extent tree, and its only goal was to track
+ * delayed extents in memory in order to simplify the implementation
+ * of fiemap and bigalloc and to introduce lseek SEEK_DATA/SEEK_HOLE
+ * support.  That is why it is still referred to as the delay extent
+ * tree in the comments below.  But to better reflect what it does,
+ * it has been renamed to the extent status tree.
+ *
+ * Currently the first step has been done.  All delayed extents are
+ * tracked in the tree.  The tree is updated when a delayed allocation
+ * is issued and when the delayed extent is written out or
+ * invalidated.  Therefore the implementations of fiemap and bigalloc
+ * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
+ *
+ * The following comment describes the implementation of the extent
+ * status tree and future work.
+ */
+
+/*
+ * extents status tree implementation for ext4.
+ *
+ *
+ * ==========================================================================
+ * Extents status encompass delayed extents and extent locks
+ *
+ * 1. Why a delayed extent implementation?
+ *
+ * Without delayed extents, ext4 identifies a delayed extent by looking
+ * up the page cache, which results in complicated, buggy, and
+ * inefficient code.
+ *
+ * FIEMAP, SEEK_HOLE/DATA, bigalloc, punch hole and writeout all need
+ * to know whether a block or a range of blocks belongs to a delayed
+ * extent.
+ *
+ * Let us have a look at how they work without a delayed extents
+ * implementation.
+ *   --	FIEMAP
+ *	FIEMAP looks up the page cache to distinguish delayed
+ *	allocations from holes.
+ *
+ *   --	SEEK_HOLE/DATA
+ *	SEEK_HOLE/DATA has the same problem as FIEMAP.
+ *
+ *   --	bigalloc
+ *	bigalloc looks up the page cache to figure out whether a block
+ *	is already under delayed allocation, in order to determine
+ *	whether quota reservation is needed for the cluster.
+ *
+ *   -- punch hole
+ *	punch hole looks up the page cache to identify a delayed
+ *	extent.
+ *
+ *   --	writeout
+ *	Writeout looks up the whole page cache to see if a buffer is
+ *	mapped.  If there are not very many delayed buffers, this is
+ *	time consuming.
+ *
+ * With a delayed extents implementation, FIEMAP, SEEK_HOLE/DATA,
+ * bigalloc and writeout can figure out whether a block or a range of
+ * blocks is under delayed allocation (i.e. belongs to a delayed
+ * extent) by searching the delayed extent tree.
+ *
+ *
+ * ==========================================================================
+ * 2. ext4 delayed extents implementation
+ *
+ *   --	delayed extent
+ *	A delayed extent is a range of blocks which are logically
+ *	contiguous and under delayed allocation.  Unlike an on-disk
+ *	extent in ext4, a delayed extent is an in-memory structure;
+ *	there is no corresponding on-disk data.  There is no limit on
+ *	the length of a delayed extent, so a delayed extent can cover
+ *	any number of logically contiguous blocks.
+ *
+ *   --	delayed extent tree
+ *	Every inode has a delayed extent tree, and all blocks under
+ *	delayed allocation are added to the tree as delayed extents.
+ *	Delayed extents in the tree are ordered by logical block
+ *	number.
+ *
+ *   --	operations on a delayed extent tree
+ *	There are three operations on a delayed extent tree: finding
+ *	the next delayed extent, adding a space (a range of blocks)
+ *	and removing a space.
+ *
+ *   --	race on a delayed extent tree
+ *	The delayed extent tree is protected by inode->i_es_lock.
+ *
+ *
+ * ==========================================================================
+ * 3. performance analysis
+ *   --	overhead
+ *	1. There is a cached extent for write access, so if writes are
+ *	not very random, adding space operations take O(1) time.
+ *
+ *   --	gain
+ *	2. The code is much simpler, more readable, more maintainable
+ *	and more efficient.
+ *
+ *
+ * ==========================================================================
+ * 4. TODO list
+ *   -- Track all extent status
+ *
+ *   -- Improve get block process
+ *
+ *   -- Extent-level locking
+ */
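
Before the kernel implementation below, a compact userspace model of
the "adding a space" operation may help; it is not part of this patch,
a sorted array stands in for the rb-tree, and the demo_* names are
hypothetical. It shows the same left/right merging that
__es_insert_extent performs:

#include <stdio.h>

struct demo_es { unsigned start, len; };	/* mirrors extent_status */

static struct demo_es tree[64];
static int nr;

/* Insert [start, start + len), keeping the array sorted and merged. */
static void demo_insert(unsigned start, unsigned len)
{
	unsigned end = start + len;
	int i = 0, j;

	/* Skip extents that end strictly before the new space starts. */
	while (i < nr && tree[i].start + tree[i].len < start)
		i++;

	if (i < nr && tree[i].start <= end) {
		/* Overlapping or adjacent: extend tree[i] (left merge). */
		unsigned cur_end = tree[i].start + tree[i].len;

		if (start < tree[i].start)
			tree[i].start = start;
		if (end > cur_end)
			cur_end = end;
		tree[i].len = cur_end - tree[i].start;
		/* Absorb right neighbours that now overlap or touch. */
		while (i + 1 < nr &&
		       tree[i + 1].start <= tree[i].start + tree[i].len) {
			unsigned nend = tree[i + 1].start + tree[i + 1].len;

			if (nend > tree[i].start + tree[i].len)
				tree[i].len = nend - tree[i].start;
			for (j = i + 1; j < nr - 1; j++)
				tree[j] = tree[j + 1];
			nr--;
		}
		return;
	}

	/* No merge possible: shift the tail and insert a new extent. */
	for (j = nr; j > i; j--)
		tree[j] = tree[j - 1];
	tree[i].start = start;
	tree[i].len = len;
	nr++;
}

int main(void)
{
	demo_insert(10, 5);		/* [10/5)            */
	demo_insert(20, 5);		/* [10/5) [20/5)     */
	demo_insert(15, 5);		/* merges to [10/15) */
	for (int i = 0; i < nr; i++)
		printf("[%u/%u)\n", tree[i].start, tree[i].len);
	return 0;
}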
+
+static struct kmem_cache *ext4_es_cachep;
+
+int __init ext4_init_es(void)
+{
+	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
+	if (ext4_es_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void ext4_exit_es(void)
+{
+	if (ext4_es_cachep)
+		kmem_cache_destroy(ext4_es_cachep);
+}
+
+void ext4_es_init_tree(struct ext4_es_tree *tree)
+{
+	tree->root = RB_ROOT;
+	tree->cache_es = NULL;
+}
+
+#ifdef ES_DEBUG__
+static void ext4_es_print_tree(struct inode *inode)
+{
+	struct ext4_es_tree *tree;
+	struct rb_node *node;
+
+	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
+	tree = &EXT4_I(inode)->i_es_tree;
+	node = rb_first(&tree->root);
+	while (node) {
+		struct extent_status *es;
+		es = rb_entry(node, struct extent_status, rb_node);
+		printk(KERN_DEBUG " [%u/%u)", es->start, es->len);
+		node = rb_next(node);
+	}
+	printk(KERN_DEBUG "\n");
+}
+#else
+#define ext4_es_print_tree(inode)
+#endif
+
+static inline ext4_lblk_t extent_status_end(struct extent_status *es)
+{
+	BUG_ON(es->start + es->len < es->start);
+	return es->start + es->len - 1;
+}
+
+/*
+ * Search through the tree for a delayed extent with a given offset.  If
+ * it can't be found, try to find the next extent.
+ */
+static struct extent_status *__es_tree_search(struct rb_root *root,
+					      ext4_lblk_t offset)
+{
+	struct rb_node *node = root->rb_node;
+	struct extent_status *es = NULL;
+
+	while (node) {
+		es = rb_entry(node, struct extent_status, rb_node);
+		if (offset < es->start)
+			node = node->rb_left;
+		else if (offset > extent_status_end(es))
+			node = node->rb_right;
+		else
+			return es;
+	}
+
+	if (es && offset < es->start)
+		return es;
+
+	if (es && offset > extent_status_end(es)) {
+		node = rb_next(&es->rb_node);
+		return node ? rb_entry(node, struct extent_status, rb_node) :
+			      NULL;
+	}
+
+	return NULL;
+}
+
+/*
+ * ext4_es_find_extent: find the 1st delayed extent covering @es->start
+ * if it exists, otherwise, the next extent after @es->start.
+ *
+ * @inode: the inode which owns delayed extents
+ * @es: delayed extent that we found
+ *
+ * Returns the first block of the next extent after es, otherwise
+ * EXT_MAX_BLOCKS if no delay extent is found.
+ * Delayed extent is returned via @es.
+ */
+ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
+{
+	struct ext4_es_tree *tree = NULL;
+	struct extent_status *es1 = NULL;
+	struct rb_node *node;
+	ext4_lblk_t ret = EXT_MAX_BLOCKS;
+
+	trace_ext4_es_find_extent_enter(inode, es->start);
+
+	read_lock(&EXT4_I(inode)->i_es_lock);
+	tree = &EXT4_I(inode)->i_es_tree;
+
+	/* first try to find the delayed extent in the cache */
+	if (tree->cache_es) {
+		es1 = tree->cache_es;
+		if (in_range(es->start, es1->start, es1->len)) {
+			es_debug("%u cached by [%u/%u)\n",
+				 es->start, es1->start, es1->len);
+			goto out;
+		}
+	}
+
+	es->len = 0;
+	es1 = __es_tree_search(&tree->root, es->start);
+
+out:
+	if (es1) {
+		tree->cache_es = es1;
+		es->start = es1->start;
+		es->len = es1->len;
+		node = rb_next(&es1->rb_node);
+		if (node) {
+			es1 = rb_entry(node, struct extent_status, rb_node);
+			ret = es1->start;
+		}
+	}
+
+	read_unlock(&EXT4_I(inode)->i_es_lock);
+
+	trace_ext4_es_find_extent_exit(inode, es, ret);
+	return ret;
+}
+
+static struct extent_status *
+ext4_es_alloc_extent(ext4_lblk_t start, ext4_lblk_t len)
+{
+	struct extent_status *es;
+	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
+	if (es == NULL)
+		return NULL;
+	es->start = start;
+	es->len = len;
+	return es;
+}
+
+static void ext4_es_free_extent(struct extent_status *es)
+{
+	kmem_cache_free(ext4_es_cachep, es);
+}
+
+static struct extent_status *
+ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
+{
+	struct extent_status *es1;
+	struct rb_node *node;
+
+	node = rb_prev(&es->rb_node);
+	if (!node)
+		return es;
+
+	es1 = rb_entry(node, struct extent_status, rb_node);
+	if (es->start == extent_status_end(es1) + 1) {
+		es1->len += es->len;
+		rb_erase(&es->rb_node, &tree->root);
+		ext4_es_free_extent(es);
+		es = es1;
+	}
+
+	return es;
+}
+
+static struct extent_status *
+ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es)
+{
+	struct extent_status *es1;
+	struct rb_node *node;
+
+	node = rb_next(&es->rb_node);
+	if (!node)
+		return es;
+
+	es1 = rb_entry(node, struct extent_status, rb_node);
+	if (es1->start == extent_status_end(es) + 1) {
+		es->len += es1->len;
+		rb_erase(node, &tree->root);
+		ext4_es_free_extent(es1);
+	}
+
+	return es;
+}
+
+static int __es_insert_extent(struct ext4_es_tree *tree, ext4_lblk_t offset,
+			      ext4_lblk_t len)
+{
+	struct rb_node **p = &tree->root.rb_node;
+	struct rb_node *parent = NULL;
+	struct extent_status *es;
+	ext4_lblk_t end = offset + len - 1;
+
+	BUG_ON(end < offset);
+	es = tree->cache_es;
+	if (es && offset == (extent_status_end(es) + 1)) {
+		es_debug("cached by [%u/%u)\n", es->start, es->len);
+		es->len += len;
+		es = ext4_es_try_to_merge_right(tree, es);
+		goto out;
+	} else if (es && es->start == end + 1) {
+		es_debug("cached by [%u/%u)\n", es->start, es->len);
+		es->start = offset;
+		es->len += len;
+		es = ext4_es_try_to_merge_left(tree, es);
+		goto out;
+	} else if (es && es->start <= offset &&
+		   end <= extent_status_end(es)) {
+		es_debug("cached by [%u/%u)\n", es->start, es->len);
+		goto out;
+	}
+
+	while (*p) {
+		parent = *p;
+		es = rb_entry(parent, struct extent_status, rb_node);
+
+		if (offset < es->start) {
+			if (es->start == end + 1) {
+				es->start = offset;
+				es->len += len;
+				es = ext4_es_try_to_merge_left(tree, es);
+				goto out;
+			}
+			p = &(*p)->rb_left;
+		} else if (offset > extent_status_end(es)) {
+			if (offset == extent_status_end(es) + 1) {
+				es->len += len;
+				es = ext4_es_try_to_merge_right(tree, es);
+				goto out;
+			}
+			p = &(*p)->rb_right;
+		} else {
+			if (extent_status_end(es) <= end)
+				es->len = offset - es->start + len;
+			goto out;
+		}
+	}
+
+	es = ext4_es_alloc_extent(offset, len);
+	if (!es)
+		return -ENOMEM;
+	rb_link_node(&es->rb_node, parent, p);
+	rb_insert_color(&es->rb_node, &tree->root);
+
+out:
+	tree->cache_es = es;
+	return 0;
+}
+
+/*
+ * ext4_es_insert_extent() adds a space to a delayed extent tree.
+ * The caller must not hold inode->i_es_lock; it is taken internally.
+ *
+ * ext4_es_insert_extent is called by ext4_da_write_begin and
+ * ext4_es_remove_extent.
+ *
+ * Return 0 on success, error code on failure.
+ */
+int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t offset,
+			  ext4_lblk_t len)
+{
+	struct ext4_es_tree *tree;
+	int err = 0;
+
+	trace_ext4_es_insert_extent(inode, offset, len);
+	es_debug("add [%u/%u) to extent status tree of inode %lu\n",
+		 offset, len, inode->i_ino);
+
+	write_lock(&EXT4_I(inode)->i_es_lock);
+	tree = &EXT4_I(inode)->i_es_tree;
+	err = __es_insert_extent(tree, offset, len);
+	write_unlock(&EXT4_I(inode)->i_es_lock);
+
+	ext4_es_print_tree(inode);
+
+	return err;
+}
+
+/*
+ * ext4_es_remove_extent() removes a space from a delayed extent tree.
+ * The caller must not hold inode->i_es_lock; it is taken internally.
+ *
+ * Return 0 on success, error code on failure.
+ */
+int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
+			  ext4_lblk_t len)
+{
+	struct rb_node *node;
+	struct ext4_es_tree *tree;
+	struct extent_status *es;
+	struct extent_status orig_es;
+	ext4_lblk_t len1, len2, end;
+	int err = 0;
+
+	trace_ext4_es_remove_extent(inode, offset, len);
+	es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
+		 offset, len, inode->i_ino);
+
+	end = offset + len - 1;
+	BUG_ON(end < offset);
+	write_lock(&EXT4_I(inode)->i_es_lock);
+	tree = &EXT4_I(inode)->i_es_tree;
+	es = __es_tree_search(&tree->root, offset);
+	if (!es)
+		goto out;
+	if (es->start > end)
+		goto out;
+
+	/* Simply invalidate cache_es. */
+	tree->cache_es = NULL;
+
+	orig_es.start = es->start;
+	orig_es.len = es->len;
+	len1 = offset > es->start ? offset - es->start : 0;
+	len2 = extent_status_end(es) > end ?
+	       extent_status_end(es) - end : 0;
+	if (len1 > 0)
+		es->len = len1;
+	if (len2 > 0) {
+		if (len1 > 0) {
+			err = __es_insert_extent(tree, end + 1, len2);
+			if (err) {
+				es->start = orig_es.start;
+				es->len = orig_es.len;
+				goto out;
+			}
+		} else {
+			es->start = end + 1;
+			es->len = len2;
+		}
+		goto out;
+	}
+
+	if (len1 > 0) {
+		node = rb_next(&es->rb_node);
+		if (node)
+			es = rb_entry(node, struct extent_status, rb_node);
+		else
+			es = NULL;
+	}
+
+	while (es && extent_status_end(es) <= end) {
+		node = rb_next(&es->rb_node);
+		rb_erase(&es->rb_node, &tree->root);
+		ext4_es_free_extent(es);
+		if (!node) {
+			es = NULL;
+			break;
+		}
+		es = rb_entry(node, struct extent_status, rb_node);
+	}
+
+	if (es && es->start < end + 1) {
+		len1 = extent_status_end(es) - end;
+		es->start = end + 1;
+		es->len = len1;
+	}
+
+out:
+	write_unlock(&EXT4_I(inode)->i_es_lock);
+	ext4_es_print_tree(inode);
+	return err;
+}
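
The trickiest case above is a removal that falls strictly inside one
extent, splitting it into a left part of len1 blocks and a right part
of len2 blocks. A standalone model of just that split, not part of
this patch (demo_* names are hypothetical):

#include <assert.h>

struct demo_es { unsigned start, len; };

/*
 * Remove the inclusive range [offset, end] from *es, shrinking *es to
 * the left remainder and returning the right remainder (whose len is
 * 0 if nothing survives on that side).
 */
static struct demo_es demo_split(struct demo_es *es,
				 unsigned offset, unsigned end)
{
	unsigned es_end = es->start + es->len - 1;	/* inclusive end */
	unsigned len1 = offset > es->start ? offset - es->start : 0;
	unsigned len2 = es_end > end ? es_end - end : 0;
	struct demo_es right = { end + 1, len2 };

	es->len = len1;		/* left part keeps the original start */
	return right;
}

int main(void)
{
	struct demo_es es = { 10, 10 };			/* blocks 10..19 */
	struct demo_es right = demo_split(&es, 13, 16);	/* drop 13..16   */

	assert(es.start == 10 && es.len == 3);		/* 10..12 stays */
	assert(right.start == 17 && right.len == 3);	/* 17..19 stays */
	return 0;
}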
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
new file mode 100644
index 0000000..077f82d
--- /dev/null
+++ b/fs/ext4/extents_status.h
@@ -0,0 +1,45 @@
+/*
+ *  fs/ext4/extents_status.h
+ *
+ * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
+ * Modified by
+ *	Allison Henderson <achender@linux.vnet.ibm.com>
+ *	Zheng Liu <wenqing.lz@taobao.com>
+ *
+ */
+
+#ifndef _EXT4_EXTENTS_STATUS_H
+#define _EXT4_EXTENTS_STATUS_H
+
+/*
+ * Turn on ES_DEBUG__ to get lots of info about extent status operations.
+ */
+#ifdef ES_DEBUG__
+#define es_debug(fmt, ...)	printk(fmt, ##__VA_ARGS__)
+#else
+#define es_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
+#endif
+
+struct extent_status {
+	struct rb_node rb_node;
+	ext4_lblk_t start;	/* first block the extent covers */
+	ext4_lblk_t len;	/* length of the extent in blocks */
+};
+
+struct ext4_es_tree {
+	struct rb_root root;
+	struct extent_status *cache_es;	/* recently accessed extent */
+};
+
+extern int __init ext4_init_es(void);
+extern void ext4_exit_es(void);
+extern void ext4_es_init_tree(struct ext4_es_tree *tree);
+
+extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t start,
+				 ext4_lblk_t len);
+extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t start,
+				 ext4_lblk_t len);
+extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
+				struct extent_status *es);
+
+#endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bf3966b..d07c27c 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -24,6 +24,7 @@
 #include <linux/mount.h>
 #include <linux/path.h>
 #include <linux/quotaops.h>
+#include <linux/pagevec.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -286,11 +287,329 @@
 }
 
 /*
+ * Here we use ext4_map_blocks() to get a block mapping for an
+ * extent-based file rather than ext4_ext_walk_space(), because we can
+ * then handle SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped
+ * files in the same function.  Once the extent status tree has been
+ * fully implemented, it will track all extent status for a file and we
+ * will be able to use it directly to retrieve the offset for
+ * SEEK_DATA/SEEK_HOLE.
+ */
+
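
For reference, the user-visible interface this enables can be probed
from userspace with lseek(2). A sketch, not part of this patch; it
assumes a Linux system whose kernel and filesystem support
SEEK_DATA/SEEK_HOLE and a file name given on the command line:

#define _GNU_SOURCE		/* exposes SEEK_DATA and SEEK_HOLE */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	off_t data, hole;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	/* First data offset at or after 0, then the hole that follows. */
	data = lseek(fd, 0, SEEK_DATA);
	hole = lseek(fd, data < 0 ? 0 : data, SEEK_HOLE);
	printf("first data at %lld, next hole at %lld\n",
	       (long long)data, (long long)hole);
	close(fd);
	return 0;
}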
+/*
+ * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we need to look
+ * up the page cache to check whether there is any data in the range
+ * [startoff, endoff], because if this range contains an unwritten
+ * extent, we treat the extent as data or as a hole according to
+ * whether the page cache has data or not.
+ */
+static int ext4_find_unwritten_pgoff(struct inode *inode,
+				     int whence,
+				     struct ext4_map_blocks *map,
+				     loff_t *offset)
+{
+	struct pagevec pvec;
+	unsigned int blkbits;
+	pgoff_t index;
+	pgoff_t end;
+	loff_t endoff;
+	loff_t startoff;
+	loff_t lastoff;
+	int found = 0;
+
+	blkbits = inode->i_sb->s_blocksize_bits;
+	startoff = *offset;
+	lastoff = startoff;
+	endoff = (map->m_lblk + map->m_len) << blkbits;
+
+	index = startoff >> PAGE_CACHE_SHIFT;
+	end = endoff >> PAGE_CACHE_SHIFT;
+
+	pagevec_init(&pvec, 0);
+	do {
+		int i, num;
+		unsigned long nr_pages;
+
+		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
+					  (pgoff_t)num);
+		if (nr_pages == 0) {
+			if (whence == SEEK_DATA)
+				break;
+
+			BUG_ON(whence != SEEK_HOLE);
+			/*
+			 * If this is the first time through the loop,
+			 * or the offset is not beyond the end offset,
+			 * there is a hole at this offset.
+			 */
+			if (lastoff == startoff || lastoff < endoff)
+				found = 1;
+			break;
+		}
+
+		/*
+		 * If this is the first pass through the loop and the
+		 * offset is smaller than the first page offset, there is
+		 * a hole at this offset.
+		 */
+		if (lastoff == startoff && whence == SEEK_HOLE &&
+		    lastoff < page_offset(pvec.pages[0])) {
+			found = 1;
+			break;
+		}
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+			struct buffer_head *bh, *head;
+
+			/*
+			 * If the current offset is not beyond the end of the
+			 * given range, it will be a hole.
+			 */
+			if (lastoff < endoff && whence == SEEK_HOLE &&
+			    page->index > end) {
+				found = 1;
+				*offset = lastoff;
+				goto out;
+			}
+
+			lock_page(page);
+
+			if (unlikely(page->mapping != inode->i_mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!page_has_buffers(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (page_has_buffers(page)) {
+				lastoff = page_offset(page);
+				bh = head = page_buffers(page);
+				do {
+					if (buffer_uptodate(bh) ||
+					    buffer_unwritten(bh)) {
+						if (whence == SEEK_DATA)
+							found = 1;
+					} else {
+						if (whence == SEEK_HOLE)
+							found = 1;
+					}
+					if (found) {
+						*offset = max_t(loff_t,
+							startoff, lastoff);
+						unlock_page(page);
+						goto out;
+					}
+					lastoff += bh->b_size;
+					bh = bh->b_this_page;
+				} while (bh != head);
+			}
+
+			lastoff = page_offset(page) + PAGE_SIZE;
+			unlock_page(page);
+		}
+
+		/*
+		 * Fewer pages were returned than we asked for, so there
+		 * must be a hole in that range.
+		 */
+		if (nr_pages < num && whence == SEEK_HOLE) {
+			found = 1;
+			*offset = lastoff;
+			break;
+		}
+
+		index = pvec.pages[i - 1]->index + 1;
+		pagevec_release(&pvec);
+	} while (index <= end);
+
+out:
+	pagevec_release(&pvec);
+	return found;
+}
+
+/*
+ * ext4_seek_data() retrieves the offset for SEEK_DATA.
+ */
+static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct ext4_map_blocks map;
+	struct extent_status es;
+	ext4_lblk_t start, last, end;
+	loff_t dataoff, isize;
+	int blkbits;
+	int ret = 0;
+
+	mutex_lock(&inode->i_mutex);
+
+	isize = i_size_read(inode);
+	if (offset >= isize) {
+		mutex_unlock(&inode->i_mutex);
+		return -ENXIO;
+	}
+
+	blkbits = inode->i_sb->s_blocksize_bits;
+	start = offset >> blkbits;
+	last = start;
+	end = isize >> blkbits;
+	dataoff = offset;
+
+	do {
+		map.m_lblk = last;
+		map.m_len = end - last + 1;
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
+			if (last != start)
+				dataoff = last << blkbits;
+			break;
+		}
+
+		/*
+		 * If there is a delayed extent at this offset,
+		 * it will be treated as data.
+		 */
+		es.start = last;
+		(void)ext4_es_find_extent(inode, &es);
+		if (last >= es.start &&
+		    last < es.start + es.len) {
+			if (last != start)
+				dataoff = last << blkbits;
+			break;
+		}
+
+		/*
+		 * If there is an unwritten extent at this offset,
+		 * it will be treated as data or as a hole depending
+		 * on whether the page cache has data for it.
+		 */
+		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+			int unwritten;
+			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+							      &map, &dataoff);
+			if (unwritten)
+				break;
+		}
+
+		last++;
+		dataoff = last << blkbits;
+	} while (last <= end);
+
+	mutex_unlock(&inode->i_mutex);
+
+	if (dataoff > isize)
+		return -ENXIO;
+
+	if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
+		return -EINVAL;
+	if (dataoff > maxsize)
+		return -EINVAL;
+
+	if (dataoff != file->f_pos) {
+		file->f_pos = dataoff;
+		file->f_version = 0;
+	}
+
+	return dataoff;
+}
+
+/*
+ * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
+ */
+static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct ext4_map_blocks map;
+	struct extent_status es;
+	ext4_lblk_t start, last, end;
+	loff_t holeoff, isize;
+	int blkbits;
+	int ret = 0;
+
+	mutex_lock(&inode->i_mutex);
+
+	isize = i_size_read(inode);
+	if (offset >= isize) {
+		mutex_unlock(&inode->i_mutex);
+		return -ENXIO;
+	}
+
+	blkbits = inode->i_sb->s_blocksize_bits;
+	start = offset >> blkbits;
+	last = start;
+	end = isize >> blkbits;
+	holeoff = offset;
+
+	do {
+		map.m_lblk = last;
+		map.m_len = end - last + 1;
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
+			last += ret;
+			holeoff = last << blkbits;
+			continue;
+		}
+
+		/*
+		 * If there is a delayed extent at this offset,
+		 * we will skip this extent.
+		 */
+		es.start = last;
+		(void)ext4_es_find_extent(inode, &es);
+		if (last >= es.start &&
+		    last < es.start + es.len) {
+			last = es.start + es.len;
+			holeoff = last << blkbits;
+			continue;
+		}
+
+		/*
+		 * If there is an unwritten extent at this offset,
+		 * it will be treated as data or as a hole depending
+		 * on whether the page cache has data for it.
+		 */
+		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+			int unwritten;
+			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+							      &map, &holeoff);
+			if (!unwritten) {
+				last += ret;
+				holeoff = last << blkbits;
+				continue;
+			}
+		}
+
+		/* find a hole */
+		break;
+	} while (last <= end);
+
+	mutex_unlock(&inode->i_mutex);
+
+	if (holeoff > isize)
+		holeoff = isize;
+
+	if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
+		return -EINVAL;
+	if (holeoff > maxsize)
+		return -EINVAL;
+
+	if (holeoff != file->f_pos) {
+		file->f_pos = holeoff;
+		file->f_version = 0;
+	}
+
+	return holeoff;
+}
+
+/*
  * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
  * by calling generic_file_llseek_size() with the appropriate maxbytes
  * value for each.
  */
-loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	loff_t maxbytes;
@@ -300,8 +619,19 @@
 	else
 		maxbytes = inode->i_sb->s_maxbytes;
 
-	return generic_file_llseek_size(file, offset, origin,
-					maxbytes, i_size_read(inode));
+	switch (whence) {
+	case SEEK_SET:
+	case SEEK_CUR:
+	case SEEK_END:
+		return generic_file_llseek_size(file, offset, whence,
+						maxbytes, i_size_read(inode));
+	case SEEK_DATA:
+		return ext4_seek_data(file, offset, maxbytes);
+	case SEEK_HOLE:
+		return ext4_seek_hole(file, offset, maxbytes);
+	}
+
+	return -EINVAL;
 }
 
 const struct file_operations ext4_file_operations = {
@@ -326,12 +656,10 @@
 const struct inode_operations ext4_file_inode_operations = {
 	.setattr	= ext4_setattr,
 	.getattr	= ext4_getattr,
-#ifdef CONFIG_EXT4_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
-#endif
 	.get_acl	= ext4_get_acl,
 	.fiemap		= ext4_fiemap,
 };
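
With SEEK_DATA/SEEK_HOLE wired into ext4_llseek() above, userspace can enumerate the data segments of a sparse file through plain lseek(2); these whence values have been part of the syscall since Linux 3.1. A minimal demo, with error handling kept terse:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		off_t data = 0, hole;
		int fd;

		if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
			return 1;
		for (;;) {
			/* Next data; fails with ENXIO past the last byte. */
			data = lseek(fd, data, SEEK_DATA);
			if (data < 0)
				break;
			/* Every file has an implicit hole at i_size, so
			 * SEEK_HOLE from a data offset always succeeds. */
			hole = lseek(fd, data, SEEK_HOLE);
			if (hole < 0)
				break;
			printf("data: [%lld, %lld)\n", (long long)data,
			       (long long)hole);
			data = hole;
		}
		close(fd);
		return 0;
	}

On an ext4 file the reported boundaries reflect the extent-status and page-cache checks implemented above.
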
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index be1d89f..dfbc1fe 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -44,7 +44,6 @@
  */
 static int ext4_sync_parent(struct inode *inode)
 {
-	struct writeback_control wbc;
 	struct dentry *dentry = NULL;
 	struct inode *next;
 	int ret = 0;
@@ -66,10 +65,7 @@
 		ret = sync_mapping_buffers(inode->i_mapping);
 		if (ret)
 			break;
-		memset(&wbc, 0, sizeof(wbc));
-		wbc.sync_mode = WB_SYNC_ALL;
-		wbc.nr_to_write = 0;         /* only write out the inode */
-		ret = sync_inode(inode, &wbc);
+		ret = sync_inode_metadata(inode, 1);
 		if (ret)
 			break;
 	}
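
For reference, the helper the patch switches to bundles the same writeback_control setup; roughly (a sketch of the generic helper's shape, not a verbatim quote of fs/fs-writeback.c):

	int sync_inode_metadata(struct inode *inode, int wait)
	{
		struct writeback_control wbc = {
			.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
			.nr_to_write = 0,	/* only write out the inode */
		};

		return sync_inode(inode, &wbc);
	}

So passing wait == 1, as ext4_sync_parent() now does, preserves the WB_SYNC_ALL behaviour of the removed open-coded version.
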
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3a100e7..3f32c80 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -762,7 +762,6 @@
 
 		BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
 		err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
-		brelse(block_bitmap_bh);
 
 		/* recheck and clear flag under lock if we still need to */
 		ext4_lock_group(sb, group);
@@ -775,6 +774,7 @@
 			ext4_group_desc_csum_set(sb, group, gdp);
 		}
 		ext4_unlock_group(sb, group);
+		brelse(block_bitmap_bh);
 
 		if (err)
 			goto fail;
@@ -902,6 +902,10 @@
 
 	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
 
+	ei->i_inline_off = 0;
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
+		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+
 	ret = inode;
 	dquot_initialize(inode);
 	err = dquot_alloc_inode(inode);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 792e388..20862f9 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -22,6 +22,7 @@
 
 #include "ext4_jbd2.h"
 #include "truncate.h"
+#include "ext4_extents.h"	/* Needed for EXT_MAX_BLOCKS */
 
 #include <trace/events/ext4.h>
 
@@ -755,8 +756,7 @@
 		partial--;
 	}
 out:
-	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
-				map->m_pblk, map->m_len, err);
+	trace_ext4_ind_map_blocks_exit(inode, map, err);
 	return err;
 }
 
@@ -1412,6 +1412,7 @@
 	down_write(&ei->i_data_sem);
 
 	ext4_discard_preallocations(inode);
+	ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
 
 	/*
 	 * The orphan list entry will now protect us from any crash which
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
new file mode 100644
index 0000000..387c47c
--- /dev/null
+++ b/fs/ext4/inline.c
@@ -0,0 +1,1884 @@
+/*
+ * Copyright (c) 2012 Taobao.
+ * Written by Tao Ma <boyu.mt@taobao.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include "ext4_jbd2.h"
+#include "ext4.h"
+#include "xattr.h"
+#include "truncate.h"
+#include <linux/fiemap.h>
+
+#define EXT4_XATTR_SYSTEM_DATA	"data"
+#define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
+#define EXT4_INLINE_DOTDOT_SIZE	4
+
+int ext4_get_inline_size(struct inode *inode)
+{
+	if (EXT4_I(inode)->i_inline_off)
+		return EXT4_I(inode)->i_inline_size;
+
+	return 0;
+}
+
+static int get_max_inline_xattr_value_size(struct inode *inode,
+					   struct ext4_iloc *iloc)
+{
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_xattr_entry *entry;
+	struct ext4_inode *raw_inode;
+	int free, min_offs;
+
+	min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
+			EXT4_GOOD_OLD_INODE_SIZE -
+			EXT4_I(inode)->i_extra_isize -
+			sizeof(struct ext4_xattr_ibody_header);
+
+	/*
+	 * We need to subtract another sizeof(__u32) since an in-inode xattr
+	 * needs an empty 4 bytes to indicate the gap between the xattr entry
+	 * and the name/value pair.
+	 */
+	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
+		return EXT4_XATTR_SIZE(min_offs -
+			EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) -
+			EXT4_XATTR_ROUND - sizeof(__u32));
+
+	raw_inode = ext4_raw_inode(iloc);
+	header = IHDR(inode, raw_inode);
+	entry = IFIRST(header);
+
+	/* Compute min_offs. */
+	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+		if (!entry->e_value_block && entry->e_value_size) {
+			size_t offs = le16_to_cpu(entry->e_value_offs);
+			if (offs < min_offs)
+				min_offs = offs;
+		}
+	}
+	free = min_offs -
+		((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
+
+	if (EXT4_I(inode)->i_inline_off) {
+		entry = (struct ext4_xattr_entry *)
+			((void *)raw_inode + EXT4_I(inode)->i_inline_off);
+
+		free += le32_to_cpu(entry->e_value_size);
+		goto out;
+	}
+
+	free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA));
+
+	if (free > EXT4_XATTR_ROUND)
+		free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND);
+	else
+		free = 0;
+
+out:
+	return free;
+}
+
+/*
+ * Get the maximum size we can now store in an inode.
+ * If we can't find space for the xattr entry, don't use the i_block
+ * space either, since we'd have no way to flag the inline data.
+ */
+int ext4_get_max_inline_size(struct inode *inode)
+{
+	int error, max_inline_size;
+	struct ext4_iloc iloc;
+
+	if (EXT4_I(inode)->i_extra_isize == 0)
+		return 0;
+
+	error = ext4_get_inode_loc(inode, &iloc);
+	if (error) {
+		ext4_error_inode(inode, __func__, __LINE__, 0,
+				 "can't get inode location %lu",
+				 inode->i_ino);
+		return 0;
+	}
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
+	up_read(&EXT4_I(inode)->xattr_sem);
+
+	brelse(iloc.bh);
+
+	if (!max_inline_size)
+		return 0;
+
+	return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
+}
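
To make the arithmetic concrete, assume 256-byte inodes, i_extra_isize = 28, and no other in-inode xattrs (illustrative values; sizeof(struct ext4_xattr_entry) is 16 bytes, the ibody header is 4 bytes, and EXT4_XATTR_ROUND is 3):

	min_offs    = 256 - 128 - 28 - 4                  =  96
	entry cost  = EXT4_XATTR_LEN(strlen("data"))
	            = (4 + 3 + 16) & ~3                   =  20
	value space = EXT4_XATTR_SIZE(96 - 20 - 3 - 4)    =  72
	max inline  = 72 + EXT4_MIN_INLINE_DATA_SIZE (60) = 132 bytes

So such an inode can hold roughly 132 bytes of file data before conversion to a regular block becomes necessary.
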
+
+int ext4_has_inline_data(struct inode *inode)
+{
+	return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
+	       EXT4_I(inode)->i_inline_off;
+}
+
+/*
+ * This function does not take xattr_sem, which is OK because it is
+ * currently only used in a code path coming from ext4_iget, before
+ * the new inode has been unlocked.
+ */
+int ext4_find_inline_data_nolock(struct inode *inode)
+{
+	struct ext4_xattr_ibody_find is = {
+		.s = { .not_found = -ENODATA, },
+	};
+	struct ext4_xattr_info i = {
+		.name_index = EXT4_XATTR_INDEX_SYSTEM,
+		.name = EXT4_XATTR_SYSTEM_DATA,
+	};
+	int error;
+
+	if (EXT4_I(inode)->i_extra_isize == 0)
+		return 0;
+
+	error = ext4_get_inode_loc(inode, &is.iloc);
+	if (error)
+		return error;
+
+	error = ext4_xattr_ibody_find(inode, &i, &is);
+	if (error)
+		goto out;
+
+	if (!is.s.not_found) {
+		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
+					(void *)ext4_raw_inode(&is.iloc));
+		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
+				le32_to_cpu(is.s.here->e_value_size);
+		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+	}
+out:
+	brelse(is.iloc.bh);
+	return error;
+}
+
+static int ext4_read_inline_data(struct inode *inode, void *buffer,
+				 unsigned int len,
+				 struct ext4_iloc *iloc)
+{
+	struct ext4_xattr_entry *entry;
+	struct ext4_xattr_ibody_header *header;
+	int cp_len = 0;
+	struct ext4_inode *raw_inode;
+
+	if (!len)
+		return 0;
+
+	BUG_ON(len > EXT4_I(inode)->i_inline_size);
+
+	cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ?
+			len : EXT4_MIN_INLINE_DATA_SIZE;
+
+	raw_inode = ext4_raw_inode(iloc);
+	memcpy(buffer, (void *)(raw_inode->i_block), cp_len);
+
+	len -= cp_len;
+	buffer += cp_len;
+
+	if (!len)
+		goto out;
+
+	header = IHDR(inode, raw_inode);
+	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
+					    EXT4_I(inode)->i_inline_off);
+	len = min_t(unsigned int, len,
+		    (unsigned int)le32_to_cpu(entry->e_value_size));
+
+	memcpy(buffer,
+	       (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
+	cp_len += len;
+
+out:
+	return cp_len;
+}
+
+/*
+ * Write the buffer to the inline inode.
+ * If 'create' is set, we don't need to do the extra copy in the xattr
+ * value since it is already handled by ext4_xattr_ibody_inline_set.
+ * That saves us one memcpy.
+ */
+void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
+			    void *buffer, loff_t pos, unsigned int len)
+{
+	struct ext4_xattr_entry *entry;
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_inode *raw_inode;
+	int cp_len = 0;
+
+	BUG_ON(!EXT4_I(inode)->i_inline_off);
+	BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);
+
+	raw_inode = ext4_raw_inode(iloc);
+	buffer += pos;
+
+	if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
+		cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
+			 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
+		memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);
+
+		len -= cp_len;
+		buffer += cp_len;
+		pos += cp_len;
+	}
+
+	if (!len)
+		return;
+
+	pos -= EXT4_MIN_INLINE_DATA_SIZE;
+	header = IHDR(inode, raw_inode);
+	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
+					    EXT4_I(inode)->i_inline_off);
+
+	memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
+	       buffer, len);
+}
+
+static int ext4_create_inline_data(handle_t *handle,
+				   struct inode *inode, unsigned len)
+{
+	int error;
+	void *value = NULL;
+	struct ext4_xattr_ibody_find is = {
+		.s = { .not_found = -ENODATA, },
+	};
+	struct ext4_xattr_info i = {
+		.name_index = EXT4_XATTR_INDEX_SYSTEM,
+		.name = EXT4_XATTR_SYSTEM_DATA,
+	};
+
+	error = ext4_get_inode_loc(inode, &is.iloc);
+	if (error)
+		return error;
+
+	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+	if (error)
+		goto out;
+
+	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
+		value = EXT4_ZERO_XATTR_VALUE;
+		len -= EXT4_MIN_INLINE_DATA_SIZE;
+	} else {
+		value = "";
+		len = 0;
+	}
+
+	/* Insert the xattr entry. */
+	i.value = value;
+	i.value_len = len;
+
+	error = ext4_xattr_ibody_find(inode, &i, &is);
+	if (error)
+		goto out;
+
+	BUG_ON(!is.s.not_found);
+
+	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+	if (error) {
+		if (error == -ENOSPC)
+			ext4_clear_inode_state(inode,
+					       EXT4_STATE_MAY_INLINE_DATA);
+		goto out;
+	}
+
+	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
+		0, EXT4_MIN_INLINE_DATA_SIZE);
+
+	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
+				      (void *)ext4_raw_inode(&is.iloc));
+	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
+	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
+	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
+	get_bh(is.iloc.bh);
+	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
+
+out:
+	brelse(is.iloc.bh);
+	return error;
+}
+
+static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
+				   unsigned int len)
+{
+	int error;
+	void *value = NULL;
+	struct ext4_xattr_ibody_find is = {
+		.s = { .not_found = -ENODATA, },
+	};
+	struct ext4_xattr_info i = {
+		.name_index = EXT4_XATTR_INDEX_SYSTEM,
+		.name = EXT4_XATTR_SYSTEM_DATA,
+	};
+
+	/* If the old space is ok, write the data directly. */
+	if (len <= EXT4_I(inode)->i_inline_size)
+		return 0;
+
+	error = ext4_get_inode_loc(inode, &is.iloc);
+	if (error)
+		return error;
+
+	error = ext4_xattr_ibody_find(inode, &i, &is);
+	if (error)
+		goto out;
+
+	BUG_ON(is.s.not_found);
+
+	len -= EXT4_MIN_INLINE_DATA_SIZE;
+	value = kzalloc(len, GFP_NOFS);
+	if (!value)
+		goto out;
+
+	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
+				     value, len);
+	if (error == -ENODATA)
+		goto out;
+
+	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+	if (error)
+		goto out;
+
+	/* Update the xattr entry. */
+	i.value = value;
+	i.value_len = len;
+
+	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+	if (error)
+		goto out;
+
+	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
+				      (void *)ext4_raw_inode(&is.iloc));
+	EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
+				le32_to_cpu(is.s.here->e_value_size);
+	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+	get_bh(is.iloc.bh);
+	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
+
+out:
+	kfree(value);
+	brelse(is.iloc.bh);
+	return error;
+}
+
+int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
+			     unsigned int len)
+{
+	int ret, size;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+	if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
+		return -ENOSPC;
+
+	size = ext4_get_max_inline_size(inode);
+	if (size < len)
+		return -ENOSPC;
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+
+	if (ei->i_inline_off)
+		ret = ext4_update_inline_data(handle, inode, len);
+	else
+		ret = ext4_create_inline_data(handle, inode, len);
+
+	up_write(&EXT4_I(inode)->xattr_sem);
+
+	return ret;
+}
+
+static int ext4_destroy_inline_data_nolock(handle_t *handle,
+					   struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_xattr_ibody_find is = {
+		.s = { .not_found = 0, },
+	};
+	struct ext4_xattr_info i = {
+		.name_index = EXT4_XATTR_INDEX_SYSTEM,
+		.name = EXT4_XATTR_SYSTEM_DATA,
+		.value = NULL,
+		.value_len = 0,
+	};
+	int error;
+
+	if (!ei->i_inline_off)
+		return 0;
+
+	error = ext4_get_inode_loc(inode, &is.iloc);
+	if (error)
+		return error;
+
+	error = ext4_xattr_ibody_find(inode, &i, &is);
+	if (error)
+		goto out;
+
+	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+	if (error)
+		goto out;
+
+	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+	if (error)
+		goto out;
+
+	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
+		0, EXT4_MIN_INLINE_DATA_SIZE);
+
+	if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
+				      EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+		if (S_ISDIR(inode->i_mode) ||
+		    S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
+			ext4_ext_tree_init(handle, inode);
+		}
+	}
+	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
+
+	get_bh(is.iloc.bh);
+	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
+
+	EXT4_I(inode)->i_inline_off = 0;
+	EXT4_I(inode)->i_inline_size = 0;
+	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+out:
+	brelse(is.iloc.bh);
+	if (error == -ENODATA)
+		error = 0;
+	return error;
+}
+
+static int ext4_read_inline_page(struct inode *inode, struct page *page)
+{
+	void *kaddr;
+	int ret = 0;
+	size_t len;
+	struct ext4_iloc iloc;
+
+	BUG_ON(!PageLocked(page));
+	BUG_ON(!ext4_has_inline_data(inode));
+	BUG_ON(page->index);
+
+	if (!EXT4_I(inode)->i_inline_off) {
+		ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
+			     inode->i_ino);
+		goto out;
+	}
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		goto out;
+
+	len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
+	kaddr = kmap_atomic(page);
+	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr);
+	zero_user_segment(page, len, PAGE_CACHE_SIZE);
+	SetPageUptodate(page);
+	brelse(iloc.bh);
+
+out:
+	return ret;
+}
+
+int ext4_readpage_inline(struct inode *inode, struct page *page)
+{
+	int ret = 0;
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		up_read(&EXT4_I(inode)->xattr_sem);
+		return -EAGAIN;
+	}
+
+	/*
+	 * Inline data can currently only live in the first page,
+	 * so for all the other pages just mark them uptodate.
+	 */
+	if (!page->index)
+		ret = ext4_read_inline_page(inode, page);
+	else if (!PageUptodate(page)) {
+		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		SetPageUptodate(page);
+	}
+
+	up_read(&EXT4_I(inode)->xattr_sem);
+
+	unlock_page(page);
+	return ret >= 0 ? 0 : ret;
+}
+
+static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
+					      struct inode *inode,
+					      unsigned flags)
+{
+	int ret, needed_blocks;
+	handle_t *handle = NULL;
+	int retries = 0, sem_held = 0;
+	struct page *page = NULL;
+	unsigned from, to;
+	struct ext4_iloc iloc;
+
+	if (!ext4_has_inline_data(inode)) {
+		/*
+		 * clear the flag so that no new write
+		 * will trap here again.
+		 */
+		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+		return 0;
+	}
+
+	needed_blocks = ext4_writepage_trans_blocks(inode);
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+retry:
+	handle = ext4_journal_start(inode, needed_blocks);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	/* We cannot recurse into the filesystem as the transaction is already
+	 * started */
+	flags |= AOP_FLAG_NOFS;
+
+	page = grab_cache_page_write_begin(mapping, 0, flags);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	sem_held = 1;
+	/* If someone has already done this for us, just exit. */
+	if (!ext4_has_inline_data(inode)) {
+		ret = 0;
+		goto out;
+	}
+
+	from = 0;
+	to = ext4_get_inline_size(inode);
+	if (!PageUptodate(page)) {
+		ret = ext4_read_inline_page(inode, page);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = ext4_destroy_inline_data_nolock(handle, inode);
+	if (ret)
+		goto out;
+
+	if (ext4_should_dioread_nolock(inode))
+		ret = __block_write_begin(page, from, to, ext4_get_block_write);
+	else
+		ret = __block_write_begin(page, from, to, ext4_get_block);
+
+	if (!ret && ext4_should_journal_data(inode)) {
+		ret = ext4_walk_page_buffers(handle, page_buffers(page),
+					     from, to, NULL,
+					     do_journal_get_write_access);
+	}
+
+	if (ret) {
+		unlock_page(page);
+		page_cache_release(page);
+		ext4_orphan_add(handle, inode);
+		up_write(&EXT4_I(inode)->xattr_sem);
+		sem_held = 0;
+		ext4_journal_stop(handle);
+		handle = NULL;
+		ext4_truncate_failed_write(inode);
+		/*
+		 * If truncate failed early the inode might
+		 * still be on the orphan list; we need to
+		 * make sure the inode is removed from the
+		 * orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+
+	block_commit_write(page, from, to);
+out:
+	if (page) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	if (sem_held)
+		up_write(&EXT4_I(inode)->xattr_sem);
+	if (handle)
+		ext4_journal_stop(handle);
+	brelse(iloc.bh);
+	return ret;
+}
+
+/*
+ * Try to write data in the inode.
+ * If the inode has inline data, check whether the new write still fits
+ * in the inode. If not, create the page and the handle, move the data
+ * to the page, mark it uptodate, and let later code create an extent for it.
+ */
+int ext4_try_to_write_inline_data(struct address_space *mapping,
+				  struct inode *inode,
+				  loff_t pos, unsigned len,
+				  unsigned flags,
+				  struct page **pagep)
+{
+	int ret;
+	handle_t *handle;
+	struct page *page;
+	struct ext4_iloc iloc;
+
+	if (pos + len > ext4_get_max_inline_size(inode))
+		goto convert;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+	/*
+	 * The write may fit inside the inode, so try to
+	 * reserve the space in the inode first.
+	 */
+	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	ret = ext4_prepare_inline_data(handle, inode, pos + len);
+	if (ret && ret != -ENOSPC)
+		goto out;
+
+	/* We don't have space in inline inode, so convert it to extent. */
+	if (ret == -ENOSPC) {
+		ext4_journal_stop(handle);
+		brelse(iloc.bh);
+		goto convert;
+	}
+
+	flags |= AOP_FLAG_NOFS;
+
+	page = grab_cache_page_write_begin(mapping, 0, flags);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	*pagep = page;
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		ret = 0;
+		unlock_page(page);
+		page_cache_release(page);
+		goto out_up_read;
+	}
+
+	if (!PageUptodate(page)) {
+		ret = ext4_read_inline_page(inode, page);
+		if (ret < 0)
+			goto out_up_read;
+	}
+
+	ret = 1;
+	handle = NULL;
+out_up_read:
+	up_read(&EXT4_I(inode)->xattr_sem);
+out:
+	if (handle)
+		ext4_journal_stop(handle);
+	brelse(iloc.bh);
+	return ret;
+convert:
+	return ext4_convert_inline_data_to_extent(mapping,
+						  inode, flags);
+}
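
The tri-state return is meant for write_begin-style callers: a negative value is a hard error, 1 means the page is set up and the write stays inline, and 0 means fall back to the block-based path. A simplified sketch of such a caller (illustrative, not a verbatim call site):

	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
		ret = ext4_try_to_write_inline_data(mapping, inode, pos,
						    len, flags, pagep);
		if (ret < 0)
			return ret;		/* hard error */
		if (ret == 1)
			return 0;		/* write stays inline */
		/* ret == 0: continue with the normal write_begin path */
	}
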
+
+int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
+			       unsigned copied, struct page *page)
+{
+	int ret;
+	void *kaddr;
+	struct ext4_iloc iloc;
+
+	if (unlikely(copied < len)) {
+		if (!PageUptodate(page)) {
+			copied = 0;
+			goto out;
+		}
+	}
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret) {
+		ext4_std_error(inode->i_sb, ret);
+		copied = 0;
+		goto out;
+	}
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	BUG_ON(!ext4_has_inline_data(inode));
+
+	kaddr = kmap_atomic(page);
+	ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
+	kunmap_atomic(kaddr);
+	SetPageUptodate(page);
+	/* Clear the page dirty bit so that writepages won't write it out. */
+	ClearPageDirty(page);
+
+	up_write(&EXT4_I(inode)->xattr_sem);
+	brelse(iloc.bh);
+out:
+	return copied;
+}
+
+struct buffer_head *
+ext4_journalled_write_inline_data(struct inode *inode,
+				  unsigned len,
+				  struct page *page)
+{
+	int ret;
+	void *kaddr;
+	struct ext4_iloc iloc;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret) {
+		ext4_std_error(inode->i_sb, ret);
+		return NULL;
+	}
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	kaddr = kmap_atomic(page);
+	ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
+	kunmap_atomic(kaddr);
+	up_write(&EXT4_I(inode)->xattr_sem);
+
+	return iloc.bh;
+}
+
+/*
+ * Try to make the page cache and handle ready for the inline data case.
+ * We can call this function in 2 cases:
+ * 1. The inode is created and the first write exceeds inline size. We can
+ *    clear the inode state safely.
+ * 2. The inode has inline data, then we need to read the data, make it
+ *    update and dirty so that ext4_da_writepages can handle it. We don't
+ *    need to start the journal since the file's metatdata isn't changed now.
+ */
+static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
+						 struct inode *inode,
+						 unsigned flags,
+						 void **fsdata)
+{
+	int ret = 0, inline_size;
+	struct page *page;
+
+	page = grab_cache_page_write_begin(mapping, 0, flags);
+	if (!page)
+		return -ENOMEM;
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+		goto out;
+	}
+
+	inline_size = ext4_get_inline_size(inode);
+
+	if (!PageUptodate(page)) {
+		ret = ext4_read_inline_page(inode, page);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = __block_write_begin(page, 0, inline_size,
+				  ext4_da_get_block_prep);
+	if (ret) {
+		ext4_truncate_failed_write(inode);
+		goto out;
+	}
+
+	SetPageDirty(page);
+	SetPageUptodate(page);
+	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+	*fsdata = (void *)CONVERT_INLINE_DATA;
+
+out:
+	up_read(&EXT4_I(inode)->xattr_sem);
+	if (page) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	return ret;
+}
+
+/*
+ * Prepare the write for the inline data.
+ * If the data can be written into the inode, we just read the
+ * page, make it uptodate, and start the journal.
+ * Otherwise read the page and mark it dirty so that it can be
+ * handled in writepages (the i_disksize update is left to the
+ * normal ext4_da_write_end).
+ */
+int ext4_da_write_inline_data_begin(struct address_space *mapping,
+				    struct inode *inode,
+				    loff_t pos, unsigned len,
+				    unsigned flags,
+				    struct page **pagep,
+				    void **fsdata)
+{
+	int ret, inline_size;
+	handle_t *handle;
+	struct page *page;
+	struct ext4_iloc iloc;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	inline_size = ext4_get_max_inline_size(inode);
+
+	ret = -ENOSPC;
+	if (inline_size >= pos + len) {
+		ret = ext4_prepare_inline_data(handle, inode, pos + len);
+		if (ret && ret != -ENOSPC)
+			goto out;
+	}
+
+	if (ret == -ENOSPC) {
+		ret = ext4_da_convert_inline_data_to_extent(mapping,
+							    inode,
+							    flags,
+							    fsdata);
+		goto out;
+	}
+
+	/*
+	 * We cannot recurse into the filesystem as the transaction
+	 * is already started.
+	 */
+	flags |= AOP_FLAG_NOFS;
+
+	page = grab_cache_page_write_begin(mapping, 0, flags);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		ret = 0;
+		goto out_release_page;
+	}
+
+	if (!PageUptodate(page)) {
+		ret = ext4_read_inline_page(inode, page);
+		if (ret < 0)
+			goto out_release_page;
+	}
+
+	up_read(&EXT4_I(inode)->xattr_sem);
+	*pagep = page;
+	handle = NULL;
+	brelse(iloc.bh);
+	return 1;
+out_release_page:
+	up_read(&EXT4_I(inode)->xattr_sem);
+	unlock_page(page);
+	page_cache_release(page);
+out:
+	if (handle)
+		ext4_journal_stop(handle);
+	brelse(iloc.bh);
+	return ret;
+}
+
+int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
+				  unsigned len, unsigned copied,
+				  struct page *page)
+{
+	int i_size_changed = 0;
+
+	copied = ext4_write_inline_data_end(inode, pos, len, copied, page);
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos+copied > inode->i_size) {
+		i_size_write(inode, pos+copied);
+		i_size_changed = 1;
+	}
+	unlock_page(page);
+	page_cache_release(page);
+
+	/*
+	 * Don't mark the inode dirty under page lock. First, it unnecessarily
+	 * makes the holding time of page lock longer. Second, it forces lock
+	 * ordering of page lock and transaction start for journaling
+	 * filesystems.
+	 */
+	if (i_size_changed)
+		mark_inode_dirty(inode);
+
+	return copied;
+}
+
+#ifdef INLINE_DIR_DEBUG
+void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
+			  void *inline_start, int inline_size)
+{
+	int offset;
+	unsigned short de_len;
+	struct ext4_dir_entry_2 *de = inline_start;
+	void *dlimit = inline_start + inline_size;
+
+	trace_printk("inode %lu\n", dir->i_ino);
+	offset = 0;
+	while ((void *)de < dlimit) {
+		de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
+		trace_printk("de: off %u rlen %u name %*.s nlen %u ino %u\n",
+			     offset, de_len, de->name_len, de->name,
+			     de->name_len, le32_to_cpu(de->inode));
+		if (ext4_check_dir_entry(dir, NULL, de, bh,
+					 inline_start, inline_size, offset))
+			BUG();
+
+		offset += de_len;
+		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
+	}
+}
+#else
+#define ext4_show_inline_dir(dir, bh, inline_start, inline_size)
+#endif
+
+/*
+ * Add a new entry into an inline dir.
+ * It will return -ENOSPC if no space is available, -EEXIST if the
+ * directory entry already exists, and -EIO on I/O error.
+ */
+static int ext4_add_dirent_to_inline(handle_t *handle,
+				     struct dentry *dentry,
+				     struct inode *inode,
+				     struct ext4_iloc *iloc,
+				     void *inline_start, int inline_size)
+{
+	struct inode	*dir = dentry->d_parent->d_inode;
+	const char	*name = dentry->d_name.name;
+	int		namelen = dentry->d_name.len;
+	unsigned short	reclen;
+	int		err;
+	struct ext4_dir_entry_2 *de;
+
+	reclen = EXT4_DIR_REC_LEN(namelen);
+	err = ext4_find_dest_de(dir, inode, iloc->bh,
+				inline_start, inline_size,
+				name, namelen, &de);
+	if (err)
+		return err;
+
+	err = ext4_journal_get_write_access(handle, iloc->bh);
+	if (err)
+		return err;
+	ext4_insert_dentry(inode, de, inline_size, name, namelen);
+
+	ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
+
+	/*
+	 * XXX shouldn't update any times until successful
+	 * completion of syscall, but too many callers depend
+	 * on this.
+	 *
+	 * XXX similarly, too many callers depend on
+	 * ext4_new_inode() setting the times, but error
+	 * recovery deletes the inode, so the worst that can
+	 * happen is that the times are slightly out of date
+	 * and/or different from the directory change time.
+	 */
+	dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
+	ext4_update_dx_flag(dir);
+	dir->i_version++;
+	ext4_mark_inode_dirty(handle, dir);
+	return 1;
+}
+
+static void *ext4_get_inline_xattr_pos(struct inode *inode,
+				       struct ext4_iloc *iloc)
+{
+	struct ext4_xattr_entry *entry;
+	struct ext4_xattr_ibody_header *header;
+
+	BUG_ON(!EXT4_I(inode)->i_inline_off);
+
+	header = IHDR(inode, ext4_raw_inode(iloc));
+	entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) +
+					    EXT4_I(inode)->i_inline_off);
+
+	return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs);
+}
+
+/* Set the final de to cover the whole block. */
+static void ext4_update_final_de(void *de_buf, int old_size, int new_size)
+{
+	struct ext4_dir_entry_2 *de, *prev_de;
+	void *limit;
+	int de_len;
+
+	de = (struct ext4_dir_entry_2 *)de_buf;
+	if (old_size) {
+		limit = de_buf + old_size;
+		do {
+			prev_de = de;
+			de_len = ext4_rec_len_from_disk(de->rec_len, old_size);
+			de_buf += de_len;
+			de = (struct ext4_dir_entry_2 *)de_buf;
+		} while (de_buf < limit);
+
+		prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size -
+							old_size, new_size);
+	} else {
+		/* The dir was just created, so create an empty entry. */
+		de->inode = 0;
+		de->rec_len = ext4_rec_len_to_disk(new_size, new_size);
+	}
+}
+
+static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
+				  struct ext4_iloc *iloc)
+{
+	int ret;
+	int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
+	int new_size = get_max_inline_xattr_value_size(dir, iloc);
+
+	if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
+		return -ENOSPC;
+
+	ret = ext4_update_inline_data(handle, dir,
+				      new_size + EXT4_MIN_INLINE_DATA_SIZE);
+	if (ret)
+		return ret;
+
+	ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size,
+			     EXT4_I(dir)->i_inline_size -
+						EXT4_MIN_INLINE_DATA_SIZE);
+	dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size;
+	return 0;
+}
+
+static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
+				     struct ext4_iloc *iloc,
+				     void *buf, int inline_size)
+{
+	ext4_create_inline_data(handle, inode, inline_size);
+	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
+	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+}
+
+static int ext4_finish_convert_inline_dir(handle_t *handle,
+					  struct inode *inode,
+					  struct buffer_head *dir_block,
+					  void *buf,
+					  int inline_size)
+{
+	int err, csum_size = 0, header_size = 0;
+	struct ext4_dir_entry_2 *de;
+	struct ext4_dir_entry_tail *t;
+	void *target = dir_block->b_data;
+
+	/*
+	 * First create "." and ".." and then copy the dir information
+	 * back to the block.
+	 */
+	de = (struct ext4_dir_entry_2 *)target;
+	de = ext4_init_dot_dotdot(inode, de,
+		inode->i_sb->s_blocksize, csum_size,
+		le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1);
+	header_size = (void *)de - target;
+
+	memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
+		inline_size - EXT4_INLINE_DOTDOT_SIZE);
+
+	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+		csum_size = sizeof(struct ext4_dir_entry_tail);
+
+	inode->i_size = inode->i_sb->s_blocksize;
+	i_size_write(inode, inode->i_sb->s_blocksize);
+	EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+	ext4_update_final_de(dir_block->b_data,
+			inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
+			inode->i_sb->s_blocksize - csum_size);
+
+	if (csum_size) {
+		t = EXT4_DIRENT_TAIL(dir_block->b_data,
+				     inode->i_sb->s_blocksize);
+		initialize_dirent_tail(t, inode->i_sb->s_blocksize);
+	}
+	set_buffer_uptodate(dir_block);
+	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+	if (err)
+		goto out;
+	set_buffer_verified(dir_block);
+out:
+	return err;
+}
+
+static int ext4_convert_inline_data_nolock(handle_t *handle,
+					   struct inode *inode,
+					   struct ext4_iloc *iloc)
+{
+	int error;
+	void *buf = NULL;
+	struct buffer_head *data_bh = NULL;
+	struct ext4_map_blocks map;
+	int inline_size;
+
+	inline_size = ext4_get_inline_size(inode);
+	buf = kmalloc(inline_size, GFP_NOFS);
+	if (!buf) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	error = ext4_read_inline_data(inode, buf, inline_size, iloc);
+	if (error < 0)
+		goto out;
+
+	error = ext4_destroy_inline_data_nolock(handle, inode);
+	if (error)
+		goto out;
+
+	map.m_lblk = 0;
+	map.m_len = 1;
+	map.m_flags = 0;
+	error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE);
+	if (error < 0)
+		goto out_restore;
+	if (!(map.m_flags & EXT4_MAP_MAPPED)) {
+		error = -EIO;
+		goto out_restore;
+	}
+
+	data_bh = sb_getblk(inode->i_sb, map.m_pblk);
+	if (!data_bh) {
+		error = -EIO;
+		goto out_restore;
+	}
+
+	lock_buffer(data_bh);
+	error = ext4_journal_get_create_access(handle, data_bh);
+	if (error) {
+		unlock_buffer(data_bh);
+		error = -EIO;
+		goto out_restore;
+	}
+	memset(data_bh->b_data, 0, inode->i_sb->s_blocksize);
+
+	if (!S_ISDIR(inode->i_mode)) {
+		memcpy(data_bh->b_data, buf, inline_size);
+		set_buffer_uptodate(data_bh);
+		error = ext4_handle_dirty_metadata(handle,
+						   inode, data_bh);
+	} else {
+		error = ext4_finish_convert_inline_dir(handle, inode, data_bh,
+						       buf, inline_size);
+	}
+
+	unlock_buffer(data_bh);
+out_restore:
+	if (error)
+		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
+
+out:
+	brelse(data_bh);
+	kfree(buf);
+	return error;
+}
+
+/*
+ * Try to add the new entry to the inline data.
+ * If it succeeds, return 0. If not, extend the inline dir and copy the
+ * data to a newly created block.
+ */
+int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
+			      struct inode *inode)
+{
+	int ret, inline_size;
+	void *inline_start;
+	struct ext4_iloc iloc;
+	struct inode *dir = dentry->d_parent->d_inode;
+
+	ret = ext4_get_inode_loc(dir, &iloc);
+	if (ret)
+		return ret;
+
+	down_write(&EXT4_I(dir)->xattr_sem);
+	if (!ext4_has_inline_data(dir))
+		goto out;
+
+	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
+						 EXT4_INLINE_DOTDOT_SIZE;
+	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
+
+	ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
+					inline_start, inline_size);
+	if (ret != -ENOSPC)
+		goto out;
+
+	/* Check whether it can be inserted into the inline xattr space. */
+	inline_size = EXT4_I(dir)->i_inline_size -
+			EXT4_MIN_INLINE_DATA_SIZE;
+	if (!inline_size) {
+		/* Try to use the xattr space. */
+		ret = ext4_update_inline_dir(handle, dir, &iloc);
+		if (ret && ret != -ENOSPC)
+			goto out;
+
+		inline_size = EXT4_I(dir)->i_inline_size -
+				EXT4_MIN_INLINE_DATA_SIZE;
+	}
+
+	if (inline_size) {
+		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
+
+		ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
+						inline_start, inline_size);
+
+		if (ret != -ENOSPC)
+			goto out;
+	}
+
+	/*
+	 * The inline space is filled up, so create a new block for it.
+	 * As the extent tree will be created, we have to save the inline
+	 * dir first.
+	 */
+	ret = ext4_convert_inline_data_nolock(handle, dir, &iloc);
+
+out:
+	ext4_mark_inode_dirty(handle, dir);
+	up_write(&EXT4_I(dir)->xattr_sem);
+	brelse(iloc.bh);
+	return ret;
+}
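
For scale: a directory entry needs EXT4_DIR_REC_LEN(name_len), i.e. (name_len + 8 + 3) & ~3 bytes, and the i_block area of an inline dir offers 60 - 4 = 56 bytes once the ".." slot is taken. With 4-character names, for example:

	EXT4_DIR_REC_LEN(4)   = (4 + 8 + 3) & ~3 = 12 bytes
	4 entries * 12 bytes  = 48 of 56 bytes used
	a 5th entry (12 more) = 60 > 56: it no longer fits

so the code above falls back first to the inline xattr space and finally to conversion into a real block.
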
+
+int ext4_read_inline_dir(struct file *filp,
+			 void *dirent, filldir_t filldir,
+			 int *has_inline_data)
+{
+	int error = 0;
+	unsigned int offset, parent_ino;
+	int i, stored;
+	struct ext4_dir_entry_2 *de;
+	struct super_block *sb;
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int ret, inline_size = 0;
+	struct ext4_iloc iloc;
+	void *dir_buf = NULL;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		up_read(&EXT4_I(inode)->xattr_sem);
+		*has_inline_data = 0;
+		goto out;
+	}
+
+	inline_size = ext4_get_inline_size(inode);
+	dir_buf = kmalloc(inline_size, GFP_NOFS);
+	if (!dir_buf) {
+		ret = -ENOMEM;
+		up_read(&EXT4_I(inode)->xattr_sem);
+		goto out;
+	}
+
+	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
+	up_read(&EXT4_I(inode)->xattr_sem);
+	if (ret < 0)
+		goto out;
+
+	sb = inode->i_sb;
+	stored = 0;
+	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
+
+	while (!error && !stored && filp->f_pos < inode->i_size) {
+revalidate:
+		/*
+		 * If the version has changed since the last call to
+		 * readdir(2), then we might be pointing to an invalid
+		 * dirent right now.  Scan from the start of the inline
+		 * dir to make sure.
+		 */
+		if (filp->f_version != inode->i_version) {
+			for (i = 0;
+			     i < inode->i_size && i < offset;) {
+				if (!i) {
+					/* skip "." and ".." if needed. */
+					i += EXT4_INLINE_DOTDOT_SIZE;
+					continue;
+				}
+				de = (struct ext4_dir_entry_2 *)
+					(dir_buf + i);
+				/* It's too expensive to do a full
+				 * dirent test each time round this
+				 * loop, but we do have to test at
+				 * least that it is non-zero.  A
+				 * failure will be detected in the
+				 * dirent test below. */
+				if (ext4_rec_len_from_disk(de->rec_len,
+					inline_size) < EXT4_DIR_REC_LEN(1))
+					break;
+				i += ext4_rec_len_from_disk(de->rec_len,
+							    inline_size);
+			}
+			offset = i;
+			filp->f_pos = offset;
+			filp->f_version = inode->i_version;
+		}
+
+		while (!error && filp->f_pos < inode->i_size) {
+			if (filp->f_pos == 0) {
+				error = filldir(dirent, ".", 1, 0, inode->i_ino,
+						DT_DIR);
+				if (error)
+					break;
+				stored++;
+
+				error = filldir(dirent, "..", 2, 0, parent_ino,
+						DT_DIR);
+				if (error)
+					break;
+				stored++;
+
+				filp->f_pos = offset = EXT4_INLINE_DOTDOT_SIZE;
+				continue;
+			}
+
+			de = (struct ext4_dir_entry_2 *)(dir_buf + offset);
+			if (ext4_check_dir_entry(inode, filp, de,
+						 iloc.bh, dir_buf,
+						 inline_size, offset)) {
+				ret = stored;
+				goto out;
+			}
+			offset += ext4_rec_len_from_disk(de->rec_len,
+							 inline_size);
+			if (le32_to_cpu(de->inode)) {
+				/* We might block in the next section
+				 * if the data destination is
+				 * currently swapped out.  So, use a
+				 * version stamp to detect whether or
+				 * not the directory has been modified
+				 * during the copy operation.
+				 */
+				u64 version = filp->f_version;
+
+				error = filldir(dirent, de->name,
+						de->name_len,
+						filp->f_pos,
+						le32_to_cpu(de->inode),
+						get_dtype(sb, de->file_type));
+				if (error)
+					break;
+				if (version != filp->f_version)
+					goto revalidate;
+				stored++;
+			}
+			filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
+							      inline_size);
+		}
+		offset = 0;
+	}
+out:
+	kfree(dir_buf);
+	brelse(iloc.bh);
+	return ret;
+}
+
+struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
+					struct ext4_dir_entry_2 **parent_de,
+					int *retval)
+{
+	struct ext4_iloc iloc;
+
+	*retval = ext4_get_inode_loc(inode, &iloc);
+	if (*retval)
+		return NULL;
+
+	*parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
+
+	return iloc.bh;
+}
+
+/*
+ * Try to create the inline data for the new dir.
+ * If it succeeds, return 0, otherwise return the error.
+ * In case of ENOSPC, the caller should create the normal disk layout dir.
+ */
+int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
+			       struct inode *inode)
+{
+	int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE;
+	struct ext4_iloc iloc;
+	struct ext4_dir_entry_2 *de;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+	ret = ext4_prepare_inline_data(handle, inode, inline_size);
+	if (ret)
+		goto out;
+
+	/*
+	 * For an inline dir, we only save the inode number of ".."
+	 * and create a fake dentry to cover the remaining space.
+	 */
+	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
+	de->inode = cpu_to_le32(parent->i_ino);
+	de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE);
+	de->inode = 0;
+	de->rec_len = ext4_rec_len_to_disk(
+				inline_size - EXT4_INLINE_DOTDOT_SIZE,
+				inline_size);
+	set_nlink(inode, 2);
+	inode->i_size = EXT4_I(inode)->i_disksize = inline_size;
+out:
+	brelse(iloc.bh);
+	return ret;
+}
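
The resulting 60-byte i_block layout of a fresh inline dir is then (offsets assume EXT4_INLINE_DOTDOT_SIZE == 4):

	offset  0..3	le32 inode number of ".." (no name or rec_len stored)
	offset  4..59	one empty dirent: inode = 0, rec_len = 56,
			covering the rest of the inline area

"." and ".." themselves are synthesized at readdir time (see ext4_read_inline_dir() above), which is why only the parent's inode number needs to be stored.
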
+
+struct buffer_head *ext4_find_inline_entry(struct inode *dir,
+					const struct qstr *d_name,
+					struct ext4_dir_entry_2 **res_dir,
+					int *has_inline_data)
+{
+	int ret;
+	struct ext4_iloc iloc;
+	void *inline_start;
+	int inline_size;
+
+	if (ext4_get_inode_loc(dir, &iloc))
+		return NULL;
+
+	down_read(&EXT4_I(dir)->xattr_sem);
+	if (!ext4_has_inline_data(dir)) {
+		*has_inline_data = 0;
+		goto out;
+	}
+
+	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
+						EXT4_INLINE_DOTDOT_SIZE;
+	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
+	ret = search_dir(iloc.bh, inline_start, inline_size,
+			 dir, d_name, 0, res_dir);
+	if (ret == 1)
+		goto out_find;
+	if (ret < 0)
+		goto out;
+
+	if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE)
+		goto out;
+
+	inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
+	inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
+
+	ret = search_dir(iloc.bh, inline_start, inline_size,
+			 dir, d_name, 0, res_dir);
+	if (ret == 1)
+		goto out_find;
+
+out:
+	brelse(iloc.bh);
+	iloc.bh = NULL;
+out_find:
+	up_read(&EXT4_I(dir)->xattr_sem);
+	return iloc.bh;
+}
+
+int ext4_delete_inline_entry(handle_t *handle,
+			     struct inode *dir,
+			     struct ext4_dir_entry_2 *de_del,
+			     struct buffer_head *bh,
+			     int *has_inline_data)
+{
+	int err, inline_size;
+	struct ext4_iloc iloc;
+	void *inline_start;
+
+	err = ext4_get_inode_loc(dir, &iloc);
+	if (err)
+		return err;
+
+	down_write(&EXT4_I(dir)->xattr_sem);
+	if (!ext4_has_inline_data(dir)) {
+		*has_inline_data = 0;
+		goto out;
+	}
+
+	if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) <
+		EXT4_MIN_INLINE_DATA_SIZE) {
+		inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
+					EXT4_INLINE_DOTDOT_SIZE;
+		inline_size = EXT4_MIN_INLINE_DATA_SIZE -
+				EXT4_INLINE_DOTDOT_SIZE;
+	} else {
+		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
+		inline_size = ext4_get_inline_size(dir) -
+				EXT4_MIN_INLINE_DATA_SIZE;
+	}
+
+	err = ext4_journal_get_write_access(handle, bh);
+	if (err)
+		goto out;
+
+	err = ext4_generic_delete_entry(handle, dir, de_del, bh,
+					inline_start, inline_size, 0);
+	if (err)
+		goto out;
+
+	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+	err = ext4_mark_inode_dirty(handle, dir);
+	if (unlikely(err))
+		goto out;
+
+	ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
+out:
+	up_write(&EXT4_I(dir)->xattr_sem);
+	brelse(iloc.bh);
+	if (err != -ENOENT)
+		ext4_std_error(dir->i_sb, err);
+	return err;
+}
+
+/*
+ * Get the inline dentry at offset.
+ */
+static inline struct ext4_dir_entry_2 *
+ext4_get_inline_entry(struct inode *inode,
+		      struct ext4_iloc *iloc,
+		      unsigned int offset,
+		      void **inline_start,
+		      int *inline_size)
+{
+	void *inline_pos;
+
+	BUG_ON(offset > ext4_get_inline_size(inode));
+
+	if (offset < EXT4_MIN_INLINE_DATA_SIZE) {
+		inline_pos = (void *)ext4_raw_inode(iloc)->i_block;
+		*inline_size = EXT4_MIN_INLINE_DATA_SIZE;
+	} else {
+		inline_pos = ext4_get_inline_xattr_pos(inode, iloc);
+		offset -= EXT4_MIN_INLINE_DATA_SIZE;
+		*inline_size = ext4_get_inline_size(inode) -
+				EXT4_MIN_INLINE_DATA_SIZE;
+	}
+
+	if (inline_start)
+		*inline_start = inline_pos;
+	return (struct ext4_dir_entry_2 *)(inline_pos + offset);
+}
+
+int empty_inline_dir(struct inode *dir, int *has_inline_data)
+{
+	int err, inline_size;
+	struct ext4_iloc iloc;
+	void *inline_pos;
+	unsigned int offset;
+	struct ext4_dir_entry_2 *de;
+	int ret = 1;
+
+	err = ext4_get_inode_loc(dir, &iloc);
+	if (err) {
+		EXT4_ERROR_INODE(dir, "error %d getting inode %lu block",
+				 err, dir->i_ino);
+		return 1;
+	}
+
+	down_read(&EXT4_I(dir)->xattr_sem);
+	if (!ext4_has_inline_data(dir)) {
+		*has_inline_data = 0;
+		goto out;
+	}
+
+	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
+	if (!le32_to_cpu(de->inode)) {
+		ext4_warning(dir->i_sb,
+			     "bad inline directory (dir #%lu) - no `..'",
+			     dir->i_ino);
+		ret = 1;
+		goto out;
+	}
+
+	offset = EXT4_INLINE_DOTDOT_SIZE;
+	while (offset < dir->i_size) {
+		de = ext4_get_inline_entry(dir, &iloc, offset,
+					   &inline_pos, &inline_size);
+		if (ext4_check_dir_entry(dir, NULL, de,
+					 iloc.bh, inline_pos,
+					 inline_size, offset)) {
+			ext4_warning(dir->i_sb,
+				     "bad inline directory (dir #%lu) - "
+				     "inode %u, rec_len %u, name_len %d, "
+				     "inline size %d\n",
+				     dir->i_ino, le32_to_cpu(de->inode),
+				     le16_to_cpu(de->rec_len), de->name_len,
+				     inline_size);
+			ret = 1;
+			goto out;
+		}
+		if (le32_to_cpu(de->inode)) {
+			ret = 0;
+			goto out;
+		}
+		offset += ext4_rec_len_from_disk(de->rec_len, inline_size);
+	}
+
+out:
+	up_read(&EXT4_I(dir)->xattr_sem);
+	brelse(iloc.bh);
+	return ret;
+}
+
+int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
+{
+	int ret;
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	ret = ext4_destroy_inline_data_nolock(handle, inode);
+	up_write(&EXT4_I(inode)->xattr_sem);
+
+	return ret;
+}
+
+int ext4_inline_data_fiemap(struct inode *inode,
+			    struct fiemap_extent_info *fieinfo,
+			    int *has_inline)
+{
+	__u64 physical = 0;
+	__u64 length;
+	__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST;
+	int error = 0;
+	struct ext4_iloc iloc;
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		*has_inline = 0;
+		goto out;
+	}
+
+	error = ext4_get_inode_loc(inode, &iloc);
+	if (error)
+		goto out;
+
+	physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
+	physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
+	physical += offsetof(struct ext4_inode, i_block);
+	length = i_size_read(inode);
+
+	if (physical)
+		error = fiemap_fill_next_extent(fieinfo, 0, physical,
+						length, flags);
+	brelse(iloc.bh);
+out:
+	up_read(&EXT4_I(inode)->xattr_sem);
+	return (error < 0 ? error : 0);
+}
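
From userspace the inline case is observable through FIEMAP: a single extent carrying FIEMAP_EXTENT_DATA_INLINE whose physical offset points into the inode table. A minimal probe (a sketch; FS_IOC_FIEMAP and the fiemap structures are the long-standing uapi ones):

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/fs.h>
	#include <linux/fiemap.h>

	int main(int argc, char **argv)
	{
		struct fiemap *fm;
		int fd;

		if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
			return 1;
		/* Room for the header plus one extent record. */
		fm = calloc(1, sizeof(*fm) + sizeof(struct fiemap_extent));
		if (!fm)
			return 1;
		fm->fm_length = ~0ULL;		/* map the whole file */
		fm->fm_extent_count = 1;	/* room for one extent */
		if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0 &&
		    fm->fm_mapped_extents == 1)
			printf("flags %#x, inline: %s\n",
			       fm->fm_extents[0].fe_flags,
			       (fm->fm_extents[0].fe_flags &
				FIEMAP_EXTENT_DATA_INLINE) ? "yes" : "no");
		free(fm);
		close(fd);
		return 0;
	}
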
+
+/*
+ * Called during xattr set: if evicting the inline data would free the
+ * space 'needed', create the extent tree and move the data to a block.
+ *
+ * We use jbd2 instead of page cache to move data to the 1st block
+ * so that the whole transaction can be committed as a whole and
+ * the data isn't lost because of the delayed page cache write.
+ */
+int ext4_try_to_evict_inline_data(handle_t *handle,
+				  struct inode *inode,
+				  int needed)
+{
+	int error;
+	struct ext4_xattr_entry *entry;
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_inode *raw_inode;
+	struct ext4_iloc iloc;
+
+	error = ext4_get_inode_loc(inode, &iloc);
+	if (error)
+		return error;
+
+	raw_inode = ext4_raw_inode(&iloc);
+	header = IHDR(inode, raw_inode);
+	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
+					    EXT4_I(inode)->i_inline_off);
+	if (EXT4_XATTR_LEN(entry->e_name_len) +
+	    EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) {
+		error = -ENOSPC;
+		goto out;
+	}
+
+	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
+out:
+	brelse(iloc.bh);
+	return error;
+}
+
+void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
+{
+	handle_t *handle;
+	int inline_size, value_len, needed_blocks;
+	size_t i_size;
+	void *value = NULL;
+	struct ext4_xattr_ibody_find is = {
+		.s = { .not_found = -ENODATA, },
+	};
+	struct ext4_xattr_info i = {
+		.name_index = EXT4_XATTR_INDEX_SYSTEM,
+		.name = EXT4_XATTR_SYSTEM_DATA,
+	};
+
+
+	needed_blocks = ext4_writepage_trans_blocks(inode);
+	handle = ext4_journal_start(inode, needed_blocks);
+	if (IS_ERR(handle))
+		return;
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		*has_inline = 0;
+		ext4_journal_stop(handle);
+		return;
+	}
+
+	if (ext4_orphan_add(handle, inode))
+		goto out;
+
+	if (ext4_get_inode_loc(inode, &is.iloc))
+		goto out;
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	i_size = inode->i_size;
+	inline_size = ext4_get_inline_size(inode);
+	EXT4_I(inode)->i_disksize = i_size;
+
+	if (i_size < inline_size) {
+		/* Clear the content in the xattr space. */
+		if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) {
+			if (ext4_xattr_ibody_find(inode, &i, &is))
+				goto out_error;
+
+			BUG_ON(is.s.not_found);
+
+			value_len = le32_to_cpu(is.s.here->e_value_size);
+			value = kmalloc(value_len, GFP_NOFS);
+			if (!value)
+				goto out_error;
+
+			if (ext4_xattr_ibody_get(inode, i.name_index, i.name,
+						value, value_len))
+				goto out_error;
+
+			i.value = value;
+			i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
+					i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
+			if (ext4_xattr_ibody_inline_set(handle, inode, &i, &is))
+				goto out_error;
+		}
+
+		/* Clear the content within i_blocks. */
+		if (i_size < EXT4_MIN_INLINE_DATA_SIZE)
+			memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0,
+					EXT4_MIN_INLINE_DATA_SIZE - i_size);
+
+		EXT4_I(inode)->i_inline_size = i_size <
+					EXT4_MIN_INLINE_DATA_SIZE ?
+					EXT4_MIN_INLINE_DATA_SIZE : i_size;
+	}
+
+out_error:
+	up_write(&EXT4_I(inode)->i_data_sem);
+out:
+	brelse(is.iloc.bh);
+	up_write(&EXT4_I(inode)->xattr_sem);
+	kfree(value);
+	if (inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+	ext4_mark_inode_dirty(handle, inode);
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+
+	ext4_journal_stop(handle);
+	return;
+}
+
+int ext4_convert_inline_data(struct inode *inode)
+{
+	int error, needed_blocks;
+	handle_t *handle;
+	struct ext4_iloc iloc;
+
+	if (!ext4_has_inline_data(inode)) {
+		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+		return 0;
+	}
+
+	needed_blocks = ext4_writepage_trans_blocks(inode);
+
+	iloc.bh = NULL;
+	error = ext4_get_inode_loc(inode, &iloc);
+	if (error)
+		return error;
+
+	handle = ext4_journal_start(inode, needed_blocks);
+	if (IS_ERR(handle)) {
+		error = PTR_ERR(handle);
+		goto out_free;
+	}
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		up_write(&EXT4_I(inode)->xattr_sem);
+		goto out;
+	}
+
+	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
+	up_write(&EXT4_I(inode)->xattr_sem);
+out:
+	ext4_journal_stop(handle);
+out_free:
+	brelse(iloc.bh);
+	return error;
+}
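
The inline.c additions above all build on the same on-disk layout: the first
EXT4_MIN_INLINE_DATA_SIZE (60) bytes of an inline file live directly in the
inode's i_block array, and any overflow is kept in the system.data extended
attribute inside the inode body. ext4_inline_data_truncate() therefore clears
two separate regions. A minimal userspace sketch of that clearing rule follows;
toy_inode and the fixed xattr capacity are illustrative stand-ins, not kernel
structures (the kernel shrinks the xattr value via ext4_xattr_ibody_inline_set()
rather than zeroing a buffer):

#include <stdio.h>
#include <string.h>

#define MIN_INLINE 60			/* bytes stored directly in i_block */

struct toy_inode {
	char i_block[MIN_INLINE];
	char xattr[196];		/* illustrative system.data capacity */
	size_t inline_size;		/* inline bytes currently reserved */
};

/* Mirrors the i_size < inline_size branch above, simplified. */
static void toy_inline_truncate(struct toy_inode *ino, size_t new_size)
{
	if (new_size >= ino->inline_size)
		return;				/* nothing to clear */

	/* Clear the tail kept in the xattr space, if any. */
	if (ino->inline_size > MIN_INLINE) {
		size_t keep = new_size > MIN_INLINE ? new_size - MIN_INLINE : 0;
		memset(ino->xattr + keep, 0, sizeof(ino->xattr) - keep);
	}

	/* Clear the tail kept within i_block. */
	if (new_size < MIN_INLINE)
		memset(ino->i_block + new_size, 0, MIN_INLINE - new_size);

	/* The reserved inline area never shrinks below MIN_INLINE. */
	ino->inline_size = new_size < MIN_INLINE ? MIN_INLINE : new_size;
}

int main(void)
{
	struct toy_inode ino = { .inline_size = 120 };

	memset(ino.i_block, 'a', sizeof(ino.i_block));
	memset(ino.xattr, 'b', sizeof(ino.xattr));
	toy_inline_truncate(&ino, 30);
	printf("inline_size now %zu\n", ino.inline_size);
	return 0;
}
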
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3c243b..cb1c1ab 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -484,49 +484,6 @@
 }
 
 /*
- * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
- */
-static void set_buffers_da_mapped(struct inode *inode,
-				   struct ext4_map_blocks *map)
-{
-	struct address_space *mapping = inode->i_mapping;
-	struct pagevec pvec;
-	int i, nr_pages;
-	pgoff_t index, end;
-
-	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end = (map->m_lblk + map->m_len - 1) >>
-		(PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	pagevec_init(&pvec, 0);
-	while (index <= end) {
-		nr_pages = pagevec_lookup(&pvec, mapping, index,
-					  min(end - index + 1,
-					      (pgoff_t)PAGEVEC_SIZE));
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-			struct buffer_head *bh, *head;
-
-			if (unlikely(page->mapping != mapping) ||
-			    !PageDirty(page))
-				break;
-
-			if (page_has_buffers(page)) {
-				bh = head = page_buffers(page);
-				do {
-					set_buffer_da_mapped(bh);
-					bh = bh->b_this_page;
-				} while (bh != head);
-			}
-			index++;
-		}
-		pagevec_release(&pvec);
-	}
-}
-
-/*
  * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
@@ -574,7 +531,16 @@
 		up_read((&EXT4_I(inode)->i_data_sem));
 
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-		int ret = check_block_validity(inode, map);
+		int ret;
+		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+			/* delayed alloc may be allocated by fallocate and
+			 * converted to initialized by directIO.
+			 * We need to handle delayed extent here.
+			 */
+			down_write((&EXT4_I(inode)->i_data_sem));
+			goto delayed_mapped;
+		}
+		ret = check_block_validity(inode, map);
 		if (ret != 0)
 			return ret;
 	}
@@ -652,12 +618,15 @@
 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
-		/* If we have successfully mapped the delayed allocated blocks,
-		 * set the BH_Da_Mapped bit on them. Its important to do this
-		 * under the protection of i_data_sem.
-		 */
-		if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
-			set_buffers_da_mapped(inode, map);
+		if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+			int ret;
+delayed_mapped:
+			/* delayed allocation blocks have been allocated */
+			ret = ext4_es_remove_extent(inode, map->m_lblk,
+						    map->m_len);
+			if (ret < 0)
+				retval = ret;
+		}
 	}
 
 	up_write((&EXT4_I(inode)->i_data_sem));
@@ -680,10 +649,13 @@
 	int ret = 0, started = 0;
 	int dio_credits;
 
+	if (ext4_has_inline_data(inode))
+		return -ERANGE;
+
 	map.m_lblk = iblock;
 	map.m_len = bh->b_size >> inode->i_blkbits;
 
-	if (flags && !handle) {
+	if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
 		/* Direct IO write... */
 		if (map.m_len > DIO_MAX_BLOCKS)
 			map.m_len = DIO_MAX_BLOCKS;
@@ -798,13 +770,13 @@
 	return NULL;
 }
 
-static int walk_page_buffers(handle_t *handle,
-			     struct buffer_head *head,
-			     unsigned from,
-			     unsigned to,
-			     int *partial,
-			     int (*fn)(handle_t *handle,
-				       struct buffer_head *bh))
+int ext4_walk_page_buffers(handle_t *handle,
+			   struct buffer_head *head,
+			   unsigned from,
+			   unsigned to,
+			   int *partial,
+			   int (*fn)(handle_t *handle,
+				     struct buffer_head *bh))
 {
 	struct buffer_head *bh;
 	unsigned block_start, block_end;
@@ -854,8 +826,8 @@
  * is elevated.  We'll still have enough credits for the tiny quotafile
  * write.
  */
-static int do_journal_get_write_access(handle_t *handle,
-				       struct buffer_head *bh)
+int do_journal_get_write_access(handle_t *handle,
+				struct buffer_head *bh)
 {
 	int dirty = buffer_dirty(bh);
 	int ret;
@@ -878,7 +850,7 @@
 	return ret;
 }
 
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
+static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create);
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
@@ -902,6 +874,17 @@
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
+	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
+		ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
+						    flags, pagep);
+		if (ret < 0)
+			goto out;
+		if (ret == 1) {
+			ret = 0;
+			goto out;
+		}
+	}
+
 retry:
 	handle = ext4_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
@@ -919,6 +902,7 @@
 		ret = -ENOMEM;
 		goto out;
 	}
+
 	*pagep = page;
 
 	if (ext4_should_dioread_nolock(inode))
@@ -927,8 +911,9 @@
 		ret = __block_write_begin(page, pos, len, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
-		ret = walk_page_buffers(handle, page_buffers(page),
-				from, to, NULL, do_journal_get_write_access);
+		ret = ext4_walk_page_buffers(handle, page_buffers(page),
+					     from, to, NULL,
+					     do_journal_get_write_access);
 	}
 
 	if (ret) {
@@ -983,7 +968,12 @@
 	struct inode *inode = mapping->host;
 	handle_t *handle = ext4_journal_current_handle();
 
-	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (ext4_has_inline_data(inode))
+		copied = ext4_write_inline_data_end(inode, pos, len,
+						    copied, page);
+	else
+		copied = block_write_end(file, mapping, pos,
+					 len, copied, page, fsdata);
 
 	/*
 	 * No need to use i_size_read() here, the i_size
@@ -1134,16 +1124,21 @@
 
 	BUG_ON(!ext4_handle_valid(handle));
 
-	if (copied < len) {
-		if (!PageUptodate(page))
-			copied = 0;
-		page_zero_new_buffers(page, from+copied, to);
-	}
+	if (ext4_has_inline_data(inode))
+		copied = ext4_write_inline_data_end(inode, pos, len,
+						    copied, page);
+	else {
+		if (copied < len) {
+			if (!PageUptodate(page))
+				copied = 0;
+			page_zero_new_buffers(page, from+copied, to);
+		}
 
-	ret = walk_page_buffers(handle, page_buffers(page), from,
-				to, &partial, write_end_fn);
-	if (!partial)
-		SetPageUptodate(page);
+		ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
+					     to, &partial, write_end_fn);
+		if (!partial)
+			SetPageUptodate(page);
+	}
 	new_i_size = pos + copied;
 	if (new_i_size > inode->i_size)
 		i_size_write(inode, pos+copied);
@@ -1301,6 +1296,7 @@
 	struct inode *inode = page->mapping->host;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int num_clusters;
+	ext4_fsblk_t lblk;
 
 	head = page_buffers(page);
 	bh = head;
@@ -1310,20 +1306,23 @@
 		if ((offset <= curr_off) && (buffer_delay(bh))) {
 			to_release++;
 			clear_buffer_delay(bh);
-			clear_buffer_da_mapped(bh);
 		}
 		curr_off = next_off;
 	} while ((bh = bh->b_this_page) != head);
 
+	if (to_release) {
+		lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		ext4_es_remove_extent(inode, lblk, to_release);
+	}
+
 	/* If we have released all the blocks belonging to a cluster, then we
 	 * need to release the reserved space for that cluster. */
 	num_clusters = EXT4_NUM_B2C(sbi, to_release);
 	while (num_clusters > 0) {
-		ext4_fsblk_t lblk;
 		lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
 			((num_clusters - 1) << sbi->s_cluster_bits);
 		if (sbi->s_cluster_ratio == 1 ||
-		    !ext4_find_delalloc_cluster(inode, lblk, 1))
+		    !ext4_find_delalloc_cluster(inode, lblk))
 			ext4_da_release_space(inode, 1);
 
 		num_clusters--;
@@ -1429,8 +1428,6 @@
 						clear_buffer_delay(bh);
 						bh->b_blocknr = pblock;
 					}
-					if (buffer_da_mapped(bh))
-						clear_buffer_da_mapped(bh);
 					if (buffer_unwritten(bh) ||
 					    buffer_mapped(bh))
 						BUG_ON(bh->b_blocknr != pblock);
@@ -1500,9 +1497,16 @@
 	struct pagevec pvec;
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
+	ext4_lblk_t start, last;
 
 	index = mpd->first_page;
 	end   = mpd->next_page - 1;
+
+	start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	ext4_es_remove_extent(inode, start, last - start + 1);
+
+	pagevec_init(&pvec, 0);
 	while (index <= end) {
 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 		if (nr_pages == 0)
@@ -1656,15 +1660,6 @@
 
 		for (i = 0; i < map.m_len; i++)
 			unmap_underlying_metadata(bdev, map.m_pblk + i);
-
-		if (ext4_should_order_data(mpd->inode)) {
-			err = ext4_jbd2_file_inode(handle, mpd->inode);
-			if (err) {
-				/* Only if the journal is aborted */
-				mpd->retval = err;
-				goto submit_io;
-			}
-		}
 	}
 
 	/*
@@ -1795,7 +1790,19 @@
 	 * file system block.
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
-	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+	if (ext4_has_inline_data(inode)) {
+		/*
+		 * We will soon create blocks for this page, and let
+		 * us pretend as if the blocks aren't allocated yet.
+		 * In case of clusters, we have to handle the work
+		 * of mapping from cluster so that the reserved space
+		 * is calculated properly.
+		 */
+		if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
+		    ext4_find_delalloc_cluster(inode, map->m_lblk))
+			map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+		retval = 0;
+	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
 	else
 		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
@@ -1814,6 +1821,10 @@
 				goto out_unlock;
 		}
 
+		retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len);
+		if (retval)
+			goto out_unlock;
+
 		/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
 		 * and it should not appear on the bh->b_state.
 		 */
@@ -1842,8 +1853,8 @@
  * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
  * initialized properly.
  */
-static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
-				  struct buffer_head *bh, int create)
+int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
+			   struct buffer_head *bh, int create)
 {
 	struct ext4_map_blocks map;
 	int ret = 0;
@@ -1917,15 +1928,29 @@
 {
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
-	struct buffer_head *page_bufs;
+	struct buffer_head *page_bufs = NULL;
 	handle_t *handle = NULL;
-	int ret = 0;
-	int err;
+	int ret = 0, err = 0;
+	int inline_data = ext4_has_inline_data(inode);
+	struct buffer_head *inode_bh = NULL;
 
 	ClearPageChecked(page);
-	page_bufs = page_buffers(page);
-	BUG_ON(!page_bufs);
-	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
+
+	if (inline_data) {
+		BUG_ON(page->index != 0);
+		BUG_ON(len > ext4_get_max_inline_size(inode));
+		inode_bh = ext4_journalled_write_inline_data(inode, len, page);
+		if (inode_bh == NULL)
+			goto out;
+	} else {
+		page_bufs = page_buffers(page);
+		if (!page_bufs) {
+			BUG();
+			goto out;
+		}
+		ext4_walk_page_buffers(handle, page_bufs, 0, len,
+				       NULL, bget_one);
+	}
 	/* As soon as we unlock the page, it can go away, but we have
 	 * references to buffers so we are safe */
 	unlock_page(page);
@@ -1938,11 +1963,18 @@
 
 	BUG_ON(!ext4_handle_valid(handle));
 
-	ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
-				do_journal_get_write_access);
+	if (inline_data) {
+		ret = ext4_journal_get_write_access(handle, inode_bh);
 
-	err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
-				write_end_fn);
+		err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
+
+	} else {
+		ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
+					     do_journal_get_write_access);
+
+		err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
+					     write_end_fn);
+	}
 	if (ret == 0)
 		ret = err;
 	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
@@ -1950,9 +1982,12 @@
 	if (!ret)
 		ret = err;
 
-	walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
+	if (!ext4_has_inline_data(inode))
+		ext4_walk_page_buffers(handle, page_bufs, 0, len,
+				       NULL, bput_one);
 	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 out:
+	brelse(inode_bh);
 	return ret;
 }
 
@@ -2029,8 +2064,8 @@
 		commit_write = 1;
 	}
 	page_bufs = page_buffers(page);
-	if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-			      ext4_bh_delay_or_unwritten)) {
+	if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+				   ext4_bh_delay_or_unwritten)) {
 		/*
 		 * We don't want to do block allocation, so redirty
 		 * the page and return.  We may reach here when we do
@@ -2096,7 +2131,8 @@
  * mpage_da_map_and_submit to map a single contiguous memory region
  * and then write them.
  */
-static int write_cache_pages_da(struct address_space *mapping,
+static int write_cache_pages_da(handle_t *handle,
+				struct address_space *mapping,
 				struct writeback_control *wbc,
 				struct mpage_da_data *mpd,
 				pgoff_t *done_index)
@@ -2175,6 +2211,17 @@
 			wait_on_page_writeback(page);
 			BUG_ON(PageWriteback(page));
 
+			/*
+			 * If we have inline data and arrive here, it means that
+			 * we will soon create the block for the 1st page, so
+			 * we'd better clear the inline data here.
+			 */
+			if (ext4_has_inline_data(inode)) {
+				BUG_ON(ext4_test_inode_state(inode,
+						EXT4_STATE_MAY_INLINE_DATA));
+				ext4_destroy_inline_data(handle, inode);
+			}
+
 			if (mpd->next_page != page->index)
 				mpd->first_page = page->index;
 			mpd->next_page = page->index + 1;
@@ -2381,7 +2428,8 @@
 		 * contiguous region of logical blocks that need
 		 * blocks to be allocated by ext4 and submit them.
 		 */
-		ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
+		ret = write_cache_pages_da(handle, mapping,
+					   wbc, &mpd, &done_index);
 		/*
 		 * If we have a contiguous extent of pages and we
 		 * haven't done the I/O yet, map the blocks and submit
@@ -2445,7 +2493,6 @@
 	return ret;
 }
 
-#define FALL_BACK_TO_NONDELALLOC 1
 static int ext4_nonda_switch(struct super_block *sb)
 {
 	s64 free_blocks, dirty_blocks;
@@ -2502,6 +2549,19 @@
 	}
 	*fsdata = (void *)0;
 	trace_ext4_da_write_begin(inode, pos, len, flags);
+
+	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
+		ret = ext4_da_write_inline_data_begin(mapping, inode,
+						      pos, len, flags,
+						      pagep, fsdata);
+		if (ret < 0)
+			goto out;
+		if (ret == 1) {
+			ret = 0;
+			goto out;
+		}
+	}
+
 retry:
 	/*
 	 * With delayed allocation, we don't log the i_disksize update
@@ -2603,22 +2663,13 @@
 	 * changes.  So let's piggyback the i_disksize mark_inode_dirty
 	 * into that.
 	 */
-
 	new_i_size = pos + copied;
 	if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
-		if (ext4_da_should_update_i_disksize(page, end)) {
+		if (ext4_has_inline_data(inode) ||
+		    ext4_da_should_update_i_disksize(page, end)) {
 			down_write(&EXT4_I(inode)->i_data_sem);
-			if (new_i_size > EXT4_I(inode)->i_disksize) {
-				/*
-				 * Updating i_disksize when extending file
-				 * without needing block allocation
-				 */
-				if (ext4_should_order_data(inode))
-					ret = ext4_jbd2_file_inode(handle,
-								   inode);
-
+			if (new_i_size > EXT4_I(inode)->i_disksize)
 				EXT4_I(inode)->i_disksize = new_i_size;
-			}
 			up_write(&EXT4_I(inode)->i_data_sem);
 			/* We need to mark inode dirty even if
 			 * new_i_size is less that inode->i_size
@@ -2627,8 +2678,16 @@
 			ext4_mark_inode_dirty(handle, inode);
 		}
 	}
-	ret2 = generic_write_end(file, mapping, pos, len, copied,
+
+	if (write_mode != CONVERT_INLINE_DATA &&
+	    ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
+	    ext4_has_inline_data(inode))
+		ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied,
+						     page);
+	else
+		ret2 = generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
+
 	copied = ret2;
 	if (ret2 < 0)
 		ret = ret2;
@@ -2721,6 +2780,12 @@
 	journal_t *journal;
 	int err;
 
+	/*
+	 * We can get here for an inline file via the FIBMAP ioctl
+	 */
+	if (ext4_has_inline_data(inode))
+		return 0;
+
 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
 			test_opt(inode->i_sb, DELALLOC)) {
 		/*
@@ -2766,14 +2831,30 @@
 
 static int ext4_readpage(struct file *file, struct page *page)
 {
+	int ret = -EAGAIN;
+	struct inode *inode = page->mapping->host;
+
 	trace_ext4_readpage(page);
-	return mpage_readpage(page, ext4_get_block);
+
+	if (ext4_has_inline_data(inode))
+		ret = ext4_readpage_inline(inode, page);
+
+	if (ret == -EAGAIN)
+		return mpage_readpage(page, ext4_get_block);
+
+	return ret;
 }
 
 static int
 ext4_readpages(struct file *file, struct address_space *mapping,
 		struct list_head *pages, unsigned nr_pages)
 {
+	struct inode *inode = mapping->host;
+
+	/* If the file has inline data, no need to do readpages. */
+	if (ext4_has_inline_data(inode))
+		return 0;
+
 	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
@@ -2840,7 +2921,7 @@
  * We allocate an uinitialized extent if blocks haven't been allocated.
  * The extent will be converted to initialized after the IO is complete.
  */
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
+int ext4_get_block_write(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create)
 {
 	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
@@ -2850,29 +2931,12 @@
 }
 
 static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
-		   struct buffer_head *bh_result, int flags)
+		   struct buffer_head *bh_result, int create)
 {
-	handle_t *handle = ext4_journal_current_handle();
-	struct ext4_map_blocks map;
-	int ret = 0;
-
-	ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n",
-		   inode->i_ino, flags);
-
-	flags = EXT4_GET_BLOCKS_NO_LOCK;
-
-	map.m_lblk = iblock;
-	map.m_len = bh_result->b_size >> inode->i_blkbits;
-
-	ret = ext4_map_blocks(handle, inode, &map, flags);
-	if (ret > 0) {
-		map_bh(bh_result, inode->i_sb, map.m_pblk);
-		bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
-					map.m_flags;
-		bh_result->b_size = inode->i_sb->s_blocksize * map.m_len;
-		ret = 0;
-	}
-	return ret;
+	ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+		   inode->i_ino, create);
+	return _ext4_get_block(inode, iblock, bh_result,
+			       EXT4_GET_BLOCKS_NO_LOCK);
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -2978,10 +3042,10 @@
  * fall back to buffered IO.
  *
  * For holes, we fallocate those blocks, mark them as uninitialized
- * If those blocks were preallocated, we mark sure they are splited, but
+ * If those blocks were preallocated, we make sure they are split, but
  * still keep the range to write as uninitialized.
  *
- * The unwrritten extents will be converted to written when DIO is completed.
+ * The unwritten extents will be converted to written when DIO is completed.
  * For async direct IO, since the IO may still pending when return, we
  * set up an end_io call back function, which will do the conversion
  * when async direct IO completed.
@@ -2999,125 +3063,120 @@
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 	size_t count = iov_length(iov, nr_segs);
-
+	int overwrite = 0;
+	get_block_t *get_block_func = NULL;
+	int dio_flags = 0;
 	loff_t final_size = offset + count;
-	if (rw == WRITE && final_size <= inode->i_size) {
-		int overwrite = 0;
 
-		BUG_ON(iocb->private == NULL);
+	/* Use the old path for reads and writes beyond i_size. */
+	if (rw != WRITE || final_size > inode->i_size)
+		return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
 
-		/* If we do a overwrite dio, i_mutex locking can be released */
-		overwrite = *((int *)iocb->private);
+	BUG_ON(iocb->private == NULL);
 
-		if (overwrite) {
-			atomic_inc(&inode->i_dio_count);
-			down_read(&EXT4_I(inode)->i_data_sem);
-			mutex_unlock(&inode->i_mutex);
-		}
+	/* If we do an overwrite DIO, i_mutex locking can be released */
+	overwrite = *((int *)iocb->private);
 
-		/*
- 		 * We could direct write to holes and fallocate.
-		 *
- 		 * Allocated blocks to fill the hole are marked as uninitialized
- 		 * to prevent parallel buffered read to expose the stale data
- 		 * before DIO complete the data IO.
-		 *
- 		 * As to previously fallocated extents, ext4 get_block
- 		 * will just simply mark the buffer mapped but still
- 		 * keep the extents uninitialized.
- 		 *
-		 * for non AIO case, we will convert those unwritten extents
-		 * to written after return back from blockdev_direct_IO.
-		 *
-		 * for async DIO, the conversion needs to be defered when
-		 * the IO is completed. The ext4 end_io callback function
-		 * will be called to take care of the conversion work.
-		 * Here for async case, we allocate an io_end structure to
-		 * hook to the iocb.
- 		 */
-		iocb->private = NULL;
-		ext4_inode_aio_set(inode, NULL);
-		if (!is_sync_kiocb(iocb)) {
-			ext4_io_end_t *io_end =
-				ext4_init_io_end(inode, GFP_NOFS);
-			if (!io_end) {
-				ret = -ENOMEM;
-				goto retake_lock;
-			}
-			io_end->flag |= EXT4_IO_END_DIRECT;
-			iocb->private = io_end;
-			/*
-			 * we save the io structure for current async
-			 * direct IO, so that later ext4_map_blocks()
-			 * could flag the io structure whether there
-			 * is a unwritten extents needs to be converted
-			 * when IO is completed.
-			 */
-			ext4_inode_aio_set(inode, io_end);
-		}
-
-		if (overwrite)
-			ret = __blockdev_direct_IO(rw, iocb, inode,
-						 inode->i_sb->s_bdev, iov,
-						 offset, nr_segs,
-						 ext4_get_block_write_nolock,
-						 ext4_end_io_dio,
-						 NULL,
-						 0);
-		else
-			ret = __blockdev_direct_IO(rw, iocb, inode,
-						 inode->i_sb->s_bdev, iov,
-						 offset, nr_segs,
-						 ext4_get_block_write,
-						 ext4_end_io_dio,
-						 NULL,
-						 DIO_LOCKING);
-		if (iocb->private)
-			ext4_inode_aio_set(inode, NULL);
-		/*
-		 * The io_end structure takes a reference to the inode,
-		 * that structure needs to be destroyed and the
-		 * reference to the inode need to be dropped, when IO is
-		 * complete, even with 0 byte write, or failed.
-		 *
-		 * In the successful AIO DIO case, the io_end structure will be
-		 * desctroyed and the reference to the inode will be dropped
-		 * after the end_io call back function is called.
-		 *
-		 * In the case there is 0 byte write, or error case, since
-		 * VFS direct IO won't invoke the end_io call back function,
-		 * we need to free the end_io structure here.
-		 */
-		if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
-			ext4_free_io_end(iocb->private);
-			iocb->private = NULL;
-		} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
-						EXT4_STATE_DIO_UNWRITTEN)) {
-			int err;
-			/*
-			 * for non AIO case, since the IO is already
-			 * completed, we could do the conversion right here
-			 */
-			err = ext4_convert_unwritten_extents(inode,
-							     offset, ret);
-			if (err < 0)
-				ret = err;
-			ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
-		}
-
-	retake_lock:
-		/* take i_mutex locking again if we do a ovewrite dio */
-		if (overwrite) {
-			inode_dio_done(inode);
-			up_read(&EXT4_I(inode)->i_data_sem);
-			mutex_lock(&inode->i_mutex);
-		}
-
-		return ret;
+	if (overwrite) {
+		atomic_inc(&inode->i_dio_count);
+		down_read(&EXT4_I(inode)->i_data_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 
-	/* for write the the end of file case, we fall back to old way */
-	return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+	/*
+	 * We could direct write to holes and fallocate.
+	 *
+	 * Allocated blocks to fill the hole are marked as
+	 * uninitialized to prevent a parallel buffered read from
+	 * exposing the stale data before DIO completes the data IO.
+	 *
+	 * As for previously fallocated extents, ext4 get_block will
+	 * simply mark the buffer mapped but still keep the
+	 * extents uninitialized.
+	 *
+	 * For the non-AIO case, we will convert those unwritten
+	 * extents to written after returning from blockdev_direct_IO.
+	 *
+	 * For async DIO, the conversion needs to be deferred until
+	 * the IO is completed. The ext4 end_io callback function will be
+	 * called to take care of the conversion work.  Here for async
+	 * case, we allocate an io_end structure to hook to the iocb.
+	 */
+	iocb->private = NULL;
+	ext4_inode_aio_set(inode, NULL);
+	if (!is_sync_kiocb(iocb)) {
+		ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
+		if (!io_end) {
+			ret = -ENOMEM;
+			goto retake_lock;
+		}
+		io_end->flag |= EXT4_IO_END_DIRECT;
+		iocb->private = io_end;
+		/*
+		 * we save the io structure for the current async direct
+		 * IO, so that later ext4_map_blocks() can flag the io
+		 * structure when there are unwritten extents that need
+		 * to be converted when IO is completed.
+		 */
+		ext4_inode_aio_set(inode, io_end);
+	}
+
+	if (overwrite) {
+		get_block_func = ext4_get_block_write_nolock;
+	} else {
+		get_block_func = ext4_get_block_write;
+		dio_flags = DIO_LOCKING;
+	}
+	ret = __blockdev_direct_IO(rw, iocb, inode,
+				   inode->i_sb->s_bdev, iov,
+				   offset, nr_segs,
+				   get_block_func,
+				   ext4_end_io_dio,
+				   NULL,
+				   dio_flags);
+
+	if (iocb->private)
+		ext4_inode_aio_set(inode, NULL);
+	/*
+	 * The io_end structure takes a reference to the inode; that
+	 * structure needs to be destroyed and the reference to the
+	 * inode needs to be dropped when IO is complete, even for a
+	 * zero-byte write or a failed one.
+	 *
+	 * In the successful AIO DIO case, the io_end structure will
+	 * be destroyed and the reference to the inode will be dropped
+	 * after the end_io call back function is called.
+	 *
+	 * In the zero-byte write or error case, since VFS direct IO
+	 * won't invoke the end_io call back function, we need to
+	 * free the end_io structure here.
+	 */
+	if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
+		ext4_free_io_end(iocb->private);
+		iocb->private = NULL;
+	} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
+						EXT4_STATE_DIO_UNWRITTEN)) {
+		int err;
+		/*
+		 * for the non-AIO case, since the IO is already
+		 * completed, we can do the conversion right here
+		 */
+		err = ext4_convert_unwritten_extents(inode,
+						     offset, ret);
+		if (err < 0)
+			ret = err;
+		ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+	}
+
+retake_lock:
+	/* take i_mutex locking again if we do an overwrite DIO */
+	if (overwrite) {
+		inode_dio_done(inode);
+		up_read(&EXT4_I(inode)->i_data_sem);
+		mutex_lock(&inode->i_mutex);
+	}
+
+	return ret;
 }
 
 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
@@ -3134,6 +3193,10 @@
 	if (ext4_should_journal_data(inode))
 		return 0;
 
+	/* Let buffer I/O handle the inline data case. */
+	if (ext4_has_inline_data(inode))
+		return 0;
+
 	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -3531,6 +3594,14 @@
 	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 
+	if (ext4_has_inline_data(inode)) {
+		int has_inline = 1;
+
+		ext4_inline_data_truncate(inode, &has_inline);
+		if (has_inline)
+			return;
+	}
+
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		ext4_ext_truncate(inode);
 	else
@@ -3756,6 +3827,19 @@
 	}
 }
 
+static inline void ext4_iget_extra_inode(struct inode *inode,
+					 struct ext4_inode *raw_inode,
+					 struct ext4_inode_info *ei)
+{
+	__le32 *magic = (void *)raw_inode +
+			EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
+	if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
+		ext4_find_inline_data_nolock(inode);
+	} else
+		EXT4_I(inode)->i_inline_off = 0;
+}
+
 struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 {
 	struct ext4_iloc iloc;
@@ -3826,6 +3910,7 @@
 	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
 	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
+	ei->i_inline_off = 0;
 	ei->i_dir_start_lookup = 0;
 	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
 	/* We now have enough fields to check if the inode was active or not.
@@ -3898,11 +3983,7 @@
 			ei->i_extra_isize = sizeof(struct ext4_inode) -
 					    EXT4_GOOD_OLD_INODE_SIZE;
 		} else {
-			__le32 *magic = (void *)raw_inode +
-					EXT4_GOOD_OLD_INODE_SIZE +
-					ei->i_extra_isize;
-			if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
-				ext4_set_inode_state(inode, EXT4_STATE_XATTR);
+			ext4_iget_extra_inode(inode, raw_inode, ei);
 		}
 	}
 
@@ -3925,17 +4006,19 @@
 				 ei->i_file_acl);
 		ret = -EIO;
 		goto bad_inode;
-	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		    (S_ISLNK(inode->i_mode) &&
-		     !ext4_inode_is_fast_symlink(inode)))
-			/* Validate extent which is part of inode */
-			ret = ext4_ext_check_inode(inode);
-	} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		   (S_ISLNK(inode->i_mode) &&
-		    !ext4_inode_is_fast_symlink(inode))) {
-		/* Validate block references which are part of inode */
-		ret = ext4_ind_check_inode(inode);
+	} else if (!ext4_has_inline_data(inode)) {
+		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+			if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+			    (S_ISLNK(inode->i_mode) &&
+			     !ext4_inode_is_fast_symlink(inode))))
+				/* Validate extent which is part of inode */
+				ret = ext4_ext_check_inode(inode);
+		} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+			   (S_ISLNK(inode->i_mode) &&
+			    !ext4_inode_is_fast_symlink(inode))) {
+			/* Validate block references which are part of inode */
+			ret = ext4_ind_check_inode(inode);
+		}
 	}
 	if (ret)
 		goto bad_inode;
@@ -4122,9 +4205,10 @@
 				cpu_to_le32(new_encode_dev(inode->i_rdev));
 			raw_inode->i_block[2] = 0;
 		}
-	} else
+	} else if (!ext4_has_inline_data(inode)) {
 		for (block = 0; block < EXT4_N_BLOCKS; block++)
 			raw_inode->i_block[block] = ei->i_data[block];
+	}
 
 	raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
 	if (ei->i_extra_isize) {
@@ -4811,8 +4895,9 @@
 	 * journal_start/journal_stop which can block and take a long time
 	 */
 	if (page_has_buffers(page)) {
-		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-					ext4_bh_unmapped)) {
+		if (!ext4_walk_page_buffers(NULL, page_buffers(page),
+					    0, len, NULL,
+					    ext4_bh_unmapped)) {
 			/* Wait so that we don't change page under IO */
 			wait_on_page_writeback(page);
 			ret = VM_FAULT_LOCKED;
@@ -4833,7 +4918,7 @@
 	}
 	ret = __block_page_mkwrite(vma, vmf, get_block);
 	if (!ret && ext4_should_journal_data(inode)) {
-		if (walk_page_buffers(handle, page_buffers(page), 0,
+		if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
 			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
 			unlock_page(page);
 			ret = VM_FAULT_SIGBUS;
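
A pattern worth calling out across the inode.c hunks above: each inline-data
hook (ext4_try_to_write_inline_data(), ext4_da_write_inline_data_begin()) uses
a tri-state result, where a negative value is an error, 1 means the request was
fully served from the inline area, and 0 means fall through to the normal block
path (ext4_readpage_inline() uses -EAGAIN for the fall-through case instead).
A minimal sketch of that convention; try_inline_write() and block_write() are
hypothetical stand-ins, not ext4 functions:

#include <stdio.h>

/* Toy capacity check: pretend 60 bytes fit inline. */
static int try_inline_write(size_t pos, size_t len)
{
	return (pos + len <= 60) ? 1 : 0;
}

static int block_write(size_t pos, size_t len)
{
	printf("block path: pos=%zu len=%zu\n", pos, len);
	return 0;
}

static int write_begin(size_t pos, size_t len)
{
	int ret = try_inline_write(pos, len);

	if (ret < 0)
		return ret;		/* hard error: propagate */
	if (ret == 1)
		return 0;		/* fully handled inline */
	return block_write(pos, len);	/* fall back to block allocation */
}

int main(void)
{
	write_begin(0, 32);		/* stays inline */
	write_begin(0, 4096);		/* falls back */
	return 0;
}
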
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 526e553..1bf6fe7 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1373,7 +1373,7 @@
 	ex->fe_start += next;
 
 	while (needed > ex->fe_len &&
-	       (buddy = mb_find_buddy(e4b, order, &max))) {
+	       mb_find_buddy(e4b, order, &max)) {
 
 		if (block + 1 >= max)
 			break;
@@ -2607,9 +2607,17 @@
 	mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
 		 entry->efd_count, entry->efd_group, entry);
 
-	if (test_opt(sb, DISCARD))
-		ext4_issue_discard(sb, entry->efd_group,
-				   entry->efd_start_cluster, entry->efd_count);
+	if (test_opt(sb, DISCARD)) {
+		err = ext4_issue_discard(sb, entry->efd_group,
+					 entry->efd_start_cluster,
+					 entry->efd_count);
+		if (err && err != -EOPNOTSUPP)
+			ext4_msg(sb, KERN_WARNING, "discard request in"
+				 " group:%d block:%d count:%d failed"
+				 " with %d", entry->efd_group,
+				 entry->efd_start_cluster,
+				 entry->efd_count, err);
+	}
 
 	err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
 	/* we expect to find existing buddy because it's pinned */
@@ -4310,8 +4318,10 @@
 repeat:
 		/* allocate space in core */
 		*errp = ext4_mb_regular_allocator(ac);
-		if (*errp)
+		if (*errp) {
+			ext4_discard_allocated_blocks(ac);
 			goto errout;
+		}
 
 		/* as we've just preallocated more space than
 		 * user requested orinally, we store allocated
@@ -4333,10 +4343,10 @@
 			ac->ac_b_ex.fe_len = 0;
 			ac->ac_status = AC_STATUS_CONTINUE;
 			goto repeat;
-		} else if (*errp)
-		errout:
+		} else if (*errp) {
 			ext4_discard_allocated_blocks(ac);
-		else {
+			goto errout;
+		} else {
 			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
 			ar->len = ac->ac_b_ex.fe_len;
 		}
@@ -4347,6 +4357,7 @@
 		*errp = -ENOSPC;
 	}
 
+errout:
 	if (*errp) {
 		ac->ac_b_ex.fe_len = 0;
 		ar->len = 0;
@@ -4656,8 +4667,16 @@
 		 * with group lock held. generate_buddy look at
 		 * them with group lock_held
 		 */
-		if (test_opt(sb, DISCARD))
-			ext4_issue_discard(sb, block_group, bit, count);
+		if (test_opt(sb, DISCARD)) {
+			err = ext4_issue_discard(sb, block_group, bit, count);
+			if (err && err != -EOPNOTSUPP)
+				ext4_msg(sb, KERN_WARNING, "discard request in"
+					 " group:%d block:%d count:%lu failed"
+					 " with %d", block_group, bit, count,
+					 err);
+		}
+
+
 		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
 		mb_free_blocks(inode, &e4b, bit, count_clusters);
@@ -4851,10 +4870,11 @@
  * one will allocate those blocks, mark it as used in buddy bitmap. This must
  * be called with under the group lock.
  */
-static void ext4_trim_extent(struct super_block *sb, int start, int count,
+static int ext4_trim_extent(struct super_block *sb, int start, int count,
 			     ext4_group_t group, struct ext4_buddy *e4b)
 {
 	struct ext4_free_extent ex;
+	int ret = 0;
 
 	trace_ext4_trim_extent(sb, group, start, count);
 
@@ -4870,9 +4890,10 @@
 	 */
 	mb_mark_used(e4b, &ex);
 	ext4_unlock_group(sb, group);
-	ext4_issue_discard(sb, group, start, count);
+	ret = ext4_issue_discard(sb, group, start, count);
 	ext4_lock_group(sb, group);
 	mb_free_blocks(NULL, e4b, start, ex.fe_len);
+	return ret;
 }
 
 /**
@@ -4901,7 +4922,7 @@
 	void *bitmap;
 	ext4_grpblk_t next, count = 0, free_count = 0;
 	struct ext4_buddy e4b;
-	int ret;
+	int ret = 0;
 
 	trace_ext4_trim_all_free(sb, group, start, max);
 
@@ -4928,8 +4949,11 @@
 		next = mb_find_next_bit(bitmap, max + 1, start);
 
 		if ((next - start) >= minblocks) {
-			ext4_trim_extent(sb, start,
-					 next - start, group, &e4b);
+			ret = ext4_trim_extent(sb, start,
+					       next - start, group, &e4b);
+			if (ret && ret != -EOPNOTSUPP)
+				break;
+			ret = 0;
 			count += next - start;
 		}
 		free_count += next - start;
@@ -4950,8 +4974,10 @@
 			break;
 	}
 
-	if (!ret)
+	if (!ret) {
+		ret = count;
 		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+	}
 out:
 	ext4_unlock_group(sb, group);
 	ext4_mb_unload_buddy(&e4b);
@@ -4959,7 +4985,7 @@
 	ext4_debug("trimmed %d blocks in the group %d\n",
 		count, group);
 
-	return count;
+	return ret;
 }
 
 /**
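
The mballoc.c changes above all implement one policy: ext4_issue_discard()
failures are now propagated and logged, except -EOPNOTSUPP, which only means
the underlying device has no discard support and should neither abort the
operation nor spam the log. A compact userspace sketch of that policy;
issue_discard() is a hypothetical stand-in for the kernel helper:

#include <errno.h>
#include <stdio.h>

/* Pretend the device lacks discard support. */
static int issue_discard(unsigned group, unsigned start, unsigned count)
{
	return -EOPNOTSUPP;
}

static int trim_range(unsigned group, unsigned start, unsigned count)
{
	int ret = issue_discard(group, start, count);

	if (ret && ret != -EOPNOTSUPP) {
		fprintf(stderr,
			"discard in group:%u block:%u count:%u failed with %d\n",
			group, start, count, ret);
		return ret;		/* real failure: stop */
	}
	return 0;			/* success or unsupported: continue */
}

int main(void)
{
	return trim_range(0, 100, 32) ? 1 : 0;
}
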
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f1bb32e..db8226d 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -14,6 +14,7 @@
 
 #include <linux/slab.h>
 #include "ext4_jbd2.h"
+#include "ext4_extents.h"
 
 /*
  * The contiguous blocks details which can be
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 292daee..d9cc5ee 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
+#include "ext4_extents.h"
 
 /**
  * get_ext_path - Find an extent path for designated logical block number.
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6d600a6..cac4482 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -202,13 +202,8 @@
 			     struct inode *inode);
 
 /* checksumming functions */
-#define EXT4_DIRENT_TAIL(block, blocksize) \
-	((struct ext4_dir_entry_tail *)(((void *)(block)) + \
-					((blocksize) - \
-					 sizeof(struct ext4_dir_entry_tail))))
-
-static void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
-				   unsigned int blocksize)
+void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
+			    unsigned int blocksize)
 {
 	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
 	t->det_rec_len = ext4_rec_len_to_disk(
@@ -261,6 +256,12 @@
 	return cpu_to_le32(csum);
 }
 
+static void warn_no_space_for_csum(struct inode *inode)
+{
+	ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
+		     "checksum.  Please run e2fsck -D.", inode->i_ino);
+}
+
 int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
 {
 	struct ext4_dir_entry_tail *t;
@@ -271,8 +272,7 @@
 
 	t = get_dirent_tail(inode, dirent);
 	if (!t) {
-		EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
-				 "leaf for checksum.  Please run e2fsck -D.");
+		warn_no_space_for_csum(inode);
 		return 0;
 	}
 
@@ -294,8 +294,7 @@
 
 	t = get_dirent_tail(inode, dirent);
 	if (!t) {
-		EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
-				 "leaf for checksum.  Please run e2fsck -D.");
+		warn_no_space_for_csum(inode);
 		return;
 	}
 
@@ -303,9 +302,9 @@
 					   (void *)t - (void *)dirent);
 }
 
-static inline int ext4_handle_dirty_dirent_node(handle_t *handle,
-						struct inode *inode,
-						struct buffer_head *bh)
+int ext4_handle_dirty_dirent_node(handle_t *handle,
+				  struct inode *inode,
+				  struct buffer_head *bh)
 {
 	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
 	return ext4_handle_dirty_metadata(handle, inode, bh);
@@ -377,8 +376,7 @@
 	count = le16_to_cpu(c->count);
 	if (count_offset + (limit * sizeof(struct dx_entry)) >
 	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
-		EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
-				 "tree checksum found.  Run e2fsck -D.");
+		warn_no_space_for_csum(inode);
 		return 1;
 	}
 	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
@@ -408,8 +406,7 @@
 	count = le16_to_cpu(c->count);
 	if (count_offset + (limit * sizeof(struct dx_entry)) >
 	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
-		EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
-				 "tree checksum.  Run e2fsck -D.");
+		warn_no_space_for_csum(inode);
 		return;
 	}
 	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
@@ -890,6 +887,7 @@
 					   EXT4_DIR_REC_LEN(0));
 	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
 		if (ext4_check_dir_entry(dir, NULL, de, bh,
+				bh->b_data, bh->b_size,
 				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
 					 + ((char *)de - bh->b_data))) {
 			/* On error, skip the f_pos to the next block. */
@@ -1007,6 +1005,15 @@
 	return (err);
 }
 
+static inline int search_dirblock(struct buffer_head *bh,
+				  struct inode *dir,
+				  const struct qstr *d_name,
+				  unsigned int offset,
+				  struct ext4_dir_entry_2 **res_dir)
+{
+	return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
+			  d_name, offset, res_dir);
+}
 
 /*
  * Directory block splitting, compacting
@@ -1081,13 +1088,6 @@
 	dx_set_count(entries, count + 1);
 }
 
-static void ext4_update_dx_flag(struct inode *inode)
-{
-	if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
-				     EXT4_FEATURE_COMPAT_DIR_INDEX))
-		ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
-}
-
 /*
  * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
  *
@@ -1107,11 +1107,13 @@
 /*
  * Returns 0 if not found, -1 on failure, and 1 on success
  */
-static inline int search_dirblock(struct buffer_head *bh,
-				  struct inode *dir,
-				  const struct qstr *d_name,
-				  unsigned int offset,
-				  struct ext4_dir_entry_2 ** res_dir)
+int search_dir(struct buffer_head *bh,
+	       char *search_buf,
+	       int buf_size,
+	       struct inode *dir,
+	       const struct qstr *d_name,
+	       unsigned int offset,
+	       struct ext4_dir_entry_2 **res_dir)
 {
 	struct ext4_dir_entry_2 * de;
 	char * dlimit;
@@ -1119,8 +1121,8 @@
 	const char *name = d_name->name;
 	int namelen = d_name->len;
 
-	de = (struct ext4_dir_entry_2 *) bh->b_data;
-	dlimit = bh->b_data + dir->i_sb->s_blocksize;
+	de = (struct ext4_dir_entry_2 *)search_buf;
+	dlimit = search_buf + buf_size;
 	while ((char *) de < dlimit) {
 		/* this code is executed quadratically often */
 		/* do minimal checking `by hand' */
@@ -1128,7 +1130,8 @@
 		if ((char *) de + namelen <= dlimit &&
 		    ext4_match (namelen, name, de)) {
 			/* found a match - just to be sure, do a full check */
-			if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
+			if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
+						 bh->b_size, offset))
 				return -1;
 			*res_dir = de;
 			return 1;
@@ -1144,6 +1147,21 @@
 	return 0;
 }
 
+static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
+			       struct ext4_dir_entry *de)
+{
+	struct super_block *sb = dir->i_sb;
+
+	if (!is_dx(dir))
+		return 0;
+	if (block == 0)
+		return 1;
+	if (de->inode == 0 &&
+	    ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
+			sb->s_blocksize)
+		return 1;
+	return 0;
+}
 
 /*
  *	ext4_find_entry()
@@ -1158,7 +1176,8 @@
  */
 static struct buffer_head * ext4_find_entry (struct inode *dir,
 					const struct qstr *d_name,
-					struct ext4_dir_entry_2 ** res_dir)
+					struct ext4_dir_entry_2 **res_dir,
+					int *inlined)
 {
 	struct super_block *sb;
 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
@@ -1179,6 +1198,18 @@
 	namelen = d_name->len;
 	if (namelen > EXT4_NAME_LEN)
 		return NULL;
+
+	if (ext4_has_inline_data(dir)) {
+		int has_inline_data = 1;
+		ret = ext4_find_inline_entry(dir, d_name, res_dir,
+					     &has_inline_data);
+		if (has_inline_data) {
+			if (inlined)
+				*inlined = 1;
+			return ret;
+		}
+	}
+
 	if ((namelen <= 2) && (name[0] == '.') &&
 	    (name[1] == '.' || name[1] == '\0')) {
 		/*
@@ -1244,6 +1275,8 @@
 			goto next;
 		}
 		if (!buffer_verified(bh) &&
+		    !is_dx_internal_node(dir, block,
+					 (struct ext4_dir_entry *)bh->b_data) &&
 		    !ext4_dirent_csum_verify(dir,
 				(struct ext4_dir_entry *)bh->b_data)) {
 			EXT4_ERROR_INODE(dir, "checksumming directory "
@@ -1361,7 +1394,7 @@
 	if (dentry->d_name.len > EXT4_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	bh = ext4_find_entry(dir, &dentry->d_name, &de);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
 	inode = NULL;
 	if (bh) {
 		__u32 ino = le32_to_cpu(de->inode);
@@ -1395,7 +1428,7 @@
 	struct ext4_dir_entry_2 * de;
 	struct buffer_head *bh;
 
-	bh = ext4_find_entry(child->d_inode, &dotdot, &de);
+	bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
 	if (!bh)
 		return ERR_PTR(-ENOENT);
 	ino = le32_to_cpu(de->inode);
@@ -1593,6 +1626,63 @@
 	return NULL;
 }
 
+int ext4_find_dest_de(struct inode *dir, struct inode *inode,
+		      struct buffer_head *bh,
+		      void *buf, int buf_size,
+		      const char *name, int namelen,
+		      struct ext4_dir_entry_2 **dest_de)
+{
+	struct ext4_dir_entry_2 *de;
+	unsigned short reclen = EXT4_DIR_REC_LEN(namelen);
+	int nlen, rlen;
+	unsigned int offset = 0;
+	char *top;
+
+	de = (struct ext4_dir_entry_2 *)buf;
+	top = buf + buf_size - reclen;
+	while ((char *) de <= top) {
+		if (ext4_check_dir_entry(dir, NULL, de, bh,
+					 buf, buf_size, offset))
+			return -EIO;
+		if (ext4_match(namelen, name, de))
+			return -EEXIST;
+		nlen = EXT4_DIR_REC_LEN(de->name_len);
+		rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+		if ((de->inode ? rlen - nlen : rlen) >= reclen)
+			break;
+		de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
+		offset += rlen;
+	}
+	if ((char *) de > top)
+		return -ENOSPC;
+
+	*dest_de = de;
+	return 0;
+}
+
+void ext4_insert_dentry(struct inode *inode,
+			struct ext4_dir_entry_2 *de,
+			int buf_size,
+			const char *name, int namelen)
+{
+
+	int nlen, rlen;
+
+	nlen = EXT4_DIR_REC_LEN(de->name_len);
+	rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+	if (de->inode) {
+		struct ext4_dir_entry_2 *de1 =
+				(struct ext4_dir_entry_2 *)((char *)de + nlen);
+		de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
+		de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
+		de = de1;
+	}
+	de->file_type = EXT4_FT_UNKNOWN;
+	de->inode = cpu_to_le32(inode->i_ino);
+	ext4_set_de_type(inode->i_sb, de, inode->i_mode);
+	de->name_len = namelen;
+	memcpy(de->name, name, namelen);
+}
 /*
  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
  * it points to a directory entry which is guaranteed to be large
@@ -1608,12 +1698,10 @@
 	struct inode	*dir = dentry->d_parent->d_inode;
 	const char	*name = dentry->d_name.name;
 	int		namelen = dentry->d_name.len;
-	unsigned int	offset = 0;
 	unsigned int	blocksize = dir->i_sb->s_blocksize;
 	unsigned short	reclen;
-	int		nlen, rlen, err;
-	char		*top;
 	int		csum_size = 0;
+	int		err;
 
 	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -1621,22 +1709,11 @@
 
 	reclen = EXT4_DIR_REC_LEN(namelen);
 	if (!de) {
-		de = (struct ext4_dir_entry_2 *)bh->b_data;
-		top = bh->b_data + (blocksize - csum_size) - reclen;
-		while ((char *) de <= top) {
-			if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
-				return -EIO;
-			if (ext4_match(namelen, name, de))
-				return -EEXIST;
-			nlen = EXT4_DIR_REC_LEN(de->name_len);
-			rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
-			if ((de->inode? rlen - nlen: rlen) >= reclen)
-				break;
-			de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
-			offset += rlen;
-		}
-		if ((char *) de > top)
-			return -ENOSPC;
+		err = ext4_find_dest_de(dir, inode,
+					bh, bh->b_data, blocksize - csum_size,
+					name, namelen, &de);
+		if (err)
+			return err;
 	}
 	BUFFER_TRACE(bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, bh);
@@ -1646,19 +1723,8 @@
 	}
 
 	/* By now the buffer is marked for journaling */
-	nlen = EXT4_DIR_REC_LEN(de->name_len);
-	rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
-	if (de->inode) {
-		struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
-		de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
-		de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
-		de = de1;
-	}
-	de->file_type = EXT4_FT_UNKNOWN;
-	de->inode = cpu_to_le32(inode->i_ino);
-	ext4_set_de_type(dir->i_sb, de, inode->i_mode);
-	de->name_len = namelen;
-	memcpy(de->name, name, namelen);
+	ext4_insert_dentry(inode, de, blocksize, name, namelen);
+
 	/*
 	 * XXX shouldn't update any times until successful
 	 * completion of syscall, but too many callers depend
@@ -1831,6 +1897,17 @@
 	blocksize = sb->s_blocksize;
 	if (!dentry->d_name.len)
 		return -EINVAL;
+
+	if (ext4_has_inline_data(dir)) {
+		retval = ext4_try_add_inline_entry(handle, dentry, inode);
+		if (retval < 0)
+			return retval;
+		if (retval == 1) {
+			retval = 0;
+			return retval;
+		}
+	}
+
 	if (is_dx(dir)) {
 		retval = ext4_dx_add_entry(handle, dentry, inode);
 		if (!retval || (retval != ERR_BAD_DX_DIR))
@@ -2036,36 +2113,29 @@
 }
 
 /*
- * ext4_delete_entry deletes a directory entry by merging it with the
- * previous entry
+ * ext4_generic_delete_entry deletes a directory entry by merging it
+ * with the previous entry
  */
-static int ext4_delete_entry(handle_t *handle,
-			     struct inode *dir,
-			     struct ext4_dir_entry_2 *de_del,
-			     struct buffer_head *bh)
+int ext4_generic_delete_entry(handle_t *handle,
+			      struct inode *dir,
+			      struct ext4_dir_entry_2 *de_del,
+			      struct buffer_head *bh,
+			      void *entry_buf,
+			      int buf_size,
+			      int csum_size)
 {
 	struct ext4_dir_entry_2 *de, *pde;
 	unsigned int blocksize = dir->i_sb->s_blocksize;
-	int csum_size = 0;
-	int i, err;
-
-	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
-		csum_size = sizeof(struct ext4_dir_entry_tail);
+	int i;
 
 	i = 0;
 	pde = NULL;
-	de = (struct ext4_dir_entry_2 *) bh->b_data;
-	while (i < bh->b_size - csum_size) {
-		if (ext4_check_dir_entry(dir, NULL, de, bh, i))
+	de = (struct ext4_dir_entry_2 *)entry_buf;
+	while (i < buf_size - csum_size) {
+		if (ext4_check_dir_entry(dir, NULL, de, bh,
+					 bh->b_data, bh->b_size, i))
 			return -EIO;
 		if (de == de_del)  {
-			BUFFER_TRACE(bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle, bh);
-			if (unlikely(err)) {
-				ext4_std_error(dir->i_sb, err);
-				return err;
-			}
 			if (pde)
 				pde->rec_len = ext4_rec_len_to_disk(
 					ext4_rec_len_from_disk(pde->rec_len,
@@ -2076,12 +2146,6 @@
 			else
 				de->inode = 0;
 			dir->i_version++;
-			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-			err = ext4_handle_dirty_dirent_node(handle, dir, bh);
-			if (unlikely(err)) {
-				ext4_std_error(dir->i_sb, err);
-				return err;
-			}
 			return 0;
 		}
 		i += ext4_rec_len_from_disk(de->rec_len, blocksize);
@@ -2091,6 +2155,48 @@
 	return -ENOENT;
 }
 
+static int ext4_delete_entry(handle_t *handle,
+			     struct inode *dir,
+			     struct ext4_dir_entry_2 *de_del,
+			     struct buffer_head *bh)
+{
+	int err, csum_size = 0;
+
+	if (ext4_has_inline_data(dir)) {
+		int has_inline_data = 1;
+		err = ext4_delete_inline_entry(handle, dir, de_del, bh,
+					       &has_inline_data);
+		if (has_inline_data)
+			return err;
+	}
+
+	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
+				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+		csum_size = sizeof(struct ext4_dir_entry_tail);
+
+	BUFFER_TRACE(bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, bh);
+	if (unlikely(err))
+		goto out;
+
+	err = ext4_generic_delete_entry(handle, dir, de_del,
+					bh, bh->b_data,
+					dir->i_sb->s_blocksize, csum_size);
+	if (err)
+		goto out;
+
+	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
+	if (unlikely(err))
+		goto out;
+
+	return 0;
+out:
+	if (err != -ENOENT)
+		ext4_std_error(dir->i_sb, err);
+	return err;
+}
+
 /*
  * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
  * since this indicates that nlinks count was previously 1.
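
ext4_generic_delete_entry() above carries over ext4's long-standing deletion
trick: nothing is copied or compacted; the victim's rec_len is folded into the
preceding record, or its inode field is zeroed when it is the first record in
the buffer. A minimal model of that merge, with toy_dirent illustrative and the
checksum and journaling steps omitted:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_dirent {
	uint32_t inode;
	uint16_t rec_len;
};

static int delete_entry(void *buf, int buf_size, struct toy_dirent *victim)
{
	struct toy_dirent *de = buf, *prev = NULL;
	int off = 0;

	while (off < buf_size) {
		if (de == victim) {
			if (prev)
				prev->rec_len += de->rec_len; /* swallow it */
			else
				de->inode = 0;	/* first record: mark free */
			return 0;
		}
		off += de->rec_len;
		prev = de;
		de = (struct toy_dirent *)((char *)de + de->rec_len);
	}
	return -1;			/* ext4 returns -ENOENT here */
}

int main(void)
{
	char block[32];
	struct toy_dirent *a = (struct toy_dirent *)block;
	struct toy_dirent *b = (struct toy_dirent *)(block + 16);

	memset(block, 0, sizeof(block));
	a->inode = 11; a->rec_len = 16;
	b->inode = 12; b->rec_len = 16;
	delete_entry(block, sizeof(block), b);
	printf("first record now spans %u bytes\n", a->rec_len);
	return 0;
}
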
@@ -2211,21 +2317,95 @@
 	return err;
 }
 
-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
+			  struct ext4_dir_entry_2 *de,
+			  int blocksize, int csum_size,
+			  unsigned int parent_ino, int dotdot_real_len)
 {
-	handle_t *handle;
-	struct inode *inode;
+	de->inode = cpu_to_le32(inode->i_ino);
+	de->name_len = 1;
+	de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
+					   blocksize);
+	strcpy(de->name, ".");
+	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
+
+	de = ext4_next_entry(de, blocksize);
+	de->inode = cpu_to_le32(parent_ino);
+	de->name_len = 2;
+	if (!dotdot_real_len)
+		de->rec_len = ext4_rec_len_to_disk(blocksize -
+					(csum_size + EXT4_DIR_REC_LEN(1)),
+					blocksize);
+	else
+		de->rec_len = ext4_rec_len_to_disk(
+				EXT4_DIR_REC_LEN(de->name_len), blocksize);
+	strcpy(de->name, "..");
+	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
+
+	return ext4_next_entry(de, blocksize);
+}
+
+static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
+			     struct inode *inode)
+{
 	struct buffer_head *dir_block = NULL;
 	struct ext4_dir_entry_2 *de;
 	struct ext4_dir_entry_tail *t;
 	unsigned int blocksize = dir->i_sb->s_blocksize;
 	int csum_size = 0;
-	int err, retries = 0;
+	int err;
 
 	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
+	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
+		err = ext4_try_create_inline_dir(handle, dir, inode);
+		if (err < 0 && err != -ENOSPC)
+			goto out;
+		if (!err)
+			goto out;
+	}
+
+	inode->i_size = EXT4_I(inode)->i_disksize = blocksize;
+	if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
+		if (!err) {
+			err = -EIO;
+			ext4_error(inode->i_sb,
+				   "Directory hole detected on inode %lu\n",
+				   inode->i_ino);
+		}
+		goto out;
+	}
+	BUFFER_TRACE(dir_block, "get_write_access");
+	err = ext4_journal_get_write_access(handle, dir_block);
+	if (err)
+		goto out;
+	de = (struct ext4_dir_entry_2 *)dir_block->b_data;
+	ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
+	set_nlink(inode, 2);
+	if (csum_size) {
+		t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
+		initialize_dirent_tail(t, blocksize);
+	}
+
+	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+	if (err)
+		goto out;
+	set_buffer_verified(dir_block);
+out:
+	brelse(dir_block);
+	return err;
+}
+
+static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	handle_t *handle;
+	struct inode *inode;
+	int err, retries = 0;
+
 	if (EXT4_DIR_LINK_MAX(dir))
 		return -EMLINK;
 
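
The dotdot_real_len parameter of ext4_init_dot_dotdot() above controls one
thing: whether ".." gets only its minimal record length, so further entries can
follow (as an inline directory needs), or absorbs everything left in the block
after "." and the optional checksum tail, as a fresh on-disk directory block
does. A small sketch of the resulting record lengths, reusing the real
EXT4_DIR_REC_LEN arithmetic on a simplified scale:

#include <stdio.h>

/* Same rounding as EXT4_DIR_REC_LEN: 8-byte header + name, 4-aligned. */
#define DIR_REC_LEN(namelen) (((namelen) + 8 + 3) & ~3u)

static void show_layout(unsigned blocksize, unsigned csum_size,
			int dotdot_real_len)
{
	unsigned dot = DIR_REC_LEN(1);			/* "." */
	unsigned dotdot = dotdot_real_len ?
		DIR_REC_LEN(2) :			/* ".." kept minimal */
		blocksize - (csum_size + DIR_REC_LEN(1)); /* absorbs the rest */

	printf("dot rec_len=%u, dotdot rec_len=%u\n", dot, dotdot);
}

int main(void)
{
	show_layout(4096, 0, 0);	/* regular directory block */
	show_layout(4096, 0, 1);	/* inline directory */
	return 0;
}
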
@@ -2249,47 +2429,9 @@
 
 	inode->i_op = &ext4_dir_inode_operations;
 	inode->i_fop = &ext4_dir_operations;
-	inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-	if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
-		if (!err) {
-			err = -EIO;
-			ext4_error(inode->i_sb,
-				   "Directory hole detected on inode %lu\n",
-				   inode->i_ino);
-		}
-		goto out_clear_inode;
-	}
-	BUFFER_TRACE(dir_block, "get_write_access");
-	err = ext4_journal_get_write_access(handle, dir_block);
+	err = ext4_init_new_dir(handle, dir, inode);
 	if (err)
 		goto out_clear_inode;
-	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
-	de->inode = cpu_to_le32(inode->i_ino);
-	de->name_len = 1;
-	de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
-					   blocksize);
-	strcpy(de->name, ".");
-	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-	de = ext4_next_entry(de, blocksize);
-	de->inode = cpu_to_le32(dir->i_ino);
-	de->rec_len = ext4_rec_len_to_disk(blocksize -
-					   (csum_size + EXT4_DIR_REC_LEN(1)),
-					   blocksize);
-	de->name_len = 2;
-	strcpy(de->name, "..");
-	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-	set_nlink(inode, 2);
-
-	if (csum_size) {
-		t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
-		initialize_dirent_tail(t, blocksize);
-	}
-
-	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
-	if (err)
-		goto out_clear_inode;
-	set_buffer_verified(dir_block);
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (!err)
 		err = ext4_add_entry(handle, dentry, inode);
@@ -2309,7 +2451,6 @@
 	unlock_new_inode(inode);
 	d_instantiate(dentry, inode);
 out_stop:
-	brelse(dir_block);
 	ext4_journal_stop(handle);
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
@@ -2327,6 +2468,14 @@
 	struct super_block *sb;
 	int err = 0;
 
+	if (ext4_has_inline_data(inode)) {
+		int has_inline_data = 1;
+
+		err = empty_inline_dir(inode, &has_inline_data);
+		if (has_inline_data)
+			return err;
+	}
+
 	sb = inode->i_sb;
 	if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
 	    !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
@@ -2393,7 +2542,8 @@
 			set_buffer_verified(bh);
 			de = (struct ext4_dir_entry_2 *) bh->b_data;
 		}
-		if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
+		if (ext4_check_dir_entry(inode, NULL, de, bh,
+					 bh->b_data, bh->b_size, offset)) {
 			de = (struct ext4_dir_entry_2 *)(bh->b_data +
 							 sb->s_blocksize);
 			offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -2579,7 +2729,7 @@
 		return PTR_ERR(handle);
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
 	if (!bh)
 		goto end_rmdir;
 
@@ -2644,7 +2794,7 @@
 		ext4_handle_sync(handle);
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
 	if (!bh)
 		goto end_unlink;
 
@@ -2826,8 +2976,39 @@
 	return err;
 }
 
-#define PARENT_INO(buffer, size) \
-	(ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode)
+
+/*
+ * Try to find the buffer head which contains the parent ("..") entry.
+ * It is the inode buffer itself if the directory is inlined, or the
+ * first directory block if it is a normal dir.
+ */
+static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
+					struct inode *inode,
+					int *retval,
+					struct ext4_dir_entry_2 **parent_de,
+					int *inlined)
+{
+	struct buffer_head *bh;
+
+	if (!ext4_has_inline_data(inode)) {
+		if (!(bh = ext4_bread(handle, inode, 0, 0, retval))) {
+			if (!*retval) {
+				*retval = -EIO;
+				ext4_error(inode->i_sb,
+					   "Directory hole detected on inode %lu\n",
+					   inode->i_ino);
+			}
+			return NULL;
+		}
+		*parent_de = ext4_next_entry(
+					(struct ext4_dir_entry_2 *)bh->b_data,
+					inode->i_sb->s_blocksize);
+		return bh;
+	}
+
+	*inlined = 1;
+	return ext4_get_first_inline_block(inode, parent_de, retval);
+}
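+
+/*
+ * A usage note (inferred from ext4_rename() below, not a separate spec):
+ * the caller reads and rewrites the ".." entry through *parent_de, and
+ * *inlined tells it whether dir_bh is a real directory block that must be
+ * dirtied through the journal, or just the inode buffer, in which case
+ * marking the inode dirty is sufficient.
+ */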
 
 /*
  * Anybody can rename anything with this: the permission checks are left to the
@@ -2841,6 +3022,8 @@
 	struct buffer_head *old_bh, *new_bh, *dir_bh;
 	struct ext4_dir_entry_2 *old_de, *new_de;
 	int retval, force_da_alloc = 0;
+	int inlined = 0, new_inlined = 0;
+	struct ext4_dir_entry_2 *parent_de;
 
 	dquot_initialize(old_dir);
 	dquot_initialize(new_dir);
@@ -2860,7 +3043,7 @@
 	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
 		ext4_handle_sync(handle);
 
-	old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
+	old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL);
 	/*
 	 *  Check for inode number is _not_ due to possible IO errors.
 	 *  We might rmdir the source, keep it as pwd of some process
@@ -2873,7 +3056,8 @@
 		goto end_rename;
 
 	new_inode = new_dentry->d_inode;
-	new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
+	new_bh = ext4_find_entry(new_dir, &new_dentry->d_name,
+				 &new_de, &new_inlined);
 	if (new_bh) {
 		if (!new_inode) {
 			brelse(new_bh);
@@ -2887,22 +3071,17 @@
 				goto end_rename;
 		}
 		retval = -EIO;
-		if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) {
-			if (!retval) {
-				retval = -EIO;
-				ext4_error(old_inode->i_sb,
-					   "Directory hole detected on inode %lu\n",
-					   old_inode->i_ino);
-			}
+		dir_bh = ext4_get_first_dir_block(handle, old_inode,
+						  &retval, &parent_de,
+						  &inlined);
+		if (!dir_bh)
 			goto end_rename;
-		}
-		if (!buffer_verified(dir_bh) &&
+		if (!inlined && !buffer_verified(dir_bh) &&
 		    !ext4_dirent_csum_verify(old_inode,
 				(struct ext4_dir_entry *)dir_bh->b_data))
 			goto end_rename;
 		set_buffer_verified(dir_bh);
-		if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
-				old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
+		if (le32_to_cpu(parent_de->inode) != old_dir->i_ino)
 			goto end_rename;
 		retval = -EMLINK;
 		if (!new_inode && new_dir != old_dir &&
@@ -2931,10 +3110,13 @@
 					ext4_current_time(new_dir);
 		ext4_mark_inode_dirty(handle, new_dir);
 		BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
-		retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh);
-		if (unlikely(retval)) {
-			ext4_std_error(new_dir->i_sb, retval);
-			goto end_rename;
+		if (!new_inlined) {
+			retval = ext4_handle_dirty_dirent_node(handle,
+							       new_dir, new_bh);
+			if (unlikely(retval)) {
+				ext4_std_error(new_dir->i_sb, retval);
+				goto end_rename;
+			}
 		}
 		brelse(new_bh);
 		new_bh = NULL;
@@ -2962,7 +3144,8 @@
 		struct buffer_head *old_bh2;
 		struct ext4_dir_entry_2 *old_de2;
 
-		old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
+		old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name,
+					  &old_de2, NULL);
 		if (old_bh2) {
 			retval = ext4_delete_entry(handle, old_dir,
 						   old_de2, old_bh2);
@@ -2982,17 +3165,19 @@
 	old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
 	ext4_update_dx_flag(old_dir);
 	if (dir_bh) {
-		PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
-						cpu_to_le32(new_dir->i_ino);
+		parent_de->inode = cpu_to_le32(new_dir->i_ino);
 		BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-		if (is_dx(old_inode)) {
-			retval = ext4_handle_dirty_dx_node(handle,
-							   old_inode,
-							   dir_bh);
+		if (!inlined) {
+			if (is_dx(old_inode)) {
+				retval = ext4_handle_dirty_dx_node(handle,
+								   old_inode,
+								   dir_bh);
+			} else {
+				retval = ext4_handle_dirty_dirent_node(handle,
+							old_inode, dir_bh);
+			}
 		} else {
-			retval = ext4_handle_dirty_dirent_node(handle,
-							       old_inode,
-							       dir_bh);
+			retval = ext4_mark_inode_dirty(handle, old_inode);
 		}
 		if (retval) {
 			ext4_std_error(old_dir->i_sb, retval);
@@ -3043,23 +3228,19 @@
 	.mknod		= ext4_mknod,
 	.rename		= ext4_rename,
 	.setattr	= ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
-#endif
 	.get_acl	= ext4_get_acl,
 	.fiemap         = ext4_fiemap,
 };
 
 const struct inode_operations ext4_special_inode_operations = {
 	.setattr	= ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
-#endif
 	.get_acl	= ext4_get_acl,
 };
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 68e896e..0016fbc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -27,7 +27,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "ext4_extents.h"
 
 static struct kmem_cache *io_page_cachep, *io_end_cachep;
 
@@ -111,7 +110,7 @@
 		inode_dio_done(inode);
 	/* Wake up anyone waiting on unwritten extent conversion */
 	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-		wake_up_all(ext4_ioend_wq(io->inode));
+		wake_up_all(ext4_ioend_wq(inode));
 	return ret;
 }
 
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 47bf06a..d99387b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -783,7 +783,7 @@
 
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
-		goto exit_sbh;
+		goto exit_dind;
 
 	err = ext4_journal_get_write_access(handle, dind);
 	if (unlikely(err))
@@ -792,7 +792,7 @@
 	/* ext4_reserve_inode_write() gets a reference on the iloc */
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (unlikely(err))
-		goto exit_dindj;
+		goto exit_dind;
 
 	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
 				     sizeof(struct buffer_head *),
@@ -846,12 +846,7 @@
 
 exit_inode:
 	ext4_kvfree(n_group_desc);
-	/* ext4_handle_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
-exit_dindj:
-	/* ext4_handle_release_buffer(handle, dind); */
-exit_sbh:
-	/* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
 exit_dind:
 	brelse(dind);
 exit_bh:
@@ -969,14 +964,8 @@
 	}
 
 	for (i = 0; i < reserved_gdb; i++) {
-		if ((err = ext4_journal_get_write_access(handle, primary[i]))) {
-			/*
-			int j;
-			for (j = 0; j < i; j++)
-				ext4_handle_release_buffer(handle, primary[j]);
-			 */
+		if ((err = ext4_journal_get_write_access(handle, primary[i])))
 			goto exit_bh;
-		}
 	}
 
 	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 80928f7..3cdb0a2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -45,7 +45,7 @@
 #include <linux/freezer.h>
 
 #include "ext4.h"
-#include "ext4_extents.h"
+#include "ext4_extents.h"	/* Needed for trace points definition */
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -939,10 +939,11 @@
 		return NULL;
 
 	ei->vfs_inode.i_version = 1;
-	ei->vfs_inode.i_data.writeback_index = 0;
 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	spin_lock_init(&ei->i_prealloc_lock);
+	ext4_es_init_tree(&ei->i_es_tree);
+	rwlock_init(&ei->i_es_lock);
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
@@ -996,9 +997,7 @@
 	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 
 	INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT4_FS_XATTR
 	init_rwsem(&ei->xattr_sem);
-#endif
 	init_rwsem(&ei->i_data_sem);
 	inode_init_once(&ei->vfs_inode);
 }
@@ -1031,6 +1030,7 @@
 	clear_inode(inode);
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
+	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
 	if (EXT4_I(inode)->jinode) {
 		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 					       EXT4_I(inode)->jinode);
@@ -1447,13 +1447,8 @@
 	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
 	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
 	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
-#ifdef CONFIG_EXT4_FS_XATTR
 	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
 	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
-#else
-	{Opt_user_xattr, 0, MOPT_NOSUPPORT},
-	{Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
-#endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
 	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
@@ -3202,7 +3197,6 @@
 	ext4_fsblk_t overhead = 0;
 	char *buf = (char *) get_zeroed_page(GFP_KERNEL);
 
-	memset(buf, 0, PAGE_SIZE);
 	if (!buf)
 		return -ENOMEM;
 
@@ -3256,7 +3250,7 @@
 	unsigned int i;
 	int needs_recovery, has_huge_files, has_bigalloc;
 	__u64 blocks_count;
-	int err;
+	int err = 0;
 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
 	ext4_group_t first_not_zeroed;
 
@@ -3272,9 +3266,6 @@
 	}
 	sb->s_fs_info = sbi;
 	sbi->s_sb = sb;
-	sbi->s_mount_opt = 0;
-	sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID);
-	sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID);
 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
 	sbi->s_sb_block = sb_block;
 	if (sb->s_bdev->bd_part)
@@ -3285,6 +3276,7 @@
 	for (cp = sb->s_id; (cp = strchr(cp, '/'));)
 		*cp = '!';
 
+	/* -EINVAL is default */
 	ret = -EINVAL;
 	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
 	if (!blocksize) {
@@ -3369,9 +3361,7 @@
 	if (def_mount_opts & EXT4_DEFM_UID16)
 		set_opt(sb, NO_UID32);
 	/* xattr user namespace & acls are now defaulted on */
-#ifdef CONFIG_EXT4_FS_XATTR
 	set_opt(sb, XATTR_USER);
-#endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	set_opt(sb, POSIX_ACL);
 #endif
@@ -3662,7 +3652,6 @@
 			 " too large to mount safely on this system");
 		if (sizeof(sector_t) < 8)
 			ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
-		ret = err;
 		goto failed_mount;
 	}
 
@@ -3770,7 +3759,6 @@
 	}
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "insufficient memory");
-		ret = err;
 		goto failed_mount3;
 	}
 
@@ -3801,7 +3789,6 @@
 
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
-	sbi->s_resize_flags = 0;
 
 	sb->s_root = NULL;
 
@@ -3897,8 +3884,8 @@
 	if (es->s_overhead_clusters)
 		sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
 	else {
-		ret = ext4_calculate_overhead(sb);
-		if (ret)
+		err = ext4_calculate_overhead(sb);
+		if (err)
 			goto failed_mount_wq;
 	}
 
@@ -3910,6 +3897,7 @@
 		alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
 	if (!EXT4_SB(sb)->dio_unwritten_wq) {
 		printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
+		ret = -ENOMEM;
 		goto failed_mount_wq;
 	}
 
@@ -4012,12 +4000,20 @@
 	/* Enable quota usage during mount. */
 	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
 	    !(sb->s_flags & MS_RDONLY)) {
-		ret = ext4_enable_quotas(sb);
-		if (ret)
+		err = ext4_enable_quotas(sb);
+		if (err)
 			goto failed_mount7;
 	}
 #endif  /* CONFIG_QUOTA */
 
+	if (test_opt(sb, DISCARD)) {
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
+		if (!blk_queue_discard(q))
+			ext4_msg(sb, KERN_WARNING,
+				 "mounting with \"discard\" option, but "
+				 "the device does not support discard");
+	}
+
 	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
 		 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
 		 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
@@ -4084,7 +4080,7 @@
 	kfree(sbi);
 out_free_orig:
 	kfree(orig_data);
-	return ret;
+	return err ? err : ret;
 }
 
 /*
@@ -4790,7 +4786,7 @@
 
 	buf->f_type = EXT4_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead);
+	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
 	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
 		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
 	/* prevent underflow in case that few free space is available */
@@ -5282,6 +5278,7 @@
 	ext4_li_info = NULL;
 	mutex_init(&ext4_li_mtx);
 
+	/* Build-time check for flags consistency */
 	ext4_check_flag_values();
 
 	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
@@ -5289,9 +5286,14 @@
 		init_waitqueue_head(&ext4__ioend_wq[i]);
 	}
 
-	err = ext4_init_pageio();
+	err = ext4_init_es();
 	if (err)
 		return err;
+
+	err = ext4_init_pageio();
+	if (err)
+		goto out7;
+
 	err = ext4_init_system_zone();
 	if (err)
 		goto out6;
@@ -5341,6 +5343,9 @@
 	ext4_exit_system_zone();
 out6:
 	ext4_exit_pageio();
+out7:
+	ext4_exit_es();
+
 	return err;
 }
 
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index ed9354a..ff37119 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -35,22 +35,18 @@
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
 	.setattr	= ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
-#endif
 };
 
 const struct inode_operations ext4_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= ext4_follow_link,
 	.setattr	= ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
-#endif
 };
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 2cdb98d..3a91ebc 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -61,11 +61,6 @@
 #include "xattr.h"
 #include "acl.h"
 
-#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
-#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
-#define BFIRST(bh) ENTRY(BHDR(bh)+1)
-#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
-
 #ifdef EXT4_XATTR_DEBUG
 # define ea_idebug(inode, f...) do { \
 		printk(KERN_DEBUG "inode %s:%lu: ", \
@@ -312,7 +307,7 @@
 	return error;
 }
 
-static int
+int
 ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 		     void *buffer, size_t buffer_size)
 {
@@ -581,21 +576,6 @@
 	return (*min_offs - ((void *)last - base) - sizeof(__u32));
 }
 
-struct ext4_xattr_info {
-	int name_index;
-	const char *name;
-	const void *value;
-	size_t value_len;
-};
-
-struct ext4_xattr_search {
-	struct ext4_xattr_entry *first;
-	void *base;
-	void *end;
-	struct ext4_xattr_entry *here;
-	int not_found;
-};
-
 static int
 ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
 {
@@ -648,9 +628,14 @@
 				   size. Just replace. */
 				s->here->e_value_size =
 					cpu_to_le32(i->value_len);
-				memset(val + size - EXT4_XATTR_PAD, 0,
-				       EXT4_XATTR_PAD); /* Clear pad bytes. */
-				memcpy(val, i->value, i->value_len);
+				if (i->value == EXT4_ZERO_XATTR_VALUE) {
+					memset(val, 0, size);
+				} else {
+					/* Clear pad bytes first. */
+					memset(val + size - EXT4_XATTR_PAD, 0,
+					       EXT4_XATTR_PAD);
+					memcpy(val, i->value, i->value_len);
+				}
 				return 0;
 			}
 
@@ -689,9 +674,14 @@
 			size_t size = EXT4_XATTR_SIZE(i->value_len);
 			void *val = s->base + min_offs - size;
 			s->here->e_value_offs = cpu_to_le16(min_offs - size);
-			memset(val + size - EXT4_XATTR_PAD, 0,
-			       EXT4_XATTR_PAD); /* Clear the pad bytes. */
-			memcpy(val, i->value, i->value_len);
+			if (i->value == EXT4_ZERO_XATTR_VALUE) {
+				memset(val, 0, size);
+			} else {
+				/* Clear the pad bytes first. */
+				memset(val + size - EXT4_XATTR_PAD, 0,
+				       EXT4_XATTR_PAD);
+				memcpy(val, i->value, i->value_len);
+			}
 		}
 	}
 	return 0;
@@ -794,7 +784,6 @@
 			int offset = (char *)s->here - bs->bh->b_data;
 
 			unlock_buffer(bs->bh);
-			ext4_handle_release_buffer(handle, bs->bh);
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
@@ -950,14 +939,8 @@
 #undef header
 }
 
-struct ext4_xattr_ibody_find {
-	struct ext4_xattr_search s;
-	struct ext4_iloc iloc;
-};
-
-static int
-ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
-		      struct ext4_xattr_ibody_find *is)
+int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
+			  struct ext4_xattr_ibody_find *is)
 {
 	struct ext4_xattr_ibody_header *header;
 	struct ext4_inode *raw_inode;
@@ -985,10 +968,47 @@
 	return 0;
 }
 
-static int
-ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
-		     struct ext4_xattr_info *i,
-		     struct ext4_xattr_ibody_find *is)
+int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
+				struct ext4_xattr_info *i,
+				struct ext4_xattr_ibody_find *is)
+{
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_xattr_search *s = &is->s;
+	int error;
+
+	if (EXT4_I(inode)->i_extra_isize == 0)
+		return -ENOSPC;
+	error = ext4_xattr_set_entry(i, s);
+	if (error) {
+		if (error == -ENOSPC &&
+		    ext4_has_inline_data(inode)) {
+			error = ext4_try_to_evict_inline_data(handle, inode,
+					EXT4_XATTR_LEN(strlen(i->name) +
+					EXT4_XATTR_SIZE(i->value_len)));
+			if (error)
+				return error;
+			error = ext4_xattr_ibody_find(inode, i, is);
+			if (error)
+				return error;
+			error = ext4_xattr_set_entry(i, s);
+		}
+		if (error)
+			return error;
+	}
+	header = IHDR(inode, ext4_raw_inode(&is->iloc));
+	if (!IS_LAST_ENTRY(s->first)) {
+		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
+		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
+	} else {
+		header->h_magic = cpu_to_le32(0);
+		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
+	}
+	return 0;
+}
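+
+/*
+ * Note the retry above: when the in-inode xattr space is exhausted and
+ * the inode carries inline data, the inline data is first pushed out to
+ * a real block (ext4_try_to_evict_inline_data), the search state is
+ * recomputed with ext4_xattr_ibody_find(), and only then is the entry
+ * set again.
+ */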
+
+static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+				struct ext4_xattr_info *i,
+				struct ext4_xattr_ibody_find *is)
 {
 	struct ext4_xattr_ibody_header *header;
 	struct ext4_xattr_search *s = &is->s;
@@ -1144,9 +1164,17 @@
 {
 	handle_t *handle;
 	int error, retries = 0;
+	int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
 
 retry:
-	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+	/*
+	 * In case of inline data, we may push the data out to a block,
+	 * so reserve the journal space first.
+	 */
+	if (ext4_has_inline_data(inode))
+		credits += ext4_writepage_trans_blocks(inode) + 1;
+
+	handle = ext4_journal_start(inode, credits);
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 	} else {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 91f31ca7..69eda78 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -21,6 +21,7 @@
 #define EXT4_XATTR_INDEX_TRUSTED		4
 #define	EXT4_XATTR_INDEX_LUSTRE			5
 #define EXT4_XATTR_INDEX_SECURITY	        6
+#define EXT4_XATTR_INDEX_SYSTEM			7
 
 struct ext4_xattr_header {
 	__le32	h_magic;	/* magic number for identification */
@@ -65,7 +66,32 @@
 		EXT4_I(inode)->i_extra_isize))
 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
 
-# ifdef CONFIG_EXT4_FS_XATTR
+#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
+#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
+#define BFIRST(bh) ENTRY(BHDR(bh)+1)
+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
+
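+/*
+ * Sentinel value: a value pointer of EXT4_ZERO_XATTR_VALUE asks
+ * ext4_xattr_set_entry() to reserve value_len bytes and zero-fill them
+ * instead of copying from a caller buffer (presumably for the inline
+ * data code added in this series to reserve space in the inode body).
+ */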
+#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
+
+struct ext4_xattr_info {
+	int name_index;
+	const char *name;
+	const void *value;
+	size_t value_len;
+};
+
+struct ext4_xattr_search {
+	struct ext4_xattr_entry *first;
+	void *base;
+	void *end;
+	struct ext4_xattr_entry *here;
+	int not_found;
+};
+
+struct ext4_xattr_ibody_find {
+	struct ext4_xattr_search s;
+	struct ext4_iloc iloc;
+};
 
 extern const struct xattr_handler ext4_xattr_user_handler;
 extern const struct xattr_handler ext4_xattr_trusted_handler;
@@ -90,60 +116,82 @@
 
 extern const struct xattr_handler *ext4_xattr_handlers[];
 
-# else  /* CONFIG_EXT4_FS_XATTR */
+extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
+				 struct ext4_xattr_ibody_find *is);
+extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
+				const char *name,
+				void *buffer, size_t buffer_size);
+extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
+				       struct ext4_xattr_info *i,
+				       struct ext4_xattr_ibody_find *is);
 
-static inline int
-ext4_xattr_get(struct inode *inode, int name_index, const char *name,
-	       void *buffer, size_t size, int flags)
-{
-	return -EOPNOTSUPP;
-}
+extern int ext4_has_inline_data(struct inode *inode);
+extern int ext4_get_inline_size(struct inode *inode);
+extern int ext4_get_max_inline_size(struct inode *inode);
+extern int ext4_find_inline_data_nolock(struct inode *inode);
+extern void ext4_write_inline_data(struct inode *inode,
+				   struct ext4_iloc *iloc,
+				   void *buffer, loff_t pos,
+				   unsigned int len);
+extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
+				    unsigned int len);
+extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
+				 unsigned int len);
+extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
 
-static inline int
-ext4_xattr_set(struct inode *inode, int name_index, const char *name,
-	       const void *value, size_t size, int flags)
-{
-	return -EOPNOTSUPP;
-}
+extern int ext4_readpage_inline(struct inode *inode, struct page *page);
+extern int ext4_try_to_write_inline_data(struct address_space *mapping,
+					 struct inode *inode,
+					 loff_t pos, unsigned len,
+					 unsigned flags,
+					 struct page **pagep);
+extern int ext4_write_inline_data_end(struct inode *inode,
+				      loff_t pos, unsigned len,
+				      unsigned copied,
+				      struct page *page);
+extern struct buffer_head *
+ext4_journalled_write_inline_data(struct inode *inode,
+				  unsigned len,
+				  struct page *page);
+extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
+					   struct inode *inode,
+					   loff_t pos, unsigned len,
+					   unsigned flags,
+					   struct page **pagep,
+					   void **fsdata);
+extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
+					 unsigned len, unsigned copied,
+					 struct page *page);
+extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
+				     struct inode *inode);
+extern int ext4_try_create_inline_dir(handle_t *handle,
+				      struct inode *parent,
+				      struct inode *inode);
+extern int ext4_read_inline_dir(struct file *filp,
+				void *dirent, filldir_t filldir,
+				int *has_inline_data);
+extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
+					const struct qstr *d_name,
+					struct ext4_dir_entry_2 **res_dir,
+					int *has_inline_data);
+extern int ext4_delete_inline_entry(handle_t *handle,
+				    struct inode *dir,
+				    struct ext4_dir_entry_2 *de_del,
+				    struct buffer_head *bh,
+				    int *has_inline_data);
+extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
+extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
+					struct ext4_dir_entry_2 **parent_de,
+					int *retval);
+extern int ext4_inline_data_fiemap(struct inode *inode,
+				   struct fiemap_extent_info *fieinfo,
+				   int *has_inline);
+extern int ext4_try_to_evict_inline_data(handle_t *handle,
+					 struct inode *inode,
+					 int needed);
+extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
 
-static inline int
-ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
-	       const char *name, const void *value, size_t size, int flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void
-ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
-{
-}
-
-static inline void
-ext4_xattr_put_super(struct super_block *sb)
-{
-}
-
-static __init inline int
-ext4_init_xattr(void)
-{
-	return 0;
-}
-
-static inline void
-ext4_exit_xattr(void)
-{
-}
-
-static inline int
-ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
-			    struct ext4_inode *raw_inode, handle_t *handle)
-{
-	return -EOPNOTSUPP;
-}
-
-#define ext4_xattr_handlers	NULL
-
-# endif  /* CONFIG_EXT4_FS_XATTR */
+extern int ext4_convert_inline_data(struct inode *inode);
 
 #ifdef CONFIG_EXT4_FS_SECURITY
 extern int ext4_init_security(handle_t *handle, struct inode *inode,
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
new file mode 100644
index 0000000..fd27e7e
--- /dev/null
+++ b/fs/f2fs/Kconfig
@@ -0,0 +1,53 @@
+config F2FS_FS
+	tristate "F2FS filesystem support (EXPERIMENTAL)"
+	depends on BLOCK
+	help
+	  F2FS is based on the Log-structured File System (LFS), which supports
+	  versatile "flash-friendly" features. The design focuses on addressing
+	  the fundamental issues of LFS: the snowball effect of the wandering
+	  tree and the high cleaning overhead.
+
+	  Since flash-based storage devices show different characteristics
+	  according to their internal geometry or flash memory management
+	  scheme (aka FTL), F2FS and its tools support various parameters not
+	  only for configuring the on-disk layout, but also for selecting the
+	  allocation and cleaning algorithms.
+
+	  If unsure, say N.
+
+config F2FS_STAT_FS
+	bool "F2FS Status Information"
+	depends on F2FS_FS && DEBUG_FS
+	default y
+	help
+	  /sys/kernel/debug/f2fs/ contains information about all the partitions
+	  mounted as f2fs. Each file shows status information for the whole
+	  filesystem.
+
+	  /sys/kernel/debug/f2fs/status includes:
+	    - major file system information managed by f2fs currently
+	    - average SIT information about whole segments
+	    - current memory footprint consumed by f2fs.
+
+config F2FS_FS_XATTR
+	bool "F2FS extended attributes"
+	depends on F2FS_FS
+	default y
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config F2FS_FS_POSIX_ACL
+	bool "F2FS Access Control Lists"
+	depends on F2FS_FS_XATTR
+	select FS_POSIX_ACL
+	default y
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
new file mode 100644
index 0000000..27a0820
--- /dev/null
+++ b/fs/f2fs/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_F2FS_FS) += f2fs.o
+
+f2fs-y		:= dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y		+= checkpoint.o gc.o data.o node.o segment.o recovery.o
+f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
+f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
+f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
new file mode 100644
index 0000000..fed74d1
--- /dev/null
+++ b/fs/f2fs/acl.c
@@ -0,0 +1,414 @@
+/*
+ * fs/f2fs/acl.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/acl.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "xattr.h"
+#include "acl.h"
+
+#define get_inode_mode(i)	((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
+					(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+
+static inline size_t f2fs_acl_size(int count)
+{
+	if (count <= 4) {
+		return sizeof(struct f2fs_acl_header) +
+			count * sizeof(struct f2fs_acl_entry_short);
+	} else {
+		return sizeof(struct f2fs_acl_header) +
+			4 * sizeof(struct f2fs_acl_entry_short) +
+			(count - 4) * sizeof(struct f2fs_acl_entry);
+	}
+}
+
+static inline int f2fs_acl_count(size_t size)
+{
+	ssize_t s;
+	size -= sizeof(struct f2fs_acl_header);
+	s = size - 4 * sizeof(struct f2fs_acl_entry_short);
+	if (s < 0) {
+		if (size % sizeof(struct f2fs_acl_entry_short))
+			return -1;
+		return size / sizeof(struct f2fs_acl_entry_short);
+	} else {
+		if (s % sizeof(struct f2fs_acl_entry))
+			return -1;
+		return s / sizeof(struct f2fs_acl_entry) + 4;
+	}
+}
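+
+/*
+ * Worked example of the size arithmetic above: with a 4-byte header,
+ * 4-byte short entries and 8-byte full entries, an ACL of 6 entries
+ * occupies 4 + 4 * 4 + 2 * 8 = 36 bytes, and f2fs_acl_count(36) solves
+ * the same equation in reverse to recover 6.
+ */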
+
+static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
+{
+	int i, count;
+	struct posix_acl *acl;
+	struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value;
+	struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
+	const char *end = value + size;
+
+	if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
+		return ERR_PTR(-EINVAL);
+
+	count = f2fs_acl_count(size);
+	if (count < 0)
+		return ERR_PTR(-EINVAL);
+	if (count == 0)
+		return NULL;
+
+	acl = posix_acl_alloc(count, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+
+		if ((char *)entry > end)
+			goto fail;
+
+		acl->a_entries[i].e_tag  = le16_to_cpu(entry->e_tag);
+		acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry_short));
+			break;
+
+		case ACL_USER:
+			acl->a_entries[i].e_uid =
+				make_kuid(&init_user_ns,
+						le32_to_cpu(entry->e_id));
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry));
+			break;
+		case ACL_GROUP:
+			acl->a_entries[i].e_gid =
+				make_kgid(&init_user_ns,
+						le32_to_cpu(entry->e_id));
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry));
+			break;
+		default:
+			goto fail;
+		}
+	}
+	if ((char *)entry != end)
+		goto fail;
+	return acl;
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+	struct f2fs_acl_header *f2fs_acl;
+	struct f2fs_acl_entry *entry;
+	int i;
+
+	f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
+			sizeof(struct f2fs_acl_entry), GFP_KERNEL);
+	if (!f2fs_acl)
+		return ERR_PTR(-ENOMEM);
+
+	f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION);
+	entry = (struct f2fs_acl_entry *)(f2fs_acl + 1);
+
+	for (i = 0; i < acl->a_count; i++) {
+
+		entry->e_tag  = cpu_to_le16(acl->a_entries[i].e_tag);
+		entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER:
+			entry->e_id = cpu_to_le32(
+					from_kuid(&init_user_ns,
+						acl->a_entries[i].e_uid));
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry));
+			break;
+		case ACL_GROUP:
+			entry->e_id = cpu_to_le32(
+					from_kgid(&init_user_ns,
+						acl->a_entries[i].e_gid));
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry));
+			break;
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry_short));
+			break;
+		default:
+			goto fail;
+		}
+	}
+	*size = f2fs_acl_size(acl->a_count);
+	return (void *)f2fs_acl;
+
+fail:
+	kfree(f2fs_acl);
+	return ERR_PTR(-EINVAL);
+}
+
+struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
+	void *value = NULL;
+	struct posix_acl *acl;
+	int retval;
+
+	if (!test_opt(sbi, POSIX_ACL))
+		return NULL;
+
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	if (type == ACL_TYPE_ACCESS)
+		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
+
+	retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
+	if (retval > 0) {
+		value = kmalloc(retval, GFP_KERNEL);
+		if (!value)
+			return ERR_PTR(-ENOMEM);
+		retval = f2fs_getxattr(inode, name_index, "", value, retval);
+	}
+
+	if (retval < 0) {
+		if (retval == -ENODATA)
+			acl = NULL;
+		else
+			acl = ERR_PTR(retval);
+	} else {
+		acl = f2fs_acl_from_disk(value, retval);
+	}
+	kfree(value);
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
+
+	return acl;
+}
+
+static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	int name_index;
+	void *value = NULL;
+	size_t size = 0;
+	int error;
+
+	if (!test_opt(sbi, POSIX_ACL))
+		return 0;
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
+		if (acl) {
+			error = posix_acl_equiv_mode(acl, &inode->i_mode);
+			if (error < 0)
+				return error;
+			set_acl_inode(fi, inode->i_mode);
+			if (error == 0)
+				acl = NULL;
+		}
+		break;
+
+	case ACL_TYPE_DEFAULT:
+		name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		value = f2fs_acl_to_disk(acl, &size);
+		if (IS_ERR(value)) {
+			cond_clear_inode_flag(fi, FI_ACL_MODE);
+			return (int)PTR_ERR(value);
+		}
+	}
+
+	error = f2fs_setxattr(inode, name_index, "", value, size);
+
+	kfree(value);
+	if (!error)
+		set_cached_acl(inode, type, acl);
+
+	cond_clear_inode_flag(fi, FI_ACL_MODE);
+	return error;
+}
+
+int f2fs_init_acl(struct inode *inode, struct inode *dir)
+{
+	struct posix_acl *acl = NULL;
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+	int error = 0;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		if (test_opt(sbi, POSIX_ACL)) {
+			acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT);
+			if (IS_ERR(acl))
+				return PTR_ERR(acl);
+		}
+		if (!acl)
+			inode->i_mode &= ~current_umask();
+	}
+
+	if (test_opt(sbi, POSIX_ACL) && acl) {
+
+		if (S_ISDIR(inode->i_mode)) {
+			error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+			if (error)
+				goto cleanup;
+		}
+		error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
+		if (error < 0)
+			return error;
+		if (error > 0)
+			error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+	}
+cleanup:
+	posix_acl_release(acl);
+	return error;
+}
+
+int f2fs_acl_chmod(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct posix_acl *acl;
+	int error;
+	mode_t mode = get_inode_mode(inode);
+
+	if (!test_opt(sbi, POSIX_ACL))
+		return 0;
+	if (S_ISLNK(mode))
+		return -EOPNOTSUPP;
+
+	acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+
+	error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
+	if (error)
+		return error;
+	error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+	posix_acl_release(acl);
+	return error;
+}
+
+static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+	const char *xname = POSIX_ACL_XATTR_DEFAULT;
+	size_t size;
+
+	if (!test_opt(sbi, POSIX_ACL))
+		return 0;
+
+	if (type == ACL_TYPE_ACCESS)
+		xname = POSIX_ACL_XATTR_ACCESS;
+
+	size = strlen(xname) + 1;
+	if (list && size <= list_size)
+		memcpy(list, xname, size);
+	return size;
+}
+
+static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+	struct posix_acl *acl;
+	int error;
+
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(sbi, POSIX_ACL))
+		return -EOPNOTSUPP;
+
+	acl = f2fs_get_acl(dentry->d_inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return -ENODATA;
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl = NULL;
+	int error;
+
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(sbi, POSIX_ACL))
+		return -EOPNOTSUPP;
+	if (!inode_owner_or_capable(inode))
+		return -EPERM;
+
+	if (value) {
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (acl) {
+			error = posix_acl_valid(acl);
+			if (error)
+				goto release_and_out;
+		}
+	} else {
+		acl = NULL;
+	}
+
+	error = f2fs_set_acl(inode, type, acl);
+
+release_and_out:
+	posix_acl_release(acl);
+	return error;
+}
+
+const struct xattr_handler f2fs_xattr_acl_default_handler = {
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
+	.flags = ACL_TYPE_DEFAULT,
+	.list = f2fs_xattr_list_acl,
+	.get = f2fs_xattr_get_acl,
+	.set = f2fs_xattr_set_acl,
+};
+
+const struct xattr_handler f2fs_xattr_acl_access_handler = {
+	.prefix = POSIX_ACL_XATTR_ACCESS,
+	.flags = ACL_TYPE_ACCESS,
+	.list = f2fs_xattr_list_acl,
+	.get = f2fs_xattr_get_acl,
+	.set = f2fs_xattr_set_acl,
+};
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
new file mode 100644
index 0000000..80f4306
--- /dev/null
+++ b/fs/f2fs/acl.h
@@ -0,0 +1,57 @@
+/*
+ * fs/f2fs/acl.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/acl.h
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __F2FS_ACL_H__
+#define __F2FS_ACL_H__
+
+#include <linux/posix_acl_xattr.h>
+
+#define F2FS_ACL_VERSION	0x0001
+
+struct f2fs_acl_entry {
+	__le16 e_tag;
+	__le16 e_perm;
+	__le32 e_id;
+};
+
+struct f2fs_acl_entry_short {
+	__le16 e_tag;
+	__le16 e_perm;
+};
+
+struct f2fs_acl_header {
+	__le32 a_version;
+};
+
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+
+extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type);
+extern int f2fs_acl_chmod(struct inode *inode);
+extern int f2fs_init_acl(struct inode *inode, struct inode *dir);
+#else
+#define f2fs_check_acl	NULL
+#define f2fs_get_acl	NULL
+#define f2fs_set_acl	NULL
+
+static inline int f2fs_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int f2fs_init_acl(struct inode *inode, struct inode *dir)
+{
+	return 0;
+}
+#endif
+#endif /* __F2FS_ACL_H__ */
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
new file mode 100644
index 0000000..6ef36c3
--- /dev/null
+++ b/fs/f2fs/checkpoint.c
@@ -0,0 +1,794 @@
+/*
+ * fs/f2fs/checkpoint.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/f2fs_fs.h>
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *inode_entry_slab;
+
+/*
+ * We guarantee no failure on the returned page.
+ */
+struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct page *page = NULL;
+repeat:
+	page = grab_cache_page(mapping, index);
+	if (!page) {
+		cond_resched();
+		goto repeat;
+	}
+
+	/* We wait for writeback only inside grab_meta_page() */
+	wait_on_page_writeback(page);
+	SetPageUptodate(page);
+	return page;
+}
+
+/*
+ * We guarantee no failure on the returned page.
+ */
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct page *page;
+repeat:
+	page = grab_cache_page(mapping, index);
+	if (!page) {
+		cond_resched();
+		goto repeat;
+	}
+	if (f2fs_readpage(sbi, page, index, READ_SYNC)) {
+		f2fs_put_page(page, 1);
+		goto repeat;
+	}
+	mark_page_accessed(page);
+
+	/* We do not allow returning an erroneous page */
+	return page;
+}
+
+static int f2fs_write_meta_page(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	int err;
+
+	wait_on_page_writeback(page);
+
+	err = write_meta_page(sbi, page, wbc);
+	if (err) {
+		wbc->pages_skipped++;
+		set_page_dirty(page);
+	}
+
+	dec_page_count(sbi, F2FS_DIRTY_META);
+
+	/* In this case, we should not unlock this page */
+	if (err != AOP_WRITEPAGE_ACTIVATE)
+		unlock_page(page);
+	return err;
+}
+
+static int f2fs_write_meta_pages(struct address_space *mapping,
+				struct writeback_control *wbc)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+	struct block_device *bdev = sbi->sb->s_bdev;
+	long written;
+
+	if (wbc->for_kupdate)
+		return 0;
+
+	if (get_pages(sbi, F2FS_DIRTY_META) == 0)
+		return 0;
+
+	/* if mounting failed, skip writing node pages */
+	mutex_lock(&sbi->cp_mutex);
+	written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+	mutex_unlock(&sbi->cp_mutex);
+	wbc->nr_to_write -= written;
+	return 0;
+}
+
+long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+						long nr_to_write)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	pgoff_t index = 0, end = LONG_MAX;
+	struct pagevec pvec;
+	long nwritten = 0;
+	struct writeback_control wbc = {
+		.for_reclaim = 0,
+	};
+
+	pagevec_init(&pvec, 0);
+
+	while (index <= end) {
+		int i, nr_pages;
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+				PAGECACHE_TAG_DIRTY,
+				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+			lock_page(page);
+			BUG_ON(page->mapping != mapping);
+			BUG_ON(!PageDirty(page));
+			clear_page_dirty_for_io(page);
+			f2fs_write_meta_page(page, &wbc);
+			if (nwritten++ >= nr_to_write)
+				break;
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	if (nwritten)
+		f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
+
+	return nwritten;
+}
+
+static int f2fs_set_meta_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+
+	SetPageUptodate(page);
+	if (!PageDirty(page)) {
+		__set_page_dirty_nobuffers(page);
+		inc_page_count(sbi, F2FS_DIRTY_META);
+		F2FS_SET_SB_DIRT(sbi);
+		return 1;
+	}
+	return 0;
+}
+
+const struct address_space_operations f2fs_meta_aops = {
+	.writepage	= f2fs_write_meta_page,
+	.writepages	= f2fs_write_meta_pages,
+	.set_page_dirty	= f2fs_set_meta_page_dirty,
+};
+
+int check_orphan_space(struct f2fs_sb_info *sbi)
+{
+	unsigned int max_orphans;
+	int err = 0;
+
+	/*
+	 * Considering 512 blocks in a segment, 5 blocks are needed for the
+	 * cp and the log segment summaries.  The remaining blocks are used
+	 * to keep orphan entries.  With the limitation of one reserved
+	 * segment for the cp pack, we can have at most 1020 * 507 orphan
+	 * entries.
+	 */
+	max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
+	mutex_lock(&sbi->orphan_inode_mutex);
+	if (sbi->n_orphans >= max_orphans)
+		err = -ENOSPC;
+	mutex_unlock(&sbi->orphan_inode_mutex);
+	return err;
+}
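+
+/*
+ * With the default 512-block segments this works out to 512 - 5 = 507
+ * orphan blocks, and at F2FS_ORPHANS_PER_BLOCK (1020) entries each, a
+ * checkpoint pack can record up to 507 * 1020 = 517140 orphan inodes.
+ */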
+
+void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+	struct list_head *head, *this;
+	struct orphan_inode_entry *new = NULL, *orphan = NULL;
+
+	mutex_lock(&sbi->orphan_inode_mutex);
+	head = &sbi->orphan_inode_list;
+	list_for_each(this, head) {
+		orphan = list_entry(this, struct orphan_inode_entry, list);
+		if (orphan->ino == ino)
+			goto out;
+		if (orphan->ino > ino)
+			break;
+		orphan = NULL;
+	}
+retry:
+	new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+	if (!new) {
+		cond_resched();
+		goto retry;
+	}
+	new->ino = ino;
+	INIT_LIST_HEAD(&new->list);
+
+	/* add the new entry into the list, which is sorted by inode number */
+	if (orphan) {
+		struct orphan_inode_entry *prev;
+
+		/* get previous entry */
+		prev = list_entry(orphan->list.prev, typeof(*prev), list);
+		if (&prev->list != head)
+			/* insert new orphan inode entry */
+			list_add(&new->list, &prev->list);
+		else
+			list_add(&new->list, head);
+	} else {
+		list_add_tail(&new->list, head);
+	}
+	sbi->n_orphans++;
+out:
+	mutex_unlock(&sbi->orphan_inode_mutex);
+}
+
+void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+	struct list_head *this, *next, *head;
+	struct orphan_inode_entry *orphan;
+
+	mutex_lock(&sbi->orphan_inode_mutex);
+	head = &sbi->orphan_inode_list;
+	list_for_each_safe(this, next, head) {
+		orphan = list_entry(this, struct orphan_inode_entry, list);
+		if (orphan->ino == ino) {
+			list_del(&orphan->list);
+			kmem_cache_free(orphan_entry_slab, orphan);
+			sbi->n_orphans--;
+			break;
+		}
+	}
+	mutex_unlock(&sbi->orphan_inode_mutex);
+}
+
+static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+	struct inode *inode = f2fs_iget(sbi->sb, ino);
+	BUG_ON(IS_ERR(inode));
+	clear_nlink(inode);
+
+	/* truncate all the data during iput */
+	iput(inode);
+}
+
+int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+{
+	block_t start_blk, orphan_blkaddr, i, j;
+
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
+		return 0;
+
+	sbi->por_doing = 1;
+	start_blk = __start_cp_addr(sbi) + 1;
+	orphan_blkaddr = __start_sum_addr(sbi) - 1;
+
+	for (i = 0; i < orphan_blkaddr; i++) {
+		struct page *page = get_meta_page(sbi, start_blk + i);
+		struct f2fs_orphan_block *orphan_blk;
+
+		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
+		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
+			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
+			recover_orphan_inode(sbi, ino);
+		}
+		f2fs_put_page(page, 1);
+	}
+	/* clear Orphan Flag */
+	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
+	sbi->por_doing = 0;
+	return 0;
+}
+
+static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
+{
+	struct list_head *head, *this, *next;
+	struct f2fs_orphan_block *orphan_blk = NULL;
+	struct page *page = NULL;
+	unsigned int nentries = 0;
+	unsigned short index = 1;
+	unsigned short orphan_blocks;
+
+	orphan_blocks = (unsigned short)((sbi->n_orphans +
+		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+
+	mutex_lock(&sbi->orphan_inode_mutex);
+	head = &sbi->orphan_inode_list;
+
+	/* loop over each orphan inode entry and write them into journal blocks */
+	list_for_each_safe(this, next, head) {
+		struct orphan_inode_entry *orphan;
+
+		orphan = list_entry(this, struct orphan_inode_entry, list);
+
+		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
+			/*
+			 * the current orphan block is full of 1020
+			 * entries, so we need to flush it and bring
+			 * another one into memory
+			 */
+			orphan_blk->blk_addr = cpu_to_le16(index);
+			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+			orphan_blk->entry_count = cpu_to_le32(nentries);
+			set_page_dirty(page);
+			f2fs_put_page(page, 1);
+			index++;
+			start_blk++;
+			nentries = 0;
+			page = NULL;
+		}
+		if (page)
+			goto page_exist;
+
+		page = grab_meta_page(sbi, start_blk);
+		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
+		memset(orphan_blk, 0, sizeof(*orphan_blk));
+page_exist:
+		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
+	}
+	if (!page)
+		goto end;
+
+	orphan_blk->blk_addr = cpu_to_le16(index);
+	orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+	orphan_blk->entry_count = cpu_to_le32(nentries);
+	set_page_dirty(page);
+	f2fs_put_page(page, 1);
+end:
+	mutex_unlock(&sbi->orphan_inode_mutex);
+}
+
+static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
+				block_t cp_addr, unsigned long long *version)
+{
+	struct page *cp_page_1, *cp_page_2 = NULL;
+	unsigned long blk_size = sbi->blocksize;
+	struct f2fs_checkpoint *cp_block;
+	unsigned long long cur_version = 0, pre_version = 0;
+	unsigned int crc = 0;
+	size_t crc_offset;
+
+	/* Read the 1st cp block in this CP pack */
+	cp_page_1 = get_meta_page(sbi, cp_addr);
+
+	/* get the version number */
+	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
+	crc_offset = le32_to_cpu(cp_block->checksum_offset);
+	if (crc_offset >= blk_size)
+		goto invalid_cp1;
+
+	crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
+	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
+		goto invalid_cp1;
+
+	pre_version = le64_to_cpu(cp_block->checkpoint_ver);
+
+	/* Read the 2nd cp block in this CP pack */
+	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+	cp_page_2 = get_meta_page(sbi, cp_addr);
+
+	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
+	crc_offset = le32_to_cpu(cp_block->checksum_offset);
+	if (crc_offset >= blk_size)
+		goto invalid_cp2;
+
+	crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
+	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
+		goto invalid_cp2;
+
+	cur_version = le64_to_cpu(cp_block->checkpoint_ver);
+
+	if (cur_version == pre_version) {
+		*version = cur_version;
+		f2fs_put_page(cp_page_2, 1);
+		return cp_page_1;
+	}
+invalid_cp2:
+	f2fs_put_page(cp_page_2, 1);
+invalid_cp1:
+	f2fs_put_page(cp_page_1, 1);
+	return NULL;
+}
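+
+/*
+ * A sketch of the CP pack layout assumed above (inferred from the block
+ * addresses used here and in do_checkpoint(), not from a separate spec):
+ *
+ *   +----------+---------------+---------------+----------+
+ *   | cp block | orphan blocks | seg summaries | cp block |
+ *   +----------+---------------+---------------+----------+
+ *    ver N, crc                                 ver N, crc
+ *
+ * The pack is considered consistent only if both copies carry a valid
+ * CRC at checksum_offset and the same version number.
+ */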
+
+int get_valid_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_checkpoint *cp_block;
+	struct f2fs_super_block *fsb = sbi->raw_super;
+	struct page *cp1, *cp2, *cur_page;
+	unsigned long blk_size = sbi->blocksize;
+	unsigned long long cp1_version = 0, cp2_version = 0;
+	unsigned long long cp_start_blk_no;
+
+	sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
+	if (!sbi->ckpt)
+		return -ENOMEM;
+	/*
+	 * Finding the valid cp block involves reading both
+	 * sets (cp pack 1 and cp pack 2)
+	 */
+	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
+
+	/* The second checkpoint pack should start at the next segment */
+	cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
+
+	if (cp1 && cp2) {
+		if (ver_after(cp2_version, cp1_version))
+			cur_page = cp2;
+		else
+			cur_page = cp1;
+	} else if (cp1) {
+		cur_page = cp1;
+	} else if (cp2) {
+		cur_page = cp2;
+	} else {
+		goto fail_no_cp;
+	}
+
+	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
+	memcpy(sbi->ckpt, cp_block, blk_size);
+
+	f2fs_put_page(cp1, 1);
+	f2fs_put_page(cp2, 1);
+	return 0;
+
+fail_no_cp:
+	kfree(sbi->ckpt);
+	return -EINVAL;
+}
+
+void set_dirty_dir_page(struct inode *inode, struct page *page)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct list_head *head = &sbi->dir_inode_list;
+	struct dir_inode_entry *new;
+	struct list_head *this;
+
+	if (!S_ISDIR(inode->i_mode))
+		return;
+retry:
+	new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+	if (!new) {
+		cond_resched();
+		goto retry;
+	}
+	new->inode = inode;
+	INIT_LIST_HEAD(&new->list);
+
+	spin_lock(&sbi->dir_inode_lock);
+	list_for_each(this, head) {
+		struct dir_inode_entry *entry;
+		entry = list_entry(this, struct dir_inode_entry, list);
+		if (entry->inode == inode) {
+			kmem_cache_free(inode_entry_slab, new);
+			goto out;
+		}
+	}
+	list_add_tail(&new->list, head);
+	sbi->n_dirty_dirs++;
+
+	BUG_ON(!S_ISDIR(inode->i_mode));
+out:
+	inc_page_count(sbi, F2FS_DIRTY_DENTS);
+	inode_inc_dirty_dents(inode);
+	SetPagePrivate(page);
+
+	spin_unlock(&sbi->dir_inode_lock);
+}
+
+void remove_dirty_dir_inode(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct list_head *head = &sbi->dir_inode_list;
+	struct list_head *this;
+
+	if (!S_ISDIR(inode->i_mode))
+		return;
+
+	spin_lock(&sbi->dir_inode_lock);
+	if (atomic_read(&F2FS_I(inode)->dirty_dents))
+		goto out;
+
+	list_for_each(this, head) {
+		struct dir_inode_entry *entry;
+		entry = list_entry(this, struct dir_inode_entry, list);
+		if (entry->inode == inode) {
+			list_del(&entry->list);
+			kmem_cache_free(inode_entry_slab, entry);
+			sbi->n_dirty_dirs--;
+			break;
+		}
+	}
+out:
+	spin_unlock(&sbi->dir_inode_lock);
+}
+
+void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
+{
+	struct list_head *head = &sbi->dir_inode_list;
+	struct dir_inode_entry *entry;
+	struct inode *inode;
+retry:
+	spin_lock(&sbi->dir_inode_lock);
+	if (list_empty(head)) {
+		spin_unlock(&sbi->dir_inode_lock);
+		return;
+	}
+	entry = list_entry(head->next, struct dir_inode_entry, list);
+	inode = igrab(entry->inode);
+	spin_unlock(&sbi->dir_inode_lock);
+	if (inode) {
+		filemap_flush(inode->i_mapping);
+		iput(inode);
+	} else {
+		/*
+		 * We should submit the bio, since several dentry pages
+		 * of the inode being freed may still be under writeback.
+		 */
+		f2fs_submit_bio(sbi, DATA, true);
+	}
+	goto retry;
+}
+
+/*
+ * Freeze all the FS-operations for checkpoint.
+ */
+void block_operations(struct f2fs_sb_info *sbi)
+{
+	int t;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.for_reclaim = 0,
+	};
+
+	/* Stop renaming operation */
+	mutex_lock_op(sbi, RENAME);
+	mutex_lock_op(sbi, DENTRY_OPS);
+
+retry_dents:
+	/* write all the dirty dentry pages */
+	sync_dirty_dir_inodes(sbi);
+
+	mutex_lock_op(sbi, DATA_WRITE);
+	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
+		mutex_unlock_op(sbi, DATA_WRITE);
+		goto retry_dents;
+	}
+
+	/* block all the operations */
+	for (t = DATA_NEW; t <= NODE_TRUNC; t++)
+		mutex_lock_op(sbi, t);
+
+	mutex_lock(&sbi->write_inode);
+
+	/*
+	 * POR: we should ensure that there are no dirty node pages
+	 * until the nat/sit flush is finished.
+	 */
+retry:
+	sync_node_pages(sbi, 0, &wbc);
+
+	mutex_lock_op(sbi, NODE_WRITE);
+
+	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
+		mutex_unlock_op(sbi, NODE_WRITE);
+		goto retry;
+	}
+	mutex_unlock(&sbi->write_inode);
+}
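+
+/*
+ * As the loops above and in unblock_operations() read, the fs_lock[]
+ * entries are taken in ascending order (RENAME ... NODE_WRITE) and
+ * released in the reverse order, so the checkpoint sees a stable set
+ * of dentry and node pages in between.
+ */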
+
+static void unblock_operations(struct f2fs_sb_info *sbi)
+{
+	int t;
+	for (t = NODE_WRITE; t >= RENAME; t--)
+		mutex_unlock_op(sbi, t);
+}
+
+static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	nid_t last_nid = 0;
+	block_t start_blk;
+	struct page *cp_page;
+	unsigned int data_sum_blocks, orphan_blocks;
+	unsigned int crc32 = 0;
+	void *kaddr;
+	int i;
+
+	/* Flush all the NAT/SIT pages */
+	while (get_pages(sbi, F2FS_DIRTY_META))
+		sync_meta_pages(sbi, META, LONG_MAX);
+
+	next_free_nid(sbi, &last_nid);
+
+	/*
+	 * Modify the checkpoint; the version number has already
+	 * been updated.
+	 */
+	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
+	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
+	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
+	for (i = 0; i < 3; i++) {
+		ckpt->cur_node_segno[i] =
+			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
+		ckpt->cur_node_blkoff[i] =
+			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
+		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
+				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
+	}
+	for (i = 0; i < 3; i++) {
+		ckpt->cur_data_segno[i] =
+			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
+		ckpt->cur_data_blkoff[i] =
+			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
+		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
+				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
+	}
+
+	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
+	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
+	ckpt->next_free_nid = cpu_to_le32(last_nid);
+
+	/* 2 cp + n data seg summaries + orphan inode blocks */
+	data_sum_blocks = npages_for_summary_flush(sbi);
+	if (data_sum_blocks < 3)
+		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+	else
+		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+
+	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
+					/ F2FS_ORPHANS_PER_BLOCK;
+	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
+
+	if (is_umount) {
+		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
+	} else {
+		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+			data_sum_blocks + orphan_blocks);
+	}
+
+	if (sbi->n_orphans)
+		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+	else
+		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+
+	/* update SIT/NAT bitmap */
+	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
+	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
+
+	crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
+	*(__le32 *)((unsigned char *)ckpt +
+				le32_to_cpu(ckpt->checksum_offset))
+				= cpu_to_le32(crc32);
+
+	start_blk = __start_cp_addr(sbi);
+
+	/* write out checkpoint buffer at block 0 */
+	cp_page = grab_meta_page(sbi, start_blk++);
+	kaddr = page_address(cp_page);
+	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
+	set_page_dirty(cp_page);
+	f2fs_put_page(cp_page, 1);
+
+	if (sbi->n_orphans) {
+		write_orphan_inodes(sbi, start_blk);
+		start_blk += orphan_blocks;
+	}
+
+	write_data_summaries(sbi, start_blk);
+	start_blk += data_sum_blocks;
+	if (is_umount) {
+		write_node_summaries(sbi, start_blk);
+		start_blk += NR_CURSEG_NODE_TYPE;
+	}
+
+	/* writeout checkpoint block */
+	cp_page = grab_meta_page(sbi, start_blk);
+	kaddr = page_address(cp_page);
+	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
+	set_page_dirty(cp_page);
+	f2fs_put_page(cp_page, 1);
+
+	/* wait for writeback of previously submitted node/meta pages */
+	while (get_pages(sbi, F2FS_WRITEBACK))
+		congestion_wait(BLK_RW_ASYNC, HZ / 50);
+
+	filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
+	filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
+
+	/* update user_block_counts */
+	sbi->last_valid_block_count = sbi->total_valid_block_count;
+	sbi->alloc_valid_block_count = 0;
+
+	/* Here, we have only one bio containing the CP pack */
+	if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))
+		sbi->sb->s_flags |= MS_RDONLY;
+	else
+		sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
+
+	clear_prefree_segments(sbi);
+	F2FS_RESET_SB_DIRT(sbi);
+}
+
+/*
+ * This checkpoint procedure is guaranteed not to fail.
+ */
+void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	unsigned long long ckpt_ver;
+
+	if (!blocked) {
+		mutex_lock(&sbi->cp_mutex);
+		block_operations(sbi);
+	}
+
+	f2fs_submit_bio(sbi, DATA, true);
+	f2fs_submit_bio(sbi, NODE, true);
+	f2fs_submit_bio(sbi, META, true);
+
+	/*
+	 * Update the checkpoint pack index:
+	 * increase the version number so that SIT entries and seg summaries
+	 * are written at the correct place.
+	 */
+	ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver);
+	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
+
+	/* write cached NAT/SIT entries to NAT/SIT area */
+	flush_nat_entries(sbi);
+	flush_sit_entries(sbi);
+
+	reset_victim_segmap(sbi);
+
+	/* write the CP pack; the fs_lock[] are released in unblock_operations() */
+	do_checkpoint(sbi, is_umount);
+
+	unblock_operations(sbi);
+	mutex_unlock(&sbi->cp_mutex);
+}
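
(Illustrative, kernel-context sketch, not part of this patch: how the sync and
unmount paths would be expected to drive write_checkpoint(); the example_*
callers are hypothetical.)

    /* hypothetical callers of write_checkpoint() */
    static int example_sync_fs(struct super_block *sb, int sync)
    {
            struct f2fs_sb_info *sbi = F2FS_SB(sb);

            if (sync)
                    write_checkpoint(sbi, false, false); /* regular checkpoint */
            return 0;
    }

    static void example_put_super(struct super_block *sb)
    {
            /* is_umount = true also writes the node summaries (CP_UMOUNT_FLAG) */
            write_checkpoint(F2FS_SB(sb), false, true);
    }
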
+
+void init_orphan_info(struct f2fs_sb_info *sbi)
+{
+	mutex_init(&sbi->orphan_inode_mutex);
+	INIT_LIST_HEAD(&sbi->orphan_inode_list);
+	sbi->n_orphans = 0;
+}
+
+int create_checkpoint_caches(void)
+{
+	orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
+			sizeof(struct orphan_inode_entry), NULL);
+	if (unlikely(!orphan_entry_slab))
+		return -ENOMEM;
+	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
+			sizeof(struct dir_inode_entry), NULL);
+	if (unlikely(!inode_entry_slab)) {
+		kmem_cache_destroy(orphan_entry_slab);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void destroy_checkpoint_caches(void)
+{
+	kmem_cache_destroy(orphan_entry_slab);
+	kmem_cache_destroy(inode_entry_slab);
+}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
new file mode 100644
index 0000000..655aeab
--- /dev/null
+++ b/fs/f2fs/data.c
@@ -0,0 +1,702 @@
+/*
+ * fs/f2fs/data.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+/*
+ * Lock ordering for the change of data block address:
+ * ->data_page
+ *  ->node_page
+ *    update block addresses in the node page
+ */
+static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
+{
+	struct f2fs_node *rn;
+	__le32 *addr_array;
+	struct page *node_page = dn->node_page;
+	unsigned int ofs_in_node = dn->ofs_in_node;
+
+	wait_on_page_writeback(node_page);
+
+	rn = (struct f2fs_node *)page_address(node_page);
+
+	/* Get physical address of data block */
+	addr_array = blkaddr_in_node(rn);
+	addr_array[ofs_in_node] = cpu_to_le32(new_addr);
+	set_page_dirty(node_page);
+}
+
+int reserve_new_block(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+
+	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+		return -EPERM;
+	if (!inc_valid_block_count(sbi, dn->inode, 1))
+		return -ENOSPC;
+
+	__set_data_blkaddr(dn, NEW_ADDR);
+	dn->data_blkaddr = NEW_ADDR;
+	sync_inode_page(dn);
+	return 0;
+}
+
+static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
+					struct buffer_head *bh_result)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	pgoff_t start_fofs, end_fofs;
+	block_t start_blkaddr;
+
+	read_lock(&fi->ext.ext_lock);
+	if (fi->ext.len == 0) {
+		read_unlock(&fi->ext.ext_lock);
+		return 0;
+	}
+
+	sbi->total_hit_ext++;
+	start_fofs = fi->ext.fofs;
+	end_fofs = fi->ext.fofs + fi->ext.len - 1;
+	start_blkaddr = fi->ext.blk_addr;
+
+	if (pgofs >= start_fofs && pgofs <= end_fofs) {
+		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
+		size_t count;
+
+		clear_buffer_new(bh_result);
+		map_bh(bh_result, inode->i_sb,
+				start_blkaddr + pgofs - start_fofs);
+		count = end_fofs - pgofs + 1;
+		if (count < (UINT_MAX >> blkbits))
+			bh_result->b_size = (count << blkbits);
+		else
+			bh_result->b_size = UINT_MAX;
+
+		sbi->read_hit_ext++;
+		read_unlock(&fi->ext.ext_lock);
+		return 1;
+	}
+	read_unlock(&fi->ext.ext_lock);
+	return 0;
+}
+
+void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
+{
+	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
+	pgoff_t fofs, start_fofs, end_fofs;
+	block_t start_blkaddr, end_blkaddr;
+
+	BUG_ON(blk_addr == NEW_ADDR);
+	fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node;
+
+	/* Update the page address in the parent node */
+	__set_data_blkaddr(dn, blk_addr);
+
+	write_lock(&fi->ext.ext_lock);
+
+	start_fofs = fi->ext.fofs;
+	end_fofs = fi->ext.fofs + fi->ext.len - 1;
+	start_blkaddr = fi->ext.blk_addr;
+	end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
+
+	/* Drop and initialize the matched extent */
+	if (fi->ext.len == 1 && fofs == start_fofs)
+		fi->ext.len = 0;
+
+	/* Initial extent */
+	if (fi->ext.len == 0) {
+		if (blk_addr != NULL_ADDR) {
+			fi->ext.fofs = fofs;
+			fi->ext.blk_addr = blk_addr;
+			fi->ext.len = 1;
+		}
+		goto end_update;
+	}
+
+	/* Front merge */
+	if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
+		fi->ext.fofs--;
+		fi->ext.blk_addr--;
+		fi->ext.len++;
+		goto end_update;
+	}
+
+	/* Back merge */
+	if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
+		fi->ext.len++;
+		goto end_update;
+	}
+
+	/* Split the existing extent */
+	if (fi->ext.len > 1 &&
+		fofs >= start_fofs && fofs <= end_fofs) {
+		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
+			fi->ext.len = fofs - start_fofs;
+		} else {
+			fi->ext.fofs = fofs + 1;
+			fi->ext.blk_addr = start_blkaddr +
+					fofs - start_fofs + 1;
+			fi->ext.len -= fofs - start_fofs + 1;
+		}
+		goto end_update;
+	}
+	write_unlock(&fi->ext.ext_lock);
+	return;
+
+end_update:
+	write_unlock(&fi->ext.ext_lock);
+	sync_inode_page(dn);
+	return;
+}
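
The merge cases above are easier to follow on concrete numbers. A minimal
userspace sketch of the same front/back-merge rules, using a simplified
extent (hypothetical struct, no locking):

    #include <stdio.h>

    /* simplified stand-in for struct extent_info: one cached extent per file */
    struct ext { unsigned fofs, blk, len; };

    static void update(struct ext *e, unsigned fofs, unsigned blk)
    {
            if (e->len && fofs + 1 == e->fofs && blk + 1 == e->blk) {
                    e->fofs--; e->blk--; e->len++;            /* front merge */
            } else if (e->len && fofs == e->fofs + e->len &&
                       blk == e->blk + e->len) {
                    e->len++;                                 /* back merge */
            } else {
                    e->fofs = fofs; e->blk = blk; e->len = 1; /* new extent */
            }
    }

    int main(void)
    {
            struct ext e = { 10, 100, 3 };  /* offsets 10..12 -> blocks 100..102 */

            update(&e, 13, 103);    /* back merge  -> {10, 100, 4} */
            update(&e, 9, 99);      /* front merge -> {9, 99, 5}  */
            printf("fofs=%u blk=%u len=%u\n", e.fofs, e.blk, e.len);
            return 0;
    }
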
+
+struct page *find_data_page(struct inode *inode, pgoff_t index)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	struct dnode_of_data dn;
+	struct page *page;
+	int err;
+
+	page = find_get_page(mapping, index);
+	if (page && PageUptodate(page))
+		return page;
+	f2fs_put_page(page, 0);
+
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, index, RDONLY_NODE);
+	if (err)
+		return ERR_PTR(err);
+	f2fs_put_dnode(&dn);
+
+	if (dn.data_blkaddr == NULL_ADDR)
+		return ERR_PTR(-ENOENT);
+
+	/* After fallocate(), there is no cached page, but its address is NEW_ADDR */
+	if (dn.data_blkaddr == NEW_ADDR)
+		return ERR_PTR(-EINVAL);
+
+	page = grab_cache_page(mapping, index);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+	if (err) {
+		f2fs_put_page(page, 1);
+		return ERR_PTR(err);
+	}
+	unlock_page(page);
+	return page;
+}
+
+/*
+ * If it tries to access a hole, return an error, because the callers
+ * (functions in dir.c and GC) need to know whether this page exists or not.
+ */
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	struct dnode_of_data dn;
+	struct page *page;
+	int err;
+
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, index, RDONLY_NODE);
+	if (err)
+		return ERR_PTR(err);
+	f2fs_put_dnode(&dn);
+
+	if (dn.data_blkaddr == NULL_ADDR)
+		return ERR_PTR(-ENOENT);
+
+	page = grab_cache_page(mapping, index);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	if (PageUptodate(page))
+		return page;
+
+	BUG_ON(dn.data_blkaddr == NEW_ADDR);
+	BUG_ON(dn.data_blkaddr == NULL_ADDR);
+
+	err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+	if (err) {
+		f2fs_put_page(page, 1);
+		return ERR_PTR(err);
+	}
+	return page;
+}
+
+/*
+ * Caller ensures that this data page is never allocated.
+ * A new zero-filled data page is allocated in the page cache.
+ */
+struct page *get_new_data_page(struct inode *inode, pgoff_t index,
+						bool new_i_size)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+	struct dnode_of_data dn;
+	int err;
+
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, index, 0);
+	if (err)
+		return ERR_PTR(err);
+
+	if (dn.data_blkaddr == NULL_ADDR) {
+		if (reserve_new_block(&dn)) {
+			f2fs_put_dnode(&dn);
+			return ERR_PTR(-ENOSPC);
+		}
+	}
+	f2fs_put_dnode(&dn);
+
+	page = grab_cache_page(mapping, index);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	if (PageUptodate(page))
+		return page;
+
+	if (dn.data_blkaddr == NEW_ADDR) {
+		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+	} else {
+		err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+		if (err) {
+			f2fs_put_page(page, 1);
+			return ERR_PTR(err);
+		}
+	}
+	SetPageUptodate(page);
+
+	if (new_i_size &&
+		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
+		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
+		mark_inode_dirty_sync(inode);
+	}
+	return page;
+}
+
+static void read_end_io(struct bio *bio, int err)
+{
+	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+
+	do {
+		struct page *page = bvec->bv_page;
+
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
+		if (uptodate) {
+			SetPageUptodate(page);
+		} else {
+			ClearPageUptodate(page);
+			SetPageError(page);
+		}
+		unlock_page(page);
+	} while (bvec >= bio->bi_io_vec);
+	kfree(bio->bi_private);
+	bio_put(bio);
+}
+
+/*
+ * Fill the locked page with data located at the given block address.
+ * The read operation is synchronous, and the caller must unlock the page.
+ */
+int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
+					block_t blk_addr, int type)
+{
+	struct block_device *bdev = sbi->sb->s_bdev;
+	bool sync = (type == READ_SYNC);
+	struct bio *bio;
+
+	/* This page may already have been read by other threads */
+	if (PageUptodate(page)) {
+		if (!sync)
+			unlock_page(page);
+		return 0;
+	}
+
+	down_read(&sbi->bio_sem);
+
+	/* Allocate a new bio */
+	bio = f2fs_bio_alloc(bdev, 1);
+
+	/* Initialize the bio */
+	bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+	bio->bi_end_io = read_end_io;
+
+	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+		kfree(bio->bi_private);
+		bio_put(bio);
+		up_read(&sbi->bio_sem);
+		return -EFAULT;
+	}
+
+	submit_bio(type, bio);
+	up_read(&sbi->bio_sem);
+
+	/* wait for read completion if sync */
+	if (sync) {
+		lock_page(page);
+		if (PageError(page))
+			return -EIO;
+	}
+	return 0;
+}
+
+/*
+ * This function should be used only by the data read flow; it does not
+ * check the "create" flag that indicates block allocation.
+ * This special behavior exists to exploit the VFS readahead mechanism.
+ */
+static int get_data_block_ro(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create)
+{
+	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
+	unsigned maxblocks = bh_result->b_size >> blkbits;
+	struct dnode_of_data dn;
+	pgoff_t pgofs;
+	int err;
+
+	/* Get the page offset from the block offset (iblock) */
+	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
+
+	if (check_extent_cache(inode, pgofs, bh_result))
+		return 0;
+
+	/* When reading holes, we need the corresponding node page */
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE);
+	if (err)
+		return (err == -ENOENT) ? 0 : err;
+
+	/* It does not support data allocation */
+	BUG_ON(create);
+
+	if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
+		int i;
+		unsigned int end_offset;
+
+		end_offset = IS_INODE(dn.node_page) ?
+				ADDRS_PER_INODE :
+				ADDRS_PER_BLOCK;
+
+		clear_buffer_new(bh_result);
+
+		/* Give more consecutive addresses for readahead */
+		for (i = 0; i < end_offset - dn.ofs_in_node; i++)
+			if (((datablock_addr(dn.node_page,
+							dn.ofs_in_node + i))
+				!= (dn.data_blkaddr + i)) || maxblocks == i)
+				break;
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+		bh_result->b_size = (i << blkbits);
+	}
+	f2fs_put_dnode(&dn);
+	return 0;
+}
+
+static int f2fs_read_data_page(struct file *file, struct page *page)
+{
+	return mpage_readpage(page, get_data_block_ro);
+}
+
+static int f2fs_read_data_pages(struct file *file,
+			struct address_space *mapping,
+			struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
+}
+
+int do_write_data_page(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	block_t old_blk_addr, new_blk_addr;
+	struct dnode_of_data dn;
+	int err = 0;
+
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, page->index, RDONLY_NODE);
+	if (err)
+		return err;
+
+	old_blk_addr = dn.data_blkaddr;
+
+	/* This page is already truncated */
+	if (old_blk_addr == NULL_ADDR)
+		goto out_writepage;
+
+	set_page_writeback(page);
+
+	/*
+	 * If the current allocation needs SSR,
+	 * it is better to do in-place writes for the updated data.
+	 */
+	if (old_blk_addr != NEW_ADDR && !is_cold_data(page) &&
+				need_inplace_update(inode)) {
+		rewrite_data_page(F2FS_SB(inode->i_sb), page,
+						old_blk_addr);
+	} else {
+		write_data_page(inode, page, &dn,
+				old_blk_addr, &new_blk_addr);
+		update_extent_cache(new_blk_addr, &dn);
+		F2FS_I(inode)->data_version =
+			le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
+	}
+out_writepage:
+	f2fs_put_dnode(&dn);
+	return err;
+}
+
+static int f2fs_write_data_page(struct page *page,
+					struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	loff_t i_size = i_size_read(inode);
+	const pgoff_t end_index = ((unsigned long long) i_size)
+							>> PAGE_CACHE_SHIFT;
+	unsigned offset;
+	int err = 0;
+
+	if (page->index < end_index)
+		goto out;
+
+	/*
+	 * If the offset is out of range of the file size,
+	 * this page does not need to be written to disk.
+	 */
+	offset = i_size & (PAGE_CACHE_SIZE - 1);
+	if ((page->index >= end_index + 1) || !offset) {
+		if (S_ISDIR(inode->i_mode)) {
+			dec_page_count(sbi, F2FS_DIRTY_DENTS);
+			inode_dec_dirty_dents(inode);
+		}
+		goto unlock_out;
+	}
+
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+out:
+	if (sbi->por_doing)
+		goto redirty_out;
+
+	if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page))
+		goto redirty_out;
+
+	mutex_lock_op(sbi, DATA_WRITE);
+	if (S_ISDIR(inode->i_mode)) {
+		dec_page_count(sbi, F2FS_DIRTY_DENTS);
+		inode_dec_dirty_dents(inode);
+	}
+	err = do_write_data_page(page);
+	if (err && err != -ENOENT) {
+		wbc->pages_skipped++;
+		set_page_dirty(page);
+	}
+	mutex_unlock_op(sbi, DATA_WRITE);
+
+	if (wbc->for_reclaim)
+		f2fs_submit_bio(sbi, DATA, true);
+
+	if (err == -ENOENT)
+		goto unlock_out;
+
+	clear_cold_data(page);
+	unlock_page(page);
+
+	if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode))
+		f2fs_balance_fs(sbi);
+	return 0;
+
+unlock_out:
+	unlock_page(page);
+	return (err == -ENOENT) ? 0 : err;
+
+redirty_out:
+	wbc->pages_skipped++;
+	set_page_dirty(page);
+	return AOP_WRITEPAGE_ACTIVATE;
+}
+
+#define MAX_DESIRED_PAGES_WP	4096
+
+static int f2fs_write_data_pages(struct address_space *mapping,
+			    struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	int ret;
+	long excess_nrtw = 0, desired_nrtw;
+
+	if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
+		desired_nrtw = MAX_DESIRED_PAGES_WP;
+		excess_nrtw = desired_nrtw - wbc->nr_to_write;
+		wbc->nr_to_write = desired_nrtw;
+	}
+
+	if (!S_ISDIR(inode->i_mode))
+		mutex_lock(&sbi->writepages);
+	ret = generic_writepages(mapping, wbc);
+	if (!S_ISDIR(inode->i_mode))
+		mutex_unlock(&sbi->writepages);
+	f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
+
+	remove_dirty_dir_inode(inode);
+
+	wbc->nr_to_write -= excess_nrtw;
+	return ret;
+}
+
+static int f2fs_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
+{
+	struct inode *inode = mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct page *page;
+	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
+	struct dnode_of_data dn;
+	int err = 0;
+
+	/* for nobh_write_end */
+	*fsdata = NULL;
+
+	f2fs_balance_fs(sbi);
+
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
+	mutex_lock_op(sbi, DATA_NEW);
+
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, index, 0);
+	if (err) {
+		mutex_unlock_op(sbi, DATA_NEW);
+		f2fs_put_page(page, 1);
+		return err;
+	}
+
+	if (dn.data_blkaddr == NULL_ADDR) {
+		err = reserve_new_block(&dn);
+		if (err) {
+			f2fs_put_dnode(&dn);
+			mutex_unlock_op(sbi, DATA_NEW);
+			f2fs_put_page(page, 1);
+			return err;
+		}
+	}
+	f2fs_put_dnode(&dn);
+
+	mutex_unlock_op(sbi, DATA_NEW);
+
+	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
+		return 0;
+
+	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+		unsigned end = start + len;
+
+		/* Reading beyond i_size is simple: memset to zero */
+		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
+		return 0;
+	}
+
+	if (dn.data_blkaddr == NEW_ADDR) {
+		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+	} else {
+		err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+		if (err) {
+			f2fs_put_page(page, 1);
+			return err;
+		}
+	}
+	SetPageUptodate(page);
+	clear_cold_data(page);
+	return 0;
+}
+
+static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
+		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+
+	if (rw == WRITE)
+		return 0;
+
+	/* Needs synchronization with the cleaner */
+	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+						  get_data_block_ro);
+}
+
+static void f2fs_invalidate_data_page(struct page *page, unsigned long offset)
+{
+	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
+		dec_page_count(sbi, F2FS_DIRTY_DENTS);
+		inode_dec_dirty_dents(inode);
+	}
+	ClearPagePrivate(page);
+}
+
+static int f2fs_release_data_page(struct page *page, gfp_t wait)
+{
+	ClearPagePrivate(page);
+	return 0;
+}
+
+static int f2fs_set_data_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	SetPageUptodate(page);
+	if (!PageDirty(page)) {
+		__set_page_dirty_nobuffers(page);
+		set_dirty_dir_page(inode, page);
+		return 1;
+	}
+	return 0;
+}
+
+const struct address_space_operations f2fs_dblock_aops = {
+	.readpage	= f2fs_read_data_page,
+	.readpages	= f2fs_read_data_pages,
+	.writepage	= f2fs_write_data_page,
+	.writepages	= f2fs_write_data_pages,
+	.write_begin	= f2fs_write_begin,
+	.write_end	= nobh_write_end,
+	.set_page_dirty	= f2fs_set_data_page_dirty,
+	.invalidatepage	= f2fs_invalidate_data_page,
+	.releasepage	= f2fs_release_data_page,
+	.direct_IO	= f2fs_direct_IO,
+};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
new file mode 100644
index 0000000..0e0380a
--- /dev/null
+++ b/fs/f2fs/debug.c
@@ -0,0 +1,361 @@
+/*
+ * f2fs debugging statistics
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ * Copyright (c) 2012 Linux Foundation
+ * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/backing-dev.h>
+#include <linux/proc_fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/blkdev.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "gc.h"
+
+static LIST_HEAD(f2fs_stat_list);
+static struct dentry *debugfs_root;
+
+static void update_general_status(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_stat_info *si = sbi->stat_info;
+	int i;
+
+	/* validity check of the segment numbers */
+	si->hit_ext = sbi->read_hit_ext;
+	si->total_ext = sbi->total_hit_ext;
+	si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
+	si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
+	si->ndirty_dirs = sbi->n_dirty_dirs;
+	si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
+	si->rsvd_segs = reserved_segments(sbi);
+	si->overp_segs = overprovision_segments(sbi);
+	si->valid_count = valid_user_blocks(sbi);
+	si->valid_node_count = valid_node_count(sbi);
+	si->valid_inode_count = valid_inode_count(sbi);
+	si->utilization = utilization(sbi);
+
+	si->free_segs = free_segments(sbi);
+	si->free_secs = free_sections(sbi);
+	si->prefree_count = prefree_segments(sbi);
+	si->dirty_count = dirty_segments(sbi);
+	si->node_pages = sbi->node_inode->i_mapping->nrpages;
+	si->meta_pages = sbi->meta_inode->i_mapping->nrpages;
+	si->nats = NM_I(sbi)->nat_cnt;
+	si->sits = SIT_I(sbi)->dirty_sentries;
+	si->fnids = NM_I(sbi)->fcnt;
+	si->bg_gc = sbi->bg_gc;
+	si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
+		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
+		/ 2;
+	si->util_valid = (int)(written_block_count(sbi) >>
+						sbi->log_blocks_per_seg)
+		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
+		/ 2;
+	si->util_invalid = 50 - si->util_free - si->util_valid;
+	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
+		struct curseg_info *curseg = CURSEG_I(sbi, i);
+		si->curseg[i] = curseg->segno;
+		si->cursec[i] = curseg->segno / sbi->segs_per_sec;
+		si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
+	}
+
+	for (i = 0; i < 2; i++) {
+		si->segment_count[i] = sbi->segment_count[i];
+		si->block_count[i] = sbi->block_count[i];
+	}
+}
+
+/*
+ * This function calculates the BDF of every segment.
+ */
+static void update_sit_info(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_stat_info *si = sbi->stat_info;
+	unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int segno, vblocks;
+	int ndirty = 0;
+
+	bimodal = 0;
+	total_vblocks = 0;
+	blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
+	hblks_per_sec = blks_per_sec / 2;
+	mutex_lock(&sit_i->sentry_lock);
+	for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
+		vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+		dist = abs(vblocks - hblks_per_sec);
+		bimodal += dist * dist;
+
+		if (vblocks > 0 && vblocks < blks_per_sec) {
+			total_vblocks += vblocks;
+			ndirty++;
+		}
+	}
+	mutex_unlock(&sit_i->sentry_lock);
+	dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100;
+	si->bimodal = bimodal / dist;
+	if (si->dirty_count)
+		si->avg_vblocks = total_vblocks / ndirty;
+	else
+		si->avg_vblocks = 0;
+}
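
To see what the BDF figure measures, here is a toy standalone version of the
same computation over hand-picked per-section valid-block counts (the section
size and counts are made up):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            unsigned int vblocks[] = { 0, 512, 256, 480 };  /* 4 sections */
            unsigned int half = 256;        /* half of blks_per_sec (512) */
            unsigned int i, n = 4, bimodal = 0, dist;

            for (i = 0; i < n; i++) {
                    dist = abs((int)vblocks[i] - (int)half);
                    bimodal += dist * dist;
            }
            /* same normalization as update_sit_info(): percent of the maximum */
            printf("BDF = %u\n", bimodal / (n * half * half / 100));
            return 0;
    }

Roughly, fully valid or fully free sections push the score up, while
half-valid sections (the costly ones to clean) pull it down.
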
+
+/*
+ * This function calculates the memory footprint.
+ */
+static void update_mem_info(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_stat_info *si = sbi->stat_info;
+	unsigned npages;
+
+	if (si->base_mem)
+		goto get_cache;
+
+	si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
+	si->base_mem += 2 * sizeof(struct f2fs_inode_info);
+	si->base_mem += sizeof(*sbi->ckpt);
+
+	/* build sm */
+	si->base_mem += sizeof(struct f2fs_sm_info);
+
+	/* build sit */
+	si->base_mem += sizeof(struct sit_info);
+	si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry);
+	si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
+	si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi);
+	if (sbi->segs_per_sec > 1)
+		si->base_mem += sbi->total_sections *
+			sizeof(struct sec_entry);
+	si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
+
+	/* build free segmap */
+	si->base_mem += sizeof(struct free_segmap_info);
+	si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
+	si->base_mem += f2fs_bitmap_size(sbi->total_sections);
+
+	/* build curseg */
+	si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
+	si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE;
+
+	/* build dirty segmap */
+	si->base_mem += sizeof(struct dirty_seglist_info);
+	si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
+	si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi));
+
+	/* build nm */
+	si->base_mem += sizeof(struct f2fs_nm_info);
+	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+
+	/* build gc */
+	si->base_mem += sizeof(struct f2fs_gc_kthread);
+
+get_cache:
+	/* free nids */
+	si->cache_mem = NM_I(sbi)->fcnt;
+	si->cache_mem += NM_I(sbi)->nat_cnt;
+	npages = sbi->node_inode->i_mapping->nrpages;
+	si->cache_mem += npages << PAGE_CACHE_SHIFT;
+	npages = sbi->meta_inode->i_mapping->nrpages;
+	si->cache_mem += npages << PAGE_CACHE_SHIFT;
+	si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
+	si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
+}
+
+static int stat_show(struct seq_file *s, void *v)
+{
+	struct f2fs_stat_info *si, *next;
+	int i = 0;
+	int j;
+
+	list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
+
+		mutex_lock(&si->stat_lock);
+		if (!si->sbi) {
+			mutex_unlock(&si->stat_lock);
+			continue;
+		}
+		update_general_status(si->sbi);
+
+		seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++);
+		seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ",
+			   si->nat_area_segs, si->sit_area_segs);
+		seq_printf(s, "[SSA: %d] [MAIN: %d",
+			   si->ssa_area_segs, si->main_area_segs);
+		seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
+			   si->overp_segs, si->rsvd_segs);
+		seq_printf(s, "Utilization: %d%% (%d valid blocks)\n",
+			   si->utilization, si->valid_count);
+		seq_printf(s, "  - Node: %u (Inode: %u, ",
+			   si->valid_node_count, si->valid_inode_count);
+		seq_printf(s, "Other: %u)\n  - Data: %u\n",
+			   si->valid_node_count - si->valid_inode_count,
+			   si->valid_count - si->valid_node_count);
+		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
+			   si->main_area_segs, si->main_area_sections,
+			   si->main_area_zones);
+		seq_printf(s, "  - COLD  data: %d, %d, %d\n",
+			   si->curseg[CURSEG_COLD_DATA],
+			   si->cursec[CURSEG_COLD_DATA],
+			   si->curzone[CURSEG_COLD_DATA]);
+		seq_printf(s, "  - WARM  data: %d, %d, %d\n",
+			   si->curseg[CURSEG_WARM_DATA],
+			   si->cursec[CURSEG_WARM_DATA],
+			   si->curzone[CURSEG_WARM_DATA]);
+		seq_printf(s, "  - HOT   data: %d, %d, %d\n",
+			   si->curseg[CURSEG_HOT_DATA],
+			   si->cursec[CURSEG_HOT_DATA],
+			   si->curzone[CURSEG_HOT_DATA]);
+		seq_printf(s, "  - Dir   dnode: %d, %d, %d\n",
+			   si->curseg[CURSEG_HOT_NODE],
+			   si->cursec[CURSEG_HOT_NODE],
+			   si->curzone[CURSEG_HOT_NODE]);
+		seq_printf(s, "  - File   dnode: %d, %d, %d\n",
+			   si->curseg[CURSEG_WARM_NODE],
+			   si->cursec[CURSEG_WARM_NODE],
+			   si->curzone[CURSEG_WARM_NODE]);
+		seq_printf(s, "  - Indir nodes: %d, %d, %d\n",
+			   si->curseg[CURSEG_COLD_NODE],
+			   si->cursec[CURSEG_COLD_NODE],
+			   si->curzone[CURSEG_COLD_NODE]);
+		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
+			   si->main_area_segs - si->dirty_count -
+			   si->prefree_count - si->free_segs,
+			   si->dirty_count);
+		seq_printf(s, "  - Prefree: %d\n  - Free: %d (%d)\n\n",
+			   si->prefree_count, si->free_segs, si->free_secs);
+		seq_printf(s, "GC calls: %d (BG: %d)\n",
+			   si->call_count, si->bg_gc);
+		seq_printf(s, "  - data segments : %d\n", si->data_segs);
+		seq_printf(s, "  - node segments : %d\n", si->node_segs);
+		seq_printf(s, "Try to move %d blocks\n", si->tot_blks);
+		seq_printf(s, "  - data blocks : %d\n", si->data_blks);
+		seq_printf(s, "  - node blocks : %d\n", si->node_blks);
+		seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
+			   si->hit_ext, si->total_ext);
+		seq_printf(s, "\nBalancing F2FS Async:\n");
+		seq_printf(s, "  - nodes %4d in %4d\n",
+			   si->ndirty_node, si->node_pages);
+		seq_printf(s, "  - dents %4d in dirs:%4d\n",
+			   si->ndirty_dent, si->ndirty_dirs);
+		seq_printf(s, "  - meta %4d in %4d\n",
+			   si->ndirty_meta, si->meta_pages);
+		seq_printf(s, "  - NATs %5d > %lu\n",
+			   si->nats, NM_WOUT_THRESHOLD);
+		seq_printf(s, "  - SITs: %5d\n  - free_nids: %5d\n",
+			   si->sits, si->fnids);
+		seq_printf(s, "\nDistribution of User Blocks:");
+		seq_printf(s, " [ valid | invalid | free ]\n");
+		seq_printf(s, "  [");
+
+		for (j = 0; j < si->util_valid; j++)
+			seq_printf(s, "-");
+		seq_printf(s, "|");
+
+		for (j = 0; j < si->util_invalid; j++)
+			seq_printf(s, "-");
+		seq_printf(s, "|");
+
+		for (j = 0; j < si->util_free; j++)
+			seq_printf(s, "-");
+		seq_printf(s, "]\n\n");
+		seq_printf(s, "SSR: %u blocks in %u segments\n",
+			   si->block_count[SSR], si->segment_count[SSR]);
+		seq_printf(s, "LFS: %u blocks in %u segments\n",
+			   si->block_count[LFS], si->segment_count[LFS]);
+
+		/* segment usage info */
+		update_sit_info(si->sbi);
+		seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n",
+			   si->bimodal, si->avg_vblocks);
+
+		/* memory footprint */
+		update_mem_info(si->sbi);
+		seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
+				(si->base_mem + si->cache_mem) >> 10,
+				si->base_mem >> 10, si->cache_mem >> 10);
+		mutex_unlock(&si->stat_lock);
+	}
+	return 0;
+}
+
+static int stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, stat_show, inode->i_private);
+}
+
+static const struct file_operations stat_fops = {
+	.open = stat_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int init_stats(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	struct f2fs_stat_info *si;
+
+	sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
+	if (!sbi->stat_info)
+		return -ENOMEM;
+
+	si = sbi->stat_info;
+	mutex_init(&si->stat_lock);
+	list_add_tail(&si->stat_list, &f2fs_stat_list);
+
+	si->all_area_segs = le32_to_cpu(raw_super->segment_count);
+	si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
+	si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
+	si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa);
+	si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
+	si->main_area_sections = le32_to_cpu(raw_super->section_count);
+	si->main_area_zones = si->main_area_sections /
+				le32_to_cpu(raw_super->secs_per_zone);
+	si->sbi = sbi;
+	return 0;
+}
+
+int f2fs_build_stats(struct f2fs_sb_info *sbi)
+{
+	int retval;
+
+	retval = init_stats(sbi);
+	if (retval)
+		return retval;
+
+	if (!debugfs_root)
+		debugfs_root = debugfs_create_dir("f2fs", NULL);
+
+	debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
+	return 0;
+}
+
+void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_stat_info *si = sbi->stat_info;
+
+	list_del(&si->stat_list);
+	mutex_lock(&si->stat_lock);
+	si->sbi = NULL;
+	mutex_unlock(&si->stat_lock);
+	kfree(sbi->stat_info);
+}
+
+void destroy_root_stats(void)
+{
+	debugfs_remove_recursive(debugfs_root);
+	debugfs_root = NULL;
+}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
new file mode 100644
index 0000000..b4e24f3
--- /dev/null
+++ b/fs/f2fs/dir.c
@@ -0,0 +1,672 @@
+/*
+ * fs/f2fs/dir.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "acl.h"
+
+static unsigned long dir_blocks(struct inode *inode)
+{
+	return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
+							>> PAGE_CACHE_SHIFT;
+}
+
+static unsigned int dir_buckets(unsigned int level)
+{
+	if (level < MAX_DIR_HASH_DEPTH / 2)
+		return 1 << level;
+	else
+		return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
+}
+
+static unsigned int bucket_blocks(unsigned int level)
+{
+	if (level < MAX_DIR_HASH_DEPTH / 2)
+		return 2;
+	else
+		return 4;
+}
+
+static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+	[F2FS_FT_UNKNOWN]	= DT_UNKNOWN,
+	[F2FS_FT_REG_FILE]	= DT_REG,
+	[F2FS_FT_DIR]		= DT_DIR,
+	[F2FS_FT_CHRDEV]	= DT_CHR,
+	[F2FS_FT_BLKDEV]	= DT_BLK,
+	[F2FS_FT_FIFO]		= DT_FIFO,
+	[F2FS_FT_SOCK]		= DT_SOCK,
+	[F2FS_FT_SYMLINK]	= DT_LNK,
+};
+
+#define S_SHIFT 12
+static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= F2FS_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= F2FS_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= F2FS_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= F2FS_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= F2FS_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= F2FS_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= F2FS_FT_SYMLINK,
+};
+
+static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+{
+	mode_t mode = inode->i_mode;
+	de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
+}
+
+static unsigned long dir_block_index(unsigned int level, unsigned int idx)
+{
+	unsigned long i;
+	unsigned long bidx = 0;
+
+	for (i = 0; i < level; i++)
+		bidx += dir_buckets(i) * bucket_blocks(i);
+	bidx += idx * bucket_blocks(level);
+	return bidx;
+}
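
A standalone sketch of the resulting geometry, assuming MAX_DIR_HASH_DEPTH is
63 as defined in f2fs_fs.h (that header is not part of this hunk):

    #include <stdio.h>

    #define MAX_DIR_HASH_DEPTH 63  /* assumed value from f2fs_fs.h */

    static unsigned int dir_buckets(unsigned int level)
    {
            return level < MAX_DIR_HASH_DEPTH / 2 ?
                    1u << level : 1u << (MAX_DIR_HASH_DEPTH / 2 - 1);
    }

    static unsigned int bucket_blocks(unsigned int level)
    {
            return level < MAX_DIR_HASH_DEPTH / 2 ? 2 : 4;
    }

    int main(void)
    {
            unsigned long bidx = 0;
            unsigned int level;

            for (level = 0; level < 4; level++) {
                    printf("level %u starts at block %lu (%u buckets x %u blocks)\n",
                           level, bidx, dir_buckets(level), bucket_blocks(level));
                    bidx += (unsigned long)dir_buckets(level) * bucket_blocks(level);
            }
            return 0;
    }

So level 0 is one 2-block bucket at block 0, level 1 starts at block 2, level
2 at block 6, and so on; dir_block_index() is exactly this running sum plus
idx * bucket_blocks(level).
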
+
+static bool early_match_name(const char *name, int namelen,
+			f2fs_hash_t namehash, struct f2fs_dir_entry *de)
+{
+	if (le16_to_cpu(de->name_len) != namelen)
+		return false;
+
+	if (de->hash_code != namehash)
+		return false;
+
+	return true;
+}
+
+static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
+			const char *name, int namelen, int *max_slots,
+			f2fs_hash_t namehash, struct page **res_page)
+{
+	struct f2fs_dir_entry *de;
+	unsigned long bit_pos, end_pos, next_pos;
+	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
+	int slots;
+
+	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+					NR_DENTRY_IN_BLOCK, 0);
+	while (bit_pos < NR_DENTRY_IN_BLOCK) {
+		de = &dentry_blk->dentry[bit_pos];
+		slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+
+		if (early_match_name(name, namelen, namehash, de)) {
+			if (!memcmp(dentry_blk->filename[bit_pos],
+							name, namelen)) {
+				*res_page = dentry_page;
+				goto found;
+			}
+		}
+		next_pos = bit_pos + slots;
+		bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+				NR_DENTRY_IN_BLOCK, next_pos);
+		if (bit_pos >= NR_DENTRY_IN_BLOCK)
+			end_pos = NR_DENTRY_IN_BLOCK;
+		else
+			end_pos = bit_pos;
+		if (*max_slots < end_pos - next_pos)
+			*max_slots = end_pos - next_pos;
+	}
+
+	de = NULL;
+	kunmap(dentry_page);
+found:
+	return de;
+}
+
+static struct f2fs_dir_entry *find_in_level(struct inode *dir,
+		unsigned int level, const char *name, int namelen,
+			f2fs_hash_t namehash, struct page **res_page)
+{
+	int s = GET_DENTRY_SLOTS(namelen);
+	unsigned int nbucket, nblock;
+	unsigned int bidx, end_block;
+	struct page *dentry_page;
+	struct f2fs_dir_entry *de = NULL;
+	bool room = false;
+	int max_slots = 0;
+
+	BUG_ON(level > MAX_DIR_HASH_DEPTH);
+
+	nbucket = dir_buckets(level);
+	nblock = bucket_blocks(level);
+
+	bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
+	end_block = bidx + nblock;
+
+	for (; bidx < end_block; bidx++) {
+		/* no need to allocate new dentry pages to all the indices */
+		dentry_page = find_data_page(dir, bidx);
+		if (IS_ERR(dentry_page)) {
+			room = true;
+			continue;
+		}
+
+		de = find_in_block(dentry_page, name, namelen,
+					&max_slots, namehash, res_page);
+		if (de)
+			break;
+
+		if (max_slots >= s)
+			room = true;
+		f2fs_put_page(dentry_page, 0);
+	}
+
+	if (!de && room && F2FS_I(dir)->chash != namehash) {
+		F2FS_I(dir)->chash = namehash;
+		F2FS_I(dir)->clevel = level;
+	}
+
+	return de;
+}
+
+/*
+ * Find an entry in the specified directory with the given name.
+ * It returns the page where the entry was found (as a parameter - res_page),
+ * and the entry itself. The page is returned mapped and unlocked.
+ * The entry is guaranteed to be valid.
+ */
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
+			struct qstr *child, struct page **res_page)
+{
+	const char *name = child->name;
+	int namelen = child->len;
+	unsigned long npages = dir_blocks(dir);
+	struct f2fs_dir_entry *de = NULL;
+	f2fs_hash_t name_hash;
+	unsigned int max_depth;
+	unsigned int level;
+
+	if (npages == 0)
+		return NULL;
+
+	*res_page = NULL;
+
+	name_hash = f2fs_dentry_hash(name, namelen);
+	max_depth = F2FS_I(dir)->i_current_depth;
+
+	for (level = 0; level < max_depth; level++) {
+		de = find_in_level(dir, level, name,
+				namelen, name_hash, res_page);
+		if (de)
+			break;
+	}
+	if (!de && F2FS_I(dir)->chash != name_hash) {
+		F2FS_I(dir)->chash = name_hash;
+		F2FS_I(dir)->clevel = level - 1;
+	}
+	return de;
+}
+
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
+{
+	struct page *page = NULL;
+	struct f2fs_dir_entry *de = NULL;
+	struct f2fs_dentry_block *dentry_blk = NULL;
+
+	page = get_lock_data_page(dir, 0);
+	if (IS_ERR(page))
+		return NULL;
+
+	dentry_blk = kmap(page);
+	de = &dentry_blk->dentry[1];
+	*p = page;
+	unlock_page(page);
+	return de;
+}
+
+ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
+{
+	ino_t res = 0;
+	struct f2fs_dir_entry *de;
+	struct page *page;
+
+	de = f2fs_find_entry(dir, qstr, &page);
+	if (de) {
+		res = le32_to_cpu(de->ino);
+		kunmap(page);
+		f2fs_put_page(page, 0);
+	}
+
+	return res;
+}
+
+void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
+		struct page *page, struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+
+	mutex_lock_op(sbi, DENTRY_OPS);
+	lock_page(page);
+	wait_on_page_writeback(page);
+	de->ino = cpu_to_le32(inode->i_ino);
+	set_de_type(de, inode);
+	kunmap(page);
+	set_page_dirty(page);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(dir);
+
+	/* update parent inode number before releasing dentry page */
+	F2FS_I(inode)->i_pino = dir->i_ino;
+
+	f2fs_put_page(page, 1);
+	mutex_unlock_op(sbi, DENTRY_OPS);
+}
+
+void init_dent_inode(struct dentry *dentry, struct page *ipage)
+{
+	struct f2fs_node *rn;
+
+	if (IS_ERR(ipage))
+		return;
+
+	wait_on_page_writeback(ipage);
+
+	/* copy dentry info to this inode page */
+	rn = (struct f2fs_node *)page_address(ipage);
+	rn->i.i_namelen = cpu_to_le32(dentry->d_name.len);
+	memcpy(rn->i.i_name, dentry->d_name.name, dentry->d_name.len);
+	set_page_dirty(ipage);
+}
+
+static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+
+	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
+		int err;
+		err = new_inode_page(inode, dentry);
+		if (err)
+			return err;
+
+		if (S_ISDIR(inode->i_mode)) {
+			err = f2fs_make_empty(inode, dir);
+			if (err) {
+				remove_inode_page(inode);
+				return err;
+			}
+		}
+
+		err = f2fs_init_acl(inode, dir);
+		if (err) {
+			remove_inode_page(inode);
+			return err;
+		}
+	} else {
+		struct page *ipage;
+		ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
+		if (IS_ERR(ipage))
+			return PTR_ERR(ipage);
+		init_dent_inode(dentry, ipage);
+		f2fs_put_page(ipage, 1);
+	}
+	if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
+		inc_nlink(inode);
+		f2fs_write_inode(inode, NULL);
+	}
+	return 0;
+}
+
+static void update_parent_metadata(struct inode *dir, struct inode *inode,
+						unsigned int current_depth)
+{
+	bool need_dir_update = false;
+
+	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
+		if (S_ISDIR(inode->i_mode)) {
+			inc_nlink(dir);
+			need_dir_update = true;
+		}
+		clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
+	}
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	if (F2FS_I(dir)->i_current_depth != current_depth) {
+		F2FS_I(dir)->i_current_depth = current_depth;
+		need_dir_update = true;
+	}
+
+	if (need_dir_update)
+		f2fs_write_inode(dir, NULL);
+	else
+		mark_inode_dirty(dir);
+
+	if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
+		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+}
+
+static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
+{
+	int bit_start = 0;
+	int zero_start, zero_end;
+next:
+	zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap,
+						NR_DENTRY_IN_BLOCK,
+						bit_start);
+	if (zero_start >= NR_DENTRY_IN_BLOCK)
+		return NR_DENTRY_IN_BLOCK;
+
+	zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap,
+						NR_DENTRY_IN_BLOCK,
+						zero_start);
+	if (zero_end - zero_start >= slots)
+		return zero_start;
+
+	bit_start = zero_end + 1;
+
+	if (zero_end + 1 >= NR_DENTRY_IN_BLOCK)
+		return NR_DENTRY_IN_BLOCK;
+	goto next;
+}
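
The search above wants a run of clear bits wide enough for the whole name;
GET_DENTRY_SLOTS is assumed to round the name length up to 8-byte filename
slots (F2FS_SLOT_LEN in f2fs_fs.h). A byte-per-slot userspace sketch of the
same scan:

    #include <stdio.h>

    #define NR_DENTRY 16    /* shrunk from NR_DENTRY_IN_BLOCK for the example */

    /* find the first run of at least "slots" free entries */
    static int find_room(const unsigned char *bitmap, int slots)
    {
            int start = 0, i, run = 0;

            for (i = 0; i < NR_DENTRY; i++) {
                    if (bitmap[i]) {        /* slot in use: restart the run */
                            run = 0;
                            start = i + 1;
                    } else if (++run >= slots) {
                            return start;
                    }
            }
            return NR_DENTRY;       /* no room in this block */
    }

    int main(void)
    {
            unsigned char map[NR_DENTRY] = { 1, 1, 0, 1, 0, 0, 0, 1 };
            int namelen = 20;
            int slots = (namelen + 7) / 8;  /* GET_DENTRY_SLOTS, 8-byte slots */

            printf("need %d slots, found at bit %d\n", slots, find_room(map, slots));
            return 0;
    }
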
+
+int f2fs_add_link(struct dentry *dentry, struct inode *inode)
+{
+	unsigned int bit_pos;
+	unsigned int level;
+	unsigned int current_depth;
+	unsigned long bidx, block;
+	f2fs_hash_t dentry_hash;
+	struct f2fs_dir_entry *de;
+	unsigned int nbucket, nblock;
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	struct page *dentry_page = NULL;
+	struct f2fs_dentry_block *dentry_blk = NULL;
+	int slots = GET_DENTRY_SLOTS(namelen);
+	int err = 0;
+	int i;
+
+	dentry_hash = f2fs_dentry_hash(name, dentry->d_name.len);
+	level = 0;
+	current_depth = F2FS_I(dir)->i_current_depth;
+	if (F2FS_I(dir)->chash == dentry_hash) {
+		level = F2FS_I(dir)->clevel;
+		F2FS_I(dir)->chash = 0;
+	}
+
+start:
+	if (current_depth == MAX_DIR_HASH_DEPTH)
+		return -ENOSPC;
+
+	/* Increase the depth, if required */
+	if (level == current_depth)
+		++current_depth;
+
+	nbucket = dir_buckets(level);
+	nblock = bucket_blocks(level);
+
+	bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
+
+	for (block = bidx; block <= (bidx + nblock - 1); block++) {
+		mutex_lock_op(sbi, DENTRY_OPS);
+		dentry_page = get_new_data_page(dir, block, true);
+		if (IS_ERR(dentry_page)) {
+			mutex_unlock_op(sbi, DENTRY_OPS);
+			return PTR_ERR(dentry_page);
+		}
+
+		dentry_blk = kmap(dentry_page);
+		bit_pos = room_for_filename(dentry_blk, slots);
+		if (bit_pos < NR_DENTRY_IN_BLOCK)
+			goto add_dentry;
+
+		kunmap(dentry_page);
+		f2fs_put_page(dentry_page, 1);
+		mutex_unlock_op(sbi, DENTRY_OPS);
+	}
+
+	/* Move to next level to find the empty slot for new dentry */
+	++level;
+	goto start;
+add_dentry:
+	err = init_inode_metadata(inode, dentry);
+	if (err)
+		goto fail;
+
+	wait_on_page_writeback(dentry_page);
+
+	de = &dentry_blk->dentry[bit_pos];
+	de->hash_code = dentry_hash;
+	de->name_len = cpu_to_le16(namelen);
+	memcpy(dentry_blk->filename[bit_pos], name, namelen);
+	de->ino = cpu_to_le32(inode->i_ino);
+	set_de_type(de, inode);
+	for (i = 0; i < slots; i++)
+		test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+	set_page_dirty(dentry_page);
+
+	update_parent_metadata(dir, inode, current_depth);
+
+	/* update parent inode number before releasing dentry page */
+	F2FS_I(inode)->i_pino = dir->i_ino;
+fail:
+	kunmap(dentry_page);
+	f2fs_put_page(dentry_page, 1);
+	mutex_unlock_op(sbi, DENTRY_OPS);
+	return err;
+}
+
+/*
+ * It only removes the dentry from the dentry page; the corresponding name
+ * entry in the name page does not need to be touched during deletion.
+ */
+void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
+						struct inode *inode)
+{
+	struct	f2fs_dentry_block *dentry_blk;
+	unsigned int bit_pos;
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+	void *kaddr = page_address(page);
+	int i;
+
+	mutex_lock_op(sbi, DENTRY_OPS);
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+
+	dentry_blk = (struct f2fs_dentry_block *)kaddr;
+	bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
+	for (i = 0; i < slots; i++)
+		test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+
+	/* Let's check and deallocate this dentry page */
+	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+			NR_DENTRY_IN_BLOCK,
+			0);
+	kunmap(page); /* kunmap - pair of f2fs_find_entry */
+	set_page_dirty(page);
+
+	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+
+	if (inode && S_ISDIR(inode->i_mode)) {
+		drop_nlink(dir);
+		f2fs_write_inode(dir, NULL);
+	} else {
+		mark_inode_dirty(dir);
+	}
+
+	if (inode) {
+		inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+		drop_nlink(inode);
+		if (S_ISDIR(inode->i_mode)) {
+			drop_nlink(inode);
+			i_size_write(inode, 0);
+		}
+		f2fs_write_inode(inode, NULL);
+		if (inode->i_nlink == 0)
+			add_orphan_inode(sbi, inode->i_ino);
+	}
+
+	if (bit_pos == NR_DENTRY_IN_BLOCK) {
+		truncate_hole(dir, page->index, page->index + 1);
+		clear_page_dirty_for_io(page);
+		ClearPageUptodate(page);
+		dec_page_count(sbi, F2FS_DIRTY_DENTS);
+		inode_dec_dirty_dents(dir);
+	}
+	f2fs_put_page(page, 1);
+
+	mutex_unlock_op(sbi, DENTRY_OPS);
+}
+
+int f2fs_make_empty(struct inode *inode, struct inode *parent)
+{
+	struct page *dentry_page;
+	struct f2fs_dentry_block *dentry_blk;
+	struct f2fs_dir_entry *de;
+	void *kaddr;
+
+	dentry_page = get_new_data_page(inode, 0, true);
+	if (IS_ERR(dentry_page))
+		return PTR_ERR(dentry_page);
+
+	kaddr = kmap_atomic(dentry_page);
+	dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+	de = &dentry_blk->dentry[0];
+	de->name_len = cpu_to_le16(1);
+	de->hash_code = 0;
+	de->ino = cpu_to_le32(inode->i_ino);
+	memcpy(dentry_blk->filename[0], ".", 1);
+	set_de_type(de, inode);
+
+	de = &dentry_blk->dentry[1];
+	de->hash_code = 0;
+	de->name_len = cpu_to_le16(2);
+	de->ino = cpu_to_le32(parent->i_ino);
+	memcpy(dentry_blk->filename[1], "..", 2);
+	set_de_type(de, inode);
+
+	test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
+	test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
+	kunmap_atomic(kaddr);
+
+	set_page_dirty(dentry_page);
+	f2fs_put_page(dentry_page, 1);
+	return 0;
+}
+
+bool f2fs_empty_dir(struct inode *dir)
+{
+	unsigned long bidx;
+	struct page *dentry_page;
+	unsigned int bit_pos;
+	struct	f2fs_dentry_block *dentry_blk;
+	unsigned long nblock = dir_blocks(dir);
+
+	for (bidx = 0; bidx < nblock; bidx++) {
+		void *kaddr;
+		dentry_page = get_lock_data_page(dir, bidx);
+		if (IS_ERR(dentry_page)) {
+			if (PTR_ERR(dentry_page) == -ENOENT)
+				continue;
+			else
+				return false;
+		}
+
+		kaddr = kmap_atomic(dentry_page);
+		dentry_blk = (struct f2fs_dentry_block *)kaddr;
+		if (bidx == 0)
+			bit_pos = 2;
+		else
+			bit_pos = 0;
+		bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+						NR_DENTRY_IN_BLOCK,
+						bit_pos);
+		kunmap_atomic(kaddr);
+
+		f2fs_put_page(dentry_page, 1);
+
+		if (bit_pos < NR_DENTRY_IN_BLOCK)
+			return false;
+	}
+	return true;
+}
+
+static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	unsigned long pos = file->f_pos;
+	struct inode *inode = file->f_dentry->d_inode;
+	unsigned long npages = dir_blocks(inode);
+	unsigned char *types = NULL;
+	unsigned int bit_pos = 0, start_bit_pos = 0;
+	int over = 0;
+	struct f2fs_dentry_block *dentry_blk = NULL;
+	struct f2fs_dir_entry *de = NULL;
+	struct page *dentry_page = NULL;
+	unsigned int n = 0;
+	unsigned char d_type = DT_UNKNOWN;
+	int slots;
+
+	types = f2fs_filetype_table;
+	bit_pos = (pos % NR_DENTRY_IN_BLOCK);
+	n = (pos / NR_DENTRY_IN_BLOCK);
+
+	for ( ; n < npages; n++) {
+		dentry_page = get_lock_data_page(inode, n);
+		if (IS_ERR(dentry_page))
+			continue;
+
+		start_bit_pos = bit_pos;
+		dentry_blk = kmap(dentry_page);
+		while (bit_pos < NR_DENTRY_IN_BLOCK) {
+			d_type = DT_UNKNOWN;
+			bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+							NR_DENTRY_IN_BLOCK,
+							bit_pos);
+			if (bit_pos >= NR_DENTRY_IN_BLOCK)
+				break;
+
+			de = &dentry_blk->dentry[bit_pos];
+			if (types && de->file_type < F2FS_FT_MAX)
+				d_type = types[de->file_type];
+
+			over = filldir(dirent,
+					dentry_blk->filename[bit_pos],
+					le16_to_cpu(de->name_len),
+					(n * NR_DENTRY_IN_BLOCK) + bit_pos,
+					le32_to_cpu(de->ino), d_type);
+			if (over) {
+				file->f_pos += bit_pos - start_bit_pos;
+				goto success;
+			}
+			slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+			bit_pos += slots;
+		}
+		bit_pos = 0;
+		file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK;
+		kunmap(dentry_page);
+		f2fs_put_page(dentry_page, 1);
+		dentry_page = NULL;
+	}
+success:
+	if (dentry_page && !IS_ERR(dentry_page)) {
+		kunmap(dentry_page);
+		f2fs_put_page(dentry_page, 1);
+	}
+
+	return 0;
+}
+
+const struct file_operations f2fs_dir_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= f2fs_readdir,
+	.fsync		= f2fs_sync_file,
+	.unlocked_ioctl	= f2fs_ioctl,
+};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
new file mode 100644
index 0000000..a18d63d
--- /dev/null
+++ b/fs/f2fs/f2fs.h
@@ -0,0 +1,1083 @@
+/*
+ * fs/f2fs/f2fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_H
+#define _LINUX_F2FS_H
+
+#include <linux/types.h>
+#include <linux/page-flags.h>
+#include <linux/buffer_head.h>
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/magic.h>
+
+/*
+ * For mount options
+ */
+#define F2FS_MOUNT_BG_GC		0x00000001
+#define F2FS_MOUNT_DISABLE_ROLL_FORWARD	0x00000002
+#define F2FS_MOUNT_DISCARD		0x00000004
+#define F2FS_MOUNT_NOHEAP		0x00000008
+#define F2FS_MOUNT_XATTR_USER		0x00000010
+#define F2FS_MOUNT_POSIX_ACL		0x00000020
+#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY	0x00000040
+
+#define clear_opt(sbi, option)	(sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
+#define set_opt(sbi, option)	(sbi->mount_opt.opt |= F2FS_MOUNT_##option)
+#define test_opt(sbi, option)	(sbi->mount_opt.opt & F2FS_MOUNT_##option)
+
+#define ver_after(a, b)	(typecheck(unsigned long long, a) &&		\
+		typecheck(unsigned long long, b) &&			\
+		((long long)((a) - (b)) > 0))
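
ver_after() relies on signed subtraction so the ordering stays correct even
if the 64-bit version counter ever wraps. A tiny standalone check
(typecheck() dropped for userspace):

    #include <stdio.h>

    #define ver_after(a, b) ((long long)((a) - (b)) > 0)

    int main(void)
    {
            unsigned long long old = ~0ULL;         /* counter about to wrap */
            unsigned long long new = old + 1;       /* wraps to 0 */

            /* a naive "new > old" would compare 0 > ULLONG_MAX and fail */
            printf("naive: %d, ver_after: %d\n", new > old, ver_after(new, old));
            return 0;
    }
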
+
+typedef u64 block_t;
+typedef u32 nid_t;
+
+struct f2fs_mount_info {
+	unsigned int	opt;
+};
+
+static inline __u32 f2fs_crc32(void *buff, size_t len)
+{
+	return crc32_le(F2FS_SUPER_MAGIC, buff, len);
+}
+
+static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size)
+{
+	return f2fs_crc32(buff, buff_size) == blk_crc;
+}
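
The seed here is F2FS_SUPER_MAGIC (0xF2F52010 in linux/magic.h), and
f2fs_crc_valid() is simply a recompute-and-compare, as do_checkpoint() does
when stamping the checkpoint block. A bitwise userspace equivalent of the
kernel's crc32_le() for illustration:

    #include <stdio.h>

    #define F2FS_SUPER_MAGIC 0xF2F52010     /* from linux/magic.h */

    /* LSB-first CRC32, no pre/post inversion, like the kernel's crc32_le() */
    static unsigned int crc32_le(unsigned int crc, const unsigned char *p,
                                 unsigned long len)
    {
            while (len--) {
                    int i;

                    crc ^= *p++;
                    for (i = 0; i < 8; i++)
                            crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
            }
            return crc;
    }

    int main(void)
    {
            unsigned char blk[32] = "checkpoint payload";
            unsigned int crc = crc32_le(F2FS_SUPER_MAGIC, blk, sizeof(blk));

            printf("crc=%#x valid=%d\n", crc,
                   crc32_le(F2FS_SUPER_MAGIC, blk, sizeof(blk)) == crc);
            return 0;
    }
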
+
+/*
+ * For checkpoint manager
+ */
+enum {
+	NAT_BITMAP,
+	SIT_BITMAP
+};
+
+/* for the list of orphan inodes */
+struct orphan_inode_entry {
+	struct list_head list;	/* list head */
+	nid_t ino;		/* inode number */
+};
+
+/* for the list of directory inodes */
+struct dir_inode_entry {
+	struct list_head list;	/* list head */
+	struct inode *inode;	/* vfs inode pointer */
+};
+
+/* for the list of fsync inodes, used only during recovery */
+struct fsync_inode_entry {
+	struct list_head list;	/* list head */
+	struct inode *inode;	/* vfs inode pointer */
+	block_t blkaddr;	/* block address locating the last inode */
+};
+
+#define nats_in_cursum(sum)		(le16_to_cpu(sum->n_nats))
+#define sits_in_cursum(sum)		(le16_to_cpu(sum->n_sits))
+
+#define nat_in_journal(sum, i)		(sum->nat_j.entries[i].ne)
+#define nid_in_journal(sum, i)		(sum->nat_j.entries[i].nid)
+#define sit_in_journal(sum, i)		(sum->sit_j.entries[i].se)
+#define segno_in_journal(sum, i)	(sum->sit_j.entries[i].segno)
+
+static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
+{
+	int before = nats_in_cursum(rs);
+	rs->n_nats = cpu_to_le16(before + i);
+	return before;
+}
+
+static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
+{
+	int before = sits_in_cursum(rs);
+	rs->n_sits = cpu_to_le16(before + i);
+	return before;
+}
+
+/*
+ * For INODE and NODE manager
+ */
+#define XATTR_NODE_OFFSET	(-1)	/*
+					 * store xattrs in one node block per
+					 * file, keeping -1 as its node offset
+					 * to distinguish it from index node
+					 * blocks.
+					 */
+#define RDONLY_NODE		1	/*
+					 * specify a read-only mode when getting
+					 * a node block. 0 is read-write mode.
+					 * used by get_dnode_of_data().
+					 */
+#define F2FS_LINK_MAX		32000	/* maximum link count per file */
+
+/* for in-memory extent cache entry */
+struct extent_info {
+	rwlock_t ext_lock;	/* rwlock for consistency */
+	unsigned int fofs;	/* start offset in a file */
+	u32 blk_addr;		/* start block address of the extent */
+	unsigned int len;	/* length of the extent */
+};
+
+/*
+ * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
+ */
+#define FADVISE_COLD_BIT	0x01
+
+struct f2fs_inode_info {
+	struct inode vfs_inode;		/* serve a vfs inode */
+	unsigned long i_flags;		/* keep an inode flags for ioctl */
+	unsigned char i_advise;		/* use to give file attribute hints */
+	unsigned int i_current_depth;	/* use only in directory structure */
+	unsigned int i_pino;		/* parent inode number */
+	umode_t i_acl_mode;		/* keep file acl mode temporarily */
+
+	/* Used below internally in f2fs */
+	unsigned long flags;		/* use to pass per-file flags */
+	unsigned long long data_version;/* latest version of data for fsync */
+	atomic_t dirty_dents;		/* # of dirty dentry pages */
+	f2fs_hash_t chash;		/* hash value of given file name */
+	unsigned int clevel;		/* maximum level of given file name */
+	nid_t i_xattr_nid;		/* node id that contains xattrs */
+	struct extent_info ext;		/* in-memory extent cache entry */
+};
+
+static inline void get_extent_info(struct extent_info *ext,
+					struct f2fs_extent i_ext)
+{
+	write_lock(&ext->ext_lock);
+	ext->fofs = le32_to_cpu(i_ext.fofs);
+	ext->blk_addr = le32_to_cpu(i_ext.blk_addr);
+	ext->len = le32_to_cpu(i_ext.len);
+	write_unlock(&ext->ext_lock);
+}
+
+static inline void set_raw_extent(struct extent_info *ext,
+					struct f2fs_extent *i_ext)
+{
+	read_lock(&ext->ext_lock);
+	i_ext->fofs = cpu_to_le32(ext->fofs);
+	i_ext->blk_addr = cpu_to_le32(ext->blk_addr);
+	i_ext->len = cpu_to_le32(ext->len);
+	read_unlock(&ext->ext_lock);
+}
+
+struct f2fs_nm_info {
+	block_t nat_blkaddr;		/* base disk address of NAT */
+	nid_t max_nid;			/* maximum possible node ids */
+	nid_t init_scan_nid;		/* the first nid to be scanned */
+	nid_t next_scan_nid;		/* the next nid to be scanned */
+
+	/* NAT cache management */
+	struct radix_tree_root nat_root;/* root of the nat entry cache */
+	rwlock_t nat_tree_lock;		/* protect the nat entry cache */
+	unsigned int nat_cnt;		/* the # of cached nat entries */
+	struct list_head nat_entries;	/* cached nat entry list (clean) */
+	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
+
+	/* free node ids management */
+	struct list_head free_nid_list;	/* a list for free nids */
+	spinlock_t free_nid_list_lock;	/* protect free nid list */
+	unsigned int fcnt;		/* the number of free node ids */
+	struct mutex build_lock;	/* lock for build free nids */
+
+	/* for checkpoint */
+	char *nat_bitmap;		/* NAT bitmap pointer */
+	int bitmap_size;		/* bitmap size */
+};
+
+/*
+ * This structure is used as a function parameter.
+ * All the information is dedicated to a given direct node block determined
+ * by the data offset in a file.
+ */
+struct dnode_of_data {
+	struct inode *inode;		/* vfs inode pointer */
+	struct page *inode_page;	/* its inode page, NULL is possible */
+	struct page *node_page;		/* cached direct node page */
+	nid_t nid;			/* node id of the direct node block */
+	unsigned int ofs_in_node;	/* data offset in the node page */
+	bool inode_page_locked;		/* inode page is locked or not */
+	block_t	data_blkaddr;		/* block address of the data block */
+};
+
+static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
+		struct page *ipage, struct page *npage, nid_t nid)
+{
+	dn->inode = inode;
+	dn->inode_page = ipage;
+	dn->node_page = npage;
+	dn->nid = nid;
+	dn->inode_page_locked = 0;
+}
+
+/*
+ * For SIT manager
+ *
+ * By default, there are 6 active log areas across the whole main area.
+ * When considering hot and cold data separation to reduce cleaning overhead,
+ * we split 3 for data logs and 3 for node logs as hot, warm, and cold types,
+ * respectively.
+ * In the current design, you should not change these numbers directly.
+ * Instead, the active_logs=x mount option lets you select 2, 4, or 6
+ * logs according to the underlying devices. (default: 6)
+ * Just in case, the on-disk layout covers a maximum of 16 logs, consisting
+ * of 8 for data and 8 for node logs.
+ */
+#define	NR_CURSEG_DATA_TYPE	(3)
+#define NR_CURSEG_NODE_TYPE	(3)
+#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+
+enum {
+	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
+	CURSEG_WARM_DATA,	/* data blocks */
+	CURSEG_COLD_DATA,	/* multimedia or GCed data blocks */
+	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
+	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
+	CURSEG_COLD_NODE,	/* indirect node blocks */
+	NO_CHECK_TYPE
+};
+
+struct f2fs_sm_info {
+	struct sit_info *sit_info;		/* whole segment information */
+	struct free_segmap_info *free_info;	/* free segment information */
+	struct dirty_seglist_info *dirty_info;	/* dirty segment information */
+	struct curseg_info *curseg_array;	/* active segment information */
+
+	struct list_head wblist_head;	/* list of under-writeback pages */
+	spinlock_t wblist_lock;		/* protects the writeback page list */
+
+	block_t seg0_blkaddr;		/* block address of 0'th segment */
+	block_t main_blkaddr;		/* start block address of main area */
+	block_t ssa_blkaddr;		/* start block address of SSA area */
+
+	unsigned int segment_count;	/* total # of segments */
+	unsigned int main_segments;	/* # of segments in main area */
+	unsigned int reserved_segments;	/* # of reserved segments */
+	unsigned int ovp_segments;	/* # of overprovision segments */
+};
+
+/*
+ * For directory operation
+ */
+#define	NODE_DIR1_BLOCK		(ADDRS_PER_INODE + 1)
+#define	NODE_DIR2_BLOCK		(ADDRS_PER_INODE + 2)
+#define	NODE_IND1_BLOCK		(ADDRS_PER_INODE + 3)
+#define	NODE_IND2_BLOCK		(ADDRS_PER_INODE + 4)
+#define	NODE_DIND_BLOCK		(ADDRS_PER_INODE + 5)
+
+/*
+ * For superblock
+ */
+/*
+ * COUNT_TYPE for monitoring
+ *
+ * f2fs monitors the number of several block types such as on-writeback,
+ * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
+ */
+enum count_type {
+	F2FS_WRITEBACK,
+	F2FS_DIRTY_DENTS,
+	F2FS_DIRTY_NODES,
+	F2FS_DIRTY_META,
+	NR_COUNT_TYPE,
+};
+
+/*
+ * FS_LOCK nesting subclasses for the lock validator:
+ *
+ * The locking order between these classes is
+ * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW
+ *    -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC
+ */
+enum lock_type {
+	RENAME,		/* for renaming operations */
+	DENTRY_OPS,	/* for directory operations */
+	DATA_WRITE,	/* for data write */
+	DATA_NEW,	/* for data allocation */
+	DATA_TRUNC,	/* for data truncate */
+	NODE_NEW,	/* for node allocation */
+	NODE_TRUNC,	/* for node truncate */
+	NODE_WRITE,	/* for node write */
+	NR_LOCK_TYPE,
+};
+
+/*
+ * The below are the page types of bios used in submit_bio().
+ * The available types are:
+ * DATA			User data pages. Written in async mode.
+ * NODE			Node pages. Written in async mode.
+ * META			FS metadata pages such as SIT, NAT, CP.
+ * NR_PAGE_TYPE		The number of page types.
+ * META_FLUSH		Make sure the previous pages are written,
+ *			waiting for the bio's completion.
+ *			Can only be used with META.
+ */
+enum page_type {
+	DATA,
+	NODE,
+	META,
+	NR_PAGE_TYPE,
+	META_FLUSH,
+};
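
For illustration, a minimal sketch (not part of the patch) of the META_FLUSH
semantics. sync_meta_pages() is declared later in this header; the checkpoint
path is assumed to flush its CP pack roughly this way:

	static void example_flush_cp_pack(struct f2fs_sb_info *sbi)
	{
		/* write back dirty meta pages; META_FLUSH makes the last
		 * bio wait for completion instead of returning async */
		sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
	}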
+
+struct f2fs_sb_info {
+	struct super_block *sb;			/* pointer to VFS super block */
+	struct buffer_head *raw_super_buf;	/* buffer head of raw sb */
+	struct f2fs_super_block *raw_super;	/* raw super block pointer */
+	int s_dirty;				/* dirty flag for checkpoint */
+
+	/* for node-related operations */
+	struct f2fs_nm_info *nm_info;		/* node manager */
+	struct inode *node_inode;		/* cache node blocks */
+
+	/* for segment-related operations */
+	struct f2fs_sm_info *sm_info;		/* segment manager */
+	struct bio *bio[NR_PAGE_TYPE];		/* bios to merge */
+	sector_t last_block_in_bio[NR_PAGE_TYPE];	/* last block number */
+	struct rw_semaphore bio_sem;		/* IO semaphore */
+
+	/* for checkpoint */
+	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
+	struct inode *meta_inode;		/* cache meta blocks */
+	struct mutex cp_mutex;			/* for checkpoint procedure */
+	struct mutex fs_lock[NR_LOCK_TYPE];	/* for blocking FS operations */
+	struct mutex write_inode;		/* mutex for write inode */
+	struct mutex writepages;		/* mutex for writepages() */
+	int por_doing;				/* whether recovery is in progress */
+
+	/* for orphan inode management */
+	struct list_head orphan_inode_list;	/* orphan inode list */
+	struct mutex orphan_inode_mutex;	/* for orphan inode list */
+	unsigned int n_orphans;			/* # of orphan inodes */
+
+	/* for directory inode management */
+	struct list_head dir_inode_list;	/* dir inode list */
+	spinlock_t dir_inode_lock;		/* for dir inode list lock */
+	unsigned int n_dirty_dirs;		/* # of dir inodes */
+
+	/* basic file system units */
+	unsigned int log_sectors_per_block;	/* log2 sectors per block */
+	unsigned int log_blocksize;		/* log2 block size */
+	unsigned int blocksize;			/* block size */
+	unsigned int root_ino_num;		/* root inode number */
+	unsigned int node_ino_num;		/* node inode number */
+	unsigned int meta_ino_num;		/* meta inode number */
+	unsigned int log_blocks_per_seg;	/* log2 blocks per segment */
+	unsigned int blocks_per_seg;		/* blocks per segment */
+	unsigned int segs_per_sec;		/* segments per section */
+	unsigned int secs_per_zone;		/* sections per zone */
+	unsigned int total_sections;		/* total section count */
+	unsigned int total_node_count;		/* total node block count */
+	unsigned int total_valid_node_count;	/* valid node block count */
+	unsigned int total_valid_inode_count;	/* valid inode count */
+	int active_logs;			/* # of active logs */
+
+	block_t user_block_count;		/* # of user blocks */
+	block_t total_valid_block_count;	/* # of valid blocks */
+	block_t alloc_valid_block_count;	/* # of allocated blocks */
+	block_t last_valid_block_count;		/* for recovery */
+	u32 s_next_generation;			/* for NFS support */
+	atomic_t nr_pages[NR_COUNT_TYPE];	/* # of pages, see count_type */
+
+	struct f2fs_mount_info mount_opt;	/* mount options */
+
+	/* for cleaning operations */
+	struct mutex gc_mutex;			/* mutex for GC */
+	struct f2fs_gc_kthread	*gc_thread;	/* GC thread */
+
+	/*
+	 * For stat information.
+	 * In the two-element arrays below, one entry is for the LFS mode
+	 * and the other is for the SSR mode.
+	 */
+	struct f2fs_stat_info *stat_info;	/* FS status information */
+	unsigned int segment_count[2];		/* # of allocated segments */
+	unsigned int block_count[2];		/* # of allocated blocks */
+	unsigned int last_victim[2];		/* last victim segment # */
+	int total_hit_ext, read_hit_ext;	/* extent cache hit ratio */
+	int bg_gc;				/* background gc calls */
+	spinlock_t stat_lock;			/* lock for stat operations */
+};
+
+/*
+ * Inline functions
+ */
+static inline struct f2fs_inode_info *F2FS_I(struct inode *inode)
+{
+	return container_of(inode, struct f2fs_inode_info, vfs_inode);
+}
+
+static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
+{
+	return (struct f2fs_super_block *)(sbi->raw_super);
+}
+
+static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
+{
+	return (struct f2fs_checkpoint *)(sbi->ckpt);
+}
+
+static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
+{
+	return (struct f2fs_nm_info *)(sbi->nm_info);
+}
+
+static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi)
+{
+	return (struct f2fs_sm_info *)(sbi->sm_info);
+}
+
+static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
+{
+	return (struct sit_info *)(SM_I(sbi)->sit_info);
+}
+
+static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
+{
+	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
+}
+
+static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
+{
+	return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
+}
+
+static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
+{
+	sbi->s_dirty = 1;
+}
+
+static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
+{
+	sbi->s_dirty = 0;
+}
+
+static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	return ckpt_flags & f;
+}
+
+static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	ckpt_flags |= f;
+	cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
+static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	ckpt_flags &= (~f);
+	cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
+static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t)
+{
+	mutex_lock_nested(&sbi->fs_lock[t], t);
+}
+
+static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t)
+{
+	mutex_unlock(&sbi->fs_lock[t]);
+}
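
For illustration, a minimal sketch (not part of the patch) of taking two lock
classes in the documented order. Each class maps to its own mutex in
sbi->fs_lock[] and is annotated via mutex_lock_nested(), so lockdep can
validate the ordering:

	static void example_ordered_locking(struct f2fs_sb_info *sbi)
	{
		mutex_lock_op(sbi, DATA_WRITE);		/* earlier in the order */
		mutex_lock_op(sbi, DATA_TRUNC);		/* later in the order */

		/* ... operate on data blocks ... */

		mutex_unlock_op(sbi, DATA_TRUNC);
		mutex_unlock_op(sbi, DATA_WRITE);
	}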
+
+/*
+ * Check whether the given nid is within node id range.
+ */
+static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	BUG_ON((nid >= NM_I(sbi)->max_nid));
+}
+
+#define F2FS_DEFAULT_ALLOCATED_BLOCKS	1
+
+/*
+ * Check whether the inode has blocks or not
+ */
+static inline int F2FS_HAS_BLOCKS(struct inode *inode)
+{
+	if (F2FS_I(inode)->i_xattr_nid)
+		return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1);
+	else
+		return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS);
+}
+
+static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
+				 struct inode *inode, blkcnt_t count)
+{
+	block_t	valid_block_count;
+
+	spin_lock(&sbi->stat_lock);
+	valid_block_count =
+		sbi->total_valid_block_count + (block_t)count;
+	if (valid_block_count > sbi->user_block_count) {
+		spin_unlock(&sbi->stat_lock);
+		return false;
+	}
+	inode->i_blocks += count;
+	sbi->total_valid_block_count = valid_block_count;
+	sbi->alloc_valid_block_count += (block_t)count;
+	spin_unlock(&sbi->stat_lock);
+	return true;
+}
+
+static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
+						struct inode *inode,
+						blkcnt_t count)
+{
+	spin_lock(&sbi->stat_lock);
+	BUG_ON(sbi->total_valid_block_count < (block_t) count);
+	BUG_ON(inode->i_blocks < count);
+	inode->i_blocks -= count;
+	sbi->total_valid_block_count -= (block_t)count;
+	spin_unlock(&sbi->stat_lock);
+	return 0;
+}
+
+static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
+{
+	atomic_inc(&sbi->nr_pages[count_type]);
+	F2FS_SET_SB_DIRT(sbi);
+}
+
+static inline void inode_inc_dirty_dents(struct inode *inode)
+{
+	atomic_inc(&F2FS_I(inode)->dirty_dents);
+}
+
+static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
+{
+	atomic_dec(&sbi->nr_pages[count_type]);
+}
+
+static inline void inode_dec_dirty_dents(struct inode *inode)
+{
+	atomic_dec(&F2FS_I(inode)->dirty_dents);
+}
+
+static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
+{
+	return atomic_read(&sbi->nr_pages[count_type]);
+}
+
+static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
+{
+	block_t ret;
+	spin_lock(&sbi->stat_lock);
+	ret = sbi->total_valid_block_count;
+	spin_unlock(&sbi->stat_lock);
+	return ret;
+}
+
+static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+
+	/* return NAT or SIT bitmap */
+	if (flag == NAT_BITMAP)
+		return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
+	else if (flag == SIT_BITMAP)
+		return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
+
+	return 0;
+}
+
+static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	int offset = (flag == NAT_BITMAP) ?
+			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
+	return &ckpt->sit_nat_version_bitmap + offset;
+}
+
+static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
+{
+	block_t start_addr;
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver);
+
+	start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+
+	/*
+	 * odd-numbered checkpoints are stored in cp segment 0,
+	 * and even-numbered ones in cp segment 1
+	 */
+	if (!(ckpt_version & 1))
+		start_addr += sbi->blocks_per_seg;
+
+	return start_addr;
+}
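
As a worked example (editorial, assuming 512 blocks per segment and
cp_blkaddr = 512): checkpoint version 7 is odd, so its pack starts at block
512, while version 8 is even and starts at 512 + 512 = 1024. Mount-time
recovery reads both packs and keeps the valid one with the newer version,
which is why consecutive checkpoints alternate between the two segments.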
+
+static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
+{
+	return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
+}
+
+static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
+						struct inode *inode,
+						unsigned int count)
+{
+	block_t	valid_block_count;
+	unsigned int valid_node_count;
+
+	spin_lock(&sbi->stat_lock);
+
+	valid_block_count = sbi->total_valid_block_count + (block_t)count;
+	valid_node_count = sbi->total_valid_node_count + count;
+
+	if (valid_block_count > sbi->user_block_count) {
+		spin_unlock(&sbi->stat_lock);
+		return false;
+	}
+
+	if (valid_node_count > sbi->total_node_count) {
+		spin_unlock(&sbi->stat_lock);
+		return false;
+	}
+
+	/* account the allocation only after both checks have passed */
+	sbi->alloc_valid_block_count += (block_t)count;
+
+	if (inode)
+		inode->i_blocks += count;
+	sbi->total_valid_node_count = valid_node_count;
+	sbi->total_valid_block_count = valid_block_count;
+	spin_unlock(&sbi->stat_lock);
+
+	return true;
+}
+
+static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
+						struct inode *inode,
+						unsigned int count)
+{
+	spin_lock(&sbi->stat_lock);
+
+	BUG_ON(sbi->total_valid_block_count < count);
+	BUG_ON(sbi->total_valid_node_count < count);
+	BUG_ON(inode->i_blocks < count);
+
+	inode->i_blocks -= count;
+	sbi->total_valid_node_count -= count;
+	sbi->total_valid_block_count -= (block_t)count;
+
+	spin_unlock(&sbi->stat_lock);
+}
+
+static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
+{
+	unsigned int ret;
+	spin_lock(&sbi->stat_lock);
+	ret = sbi->total_valid_node_count;
+	spin_unlock(&sbi->stat_lock);
+	return ret;
+}
+
+static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
+{
+	spin_lock(&sbi->stat_lock);
+	BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count);
+	sbi->total_valid_inode_count++;
+	spin_unlock(&sbi->stat_lock);
+}
+
+static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
+{
+	spin_lock(&sbi->stat_lock);
+	BUG_ON(!sbi->total_valid_inode_count);
+	sbi->total_valid_inode_count--;
+	spin_unlock(&sbi->stat_lock);
+	return 0;
+}
+
+static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
+{
+	unsigned int ret;
+	spin_lock(&sbi->stat_lock);
+	ret = sbi->total_valid_inode_count;
+	spin_unlock(&sbi->stat_lock);
+	return ret;
+}
+
+static inline void f2fs_put_page(struct page *page, int unlock)
+{
+	if (!page || IS_ERR(page))
+		return;
+
+	if (unlock) {
+		BUG_ON(!PageLocked(page));
+		unlock_page(page);
+	}
+	page_cache_release(page);
+}
+
+static inline void f2fs_put_dnode(struct dnode_of_data *dn)
+{
+	if (dn->node_page)
+		f2fs_put_page(dn->node_page, 1);
+	if (dn->inode_page && dn->node_page != dn->inode_page)
+		f2fs_put_page(dn->inode_page, 0);
+	dn->node_page = NULL;
+	dn->inode_page = NULL;
+}
+
+static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
+					size_t size, void (*ctor)(void *))
+{
+	return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
+}
+
+#define RAW_IS_INODE(p)	((p)->footer.nid == (p)->footer.ino)
+
+static inline bool IS_INODE(struct page *page)
+{
+	struct f2fs_node *p = (struct f2fs_node *)page_address(page);
+	return RAW_IS_INODE(p);
+}
+
+static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
+{
+	return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr;
+}
+
+static inline block_t datablock_addr(struct page *node_page,
+		unsigned int offset)
+{
+	struct f2fs_node *raw_node;
+	__le32 *addr_array;
+	raw_node = (struct f2fs_node *)page_address(node_page);
+	addr_array = blkaddr_in_node(raw_node);
+	return le32_to_cpu(addr_array[offset]);
+}
+
+static inline int f2fs_test_bit(unsigned int nr, char *addr)
+{
+	int mask;
+
+	addr += (nr >> 3);
+	mask = 1 << (7 - (nr & 0x07));
+	return mask & *addr;
+}
+
+static inline int f2fs_set_bit(unsigned int nr, char *addr)
+{
+	int mask;
+	int ret;
+
+	addr += (nr >> 3);
+	mask = 1 << (7 - (nr & 0x07));
+	ret = mask & *addr;
+	*addr |= mask;
+	return ret;
+}
+
+static inline int f2fs_clear_bit(unsigned int nr, char *addr)
+{
+	int mask;
+	int ret;
+
+	addr += (nr >> 3);
+	mask = 1 << (7 - (nr & 0x07));
+	ret = mask & *addr;
+	*addr &= ~mask;
+	return ret;
+}
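
Note that these helpers number bits MSB-first within each byte
(mask = 1 << (7 - (nr & 7))), unlike the kernel's native little-endian
bitops, so the on-disk bitmaps have the same layout on every host. A minimal
sketch (not part of the patch):

	static void example_bit_order(void)
	{
		char map[2] = { 0, 0 };

		f2fs_set_bit(0, map);	/* map[0] becomes 0x80, not 0x01 */
		f2fs_set_bit(10, map);	/* map[1]: mask 1 << (7 - 2) == 0x20 */
		f2fs_clear_bit(0, map);	/* map[0] back to 0x00 */
	}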
+
+/* used for f2fs_inode_info->flags */
+enum {
+	FI_NEW_INODE,		/* indicate newly allocated inode */
+	FI_NEED_CP,		/* need to do checkpoint during fsync */
+	FI_INC_LINK,		/* need to increment i_nlink */
+	FI_ACL_MODE,		/* indicate acl mode */
+	FI_NO_ALLOC,		/* should not allocate any blocks */
+};
+
+static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
+{
+	set_bit(flag, &fi->flags);
+}
+
+static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
+{
+	return test_bit(flag, &fi->flags);
+}
+
+static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
+{
+	clear_bit(flag, &fi->flags);
+}
+
+static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
+{
+	fi->i_acl_mode = mode;
+	set_inode_flag(fi, FI_ACL_MODE);
+}
+
+static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
+{
+	if (is_inode_flag_set(fi, flag)) {
+		clear_inode_flag(fi, flag);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * file.c
+ */
+int f2fs_sync_file(struct file *, loff_t, loff_t, int);
+void truncate_data_blocks(struct dnode_of_data *);
+void f2fs_truncate(struct inode *);
+int f2fs_setattr(struct dentry *, struct iattr *);
+int truncate_hole(struct inode *, pgoff_t, pgoff_t);
+long f2fs_ioctl(struct file *, unsigned int, unsigned long);
+
+/*
+ * inode.c
+ */
+void f2fs_set_inode_flags(struct inode *);
+struct inode *f2fs_iget_nowait(struct super_block *, unsigned long);
+struct inode *f2fs_iget(struct super_block *, unsigned long);
+void update_inode(struct inode *, struct page *);
+int f2fs_write_inode(struct inode *, struct writeback_control *);
+void f2fs_evict_inode(struct inode *);
+
+/*
+ * namei.c
+ */
+struct dentry *f2fs_get_parent(struct dentry *child);
+
+/*
+ * dir.c
+ */
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *,
+							struct page **);
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
+ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
+void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
+				struct page *, struct inode *);
+void init_dent_inode(struct dentry *, struct page *);
+int f2fs_add_link(struct dentry *, struct inode *);
+void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+int f2fs_make_empty(struct inode *, struct inode *);
+bool f2fs_empty_dir(struct inode *);
+
+/*
+ * super.c
+ */
+int f2fs_sync_fs(struct super_block *, int);
+
+/*
+ * hash.c
+ */
+f2fs_hash_t f2fs_dentry_hash(const char *, int);
+
+/*
+ * node.c
+ */
+struct dnode_of_data;
+struct node_info;
+
+int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
+void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
+int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
+int truncate_inode_blocks(struct inode *, pgoff_t);
+int remove_inode_page(struct inode *);
+int new_inode_page(struct inode *, struct dentry *);
+struct page *new_node_page(struct dnode_of_data *, unsigned int);
+void ra_node_page(struct f2fs_sb_info *, nid_t);
+struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
+struct page *get_node_page_ra(struct page *, int);
+void sync_inode_page(struct dnode_of_data *);
+int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
+bool alloc_nid(struct f2fs_sb_info *, nid_t *);
+void alloc_nid_done(struct f2fs_sb_info *, nid_t);
+void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
+void recover_node_page(struct f2fs_sb_info *, struct page *,
+		struct f2fs_summary *, struct node_info *, block_t);
+int recover_inode_page(struct f2fs_sb_info *, struct page *);
+int restore_node_summary(struct f2fs_sb_info *, unsigned int,
+				struct f2fs_summary_block *);
+void flush_nat_entries(struct f2fs_sb_info *);
+int build_node_manager(struct f2fs_sb_info *);
+void destroy_node_manager(struct f2fs_sb_info *);
+int create_node_manager_caches(void);
+void destroy_node_manager_caches(void);
+
+/*
+ * segment.c
+ */
+void f2fs_balance_fs(struct f2fs_sb_info *);
+void invalidate_blocks(struct f2fs_sb_info *, block_t);
+void locate_dirty_segment(struct f2fs_sb_info *, unsigned int);
+void clear_prefree_segments(struct f2fs_sb_info *);
+int npages_for_summary_flush(struct f2fs_sb_info *);
+void allocate_new_segments(struct f2fs_sb_info *);
+struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
+struct bio *f2fs_bio_alloc(struct block_device *, int);
+void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
+int write_meta_page(struct f2fs_sb_info *, struct page *,
+					struct writeback_control *);
+void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
+					block_t, block_t *);
+void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
+					block_t, block_t *);
+void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t);
+void recover_data_page(struct f2fs_sb_info *, struct page *,
+				struct f2fs_summary *, block_t, block_t);
+void rewrite_node_page(struct f2fs_sb_info *, struct page *,
+				struct f2fs_summary *, block_t, block_t);
+void write_data_summaries(struct f2fs_sb_info *, block_t);
+void write_node_summaries(struct f2fs_sb_info *, block_t);
+int lookup_journal_in_cursum(struct f2fs_summary_block *,
+					int, unsigned int, int);
+void flush_sit_entries(struct f2fs_sb_info *);
+int build_segment_manager(struct f2fs_sb_info *);
+void reset_victim_segmap(struct f2fs_sb_info *);
+void destroy_segment_manager(struct f2fs_sb_info *);
+
+/*
+ * checkpoint.c
+ */
+struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
+struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+int check_orphan_space(struct f2fs_sb_info *);
+void add_orphan_inode(struct f2fs_sb_info *, nid_t);
+void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
+int recover_orphan_inodes(struct f2fs_sb_info *);
+int get_valid_checkpoint(struct f2fs_sb_info *);
+void set_dirty_dir_page(struct inode *, struct page *);
+void remove_dirty_dir_inode(struct inode *);
+void sync_dirty_dir_inodes(struct f2fs_sb_info *);
+void block_operations(struct f2fs_sb_info *);
+void write_checkpoint(struct f2fs_sb_info *, bool, bool);
+void init_orphan_info(struct f2fs_sb_info *);
+int create_checkpoint_caches(void);
+void destroy_checkpoint_caches(void);
+
+/*
+ * data.c
+ */
+int reserve_new_block(struct dnode_of_data *);
+void update_extent_cache(block_t, struct dnode_of_data *);
+struct page *find_data_page(struct inode *, pgoff_t);
+struct page *get_lock_data_page(struct inode *, pgoff_t);
+struct page *get_new_data_page(struct inode *, pgoff_t, bool);
+int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int);
+int do_write_data_page(struct page *);
+
+/*
+ * gc.c
+ */
+int start_gc_thread(struct f2fs_sb_info *);
+void stop_gc_thread(struct f2fs_sb_info *);
+block_t start_bidx_of_node(unsigned int);
+int f2fs_gc(struct f2fs_sb_info *, int);
+void build_gc_manager(struct f2fs_sb_info *);
+int create_gc_caches(void);
+void destroy_gc_caches(void);
+
+/*
+ * recovery.c
+ */
+void recover_fsync_data(struct f2fs_sb_info *);
+bool space_for_roll_forward(struct f2fs_sb_info *);
+
+/*
+ * debug.c
+ */
+#ifdef CONFIG_F2FS_STAT_FS
+struct f2fs_stat_info {
+	struct list_head stat_list;
+	struct f2fs_sb_info *sbi;
+	struct mutex stat_lock;
+	int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
+	int main_area_segs, main_area_sections, main_area_zones;
+	int hit_ext, total_ext;
+	int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
+	int nats, sits, fnids;
+	int total_count, utilization;
+	int bg_gc;
+	unsigned int valid_count, valid_node_count, valid_inode_count;
+	unsigned int bimodal, avg_vblocks;
+	int util_free, util_valid, util_invalid;
+	int rsvd_segs, overp_segs;
+	int dirty_count, node_pages, meta_pages;
+	int prefree_count, call_count;
+	int tot_segs, node_segs, data_segs, free_segs, free_secs;
+	int tot_blks, data_blks, node_blks;
+	int curseg[NR_CURSEG_TYPE];
+	int cursec[NR_CURSEG_TYPE];
+	int curzone[NR_CURSEG_TYPE];
+
+	unsigned int segment_count[2];
+	unsigned int block_count[2];
+	unsigned base_mem, cache_mem;
+};
+
+#define stat_inc_call_count(si)	((si)->call_count++)
+
+#define stat_inc_seg_count(sbi, type)					\
+	do {								\
+		struct f2fs_stat_info *si = sbi->stat_info;		\
+		(si)->tot_segs++;					\
+		if (type == SUM_TYPE_DATA)				\
+			si->data_segs++;				\
+		else							\
+			si->node_segs++;				\
+	} while (0)
+
+#define stat_inc_tot_blk_count(si, blks)				\
+	(si->tot_blks += (blks))
+
+#define stat_inc_data_blk_count(sbi, blks)				\
+	do {								\
+		struct f2fs_stat_info *si = sbi->stat_info;		\
+		stat_inc_tot_blk_count(si, blks);			\
+		si->data_blks += (blks);				\
+	} while (0)
+
+#define stat_inc_node_blk_count(sbi, blks)				\
+	do {								\
+		struct f2fs_stat_info *si = sbi->stat_info;		\
+		stat_inc_tot_blk_count(si, blks);			\
+		si->node_blks += (blks);				\
+	} while (0)
+
+int f2fs_build_stats(struct f2fs_sb_info *);
+void f2fs_destroy_stats(struct f2fs_sb_info *);
+void destroy_root_stats(void);
+#else
+#define stat_inc_call_count(si)
+#define stat_inc_seg_count(si, type)
+#define stat_inc_tot_blk_count(si, blks)
+#define stat_inc_data_blk_count(si, blks)
+#define stat_inc_node_blk_count(sbi, blks)
+
+static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
+static inline void destroy_root_stats(void) { }
+#endif
+
+extern const struct file_operations f2fs_dir_operations;
+extern const struct file_operations f2fs_file_operations;
+extern const struct inode_operations f2fs_file_inode_operations;
+extern const struct address_space_operations f2fs_dblock_aops;
+extern const struct address_space_operations f2fs_node_aops;
+extern const struct address_space_operations f2fs_meta_aops;
+extern const struct inode_operations f2fs_dir_inode_operations;
+extern const struct inode_operations f2fs_symlink_inode_operations;
+extern const struct inode_operations f2fs_special_inode_operations;
+#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
new file mode 100644
index 0000000..f9e085d
--- /dev/null
+++ b/fs/f2fs/file.c
@@ -0,0 +1,636 @@
+/*
+ * fs/f2fs/file.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/stat.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/falloc.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/mount.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "xattr.h"
+#include "acl.h"
+
+static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
+						struct vm_fault *vmf)
+{
+	struct page *page = vmf->page;
+	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	block_t old_blk_addr;
+	struct dnode_of_data dn;
+	int err;
+
+	f2fs_balance_fs(sbi);
+
+	sb_start_pagefault(inode->i_sb);
+
+	mutex_lock_op(sbi, DATA_NEW);
+
+	/* block allocation */
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, page->index, 0);
+	if (err) {
+		mutex_unlock_op(sbi, DATA_NEW);
+		goto out;
+	}
+
+	old_blk_addr = dn.data_blkaddr;
+
+	if (old_blk_addr == NULL_ADDR) {
+		err = reserve_new_block(&dn);
+		if (err) {
+			f2fs_put_dnode(&dn);
+			mutex_unlock_op(sbi, DATA_NEW);
+			goto out;
+		}
+	}
+	f2fs_put_dnode(&dn);
+
+	mutex_unlock_op(sbi, DATA_NEW);
+
+	lock_page(page);
+	if (page->mapping != inode->i_mapping ||
+			page_offset(page) >= i_size_read(inode) ||
+			!PageUptodate(page)) {
+		unlock_page(page);
+		err = -EFAULT;
+		goto out;
+	}
+
+	/*
+	 * check to see if the page is mapped already (no holes)
+	 */
+	if (PageMappedToDisk(page))
+		goto out;
+
+	/* fill the page */
+	wait_on_page_writeback(page);
+
+	/* page is wholly or partially inside EOF */
+	if ((((loff_t)page->index + 1) << PAGE_CACHE_SHIFT) >
+						i_size_read(inode)) {
+		unsigned offset;
+		offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
+		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+	}
+	set_page_dirty(page);
+	SetPageUptodate(page);
+
+	file_update_time(vma->vm_file);
+out:
+	sb_end_pagefault(inode->i_sb);
+	return block_page_mkwrite_return(err);
+}
+
+static const struct vm_operations_struct f2fs_file_vm_ops = {
+	.fault        = filemap_fault,
+	.page_mkwrite = f2fs_vm_page_mkwrite,
+};
+
+static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
+{
+	struct dentry *dentry;
+	nid_t pino;
+
+	inode = igrab(inode);
+	dentry = d_find_any_alias(inode);
+	if (!dentry) {
+		iput(inode);
+		return 0;
+	}
+	pino = dentry->d_parent->d_inode->i_ino;
+	dput(dentry);
+	iput(inode);
+	return !is_checkpointed_node(sbi, pino);
+}
+
+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	unsigned long long cur_version;
+	int ret = 0;
+	bool need_cp = false;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.for_reclaim = 0,
+	};
+
+	if (inode->i_sb->s_flags & MS_RDONLY)
+		return 0;
+
+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+
+	mutex_lock(&inode->i_mutex);
+
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		goto out;
+
+	mutex_lock(&sbi->cp_mutex);
+	cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
+	mutex_unlock(&sbi->cp_mutex);
+
+	if (F2FS_I(inode)->data_version != cur_version &&
+					!(inode->i_state & I_DIRTY))
+		goto out;
+	F2FS_I(inode)->data_version--;
+
+	if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
+		need_cp = true;
+	if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
+		need_cp = true;
+	if (!space_for_roll_forward(sbi))
+		need_cp = true;
+	if (need_to_sync_dir(sbi, inode))
+		need_cp = true;
+
+	f2fs_write_inode(inode, NULL);
+
+	if (need_cp) {
+		/* all the dirty node pages should be flushed for POR */
+		ret = f2fs_sync_fs(inode->i_sb, 1);
+		clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
+	} else {
+		while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0)
+			f2fs_write_inode(inode, NULL);
+		filemap_fdatawait_range(sbi->node_inode->i_mapping,
+							0, LONG_MAX);
+	}
+out:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
+static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	file_accessed(file);
+	vma->vm_ops = &f2fs_file_vm_ops;
+	return 0;
+}
+
+static int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+{
+	int nr_free = 0, ofs = dn->ofs_in_node;
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct f2fs_node *raw_node;
+	__le32 *addr;
+
+	raw_node = page_address(dn->node_page);
+	addr = blkaddr_in_node(raw_node) + ofs;
+
+	for ( ; count > 0; count--, addr++, dn->ofs_in_node++) {
+		block_t blkaddr = le32_to_cpu(*addr);
+		if (blkaddr == NULL_ADDR)
+			continue;
+
+		update_extent_cache(NULL_ADDR, dn);
+		invalidate_blocks(sbi, blkaddr);
+		dec_valid_block_count(sbi, dn->inode, 1);
+		nr_free++;
+	}
+	if (nr_free) {
+		set_page_dirty(dn->node_page);
+		sync_inode_page(dn);
+	}
+	dn->ofs_in_node = ofs;
+	return nr_free;
+}
+
+void truncate_data_blocks(struct dnode_of_data *dn)
+{
+	truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
+}
+
+static void truncate_partial_data_page(struct inode *inode, u64 from)
+{
+	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
+	struct page *page;
+
+	if (!offset)
+		return;
+
+	page = find_data_page(inode, from >> PAGE_CACHE_SHIFT);
+	if (IS_ERR(page))
+		return;
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+	zero_user(page, offset, PAGE_CACHE_SIZE - offset);
+	set_page_dirty(page);
+	f2fs_put_page(page, 1);
+}
+
+static int truncate_blocks(struct inode *inode, u64 from)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	unsigned int blocksize = inode->i_sb->s_blocksize;
+	struct dnode_of_data dn;
+	pgoff_t free_from;
+	int count = 0;
+	int err;
+
+	free_from = (pgoff_t)
+			((from + blocksize - 1) >> (sbi->log_blocksize));
+
+	mutex_lock_op(sbi, DATA_TRUNC);
+
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = get_dnode_of_data(&dn, free_from, RDONLY_NODE);
+	if (err) {
+		if (err == -ENOENT)
+			goto free_next;
+		mutex_unlock_op(sbi, DATA_TRUNC);
+		return err;
+	}
+
+	if (IS_INODE(dn.node_page))
+		count = ADDRS_PER_INODE;
+	else
+		count = ADDRS_PER_BLOCK;
+
+	count -= dn.ofs_in_node;
+	BUG_ON(count < 0);
+	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
+		truncate_data_blocks_range(&dn, count);
+		free_from += count;
+	}
+
+	f2fs_put_dnode(&dn);
+free_next:
+	err = truncate_inode_blocks(inode, free_from);
+	mutex_unlock_op(sbi, DATA_TRUNC);
+
+	/* lastly zero out the first data page */
+	truncate_partial_data_page(inode, from);
+
+	return err;
+}
+
+void f2fs_truncate(struct inode *inode)
+{
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+				S_ISLNK(inode->i_mode)))
+		return;
+
+	if (!truncate_blocks(inode, i_size_read(inode))) {
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		mark_inode_dirty(inode);
+	}
+
+	f2fs_balance_fs(F2FS_SB(inode->i_sb));
+}
+
+static int f2fs_getattr(struct vfsmount *mnt,
+			 struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	generic_fillattr(inode, stat);
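+	/* i_blocks counts 4KB blocks; kstat expects 512-byte sectors */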
+	stat->blocks <<= 3;
+	return 0;
+}
+
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+static void __setattr_copy(struct inode *inode, const struct iattr *attr)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	unsigned int ia_valid = attr->ia_valid;
+
+	if (ia_valid & ATTR_UID)
+		inode->i_uid = attr->ia_uid;
+	if (ia_valid & ATTR_GID)
+		inode->i_gid = attr->ia_gid;
+	if (ia_valid & ATTR_ATIME)
+		inode->i_atime = timespec_trunc(attr->ia_atime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MTIME)
+		inode->i_mtime = timespec_trunc(attr->ia_mtime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_CTIME)
+		inode->i_ctime = timespec_trunc(attr->ia_ctime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MODE) {
+		umode_t mode = attr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+		set_acl_inode(fi, mode);
+	}
+}
+#else
+#define __setattr_copy setattr_copy
+#endif
+
+int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	int err;
+
+	err = inode_change_ok(inode, attr);
+	if (err)
+		return err;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+			attr->ia_size != i_size_read(inode)) {
+		truncate_setsize(inode, attr->ia_size);
+		f2fs_truncate(inode);
+	}
+
+	__setattr_copy(inode, attr);
+
+	if (attr->ia_valid & ATTR_MODE) {
+		err = f2fs_acl_chmod(inode);
+		if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
+			inode->i_mode = fi->i_acl_mode;
+			clear_inode_flag(fi, FI_ACL_MODE);
+		}
+	}
+
+	mark_inode_dirty(inode);
+	return err;
+}
+
+const struct inode_operations f2fs_file_inode_operations = {
+	.getattr	= f2fs_getattr,
+	.setattr	= f2fs_setattr,
+	.get_acl	= f2fs_get_acl,
+#ifdef CONFIG_F2FS_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= f2fs_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+};
+
+static void fill_zero(struct inode *inode, pgoff_t index,
+					loff_t start, loff_t len)
+{
+	struct page *page;
+
+	if (!len)
+		return;
+
+	page = get_new_data_page(inode, index, false);
+
+	if (!IS_ERR(page)) {
+		wait_on_page_writeback(page);
+		zero_user(page, start, len);
+		set_page_dirty(page);
+		f2fs_put_page(page, 1);
+	}
+}
+
+int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
+{
+	pgoff_t index;
+	int err;
+
+	for (index = pg_start; index < pg_end; index++) {
+		struct dnode_of_data dn;
+		struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+
+		mutex_lock_op(sbi, DATA_TRUNC);
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		err = get_dnode_of_data(&dn, index, RDONLY_NODE);
+		if (err) {
+			mutex_unlock_op(sbi, DATA_TRUNC);
+			if (err == -ENOENT)
+				continue;
+			return err;
+		}
+
+		if (dn.data_blkaddr != NULL_ADDR)
+			truncate_data_blocks_range(&dn, 1);
+		f2fs_put_dnode(&dn);
+		mutex_unlock_op(sbi, DATA_TRUNC);
+	}
+	return 0;
+}
+
+static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
+{
+	pgoff_t pg_start, pg_end;
+	loff_t off_start, off_end;
+	int ret = 0;
+
+	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
+	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+
+	off_start = offset & (PAGE_CACHE_SIZE - 1);
+	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+
+	if (pg_start == pg_end) {
+		fill_zero(inode, pg_start, off_start,
+						off_end - off_start);
+	} else {
+		if (off_start)
+			fill_zero(inode, pg_start++, off_start,
+					PAGE_CACHE_SIZE - off_start);
+		if (off_end)
+			fill_zero(inode, pg_end, 0, off_end);
+
+		if (pg_start < pg_end) {
+			struct address_space *mapping = inode->i_mapping;
+			loff_t blk_start, blk_end;
+
+			blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
+			blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
+			truncate_inode_pages_range(mapping, blk_start,
+					blk_end - 1);
+			ret = truncate_hole(inode, pg_start, pg_end);
+		}
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+		i_size_read(inode) <= (offset + len)) {
+		i_size_write(inode, offset);
+		mark_inode_dirty(inode);
+	}
+
+	return ret;
+}
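
Worked example (editorial, assuming 4 KB pages): punching offset = 3000,
len = 6000 gives pg_start = 0, pg_end = 2, off_start = 3000 and off_end =
808. The partial head (bytes 3000..4095 of page 0) and partial tail (bytes
0..807 of page 2) are zeroed in place via fill_zero(), and only the fully
covered page 1 has its page cache truncated and its block freed through
truncate_hole().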
+
+static int expand_inode_data(struct inode *inode, loff_t offset,
+					loff_t len, int mode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	pgoff_t index, pg_start, pg_end;
+	loff_t new_size = i_size_read(inode);
+	loff_t off_start, off_end;
+	int ret = 0;
+
+	ret = inode_newsize_ok(inode, (len + offset));
+	if (ret)
+		return ret;
+
+	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
+	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+
+	off_start = offset & (PAGE_CACHE_SIZE - 1);
+	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+
+	for (index = pg_start; index <= pg_end; index++) {
+		struct dnode_of_data dn;
+
+		mutex_lock_op(sbi, DATA_NEW);
+
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		ret = get_dnode_of_data(&dn, index, 0);
+		if (ret) {
+			mutex_unlock_op(sbi, DATA_NEW);
+			break;
+		}
+
+		if (dn.data_blkaddr == NULL_ADDR) {
+			ret = reserve_new_block(&dn);
+			if (ret) {
+				f2fs_put_dnode(&dn);
+				mutex_unlock_op(sbi, DATA_NEW);
+				break;
+			}
+		}
+		f2fs_put_dnode(&dn);
+
+		mutex_unlock_op(sbi, DATA_NEW);
+
+		if (pg_start == pg_end)
+			new_size = offset + len;
+		else if (index == pg_start && off_start)
+			new_size = ((loff_t)index + 1) << PAGE_CACHE_SHIFT;
+		else if (index == pg_end)
+			new_size = ((loff_t)index << PAGE_CACHE_SHIFT) + off_end;
+		else
+			new_size += PAGE_CACHE_SIZE;
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+		i_size_read(inode) < new_size) {
+		i_size_write(inode, new_size);
+		mark_inode_dirty(inode);
+	}
+
+	return ret;
+}
+
+static long f2fs_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	long ret;
+
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		return -EOPNOTSUPP;
+
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		ret = punch_hole(inode, offset, len, mode);
+	else
+		ret = expand_inode_data(inode, offset, len, mode);
+
+	f2fs_balance_fs(sbi);
+	return ret;
+}
+
+#define F2FS_REG_FLMASK		(~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
+#define F2FS_OTHER_FLMASK	(FS_NODUMP_FL | FS_NOATIME_FL)
+
+static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
+{
+	if (S_ISDIR(mode))
+		return flags;
+	else if (S_ISREG(mode))
+		return flags & F2FS_REG_FLMASK;
+	else
+		return flags & F2FS_OTHER_FLMASK;
+}
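
Worked example (editorial): F2FS_REG_FLMASK clears exactly FS_DIRSYNC_FL and
FS_TOPDIR_FL, so a request to set FS_DIRSYNC_FL | FS_NOATIME_FL on a regular
file keeps only FS_NOATIME_FL. A directory keeps both bits, and any other
file type can only carry FS_NODUMP_FL and FS_NOATIME_FL.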
+
+long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	unsigned int flags;
+	int ret;
+
+	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		flags = fi->i_flags & FS_FL_USER_VISIBLE;
+		return put_user(flags, (int __user *) arg);
+	case FS_IOC_SETFLAGS:
+	{
+		unsigned int oldflags;
+
+		ret = mnt_want_write(filp->f_path.mnt);
+		if (ret)
+			return ret;
+
+		if (!inode_owner_or_capable(inode)) {
+			ret = -EACCES;
+			goto out;
+		}
+
+		if (get_user(flags, (int __user *) arg)) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		flags = f2fs_mask_flags(inode->i_mode, flags);
+
+		mutex_lock(&inode->i_mutex);
+
+		oldflags = fi->i_flags;
+
+		if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+			if (!capable(CAP_LINUX_IMMUTABLE)) {
+				mutex_unlock(&inode->i_mutex);
+				ret = -EPERM;
+				goto out;
+			}
+		}
+
+		flags = flags & FS_FL_USER_MODIFIABLE;
+		flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
+		fi->i_flags = flags;
+		mutex_unlock(&inode->i_mutex);
+
+		f2fs_set_inode_flags(inode);
+		inode->i_ctime = CURRENT_TIME;
+		mark_inode_dirty(inode);
+out:
+		mnt_drop_write(filp->f_path.mnt);
+		return ret;
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
+const struct file_operations f2fs_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= generic_file_aio_read,
+	.aio_write	= generic_file_aio_write,
+	.open		= generic_file_open,
+	.mmap		= f2fs_file_mmap,
+	.fsync		= f2fs_sync_file,
+	.fallocate	= f2fs_fallocate,
+	.unlocked_ioctl	= f2fs_ioctl,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
+};
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
new file mode 100644
index 0000000..644aa38
--- /dev/null
+++ b/fs/f2fs/gc.c
@@ -0,0 +1,742 @@
+/*
+ * fs/f2fs/gc.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/backing-dev.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/f2fs_fs.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/blkdev.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "gc.h"
+
+static struct kmem_cache *winode_slab;
+
+static int gc_thread_func(void *data)
+{
+	struct f2fs_sb_info *sbi = data;
+	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+	long wait_ms;
+
+	wait_ms = GC_THREAD_MIN_SLEEP_TIME;
+
+	do {
+		if (try_to_freeze())
+			continue;
+		else
+			wait_event_interruptible_timeout(*wq,
+						kthread_should_stop(),
+						msecs_to_jiffies(wait_ms));
+		if (kthread_should_stop())
+			break;
+
+		f2fs_balance_fs(sbi);
+
+		if (!test_opt(sbi, BG_GC))
+			continue;
+
+		/*
+		 * [GC triggering condition]
+		 * 0. GC is not conducted currently.
+		 * 1. There are enough dirty segments.
+		 * 2. IO subsystem is idle by checking the # of writeback pages.
+		 * 3. IO subsystem is idle by checking the # of requests in
+		 *    bdev's request list.
+		 *
+		 * Note) We have to avoid triggering GC too frequently,
+		 * because some segments may be invalidated soon after
+		 * by user updates or deletions.
+		 * So we wait some time to collect more dirty segments.
+		 */
+		if (!mutex_trylock(&sbi->gc_mutex))
+			continue;
+
+		if (!is_idle(sbi)) {
+			wait_ms = increase_sleep_time(wait_ms);
+			mutex_unlock(&sbi->gc_mutex);
+			continue;
+		}
+
+		if (has_enough_invalid_blocks(sbi))
+			wait_ms = decrease_sleep_time(wait_ms);
+		else
+			wait_ms = increase_sleep_time(wait_ms);
+
+		sbi->bg_gc++;
+
+		if (f2fs_gc(sbi, 1) == GC_NONE)
+			wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
+		else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
+			wait_ms = GC_THREAD_MAX_SLEEP_TIME;
+
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_gc_kthread *gc_th;
+
+	gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
+	if (!gc_th)
+		return -ENOMEM;
+
+	sbi->gc_thread = gc_th;
+	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
+	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
+				GC_THREAD_NAME);
+	if (IS_ERR(gc_th->f2fs_gc_task)) {
+		kfree(gc_th);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void stop_gc_thread(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+	if (!gc_th)
+		return;
+	kthread_stop(gc_th->f2fs_gc_task);
+	kfree(gc_th);
+	sbi->gc_thread = NULL;
+}
+
+static int select_gc_type(int gc_type)
+{
+	return (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+}
+
+static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
+			int type, struct victim_sel_policy *p)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+	if (p->alloc_mode) {
+		p->gc_mode = GC_GREEDY;
+		p->dirty_segmap = dirty_i->dirty_segmap[type];
+		p->ofs_unit = 1;
+	} else {
+		p->gc_mode = select_gc_type(gc_type);
+		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
+		p->ofs_unit = sbi->segs_per_sec;
+	}
+	p->offset = sbi->last_victim[p->gc_mode];
+}
+
+static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
+				struct victim_sel_policy *p)
+{
+	if (p->gc_mode == GC_GREEDY)
+		return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
+	else if (p->gc_mode == GC_CB)
+		return UINT_MAX;
+	else /* No other gc_mode */
+		return 0;
+}
+
+static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+
+	/*
+	 * If the gc_type is FG_GC, we can pick victim segments that were
+	 * already selected by background GC.
+	 * Those segments are guaranteed to have few valid blocks.
+	 */
+	segno = find_next_bit(dirty_i->victim_segmap[BG_GC],
+						TOTAL_SEGS(sbi), 0);
+	if (segno < TOTAL_SEGS(sbi)) {
+		clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
+		return segno;
+	}
+	return NULL_SEGNO;
+}
+
+static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int secno = GET_SECNO(sbi, segno);
+	unsigned int start = secno * sbi->segs_per_sec;
+	unsigned long long mtime = 0;
+	unsigned int vblocks;
+	unsigned char age = 0;
+	unsigned char u;
+	unsigned int i;
+
+	for (i = 0; i < sbi->segs_per_sec; i++)
+		mtime += get_seg_entry(sbi, start + i)->mtime;
+	vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+
+	mtime = div_u64(mtime, sbi->segs_per_sec);
+	vblocks = div_u64(vblocks, sbi->segs_per_sec);
+
+	u = (vblocks * 100) >> sbi->log_blocks_per_seg;
+
+	/* Handle if the system time is changed by user */
+	if (mtime < sit_i->min_mtime)
+		sit_i->min_mtime = mtime;
+	if (mtime > sit_i->max_mtime)
+		sit_i->max_mtime = mtime;
+	if (sit_i->max_mtime != sit_i->min_mtime)
+		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
+				sit_i->max_mtime - sit_i->min_mtime);
+
+	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
+}
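
Worked example (editorial) of the cost-benefit value: a segment at 50%
utilization (u = 50) with a mid-range age of 50 yields
100 * (100 - 50) * 50 / (100 + 50) = 1666, while an older, emptier segment
with u = 20 and age = 80 yields 100 * 80 * 80 / 120 = 5333. Because the
result is subtracted from UINT_MAX and the caller minimizes cost, the second
segment (more reclaimable space, cheaper migration) is preferred.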
+
+static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
+					struct victim_sel_policy *p)
+{
+	if (p->alloc_mode == SSR)
+		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+
+	/* alloc_mode == LFS */
+	if (p->gc_mode == GC_GREEDY)
+		return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+	else
+		return get_cb_cost(sbi, segno);
+}
+
+/*
+ * This function is called from two paths.
+ * One is garbage collection and the other is SSR segment selection.
+ * When it is called during GC, it just gets a victim segment
+ * and does not remove it from the dirty seglist.
+ * When it is called from SSR segment selection, it finds the segment
+ * with the fewest valid blocks and removes it from the dirty seglist.
+ */
+static int get_victim_by_default(struct f2fs_sb_info *sbi,
+		unsigned int *result, int gc_type, int type, char alloc_mode)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	struct victim_sel_policy p;
+	unsigned int segno;
+	int nsearched = 0;
+
+	p.alloc_mode = alloc_mode;
+	select_policy(sbi, gc_type, type, &p);
+
+	p.min_segno = NULL_SEGNO;
+	p.min_cost = get_max_cost(sbi, &p);
+
+	mutex_lock(&dirty_i->seglist_lock);
+
+	if (p.alloc_mode == LFS && gc_type == FG_GC) {
+		p.min_segno = check_bg_victims(sbi);
+		if (p.min_segno != NULL_SEGNO)
+			goto got_it;
+	}
+
+	while (1) {
+		unsigned long cost;
+
+		segno = find_next_bit(p.dirty_segmap,
+						TOTAL_SEGS(sbi), p.offset);
+		if (segno >= TOTAL_SEGS(sbi)) {
+			if (sbi->last_victim[p.gc_mode]) {
+				sbi->last_victim[p.gc_mode] = 0;
+				p.offset = 0;
+				continue;
+			}
+			break;
+		}
+		p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
+
+		if (test_bit(segno, dirty_i->victim_segmap[FG_GC]))
+			continue;
+		if (gc_type == BG_GC &&
+				test_bit(segno, dirty_i->victim_segmap[BG_GC]))
+			continue;
+		if (IS_CURSEC(sbi, GET_SECNO(sbi, segno)))
+			continue;
+
+		cost = get_gc_cost(sbi, segno, &p);
+
+		if (p.min_cost > cost) {
+			p.min_segno = segno;
+			p.min_cost = cost;
+		}
+
+		if (cost == get_max_cost(sbi, &p))
+			continue;
+
+		if (nsearched++ >= MAX_VICTIM_SEARCH) {
+			sbi->last_victim[p.gc_mode] = segno;
+			break;
+		}
+	}
+got_it:
+	if (p.min_segno != NULL_SEGNO) {
+		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+		if (p.alloc_mode == LFS) {
+			int i;
+			for (i = 0; i < p.ofs_unit; i++)
+				set_bit(*result + i,
+					dirty_i->victim_segmap[gc_type]);
+		}
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+
+	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
+}
+
+static const struct victim_selection default_v_ops = {
+	.get_victim = get_victim_by_default,
+};
+
+static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
+{
+	struct list_head *this;
+	struct inode_entry *ie;
+
+	list_for_each(this, ilist) {
+		ie = list_entry(this, struct inode_entry, list);
+		if (ie->inode->i_ino == ino)
+			return ie->inode;
+	}
+	return NULL;
+}
+
+static void add_gc_inode(struct inode *inode, struct list_head *ilist)
+{
+	struct list_head *this;
+	struct inode_entry *new_ie, *ie;
+
+	list_for_each(this, ilist) {
+		ie = list_entry(this, struct inode_entry, list);
+		if (ie->inode == inode) {
+			iput(inode);
+			return;
+		}
+	}
+repeat:
+	new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
+	if (!new_ie) {
+		cond_resched();
+		goto repeat;
+	}
+	new_ie->inode = inode;
+	list_add_tail(&new_ie->list, ilist);
+}
+
+static void put_gc_inode(struct list_head *ilist)
+{
+	struct inode_entry *ie, *next_ie;
+	list_for_each_entry_safe(ie, next_ie, ilist, list) {
+		iput(ie->inode);
+		list_del(&ie->list);
+		kmem_cache_free(winode_slab, ie);
+	}
+}
+
+static int check_valid_map(struct f2fs_sb_info *sbi,
+				unsigned int segno, int offset)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	struct seg_entry *sentry;
+	int ret;
+
+	mutex_lock(&sit_i->sentry_lock);
+	sentry = get_seg_entry(sbi, segno);
+	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
+	mutex_unlock(&sit_i->sentry_lock);
+	return ret ? GC_OK : GC_NEXT;
+}
+
+/*
+ * This function compares the node address recorded in the summary with
+ * the one in the NAT. If the node is valid, it is copied with cold status;
+ * otherwise (an invalid node) it is ignored.
+ */
+static int gc_node_segment(struct f2fs_sb_info *sbi,
+		struct f2fs_summary *sum, unsigned int segno, int gc_type)
+{
+	bool initial = true;
+	struct f2fs_summary *entry;
+	int off;
+
+next_step:
+	entry = sum;
+	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+		nid_t nid = le32_to_cpu(entry->nid);
+		struct page *node_page;
+		int err;
+
+		/*
+		 * Make sure the free segments can hold all the dirty
+		 * node pages that must be written before the next CP.
+		 * So check the space needed for dirty node pages here.
+		 */
+		if (should_do_checkpoint(sbi)) {
+			mutex_lock(&sbi->cp_mutex);
+			block_operations(sbi);
+			return GC_BLOCKED;
+		}
+
+		err = check_valid_map(sbi, segno, off);
+		if (err == GC_ERROR)
+			return err;
+		else if (err == GC_NEXT)
+			continue;
+
+		if (initial) {
+			ra_node_page(sbi, nid);
+			continue;
+		}
+		node_page = get_node_page(sbi, nid);
+		if (IS_ERR(node_page))
+			continue;
+
+		/* set page dirty and write it */
+		if (!PageWriteback(node_page))
+			set_page_dirty(node_page);
+		f2fs_put_page(node_page, 1);
+		stat_inc_node_blk_count(sbi, 1);
+	}
+	if (initial) {
+		initial = false;
+		goto next_step;
+	}
+
+	if (gc_type == FG_GC) {
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_ALL,
+			.nr_to_write = LONG_MAX,
+			.for_reclaim = 0,
+		};
+		sync_node_pages(sbi, 0, &wbc);
+	}
+	return GC_DONE;
+}
+
+/*
+ * Calculate start block index that this node page contains
+ */
+block_t start_bidx_of_node(unsigned int node_ofs)
+{
+	block_t start_bidx;
+	unsigned int bidx, indirect_blks;
+	int dec;
+
+	indirect_blks = 2 * NIDS_PER_BLOCK + 4;
+
+	start_bidx = 1;
+	if (node_ofs == 0) {
+		start_bidx = 0;
+	} else if (node_ofs <= 2) {
+		bidx = node_ofs - 1;
+	} else if (node_ofs <= indirect_blks) {
+		dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
+		bidx = node_ofs - 2 - dec;
+	} else {
+		dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
+		bidx = node_ofs - 5 - dec;
+	}
+
+	if (start_bidx)
+		start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
+	return start_bidx;
+}
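
Worked example (editorial, assuming the 4 KB on-disk constants
ADDRS_PER_INODE = 923 and ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018): node_ofs
0 is the inode itself, so start_bidx = 0; node_ofs 1, the first direct node,
starts at block index 923; node_ofs 3 is an indirect node holding nids rather
than data, so the next direct node at node_ofs 4 gets dec = 0 and bidx = 2,
i.e. start_bidx = 923 + 2 * 1018 = 2959.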
+
+static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
+{
+	struct page *node_page;
+	nid_t nid;
+	unsigned int ofs_in_node;
+	block_t source_blkaddr;
+
+	nid = le32_to_cpu(sum->nid);
+	ofs_in_node = le16_to_cpu(sum->ofs_in_node);
+
+	node_page = get_node_page(sbi, nid);
+	if (IS_ERR(node_page))
+		return GC_NEXT;
+
+	get_node_info(sbi, nid, dni);
+
+	if (sum->version != dni->version) {
+		f2fs_put_page(node_page, 1);
+		return GC_NEXT;
+	}
+
+	*nofs = ofs_of_node(node_page);
+	source_blkaddr = datablock_addr(node_page, ofs_in_node);
+	f2fs_put_page(node_page, 1);
+
+	if (source_blkaddr != blkaddr)
+		return GC_NEXT;
+	return GC_OK;
+}
+
+static void move_data_page(struct inode *inode, struct page *page, int gc_type)
+{
+	if (page->mapping != inode->i_mapping)
+		goto out;
+
+	if (inode != page->mapping->host)
+		goto out;
+
+	if (PageWriteback(page))
+		goto out;
+
+	if (gc_type == BG_GC) {
+		set_page_dirty(page);
+		set_cold_data(page);
+	} else {
+		struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+		mutex_lock_op(sbi, DATA_WRITE);
+		if (clear_page_dirty_for_io(page) &&
+			S_ISDIR(inode->i_mode)) {
+			dec_page_count(sbi, F2FS_DIRTY_DENTS);
+			inode_dec_dirty_dents(inode);
+		}
+		set_cold_data(page);
+		do_write_data_page(page);
+		mutex_unlock_op(sbi, DATA_WRITE);
+		clear_cold_data(page);
+	}
+out:
+	f2fs_put_page(page, 1);
+}
+
+/*
+ * This function tries to get parent node of victim data block, and identifies
+ * data block validity. If the block is valid, copy that with cold status and
+ * modify parent node.
+ * If the parent node is not valid or the data block address is different,
+ * the victim data block is ignored.
+ */
+static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+		struct list_head *ilist, unsigned int segno, int gc_type)
+{
+	struct super_block *sb = sbi->sb;
+	struct f2fs_summary *entry;
+	block_t start_addr;
+	int err, off;
+	int phase = 0;
+
+	start_addr = START_BLOCK(sbi, segno);
+
+next_step:
+	entry = sum;
+	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+		struct page *data_page;
+		struct inode *inode;
+		struct node_info dni; /* dnode info for the data */
+		unsigned int ofs_in_node, nofs;
+		block_t start_bidx;
+
+		/*
+		 * Make sure enough free segments remain to write back all
+		 * dirty node pages between this checkpoint and the next
+		 * one, so check the space they would consume.
+		 */
+		if (should_do_checkpoint(sbi)) {
+			mutex_lock(&sbi->cp_mutex);
+			block_operations(sbi);
+			err = GC_BLOCKED;
+			goto stop;
+		}
+
+		err = check_valid_map(sbi, segno, off);
+		if (err == GC_ERROR)
+			goto stop;
+		else if (err == GC_NEXT)
+			continue;
+
+		if (phase == 0) {
+			ra_node_page(sbi, le32_to_cpu(entry->nid));
+			continue;
+		}
+
+		/* Check dnode validity and get the inode number that owns it */
+		err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs);
+		if (err == GC_ERROR)
+			goto stop;
+		else if (err == GC_NEXT)
+			continue;
+
+		if (phase == 1) {
+			ra_node_page(sbi, dni.ino);
+			continue;
+		}
+
+		start_bidx = start_bidx_of_node(nofs);
+		ofs_in_node = le16_to_cpu(entry->ofs_in_node);
+
+		if (phase == 2) {
+			inode = f2fs_iget_nowait(sb, dni.ino);
+			if (IS_ERR(inode))
+				continue;
+
+			data_page = find_data_page(inode,
+					start_bidx + ofs_in_node);
+			if (IS_ERR(data_page))
+				goto next_iput;
+
+			f2fs_put_page(data_page, 0);
+			add_gc_inode(inode, ilist);
+		} else {
+			inode = find_gc_inode(dni.ino, ilist);
+			if (inode) {
+				data_page = get_lock_data_page(inode,
+						start_bidx + ofs_in_node);
+				if (IS_ERR(data_page))
+					continue;
+				move_data_page(inode, data_page, gc_type);
+				stat_inc_data_blk_count(sbi, 1);
+			}
+		}
+		continue;
+next_iput:
+		iput(inode);
+	}
+	if (++phase < 4)
+		goto next_step;
+	err = GC_DONE;
+stop:
+	if (gc_type == FG_GC)
+		f2fs_submit_bio(sbi, DATA, true);
+	return err;
+}
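
As a reading aid, the loop above makes four passes over the victim's summary
entries, selected by "phase". A compilable summary of the pass structure, with
identifiers that are mine rather than the patch's:

	/* The four passes of gc_data_segment() over a victim segment. */
	enum gc_data_phase {
		GC_PHASE_RA_NODE = 0,	/* readahead the dnodes named in the summary */
		GC_PHASE_CHECK_DNODE,	/* validate each dnode, readahead owning inodes */
		GC_PHASE_IGET,		/* grab inodes, pin data pages, queue on ilist */
		GC_PHASE_MOVE_DATA,	/* lock each data page and move_data_page() it */
	};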
+
+static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
+						int gc_type, int type)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	int ret;
+	mutex_lock(&sit_i->sentry_lock);
+	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS);
+	mutex_unlock(&sit_i->sentry_lock);
+	return ret;
+}
+
+static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
+				struct list_head *ilist, int gc_type)
+{
+	struct page *sum_page;
+	struct f2fs_summary_block *sum;
+	int ret = GC_DONE;
+
+	/* read segment summary of victim */
+	sum_page = get_sum_page(sbi, segno);
+	if (IS_ERR(sum_page))
+		return GC_ERROR;
+
+	/*
+	 * CP needs to lock sum_page. At this point, we don't need to hold
+	 * the lock ourselves: the summary page cannot go away, and it will
+	 * not be updated until GC is done.
+	 */
+	unlock_page(sum_page);
+	sum = page_address(sum_page);
+
+	switch (GET_SUM_TYPE((&sum->footer))) {
+	case SUM_TYPE_NODE:
+		ret = gc_node_segment(sbi, sum->entries, segno, gc_type);
+		break;
+	case SUM_TYPE_DATA:
+		ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
+		break;
+	}
+	stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
+	stat_inc_call_count(sbi->stat_info);
+
+	f2fs_put_page(sum_page, 0);
+	return ret;
+}
+
+int f2fs_gc(struct f2fs_sb_info *sbi, int nGC)
+{
+	unsigned int segno;
+	int old_free_secs, cur_free_secs;
+	int gc_status, nfree;
+	struct list_head ilist;
+	int gc_type = BG_GC;
+
+	INIT_LIST_HEAD(&ilist);
+gc_more:
+	nfree = 0;
+	gc_status = GC_NONE;
+
+	if (has_not_enough_free_secs(sbi))
+		old_free_secs = reserved_sections(sbi);
+	else
+		old_free_secs = free_sections(sbi);
+
+	while (sbi->sb->s_flags & MS_ACTIVE) {
+		int i;
+		if (has_not_enough_free_secs(sbi))
+			gc_type = FG_GC;
+
+		cur_free_secs = free_sections(sbi) + nfree;
+
+		/* We got free space successfully. */
+		if (nGC < cur_free_secs - old_free_secs)
+			break;
+
+		if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
+			break;
+
+		for (i = 0; i < sbi->segs_per_sec; i++) {
+			/*
+			 * do_garbage_collect returns one of three gc_status
+			 * values: GC_ERROR, GC_DONE, or GC_BLOCKED.
+			 * If GC finishes uncleanly, the victim must be
+			 * returned to the dirty segment list.
+			 */
+			gc_status = do_garbage_collect(sbi, segno + i,
+					&ilist, gc_type);
+			if (gc_status != GC_DONE)
+				goto stop;
+			nfree++;
+		}
+	}
+stop:
+	if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
+		write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
+		if (nfree)
+			goto gc_more;
+	}
+	mutex_unlock(&sbi->gc_mutex);
+
+	put_gc_inode(&ilist);
+	BUG_ON(!list_empty(&ilist));
+	return gc_status;
+}
+
+void build_gc_manager(struct f2fs_sb_info *sbi)
+{
+	DIRTY_I(sbi)->v_ops = &default_v_ops;
+}
+
+int create_gc_caches(void)
+{
+	winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
+			sizeof(struct inode_entry), NULL);
+	if (!winode_slab)
+		return -ENOMEM;
+	return 0;
+}
+
+void destroy_gc_caches(void)
+{
+	kmem_cache_destroy(winode_slab);
+}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
new file mode 100644
index 0000000..b026d93
--- /dev/null
+++ b/fs/f2fs/gc.h
@@ -0,0 +1,117 @@
+/*
+ * fs/f2fs/gc.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define GC_THREAD_NAME	"f2fs_gc_task"
+#define GC_THREAD_MIN_WB_PAGES		1	/*
+						 * a threshold to determine
+						 * whether IO subsystem is idle
+						 * or not
+						 */
+#define GC_THREAD_MIN_SLEEP_TIME	10000 /* milliseconds */
+#define GC_THREAD_MAX_SLEEP_TIME	30000
+#define GC_THREAD_NOGC_SLEEP_TIME	10000
+#define LIMIT_INVALID_BLOCK	40 /* percentage over total user space */
+#define LIMIT_FREE_BLOCK	40 /* percentage over invalid + free space */
+
+/* Maximum number of dirty segments to search when selecting a victim */
+#define MAX_VICTIM_SEARCH	20
+
+enum {
+	GC_NONE = 0,
+	GC_ERROR,
+	GC_OK,
+	GC_NEXT,
+	GC_BLOCKED,
+	GC_DONE,
+};
+
+struct f2fs_gc_kthread {
+	struct task_struct *f2fs_gc_task;
+	wait_queue_head_t gc_wait_queue_head;
+};
+
+struct inode_entry {
+	struct list_head list;
+	struct inode *inode;
+};
+
+/*
+ * inline functions
+ */
+static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
+{
+	if (free_segments(sbi) < overprovision_segments(sbi))
+		return 0;
+	else
+		return (free_segments(sbi) - overprovision_segments(sbi))
+			<< sbi->log_blocks_per_seg;
+}
+
+static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
+{
+	return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
+}
+
+static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
+{
+	block_t reclaimable_user_blocks = sbi->user_block_count -
+		written_block_count(sbi);
+	return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
+}
+
+static inline long increase_sleep_time(long wait)
+{
+	wait += GC_THREAD_MIN_SLEEP_TIME;
+	if (wait > GC_THREAD_MAX_SLEEP_TIME)
+		wait = GC_THREAD_MAX_SLEEP_TIME;
+	return wait;
+}
+
+static inline long decrease_sleep_time(long wait)
+{
+	wait -= GC_THREAD_MIN_SLEEP_TIME;
+	if (wait <= GC_THREAD_MIN_SLEEP_TIME)
+		wait = GC_THREAD_MIN_SLEEP_TIME;
+	return wait;
+}
+
+static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
+{
+	block_t invalid_user_blocks = sbi->user_block_count -
+					written_block_count(sbi);
+	/*
+	 * Background GC is triggered when both conditions hold:
+	 * 1. there are enough invalid blocks, and
+	 * 2. there is not enough free space.
+	 */
+	if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
+			free_user_blocks(sbi) < limit_free_user_blocks(sbi))
+		return true;
+	return false;
+}
+
+static inline int is_idle(struct f2fs_sb_info *sbi)
+{
+	struct block_device *bdev = sbi->sb->s_bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+	struct request_list *rl = &q->root_rl;
+	return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
+}
+
+static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi)
+{
+	unsigned int pages_per_sec = sbi->segs_per_sec *
+					(1 << sbi->log_blocks_per_seg);
+	int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
+			>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
+	int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
+			>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
+	return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2);
+}
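
To make the checkpoint threshold concrete, a small userspace sketch of the
same arithmetic, assuming 512 blocks per segment (log_blocks_per_seg = 9) and
one segment per section; both values are assumptions, not taken from this
patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned int log_blocks_per_seg = 9;	/* 512 blocks/segment */
		unsigned int segs_per_sec = 1;
		unsigned int pages_per_sec = segs_per_sec * (1u << log_blocks_per_seg);
		unsigned int dirty_nodes = 1500, dirty_dents = 200;

		int node_secs = ((dirty_nodes + pages_per_sec - 1)
				>> log_blocks_per_seg) / segs_per_sec;	/* = 3 */
		int dent_secs = ((dirty_dents + pages_per_sec - 1)
				>> log_blocks_per_seg) / segs_per_sec;	/* = 1 */

		/* checkpoint once free sections fall to this level or below */
		printf("threshold = %d\n", node_secs + 2 * dent_secs + 2); /* 7 */
		return 0;
	}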
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
new file mode 100644
index 0000000..a60f042
--- /dev/null
+++ b/fs/f2fs/hash.c
@@ -0,0 +1,97 @@
+/*
+ * fs/f2fs/hash.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext3/hash.c
+ *
+ * Copyright (C) 2002 by Theodore Ts'o
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/cryptohash.h>
+#include <linux/pagemap.h>
+
+#include "f2fs.h"
+
+/*
+ * Hashing code copied from ext3
+ */
+#define DELTA 0x9E3779B9
+
+static void TEA_transform(unsigned int buf[4], unsigned int const in[])
+{
+	__u32 sum = 0;
+	__u32 b0 = buf[0], b1 = buf[1];
+	__u32 a = in[0], b = in[1], c = in[2], d = in[3];
+	int n = 16;
+
+	do {
+		sum += DELTA;
+		b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
+		b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
+	} while (--n);
+
+	buf[0] += b0;
+	buf[1] += b1;
+}
+
+static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
+{
+	unsigned pad, val;
+	int i;
+
+	pad = (__u32)len | ((__u32)len << 8);
+	pad |= pad << 16;
+
+	val = pad;
+	if (len > num * 4)
+		len = num * 4;
+	for (i = 0; i < len; i++) {
+		if ((i % 4) == 0)
+			val = pad;
+		val = msg[i] + (val << 8);
+		if ((i % 4) == 3) {
+			*buf++ = val;
+			val = pad;
+			num--;
+		}
+	}
+	if (--num >= 0)
+		*buf++ = val;
+	while (--num >= 0)
+		*buf++ = pad;
+}
+
+f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
+{
+	__u32 hash, minor_hash;
+	f2fs_hash_t f2fs_hash;
+	const char *p;
+	__u32 in[8], buf[4];
+
+	/* Initialize the default seed for the hash checksum functions */
+	buf[0] = 0x67452301;
+	buf[1] = 0xefcdab89;
+	buf[2] = 0x98badcfe;
+	buf[3] = 0x10325476;
+
+	p = name;
+	while (len > 0) {
+		str2hashbuf(p, len, in, 4);
+		TEA_transform(buf, in);
+		len -= 16;
+		p += 16;
+	}
+	hash = buf[0];
+	minor_hash = buf[1];
+
+	f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
+	return f2fs_hash;
+}
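
For experimentation outside the kernel, a self-contained userspace replica of
the pipeline above. F2FS_HASH_COL_BIT is assumed here to be the top bit, and
the byte handling assumes ASCII names; check include/linux/f2fs_fs.h for the
authoritative definitions:

	#include <stdio.h>
	#include <string.h>

	#define DELTA			0x9E3779B9
	#define F2FS_HASH_COL_BIT	0x80000000u	/* assumed */

	static void tea(unsigned int buf[4], const unsigned int in[4])
	{
		unsigned int sum = 0, b0 = buf[0], b1 = buf[1];
		int n = 16;

		while (n--) {
			sum += DELTA;
			b0 += ((b1 << 4) + in[0]) ^ (b1 + sum) ^ ((b1 >> 5) + in[1]);
			b1 += ((b0 << 4) + in[2]) ^ (b0 + sum) ^ ((b0 >> 5) + in[3]);
		}
		buf[0] += b0;
		buf[1] += b1;
	}

	static void s2h(const char *msg, int len, unsigned int *buf, int num)
	{
		unsigned int pad = (unsigned int)len | ((unsigned int)len << 8);
		unsigned int val;
		int i;

		pad |= pad << 16;
		val = pad;
		if (len > num * 4)
			len = num * 4;
		for (i = 0; i < len; i++) {
			if ((i % 4) == 0)
				val = pad;
			val = (unsigned char)msg[i] + (val << 8);
			if ((i % 4) == 3) {
				*buf++ = val;
				val = pad;
				num--;
			}
		}
		if (--num >= 0)
			*buf++ = val;
		while (--num >= 0)
			*buf++ = pad;
	}

	int main(void)
	{
		const char *name = "example.mp3", *p = name;
		int len = (int)strlen(name);
		unsigned int in[8];
		unsigned int buf[4] = {
			0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };

		while (len > 0) {	/* one TEA round per 16-byte chunk */
			s2h(p, len, in, 4);
			tea(buf, in);
			len -= 16;
			p += 16;
		}
		printf("hash(\"%s\") = 0x%08x\n", name,
		       buf[0] & ~F2FS_HASH_COL_BIT);
		return 0;
	}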
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
new file mode 100644
index 0000000..df5fb38
--- /dev/null
+++ b/fs/f2fs/inode.c
@@ -0,0 +1,268 @@
+/*
+ * fs/f2fs/inode.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+
+#include "f2fs.h"
+#include "node.h"
+
+struct f2fs_iget_args {
+	u64 ino;
+	int on_free;
+};
+
+void f2fs_set_inode_flags(struct inode *inode)
+{
+	unsigned int flags = F2FS_I(inode)->i_flags;
+
+	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
+			S_NOATIME | S_DIRSYNC);
+
+	if (flags & FS_SYNC_FL)
+		inode->i_flags |= S_SYNC;
+	if (flags & FS_APPEND_FL)
+		inode->i_flags |= S_APPEND;
+	if (flags & FS_IMMUTABLE_FL)
+		inode->i_flags |= S_IMMUTABLE;
+	if (flags & FS_NOATIME_FL)
+		inode->i_flags |= S_NOATIME;
+	if (flags & FS_DIRSYNC_FL)
+		inode->i_flags |= S_DIRSYNC;
+}
+
+static int f2fs_iget_test(struct inode *inode, void *data)
+{
+	struct f2fs_iget_args *args = data;
+
+	if (inode->i_ino != args->ino)
+		return 0;
+	if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
+		args->on_free = 1;
+		return 0;
+	}
+	return 1;
+}
+
+struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino)
+{
+	struct f2fs_iget_args args = {
+		.ino = ino,
+		.on_free = 0
+	};
+	struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args);
+
+	if (inode)
+		return inode;
+	if (!args.on_free)
+		return f2fs_iget(sb, ino);
+	return ERR_PTR(-ENOENT);
+}
+
+static int do_read_inode(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct page *node_page;
+	struct f2fs_node *rn;
+	struct f2fs_inode *ri;
+
+	/* Check if ino is within scope */
+	check_nid_range(sbi, inode->i_ino);
+
+	node_page = get_node_page(sbi, inode->i_ino);
+	if (IS_ERR(node_page))
+		return PTR_ERR(node_page);
+
+	rn = page_address(node_page);
+	ri = &(rn->i);
+
+	inode->i_mode = le16_to_cpu(ri->i_mode);
+	i_uid_write(inode, le32_to_cpu(ri->i_uid));
+	i_gid_write(inode, le32_to_cpu(ri->i_gid));
+	set_nlink(inode, le32_to_cpu(ri->i_links));
+	inode->i_size = le64_to_cpu(ri->i_size);
+	inode->i_blocks = le64_to_cpu(ri->i_blocks);
+
+	inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
+	inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
+	inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
+	inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
+	inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
+	inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
+	inode->i_generation = le32_to_cpu(ri->i_generation);
+
+	fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
+	fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
+	fi->i_flags = le32_to_cpu(ri->i_flags);
+	fi->flags = 0;
+	fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1;
+	fi->i_advise = ri->i_advise;
+	fi->i_pino = le32_to_cpu(ri->i_pino);
+	get_extent_info(&fi->ext, ri->i_ext);
+	f2fs_put_page(node_page, 1);
+	return 0;
+}
+
+struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *inode;
+	int ret;
+
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+	if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
+		goto make_now;
+
+	ret = do_read_inode(inode);
+	if (ret)
+		goto bad_inode;
+
+	if (!sbi->por_doing && inode->i_nlink == 0) {
+		ret = -ENOENT;
+		goto bad_inode;
+	}
+
+make_now:
+	if (ino == F2FS_NODE_INO(sbi)) {
+		inode->i_mapping->a_ops = &f2fs_node_aops;
+		mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+	} else if (ino == F2FS_META_INO(sbi)) {
+		inode->i_mapping->a_ops = &f2fs_meta_aops;
+		mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+	} else if (S_ISREG(inode->i_mode)) {
+		inode->i_op = &f2fs_file_inode_operations;
+		inode->i_fop = &f2fs_file_operations;
+		inode->i_mapping->a_ops = &f2fs_dblock_aops;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &f2fs_dir_inode_operations;
+		inode->i_fop = &f2fs_dir_operations;
+		inode->i_mapping->a_ops = &f2fs_dblock_aops;
+		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE |
+				__GFP_ZERO);
+	} else if (S_ISLNK(inode->i_mode)) {
+		inode->i_op = &f2fs_symlink_inode_operations;
+		inode->i_mapping->a_ops = &f2fs_dblock_aops;
+	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+			S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+		inode->i_op = &f2fs_special_inode_operations;
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+	} else {
+		ret = -EIO;
+		goto bad_inode;
+	}
+	unlock_new_inode(inode);
+
+	return inode;
+
+bad_inode:
+	iget_failed(inode);
+	return ERR_PTR(ret);
+}
+
+void update_inode(struct inode *inode, struct page *node_page)
+{
+	struct f2fs_node *rn;
+	struct f2fs_inode *ri;
+
+	wait_on_page_writeback(node_page);
+
+	rn = page_address(node_page);
+	ri = &(rn->i);
+
+	ri->i_mode = cpu_to_le16(inode->i_mode);
+	ri->i_advise = F2FS_I(inode)->i_advise;
+	ri->i_uid = cpu_to_le32(i_uid_read(inode));
+	ri->i_gid = cpu_to_le32(i_gid_read(inode));
+	ri->i_links = cpu_to_le32(inode->i_nlink);
+	ri->i_size = cpu_to_le64(i_size_read(inode));
+	ri->i_blocks = cpu_to_le64(inode->i_blocks);
+	set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
+
+	ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
+	ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
+	ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+	ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+	ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
+	ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
+	ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
+	ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
+	ri->i_generation = cpu_to_le32(inode->i_generation);
+	set_page_dirty(node_page);
+}
+
+int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct page *node_page;
+	bool need_lock = false;
+
+	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
+			inode->i_ino == F2FS_META_INO(sbi))
+		return 0;
+
+	node_page = get_node_page(sbi, inode->i_ino);
+	if (IS_ERR(node_page))
+		return PTR_ERR(node_page);
+
+	if (!PageDirty(node_page)) {
+		need_lock = true;
+		f2fs_put_page(node_page, 1);
+		mutex_lock(&sbi->write_inode);
+		node_page = get_node_page(sbi, inode->i_ino);
+		if (IS_ERR(node_page)) {
+			mutex_unlock(&sbi->write_inode);
+			return PTR_ERR(node_page);
+		}
+	}
+	update_inode(inode, node_page);
+	f2fs_put_page(node_page, 1);
+	if (need_lock)
+		mutex_unlock(&sbi->write_inode);
+	return 0;
+}
+
+/*
+ * Called at the last iput() if i_nlink is zero
+ */
+void f2fs_evict_inode(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
+			inode->i_ino == F2FS_META_INO(sbi))
+		goto no_delete;
+
+	BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
+	remove_dirty_dir_inode(inode);
+
+	if (inode->i_nlink || is_bad_inode(inode))
+		goto no_delete;
+
+	set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
+	i_size_write(inode, 0);
+
+	if (F2FS_HAS_BLOCKS(inode))
+		f2fs_truncate(inode);
+
+	remove_inode_page(inode);
+no_delete:
+	clear_inode(inode);
+}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
new file mode 100644
index 0000000..89b7675
--- /dev/null
+++ b/fs/f2fs/namei.c
@@ -0,0 +1,503 @@
+/*
+ * fs/f2fs/namei.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+
+#include "f2fs.h"
+#include "xattr.h"
+#include "acl.h"
+
+static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+{
+	struct super_block *sb = dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	nid_t ino;
+	struct inode *inode;
+	bool nid_free = false;
+	int err;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock_op(sbi, NODE_NEW);
+	if (!alloc_nid(sbi, &ino)) {
+		mutex_unlock_op(sbi, NODE_NEW);
+		err = -ENOSPC;
+		goto fail;
+	}
+	mutex_unlock_op(sbi, NODE_NEW);
+
+	inode->i_uid = current_fsuid();
+
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else {
+		inode->i_gid = current_fsgid();
+	}
+
+	inode->i_ino = ino;
+	inode->i_mode = mode;
+	inode->i_blocks = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_generation = sbi->s_next_generation++;
+
+	err = insert_inode_locked(inode);
+	if (err) {
+		err = -EINVAL;
+		nid_free = true;
+		goto out;
+	}
+
+	mark_inode_dirty(inode);
+	return inode;
+
+out:
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+fail:
+	iput(inode);
+	if (nid_free)
+		alloc_nid_failed(sbi, ino);
+	return ERR_PTR(err);
+}
+
+static int is_multimedia_file(const unsigned char *s, const char *sub)
+{
+	int slen = strlen(s);
+	int sublen = strlen(sub);
+	int ret;
+
+	if (sublen > slen)
+		return 1;
+
+	ret = memcmp(s + slen - sublen, sub, sublen);
+	if (ret) {	/* compare upper case */
+		int i;
+		char upper_sub[8];
+		for (i = 0; i < sublen && i < sizeof(upper_sub); i++)
+			upper_sub[i] = toupper(sub[i]);
+		return memcmp(s + slen - sublen, upper_sub, sublen);
+	}
+
+	return ret;
+}
+
+/*
+ * Set multimedia files as cold files for hot/cold data separation
+ */
+static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
+		const unsigned char *name)
+{
+	int i;
+	__u8 (*extlist)[8] = sbi->raw_super->extension_list;
+
+	int count = le32_to_cpu(sbi->raw_super->extension_count);
+	for (i = 0; i < count; i++) {
+		if (!is_multimedia_file(name, extlist[i])) {
+			F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
+			break;
+		}
+	}
+}
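
A userspace sketch of the suffix test that drives this loop. It mirrors
is_multimedia_file() above (with an explicit bounds guard the kernel version
gets from the 8-byte extension_list entries) and shows that only an exact or
fully upper-cased extension matches, never a mixed-case one:

	#include <stdio.h>
	#include <string.h>
	#include <ctype.h>

	/* returns 0 when name ends with ext, exactly or fully upper-cased */
	static int suffix_mismatch(const char *name, const char *ext)
	{
		size_t nlen = strlen(name), elen = strlen(ext);
		char upper[8];
		size_t i;

		if (elen > nlen || elen > sizeof(upper))
			return 1;
		if (!memcmp(name + nlen - elen, ext, elen))
			return 0;
		for (i = 0; i < elen; i++)
			upper[i] = (char)toupper((unsigned char)ext[i]);
		return memcmp(name + nlen - elen, upper, elen) != 0;
	}

	int main(void)
	{
		printf("%d\n", suffix_mismatch("song.mp3", "mp3")); /* 0: cold */
		printf("%d\n", suffix_mismatch("SONG.MP3", "mp3")); /* 0: cold */
		printf("%d\n", suffix_mismatch("Song.Mp3", "mp3")); /* 1: missed */
		return 0;
	}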
+
+static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+						bool excl)
+{
+	struct super_block *sb = dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *inode;
+	nid_t ino = 0;
+	int err;
+
+	inode = f2fs_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
+		set_cold_file(sbi, inode, dentry->d_name.name);
+
+	inode->i_op = &f2fs_file_inode_operations;
+	inode->i_fop = &f2fs_file_operations;
+	inode->i_mapping->a_ops = &f2fs_dblock_aops;
+	ino = inode->i_ino;
+
+	err = f2fs_add_link(dentry, inode);
+	if (err)
+		goto out;
+
+	alloc_nid_done(sbi, ino);
+
+	if (!sbi->por_doing)
+		d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	f2fs_balance_fs(sbi);
+	return 0;
+out:
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+	alloc_nid_failed(sbi, ino);
+	return err;
+}
+
+static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
+		struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+	struct super_block *sb = dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int err;
+
+	inode->i_ctime = CURRENT_TIME;
+	ihold(inode);
+
+	set_inode_flag(F2FS_I(inode), FI_INC_LINK);
+	err = f2fs_add_link(dentry, inode);
+	if (err)
+		goto out;
+
+	d_instantiate(dentry, inode);
+
+	f2fs_balance_fs(sbi);
+	return 0;
+out:
+	clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+	iput(inode);
+	return err;
+}
+
+struct dentry *f2fs_get_parent(struct dentry *child)
+{
+	struct qstr dotdot = QSTR_INIT("..", 2);
+	unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
+	if (!ino)
+		return ERR_PTR(-ENOENT);
+	return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
+}
+
+static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
+		unsigned int flags)
+{
+	struct inode *inode = NULL;
+	struct f2fs_dir_entry *de;
+	struct page *page;
+
+	if (dentry->d_name.len > F2FS_MAX_NAME_LEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	de = f2fs_find_entry(dir, &dentry->d_name, &page);
+	if (de) {
+		nid_t ino = le32_to_cpu(de->ino);
+		kunmap(page);
+		f2fs_put_page(page, 0);
+
+		inode = f2fs_iget(dir->i_sb, ino);
+		if (IS_ERR(inode))
+			return ERR_CAST(inode);
+	}
+
+	return d_splice_alias(inode, dentry);
+}
+
+static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct super_block *sb = dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *inode = dentry->d_inode;
+	struct f2fs_dir_entry *de;
+	struct page *page;
+	int err = -ENOENT;
+
+	de = f2fs_find_entry(dir, &dentry->d_name, &page);
+	if (!de)
+		goto fail;
+
+	err = check_orphan_space(sbi);
+	if (err) {
+		kunmap(page);
+		f2fs_put_page(page, 0);
+		goto fail;
+	}
+
+	f2fs_delete_entry(de, page, inode);
+
+	/* In order to evict this inode, we set it dirty */
+	mark_inode_dirty(inode);
+	f2fs_balance_fs(sbi);
+fail:
+	return err;
+}
+
+static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
+					const char *symname)
+{
+	struct super_block *sb = dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *inode;
+	unsigned symlen = strlen(symname) + 1;
+	int err;
+
+	inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &f2fs_symlink_inode_operations;
+	inode->i_mapping->a_ops = &f2fs_dblock_aops;
+
+	err = f2fs_add_link(dentry, inode);
+	if (err)
+		goto out;
+
+	err = page_symlink(inode, symname, symlen);
+	alloc_nid_done(sbi, inode->i_ino);
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	f2fs_balance_fs(sbi);
+
+	return err;
+out:
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+	alloc_nid_failed(sbi, inode->i_ino);
+	return err;
+}
+
+static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+	struct inode *inode;
+	int err;
+
+	inode = f2fs_new_inode(dir, S_IFDIR | mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &f2fs_dir_inode_operations;
+	inode->i_fop = &f2fs_dir_operations;
+	inode->i_mapping->a_ops = &f2fs_dblock_aops;
+	mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+
+	set_inode_flag(F2FS_I(inode), FI_INC_LINK);
+	err = f2fs_add_link(dentry, inode);
+	if (err)
+		goto out_fail;
+
+	alloc_nid_done(sbi, inode->i_ino);
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	f2fs_balance_fs(sbi);
+	return 0;
+
+out_fail:
+	clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+	alloc_nid_failed(sbi, inode->i_ino);
+	return err;
+}
+
+static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	if (f2fs_empty_dir(inode))
+		return f2fs_unlink(dir, dentry);
+	return -ENOTEMPTY;
+}
+
+static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
+				umode_t mode, dev_t rdev)
+{
+	struct super_block *sb = dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *inode;
+	int err = 0;
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	inode = f2fs_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	init_special_inode(inode, inode->i_mode, rdev);
+	inode->i_op = &f2fs_special_inode_operations;
+
+	err = f2fs_add_link(dentry, inode);
+	if (err)
+		goto out;
+
+	alloc_nid_done(sbi, inode->i_ino);
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	f2fs_balance_fs(sbi);
+
+	return 0;
+out:
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+	alloc_nid_failed(sbi, inode->i_ino);
+	return err;
+}
+
+static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct super_block *sb = old_dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *old_inode = old_dentry->d_inode;
+	struct inode *new_inode = new_dentry->d_inode;
+	struct page *old_dir_page;
+	struct page *old_page;
+	struct f2fs_dir_entry *old_dir_entry = NULL;
+	struct f2fs_dir_entry *old_entry;
+	struct f2fs_dir_entry *new_entry;
+	int err = -ENOENT;
+
+	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+	if (!old_entry)
+		goto out;
+
+	if (S_ISDIR(old_inode->i_mode)) {
+		err = -EIO;
+		old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page);
+		if (!old_dir_entry)
+			goto out_old;
+	}
+
+	mutex_lock_op(sbi, RENAME);
+
+	if (new_inode) {
+		struct page *new_page;
+
+		err = -ENOTEMPTY;
+		if (old_dir_entry && !f2fs_empty_dir(new_inode))
+			goto out_dir;
+
+		err = -ENOENT;
+		new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name,
+						&new_page);
+		if (!new_entry)
+			goto out_dir;
+
+		f2fs_set_link(new_dir, new_entry, new_page, old_inode);
+
+		new_inode->i_ctime = CURRENT_TIME;
+		if (old_dir_entry)
+			drop_nlink(new_inode);
+		drop_nlink(new_inode);
+		if (!new_inode->i_nlink)
+			add_orphan_inode(sbi, new_inode->i_ino);
+		f2fs_write_inode(new_inode, NULL);
+	} else {
+		err = f2fs_add_link(new_dentry, old_inode);
+		if (err)
+			goto out_dir;
+
+		if (old_dir_entry) {
+			inc_nlink(new_dir);
+			f2fs_write_inode(new_dir, NULL);
+		}
+	}
+
+	old_inode->i_ctime = CURRENT_TIME;
+	set_inode_flag(F2FS_I(old_inode), FI_NEED_CP);
+	mark_inode_dirty(old_inode);
+
+	f2fs_delete_entry(old_entry, old_page, NULL);
+
+	if (old_dir_entry) {
+		if (old_dir != new_dir) {
+			f2fs_set_link(old_inode, old_dir_entry,
+						old_dir_page, new_dir);
+		} else {
+			kunmap(old_dir_page);
+			f2fs_put_page(old_dir_page, 0);
+		}
+		drop_nlink(old_dir);
+		f2fs_write_inode(old_dir, NULL);
+	}
+
+	mutex_unlock_op(sbi, RENAME);
+
+	f2fs_balance_fs(sbi);
+	return 0;
+
+out_dir:
+	if (old_dir_entry) {
+		kunmap(old_dir_page);
+		f2fs_put_page(old_dir_page, 0);
+	}
+	mutex_unlock_op(sbi, RENAME);
+out_old:
+	kunmap(old_page);
+	f2fs_put_page(old_page, 0);
+out:
+	return err;
+}
+
+const struct inode_operations f2fs_dir_inode_operations = {
+	.create		= f2fs_create,
+	.lookup		= f2fs_lookup,
+	.link		= f2fs_link,
+	.unlink		= f2fs_unlink,
+	.symlink	= f2fs_symlink,
+	.mkdir		= f2fs_mkdir,
+	.rmdir		= f2fs_rmdir,
+	.mknod		= f2fs_mknod,
+	.rename		= f2fs_rename,
+	.setattr	= f2fs_setattr,
+	.get_acl	= f2fs_get_acl,
+#ifdef CONFIG_F2FS_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= f2fs_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+};
+
+const struct inode_operations f2fs_symlink_inode_operations = {
+	.readlink       = generic_readlink,
+	.follow_link    = page_follow_link_light,
+	.put_link       = page_put_link,
+	.setattr	= f2fs_setattr,
+#ifdef CONFIG_F2FS_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= f2fs_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+};
+
+const struct inode_operations f2fs_special_inode_operations = {
+	.setattr        = f2fs_setattr,
+	.get_acl	= f2fs_get_acl,
+#ifdef CONFIG_F2FS_FS_XATTR
+	.setxattr       = generic_setxattr,
+	.getxattr       = generic_getxattr,
+	.listxattr	= f2fs_listxattr,
+	.removexattr    = generic_removexattr,
+#endif
+};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
new file mode 100644
index 0000000..1987036
--- /dev/null
+++ b/fs/f2fs/node.c
@@ -0,0 +1,1764 @@
+/*
+ * fs/f2fs/node.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/mpage.h>
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+static struct kmem_cache *nat_entry_slab;
+static struct kmem_cache *free_nid_slab;
+
+static void clear_node_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+	unsigned long flags;
+
+	if (PageDirty(page)) {
+		spin_lock_irqsave(&mapping->tree_lock, flags);
+		radix_tree_tag_clear(&mapping->page_tree,
+				page_index(page),
+				PAGECACHE_TAG_DIRTY);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+
+		clear_page_dirty_for_io(page);
+		dec_page_count(sbi, F2FS_DIRTY_NODES);
+	}
+	ClearPageUptodate(page);
+}
+
+static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	pgoff_t index = current_nat_addr(sbi, nid);
+	return get_meta_page(sbi, index);
+}
+
+static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	struct page *src_page;
+	struct page *dst_page;
+	pgoff_t src_off;
+	pgoff_t dst_off;
+	void *src_addr;
+	void *dst_addr;
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+	src_off = current_nat_addr(sbi, nid);
+	dst_off = next_nat_addr(sbi, src_off);
+
+	/* get current nat block page with lock */
+	src_page = get_meta_page(sbi, src_off);
+
+	/* Dirty src_page means that it is already the new target NAT page. */
+	if (PageDirty(src_page))
+		return src_page;
+
+	dst_page = grab_meta_page(sbi, dst_off);
+
+	src_addr = page_address(src_page);
+	dst_addr = page_address(dst_page);
+	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+	set_page_dirty(dst_page);
+	f2fs_put_page(src_page, 1);
+
+	set_to_next_nat(nm_i, nid);
+
+	return dst_page;
+}
+
+/*
+ * Readahead NAT pages
+ */
+static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct page *page;
+	pgoff_t index;
+	int i;
+
+	for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
+		if (nid >= nm_i->max_nid)
+			nid = 0;
+		index = current_nat_addr(sbi, nid);
+
+		page = grab_cache_page(mapping, index);
+		if (!page)
+			continue;
+		if (f2fs_readpage(sbi, page, index, READ)) {
+			f2fs_put_page(page, 1);
+			continue;
+		}
+		page_cache_release(page);
+	}
+}
+
+static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
+{
+	return radix_tree_lookup(&nm_i->nat_root, n);
+}
+
+static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
+		nid_t start, unsigned int nr, struct nat_entry **ep)
+{
+	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
+}
+
+static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
+{
+	list_del(&e->list);
+	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
+	nm_i->nat_cnt--;
+	kmem_cache_free(nat_entry_slab, e);
+}
+
+int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct nat_entry *e;
+	int is_cp = 1;
+
+	read_lock(&nm_i->nat_tree_lock);
+	e = __lookup_nat_cache(nm_i, nid);
+	if (e && !e->checkpointed)
+		is_cp = 0;
+	read_unlock(&nm_i->nat_tree_lock);
+	return is_cp;
+}
+
+static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
+{
+	struct nat_entry *new;
+
+	new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
+	if (!new)
+		return NULL;
+	if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
+		kmem_cache_free(nat_entry_slab, new);
+		return NULL;
+	}
+	memset(new, 0, sizeof(struct nat_entry));
+	nat_set_nid(new, nid);
+	list_add_tail(&new->list, &nm_i->nat_entries);
+	nm_i->nat_cnt++;
+	return new;
+}
+
+static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
+						struct f2fs_nat_entry *ne)
+{
+	struct nat_entry *e;
+retry:
+	write_lock(&nm_i->nat_tree_lock);
+	e = __lookup_nat_cache(nm_i, nid);
+	if (!e) {
+		e = grab_nat_entry(nm_i, nid);
+		if (!e) {
+			write_unlock(&nm_i->nat_tree_lock);
+			goto retry;
+		}
+		nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
+		nat_set_ino(e, le32_to_cpu(ne->ino));
+		nat_set_version(e, ne->version);
+		e->checkpointed = true;
+	}
+	write_unlock(&nm_i->nat_tree_lock);
+}
+
+static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
+			block_t new_blkaddr)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct nat_entry *e;
+retry:
+	write_lock(&nm_i->nat_tree_lock);
+	e = __lookup_nat_cache(nm_i, ni->nid);
+	if (!e) {
+		e = grab_nat_entry(nm_i, ni->nid);
+		if (!e) {
+			write_unlock(&nm_i->nat_tree_lock);
+			goto retry;
+		}
+		e->ni = *ni;
+		e->checkpointed = true;
+		BUG_ON(ni->blk_addr == NEW_ADDR);
+	} else if (new_blkaddr == NEW_ADDR) {
+		/*
+		 * When a nid is reallocated, the previous nat entry may
+		 * remain in the nat cache, so reinitialize it with the
+		 * new information.
+		 */
+		e->ni = *ni;
+		BUG_ON(ni->blk_addr != NULL_ADDR);
+	}
+
+	if (new_blkaddr == NEW_ADDR)
+		e->checkpointed = false;
+
+	/* sanity check */
+	BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
+	BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
+			new_blkaddr == NULL_ADDR);
+	BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
+			new_blkaddr == NEW_ADDR);
+	BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
+			nat_get_blkaddr(e) != NULL_ADDR &&
+			new_blkaddr == NEW_ADDR);
+
+	/* increment the version number as the node is removed */
+	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
+		unsigned char version = nat_get_version(e);
+		nat_set_version(e, inc_node_version(version));
+	}
+
+	/* change address */
+	nat_set_blkaddr(e, new_blkaddr);
+	__set_nat_cache_dirty(nm_i, e);
+	write_unlock(&nm_i->nat_tree_lock);
+}
+
+static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+	if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
+		return 0;
+
+	write_lock(&nm_i->nat_tree_lock);
+	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
+		struct nat_entry *ne;
+		ne = list_first_entry(&nm_i->nat_entries,
+					struct nat_entry, list);
+		__del_from_nat_cache(nm_i, ne);
+		nr_shrink--;
+	}
+	write_unlock(&nm_i->nat_tree_lock);
+	return nr_shrink;
+}
+
+/*
+ * This function always succeeds.
+ */
+void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	nid_t start_nid = START_NID(nid);
+	struct f2fs_nat_block *nat_blk;
+	struct page *page = NULL;
+	struct f2fs_nat_entry ne;
+	struct nat_entry *e;
+	int i;
+
+	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	ni->nid = nid;
+
+	/* Check nat cache */
+	read_lock(&nm_i->nat_tree_lock);
+	e = __lookup_nat_cache(nm_i, nid);
+	if (e) {
+		ni->ino = nat_get_ino(e);
+		ni->blk_addr = nat_get_blkaddr(e);
+		ni->version = nat_get_version(e);
+	}
+	read_unlock(&nm_i->nat_tree_lock);
+	if (e)
+		return;
+
+	/* Check current segment summary */
+	mutex_lock(&curseg->curseg_mutex);
+	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
+	if (i >= 0) {
+		ne = nat_in_journal(sum, i);
+		node_info_from_raw_nat(ni, &ne);
+	}
+	mutex_unlock(&curseg->curseg_mutex);
+	if (i >= 0)
+		goto cache;
+
+	/* Fill node_info from nat page */
+	page = get_current_nat_page(sbi, start_nid);
+	nat_blk = (struct f2fs_nat_block *)page_address(page);
+	ne = nat_blk->entries[nid - start_nid];
+	node_info_from_raw_nat(ni, &ne);
+	f2fs_put_page(page, 1);
+cache:
+	/* cache nat entry */
+	cache_nat_entry(NM_I(sbi), nid, &ne);
+}
+
+/*
+ * The maximum depth is four.
+ * offset[0] holds the offset within the raw inode.
+ */
+static int get_node_path(long block, int offset[4], unsigned int noffset[4])
+{
+	const long direct_index = ADDRS_PER_INODE;
+	const long direct_blks = ADDRS_PER_BLOCK;
+	const long dptrs_per_blk = NIDS_PER_BLOCK;
+	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
+	int n = 0;
+	int level = 0;
+
+	noffset[0] = 0;
+
+	if (block < direct_index) {
+		offset[n++] = block;
+		level = 0;
+		goto got;
+	}
+	block -= direct_index;
+	if (block < direct_blks) {
+		offset[n++] = NODE_DIR1_BLOCK;
+		noffset[n] = 1;
+		offset[n++] = block;
+		level = 1;
+		goto got;
+	}
+	block -= direct_blks;
+	if (block < direct_blks) {
+		offset[n++] = NODE_DIR2_BLOCK;
+		noffset[n] = 2;
+		offset[n++] = block;
+		level = 1;
+		goto got;
+	}
+	block -= direct_blks;
+	if (block < indirect_blks) {
+		offset[n++] = NODE_IND1_BLOCK;
+		noffset[n] = 3;
+		offset[n++] = block / direct_blks;
+		noffset[n] = 4 + offset[n - 1];
+		offset[n++] = block % direct_blks;
+		level = 2;
+		goto got;
+	}
+	block -= indirect_blks;
+	if (block < indirect_blks) {
+		offset[n++] = NODE_IND2_BLOCK;
+		noffset[n] = 4 + dptrs_per_blk;
+		offset[n++] = block / direct_blks;
+		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
+		offset[n++] = block % direct_blks;
+		level = 2;
+		goto got;
+	}
+	block -= indirect_blks;
+	if (block < dindirect_blks) {
+		offset[n++] = NODE_DIND_BLOCK;
+		noffset[n] = 5 + (dptrs_per_blk * 2);
+		offset[n++] = block / indirect_blks;
+		noffset[n] = 6 + (dptrs_per_blk * 2) +
+			      offset[n - 1] * (dptrs_per_blk + 1);
+		offset[n++] = (block / direct_blks) % dptrs_per_blk;
+		noffset[n] = 7 + (dptrs_per_blk * 2) +
+			      offset[n - 2] * (dptrs_per_blk + 1) +
+			      offset[n - 1];
+		offset[n++] = block % direct_blks;
+		level = 3;
+		goto got;
+	} else {
+		BUG();
+	}
+got:
+	return level;
+}
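
A worked example of the path encoding, under the illustrative constants
ADDRS_PER_INODE = 923 and ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018 (the real
values are in f2fs.h):

	block  500 -> level 0, offset = { 500 }              (in-inode pointer)
	block  923 -> level 1, offset = { NODE_DIR1_BLOCK, 0 }
	block 2000 -> level 1, offset = { NODE_DIR2_BLOCK, 59 }
	block 5000 -> level 2, offset = { NODE_IND1_BLOCK, 2, 5 }

since 2000 - 923 - 1018 = 59 and 5000 - 923 - 2*1018 = 2041 = 2*1018 + 5.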
+
+/*
+ * Caller should call f2fs_put_dnode(dn).
+ */
+int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct page *npage[4];
+	struct page *parent;
+	int offset[4];
+	unsigned int noffset[4];
+	nid_t nids[4];
+	int level, i;
+	int err = 0;
+
+	level = get_node_path(index, offset, noffset);
+
+	nids[0] = dn->inode->i_ino;
+	npage[0] = get_node_page(sbi, nids[0]);
+	if (IS_ERR(npage[0]))
+		return PTR_ERR(npage[0]);
+
+	parent = npage[0];
+	nids[1] = get_nid(parent, offset[0], true);
+	dn->inode_page = npage[0];
+	dn->inode_page_locked = true;
+
+	/* get indirect or direct nodes */
+	for (i = 1; i <= level; i++) {
+		bool done = false;
+
+		if (!nids[i] && !ro) {
+			mutex_lock_op(sbi, NODE_NEW);
+
+			/* alloc new node */
+			if (!alloc_nid(sbi, &(nids[i]))) {
+				mutex_unlock_op(sbi, NODE_NEW);
+				err = -ENOSPC;
+				goto release_pages;
+			}
+
+			dn->nid = nids[i];
+			npage[i] = new_node_page(dn, noffset[i]);
+			if (IS_ERR(npage[i])) {
+				alloc_nid_failed(sbi, nids[i]);
+				mutex_unlock_op(sbi, NODE_NEW);
+				err = PTR_ERR(npage[i]);
+				goto release_pages;
+			}
+
+			set_nid(parent, offset[i - 1], nids[i], i == 1);
+			alloc_nid_done(sbi, nids[i]);
+			mutex_unlock_op(sbi, NODE_NEW);
+			done = true;
+		} else if (ro && i == level && level > 1) {
+			npage[i] = get_node_page_ra(parent, offset[i - 1]);
+			if (IS_ERR(npage[i])) {
+				err = PTR_ERR(npage[i]);
+				goto release_pages;
+			}
+			done = true;
+		}
+		if (i == 1) {
+			dn->inode_page_locked = false;
+			unlock_page(parent);
+		} else {
+			f2fs_put_page(parent, 1);
+		}
+
+		if (!done) {
+			npage[i] = get_node_page(sbi, nids[i]);
+			if (IS_ERR(npage[i])) {
+				err = PTR_ERR(npage[i]);
+				f2fs_put_page(npage[0], 0);
+				goto release_out;
+			}
+		}
+		if (i < level) {
+			parent = npage[i];
+			nids[i + 1] = get_nid(parent, offset[i], false);
+		}
+	}
+	dn->nid = nids[level];
+	dn->ofs_in_node = offset[level];
+	dn->node_page = npage[level];
+	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
+	return 0;
+
+release_pages:
+	f2fs_put_page(parent, 1);
+	if (i > 1)
+		f2fs_put_page(npage[0], 0);
+release_out:
+	dn->inode_page = NULL;
+	dn->node_page = NULL;
+	return err;
+}
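
A hypothetical call site, to show the intended lookup pattern; RDONLY_NODE
and f2fs_put_dnode() come from elsewhere in this patch series, and the
function below is illustrative, not part of the patch:

	/* resolve the block address backing page 'index' of 'inode' */
	static block_t lookup_blkaddr(struct inode *inode, pgoff_t index)
	{
		struct dnode_of_data dn;
		block_t blkaddr;
		int err;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, index, RDONLY_NODE);
		if (err)
			return NULL_ADDR;	/* no mapping or I/O error */

		blkaddr = dn.data_blkaddr;	/* filled in by the lookup */
		f2fs_put_dnode(&dn);		/* drop node/inode pages */
		return blkaddr;
	}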
+
+static void truncate_node(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct node_info ni;
+
+	get_node_info(sbi, dn->nid, &ni);
+	BUG_ON(ni.blk_addr == NULL_ADDR);
+
+	if (ni.blk_addr != NULL_ADDR)
+		invalidate_blocks(sbi, ni.blk_addr);
+
+	/* Deallocate node address */
+	dec_valid_node_count(sbi, dn->inode, 1);
+	set_node_addr(sbi, &ni, NULL_ADDR);
+
+	if (dn->nid == dn->inode->i_ino) {
+		remove_orphan_inode(sbi, dn->nid);
+		dec_valid_inode_count(sbi);
+	} else {
+		sync_inode_page(dn);
+	}
+
+	clear_node_page_dirty(dn->node_page);
+	F2FS_SET_SB_DIRT(sbi);
+
+	f2fs_put_page(dn->node_page, 1);
+	dn->node_page = NULL;
+}
+
+static int truncate_dnode(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct page *page;
+
+	if (dn->nid == 0)
+		return 1;
+
+	/* get direct node */
+	page = get_node_page(sbi, dn->nid);
+	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
+		return 1;
+	else if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	/* Make dnode_of_data for parameter */
+	dn->node_page = page;
+	dn->ofs_in_node = 0;
+	truncate_data_blocks(dn);
+	truncate_node(dn);
+	return 1;
+}
+
+static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
+						int ofs, int depth)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct dnode_of_data rdn = *dn;
+	struct page *page;
+	struct f2fs_node *rn;
+	nid_t child_nid;
+	unsigned int child_nofs;
+	int freed = 0;
+	int i, ret;
+
+	if (dn->nid == 0)
+		return NIDS_PER_BLOCK + 1;
+
+	page = get_node_page(sbi, dn->nid);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	rn = (struct f2fs_node *)page_address(page);
+	if (depth < 3) {
+		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
+			child_nid = le32_to_cpu(rn->in.nid[i]);
+			if (child_nid == 0)
+				continue;
+			rdn.nid = child_nid;
+			ret = truncate_dnode(&rdn);
+			if (ret < 0)
+				goto out_err;
+			set_nid(page, i, 0, false);
+		}
+	} else {
+		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
+		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
+			child_nid = le32_to_cpu(rn->in.nid[i]);
+			if (child_nid == 0) {
+				child_nofs += NIDS_PER_BLOCK + 1;
+				continue;
+			}
+			rdn.nid = child_nid;
+			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
+			if (ret == (NIDS_PER_BLOCK + 1)) {
+				set_nid(page, i, 0, false);
+				child_nofs += ret;
+			} else if (ret < 0 && ret != -ENOENT) {
+				goto out_err;
+			}
+		}
+		freed = child_nofs;
+	}
+
+	if (!ofs) {
+		/* remove current indirect node */
+		dn->node_page = page;
+		truncate_node(dn);
+		freed++;
+	} else {
+		f2fs_put_page(page, 1);
+	}
+	return freed;
+
+out_err:
+	f2fs_put_page(page, 1);
+	return ret;
+}
+
+static int truncate_partial_nodes(struct dnode_of_data *dn,
+			struct f2fs_inode *ri, int *offset, int depth)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct page *pages[2];
+	nid_t nid[3];
+	nid_t child_nid;
+	int err = 0;
+	int i;
+	int idx = depth - 2;
+
+	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
+	if (!nid[0])
+		return 0;
+
+	/* get indirect nodes in the path */
+	for (i = 0; i < depth - 1; i++) {
+		/* reference count will be increased */
+		pages[i] = get_node_page(sbi, nid[i]);
+		if (IS_ERR(pages[i])) {
+			depth = i + 1;
+			err = PTR_ERR(pages[i]);
+			goto fail;
+		}
+		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
+	}
+
+	/* free direct nodes linked to a partial indirect node */
+	for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
+		child_nid = get_nid(pages[idx], i, false);
+		if (!child_nid)
+			continue;
+		dn->nid = child_nid;
+		err = truncate_dnode(dn);
+		if (err < 0)
+			goto fail;
+		set_nid(pages[idx], i, 0, false);
+	}
+
+	if (offset[depth - 1] == 0) {
+		dn->node_page = pages[idx];
+		dn->nid = nid[idx];
+		truncate_node(dn);
+	} else {
+		f2fs_put_page(pages[idx], 1);
+	}
+	offset[idx]++;
+	offset[depth - 1] = 0;
+fail:
+	for (i = depth - 3; i >= 0; i--)
+		f2fs_put_page(pages[i], 1);
+	return err;
+}
+
+/*
+ * All the block addresses of data and nodes should be nullified.
+ */
+int truncate_inode_blocks(struct inode *inode, pgoff_t from)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	int err = 0, cont = 1;
+	int level, offset[4], noffset[4];
+	unsigned int nofs;
+	struct f2fs_node *rn;
+	struct dnode_of_data dn;
+	struct page *page;
+
+	level = get_node_path(from, offset, noffset);
+
+	page = get_node_page(sbi, inode->i_ino);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	set_new_dnode(&dn, inode, page, NULL, 0);
+	unlock_page(page);
+
+	rn = page_address(page);
+	switch (level) {
+	case 0:
+	case 1:
+		nofs = noffset[1];
+		break;
+	case 2:
+		nofs = noffset[1];
+		if (!offset[level - 1])
+			goto skip_partial;
+		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+		if (err < 0 && err != -ENOENT)
+			goto fail;
+		nofs += 1 + NIDS_PER_BLOCK;
+		break;
+	case 3:
+		nofs = 5 + 2 * NIDS_PER_BLOCK;
+		if (!offset[level - 1])
+			goto skip_partial;
+		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+		if (err < 0 && err != -ENOENT)
+			goto fail;
+		break;
+	default:
+		BUG();
+	}
+
+skip_partial:
+	while (cont) {
+		dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
+		switch (offset[0]) {
+		case NODE_DIR1_BLOCK:
+		case NODE_DIR2_BLOCK:
+			err = truncate_dnode(&dn);
+			break;
+
+		case NODE_IND1_BLOCK:
+		case NODE_IND2_BLOCK:
+			err = truncate_nodes(&dn, nofs, offset[1], 2);
+			break;
+
+		case NODE_DIND_BLOCK:
+			err = truncate_nodes(&dn, nofs, offset[1], 3);
+			cont = 0;
+			break;
+
+		default:
+			BUG();
+		}
+		if (err < 0 && err != -ENOENT)
+			goto fail;
+		if (offset[1] == 0 &&
+				rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
+			lock_page(page);
+			wait_on_page_writeback(page);
+			rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
+			set_page_dirty(page);
+			unlock_page(page);
+		}
+		offset[1] = 0;
+		offset[0]++;
+		nofs += err;
+	}
+fail:
+	f2fs_put_page(page, 0);
+	return err > 0 ? 0 : err;
+}
+
+int remove_inode_page(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct page *page;
+	nid_t ino = inode->i_ino;
+	struct dnode_of_data dn;
+
+	mutex_lock_op(sbi, NODE_TRUNC);
+	page = get_node_page(sbi, ino);
+	if (IS_ERR(page)) {
+		mutex_unlock_op(sbi, NODE_TRUNC);
+		return PTR_ERR(page);
+	}
+
+	if (F2FS_I(inode)->i_xattr_nid) {
+		nid_t nid = F2FS_I(inode)->i_xattr_nid;
+		struct page *npage = get_node_page(sbi, nid);
+
+		if (IS_ERR(npage)) {
+			mutex_unlock_op(sbi, NODE_TRUNC);
+			return PTR_ERR(npage);
+		}
+
+		F2FS_I(inode)->i_xattr_nid = 0;
+		set_new_dnode(&dn, inode, page, npage, nid);
+		dn.inode_page_locked = 1;
+		truncate_node(&dn);
+	}
+	if (inode->i_blocks == 1) {
+		/* internally calls f2fs_put_page() */
+		set_new_dnode(&dn, inode, page, page, ino);
+		truncate_node(&dn);
+	} else if (inode->i_blocks == 0) {
+		struct node_info ni;
+		get_node_info(sbi, inode->i_ino, &ni);
+
+		/* called after f2fs_new_inode() is failed */
+		BUG_ON(ni.blk_addr != NULL_ADDR);
+		f2fs_put_page(page, 1);
+	} else {
+		BUG();
+	}
+	mutex_unlock_op(sbi, NODE_TRUNC);
+	return 0;
+}
+
+int new_inode_page(struct inode *inode, struct dentry *dentry)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct page *page;
+	struct dnode_of_data dn;
+
+	/* allocate inode page for new inode */
+	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
+	mutex_lock_op(sbi, NODE_NEW);
+	page = new_node_page(&dn, 0);
+	if (IS_ERR(page)) {
+		mutex_unlock_op(sbi, NODE_NEW);
+		return PTR_ERR(page);
+	}
+	init_dent_inode(dentry, page);
+	mutex_unlock_op(sbi, NODE_NEW);
+	f2fs_put_page(page, 1);
+	return 0;
+}
+
+struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct address_space *mapping = sbi->node_inode->i_mapping;
+	struct node_info old_ni, new_ni;
+	struct page *page;
+	int err;
+
+	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+		return ERR_PTR(-EPERM);
+
+	page = grab_cache_page(mapping, dn->nid);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	get_node_info(sbi, dn->nid, &old_ni);
+
+	SetPageUptodate(page);
+	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
+
+	/* Reinitialize old_ni with new node page */
+	BUG_ON(old_ni.blk_addr != NULL_ADDR);
+	new_ni = old_ni;
+	new_ni.ino = dn->inode->i_ino;
+
+	if (!inc_valid_node_count(sbi, dn->inode, 1)) {
+		err = -ENOSPC;
+		goto fail;
+	}
+	set_node_addr(sbi, &new_ni, NEW_ADDR);
+
+	dn->node_page = page;
+	sync_inode_page(dn);
+	set_page_dirty(page);
+	set_cold_node(dn->inode, page);
+	if (ofs == 0)
+		inc_valid_inode_count(sbi);
+
+	return page;
+
+fail:
+	f2fs_put_page(page, 1);
+	return ERR_PTR(err);
+}
+
+static int read_node_page(struct page *page, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+	struct node_info ni;
+
+	get_node_info(sbi, page->index, &ni);
+
+	if (ni.blk_addr == NULL_ADDR)
+		return -ENOENT;
+	return f2fs_readpage(sbi, page, ni.blk_addr, type);
+}
+
+/*
+ * Readahead a node page
+ */
+void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	struct address_space *mapping = sbi->node_inode->i_mapping;
+	struct page *apage;
+
+	apage = find_get_page(mapping, nid);
+	if (apage && PageUptodate(apage))
+		goto release_out;
+	f2fs_put_page(apage, 0);
+
+	apage = grab_cache_page(mapping, nid);
+	if (!apage)
+		return;
+
+	if (read_node_page(apage, READA))
+		goto unlock_out;
+
+	page_cache_release(apage);
+	return;
+
+unlock_out:
+	unlock_page(apage);
+release_out:
+	page_cache_release(apage);
+}
+
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+{
+	int err;
+	struct page *page;
+	struct address_space *mapping = sbi->node_inode->i_mapping;
+
+	page = grab_cache_page(mapping, nid);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	err = read_node_page(page, READ_SYNC);
+	if (err) {
+		f2fs_put_page(page, 1);
+		return ERR_PTR(err);
+	}
+
+	BUG_ON(nid != nid_of_node(page));
+	mark_page_accessed(page);
+	return page;
+}
+
+/*
+ * Return a locked page for the desired node page, and read ahead
+ * up to MAX_RA_NODE sibling node pages.
+ */
+struct page *get_node_page_ra(struct page *parent, int start)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
+	struct address_space *mapping = sbi->node_inode->i_mapping;
+	int i, end;
+	int err = 0;
+	nid_t nid;
+	struct page *page;
+
+	/* First, try getting the desired direct node. */
+	nid = get_nid(parent, start, false);
+	if (!nid)
+		return ERR_PTR(-ENOENT);
+
+	page = find_get_page(mapping, nid);
+	if (page && PageUptodate(page))
+		goto page_hit;
+	f2fs_put_page(page, 0);
+
+repeat:
+	page = grab_cache_page(mapping, nid);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	err = read_node_page(page, READA);
+	if (err) {
+		f2fs_put_page(page, 1);
+		return ERR_PTR(err);
+	}
+
+	/* Then, try readahead for siblings of the desired node */
+	end = start + MAX_RA_NODE;
+	end = min(end, NIDS_PER_BLOCK);
+	for (i = start + 1; i < end; i++) {
+		nid = get_nid(parent, i, false);
+		if (!nid)
+			continue;
+		ra_node_page(sbi, nid);
+	}
+
+page_hit:
+	lock_page(page);
+	if (PageError(page)) {
+		f2fs_put_page(page, 1);
+		return ERR_PTR(-EIO);
+	}
+
+	/* Has the page been truncated? */
+	if (page->mapping != mapping) {
+		f2fs_put_page(page, 1);
+		goto repeat;
+	}
+	return page;
+}
+
+void sync_inode_page(struct dnode_of_data *dn)
+{
+	if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
+		update_inode(dn->inode, dn->node_page);
+	} else if (dn->inode_page) {
+		if (!dn->inode_page_locked)
+			lock_page(dn->inode_page);
+		update_inode(dn->inode, dn->inode_page);
+		if (!dn->inode_page_locked)
+			unlock_page(dn->inode_page);
+	} else {
+		f2fs_write_inode(dn->inode, NULL);
+	}
+}
+
+int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
+					struct writeback_control *wbc)
+{
+	struct address_space *mapping = sbi->node_inode->i_mapping;
+	pgoff_t index, end;
+	struct pagevec pvec;
+	int step = ino ? 2 : 0;
+	int nwritten = 0, wrote = 0;
+
+	pagevec_init(&pvec, 0);
+
+next_step:
+	index = 0;
+	end = LONG_MAX;
+
+	while (index <= end) {
+		int i, nr_pages;
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+				PAGECACHE_TAG_DIRTY,
+				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * flushing sequence with step:
+			 * 0. indirect nodes
+			 * 1. dentry dnodes
+			 * 2. file dnodes
+			 */
+			if (step == 0 && IS_DNODE(page))
+				continue;
+			if (step == 1 && (!IS_DNODE(page) ||
+						is_cold_node(page)))
+				continue;
+			if (step == 2 && (!IS_DNODE(page) ||
+						!is_cold_node(page)))
+				continue;
+
+			/*
+			 * In fsync mode, we must not skip writing
+			 * the target inode's node pages.
+			 */
+			if (ino && ino_of_node(page) == ino)
+				lock_page(page);
+			else if (!trylock_page(page))
+				continue;
+
+			if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+				unlock_page(page);
+				continue;
+			}
+			if (ino && ino_of_node(page) != ino)
+				goto continue_unlock;
+
+			if (!PageDirty(page)) {
+				/* someone wrote it for us */
+				goto continue_unlock;
+			}
+
+			if (!clear_page_dirty_for_io(page))
+				goto continue_unlock;
+
+			/* called by fsync() */
+			if (ino && IS_DNODE(page)) {
+				int mark = !is_checkpointed_node(sbi, ino);
+				set_fsync_mark(page, 1);
+				if (IS_INODE(page))
+					set_dentry_mark(page, mark);
+				nwritten++;
+			} else {
+				set_fsync_mark(page, 0);
+				set_dentry_mark(page, 0);
+			}
+			mapping->a_ops->writepage(page, wbc);
+			wrote++;
+
+			if (--wbc->nr_to_write == 0)
+				break;
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+
+		if (wbc->nr_to_write == 0) {
+			step = 2;
+			break;
+		}
+	}
+
+	if (step < 2) {
+		step++;
+		goto next_step;
+	}
+
+	if (wrote)
+		f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
+
+	return nwritten;
+}
+
+static int f2fs_write_node_page(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+	nid_t nid;
+	unsigned int nofs;
+	block_t new_addr;
+	struct node_info ni;
+
+	if (wbc->for_reclaim) {
+		dec_page_count(sbi, F2FS_DIRTY_NODES);
+		wbc->pages_skipped++;
+		set_page_dirty(page);
+		return AOP_WRITEPAGE_ACTIVATE;
+	}
+
+	wait_on_page_writeback(page);
+
+	mutex_lock_op(sbi, NODE_WRITE);
+
+	/* get old block addr of this node page */
+	nid = nid_of_node(page);
+	nofs = ofs_of_node(page);
+	BUG_ON(page->index != nid);
+
+	get_node_info(sbi, nid, &ni);
+
+	/* This page is already truncated */
+	if (ni.blk_addr == NULL_ADDR) {
+		dec_page_count(sbi, F2FS_DIRTY_NODES);
+		mutex_unlock_op(sbi, NODE_WRITE);
+		unlock_page(page);
+		return 0;
+	}
+
+	set_page_writeback(page);
+
+	/* insert node offset */
+	write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
+	set_node_addr(sbi, &ni, new_addr);
+	dec_page_count(sbi, F2FS_DIRTY_NODES);
+
+	mutex_unlock_op(sbi, NODE_WRITE);
+	unlock_page(page);
+	return 0;
+}
+
+static int f2fs_write_node_pages(struct address_space *mapping,
+			    struct writeback_control *wbc)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+	struct block_device *bdev = sbi->sb->s_bdev;
+	long nr_to_write = wbc->nr_to_write;
+
+	if (wbc->for_kupdate)
+		return 0;
+
+	if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
+		return 0;
+
+	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
+		write_checkpoint(sbi, false, false);
+		return 0;
+	}
+
+	/* if mounting failed, skip writing node pages */
+	wbc->nr_to_write = bio_get_nr_vecs(bdev);
+	sync_node_pages(sbi, 0, wbc);
+	wbc->nr_to_write = nr_to_write -
+		(bio_get_nr_vecs(bdev) - wbc->nr_to_write);
+	return 0;
+}
+
+static int f2fs_set_node_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+
+	SetPageUptodate(page);
+	if (!PageDirty(page)) {
+		__set_page_dirty_nobuffers(page);
+		inc_page_count(sbi, F2FS_DIRTY_NODES);
+		SetPagePrivate(page);
+		return 1;
+	}
+	return 0;
+}
+
+static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
+{
+	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	if (PageDirty(page))
+		dec_page_count(sbi, F2FS_DIRTY_NODES);
+	ClearPagePrivate(page);
+}
+
+static int f2fs_release_node_page(struct page *page, gfp_t wait)
+{
+	ClearPagePrivate(page);
+	return 0;
+}
+
+/*
+ * Structure of the f2fs node operations
+ */
+const struct address_space_operations f2fs_node_aops = {
+	.writepage	= f2fs_write_node_page,
+	.writepages	= f2fs_write_node_pages,
+	.set_page_dirty	= f2fs_set_node_page_dirty,
+	.invalidatepage	= f2fs_invalidate_node_page,
+	.releasepage	= f2fs_release_node_page,
+};
+
+static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
+{
+	struct list_head *this;
+	struct free_nid *i = NULL;
+	list_for_each(this, head) {
+		i = list_entry(this, struct free_nid, list);
+		if (i->nid == n)
+			break;
+		i = NULL;
+	}
+	return i;
+}
+
+static void __del_from_free_nid_list(struct free_nid *i)
+{
+	list_del(&i->list);
+	kmem_cache_free(free_nid_slab, i);
+}
+
+static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
+{
+	struct free_nid *i;
+
+	if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
+		return 0;
+retry:
+	i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
+	if (!i) {
+		cond_resched();
+		goto retry;
+	}
+	i->nid = nid;
+	i->state = NID_NEW;
+
+	spin_lock(&nm_i->free_nid_list_lock);
+	if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
+		spin_unlock(&nm_i->free_nid_list_lock);
+		kmem_cache_free(free_nid_slab, i);
+		return 0;
+	}
+	list_add_tail(&i->list, &nm_i->free_nid_list);
+	nm_i->fcnt++;
+	spin_unlock(&nm_i->free_nid_list_lock);
+	return 1;
+}
+
+static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
+{
+	struct free_nid *i;
+	spin_lock(&nm_i->free_nid_list_lock);
+	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
+	if (i && i->state == NID_NEW) {
+		__del_from_free_nid_list(i);
+		nm_i->fcnt--;
+	}
+	spin_unlock(&nm_i->free_nid_list_lock);
+}
+
+static int scan_nat_page(struct f2fs_nm_info *nm_i,
+			struct page *nat_page, nid_t start_nid)
+{
+	struct f2fs_nat_block *nat_blk = page_address(nat_page);
+	block_t blk_addr;
+	int fcnt = 0;
+	int i;
+
+	/* 0 nid should not be used */
+	if (start_nid == 0)
+		++start_nid;
+
+	i = start_nid % NAT_ENTRY_PER_BLOCK;
+
+	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
+		blk_addr  = le32_to_cpu(nat_blk->entries[i].block_addr);
+		BUG_ON(blk_addr == NEW_ADDR);
+		if (blk_addr == NULL_ADDR)
+			fcnt += add_free_nid(nm_i, start_nid);
+	}
+	return fcnt;
+}
+
+static void build_free_nids(struct f2fs_sb_info *sbi)
+{
+	struct free_nid *fnid, *next_fnid;
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	nid_t nid = 0;
+	bool is_cycled = false;
+	int fcnt = 0;
+	int i;
+
+	nid = nm_i->next_scan_nid;
+	nm_i->init_scan_nid = nid;
+
+	ra_nat_pages(sbi, nid);
+
+	while (1) {
+		struct page *page = get_current_nat_page(sbi, nid);
+
+		fcnt += scan_nat_page(nm_i, page, nid);
+		f2fs_put_page(page, 1);
+
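+		/* round nid up to the first nid of the next NAT block */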
+		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
+
+		if (nid >= nm_i->max_nid) {
+			nid = 0;
+			is_cycled = true;
+		}
+		if (fcnt > MAX_FREE_NIDS)
+			break;
+		if (is_cycled && nm_i->init_scan_nid <= nid)
+			break;
+	}
+
+	nm_i->next_scan_nid = nid;
+
+	/* find free nids from the nat journal in the current summary block */
+	mutex_lock(&curseg->curseg_mutex);
+	for (i = 0; i < nats_in_cursum(sum); i++) {
+		block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
+		nid = le32_to_cpu(nid_in_journal(sum, i));
+		if (addr == NULL_ADDR)
+			add_free_nid(nm_i, nid);
+		else
+			remove_free_nid(nm_i, nid);
+	}
+	mutex_unlock(&curseg->curseg_mutex);
+
+	/* drop free nids that the nat cache shows as already allocated */
+	list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
+		struct nat_entry *ne;
+
+		read_lock(&nm_i->nat_tree_lock);
+		ne = __lookup_nat_cache(nm_i, fnid->nid);
+		if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
+			remove_free_nid(nm_i, fnid->nid);
+		read_unlock(&nm_i->nat_tree_lock);
+	}
+}
+
+/*
+ * If this function returns success, the caller can obtain a new nid
+ * from the second parameter of this function.
+ * The returned nid can be used as an ino as well as a nid when an inode
+ * is created.
+ */
+bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct free_nid *i = NULL;
+	struct list_head *this;
+retry:
+	mutex_lock(&nm_i->build_lock);
+	if (!nm_i->fcnt) {
+		/* scan NAT in order to build free nid list */
+		build_free_nids(sbi);
+		if (!nm_i->fcnt) {
+			mutex_unlock(&nm_i->build_lock);
+			return false;
+		}
+	}
+	mutex_unlock(&nm_i->build_lock);
+
+	/*
+	 * Check fcnt again, since the check above was racy: we did not
+	 * hold free_nid_list_lock, so another thread could have consumed
+	 * all of the free nids in the meantime.
+	 */
+	spin_lock(&nm_i->free_nid_list_lock);
+	if (!nm_i->fcnt) {
+		spin_unlock(&nm_i->free_nid_list_lock);
+		goto retry;
+	}
+
+	BUG_ON(list_empty(&nm_i->free_nid_list));
+	list_for_each(this, &nm_i->free_nid_list) {
+		i = list_entry(this, struct free_nid, list);
+		if (i->state == NID_NEW)
+			break;
+	}
+
+	BUG_ON(i->state != NID_NEW);
+	*nid = i->nid;
+	i->state = NID_ALLOC;
+	nm_i->fcnt--;
+	spin_unlock(&nm_i->free_nid_list_lock);
+	return true;
+}
+
+/*
+ * alloc_nid() should be called prior to this function.
+ */
+void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct free_nid *i;
+
+	spin_lock(&nm_i->free_nid_list_lock);
+	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
+	if (i) {
+		BUG_ON(i->state != NID_ALLOC);
+		__del_from_free_nid_list(i);
+	}
+	spin_unlock(&nm_i->free_nid_list_lock);
+}
+
+/*
+ * alloc_nid() should be called prior to this function.
+ */
+void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
+{
+	alloc_nid_done(sbi, nid);
+	add_free_nid(NM_I(sbi), nid);
+}
+
+void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
+		struct f2fs_summary *sum, struct node_info *ni,
+		block_t new_blkaddr)
+{
+	rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
+	set_node_addr(sbi, ni, new_blkaddr);
+	clear_node_page_dirty(page);
+}
+
+int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
+{
+	struct address_space *mapping = sbi->node_inode->i_mapping;
+	struct f2fs_node *src, *dst;
+	nid_t ino = ino_of_node(page);
+	struct node_info old_ni, new_ni;
+	struct page *ipage;
+
+	ipage = grab_cache_page(mapping, ino);
+	if (!ipage)
+		return -ENOMEM;
+
+	/* this ino must not be reused from the free nid list */
+	remove_free_nid(NM_I(sbi), ino);
+
+	get_node_info(sbi, ino, &old_ni);
+	SetPageUptodate(ipage);
+	fill_node_footer(ipage, ino, ino, 0, true);
+
+	src = (struct f2fs_node *)page_address(page);
+	dst = (struct f2fs_node *)page_address(ipage);
+
+	memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
+	dst->i.i_size = 0;
+	dst->i.i_blocks = cpu_to_le64(1);
+	dst->i.i_links = cpu_to_le32(1);
+	dst->i.i_xattr_nid = 0;
+
+	new_ni = old_ni;
+	new_ni.ino = ino;
+
+	set_node_addr(sbi, &new_ni, NEW_ADDR);
+	inc_valid_inode_count(sbi);
+
+	f2fs_put_page(ipage, 1);
+	return 0;
+}
+
+int restore_node_summary(struct f2fs_sb_info *sbi,
+			unsigned int segno, struct f2fs_summary_block *sum)
+{
+	struct f2fs_node *rn;
+	struct f2fs_summary *sum_entry;
+	struct page *page;
+	block_t addr;
+	int i, last_offset;
+
+	/* allocate a temporary page to read node blocks into */
+	page = alloc_page(GFP_NOFS | __GFP_ZERO);
+	if (!page)
+		return -ENOMEM;
+	lock_page(page);
+
+	/* scan the node segment */
+	last_offset = sbi->blocks_per_seg;
+	addr = START_BLOCK(sbi, segno);
+	sum_entry = &sum->entries[0];
+
+	for (i = 0; i < last_offset; i++, sum_entry++) {
+		if (f2fs_readpage(sbi, page, addr, READ_SYNC))
+			goto out;
+
+		rn = (struct f2fs_node *)page_address(page);
+		sum_entry->nid = rn->footer.nid;
+		sum_entry->version = 0;
+		sum_entry->ofs_in_node = 0;
+		addr++;
+
+		/*
+		 * In order to read the next node page,
+		 * we must clear the PageUptodate flag.
+		 */
+		ClearPageUptodate(page);
+	}
+out:
+	unlock_page(page);
+	__free_pages(page, 0);
+	return 0;
+}
+
+static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	int i;
+
+	mutex_lock(&curseg->curseg_mutex);
+
+	if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
+		mutex_unlock(&curseg->curseg_mutex);
+		return false;
+	}
+
+	for (i = 0; i < nats_in_cursum(sum); i++) {
+		struct nat_entry *ne;
+		struct f2fs_nat_entry raw_ne;
+		nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
+
+		raw_ne = nat_in_journal(sum, i);
+retry:
+		write_lock(&nm_i->nat_tree_lock);
+		ne = __lookup_nat_cache(nm_i, nid);
+		if (ne) {
+			__set_nat_cache_dirty(nm_i, ne);
+			write_unlock(&nm_i->nat_tree_lock);
+			continue;
+		}
+		ne = grab_nat_entry(nm_i, nid);
+		if (!ne) {
+			write_unlock(&nm_i->nat_tree_lock);
+			goto retry;
+		}
+		nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
+		nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
+		nat_set_version(ne, raw_ne.version);
+		__set_nat_cache_dirty(nm_i, ne);
+		write_unlock(&nm_i->nat_tree_lock);
+	}
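+	/* every journalled nat entry now lives in the cache; empty the journal */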
+	update_nats_in_cursum(sum, -i);
+	mutex_unlock(&curseg->curseg_mutex);
+	return true;
+}
+
+/*
+ * This function is called during the checkpointing process.
+ */
+void flush_nat_entries(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	struct list_head *cur, *n;
+	struct page *page = NULL;
+	struct f2fs_nat_block *nat_blk = NULL;
+	nid_t start_nid = 0, end_nid = 0;
+	bool flushed;
+
+	flushed = flush_nats_in_journal(sbi);
+
+	if (!flushed)
+		mutex_lock(&curseg->curseg_mutex);
+
+	/* 1) flush dirty nat caches */
+	list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
+		struct nat_entry *ne;
+		nid_t nid;
+		struct f2fs_nat_entry raw_ne;
+		int offset = -1;
+		block_t old_blkaddr, new_blkaddr;
+
+		ne = list_entry(cur, struct nat_entry, list);
+		nid = nat_get_nid(ne);
+
+		if (nat_get_blkaddr(ne) == NEW_ADDR)
+			continue;
+		if (flushed)
+			goto to_nat_page;
+
+		/* if there is room for nat entries in curseg->sum_blk */
+		offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
+		if (offset >= 0) {
+			raw_ne = nat_in_journal(sum, offset);
+			old_blkaddr = le32_to_cpu(raw_ne.block_addr);
+			goto flush_now;
+		}
+to_nat_page:
+		if (!page || (start_nid > nid || nid > end_nid)) {
+			if (page) {
+				f2fs_put_page(page, 1);
+				page = NULL;
+			}
+			start_nid = START_NID(nid);
+			end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
+
+			/*
+			 * get the nat block page: dirtied, with an increased
+			 * reference count, mapped, and locked
+			 */
+			page = get_next_nat_page(sbi, start_nid);
+			nat_blk = page_address(page);
+		}
+
+		BUG_ON(!nat_blk);
+		raw_ne = nat_blk->entries[nid - start_nid];
+		old_blkaddr = le32_to_cpu(raw_ne.block_addr);
+flush_now:
+		new_blkaddr = nat_get_blkaddr(ne);
+
+		raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
+		raw_ne.block_addr = cpu_to_le32(new_blkaddr);
+		raw_ne.version = nat_get_version(ne);
+
+		if (offset < 0) {
+			nat_blk->entries[nid - start_nid] = raw_ne;
+		} else {
+			nat_in_journal(sum, offset) = raw_ne;
+			nid_in_journal(sum, offset) = cpu_to_le32(nid);
+		}
+
+		if (nat_get_blkaddr(ne) == NULL_ADDR) {
+			write_lock(&nm_i->nat_tree_lock);
+			__del_from_nat_cache(nm_i, ne);
+			write_unlock(&nm_i->nat_tree_lock);
+
+			/* We can reuse this freed nid at this point */
+			add_free_nid(NM_I(sbi), nid);
+		} else {
+			write_lock(&nm_i->nat_tree_lock);
+			__clear_nat_cache_dirty(nm_i, ne);
+			ne->checkpointed = true;
+			write_unlock(&nm_i->nat_tree_lock);
+		}
+	}
+	if (!flushed)
+		mutex_unlock(&curseg->curseg_mutex);
+	f2fs_put_page(page, 1);
+
+	/* 2) shrink nat caches if necessary */
+	try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
+}
+
+static int init_node_manager(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned char *version_bitmap;
+	unsigned int nat_segs, nat_blocks;
+
+	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
+
+	/* segment_count_nat counts both segments of each pair, so divide by 2 */
+	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
+	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+	nm_i->fcnt = 0;
+	nm_i->nat_cnt = 0;
+
+	INIT_LIST_HEAD(&nm_i->free_nid_list);
+	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
+	INIT_LIST_HEAD(&nm_i->nat_entries);
+	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
+
+	mutex_init(&nm_i->build_lock);
+	spin_lock_init(&nm_i->free_nid_list_lock);
+	rwlock_init(&nm_i->nat_tree_lock);
+
+	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
+	nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
+	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
+
+	nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
+	if (!nm_i->nat_bitmap)
+		return -ENOMEM;
+	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
+	if (!version_bitmap)
+		return -EFAULT;
+
+	/* copy version bitmap */
+	memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size);
+	return 0;
+}
+
+int build_node_manager(struct f2fs_sb_info *sbi)
+{
+	int err;
+
+	sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
+	if (!sbi->nm_info)
+		return -ENOMEM;
+
+	err = init_node_manager(sbi);
+	if (err)
+		return err;
+
+	build_free_nids(sbi);
+	return 0;
+}
+
+void destroy_node_manager(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct free_nid *i, *next_i;
+	struct nat_entry *natvec[NATVEC_SIZE];
+	nid_t nid = 0;
+	unsigned int found;
+
+	if (!nm_i)
+		return;
+
+	/* destroy free nid list */
+	spin_lock(&nm_i->free_nid_list_lock);
+	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
+		BUG_ON(i->state == NID_ALLOC);
+		__del_from_free_nid_list(i);
+		nm_i->fcnt--;
+	}
+	BUG_ON(nm_i->fcnt);
+	spin_unlock(&nm_i->free_nid_list_lock);
+
+	/* destroy nat cache */
+	write_lock(&nm_i->nat_tree_lock);
+	while ((found = __gang_lookup_nat_cache(nm_i,
+					nid, NATVEC_SIZE, natvec))) {
+		unsigned idx;
+		for (idx = 0; idx < found; idx++) {
+			struct nat_entry *e = natvec[idx];
+			nid = nat_get_nid(e) + 1;
+			__del_from_nat_cache(nm_i, e);
+		}
+	}
+	BUG_ON(nm_i->nat_cnt);
+	write_unlock(&nm_i->nat_tree_lock);
+
+	kfree(nm_i->nat_bitmap);
+	sbi->nm_info = NULL;
+	kfree(nm_i);
+}
+
+int create_node_manager_caches(void)
+{
+	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
+			sizeof(struct nat_entry), NULL);
+	if (!nat_entry_slab)
+		return -ENOMEM;
+
+	free_nid_slab = f2fs_kmem_cache_create("free_nid",
+			sizeof(struct free_nid), NULL);
+	if (!free_nid_slab) {
+		kmem_cache_destroy(nat_entry_slab);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void destroy_node_manager_caches(void)
+{
+	kmem_cache_destroy(free_nid_slab);
+	kmem_cache_destroy(nat_entry_slab);
+}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
new file mode 100644
index 0000000..afdb130
--- /dev/null
+++ b/fs/f2fs/node.h
@@ -0,0 +1,353 @@
+/*
+ * fs/f2fs/node.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/* start node id of a node block dedicated to the given node id */
+#define	START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
+
+/* node block offset on the NAT area dedicated to the given start node id */
+#define	NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK)
+
+/* # of pages to perform readahead before building free nids */
+#define FREE_NID_PAGES 4
+
+/* maximum # of free node ids to produce during build_free_nids */
+#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
+
+/* maximum readahead size for node during getting data blocks */
+#define MAX_RA_NODE		128
+
+/* maximum cached nat entries to manage memory footprint */
+#define NM_WOUT_THRESHOLD	(64 * NAT_ENTRY_PER_BLOCK)
+
+/* vector size for gang look-up from nat cache that consists of radix tree */
+#define NATVEC_SIZE	64
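+
+/*
+ * Illustrative sizing, assuming 4 KB blocks and 9-byte packed nat entries
+ * (so NAT_ENTRY_PER_BLOCK == 455): MAX_FREE_NIDS == 455 * 4 == 1820 free
+ * nids per build pass, and NM_WOUT_THRESHOLD == 64 * 455 == 29120 cached
+ * nat entries before try_to_free_nats() starts shrinking the cache.
+ */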
+
+/*
+ * For node information
+ */
+struct node_info {
+	nid_t nid;		/* node id */
+	nid_t ino;		/* inode number of the node's owner */
+	block_t	blk_addr;	/* block address of the node */
+	unsigned char version;	/* version of the node */
+};
+
+struct nat_entry {
+	struct list_head list;	/* for clean or dirty nat list */
+	bool checkpointed;	/* whether it is checkpointed or not */
+	struct node_info ni;	/* in-memory node information */
+};
+
+#define nat_get_nid(nat)		(nat->ni.nid)
+#define nat_set_nid(nat, n)		(nat->ni.nid = n)
+#define nat_get_blkaddr(nat)		(nat->ni.blk_addr)
+#define nat_set_blkaddr(nat, b)		(nat->ni.blk_addr = b)
+#define nat_get_ino(nat)		(nat->ni.ino)
+#define nat_set_ino(nat, i)		(nat->ni.ino = i)
+#define nat_get_version(nat)		(nat->ni.version)
+#define nat_set_version(nat, v)		(nat->ni.version = v)
+
+#define __set_nat_cache_dirty(nm_i, ne)					\
+	list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
+#define __clear_nat_cache_dirty(nm_i, ne)				\
+	list_move_tail(&ne->list, &nm_i->nat_entries);
+#define inc_node_version(version)	(++version)
+
+static inline void node_info_from_raw_nat(struct node_info *ni,
+						struct f2fs_nat_entry *raw_ne)
+{
+	ni->ino = le32_to_cpu(raw_ne->ino);
+	ni->blk_addr = le32_to_cpu(raw_ne->block_addr);
+	ni->version = raw_ne->version;
+}
+
+/*
+ * For free nid management
+ */
+enum nid_state {
+	NID_NEW,	/* newly added to free nid list */
+	NID_ALLOC	/* it is allocated */
+};
+
+struct free_nid {
+	struct list_head list;	/* for free node id list */
+	nid_t nid;		/* node id */
+	int state;		/* in use or not: NID_NEW or NID_ALLOC */
+};
+
+static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct free_nid *fnid;
+
+	if (nm_i->fcnt <= 0)
+		return -1;
+	spin_lock(&nm_i->free_nid_list_lock);
+	fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
+	*nid = fnid->nid;
+	spin_unlock(&nm_i->free_nid_list_lock);
+	return 0;
+}
+
+/*
+ * inline functions
+ */
+static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
+}
+
+static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	pgoff_t block_off;
+	pgoff_t block_addr;
+	int seg_off;
+
+	block_off = NAT_BLOCK_OFFSET(start);
+	seg_off = block_off >> sbi->log_blocks_per_seg;
+
+	block_addr = (pgoff_t)(nm_i->nat_blkaddr +
+		(seg_off << sbi->log_blocks_per_seg << 1) +
+		(block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
+
+	if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
+		block_addr += sbi->blocks_per_seg;
+
+	return block_addr;
+}
+
+static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
+						pgoff_t block_addr)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+	block_addr -= nm_i->nat_blkaddr;
+	if ((block_addr >> sbi->log_blocks_per_seg) % 2)
+		block_addr -= sbi->blocks_per_seg;
+	else
+		block_addr += sbi->blocks_per_seg;
+
+	return block_addr + nm_i->nat_blkaddr;
+}
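+
+/*
+ * The NAT area stores every NAT block twice, in a pair of segments.
+ * current_nat_addr() picks the valid copy according to nat_bitmap, and
+ * next_nat_addr() returns the address of the other copy, where the next
+ * checkpoint writes the updated block; set_to_next_nat() then flips the
+ * bit so the roles swap at the following checkpoint.
+ */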
+
+static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
+{
+	unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
+
+	if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
+		f2fs_clear_bit(block_off, nm_i->nat_bitmap);
+	else
+		f2fs_set_bit(block_off, nm_i->nat_bitmap);
+}
+
+static inline void fill_node_footer(struct page *page, nid_t nid,
+				nid_t ino, unsigned int ofs, bool reset)
+{
+	void *kaddr = page_address(page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	if (reset)
+		memset(rn, 0, sizeof(*rn));
+	rn->footer.nid = cpu_to_le32(nid);
+	rn->footer.ino = cpu_to_le32(ino);
+	rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT);
+}
+
+static inline void copy_node_footer(struct page *dst, struct page *src)
+{
+	void *src_addr = page_address(src);
+	void *dst_addr = page_address(dst);
+	struct f2fs_node *src_rn = (struct f2fs_node *)src_addr;
+	struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr;
+	memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
+}
+
+static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	void *kaddr = page_address(page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	rn->footer.cp_ver = ckpt->checkpoint_ver;
+	rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
+}
+
+static inline nid_t ino_of_node(struct page *node_page)
+{
+	void *kaddr = page_address(node_page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	return le32_to_cpu(rn->footer.ino);
+}
+
+static inline nid_t nid_of_node(struct page *node_page)
+{
+	void *kaddr = page_address(node_page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	return le32_to_cpu(rn->footer.nid);
+}
+
+static inline unsigned int ofs_of_node(struct page *node_page)
+{
+	void *kaddr = page_address(node_page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	unsigned flag = le32_to_cpu(rn->footer.flag);
+	return flag >> OFFSET_BIT_SHIFT;
+}
+
+static inline unsigned long long cpver_of_node(struct page *node_page)
+{
+	void *kaddr = page_address(node_page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	return le64_to_cpu(rn->footer.cp_ver);
+}
+
+static inline block_t next_blkaddr_of_node(struct page *node_page)
+{
+	void *kaddr = page_address(node_page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	return le32_to_cpu(rn->footer.next_blkaddr);
+}
+
+/*
+ * f2fs assigns the following node offsets described as (num).
+ * N = NIDS_PER_BLOCK
+ *
+ *  Inode block (0)
+ *    |- direct node (1)
+ *    |- direct node (2)
+ *    |- indirect node (3)
+ *    |            `- direct node (4 => 4 + N - 1)
+ *    |- indirect node (4 + N)
+ *    |            `- direct node (5 + N => 5 + 2N - 1)
+ *    `- double indirect node (5 + 2N)
+ *                 `- indirect node (6 + 2N)
+ *                       `- direct node (x(N + 1))
+ */
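+/*
+ * Worked example (illustrative), assuming 4 KB blocks so that N == 1018:
+ * offset 3 is the first indirect node, offsets 4..1021 are its direct
+ * children, offset 1022 (4 + N) is the second indirect node, and offset
+ * 2041 (5 + 2N) is the double indirect node.
+ */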
+static inline bool IS_DNODE(struct page *node_page)
+{
+	unsigned int ofs = ofs_of_node(node_page);
+	if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
+			ofs == 5 + 2 * NIDS_PER_BLOCK)
+		return false;
+	if (ofs >= 6 + 2 * NIDS_PER_BLOCK) {
+		ofs -= 6 + 2 * NIDS_PER_BLOCK;
+		if ((long int)ofs % (NIDS_PER_BLOCK + 1))
+			return false;
+	}
+	return true;
+}
+
+static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
+{
+	struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
+
+	wait_on_page_writeback(p);
+
+	if (i)
+		rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
+	else
+		rn->in.nid[off] = cpu_to_le32(nid);
+	set_page_dirty(p);
+}
+
+static inline nid_t get_nid(struct page *p, int off, bool i)
+{
+	struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
+	if (i)
+		return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
+	return le32_to_cpu(rn->in.nid[off]);
+}
+
+/*
+ * Coldness identification:
+ *  - Mark cold files in f2fs_inode_info
+ *  - Mark cold node blocks in their node footer
+ *  - Mark cold data pages in page cache
+ */
+static inline int is_cold_file(struct inode *inode)
+{
+	return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT;
+}
+
+static inline int is_cold_data(struct page *page)
+{
+	return PageChecked(page);
+}
+
+static inline void set_cold_data(struct page *page)
+{
+	SetPageChecked(page);
+}
+
+static inline void clear_cold_data(struct page *page)
+{
+	ClearPageChecked(page);
+}
+
+static inline int is_cold_node(struct page *page)
+{
+	void *kaddr = page_address(page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	unsigned int flag = le32_to_cpu(rn->footer.flag);
+	return flag & (0x1 << COLD_BIT_SHIFT);
+}
+
+static inline unsigned char is_fsync_dnode(struct page *page)
+{
+	void *kaddr = page_address(page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	unsigned int flag = le32_to_cpu(rn->footer.flag);
+	return flag & (0x1 << FSYNC_BIT_SHIFT);
+}
+
+static inline unsigned char is_dent_dnode(struct page *page)
+{
+	void *kaddr = page_address(page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	unsigned int flag = le32_to_cpu(rn->footer.flag);
+	return flag & (0x1 << DENT_BIT_SHIFT);
+}
+
+static inline void set_cold_node(struct inode *inode, struct page *page)
+{
+	struct f2fs_node *rn = (struct f2fs_node *)page_address(page);
+	unsigned int flag = le32_to_cpu(rn->footer.flag);
+
+	if (S_ISDIR(inode->i_mode))
+		flag &= ~(0x1 << COLD_BIT_SHIFT);
+	else
+		flag |= (0x1 << COLD_BIT_SHIFT);
+	rn->footer.flag = cpu_to_le32(flag);
+}
+
+static inline void set_fsync_mark(struct page *page, int mark)
+{
+	void *kaddr = page_address(page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	unsigned int flag = le32_to_cpu(rn->footer.flag);
+	if (mark)
+		flag |= (0x1 << FSYNC_BIT_SHIFT);
+	else
+		flag &= ~(0x1 << FSYNC_BIT_SHIFT);
+	rn->footer.flag = cpu_to_le32(flag);
+}
+
+static inline void set_dentry_mark(struct page *page, int mark)
+{
+	void *kaddr = page_address(page);
+	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+	unsigned int flag = le32_to_cpu(rn->footer.flag);
+	if (mark)
+		flag |= (0x1 << DENT_BIT_SHIFT);
+	else
+		flag &= ~(0x1 << DENT_BIT_SHIFT);
+	rn->footer.flag = cpu_to_le32(flag);
+}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
new file mode 100644
index 0000000..b07e9b6
--- /dev/null
+++ b/fs/f2fs/recovery.c
@@ -0,0 +1,375 @@
+/*
+ * fs/f2fs/recovery.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+static struct kmem_cache *fsync_entry_slab;
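+
+/*
+ * Roll-forward overview (illustrative): recover_fsync_data() first walks
+ * the warm node chain left behind after the last checkpoint, collecting
+ * inodes whose dnodes carry the fsync mark, and then replays their block
+ * addresses into the current tree before writing a checkpoint to make
+ * the result durable.
+ */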
+
+bool space_for_roll_forward(struct f2fs_sb_info *sbi)
+{
+	if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
+			> sbi->user_block_count)
+		return false;
+	return true;
+}
+
+static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
+								nid_t ino)
+{
+	struct list_head *this;
+	struct fsync_inode_entry *entry;
+
+	list_for_each(this, head) {
+		entry = list_entry(this, struct fsync_inode_entry, list);
+		if (entry->inode->i_ino == ino)
+			return entry;
+	}
+	return NULL;
+}
+
+static int recover_dentry(struct page *ipage, struct inode *inode)
+{
+	struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
+	struct f2fs_inode *raw_inode = &(raw_node->i);
+	struct dentry dent, parent;
+	struct f2fs_dir_entry *de;
+	struct page *page;
+	struct inode *dir;
+	int err = 0;
+
+	if (!is_dent_dnode(ipage))
+		goto out;
+
+	dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino));
+	if (IS_ERR(dir)) {
+		err = PTR_ERR(dir);
+		goto out;
+	}
+
+	parent.d_inode = dir;
+	dent.d_parent = &parent;
+	dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
+	dent.d_name.name = raw_inode->i_name;
+
+	de = f2fs_find_entry(dir, &dent.d_name, &page);
+	if (de) {
+		kunmap(page);
+		f2fs_put_page(page, 0);
+	} else {
+		f2fs_add_link(&dent, inode);
+	}
+	iput(dir);
+out:
+	kunmap(ipage);
+	return err;
+}
+
+static int recover_inode(struct inode *inode, struct page *node_page)
+{
+	void *kaddr = page_address(node_page);
+	struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
+	struct f2fs_inode *raw_inode = &(raw_node->i);
+
+	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+	i_size_write(inode, le64_to_cpu(raw_inode->i_size));
+	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
+	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
+	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
+	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
+	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+
+	return recover_dentry(node_page, inode);
+}
+
+static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
+{
+	unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
+	struct curseg_info *curseg;
+	struct page *page;
+	block_t blkaddr;
+	int err = 0;
+
+	/* get node pages in the current segment */
+	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+	blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
+
+	/* read node page */
+	page = alloc_page(GFP_F2FS_ZERO);
+	if (!page)
+		return -ENOMEM;
+	lock_page(page);
+
+	while (1) {
+		struct fsync_inode_entry *entry;
+
+		if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
+			goto out;
+
+		if (cp_ver != cpver_of_node(page))
+			goto out;
+
+		if (!is_fsync_dnode(page))
+			goto next;
+
+		entry = get_fsync_inode(head, ino_of_node(page));
+		if (entry) {
+			entry->blkaddr = blkaddr;
+			if (IS_INODE(page) && is_dent_dnode(page))
+				set_inode_flag(F2FS_I(entry->inode),
+							FI_INC_LINK);
+		} else {
+			if (IS_INODE(page) && is_dent_dnode(page)) {
+				if (recover_inode_page(sbi, page)) {
+					err = -ENOMEM;
+					goto out;
+				}
+			}
+
+			/* add this fsync inode to the list */
+			entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS);
+			if (!entry) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
+			if (IS_ERR(entry->inode)) {
+				/*
+				 * Free the entry before it is linked in, so
+				 * destroy_fsync_dnodes() never iput()s an
+				 * ERR_PTR inode.
+				 */
+				err = PTR_ERR(entry->inode);
+				kmem_cache_free(fsync_entry_slab, entry);
+				goto out;
+			}
+
+			INIT_LIST_HEAD(&entry->list);
+			list_add_tail(&entry->list, head);
+			entry->blkaddr = blkaddr;
+		}
+		if (IS_INODE(page)) {
+			err = recover_inode(entry->inode, page);
+			if (err)
+				goto out;
+		}
+next:
+		/* check next segment */
+		blkaddr = next_blkaddr_of_node(page);
+		ClearPageUptodate(page);
+	}
+out:
+	unlock_page(page);
+	__free_pages(page, 0);
+	return err;
+}
+
+static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
+					struct list_head *head)
+{
+	struct fsync_inode_entry *entry, *tmp;
+
+	/* use the _safe variant: entries are freed while iterating */
+	list_for_each_entry_safe(entry, tmp, head, list) {
+		iput(entry->inode);
+		list_del(&entry->list);
+		kmem_cache_free(fsync_entry_slab, entry);
+	}
+}
+
+static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
+						block_t blkaddr)
+{
+	struct seg_entry *sentry;
+	unsigned int segno = GET_SEGNO(sbi, blkaddr);
+	unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
+					(sbi->blocks_per_seg - 1);
+	struct f2fs_summary sum;
+	nid_t ino;
+	void *kaddr;
+	struct inode *inode;
+	struct page *node_page;
+	block_t bidx;
+	int i;
+
+	sentry = get_seg_entry(sbi, segno);
+	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
+		return;
+
+	/* Get the previous summary */
+	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
+		struct curseg_info *curseg = CURSEG_I(sbi, i);
+		if (curseg->segno == segno) {
+			sum = curseg->sum_blk->entries[blkoff];
+			break;
+		}
+	}
+	if (i > CURSEG_COLD_DATA) {
+		struct page *sum_page = get_sum_page(sbi, segno);
+		struct f2fs_summary_block *sum_node;
+		kaddr = page_address(sum_page);
+		sum_node = (struct f2fs_summary_block *)kaddr;
+		sum = sum_node->entries[blkoff];
+		f2fs_put_page(sum_page, 1);
+	}
+
+	/* Get the node page */
+	node_page = get_node_page(sbi, le32_to_cpu(sum.nid));
+	bidx = start_bidx_of_node(ofs_of_node(node_page)) +
+				le16_to_cpu(sum.ofs_in_node);
+	ino = ino_of_node(node_page);
+	f2fs_put_page(node_page, 1);
+
+	/* Deallocate previous index in the node page */
+	inode = f2fs_iget_nowait(sbi->sb, ino);
+	if (IS_ERR(inode))
+		return;
+	truncate_hole(inode, bidx, bidx + 1);
+	iput(inode);
+}
+
+static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
+					struct page *page, block_t blkaddr)
+{
+	unsigned int start, end;
+	struct dnode_of_data dn;
+	struct f2fs_summary sum;
+	struct node_info ni;
+
+	start = start_bidx_of_node(ofs_of_node(page));
+	if (IS_INODE(page))
+		end = start + ADDRS_PER_INODE;
+	else
+		end = start + ADDRS_PER_BLOCK;
+
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	if (get_dnode_of_data(&dn, start, 0))
+		return;
+
+	wait_on_page_writeback(dn.node_page);
+
+	get_node_info(sbi, dn.nid, &ni);
+	BUG_ON(ni.ino != ino_of_node(page));
+	BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));
+
+	for (; start < end; start++) {
+		block_t src, dest;
+
+		src = datablock_addr(dn.node_page, dn.ofs_in_node);
+		dest = datablock_addr(page, dn.ofs_in_node);
+
+		if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
+			if (src == NULL_ADDR) {
+				int err = reserve_new_block(&dn);
+				/* We should not get -ENOSPC */
+				BUG_ON(err);
+			}
+
+			/* check any previous node page holding this index */
+			check_index_in_prev_nodes(sbi, dest);
+
+			set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
+
+			/* write dummy data page */
+			recover_data_page(sbi, NULL, &sum, src, dest);
+			update_extent_cache(dest, &dn);
+		}
+		dn.ofs_in_node++;
+	}
+
+	/* write node page in place */
+	set_summary(&sum, dn.nid, 0, 0);
+	if (IS_INODE(dn.node_page))
+		sync_inode_page(&dn);
+
+	copy_node_footer(dn.node_page, page);
+	fill_node_footer(dn.node_page, dn.nid, ni.ino,
+					ofs_of_node(page), false);
+	set_page_dirty(dn.node_page);
+
+	recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
+	f2fs_put_dnode(&dn);
+}
+
+static void recover_data(struct f2fs_sb_info *sbi,
+				struct list_head *head, int type)
+{
+	unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
+	struct curseg_info *curseg;
+	struct page *page;
+	block_t blkaddr;
+
+	/* get node pages in the current segment */
+	curseg = CURSEG_I(sbi, type);
+	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+	/* read node page */
+	page = alloc_page(GFP_NOFS | __GFP_ZERO);
+	if (!page)
+		return;
+	lock_page(page);
+
+	while (1) {
+		struct fsync_inode_entry *entry;
+
+		if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
+			goto out;
+
+		if (cp_ver != cpver_of_node(page))
+			goto out;
+
+		entry = get_fsync_inode(head, ino_of_node(page));
+		if (!entry)
+			goto next;
+
+		do_recover_data(sbi, entry->inode, page, blkaddr);
+
+		if (entry->blkaddr == blkaddr) {
+			iput(entry->inode);
+			list_del(&entry->list);
+			kmem_cache_free(fsync_entry_slab, entry);
+		}
+next:
+		/* check next segment */
+		blkaddr = next_blkaddr_of_node(page);
+		ClearPageUptodate(page);
+	}
+out:
+	unlock_page(page);
+	__free_pages(page, 0);
+
+	allocate_new_segments(sbi);
+}
+
+void recover_fsync_data(struct f2fs_sb_info *sbi)
+{
+	struct list_head inode_list;
+
+	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
+			sizeof(struct fsync_inode_entry), NULL);
+	if (unlikely(!fsync_entry_slab))
+		return;
+
+	INIT_LIST_HEAD(&inode_list);
+
+	/* step #1: find fsynced inode numbers */
+	if (find_fsync_dnodes(sbi, &inode_list))
+		goto out;
+
+	if (list_empty(&inode_list))
+		goto out;
+
+	/* step #2: recover data */
+	sbi->por_doing = 1;
+	recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+	sbi->por_doing = 0;
+	BUG_ON(!list_empty(&inode_list));
+out:
+	destroy_fsync_dnodes(sbi, &inode_list);
+	kmem_cache_destroy(fsync_entry_slab);
+	write_checkpoint(sbi, false, false);
+}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
new file mode 100644
index 0000000..1b26e4e
--- /dev/null
+++ b/fs/f2fs/segment.c
@@ -0,0 +1,1791 @@
+/*
+ * fs/f2fs/segment.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+
+#include "f2fs.h"
+#include "segment.h"
+#include "node.h"
+
+static int need_to_flush(struct f2fs_sb_info *sbi)
+{
+	unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
+			sbi->segs_per_sec;
+	int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
+		>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
+	int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
+		>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
+
+	if (sbi->por_doing)
+		return 0;
+
+	if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
+						reserved_sections(sbi)))
+		return 1;
+	return 0;
+}
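+
+/*
+ * Example (illustrative): with 512-block segments and one segment per
+ * section, 1536 dirty node pages make node_secs == 3, so a flush is
+ * triggered once free sections fall to 3 + 2 * dent_secs plus the
+ * reserved sections.
+ */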
+
+/*
+ * This function balances dirty node and dentry pages.
+ * In addition, it controls garbage collection.
+ */
+void f2fs_balance_fs(struct f2fs_sb_info *sbi)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.for_reclaim = 0,
+	};
+
+	if (sbi->por_doing)
+		return;
+
+	/*
+	 * We should flush dirty node and dentry pages when there are too
+	 * many of them but still enough free segments. After that, we
+	 * should do GC if free sections run short.
+	 */
+	if (need_to_flush(sbi)) {
+		sync_dirty_dir_inodes(sbi);
+		sync_node_pages(sbi, 0, &wbc);
+	}
+
+	if (has_not_enough_free_secs(sbi)) {
+		mutex_lock(&sbi->gc_mutex);
+		f2fs_gc(sbi, 1);
+	}
+}
+
+static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
+		enum dirty_type dirty_type)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+	/* need not be added */
+	if (IS_CURSEG(sbi, segno))
+		return;
+
+	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
+		dirty_i->nr_dirty[dirty_type]++;
+
+	if (dirty_type == DIRTY) {
+		struct seg_entry *sentry = get_seg_entry(sbi, segno);
+		dirty_type = sentry->type;
+		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
+			dirty_i->nr_dirty[dirty_type]++;
+	}
+}
+
+static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
+		enum dirty_type dirty_type)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
+		dirty_i->nr_dirty[dirty_type]--;
+
+	if (dirty_type == DIRTY) {
+		struct seg_entry *sentry = get_seg_entry(sbi, segno);
+		dirty_type = sentry->type;
+		if (test_and_clear_bit(segno,
+					dirty_i->dirty_segmap[dirty_type]))
+			dirty_i->nr_dirty[dirty_type]--;
+		clear_bit(segno, dirty_i->victim_segmap[FG_GC]);
+		clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
+	}
+}
+
+/*
+ * No error such as -ENOMEM should occur here, since adding a dirty entry
+ * to the seglist is not a critical operation.
+ * If a given segment is one of the current working segments, it won't be
+ * added.
+ */
+void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned short valid_blocks;
+
+	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
+		return;
+
+	mutex_lock(&dirty_i->seglist_lock);
+
+	valid_blocks = get_valid_blocks(sbi, segno, 0);
+
+	if (valid_blocks == 0) {
+		__locate_dirty_segment(sbi, segno, PRE);
+		__remove_dirty_segment(sbi, segno, DIRTY);
+	} else if (valid_blocks < sbi->blocks_per_seg) {
+		__locate_dirty_segment(sbi, segno, DIRTY);
+	} else {
+		/* Recovery routine with SSR needs this */
+		__remove_dirty_segment(sbi, segno, DIRTY);
+	}
+
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+/*
+ * clear_prefree_segments() should be called after the checkpoint is done.
+ */
+static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno, offset = 0;
+	unsigned int total_segs = TOTAL_SEGS(sbi);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	while (1) {
+		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
+				offset);
+		if (segno >= total_segs)
+			break;
+		__set_test_and_free(sbi, segno);
+		offset = segno + 1;
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+void clear_prefree_segments(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno, offset = 0;
+	unsigned int total_segs = TOTAL_SEGS(sbi);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	while (1) {
+		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
+				offset);
+		if (segno >= total_segs)
+			break;
+
+		offset = segno + 1;
+		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
+			dirty_i->nr_dirty[PRE]--;
+
+		/* Let's use trim */
+		if (test_opt(sbi, DISCARD))
+			blkdev_issue_discard(sbi->sb->s_bdev,
+					START_BLOCK(sbi, segno) <<
+					sbi->log_sectors_per_block,
+					1 << (sbi->log_sectors_per_block +
+						sbi->log_blocks_per_seg),
+					GFP_NOFS, 0);
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap))
+		sit_i->dirty_sentries++;
+}
+
+static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
+					unsigned int segno, int modified)
+{
+	struct seg_entry *se = get_seg_entry(sbi, segno);
+	se->type = type;
+	if (modified)
+		__mark_sit_entry_dirty(sbi, segno);
+}
+
+static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
+{
+	struct seg_entry *se;
+	unsigned int segno, offset;
+	long int new_vblocks;
+
+	segno = GET_SEGNO(sbi, blkaddr);
+
+	se = get_seg_entry(sbi, segno);
+	new_vblocks = se->valid_blocks + del;
+	offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
+
+	BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
+				(new_vblocks > sbi->blocks_per_seg)));
+
+	se->valid_blocks = new_vblocks;
+	se->mtime = get_mtime(sbi);
+	SIT_I(sbi)->max_mtime = se->mtime;
+
+	/* Update valid block bitmap */
+	if (del > 0) {
+		if (f2fs_set_bit(offset, se->cur_valid_map))
+			BUG();
+	} else {
+		if (!f2fs_clear_bit(offset, se->cur_valid_map))
+			BUG();
+	}
+	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
+		se->ckpt_valid_blocks += del;
+
+	__mark_sit_entry_dirty(sbi, segno);
+
+	/* update total number of valid blocks to be written in ckpt area */
+	SIT_I(sbi)->written_valid_blocks += del;
+
+	if (sbi->segs_per_sec > 1)
+		get_sec_entry(sbi, segno)->valid_blocks += del;
+}
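+
+/*
+ * update_sit_entry() is called with del == +1 when a block becomes valid
+ * at blkaddr and del == -1 when it is invalidated; the per-segment valid
+ * bitmap and counters are kept in sync so the dirty seglist and SSR can
+ * trust them.
+ */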
+
+static void refresh_sit_entry(struct f2fs_sb_info *sbi,
+			block_t old_blkaddr, block_t new_blkaddr)
+{
+	update_sit_entry(sbi, new_blkaddr, 1);
+	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+		update_sit_entry(sbi, old_blkaddr, -1);
+}
+
+void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
+{
+	unsigned int segno = GET_SEGNO(sbi, addr);
+	struct sit_info *sit_i = SIT_I(sbi);
+
+	BUG_ON(addr == NULL_ADDR);
+	if (addr == NEW_ADDR)
+		return;
+
+	/* add it into sit main buffer */
+	mutex_lock(&sit_i->sentry_lock);
+
+	update_sit_entry(sbi, addr, -1);
+
+	/* add it into dirty seglist */
+	locate_dirty_segment(sbi, segno);
+
+	mutex_unlock(&sit_i->sentry_lock);
+}
+
+/*
+ * This function must be called with curseg_mutex held.
+ */
+static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
+		struct f2fs_summary *sum, unsigned short offset)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	void *addr = curseg->sum_blk;
+	addr += offset * sizeof(struct f2fs_summary);
+	memcpy(addr, sum, sizeof(struct f2fs_summary));
+}
+
+/*
+ * Calculate the number of current summary pages for writing
+ */
+int npages_for_summary_flush(struct f2fs_sb_info *sbi)
+{
+	int total_size_bytes = 0;
+	int valid_sum_count = 0;
+	int i, sum_space;
+
+	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+		if (sbi->ckpt->alloc_type[i] == SSR)
+			valid_sum_count += sbi->blocks_per_seg;
+		else
+			valid_sum_count += curseg_blkoff(sbi, i);
+	}
+
+	total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
+			+ sizeof(struct nat_journal) + 2
+			+ sizeof(struct sit_journal) + 2;
+	sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
+	if (total_size_bytes < sum_space)
+		return 1;
+	else if (total_size_bytes < 2 * sum_space)
+		return 2;
+	return 3;
+}
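+
+/*
+ * Illustrative, assuming 4 KB pages with SUMMARY_SIZE == 7 and
+ * SUM_FOOTER_SIZE == 5: one page holds about (4096 - 5) / 8 = 511 entries
+ * plus the nat/sit journals, so up to three pages can be needed when all
+ * three data logs are nearly full.
+ */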
+
+/*
+ * Caller should put this summary page
+ */
+struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
+}
+
+static void write_sum_page(struct f2fs_sb_info *sbi,
+			struct f2fs_summary_block *sum_blk, block_t blk_addr)
+{
+	struct page *page = grab_meta_page(sbi, blk_addr);
+	void *kaddr = page_address(page);
+	memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
+	set_page_dirty(page);
+	f2fs_put_page(page, 1);
+}
+
+static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
+					int ofs_unit, int type)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
+	unsigned int segno, next_segno, i;
+	int ofs = 0;
+
+	/*
+	 * If there are not enough reserved sections,
+	 * we should not reuse prefree segments.
+	 */
+	if (has_not_enough_free_secs(sbi))
+		return NULL_SEGNO;
+
+	/*
+	 * NODE pages should not reuse prefree segments,
+	 * since that information is needed for SPOR.
+	 */
+	if (IS_NODESEG(type))
+		return NULL_SEGNO;
+next:
+	segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++);
+	ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit;
+	if (segno < TOTAL_SEGS(sbi)) {
+		/* skip intermediate segments in a section */
+		if (segno % ofs_unit)
+			goto next;
+
+		/* skip if whole section is not prefree */
+		next_segno = find_next_zero_bit(prefree_segmap,
+						TOTAL_SEGS(sbi), segno + 1);
+		if (next_segno - segno < ofs_unit)
+			goto next;
+
+		/* skip if whole section was not free at the last checkpoint */
+		for (i = 0; i < ofs_unit; i++)
+			if (get_seg_entry(sbi, segno)->ckpt_valid_blocks)
+				goto next;
+		return segno;
+	}
+	return NULL_SEGNO;
+}
+
+/*
+ * Find a new segment from the free segments bitmap in the requested
+ * allocation direction. This function must succeed; otherwise BUG.
+ */
+static void get_new_segment(struct f2fs_sb_info *sbi,
+			unsigned int *newseg, bool new_sec, int dir)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int total_secs = sbi->total_sections;
+	unsigned int segno, secno, zoneno;
+	unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone;
+	unsigned int hint = *newseg / sbi->segs_per_sec;
+	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
+	unsigned int left_start = hint;
+	bool init = true;
+	int go_left = 0;
+	int i;
+
+	write_lock(&free_i->segmap_lock);
+
+	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
+		segno = find_next_zero_bit(free_i->free_segmap,
+					TOTAL_SEGS(sbi), *newseg + 1);
+		if (segno < TOTAL_SEGS(sbi))
+			goto got_it;
+	}
+find_other_zone:
+	secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint);
+	if (secno >= total_secs) {
+		if (dir == ALLOC_RIGHT) {
+			secno = find_next_zero_bit(free_i->free_secmap,
+						total_secs, 0);
+			BUG_ON(secno >= total_secs);
+		} else {
+			go_left = 1;
+			left_start = hint - 1;
+		}
+	}
+	if (go_left == 0)
+		goto skip_left;
+
+	while (test_bit(left_start, free_i->free_secmap)) {
+		if (left_start > 0) {
+			left_start--;
+			continue;
+		}
+		left_start = find_next_zero_bit(free_i->free_secmap,
+						total_secs, 0);
+		BUG_ON(left_start >= total_secs);
+		break;
+	}
+	secno = left_start;
+skip_left:
+	hint = secno;
+	segno = secno * sbi->segs_per_sec;
+	zoneno = secno / sbi->secs_per_zone;
+
+	/* give up on finding another zone */
+	if (!init)
+		goto got_it;
+	if (sbi->secs_per_zone == 1)
+		goto got_it;
+	if (zoneno == old_zoneno)
+		goto got_it;
+	if (dir == ALLOC_LEFT) {
+		if (!go_left && zoneno + 1 >= total_zones)
+			goto got_it;
+		if (go_left && zoneno == 0)
+			goto got_it;
+	}
+	for (i = 0; i < NR_CURSEG_TYPE; i++)
+		if (CURSEG_I(sbi, i)->zone == zoneno)
+			break;
+
+	if (i < NR_CURSEG_TYPE) {
+		/* zone is in use, try another */
+		if (go_left)
+			hint = zoneno * sbi->secs_per_zone - 1;
+		else if (zoneno + 1 >= total_zones)
+			hint = 0;
+		else
+			hint = (zoneno + 1) * sbi->secs_per_zone;
+		init = false;
+		goto find_other_zone;
+	}
+got_it:
+	/* set it as dirty segment in free segmap */
+	BUG_ON(test_bit(segno, free_i->free_segmap));
+	__set_inuse(sbi, segno);
+	*newseg = segno;
+	write_unlock(&free_i->segmap_lock);
+}
+
+static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	struct summary_footer *sum_footer;
+
+	curseg->segno = curseg->next_segno;
+	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
+	curseg->next_blkoff = 0;
+	curseg->next_segno = NULL_SEGNO;
+
+	sum_footer = &(curseg->sum_blk->footer);
+	memset(sum_footer, 0, sizeof(struct summary_footer));
+	if (IS_DATASEG(type))
+		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
+	if (IS_NODESEG(type))
+		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
+	__set_sit_entry_type(sbi, type, curseg->segno, modified);
+}
+
+/*
+ * Allocate a current working segment.
+ * This function always allocates a free segment in LFS manner.
+ */
+static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	unsigned int segno = curseg->segno;
+	int dir = ALLOC_LEFT;
+
+	write_sum_page(sbi, curseg->sum_blk,
+				GET_SUM_BLOCK(sbi, curseg->segno));
+	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
+		dir = ALLOC_RIGHT;
+
+	if (test_opt(sbi, NOHEAP))
+		dir = ALLOC_RIGHT;
+
+	get_new_segment(sbi, &segno, new_sec, dir);
+	curseg->next_segno = segno;
+	reset_curseg(sbi, type, 1);
+	curseg->alloc_type = LFS;
+}
+
+static void __next_free_blkoff(struct f2fs_sb_info *sbi,
+			struct curseg_info *seg, block_t start)
+{
+	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
+	block_t ofs;
+	for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
+		if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
+			&& !f2fs_test_bit(ofs, se->cur_valid_map))
+			break;
+	}
+	seg->next_blkoff = ofs;
+}
+
+/*
+ * If a segment is written in LFS manner, the next block offset is simply
+ * obtained by increasing the current block offset. However, if a segment is
+ * written in SSR manner, the next block offset is obtained by calling
+ * __next_free_blkoff.
+ */
+static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
+				struct curseg_info *seg)
+{
+	if (seg->alloc_type == SSR)
+		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
+	else
+		seg->next_blkoff++;
+}
+
+/*
+ * This function always allocates a used segment (from the dirty seglist) in
+ * SSR manner, so it must recover the existing segment information of valid
+ * blocks.
+ */
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	unsigned int new_segno = curseg->next_segno;
+	struct f2fs_summary_block *sum_node;
+	struct page *sum_page;
+
+	write_sum_page(sbi, curseg->sum_blk,
+				GET_SUM_BLOCK(sbi, curseg->segno));
+	__set_test_and_inuse(sbi, new_segno);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	__remove_dirty_segment(sbi, new_segno, PRE);
+	__remove_dirty_segment(sbi, new_segno, DIRTY);
+	mutex_unlock(&dirty_i->seglist_lock);
+
+	reset_curseg(sbi, type, 1);
+	curseg->alloc_type = SSR;
+	__next_free_blkoff(sbi, curseg, 0);
+
+	if (reuse) {
+		sum_page = get_sum_page(sbi, new_segno);
+		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
+		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
+		f2fs_put_page(sum_page, 1);
+	}
+}
+
+/*
+ * Flush out the current segment and replace it with a new segment.
+ * This function must succeed; otherwise BUG.
+ */
+static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
+						int type, bool force)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	unsigned int ofs_unit;
+
+	if (force) {
+		new_curseg(sbi, type, true);
+		goto out;
+	}
+
+	ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec;
+	curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
+
+	if (curseg->next_segno != NULL_SEGNO)
+		change_curseg(sbi, type, false);
+	else if (type == CURSEG_WARM_NODE)
+		new_curseg(sbi, type, false);
+	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
+		change_curseg(sbi, type, true);
+	else
+		new_curseg(sbi, type, false);
+out:
+	sbi->segment_count[curseg->alloc_type]++;
+}
+
+void allocate_new_segments(struct f2fs_sb_info *sbi)
+{
+	struct curseg_info *curseg;
+	unsigned int old_curseg;
+	int i;
+
+	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+		curseg = CURSEG_I(sbi, i);
+		old_curseg = curseg->segno;
+		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
+		locate_dirty_segment(sbi, old_curseg);
+	}
+}
+
+static const struct segment_allocation default_salloc_ops = {
+	.allocate_segment = allocate_segment_by_default,
+};
+
+static void f2fs_end_io_write(struct bio *bio, int err)
+{
+	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_private *p = bio->bi_private;
+
+	do {
+		struct page *page = bvec->bv_page;
+
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+		if (!uptodate) {
+			SetPageError(page);
+			if (page->mapping)
+				set_bit(AS_EIO, &page->mapping->flags);
+			set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
+			set_page_dirty(page);
+		}
+		end_page_writeback(page);
+		dec_page_count(p->sbi, F2FS_WRITEBACK);
+	} while (bvec >= bio->bi_io_vec);
+
+	if (p->is_sync)
+		complete(p->wait);
+	kfree(p);
+	bio_put(bio);
+}
+
+struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
+{
+	struct bio *bio;
+	struct bio_private *priv;
+retry:
+	priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
+	if (!priv) {
+		cond_resched();
+		goto retry;
+	}
+
+	/* No failure on bio allocation */
+	bio = bio_alloc(GFP_NOIO, npages);
+	bio->bi_bdev = bdev;
+	bio->bi_private = priv;
+	return bio;
+}
+
+static void do_submit_bio(struct f2fs_sb_info *sbi,
+				enum page_type type, bool sync)
+{
+	int rw = sync ? WRITE_SYNC : WRITE;
+	enum page_type btype = type > META ? META : type;
+
+	if (type >= META_FLUSH)
+		rw = WRITE_FLUSH_FUA;
+
+	if (sbi->bio[btype]) {
+		struct bio_private *p = sbi->bio[btype]->bi_private;
+		p->sbi = sbi;
+		sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
+		if (type == META_FLUSH) {
+			DECLARE_COMPLETION_ONSTACK(wait);
+			p->is_sync = true;
+			p->wait = &wait;
+			submit_bio(rw, sbi->bio[btype]);
+			wait_for_completion(&wait);
+		} else {
+			p->is_sync = false;
+			submit_bio(rw, sbi->bio[btype]);
+		}
+		sbi->bio[btype] = NULL;
+	}
+}
+
+void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
+{
+	down_write(&sbi->bio_sem);
+	do_submit_bio(sbi, type, sync);
+	up_write(&sbi->bio_sem);
+}
+
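+/*
+ * Pages written to consecutive block addresses are merged into a single
+ * bio: for illustration, back-to-back writes to blocks 100, 101 and 102
+ * (made-up numbers) grow one bio, while a following write to block 200
+ * forces the pending bio to be submitted first.
+ */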
+static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
+				block_t blk_addr, enum page_type type)
+{
+	struct block_device *bdev = sbi->sb->s_bdev;
+
+	verify_block_addr(sbi, blk_addr);
+
+	down_write(&sbi->bio_sem);
+
+	inc_page_count(sbi, F2FS_WRITEBACK);
+
+	if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
+		do_submit_bio(sbi, type, false);
+alloc_new:
+	if (sbi->bio[type] == NULL) {
+		sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev));
+		sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+		/*
+		 * The end_io will be assigned at the submission phase.
+		 * Until then, let bio_add_page() merge consecutive IOs as much
+		 * as possible.
+		 */
+	}
+
+	if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
+							PAGE_CACHE_SIZE) {
+		do_submit_bio(sbi, type, false);
+		goto alloc_new;
+	}
+
+	sbi->last_block_in_bio[type] = blk_addr;
+
+	up_write(&sbi->bio_sem);
+}
+
+static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	if (curseg->next_blkoff < sbi->blocks_per_seg)
+		return true;
+	return false;
+}
+
+static int __get_segment_type_2(struct page *page, enum page_type p_type)
+{
+	if (p_type == DATA)
+		return CURSEG_HOT_DATA;
+	else
+		return CURSEG_HOT_NODE;
+}
+
+static int __get_segment_type_4(struct page *page, enum page_type p_type)
+{
+	if (p_type == DATA) {
+		struct inode *inode = page->mapping->host;
+
+		if (S_ISDIR(inode->i_mode))
+			return CURSEG_HOT_DATA;
+		else
+			return CURSEG_COLD_DATA;
+	} else {
+		if (IS_DNODE(page) && !is_cold_node(page))
+			return CURSEG_HOT_NODE;
+		else
+			return CURSEG_COLD_NODE;
+	}
+}
+
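+/*
+ * With six active logs, pages are spread over the logs by temperature,
+ * as implemented below:
+ *
+ *   DATA: directory               -> hot  data log
+ *         cold data or cold file  -> cold data log
+ *         everything else         -> warm data log
+ *   NODE: direct node, not cold   -> hot  node log
+ *         direct node, cold       -> warm node log
+ *         indirect node           -> cold node log
+ */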
+static int __get_segment_type_6(struct page *page, enum page_type p_type)
+{
+	if (p_type == DATA) {
+		struct inode *inode = page->mapping->host;
+
+		if (S_ISDIR(inode->i_mode))
+			return CURSEG_HOT_DATA;
+		else if (is_cold_data(page) || is_cold_file(inode))
+			return CURSEG_COLD_DATA;
+		else
+			return CURSEG_WARM_DATA;
+	} else {
+		if (IS_DNODE(page))
+			return is_cold_node(page) ? CURSEG_WARM_NODE :
+						CURSEG_HOT_NODE;
+		else
+			return CURSEG_COLD_NODE;
+	}
+}
+
+static int __get_segment_type(struct page *page, enum page_type p_type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+	switch (sbi->active_logs) {
+	case 2:
+		return __get_segment_type_2(page, p_type);
+	case 4:
+		return __get_segment_type_4(page, p_type);
+	case 6:
+		return __get_segment_type_6(page, p_type);
+	default:
+		BUG();
+	}
+}
+
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+			block_t old_blkaddr, block_t *new_blkaddr,
+			struct f2fs_summary *sum, enum page_type p_type)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	struct curseg_info *curseg;
+	unsigned int old_cursegno;
+	int type;
+
+	type = __get_segment_type(page, p_type);
+	curseg = CURSEG_I(sbi, type);
+
+	mutex_lock(&curseg->curseg_mutex);
+
+	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+	old_cursegno = curseg->segno;
+
+	/*
+	 * __add_sum_entry must be called under the curseg_mutex,
+	 * because this function updates a summary entry in the
+	 * current summary block.
+	 */
+	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);
+
+	mutex_lock(&sit_i->sentry_lock);
+	__refresh_next_blkoff(sbi, curseg);
+	sbi->block_count[curseg->alloc_type]++;
+
+	/*
+	 * SIT information should be updated before segment allocation,
+	 * since SSR needs the latest valid block information.
+	 */
+	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
+
+	if (!__has_curseg_space(sbi, type))
+		sit_i->s_ops->allocate_segment(sbi, type, false);
+
+	locate_dirty_segment(sbi, old_cursegno);
+	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+	mutex_unlock(&sit_i->sentry_lock);
+
+	if (p_type == NODE)
+		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
+
+	/* writeout dirty page into bdev */
+	submit_write_page(sbi, page, *new_blkaddr, p_type);
+
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
+int write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
+			struct writeback_control *wbc)
+{
+	if (wbc->for_reclaim)
+		return AOP_WRITEPAGE_ACTIVATE;
+
+	set_page_writeback(page);
+	submit_write_page(sbi, page, page->index, META);
+	return 0;
+}
+
+void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
+		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
+{
+	struct f2fs_summary sum;
+	set_summary(&sum, nid, 0, 0);
+	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
+}
+
+void write_data_page(struct inode *inode, struct page *page,
+		struct dnode_of_data *dn, block_t old_blkaddr,
+		block_t *new_blkaddr)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_summary sum;
+	struct node_info ni;
+
+	BUG_ON(old_blkaddr == NULL_ADDR);
+	get_node_info(sbi, dn->nid, &ni);
+	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+
+	do_write_page(sbi, page, old_blkaddr,
+			new_blkaddr, &sum, DATA);
+}
+
+void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
+					block_t old_blk_addr)
+{
+	submit_write_page(sbi, page, old_blk_addr, DATA);
+}
+
+void recover_data_page(struct f2fs_sb_info *sbi,
+			struct page *page, struct f2fs_summary *sum,
+			block_t old_blkaddr, block_t new_blkaddr)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	struct curseg_info *curseg;
+	unsigned int segno, old_cursegno;
+	struct seg_entry *se;
+	int type;
+
+	segno = GET_SEGNO(sbi, new_blkaddr);
+	se = get_seg_entry(sbi, segno);
+	type = se->type;
+
+	if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
+		if (old_blkaddr == NULL_ADDR)
+			type = CURSEG_COLD_DATA;
+		else
+			type = CURSEG_WARM_DATA;
+	}
+	curseg = CURSEG_I(sbi, type);
+
+	mutex_lock(&curseg->curseg_mutex);
+	mutex_lock(&sit_i->sentry_lock);
+
+	old_cursegno = curseg->segno;
+
+	/* change the current segment */
+	if (segno != curseg->segno) {
+		curseg->next_segno = segno;
+		change_curseg(sbi, type, true);
+	}
+
+	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
+					(sbi->blocks_per_seg - 1);
+	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);
+
+	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
+
+	locate_dirty_segment(sbi, old_cursegno);
+	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+
+	mutex_unlock(&sit_i->sentry_lock);
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
+void rewrite_node_page(struct f2fs_sb_info *sbi,
+			struct page *page, struct f2fs_summary *sum,
+			block_t old_blkaddr, block_t new_blkaddr)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	int type = CURSEG_WARM_NODE;
+	struct curseg_info *curseg;
+	unsigned int segno, old_cursegno;
+	block_t next_blkaddr = next_blkaddr_of_node(page);
+	unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
+
+	curseg = CURSEG_I(sbi, type);
+
+	mutex_lock(&curseg->curseg_mutex);
+	mutex_lock(&sit_i->sentry_lock);
+
+	segno = GET_SEGNO(sbi, new_blkaddr);
+	old_cursegno = curseg->segno;
+
+	/* change the current segment */
+	if (segno != curseg->segno) {
+		curseg->next_segno = segno;
+		change_curseg(sbi, type, true);
+	}
+	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
+					(sbi->blocks_per_seg - 1);
+	__add_sum_entry(sbi, type, sum, curseg->next_blkoff);
+
+	/* change the current log to the next block addr in advance */
+	if (next_segno != segno) {
+		curseg->next_segno = next_segno;
+		change_curseg(sbi, type, true);
+	}
+	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
+					(sbi->blocks_per_seg - 1);
+
+	/* rewrite node page */
+	set_page_writeback(page);
+	submit_write_page(sbi, page, new_blkaddr, NODE);
+	f2fs_submit_bio(sbi, NODE, true);
+	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
+
+	locate_dirty_segment(sbi, old_cursegno);
+	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+
+	mutex_unlock(&sit_i->sentry_lock);
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
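+/*
+ * Layout of the compacted summary area, for illustration (assuming 4KB
+ * blocks and 7-byte on-disk summary entries): the first block starts
+ * with the NAT journal followed by the SIT journal, after which the
+ * hot, warm and cold data summary entries are packed back to back.
+ * Whenever the next entry would overlap the reserved footer space,
+ * packing continues in the next meta block at offset 0.
+ */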
+static int read_compacted_summaries(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	struct curseg_info *seg_i;
+	unsigned char *kaddr;
+	struct page *page;
+	block_t start;
+	int i, j, offset;
+
+	start = start_sum_block(sbi);
+
+	page = get_meta_page(sbi, start++);
+	kaddr = (unsigned char *)page_address(page);
+
+	/* Step 1: restore nat cache */
+	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
+
+	/* Step 2: restore sit cache */
+	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
+	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
+						SUM_JOURNAL_SIZE);
+	offset = 2 * SUM_JOURNAL_SIZE;
+
+	/* Step 3: restore summary entries */
+	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+		unsigned short blk_off;
+		unsigned int segno;
+
+		seg_i = CURSEG_I(sbi, i);
+		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
+		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
+		seg_i->next_segno = segno;
+		reset_curseg(sbi, i, 0);
+		seg_i->alloc_type = ckpt->alloc_type[i];
+		seg_i->next_blkoff = blk_off;
+
+		if (seg_i->alloc_type == SSR)
+			blk_off = sbi->blocks_per_seg;
+
+		for (j = 0; j < blk_off; j++) {
+			struct f2fs_summary *s;
+			s = (struct f2fs_summary *)(kaddr + offset);
+			seg_i->sum_blk->entries[j] = *s;
+			offset += SUMMARY_SIZE;
+			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+						SUM_FOOTER_SIZE)
+				continue;
+
+			f2fs_put_page(page, 1);
+			page = NULL;
+
+			page = get_meta_page(sbi, start++);
+			kaddr = (unsigned char *)page_address(page);
+			offset = 0;
+		}
+	}
+	f2fs_put_page(page, 1);
+	return 0;
+}
+
+static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	struct f2fs_summary_block *sum;
+	struct curseg_info *curseg;
+	struct page *new;
+	unsigned short blk_off;
+	unsigned int segno = 0;
+	block_t blk_addr = 0;
+
+	/* get segment number and block addr */
+	if (IS_DATASEG(type)) {
+		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
+		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
+							CURSEG_HOT_DATA]);
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
+			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
+		else
+			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
+	} else {
+		segno = le32_to_cpu(ckpt->cur_node_segno[type -
+							CURSEG_HOT_NODE]);
+		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
+							CURSEG_HOT_NODE]);
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
+			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
+							type - CURSEG_HOT_NODE);
+		else
+			blk_addr = GET_SUM_BLOCK(sbi, segno);
+	}
+
+	new = get_meta_page(sbi, blk_addr);
+	sum = (struct f2fs_summary_block *)page_address(new);
+
+	if (IS_NODESEG(type)) {
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
+			struct f2fs_summary *ns = &sum->entries[0];
+			int i;
+			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
+				ns->version = 0;
+				ns->ofs_in_node = 0;
+			}
+		} else {
+			if (restore_node_summary(sbi, segno, sum)) {
+				f2fs_put_page(new, 1);
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* set the uncompleted segment as the current segment */
+	curseg = CURSEG_I(sbi, type);
+	mutex_lock(&curseg->curseg_mutex);
+	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
+	curseg->next_segno = segno;
+	reset_curseg(sbi, type, 0);
+	curseg->alloc_type = ckpt->alloc_type[type];
+	curseg->next_blkoff = blk_off;
+	mutex_unlock(&curseg->curseg_mutex);
+	f2fs_put_page(new, 1);
+	return 0;
+}
+
+static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
+{
+	int type = CURSEG_HOT_DATA;
+
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
+		/* restore for compacted data summary */
+		if (read_compacted_summaries(sbi))
+			return -EINVAL;
+		type = CURSEG_HOT_NODE;
+	}
+
+	for (; type <= CURSEG_COLD_NODE; type++)
+		if (read_normal_summaries(sbi, type))
+			return -EINVAL;
+	return 0;
+}
+
+static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+	struct page *page;
+	unsigned char *kaddr;
+	struct f2fs_summary *summary;
+	struct curseg_info *seg_i;
+	int written_size = 0;
+	int i, j;
+
+	page = grab_meta_page(sbi, blkaddr++);
+	kaddr = (unsigned char *)page_address(page);
+
+	/* Step 1: write nat cache */
+	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
+	written_size += SUM_JOURNAL_SIZE;
+
+	/* Step 2: write sit cache */
+	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
+	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
+						SUM_JOURNAL_SIZE);
+	written_size += SUM_JOURNAL_SIZE;
+
+	set_page_dirty(page);
+
+	/* Step 3: write summary entries */
+	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+		unsigned short blkoff;
+		seg_i = CURSEG_I(sbi, i);
+		if (sbi->ckpt->alloc_type[i] == SSR)
+			blkoff = sbi->blocks_per_seg;
+		else
+			blkoff = curseg_blkoff(sbi, i);
+
+		for (j = 0; j < blkoff; j++) {
+			if (!page) {
+				page = grab_meta_page(sbi, blkaddr++);
+				kaddr = (unsigned char *)page_address(page);
+				written_size = 0;
+			}
+			summary = (struct f2fs_summary *)(kaddr + written_size);
+			*summary = seg_i->sum_blk->entries[j];
+			written_size += SUMMARY_SIZE;
+			set_page_dirty(page);
+
+			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+							SUM_FOOTER_SIZE)
+				continue;
+
+			f2fs_put_page(page, 1);
+			page = NULL;
+		}
+	}
+	if (page)
+		f2fs_put_page(page, 1);
+}
+
+static void write_normal_summaries(struct f2fs_sb_info *sbi,
+					block_t blkaddr, int type)
+{
+	int i, end;
+	if (IS_DATASEG(type))
+		end = type + NR_CURSEG_DATA_TYPE;
+	else
+		end = type + NR_CURSEG_NODE_TYPE;
+
+	for (i = type; i < end; i++) {
+		struct curseg_info *sum = CURSEG_I(sbi, i);
+		mutex_lock(&sum->curseg_mutex);
+		write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
+		mutex_unlock(&sum->curseg_mutex);
+	}
+}
+
+void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+{
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
+		write_compacted_summaries(sbi, start_blk);
+	else
+		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
+}
+
+void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+{
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
+		write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
+}
+
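+/*
+ * Look up @val in the NAT or SIT journal of the given summary block.
+ * Returns the index of a matching entry, the index of a newly allocated
+ * slot when @alloc is set and the journal still has room, or -1.
+ */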
+int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
+					unsigned int val, int alloc)
+{
+	int i;
+
+	if (type == NAT_JOURNAL) {
+		for (i = 0; i < nats_in_cursum(sum); i++) {
+			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
+				return i;
+		}
+		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
+			return update_nats_in_cursum(sum, 1);
+	} else if (type == SIT_JOURNAL) {
+		for (i = 0; i < sits_in_cursum(sum); i++)
+			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
+				return i;
+		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
+			return update_sits_in_cursum(sum, 1);
+	}
+	return -1;
+}
+
+static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
+					unsigned int segno)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno);
+	block_t blk_addr = sit_i->sit_base_addr + offset;
+
+	check_seg_range(sbi, segno);
+
+	/* calculate sit block address */
+	if (f2fs_test_bit(offset, sit_i->sit_bitmap))
+		blk_addr += sit_i->sit_blocks;
+
+	return get_meta_page(sbi, blk_addr);
+}
+
+static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
+					unsigned int start)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	struct page *src_page, *dst_page;
+	pgoff_t src_off, dst_off;
+	void *src_addr, *dst_addr;
+
+	src_off = current_sit_addr(sbi, start);
+	dst_off = next_sit_addr(sbi, src_off);
+
+	/* get current sit block page without lock */
+	src_page = get_meta_page(sbi, src_off);
+	dst_page = grab_meta_page(sbi, dst_off);
+	BUG_ON(PageDirty(src_page));
+
+	src_addr = page_address(src_page);
+	dst_addr = page_address(dst_page);
+	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+
+	set_page_dirty(dst_page);
+	f2fs_put_page(src_page, 1);
+
+	set_to_next_sit(sit_i, start);
+
+	return dst_page;
+}
+
+static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	int i;
+
+	/*
+	 * If the journal area in the current summary block is full of sit
+	 * entries, all of them are flushed out; otherwise newly hot sit
+	 * entries cannot replace the existing ones.
+	 */
+	if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) {
+		for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
+			unsigned int segno;
+			segno = le32_to_cpu(segno_in_journal(sum, i));
+			__mark_sit_entry_dirty(sbi, segno);
+		}
+		update_sits_in_cursum(sum, -sits_in_cursum(sum));
+		return true;
+	}
+	return false;
+}
+
+/*
+ * CP calls this function, which flushes SIT entries including sit_journal,
+ * and moves prefree segs to free segs.
+ */
+void flush_sit_entries(struct f2fs_sb_info *sbi)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	unsigned long nsegs = TOTAL_SEGS(sbi);
+	struct page *page = NULL;
+	struct f2fs_sit_block *raw_sit = NULL;
+	unsigned int start = 0, end = 0;
+	unsigned int segno = -1;
+	bool flushed;
+
+	mutex_lock(&curseg->curseg_mutex);
+	mutex_lock(&sit_i->sentry_lock);
+
+	/*
+	 * "flushed" indicates whether sit entries in journal are flushed
+	 * to the SIT area or not.
+	 */
+	flushed = flush_sits_in_journal(sbi);
+
+	while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
+		struct seg_entry *se = get_seg_entry(sbi, segno);
+		int sit_offset, offset;
+
+		sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
+
+		if (flushed)
+			goto to_sit_page;
+
+		offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
+		if (offset >= 0) {
+			segno_in_journal(sum, offset) = cpu_to_le32(segno);
+			seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
+			goto flush_done;
+		}
+to_sit_page:
+		if (!page || (start > segno) || (segno > end)) {
+			if (page) {
+				f2fs_put_page(page, 1);
+				page = NULL;
+			}
+
+			start = START_SEGNO(sit_i, segno);
+			end = start + SIT_ENTRY_PER_BLOCK - 1;
+
+			/* read sit block that will be updated */
+			page = get_next_sit_page(sbi, start);
+			raw_sit = page_address(page);
+		}
+
+		/* update entry in SIT block */
+		seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
+flush_done:
+		__clear_bit(segno, bitmap);
+		sit_i->dirty_sentries--;
+	}
+	mutex_unlock(&sit_i->sentry_lock);
+	mutex_unlock(&curseg->curseg_mutex);
+
+	/* writeout last modified SIT block */
+	f2fs_put_page(page, 1);
+
+	set_prefree_as_free_segments(sbi);
+}
+
+static int build_sit_info(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	struct sit_info *sit_i;
+	unsigned int sit_segs, start;
+	char *src_bitmap, *dst_bitmap;
+	unsigned int bitmap_size;
+
+	/* allocate memory for SIT information */
+	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
+	if (!sit_i)
+		return -ENOMEM;
+
+	SM_I(sbi)->sit_info = sit_i;
+
+	sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry));
+	if (!sit_i->sentries)
+		return -ENOMEM;
+
+	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
+	sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!sit_i->dirty_sentries_bitmap)
+		return -ENOMEM;
+
+	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
+		sit_i->sentries[start].cur_valid_map
+			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+		sit_i->sentries[start].ckpt_valid_map
+			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+		if (!sit_i->sentries[start].cur_valid_map
+				|| !sit_i->sentries[start].ckpt_valid_map)
+			return -ENOMEM;
+	}
+
+	if (sbi->segs_per_sec > 1) {
+		sit_i->sec_entries = vzalloc(sbi->total_sections *
+					sizeof(struct sec_entry));
+		if (!sit_i->sec_entries)
+			return -ENOMEM;
+	}
+
+	/* get information related to SIT */
+	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
+
+	/* setup SIT bitmap from the checkpoint pack */
+	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
+	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
+
+	dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!dst_bitmap)
+		return -ENOMEM;
+	memcpy(dst_bitmap, src_bitmap, bitmap_size);
+
+	/* init SIT information */
+	sit_i->s_ops = &default_salloc_ops;
+
+	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
+	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
+	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
+	sit_i->sit_bitmap = dst_bitmap;
+	sit_i->bitmap_size = bitmap_size;
+	sit_i->dirty_sentries = 0;
+	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
+	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
+	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
+	mutex_init(&sit_i->sentry_lock);
+	return 0;
+}
+
+static int build_free_segmap(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_sm_info *sm_info = SM_I(sbi);
+	struct free_segmap_info *free_i;
+	unsigned int bitmap_size, sec_bitmap_size;
+
+	/* allocate memory for free segmap information */
+	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
+	if (!free_i)
+		return -ENOMEM;
+
+	SM_I(sbi)->free_info = free_i;
+
+	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
+	free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
+	if (!free_i->free_segmap)
+		return -ENOMEM;
+
+	sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections);
+	free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
+	if (!free_i->free_secmap)
+		return -ENOMEM;
+
+	/* set all segments as dirty temporarily */
+	memset(free_i->free_segmap, 0xff, bitmap_size);
+	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
+
+	/* init free segmap information */
+	free_i->start_segno =
+		(unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
+	free_i->free_segments = 0;
+	free_i->free_sections = 0;
+	rwlock_init(&free_i->segmap_lock);
+	return 0;
+}
+
+static int build_curseg(struct f2fs_sb_info *sbi)
+{
+	struct curseg_info *array;
+	int i;
+
+	array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
+	if (!array)
+		return -ENOMEM;
+
+	SM_I(sbi)->curseg_array = array;
+
+	for (i = 0; i < NR_CURSEG_TYPE; i++) {
+		mutex_init(&array[i].curseg_mutex);
+		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+		if (!array[i].sum_blk)
+			return -ENOMEM;
+		array[i].segno = NULL_SEGNO;
+		array[i].next_blkoff = 0;
+	}
+	return restore_curseg_summaries(sbi);
+}
+
+static void build_sit_entries(struct f2fs_sb_info *sbi)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	unsigned int start;
+
+	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
+		struct seg_entry *se = &sit_i->sentries[start];
+		struct f2fs_sit_block *sit_blk;
+		struct f2fs_sit_entry sit;
+		struct page *page;
+		int i;
+
+		mutex_lock(&curseg->curseg_mutex);
+		for (i = 0; i < sits_in_cursum(sum); i++) {
+			if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
+				sit = sit_in_journal(sum, i);
+				mutex_unlock(&curseg->curseg_mutex);
+				goto got_it;
+			}
+		}
+		mutex_unlock(&curseg->curseg_mutex);
+		page = get_current_sit_page(sbi, start);
+		sit_blk = (struct f2fs_sit_block *)page_address(page);
+		sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
+		f2fs_put_page(page, 1);
+got_it:
+		check_block_count(sbi, start, &sit);
+		seg_info_from_raw_sit(se, &sit);
+		if (sbi->segs_per_sec > 1) {
+			struct sec_entry *e = get_sec_entry(sbi, start);
+			e->valid_blocks += se->valid_blocks;
+		}
+	}
+}
+
+static void init_free_segmap(struct f2fs_sb_info *sbi)
+{
+	unsigned int start;
+	int type;
+
+	for (start = 0; start < TOTAL_SEGS(sbi); start++) {
+		struct seg_entry *sentry = get_seg_entry(sbi, start);
+		if (!sentry->valid_blocks)
+			__set_free(sbi, start);
+	}
+
+	/* set the current segments as in-use */
+	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
+		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
+		__set_test_and_inuse(sbi, curseg_t->segno);
+	}
+}
+
+static void init_dirty_segmap(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int segno = 0, offset = 0;
+	unsigned short valid_blocks;
+
+	while (segno < TOTAL_SEGS(sbi)) {
+		/* find dirty segment based on free segmap */
+		segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset);
+		if (segno >= TOTAL_SEGS(sbi))
+			break;
+		offset = segno + 1;
+		valid_blocks = get_valid_blocks(sbi, segno, 0);
+		if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks)
+			continue;
+		mutex_lock(&dirty_i->seglist_lock);
+		__locate_dirty_segment(sbi, segno, DIRTY);
+		mutex_unlock(&dirty_i->seglist_lock);
+	}
+}
+
+static int init_victim_segmap(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
+
+	dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
+	dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
+		return -ENOMEM;
+	return 0;
+}
+
+static int build_dirty_segmap(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i;
+	unsigned int bitmap_size, i;
+
+	/* allocate memory for dirty segments list information */
+	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
+	if (!dirty_i)
+		return -ENOMEM;
+
+	SM_I(sbi)->dirty_info = dirty_i;
+	mutex_init(&dirty_i->seglist_lock);
+
+	bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
+
+	for (i = 0; i < NR_DIRTY_TYPE; i++) {
+		dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
+		dirty_i->nr_dirty[i] = 0;
+		if (!dirty_i->dirty_segmap[i])
+			return -ENOMEM;
+	}
+
+	init_dirty_segmap(sbi);
+	return init_victim_segmap(sbi);
+}
+
+/*
+ * Update min, max modified time for cost-benefit GC algorithm
+ */
+static void init_min_max_mtime(struct f2fs_sb_info *sbi)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int segno;
+
+	mutex_lock(&sit_i->sentry_lock);
+
+	sit_i->min_mtime = LLONG_MAX;
+
+	for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
+		unsigned int i;
+		unsigned long long mtime = 0;
+
+		for (i = 0; i < sbi->segs_per_sec; i++)
+			mtime += get_seg_entry(sbi, segno + i)->mtime;
+
+		mtime = div_u64(mtime, sbi->segs_per_sec);
+
+		if (sit_i->min_mtime > mtime)
+			sit_i->min_mtime = mtime;
+	}
+	sit_i->max_mtime = get_mtime(sbi);
+	mutex_unlock(&sit_i->sentry_lock);
+}
+
+int build_segment_manager(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	struct f2fs_sm_info *sm_info;
+	int err;
+
+	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
+	if (!sm_info)
+		return -ENOMEM;
+
+	/* init sm info */
+	sbi->sm_info = sm_info;
+	INIT_LIST_HEAD(&sm_info->wblist_head);
+	spin_lock_init(&sm_info->wblist_lock);
+	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
+	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
+	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
+	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
+	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
+	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
+	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+
+	err = build_sit_info(sbi);
+	if (err)
+		return err;
+	err = build_free_segmap(sbi);
+	if (err)
+		return err;
+	err = build_curseg(sbi);
+	if (err)
+		return err;
+
+	/* reinit free segmap based on SIT */
+	build_sit_entries(sbi);
+
+	init_free_segmap(sbi);
+	err = build_dirty_segmap(sbi);
+	if (err)
+		return err;
+
+	init_min_max_mtime(sbi);
+	return 0;
+}
+
+static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
+		enum dirty_type dirty_type)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	kfree(dirty_i->dirty_segmap[dirty_type]);
+	dirty_i->nr_dirty[dirty_type] = 0;
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+void reset_victim_segmap(struct f2fs_sb_info *sbi)
+{
+	unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
+	memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
+}
+
+static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+	kfree(dirty_i->victim_segmap[FG_GC]);
+	kfree(dirty_i->victim_segmap[BG_GC]);
+}
+
+static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	int i;
+
+	if (!dirty_i)
+		return;
+
+	/* discard pre-free/dirty segments list */
+	for (i = 0; i < NR_DIRTY_TYPE; i++)
+		discard_dirty_segmap(sbi, i);
+
+	destroy_victim_segmap(sbi);
+	SM_I(sbi)->dirty_info = NULL;
+	kfree(dirty_i);
+}
+
+static void destroy_curseg(struct f2fs_sb_info *sbi)
+{
+	struct curseg_info *array = SM_I(sbi)->curseg_array;
+	int i;
+
+	if (!array)
+		return;
+	SM_I(sbi)->curseg_array = NULL;
+	for (i = 0; i < NR_CURSEG_TYPE; i++)
+		kfree(array[i].sum_blk);
+	kfree(array);
+}
+
+static void destroy_free_segmap(struct f2fs_sb_info *sbi)
+{
+	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
+	if (!free_i)
+		return;
+	SM_I(sbi)->free_info = NULL;
+	kfree(free_i->free_segmap);
+	kfree(free_i->free_secmap);
+	kfree(free_i);
+}
+
+static void destroy_sit_info(struct f2fs_sb_info *sbi)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int start;
+
+	if (!sit_i)
+		return;
+
+	if (sit_i->sentries) {
+		for (start = 0; start < TOTAL_SEGS(sbi); start++) {
+			kfree(sit_i->sentries[start].cur_valid_map);
+			kfree(sit_i->sentries[start].ckpt_valid_map);
+		}
+	}
+	vfree(sit_i->sentries);
+	vfree(sit_i->sec_entries);
+	kfree(sit_i->dirty_sentries_bitmap);
+
+	SM_I(sbi)->sit_info = NULL;
+	kfree(sit_i->sit_bitmap);
+	kfree(sit_i);
+}
+
+void destroy_segment_manager(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_sm_info *sm_info = SM_I(sbi);
+	destroy_dirty_segmap(sbi);
+	destroy_curseg(sbi);
+	destroy_free_segmap(sbi);
+	destroy_sit_info(sbi);
+	sbi->sm_info = NULL;
+	kfree(sm_info);
+}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
new file mode 100644
index 0000000..0948405
--- /dev/null
+++ b/fs/f2fs/segment.h
@@ -0,0 +1,618 @@
+/*
+ * fs/f2fs/segment.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/* constant macro */
+#define NULL_SEGNO			((unsigned int)(~0))
+
+/* V: Logical segment # in volume, R: Relative segment # in main area */
+#define GET_L2R_SEGNO(free_i, segno)	(segno - free_i->start_segno)
+#define GET_R2L_SEGNO(free_i, segno)	(segno + free_i->start_segno)
+
+#define IS_DATASEG(t)							\
+	((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) ||		\
+	(t == CURSEG_WARM_DATA))
+
+#define IS_NODESEG(t)							\
+	((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) ||		\
+	(t == CURSEG_WARM_NODE))
+
+#define IS_CURSEG(sbi, segno)						\
+	((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) ||	\
+	 (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) ||	\
+	 (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
+	 (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
+	 (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
+	 (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+
+#define IS_CURSEC(sbi, secno)						\
+	((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
+	  sbi->segs_per_sec) ||	\
+	 (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno /		\
+	  sbi->segs_per_sec) ||	\
+	 (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno /		\
+	  sbi->segs_per_sec) ||	\
+	 (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno /		\
+	  sbi->segs_per_sec) ||	\
+	 (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
+	  sbi->segs_per_sec) ||	\
+	 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
+	  sbi->segs_per_sec))
+
+#define START_BLOCK(sbi, segno)						\
+	(SM_I(sbi)->seg0_blkaddr +					\
+	 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
+#define NEXT_FREE_BLKADDR(sbi, curseg)					\
+	(START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
+
+#define MAIN_BASE_BLOCK(sbi)	(SM_I(sbi)->main_blkaddr)
+
+#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr)				\
+	((blk_addr) - SM_I(sbi)->seg0_blkaddr)
+#define GET_SEGNO_FROM_SEG0(sbi, blk_addr)				\
+	(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
+#define GET_SEGNO(sbi, blk_addr)					\
+	(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ?		\
+	NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi),			\
+		GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
+#define GET_SECNO(sbi, segno)					\
+	((segno) / sbi->segs_per_sec)
+#define GET_ZONENO_FROM_SEGNO(sbi, segno)				\
+	((segno / sbi->segs_per_sec) / sbi->secs_per_zone)
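+/*
+ * Worked example (numbers for illustration only): with seg0_blkaddr ==
+ * 512 and log_blocks_per_seg == 9 (512 blocks per 2MB segment), block
+ * address 5000 is at offset 4488 from segment 0 and therefore belongs
+ * to segment #8 counted from segment 0.
+ */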
+
+#define GET_SUM_BLOCK(sbi, segno)				\
+	((sbi->sm_info->ssa_blkaddr) + segno)
+
+#define GET_SUM_TYPE(footer) ((footer)->entry_type)
+#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type)
+
+#define SIT_ENTRY_OFFSET(sit_i, segno)					\
+	(segno % sit_i->sents_per_block)
+#define SIT_BLOCK_OFFSET(sit_i, segno)					\
+	(segno / SIT_ENTRY_PER_BLOCK)
+#define	START_SEGNO(sit_i, segno)		\
+	(SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK)
+#define f2fs_bitmap_size(nr)			\
+	(BITS_TO_LONGS(nr) * sizeof(unsigned long))
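+/* e.g., 1000 segments need BITS_TO_LONGS(1000) = 16 longs, i.e. 128
+ * bytes of bitmap on a 64-bit machine (numbers for illustration only) */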
+#define TOTAL_SEGS(sbi)	(SM_I(sbi)->main_segments)
+
+#define SECTOR_FROM_BLOCK(sbi, blk_addr)				\
+	(blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
+
+/* during checkpoint, bio_private is used to synchronize the last bio */
+struct bio_private {
+	struct f2fs_sb_info *sbi;
+	bool is_sync;
+	void *wait;
+};
+
+/*
+ * indicate a block allocation direction: RIGHT and LEFT.
+ * RIGHT means allocating new sections towards the end of the volume.
+ * LEFT means the opposite direction.
+ */
+enum {
+	ALLOC_RIGHT = 0,
+	ALLOC_LEFT
+};
+
+/*
+ * In the victim_sel_policy->alloc_mode, there are two block allocation modes.
+ * LFS writes data sequentially with cleaning operations.
+ * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
+ */
+enum {
+	LFS = 0,
+	SSR
+};
+
+/*
+ * In the victim_sel_policy->gc_mode, there are two GC (cleaning) modes.
+ * GC_CB is based on cost-benefit algorithm.
+ * GC_GREEDY is based on greedy algorithm.
+ */
+enum {
+	GC_CB = 0,
+	GC_GREEDY
+};
+
+/*
+ * BG_GC means the background cleaning job.
+ * FG_GC means the on-demand cleaning job.
+ */
+enum {
+	BG_GC = 0,
+	FG_GC
+};
+
+/* for a function parameter to select a victim segment */
+struct victim_sel_policy {
+	int alloc_mode;			/* LFS or SSR */
+	int gc_mode;			/* GC_CB or GC_GREEDY */
+	unsigned long *dirty_segmap;	/* dirty segment bitmap */
+	unsigned int offset;		/* last scanned bitmap offset */
+	unsigned int ofs_unit;		/* bitmap search unit */
+	unsigned int min_cost;		/* minimum cost */
+	unsigned int min_segno;		/* segment # having min. cost */
+};
+
+struct seg_entry {
+	unsigned short valid_blocks;	/* # of valid blocks */
+	unsigned char *cur_valid_map;	/* validity bitmap of blocks */
+	/*
+	 * # of valid blocks and the validity bitmap stored in the last
+	 * checkpoint pack. This information is used by the SSR mode.
+	 */
+	unsigned short ckpt_valid_blocks;
+	unsigned char *ckpt_valid_map;
+	unsigned char type;		/* segment type like CURSEG_XXX_TYPE */
+	unsigned long long mtime;	/* modification time of the segment */
+};
+
+struct sec_entry {
+	unsigned int valid_blocks;	/* # of valid blocks in a section */
+};
+
+struct segment_allocation {
+	void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
+};
+
+struct sit_info {
+	const struct segment_allocation *s_ops;
+
+	block_t sit_base_addr;		/* start block address of SIT area */
+	block_t sit_blocks;		/* # of blocks used by SIT area */
+	block_t written_valid_blocks;	/* # of valid blocks in main area */
+	char *sit_bitmap;		/* SIT bitmap pointer */
+	unsigned int bitmap_size;	/* SIT bitmap size */
+
+	unsigned long *dirty_sentries_bitmap;	/* bitmap for dirty sentries */
+	unsigned int dirty_sentries;		/* # of dirty sentries */
+	unsigned int sents_per_block;		/* # of SIT entries per block */
+	struct mutex sentry_lock;		/* to protect SIT cache */
+	struct seg_entry *sentries;		/* SIT segment-level cache */
+	struct sec_entry *sec_entries;		/* SIT section-level cache */
+
+	/* for cost-benefit algorithm in cleaning procedure */
+	unsigned long long elapsed_time;	/* elapsed time after mount */
+	unsigned long long mounted_time;	/* mount time */
+	unsigned long long min_mtime;		/* min. modification time */
+	unsigned long long max_mtime;		/* max. modification time */
+};
+
+struct free_segmap_info {
+	unsigned int start_segno;	/* start segment number logically */
+	unsigned int free_segments;	/* # of free segments */
+	unsigned int free_sections;	/* # of free sections */
+	rwlock_t segmap_lock;		/* free segmap lock */
+	unsigned long *free_segmap;	/* free segment bitmap */
+	unsigned long *free_secmap;	/* free section bitmap */
+};
+
+/* Notice: The order of dirty type is the same as CURSEG_XXX in f2fs.h */
+enum dirty_type {
+	DIRTY_HOT_DATA,		/* dirty segments assigned as hot data logs */
+	DIRTY_WARM_DATA,	/* dirty segments assigned as warm data logs */
+	DIRTY_COLD_DATA,	/* dirty segments assigned as cold data logs */
+	DIRTY_HOT_NODE,		/* dirty segments assigned as hot node logs */
+	DIRTY_WARM_NODE,	/* dirty segments assigned as warm node logs */
+	DIRTY_COLD_NODE,	/* dirty segments assigned as cold node logs */
+	DIRTY,			/* to count # of dirty segments */
+	PRE,			/* to count # of entirely obsolete segments */
+	NR_DIRTY_TYPE
+};
+
+struct dirty_seglist_info {
+	const struct victim_selection *v_ops;	/* victim selection operation */
+	unsigned long *dirty_segmap[NR_DIRTY_TYPE];
+	struct mutex seglist_lock;		/* lock for segment bitmaps */
+	int nr_dirty[NR_DIRTY_TYPE];		/* # of dirty segments */
+	unsigned long *victim_segmap[2];	/* BG_GC, FG_GC */
+};
+
+/* victim selection function for cleaning and SSR */
+struct victim_selection {
+	int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
+							int, int, char);
+};
+
+/* for active log information */
+struct curseg_info {
+	struct mutex curseg_mutex;		/* lock for consistency */
+	struct f2fs_summary_block *sum_blk;	/* cached summary block */
+	unsigned char alloc_type;		/* current allocation type */
+	unsigned int segno;			/* current segment number */
+	unsigned short next_blkoff;		/* next block offset to write */
+	unsigned int zone;			/* current zone number */
+	unsigned int next_segno;		/* preallocated segment */
+};
+
+/*
+ * inline functions
+ */
+static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
+{
+	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
+}
+
+static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi,
+						unsigned int segno)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	return &sit_i->sentries[segno];
+}
+
+static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi,
+						unsigned int segno)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	return &sit_i->sec_entries[GET_SECNO(sbi, segno)];
+}
+
+static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
+				unsigned int segno, int section)
+{
+	/*
+	 * In order to get # of valid blocks in a section instantly from many
+	 * segments, f2fs manages two counting structures separately.
+	 */
+	if (section > 1)
+		return get_sec_entry(sbi, segno)->valid_blocks;
+	else
+		return get_seg_entry(sbi, segno)->valid_blocks;
+}
+
+static inline void seg_info_from_raw_sit(struct seg_entry *se,
+					struct f2fs_sit_entry *rs)
+{
+	se->valid_blocks = GET_SIT_VBLOCKS(rs);
+	se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
+	memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+	memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+	se->type = GET_SIT_TYPE(rs);
+	se->mtime = le64_to_cpu(rs->mtime);
+}
+
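+/*
+ * The raw vblocks field packs the segment type into the bits above
+ * SIT_VBLOCKS_SHIFT and the valid block count into the low bits.
+ * For illustration, assuming SIT_VBLOCKS_SHIFT == 10: a cold data
+ * segment (type 2) holding 300 valid blocks is encoded as
+ * (2 << 10) | 300 == 0x92c.
+ */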
+static inline void seg_info_to_raw_sit(struct seg_entry *se,
+					struct f2fs_sit_entry *rs)
+{
+	unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) |
+					se->valid_blocks;
+	rs->vblocks = cpu_to_le16(raw_vblocks);
+	memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
+	memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+	se->ckpt_valid_blocks = se->valid_blocks;
+	rs->mtime = cpu_to_le64(se->mtime);
+}
+
+static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
+		unsigned int max, unsigned int segno)
+{
+	unsigned int ret;
+	read_lock(&free_i->segmap_lock);
+	ret = find_next_bit(free_i->free_segmap, max, segno);
+	read_unlock(&free_i->segmap_lock);
+	return ret;
+}
+
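+/*
+ * A section is counted as free only when every segment inside it is
+ * free: for illustration, with segs_per_sec == 4, freeing the last
+ * in-use segment of a section also clears the section bit and bumps
+ * free_sections by one.
+ */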
+static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int secno = segno / sbi->segs_per_sec;
+	unsigned int start_segno = secno * sbi->segs_per_sec;
+	unsigned int next;
+
+	write_lock(&free_i->segmap_lock);
+	clear_bit(segno, free_i->free_segmap);
+	free_i->free_segments++;
+
+	next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno);
+	if (next >= start_segno + sbi->segs_per_sec) {
+		clear_bit(secno, free_i->free_secmap);
+		free_i->free_sections++;
+	}
+	write_unlock(&free_i->segmap_lock);
+}
+
+static inline void __set_inuse(struct f2fs_sb_info *sbi,
+		unsigned int segno)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int secno = segno / sbi->segs_per_sec;
+	set_bit(segno, free_i->free_segmap);
+	free_i->free_segments--;
+	if (!test_and_set_bit(secno, free_i->free_secmap))
+		free_i->free_sections--;
+}
+
+static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
+		unsigned int segno)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int secno = segno / sbi->segs_per_sec;
+	unsigned int start_segno = secno * sbi->segs_per_sec;
+	unsigned int next;
+
+	write_lock(&free_i->segmap_lock);
+	if (test_and_clear_bit(segno, free_i->free_segmap)) {
+		free_i->free_segments++;
+
+		next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi),
+								start_segno);
+		if (next >= start_segno + sbi->segs_per_sec) {
+			if (test_and_clear_bit(secno, free_i->free_secmap))
+				free_i->free_sections++;
+		}
+	}
+	write_unlock(&free_i->segmap_lock);
+}
+
+static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
+		unsigned int segno)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int secno = segno / sbi->segs_per_sec;
+	write_lock(&free_i->segmap_lock);
+	if (!test_and_set_bit(segno, free_i->free_segmap)) {
+		free_i->free_segments--;
+		if (!test_and_set_bit(secno, free_i->free_secmap))
+			free_i->free_sections--;
+	}
+	write_unlock(&free_i->segmap_lock);
+}
+
+static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
+		void *dst_addr)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
+}
+
+static inline block_t written_block_count(struct f2fs_sb_info *sbi)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	block_t vblocks;
+
+	mutex_lock(&sit_i->sentry_lock);
+	vblocks = sit_i->written_valid_blocks;
+	mutex_unlock(&sit_i->sentry_lock);
+
+	return vblocks;
+}
+
+static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int free_segs;
+
+	read_lock(&free_i->segmap_lock);
+	free_segs = free_i->free_segments;
+	read_unlock(&free_i->segmap_lock);
+
+	return free_segs;
+}
+
+static inline int reserved_segments(struct f2fs_sb_info *sbi)
+{
+	return SM_I(sbi)->reserved_segments;
+}
+
+static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	unsigned int free_secs;
+
+	read_lock(&free_i->segmap_lock);
+	free_secs = free_i->free_sections;
+	read_unlock(&free_i->segmap_lock);
+
+	return free_secs;
+}
+
+static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
+{
+	return DIRTY_I(sbi)->nr_dirty[PRE];
+}
+
+static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
+{
+	return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] +
+		DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] +
+		DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] +
+		DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] +
+		DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] +
+		DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
+}
+
+static inline int overprovision_segments(struct f2fs_sb_info *sbi)
+{
+	return SM_I(sbi)->ovp_segments;
+}
+
+static inline int overprovision_sections(struct f2fs_sb_info *sbi)
+{
+	return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec;
+}
+
+static inline int reserved_sections(struct f2fs_sb_info *sbi)
+{
+	return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec;
+}
+
+static inline bool need_SSR(struct f2fs_sb_info *sbi)
+{
+	return (free_sections(sbi) < overprovision_sections(sbi));
+}
+
+static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	return DIRTY_I(sbi)->v_ops->get_victim(sbi,
+				&(curseg)->next_segno, BG_GC, type, SSR);
+}
+
+static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi)
+{
+	return free_sections(sbi) <= reserved_sections(sbi);
+}
+
+static inline int utilization(struct f2fs_sb_info *sbi)
+{
+	return (long int)valid_user_blocks(sbi) * 100 /
+			(long int)sbi->user_block_count;
+}
+
+/*
+ * Sometimes it is better for f2fs to drop its out-of-place update policy
+ * and, when fs utilization is over MIN_IPU_UTIL, to write data in place
+ * as other traditional file systems do.
+ * The threshold is currently set to 100 percent, which disables in-place
+ * updates.  See need_inplace_update() below.
+ */
+#define MIN_IPU_UTIL		100
+static inline bool need_inplace_update(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	if (S_ISDIR(inode->i_mode))
+		return false;
+	if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL)
+		return true;
+	return false;
+}
+
+static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
+		int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	return curseg->segno;
+}
+
+static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi,
+		int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	return curseg->alloc_type;
+}
+
+static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	return curseg->next_blkoff;
+}
+
+static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+	unsigned int end_segno = SM_I(sbi)->segment_count - 1;
+	BUG_ON(segno > end_segno);
+}
+
+/*
+ * This function is used only for debugging.
+ * NOTE: this function should be removed in the future.
+ */
+static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
+{
+	struct f2fs_sm_info *sm_info = SM_I(sbi);
+	block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg;
+	block_t start_addr = sm_info->seg0_blkaddr;
+	block_t end_addr = start_addr + total_blks - 1;
+	BUG_ON(blk_addr < start_addr);
+	BUG_ON(blk_addr > end_addr);
+}
+
+/*
+ * A summary block is always treated as an invalid block.
+ */
+static inline void check_block_count(struct f2fs_sb_info *sbi,
+		int segno, struct f2fs_sit_entry *raw_sit)
+{
+	struct f2fs_sm_info *sm_info = SM_I(sbi);
+	unsigned int end_segno = sm_info->segment_count - 1;
+	int valid_blocks = 0;
+	int i;
+
+	/* check segment usage */
+	BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
+
+	/* check boundary of a given segment number */
+	BUG_ON(segno > end_segno);
+
+	/* check bitmap with valid block count */
+	for (i = 0; i < sbi->blocks_per_seg; i++)
+		if (f2fs_test_bit(i, raw_sit->valid_map))
+			valid_blocks++;
+	BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
+}
+
+static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
+						unsigned int start)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start);
+	block_t blk_addr = sit_i->sit_base_addr + offset;
+
+	check_seg_range(sbi, start);
+
+	/* calculate sit block address */
+	if (f2fs_test_bit(offset, sit_i->sit_bitmap))
+		blk_addr += sit_i->sit_blocks;
+
+	return blk_addr;
+}
+
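+/*
+ * SIT blocks are kept in two copies, and updates ping-pong between
+ * them.  Worked example (addresses for illustration only): with
+ * sit_base_addr == 1000 and sit_blocks == 50, address 1010 in copy #0
+ * maps to 1060 in copy #1, and 1060 maps back to 1010.
+ */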
+static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi,
+						pgoff_t block_addr)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	block_addr -= sit_i->sit_base_addr;
+	if (block_addr < sit_i->sit_blocks)
+		block_addr += sit_i->sit_blocks;
+	else
+		block_addr -= sit_i->sit_blocks;
+
+	return block_addr + sit_i->sit_base_addr;
+}
+
+static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
+{
+	unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start);
+
+	if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
+		f2fs_clear_bit(block_off, sit_i->sit_bitmap);
+	else
+		f2fs_set_bit(block_off, sit_i->sit_bitmap);
+}
+
+static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec -
+						sit_i->mounted_time;
+}
+
+static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
+			unsigned int ofs_in_node, unsigned char version)
+{
+	sum->nid = cpu_to_le32(nid);
+	sum->ofs_in_node = cpu_to_le16(ofs_in_node);
+	sum->version = version;
+}
+
+static inline block_t start_sum_block(struct f2fs_sb_info *sbi)
+{
+	return __start_cp_addr(sbi) +
+		le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
+}
+
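+/*
+ * Illustration: the summary blocks sit at the tail of the checkpoint
+ * pack.  With a 20-block pack and base == NR_CURSEG_TYPE (6), type 0
+ * lands at pack offset 20 - 7 + 0 == 13, and the six summary blocks
+ * occupy offsets 13..18, ending one block before the end of the pack.
+ */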
+static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
+{
+	return __start_cp_addr(sbi) +
+		le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count)
+				- (base + 1) + type;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
new file mode 100644
index 0000000..1386732
--- /dev/null
+++ b/fs/f2fs/super.c
@@ -0,0 +1,657 @@
+/*
+ * fs/f2fs/super.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/statfs.h>
+#include <linux/proc_fs.h>
+#include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
+#include <linux/kthread.h>
+#include <linux/parser.h>
+#include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/random.h>
+#include <linux/exportfs.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "xattr.h"
+
+static struct kmem_cache *f2fs_inode_cachep;
+
+enum {
+	Opt_gc_background_off,
+	Opt_disable_roll_forward,
+	Opt_discard,
+	Opt_noheap,
+	Opt_nouser_xattr,
+	Opt_noacl,
+	Opt_active_logs,
+	Opt_disable_ext_identify,
+	Opt_err,
+};
+
+static match_table_t f2fs_tokens = {
+	{Opt_gc_background_off, "background_gc_off"},
+	{Opt_disable_roll_forward, "disable_roll_forward"},
+	{Opt_discard, "discard"},
+	{Opt_noheap, "no_heap"},
+	{Opt_nouser_xattr, "nouser_xattr"},
+	{Opt_noacl, "noacl"},
+	{Opt_active_logs, "active_logs=%u"},
+	{Opt_disable_ext_identify, "disable_ext_identify"},
+	{Opt_err, NULL},
+};
+
+static void init_once(void *foo)
+{
+	struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
+
+	inode_init_once(&fi->vfs_inode);
+}
+
+static struct inode *f2fs_alloc_inode(struct super_block *sb)
+{
+	struct f2fs_inode_info *fi;
+
+	fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO);
+	if (!fi)
+		return NULL;
+
+	init_once((void *) fi);
+
+	/* Initialize f2fs-specific inode info */
+	fi->vfs_inode.i_version = 1;
+	atomic_set(&fi->dirty_dents, 0);
+	fi->i_current_depth = 1;
+	fi->i_advise = 0;
+	rwlock_init(&fi->ext.ext_lock);
+
+	set_inode_flag(fi, FI_NEW_INODE);
+
+	return &fi->vfs_inode;
+}
+
+static void f2fs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
+}
+
+static void f2fs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, f2fs_i_callback);
+}
+
+static void f2fs_put_super(struct super_block *sb)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+	f2fs_destroy_stats(sbi);
+	stop_gc_thread(sbi);
+
+	write_checkpoint(sbi, false, true);
+
+	iput(sbi->node_inode);
+	iput(sbi->meta_inode);
+
+	/* destroy f2fs internal modules */
+	destroy_node_manager(sbi);
+	destroy_segment_manager(sbi);
+
+	kfree(sbi->ckpt);
+
+	sb->s_fs_info = NULL;
+	brelse(sbi->raw_super_buf);
+	kfree(sbi);
+}
+
+int f2fs_sync_fs(struct super_block *sb, int sync)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int ret = 0;
+
+	if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
+		return 0;
+
+	if (sync)
+		write_checkpoint(sbi, false, false);
+
+	return ret;
+}
+
+static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+	block_t total_count, user_block_count, start_count, ovp_count;
+
+	total_count = le64_to_cpu(sbi->raw_super->block_count);
+	user_block_count = sbi->user_block_count;
+	start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
+	ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
+	buf->f_type = F2FS_SUPER_MAGIC;
+	buf->f_bsize = sbi->blocksize;
+
+	buf->f_blocks = total_count - start_count;
+	buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
+	buf->f_bavail = user_block_count - valid_user_blocks(sbi);
+
+	buf->f_files = valid_inode_count(sbi);
+	buf->f_ffree = sbi->total_node_count - valid_node_count(sbi);
+
+	buf->f_namelen = F2FS_MAX_NAME_LEN;
+	buf->f_fsid.val[0] = (u32)id;
+	buf->f_fsid.val[1] = (u32)(id >> 32);
+
+	return 0;
+}
+
+static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
+
+	if (test_opt(sbi, BG_GC))
+		seq_puts(seq, ",background_gc_on");
+	else
+		seq_puts(seq, ",background_gc_off");
+	if (test_opt(sbi, DISABLE_ROLL_FORWARD))
+		seq_puts(seq, ",disable_roll_forward");
+	if (test_opt(sbi, DISCARD))
+		seq_puts(seq, ",discard");
+	if (test_opt(sbi, NOHEAP))
+		seq_puts(seq, ",no_heap_alloc");
+#ifdef CONFIG_F2FS_FS_XATTR
+	if (test_opt(sbi, XATTR_USER))
+		seq_puts(seq, ",user_xattr");
+	else
+		seq_puts(seq, ",nouser_xattr");
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+	if (test_opt(sbi, POSIX_ACL))
+		seq_puts(seq, ",acl");
+	else
+		seq_puts(seq, ",noacl");
+#endif
+	if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
+		seq_puts(seq, ",disable_ext_indentify");
+
+	seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+
+	return 0;
+}
+
+static struct super_operations f2fs_sops = {
+	.alloc_inode	= f2fs_alloc_inode,
+	.destroy_inode	= f2fs_destroy_inode,
+	.write_inode	= f2fs_write_inode,
+	.show_options	= f2fs_show_options,
+	.evict_inode	= f2fs_evict_inode,
+	.put_super	= f2fs_put_super,
+	.sync_fs	= f2fs_sync_fs,
+	.statfs		= f2fs_statfs,
+};
+
+static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
+		u64 ino, u32 generation)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *inode;
+
+	if (ino < F2FS_ROOT_INO(sbi))
+		return ERR_PTR(-ESTALE);
+
+	/*
+	 * f2fs_iget isn't quite right if the inode is currently unallocated!
+	 * However f2fs_iget currently does appropriate checks to handle stale
+	 * inodes so everything is OK.
+	 */
+	inode = f2fs_iget(sb, ino);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	if (generation && inode->i_generation != generation) {
+		/* we didn't find the right inode.. */
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+	return inode;
+}
+
+static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+				    f2fs_nfs_get_inode);
+}
+
+static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+				    f2fs_nfs_get_inode);
+}
+
+static const struct export_operations f2fs_export_ops = {
+	.fh_to_dentry = f2fs_fh_to_dentry,
+	.fh_to_parent = f2fs_fh_to_parent,
+	.get_parent = f2fs_get_parent,
+};
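+
These export hooks are what make an f2fs mount NFS-exportable: exportfs packs an (ino, generation) pair into an opaque handle and later resolves it back through f2fs_fh_to_dentry(). The same round trip can be driven from userspace; a hedged sketch using the generic handle syscalls (paths and mount layout are hypothetical):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdlib.h>
	#include <unistd.h>

	int reopen_by_handle(void)
	{
		struct file_handle *fh;
		int mount_id, mnt_fd, fd;

		fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
		if (!fh)
			return -1;
		fh->handle_bytes = MAX_HANDLE_SZ;
		if (name_to_handle_at(AT_FDCWD, "/mnt/f2fs/data",
				      fh, &mount_id, 0) < 0)
			return -1;

		/* Later, possibly from another process (needs
		 * CAP_DAC_READ_SEARCH); decoding ends up in
		 * f2fs_fh_to_dentry() via generic_fh_to_dentry(). */
		mnt_fd = open("/mnt/f2fs", O_RDONLY | O_DIRECTORY);
		fd = open_by_handle_at(mnt_fd, fh, O_RDONLY);
		close(mnt_fd);
		free(fh);
		return fd;
	}
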
+
+static int parse_options(struct f2fs_sb_info *sbi, char *options)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *p;
+	int arg = 0;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+		/*
+		 * Initialize args struct so we know whether arg was
+		 * found; some options take optional arguments.
+		 */
+		args[0].to = args[0].from = NULL;
+		token = match_token(p, f2fs_tokens, args);
+
+		switch (token) {
+		case Opt_gc_background_off:
+			clear_opt(sbi, BG_GC);
+			break;
+		case Opt_disable_roll_forward:
+			set_opt(sbi, DISABLE_ROLL_FORWARD);
+			break;
+		case Opt_discard:
+			set_opt(sbi, DISCARD);
+			break;
+		case Opt_noheap:
+			set_opt(sbi, NOHEAP);
+			break;
+#ifdef CONFIG_F2FS_FS_XATTR
+		case Opt_nouser_xattr:
+			clear_opt(sbi, XATTR_USER);
+			break;
+#else
+		case Opt_nouser_xattr:
+			pr_info("nouser_xattr options not supported\n");
+			break;
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+		case Opt_noacl:
+			clear_opt(sbi, POSIX_ACL);
+			break;
+#else
+		case Opt_noacl:
+			pr_info("noacl options not supported\n");
+			break;
+#endif
+		case Opt_active_logs:
+			if (args->from && match_int(args, &arg))
+				return -EINVAL;
+			if (arg != 2 && arg != 4 && arg != 6)
+				return -EINVAL;
+			sbi->active_logs = arg;
+			break;
+		case Opt_disable_ext_identify:
+			set_opt(sbi, DISABLE_EXT_IDENTIFY);
+			break;
+		default:
+			pr_err("Unrecognized mount option \"%s\" or missing value\n",
+					p);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
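+
parse_options() walks the comma-separated data string passed at mount time, so the defaults chosen in f2fs_fill_super() can be overridden per mount; any unknown token fails the mount with -EINVAL. A hedged illustration via the mount(2) syscall (device and target paths are hypothetical; the option names follow the cases above):

	#include <sys/mount.h>

	/* Turn background GC off and use six active logs. */
	int mount_f2fs(void)
	{
		return mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0,
			     "background_gc_off,active_logs=6");
	}
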
+
+static loff_t max_file_size(unsigned bits)
+{
+	loff_t result = ADDRS_PER_INODE;
+	loff_t leaf_count = ADDRS_PER_BLOCK;
+
+	/* two direct node blocks */
+	result += (leaf_count * 2);
+
+	/* two indirect node blocks */
+	leaf_count *= NIDS_PER_BLOCK;
+	result += (leaf_count * 2);
+
+	/* one double indirect node block */
+	leaf_count *= NIDS_PER_BLOCK;
+	result += leaf_count;
+
+	result <<= bits;
+	return result;
+}
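+
The arithmetic here is easy to check by hand. Assuming the usual f2fs geometry for 4 KB blocks — ADDRS_PER_INODE = 923 and ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018, constants defined outside this hunk, so take the figures as illustrative:

	    923                       addresses in the inode itself
	+ 2 * 1018                    two direct node blocks
	+ 2 * 1018 * 1018             two indirect node blocks
	+ 1018 * 1018 * 1018          one double-indirect node block
	= 1,057,053,439 blocks

which, after the final << 12 shift, comes to roughly 3.94 TiB per file.
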
+
+static int sanity_check_raw_super(struct f2fs_super_block *raw_super)
+{
+	unsigned int blocksize;
+
+	if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic))
+		return 1;
+
+	/* Currently, support only 4KB block size */
+	blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
+	if (blocksize != PAGE_CACHE_SIZE)
+		return 1;
+	if (le32_to_cpu(raw_super->log_sectorsize) !=
+					F2FS_LOG_SECTOR_SIZE)
+		return 1;
+	if (le32_to_cpu(raw_super->log_sectors_per_block) !=
+					F2FS_LOG_SECTORS_PER_BLOCK)
+		return 1;
+	return 0;
+}
+
+static int sanity_check_ckpt(struct f2fs_super_block *raw_super,
+				struct f2fs_checkpoint *ckpt)
+{
+	unsigned int total, fsmeta;
+
+	total = le32_to_cpu(raw_super->segment_count);
+	fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
+	fsmeta += le32_to_cpu(raw_super->segment_count_sit);
+	fsmeta += le32_to_cpu(raw_super->segment_count_nat);
+	fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
+	fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
+
+	if (fsmeta >= total)
+		return 1;
+	return 0;
+}
+
+static void init_sb_info(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *raw_super = sbi->raw_super;
+	int i;
+
+	sbi->log_sectors_per_block =
+		le32_to_cpu(raw_super->log_sectors_per_block);
+	sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
+	sbi->blocksize = 1 << sbi->log_blocksize;
+	sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+	sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
+	sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+	sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+	sbi->total_sections = le32_to_cpu(raw_super->section_count);
+	sbi->total_node_count =
+		(le32_to_cpu(raw_super->segment_count_nat) / 2)
+			* sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
+	sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
+	sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
+	sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
+
+	for (i = 0; i < NR_COUNT_TYPE; i++)
+		atomic_set(&sbi->nr_pages[i], 0);
+}
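+
One line above deserves unpacking: the NAT area keeps two copies of every table block for checkpointing, hence the division by two in total_node_count. A hedged worked example (assuming NAT_ENTRY_PER_BLOCK = 455, i.e. 4096-byte blocks over 9-byte packed NAT entries — constants defined elsewhere in the series):

	segment_count_nat = 4, blocks_per_seg = 512
	total_node_count  = (4 / 2) * 512 * 455 = 465,920 addressable node IDs
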
+
+static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct f2fs_sb_info *sbi;
+	struct f2fs_super_block *raw_super;
+	struct buffer_head *raw_super_buf;
+	struct inode *root;
+	long err = -EINVAL;
+	int i;
+
+	/* allocate memory for f2fs-specific super block info */
+	sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+
+	/* set a temporary block size */
+	if (!sb_set_blocksize(sb, F2FS_BLKSIZE))
+		goto free_sbi;
+
+	/* read f2fs raw super block */
+	raw_super_buf = sb_bread(sb, 0);
+	if (!raw_super_buf) {
+		err = -EIO;
+		goto free_sbi;
+	}
+	raw_super = (struct f2fs_super_block *)
+			((char *)raw_super_buf->b_data + F2FS_SUPER_OFFSET);
+
+	/* init some FS parameters */
+	sbi->active_logs = NR_CURSEG_TYPE;
+
+	set_opt(sbi, BG_GC);
+
+#ifdef CONFIG_F2FS_FS_XATTR
+	set_opt(sbi, XATTR_USER);
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+	set_opt(sbi, POSIX_ACL);
+#endif
+	/* parse mount options */
+	if (parse_options(sbi, (char *)data))
+		goto free_sb_buf;
+
+	/* sanity checking of raw super */
+	if (sanity_check_raw_super(raw_super))
+		goto free_sb_buf;
+
+	sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
+	sb->s_max_links = F2FS_LINK_MAX;
+	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
+
+	sb->s_op = &f2fs_sops;
+	sb->s_xattr = f2fs_xattr_handlers;
+	sb->s_export_op = &f2fs_export_ops;
+	sb->s_magic = F2FS_SUPER_MAGIC;
+	sb->s_fs_info = sbi;
+	sb->s_time_gran = 1;
+	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
+	memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+
+	/* init f2fs-specific super block info */
+	sbi->sb = sb;
+	sbi->raw_super = raw_super;
+	sbi->raw_super_buf = raw_super_buf;
+	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->write_inode);
+	mutex_init(&sbi->writepages);
+	mutex_init(&sbi->cp_mutex);
+	for (i = 0; i < NR_LOCK_TYPE; i++)
+		mutex_init(&sbi->fs_lock[i]);
+	sbi->por_doing = 0;
+	spin_lock_init(&sbi->stat_lock);
+	init_rwsem(&sbi->bio_sem);
+	init_sb_info(sbi);
+
+	/* get an inode for meta space */
+	sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
+	if (IS_ERR(sbi->meta_inode)) {
+		err = PTR_ERR(sbi->meta_inode);
+		goto free_sb_buf;
+	}
+
+	err = get_valid_checkpoint(sbi);
+	if (err)
+		goto free_meta_inode;
+
+	/* sanity checking of checkpoint */
+	err = -EINVAL;
+	if (sanity_check_ckpt(raw_super, sbi->ckpt))
+		goto free_cp;
+
+	sbi->total_valid_node_count =
+				le32_to_cpu(sbi->ckpt->valid_node_count);
+	sbi->total_valid_inode_count =
+				le32_to_cpu(sbi->ckpt->valid_inode_count);
+	sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
+	sbi->total_valid_block_count =
+				le64_to_cpu(sbi->ckpt->valid_block_count);
+	sbi->last_valid_block_count = sbi->total_valid_block_count;
+	sbi->alloc_valid_block_count = 0;
+	INIT_LIST_HEAD(&sbi->dir_inode_list);
+	spin_lock_init(&sbi->dir_inode_lock);
+
+	/* init super block */
+	if (!sb_set_blocksize(sb, sbi->blocksize))
+		goto free_cp;
+
+	init_orphan_info(sbi);
+
+	/* setup f2fs internal modules */
+	err = build_segment_manager(sbi);
+	if (err)
+		goto free_sm;
+	err = build_node_manager(sbi);
+	if (err)
+		goto free_nm;
+
+	build_gc_manager(sbi);
+
+	/* get an inode for node space */
+	sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
+	if (IS_ERR(sbi->node_inode)) {
+		err = PTR_ERR(sbi->node_inode);
+		goto free_nm;
+	}
+
+	/* if the image was not cleanly unmounted, free any orphan inodes */
+	err = -EINVAL;
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
+				recover_orphan_inodes(sbi))
+		goto free_node_inode;
+
+	/* read root inode and dentry */
+	root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
+	if (IS_ERR(root)) {
+		err = PTR_ERR(root);
+		goto free_node_inode;
+	}
+	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size)
+		goto free_root_inode;
+
+	sb->s_root = d_make_root(root); /* allocate root dentry */
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto free_root_inode;
+	}
+
+	/* recover fsynced data */
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
+				!test_opt(sbi, DISABLE_ROLL_FORWARD))
+		recover_fsync_data(sbi);
+
+	/* After POR, we can run background GC thread */
+	err = start_gc_thread(sbi);
+	if (err)
+		goto fail;
+
+	err = f2fs_build_stats(sbi);
+	if (err)
+		goto fail;
+
+	return 0;
+fail:
+	stop_gc_thread(sbi);
+free_root_inode:
+	dput(sb->s_root);
+	sb->s_root = NULL;
+free_node_inode:
+	iput(sbi->node_inode);
+free_nm:
+	destroy_node_manager(sbi);
+free_sm:
+	destroy_segment_manager(sbi);
+free_cp:
+	kfree(sbi->ckpt);
+free_meta_inode:
+	make_bad_inode(sbi->meta_inode);
+	iput(sbi->meta_inode);
+free_sb_buf:
+	brelse(raw_super_buf);
+free_sbi:
+	kfree(sbi);
+	return err;
+}
+
+static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
+			const char *dev_name, void *data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
+}
+
+static struct file_system_type f2fs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "f2fs",
+	.mount		= f2fs_mount,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+
+static int init_inodecache(void)
+{
+	f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
+			sizeof(struct f2fs_inode_info), NULL);
+	if (f2fs_inode_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static void destroy_inodecache(void)
+{
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(f2fs_inode_cachep);
+}
+
+static int __init init_f2fs_fs(void)
+{
+	int err;
+
+	err = init_inodecache();
+	if (err)
+		goto fail;
+	err = create_node_manager_caches();
+	if (err)
+		goto free_inodecache;
+	err = create_gc_caches();
+	if (err)
+		goto free_node_manager_caches;
+	err = create_checkpoint_caches();
+	if (err)
+		goto free_gc_caches;
+	err = register_filesystem(&f2fs_fs_type);
+	if (err)
+		goto free_checkpoint_caches;
+	return 0;
+
+free_checkpoint_caches:
+	destroy_checkpoint_caches();
+free_gc_caches:
+	destroy_gc_caches();
+free_node_manager_caches:
+	destroy_node_manager_caches();
+free_inodecache:
+	destroy_inodecache();
+fail:
+	return err;
+}
+
+static void __exit exit_f2fs_fs(void)
+{
+	destroy_root_stats();
+	unregister_filesystem(&f2fs_fs_type);
+	destroy_checkpoint_caches();
+	destroy_gc_caches();
+	destroy_node_manager_caches();
+	destroy_inodecache();
+}
+
+module_init(init_f2fs_fs)
+module_exit(exit_f2fs_fs)
+
+MODULE_AUTHOR("Samsung Electronics's Praesto Team");
+MODULE_DESCRIPTION("Flash Friendly File System");
+MODULE_LICENSE("GPL");
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
new file mode 100644
index 0000000..7d52e8d
--- /dev/null
+++ b/fs/f2fs/xattr.c
@@ -0,0 +1,440 @@
+/*
+ * fs/f2fs/xattr.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/xattr.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
+ *
+ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
+ * Extended attributes for symlinks and special files added per
+ *  suggestion of Luka Renko <luka.renko@hermes.si>.
+ * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
+ *  Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/rwsem.h>
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "xattr.h"
+
+static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+	int total_len, prefix_len = 0;
+	const char *prefix = NULL;
+
+	switch (type) {
+	case F2FS_XATTR_INDEX_USER:
+		if (!test_opt(sbi, XATTR_USER))
+			return -EOPNOTSUPP;
+		prefix = XATTR_USER_PREFIX;
+		prefix_len = XATTR_USER_PREFIX_LEN;
+		break;
+	case F2FS_XATTR_INDEX_TRUSTED:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		prefix = XATTR_TRUSTED_PREFIX;
+		prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	total_len = prefix_len + name_len + 1;
+	if (list && total_len <= list_size) {
+		memcpy(list, prefix, prefix_len);
+		memcpy(list+prefix_len, name, name_len);
+		list[prefix_len + name_len] = '\0';
+	}
+	return total_len;
+}
+
+static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+
+	switch (type) {
+	case F2FS_XATTR_INDEX_USER:
+		if (!test_opt(sbi, XATTR_USER))
+			return -EOPNOTSUPP;
+		break;
+	case F2FS_XATTR_INDEX_TRUSTED:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return f2fs_getxattr(dentry->d_inode, type, name,
+			buffer, size);
+}
+
+static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+
+	switch (type) {
+	case F2FS_XATTR_INDEX_USER:
+		if (!test_opt(sbi, XATTR_USER))
+			return -EOPNOTSUPP;
+		break;
+	case F2FS_XATTR_INDEX_TRUSTED:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	return f2fs_setxattr(dentry->d_inode, type, name, value, size);
+}
+
+static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
+{
+	const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
+	size_t size;
+
+	if (type != F2FS_XATTR_INDEX_ADVISE)
+		return 0;
+
+	size = strlen(xname) + 1;
+	if (list && size <= list_size)
+		memcpy(list, xname, size);
+	return size;
+}
+
+static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+
+	if (buffer)
+		*((char *)buffer) = F2FS_I(inode)->i_advise;
+	return sizeof(char);
+}
+
+static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!inode_owner_or_capable(inode))
+		return -EPERM;
+	if (value == NULL)
+		return -EINVAL;
+
+	F2FS_I(inode)->i_advise |= *(char *)value;
+	return 0;
+}
+
+const struct xattr_handler f2fs_xattr_user_handler = {
+	.prefix	= XATTR_USER_PREFIX,
+	.flags	= F2FS_XATTR_INDEX_USER,
+	.list	= f2fs_xattr_generic_list,
+	.get	= f2fs_xattr_generic_get,
+	.set	= f2fs_xattr_generic_set,
+};
+
+const struct xattr_handler f2fs_xattr_trusted_handler = {
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.flags	= F2FS_XATTR_INDEX_TRUSTED,
+	.list	= f2fs_xattr_generic_list,
+	.get	= f2fs_xattr_generic_get,
+	.set	= f2fs_xattr_generic_set,
+};
+
+const struct xattr_handler f2fs_xattr_advise_handler = {
+	.prefix = F2FS_SYSTEM_ADVISE_PREFIX,
+	.flags	= F2FS_XATTR_INDEX_ADVISE,
+	.list   = f2fs_xattr_advise_list,
+	.get    = f2fs_xattr_advise_get,
+	.set    = f2fs_xattr_advise_set,
+};
+
+static const struct xattr_handler *f2fs_xattr_handler_map[] = {
+	[F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+	[F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler,
+	[F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler,
+#endif
+	[F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
+	[F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
+};
+
+const struct xattr_handler *f2fs_xattr_handlers[] = {
+	&f2fs_xattr_user_handler,
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+	&f2fs_xattr_acl_access_handler,
+	&f2fs_xattr_acl_default_handler,
+#endif
+	&f2fs_xattr_trusted_handler,
+	&f2fs_xattr_advise_handler,
+	NULL,
+};
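+
With these tables registered via sb->s_xattr, the stock xattr syscalls work on f2fs files, the name prefix selecting the handler. A small hedged userspace example (the path is hypothetical; the mount must not use nouser_xattr):

	#include <string.h>
	#include <sys/xattr.h>

	int tag_file(const char *path)	/* e.g. "/mnt/f2fs/data" */
	{
		char val[16];

		/* "user." routes through f2fs_xattr_user_handler. */
		if (setxattr(path, "user.origin", "camera",
			     strlen("camera"), 0) < 0)
			return -1;
		return getxattr(path, "user.origin", val, sizeof(val)) < 0
			? -1 : 0;
	}
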
+
+static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
+{
+	const struct xattr_handler *handler = NULL;
+
+	if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
+		handler = f2fs_xattr_handler_map[name_index];
+	return handler;
+}
+
+int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
+		void *buffer, size_t buffer_size)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_xattr_entry *entry;
+	struct page *page;
+	void *base_addr;
+	int error = 0, found = 0;
+	int value_len, name_len;
+
+	if (name == NULL)
+		return -EINVAL;
+	name_len = strlen(name);
+
+	if (!fi->i_xattr_nid)
+		return -ENODATA;
+
+	page = get_node_page(sbi, fi->i_xattr_nid);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+	base_addr = page_address(page);
+
+	list_for_each_xattr(entry, base_addr) {
+		if (entry->e_name_index != name_index)
+			continue;
+		if (entry->e_name_len != name_len)
+			continue;
+		if (!memcmp(entry->e_name, name, name_len)) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found) {
+		error = -ENODATA;
+		goto cleanup;
+	}
+
+	value_len = le16_to_cpu(entry->e_value_size);
+
+	if (buffer && value_len > buffer_size) {
+		error = -ERANGE;
+		goto cleanup;
+	}
+
+	if (buffer) {
+		char *pval = entry->e_name + entry->e_name_len;
+		memcpy(buffer, pval, value_len);
+	}
+	error = value_len;
+
+cleanup:
+	f2fs_put_page(page, 1);
+	return error;
+}
+
+ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_xattr_entry *entry;
+	struct page *page;
+	void *base_addr;
+	int error = 0;
+	size_t rest = buffer_size;
+
+	if (!fi->i_xattr_nid)
+		return 0;
+
+	page = get_node_page(sbi, fi->i_xattr_nid);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+	base_addr = page_address(page);
+
+	list_for_each_xattr(entry, base_addr) {
+		const struct xattr_handler *handler =
+			f2fs_xattr_handler(entry->e_name_index);
+		size_t size;
+
+		if (!handler)
+			continue;
+
+		size = handler->list(dentry, buffer, rest, entry->e_name,
+				entry->e_name_len, handler->flags);
+		if (buffer && size > rest) {
+			error = -ERANGE;
+			goto cleanup;
+		}
+
+		if (buffer)
+			buffer += size;
+		rest -= size;
+	}
+	error = buffer_size - rest;
+cleanup:
+	f2fs_put_page(page, 1);
+	return error;
+}
+
+int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
+					const void *value, size_t value_len)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_xattr_header *header = NULL;
+	struct f2fs_xattr_entry *here, *last;
+	struct page *page;
+	void *base_addr;
+	int error, found, free, name_len, newsize;
+	char *pval;
+
+	if (name == NULL)
+		return -EINVAL;
+	name_len = strlen(name);
+
+	if (value == NULL)
+		value_len = 0;
+
+	if (name_len > 255 || value_len > MAX_VALUE_LEN)
+		return -ERANGE;
+
+	mutex_lock_op(sbi, NODE_NEW);
+	if (!fi->i_xattr_nid) {
+		/* Allocate new attribute block */
+		struct dnode_of_data dn;
+
+		if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
+			mutex_unlock_op(sbi, NODE_NEW);
+			return -ENOSPC;
+		}
+		set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
+		mark_inode_dirty(inode);
+
+		page = new_node_page(&dn, XATTR_NODE_OFFSET);
+		if (IS_ERR(page)) {
+			alloc_nid_failed(sbi, fi->i_xattr_nid);
+			fi->i_xattr_nid = 0;
+			mutex_unlock_op(sbi, NODE_NEW);
+			return PTR_ERR(page);
+		}
+
+		alloc_nid_done(sbi, fi->i_xattr_nid);
+		base_addr = page_address(page);
+		header = XATTR_HDR(base_addr);
+		header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
+		header->h_refcount = cpu_to_le32(1);
+	} else {
+		/* The inode already has an extended attribute block. */
+		page = get_node_page(sbi, fi->i_xattr_nid);
+		if (IS_ERR(page)) {
+			mutex_unlock_op(sbi, NODE_NEW);
+			return PTR_ERR(page);
+		}
+
+		base_addr = page_address(page);
+		header = XATTR_HDR(base_addr);
+	}
+
+	if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
+		error = -EIO;
+		goto cleanup;
+	}
+
+	/* find entry with wanted name. */
+	found = 0;
+	list_for_each_xattr(here, base_addr) {
+		if (here->e_name_index != name_index)
+			continue;
+		if (here->e_name_len != name_len)
+			continue;
+		if (!memcmp(here->e_name, name, name_len)) {
+			found = 1;
+			break;
+		}
+	}
+
+	last = here;
+
+	while (!IS_XATTR_LAST_ENTRY(last))
+		last = XATTR_NEXT_ENTRY(last);
+
+	newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
+			name_len + value_len);
+
+	/* 1. Check space */
+	if (value) {
+		/* A NULL value means removal and needs no space check;
+		 * for an add or update, calculate the free space,
+		 * counting any entry about to be replaced as free.
+		 */
+		free = MIN_OFFSET - ((char *)last - (char *)header);
+		if (found)
+			free = free - ENTRY_SIZE(here);
+
+		if (free < newsize) {
+			error = -ENOSPC;
+			goto cleanup;
+		}
+	}
+
+	/* 2. Remove old entry */
+	if (found) {
+		/* An entry with this name exists: slide the tail of the
+		 * list down over it and zero the vacated space.
+		 */
+		struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
+		int oldsize = ENTRY_SIZE(here);
+
+		memmove(here, next, (char *)last - (char *)next);
+		last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
+		memset(last, 0, oldsize);
+	}
+
+	/* 3. Write new entry */
+	if (value) {
+		/* Any old entry has been removed above, so the new
+		 * entry is simply appended at the end of the list. */
+		memset(last, 0, newsize);
+		last->e_name_index = name_index;
+		last->e_name_len = name_len;
+		memcpy(last->e_name, name, name_len);
+		pval = last->e_name + name_len;
+		memcpy(pval, value, value_len);
+		last->e_value_size = cpu_to_le16(value_len);
+	}
+
+	set_page_dirty(page);
+	f2fs_put_page(page, 1);
+
+	if (is_inode_flag_set(fi, FI_ACL_MODE)) {
+		inode->i_mode = fi->i_acl_mode;
+		inode->i_ctime = CURRENT_TIME;
+		clear_inode_flag(fi, FI_ACL_MODE);
+	}
+	f2fs_write_inode(inode, NULL);
+	mutex_unlock_op(sbi, NODE_NEW);
+
+	return 0;
+cleanup:
+	f2fs_put_page(page, 1);
+	mutex_unlock_op(sbi, NODE_NEW);
+	return error;
+}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
new file mode 100644
index 0000000..49c9558
--- /dev/null
+++ b/fs/f2fs/xattr.h
@@ -0,0 +1,145 @@
+/*
+ * fs/f2fs/xattr.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/xattr.h
+ *
+ * On-disk format of extended attributes for the ext2 filesystem.
+ *
+ * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __F2FS_XATTR_H__
+#define __F2FS_XATTR_H__
+
+#include <linux/init.h>
+#include <linux/xattr.h>
+
+/* Magic value in attribute blocks */
+#define F2FS_XATTR_MAGIC                0xF2F52011
+
+/* Maximum number of references to one attribute block */
+#define F2FS_XATTR_REFCOUNT_MAX         1024
+
+/* Name indexes */
+#define F2FS_SYSTEM_ADVISE_PREFIX		"system.advise"
+#define F2FS_XATTR_INDEX_USER			1
+#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS	2
+#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT	3
+#define F2FS_XATTR_INDEX_TRUSTED		4
+#define F2FS_XATTR_INDEX_LUSTRE			5
+#define F2FS_XATTR_INDEX_SECURITY		6
+#define F2FS_XATTR_INDEX_ADVISE			7
+
+struct f2fs_xattr_header {
+	__le32  h_magic;        /* magic number for identification */
+	__le32  h_refcount;     /* reference count */
+	__u32   h_reserved[4];  /* zero right now */
+};
+
+struct f2fs_xattr_entry {
+	__u8    e_name_index;
+	__u8    e_name_len;
+	__le16  e_value_size;   /* size of attribute value */
+	char    e_name[0];      /* attribute name */
+};
+
+#define XATTR_HDR(ptr)		((struct f2fs_xattr_header *)(ptr))
+#define XATTR_ENTRY(ptr)	((struct f2fs_xattr_entry *)(ptr))
+#define XATTR_FIRST_ENTRY(ptr)	(XATTR_ENTRY(XATTR_HDR(ptr)+1))
+#define XATTR_ROUND		(3)
+
+#define XATTR_ALIGN(size)	(((size) + XATTR_ROUND) & ~XATTR_ROUND)
+
+#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \
+			entry->e_name_len + le16_to_cpu(entry->e_value_size)))
+
+#define XATTR_NEXT_ENTRY(entry)	((struct f2fs_xattr_entry *)((char *)(entry) +\
+			ENTRY_SIZE(entry)))
+
+#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
+
+#define list_for_each_xattr(entry, addr) \
+		for (entry = XATTR_FIRST_ENTRY(addr);\
+				!IS_XATTR_LAST_ENTRY(entry);\
+				entry = XATTR_NEXT_ENTRY(entry))
+
+
+#define MIN_OFFSET	XATTR_ALIGN(PAGE_SIZE - \
+			sizeof(struct node_footer) - \
+			sizeof(__u32))
+
+#define MAX_VALUE_LEN	(MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \
+			sizeof(struct f2fs_xattr_entry))
+
+/*
+ * On-disk structure of f2fs_xattr
+ * We use only 1 block for xattr.
+ *
+ * +--------------------+
+ * | f2fs_xattr_header  |
+ * |                    |
+ * +--------------------+
+ * | f2fs_xattr_entry   |
+ * | .e_name_index = 1  |
+ * | .e_name_len = 3    |
+ * | .e_value_size = 14 |
+ * | .e_name = "foo"    |
+ * | "value_of_xattr"   |<- value_offs = e_name + e_name_len
+ * +--------------------+
+ * | f2fs_xattr_entry   |
+ * | .e_name_index = 4  |
+ * | .e_name = "bar"    |
+ * +--------------------+
+ * |                    |
+ * |        Free        |
+ * |                    |
+ * +--------------------+<- MIN_OFFSET
+ * |   node_footer      |
+ * | (nid, ino, offset) |
+ * +--------------------+
+ *
+ **/
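+
Since every inode's attributes must fit in this single block, MIN_OFFSET and MAX_VALUE_LEN above put a hard ceiling on one value. A worked example under assumed structure sizes (node_footer 24 bytes, f2fs_xattr_header 24 bytes, fixed part of f2fs_xattr_entry 4 bytes; the structs live elsewhere, so these figures are illustrative):

	MIN_OFFSET    = XATTR_ALIGN(4096 - 24 - 4) = 4068
	MAX_VALUE_LEN = 4068 - 24 - 4              = 4040 bytes
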
+
+#ifdef CONFIG_F2FS_FS_XATTR
+extern const struct xattr_handler f2fs_xattr_user_handler;
+extern const struct xattr_handler f2fs_xattr_trusted_handler;
+extern const struct xattr_handler f2fs_xattr_acl_access_handler;
+extern const struct xattr_handler f2fs_xattr_acl_default_handler;
+extern const struct xattr_handler f2fs_xattr_advise_handler;
+
+extern const struct xattr_handler *f2fs_xattr_handlers[];
+
+extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
+		const void *value, size_t value_len);
+extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
+		void *buffer, size_t buffer_size);
+extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
+		size_t buffer_size);
+
+#else
+
+#define f2fs_xattr_handlers	NULL
+static inline int f2fs_setxattr(struct inode *inode, int name_index,
+	const char *name, const void *value, size_t value_len)
+{
+	return -EOPNOTSUPP;
+}
+static inline int f2fs_getxattr(struct inode *inode, int name_index,
+		const char *name, void *buffer, size_t buffer_size)
+{
+	return -EOPNOTSUPP;
+}
+static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
+		size_t buffer_size)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#endif /* __F2FS_XATTR_H__ */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 2a18234..58bf744 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -461,8 +461,7 @@
 }
 
 /*
- * Return values: negative -> error, 0 -> not found, positive -> found,
- * value is the total amount of slots, including the shortname entry.
+ * Return values: negative -> error/not found, 0 -> found.
  */
 int fat_search_long(struct inode *inode, const unsigned char *name,
 		    int name_len, struct fat_slot_info *sinfo)
@@ -1255,7 +1254,7 @@
 
 	sinfo->nr_slots = nr_slots;
 
-	/* First stage: search free direcotry entries */
+	/* First stage: search free directory entries */
 	free_slots = nr_bhs = 0;
 	bh = prev = NULL;
 	pos = 0;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 623f36f..12701a5 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -29,6 +29,7 @@
 	unsigned short fs_fmask;
 	unsigned short fs_dmask;
 	unsigned short codepage;   /* Codepage for shortname conversions */
+	int time_offset;	   /* Offset of timestamps from UTC (in minutes) */
 	char *iocharset;           /* Charset used for filename input/display */
 	unsigned short shortname;  /* flags for shortname display/create rule */
 	unsigned char name_check;  /* r = relaxed, n = normal, s = strict */
@@ -45,7 +46,7 @@
 		 flush:1,	   /* write things quickly */
 		 nocase:1,	   /* Does this need case conversion? 0=need case conversion*/
 		 usefree:1,	   /* Use free_clusters for FAT32 */
-		 tz_utc:1,	   /* Filesystem timestamps are in UTC */
+		 tz_set:1,	   /* Filesystem timestamps' offset set */
 		 rodir:1,	   /* allow ATTR_RO for directory */
 		 discard:1,	   /* Issue discard requests on deletions */
 		 nfs:1;		   /* Do extra work needed for NFS export */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5bafaad..f8f4916 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -26,6 +26,7 @@
 #include <linux/writeback.h>
 #include <linux/log2.h>
 #include <linux/hash.h>
+#include <linux/blkdev.h>
 #include <asm/unaligned.h>
 #include "fat.h"
 
@@ -725,7 +726,8 @@
 	if (opts->allow_utime)
 		seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
 	if (sbi->nls_disk)
-		seq_printf(m, ",codepage=%s", sbi->nls_disk->charset);
+		/* strip "cp" prefix from displayed option */
+		seq_printf(m, ",codepage=%s", &sbi->nls_disk->charset[2]);
 	if (isvfat) {
 		if (sbi->nls_io)
 			seq_printf(m, ",iocharset=%s", sbi->nls_io->charset);
@@ -777,8 +779,12 @@
 	}
 	if (opts->flush)
 		seq_puts(m, ",flush");
-	if (opts->tz_utc)
-		seq_puts(m, ",tz=UTC");
+	if (opts->tz_set) {
+		if (opts->time_offset)
+			seq_printf(m, ",time_offset=%d", opts->time_offset);
+		else
+			seq_puts(m, ",tz=UTC");
+	}
 	if (opts->errors == FAT_ERRORS_CONT)
 		seq_puts(m, ",errors=continue");
 	else if (opts->errors == FAT_ERRORS_PANIC)
@@ -800,7 +806,8 @@
 	Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
 	Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
 	Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
-	Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_err,
+	Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
+	Opt_err,
 };
 
 static const match_table_t fat_tokens = {
@@ -825,6 +832,7 @@
 	{Opt_immutable, "sys_immutable"},
 	{Opt_flush, "flush"},
 	{Opt_tz_utc, "tz=UTC"},
+	{Opt_time_offset, "time_offset=%d"},
 	{Opt_err_cont, "errors=continue"},
 	{Opt_err_panic, "errors=panic"},
 	{Opt_err_ro, "errors=remount-ro"},
@@ -909,7 +917,7 @@
 	opts->utf8 = opts->unicode_xlate = 0;
 	opts->numtail = 1;
 	opts->usefree = opts->nocase = 0;
-	opts->tz_utc = 0;
+	opts->tz_set = 0;
 	opts->nfs = 0;
 	opts->errors = FAT_ERRORS_RO;
 	*debug = 0;
@@ -965,48 +973,57 @@
 			break;
 		case Opt_uid:
 			if (match_int(&args[0], &option))
-				return 0;
+				return -EINVAL;
 			opts->fs_uid = make_kuid(current_user_ns(), option);
 			if (!uid_valid(opts->fs_uid))
-				return 0;
+				return -EINVAL;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
-				return 0;
+				return -EINVAL;
 			opts->fs_gid = make_kgid(current_user_ns(), option);
 			if (!gid_valid(opts->fs_gid))
-				return 0;
+				return -EINVAL;
 			break;
 		case Opt_umask:
 			if (match_octal(&args[0], &option))
-				return 0;
+				return -EINVAL;
 			opts->fs_fmask = opts->fs_dmask = option;
 			break;
 		case Opt_dmask:
 			if (match_octal(&args[0], &option))
-				return 0;
+				return -EINVAL;
 			opts->fs_dmask = option;
 			break;
 		case Opt_fmask:
 			if (match_octal(&args[0], &option))
-				return 0;
+				return -EINVAL;
 			opts->fs_fmask = option;
 			break;
 		case Opt_allow_utime:
 			if (match_octal(&args[0], &option))
-				return 0;
+				return -EINVAL;
 			opts->allow_utime = option & (S_IWGRP | S_IWOTH);
 			break;
 		case Opt_codepage:
 			if (match_int(&args[0], &option))
-				return 0;
+				return -EINVAL;
 			opts->codepage = option;
 			break;
 		case Opt_flush:
 			opts->flush = 1;
 			break;
+		case Opt_time_offset:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			if (option < -12 * 60 || option > 12 * 60)
+				return -EINVAL;
+			opts->tz_set = 1;
+			opts->time_offset = option;
+			break;
 		case Opt_tz_utc:
-			opts->tz_utc = 1;
+			opts->tz_set = 1;
+			opts->time_offset = 0;
 			break;
 		case Opt_err_cont:
 			opts->errors = FAT_ERRORS_CONT;
@@ -1327,7 +1344,7 @@
 	sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
 	if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
 		if (!silent)
-			fat_msg(sb, KERN_ERR, "bogus directroy-entries per block"
+			fat_msg(sb, KERN_ERR, "bogus directory-entries per block"
 			       " (%u)", sbi->dir_entries);
 		brelse(bh);
 		goto out_invalid;
@@ -1431,6 +1448,14 @@
 		goto out_fail;
 	}
 
+	if (sbi->options.discard) {
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
+		if (!blk_queue_discard(q))
+			fat_msg(sb, KERN_WARNING,
+					"mounting with \"discard\" option, but "
+					"the device does not support discard");
+	}
+
 	return 0;
 
 out_invalid:
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 6d93360..359d307 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -135,6 +135,10 @@
 		}
 		if (ret < 0)
 			return ret;
+		/*
+		 * FIXME: Although we could add this cache entry here,
+		 * fat_cache_add() assumes it is called after a linear
+		 * search with fat_cache_id.
+		 */
 //		fat_cache_add(inode, new_fclus, new_dclus);
 	} else {
 		MSDOS_I(inode)->i_start = new_dclus;
@@ -212,8 +216,10 @@
 		   + days_in_year[month] + day
 		   + DAYS_DELTA) * SECS_PER_DAY;
 
-	if (!sbi->options.tz_utc)
+	if (!sbi->options.tz_set)
 		second += sys_tz.tz_minuteswest * SECS_PER_MIN;
+	else
+		second -= sbi->options.time_offset * SECS_PER_MIN;
 
 	if (time_cs) {
 		ts->tv_sec = second + (time_cs / 100);
@@ -229,8 +235,9 @@
 		       __le16 *time, __le16 *date, u8 *time_cs)
 {
 	struct tm tm;
-	time_to_tm(ts->tv_sec, sbi->options.tz_utc ? 0 :
-		   -sys_tz.tz_minuteswest * 60, &tm);
+	time_to_tm(ts->tv_sec,
+		   (sbi->options.tz_set ? sbi->options.time_offset :
+		   -sys_tz.tz_minuteswest) * SECS_PER_MIN, &tm);
 
 	/*  FAT can only support year between 1980 to 2107 */
 	if (tm.tm_year < 1980 - 1900) {
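
A worked example of the new conversion: for media stamped in UTC+2 local time, mounting with time_offset=120 makes fat_time_fat2unix() subtract 120 * SECS_PER_MIN = 7200 seconds from the naively decoded value, e.g.

	on-disk 12:00 (really UTC+2)  ->  12:00 - 120 min  =  10:00 UTC

tz=UTC becomes the time_offset=0 special case, and the old sys_tz fallback now applies only when neither option is given.
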
diff --git a/fs/fhandle.c b/fs/fhandle.c
index cccdc87..999ff5c 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -52,7 +52,7 @@
 	handle_bytes = handle_dwords * sizeof(u32);
 	handle->handle_bytes = handle_bytes;
 	if ((handle->handle_bytes > f_handle.handle_bytes) ||
-	    (retval == 255) || (retval == -ENOSPC)) {
+	    (retval == FILEID_INVALID) || (retval == -ENOSPC)) {
 		/* As per old exportfs_encode_fh documentation
 		 * we could return ENOSPC to indicate overflow
 		 * But file system returned 255 always. So handle
diff --git a/fs/file_table.c b/fs/file_table.c
index a72bf9d..de9e965 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -458,8 +458,8 @@
 		spin_unlock(&f->f_lock);
 		if (file_check_writeable(f) != 0)
 			continue;
+		__mnt_drop_write(f->f_path.mnt);
 		file_release_write(f);
-		mnt_drop_write_file(f);
 	} while_file_list_for_each_entry;
 	lg_global_unlock(&files_lglock);
 }
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index 6a3c48a..b52aed1 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -314,10 +314,10 @@
  */
 void fscache_io_error(struct fscache_cache *cache)
 {
-	set_bit(FSCACHE_IOERROR, &cache->flags);
-
-	printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n",
-	       cache->ops->name);
+	if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags))
+		printk(KERN_ERR "FS-Cache:"
+		       " Cache '%s' stopped due to I/O error\n",
+		       cache->ops->name);
 }
 EXPORT_SYMBOL(fscache_io_error);
 
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 9905350..8dcb114 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -370,6 +370,66 @@
 }
 
 /*
+ * Invalidate an object.  Callable with spinlocks held.
+ */
+void __fscache_invalidate(struct fscache_cookie *cookie)
+{
+	struct fscache_object *object;
+
+	_enter("{%s}", cookie->def->name);
+
+	fscache_stat(&fscache_n_invalidates);
+
+	/* Only permit invalidation of data files.  Invalidating an index will
+	 * require the caller to release all its attachments to the tree rooted
+	 * there, and if it's doing that, it may as well just retire the
+	 * cookie.
+	 */
+	ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
+
+	/* We will be updating the cookie too. */
+	BUG_ON(!cookie->def->get_aux);
+
+	/* If there's an object, we tell the object state machine to handle the
+	 * invalidation on our behalf, otherwise there's nothing to do.
+	 */
+	if (!hlist_empty(&cookie->backing_objects)) {
+		spin_lock(&cookie->lock);
+
+		if (!hlist_empty(&cookie->backing_objects) &&
+		    !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING,
+				      &cookie->flags)) {
+			object = hlist_entry(cookie->backing_objects.first,
+					     struct fscache_object,
+					     cookie_link);
+			if (object->state < FSCACHE_OBJECT_DYING)
+				fscache_raise_event(
+					object, FSCACHE_OBJECT_EV_INVALIDATE);
+		}
+
+		spin_unlock(&cookie->lock);
+	}
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_invalidate);
+
+/*
+ * Wait for object invalidation to complete.
+ */
+void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
+{
+	_enter("%p", cookie);
+
+	wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
+		    fscache_wait_bit_interruptible,
+		    TASK_UNINTERRUPTIBLE);
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_wait_on_invalidate);
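+
A netfs that learns of a third-party change on the server is expected to kick this machinery rather than retire the cookie. A hedged sketch of the caller side, assuming the fscache_invalidate()/fscache_wait_on_invalidate() wrappers that front these two exports:

	/* Sketch: mark cached data stale after a server-side change. */
	static void my_netfs_server_changed(struct fscache_cookie *cookie)
	{
		fscache_invalidate(cookie);		/* raises EV_INVALIDATE */
		fscache_wait_on_invalidate(cookie);	/* optionally block until
							 * the object is emptied */
	}
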
+
+/*
  * update the index entries backing a cookie
  */
 void __fscache_update_cookie(struct fscache_cookie *cookie)
@@ -442,16 +502,34 @@
 
 	event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE;
 
+try_again:
 	spin_lock(&cookie->lock);
 
 	/* break links with all the active objects */
 	while (!hlist_empty(&cookie->backing_objects)) {
+		int n_reads;
 		object = hlist_entry(cookie->backing_objects.first,
 				     struct fscache_object,
 				     cookie_link);
 
 		_debug("RELEASE OBJ%x", object->debug_id);
 
+		set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags);
+		n_reads = atomic_read(&object->n_reads);
+		if (n_reads) {
+			int n_ops = object->n_ops;
+			int n_in_progress = object->n_in_progress;
+			spin_unlock(&cookie->lock);
+			printk(KERN_ERR "FS-Cache:"
+			       " Cookie '%s' still has %d outstanding reads (%d,%d)\n",
+			       cookie->def->name,
+			       n_reads, n_ops, n_in_progress);
+			wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS,
+				    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+			printk("Wait finished\n");
+			goto try_again;
+		}
+
 		/* detach each cache object from the object cookie */
 		spin_lock(&object->lock);
 		hlist_del_init(&object->cookie_link);
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index f6aad48..ee38fef 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -121,12 +121,19 @@
 				       struct fscache_operation *);
 extern int fscache_submit_op(struct fscache_object *,
 			     struct fscache_operation *);
-extern int fscache_cancel_op(struct fscache_operation *);
+extern int fscache_cancel_op(struct fscache_operation *,
+			     void (*)(struct fscache_operation *));
+extern void fscache_cancel_all_ops(struct fscache_object *);
 extern void fscache_abort_object(struct fscache_object *);
 extern void fscache_start_operations(struct fscache_object *);
 extern void fscache_operation_gc(struct work_struct *);
 
 /*
+ * page.c
+ */
+extern void fscache_invalidate_writes(struct fscache_cookie *);
+
+/*
  * proc.c
  */
 #ifdef CONFIG_PROC_FS
@@ -194,6 +201,7 @@
 extern atomic_t fscache_n_store_vmscan_gone;
 extern atomic_t fscache_n_store_vmscan_busy;
 extern atomic_t fscache_n_store_vmscan_cancelled;
+extern atomic_t fscache_n_store_vmscan_wait;
 
 extern atomic_t fscache_n_marks;
 extern atomic_t fscache_n_uncaches;
@@ -205,6 +213,9 @@
 extern atomic_t fscache_n_acquires_nobufs;
 extern atomic_t fscache_n_acquires_oom;
 
+extern atomic_t fscache_n_invalidates;
+extern atomic_t fscache_n_invalidates_run;
+
 extern atomic_t fscache_n_updates;
 extern atomic_t fscache_n_updates_null;
 extern atomic_t fscache_n_updates_run;
@@ -237,6 +248,7 @@
 extern atomic_t fscache_n_cop_lookup_object;
 extern atomic_t fscache_n_cop_lookup_complete;
 extern atomic_t fscache_n_cop_grab_object;
+extern atomic_t fscache_n_cop_invalidate_object;
 extern atomic_t fscache_n_cop_update_object;
 extern atomic_t fscache_n_cop_drop_object;
 extern atomic_t fscache_n_cop_put_object;
@@ -278,6 +290,7 @@
 static inline void fscache_raise_event(struct fscache_object *object,
 				       unsigned event)
 {
+	BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS);
 	if (!test_and_set_bit(event, &object->events) &&
 	    test_bit(event, &object->event_mask))
 		fscache_enqueue_object(object);
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index ebe29c5..f27c89d 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -245,7 +245,7 @@
 		   obj->n_in_progress,
 		   obj->n_exclusive,
 		   atomic_read(&obj->n_reads),
-		   obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK,
+		   obj->event_mask,
 		   obj->events,
 		   obj->flags,
 		   work_busy(&obj->work));
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index b6b897c..50d41c1 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,6 +14,7 @@
 
 #define FSCACHE_DEBUG_LEVEL COOKIE
 #include <linux/module.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
@@ -22,6 +23,7 @@
 	[FSCACHE_OBJECT_CREATING]	= "OBJECT_CREATING",
 	[FSCACHE_OBJECT_AVAILABLE]	= "OBJECT_AVAILABLE",
 	[FSCACHE_OBJECT_ACTIVE]		= "OBJECT_ACTIVE",
+	[FSCACHE_OBJECT_INVALIDATING]	= "OBJECT_INVALIDATING",
 	[FSCACHE_OBJECT_UPDATING]	= "OBJECT_UPDATING",
 	[FSCACHE_OBJECT_DYING]		= "OBJECT_DYING",
 	[FSCACHE_OBJECT_LC_DYING]	= "OBJECT_LC_DYING",
@@ -39,6 +41,7 @@
 	[FSCACHE_OBJECT_CREATING]	= "CRTN",
 	[FSCACHE_OBJECT_AVAILABLE]	= "AVBL",
 	[FSCACHE_OBJECT_ACTIVE]		= "ACTV",
+	[FSCACHE_OBJECT_INVALIDATING]	= "INVL",
 	[FSCACHE_OBJECT_UPDATING]	= "UPDT",
 	[FSCACHE_OBJECT_DYING]		= "DYNG",
 	[FSCACHE_OBJECT_LC_DYING]	= "LCDY",
@@ -54,6 +57,7 @@
 static void fscache_initialise_object(struct fscache_object *);
 static void fscache_lookup_object(struct fscache_object *);
 static void fscache_object_available(struct fscache_object *);
+static void fscache_invalidate_object(struct fscache_object *);
 static void fscache_release_object(struct fscache_object *);
 static void fscache_withdraw_object(struct fscache_object *);
 static void fscache_enqueue_dependents(struct fscache_object *);
@@ -79,6 +83,15 @@
 }
 
 /*
+ * Notify netfs of invalidation completion.
+ */
+static inline void fscache_invalidation_complete(struct fscache_cookie *cookie)
+{
+	if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
+		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
+}
+
+/*
  * process events that have been sent to an object's state machine
  * - initiates parent lookup
  * - does object lookup
@@ -90,6 +103,7 @@
 {
 	enum fscache_object_state new_state;
 	struct fscache_cookie *cookie;
+	int event;
 
 	ASSERT(object != NULL);
 
@@ -101,7 +115,8 @@
 		/* wait for the parent object to become ready */
 	case FSCACHE_OBJECT_INIT:
 		object->event_mask =
-			ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED);
+			FSCACHE_OBJECT_EVENTS_MASK &
+			~(1 << FSCACHE_OBJECT_EV_CLEARED);
 		fscache_initialise_object(object);
 		goto done;
 
@@ -125,6 +140,16 @@
 	case FSCACHE_OBJECT_ACTIVE:
 		goto active_transit;
 
+		/* Invalidate an object on disk */
+	case FSCACHE_OBJECT_INVALIDATING:
+		clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events);
+		fscache_stat(&fscache_n_invalidates_run);
+		fscache_stat(&fscache_n_cop_invalidate_object);
+		fscache_invalidate_object(object);
+		fscache_stat_d(&fscache_n_cop_invalidate_object);
+		fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
+		goto active_transit;
+
 		/* update the object metadata on disk */
 	case FSCACHE_OBJECT_UPDATING:
 		clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events);
@@ -251,13 +276,17 @@
 
 	/* determine the transition from a lookup state */
 lookup_transit:
-	switch (fls(object->events & object->event_mask) - 1) {
+	event = fls(object->events & object->event_mask) - 1;
+	switch (event) {
 	case FSCACHE_OBJECT_EV_WITHDRAW:
 	case FSCACHE_OBJECT_EV_RETIRE:
 	case FSCACHE_OBJECT_EV_RELEASE:
 	case FSCACHE_OBJECT_EV_ERROR:
 		new_state = FSCACHE_OBJECT_LC_DYING;
 		goto change_state;
+	case FSCACHE_OBJECT_EV_INVALIDATE:
+		new_state = FSCACHE_OBJECT_INVALIDATING;
+		goto change_state;
 	case FSCACHE_OBJECT_EV_REQUEUE:
 		goto done;
 	case -1:
@@ -268,13 +297,17 @@
 
 	/* determine the transition from an active state */
 active_transit:
-	switch (fls(object->events & object->event_mask) - 1) {
+	event = fls(object->events & object->event_mask) - 1;
+	switch (event) {
 	case FSCACHE_OBJECT_EV_WITHDRAW:
 	case FSCACHE_OBJECT_EV_RETIRE:
 	case FSCACHE_OBJECT_EV_RELEASE:
 	case FSCACHE_OBJECT_EV_ERROR:
 		new_state = FSCACHE_OBJECT_DYING;
 		goto change_state;
+	case FSCACHE_OBJECT_EV_INVALIDATE:
+		new_state = FSCACHE_OBJECT_INVALIDATING;
+		goto change_state;
 	case FSCACHE_OBJECT_EV_UPDATE:
 		new_state = FSCACHE_OBJECT_UPDATING;
 		goto change_state;
@@ -287,7 +320,8 @@
 
 	/* determine the transition from a terminal state */
 terminal_transit:
-	switch (fls(object->events & object->event_mask) - 1) {
+	event = fls(object->events & object->event_mask) - 1;
+	switch (event) {
 	case FSCACHE_OBJECT_EV_WITHDRAW:
 		new_state = FSCACHE_OBJECT_WITHDRAWING;
 		goto change_state;
@@ -320,8 +354,8 @@
 
 unsupported_event:
 	printk(KERN_ERR "FS-Cache:"
-	       " Unsupported event %lx [mask %lx] in state %s\n",
-	       object->events, object->event_mask,
+	       " Unsupported event %d [%lx/%lx] in state %s\n",
+	       event, object->events, object->event_mask,
 	       fscache_object_states[object->state]);
 	BUG();
 }
@@ -587,8 +621,6 @@
 	if (object->n_in_progress == 0) {
 		if (object->n_ops > 0) {
 			ASSERTCMP(object->n_ops, >=, object->n_obj_ops);
-			ASSERTIF(object->n_ops > object->n_obj_ops,
-				 !list_empty(&object->pending_ops));
 			fscache_start_operations(object);
 		} else {
 			ASSERT(list_empty(&object->pending_ops));
@@ -681,6 +713,7 @@
 		if (object->cookie == cookie) {
 			hlist_del_init(&object->cookie_link);
 			object->cookie = NULL;
+			fscache_invalidation_complete(cookie);
 			detached = true;
 		}
 		spin_unlock(&cookie->lock);
@@ -890,3 +923,55 @@
 	return result;
 }
 EXPORT_SYMBOL(fscache_check_aux);
+
+/*
+ * Asynchronously invalidate an object.
+ */
+static void fscache_invalidate_object(struct fscache_object *object)
+{
+	struct fscache_operation *op;
+	struct fscache_cookie *cookie = object->cookie;
+
+	_enter("{OBJ%x}", object->debug_id);
+
+	/* Reject any new read/write ops and abort any that are pending. */
+	fscache_invalidate_writes(cookie);
+	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+	fscache_cancel_all_ops(object);
+
+	/* Now we have to wait for in-progress reads and writes */
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op) {
+		fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR);
+		_leave(" [ENOMEM]");
+		return;
+	}
+
+	fscache_operation_init(op, object->cache->ops->invalidate_object, NULL);
+	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
+
+	spin_lock(&cookie->lock);
+	if (fscache_submit_exclusive_op(object, op) < 0)
+		goto submit_op_failed;
+	spin_unlock(&cookie->lock);
+	fscache_put_operation(op);
+
+	/* Once we've completed the invalidation, we know there will be no data
+	 * stored in the cache and thus we can reinstate the data-check-skip
+	 * optimisation.
+	 */
+	set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+
+	/* We can allow read and write requests to come in once again.  They'll
+	 * queue up behind our exclusive invalidation operation.
+	 */
+	fscache_invalidation_complete(cookie);
+	_leave("");
+	return;
+
+submit_op_failed:
+	spin_unlock(&cookie->lock);
+	kfree(op);
+	fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR);
+	_leave(" [EIO]");
+}
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 30afdfa..762a9ec 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -37,6 +37,7 @@
 	ASSERT(op->processor != NULL);
 	ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
 
 	fscache_stat(&fscache_n_op_enqueue);
 	switch (op->flags & FSCACHE_OP_TYPE) {
@@ -64,6 +65,9 @@
 static void fscache_run_op(struct fscache_object *object,
 			   struct fscache_operation *op)
 {
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
+
+	op->state = FSCACHE_OP_ST_IN_PROGRESS;
 	object->n_in_progress++;
 	if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
 		wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -84,18 +88,21 @@
 
 	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
 
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
+	ASSERTCMP(atomic_read(&op->usage), >, 0);
+
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
 	ASSERT(list_empty(&op->pend_link));
 
-	ret = -ENOBUFS;
+	op->state = FSCACHE_OP_ST_PENDING;
 	if (fscache_object_is_active(object)) {
 		op->object = object;
 		object->n_ops++;
 		object->n_exclusive++;	/* reads and writes must wait */
 
-		if (object->n_ops > 1) {
+		if (object->n_in_progress > 0) {
 			atomic_inc(&op->usage);
 			list_add_tail(&op->pend_link, &object->pending_ops);
 			fscache_stat(&fscache_n_op_pend);
@@ -121,8 +128,11 @@
 		fscache_stat(&fscache_n_op_pend);
 		ret = 0;
 	} else {
-		/* not allowed to submit ops in any other state */
-		BUG();
+		/* If we're in any other state, there must have been an I/O
+		 * error of some nature.
+		 */
+		ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags));
+		ret = -EIO;
 	}
 
 	spin_unlock(&object->lock);
@@ -186,6 +196,7 @@
 	_enter("{OBJ%x OP%x},{%u}",
 	       object->debug_id, op->debug_id, atomic_read(&op->usage));
 
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
 	spin_lock(&object->lock);
@@ -196,6 +207,7 @@
 	ostate = object->state;
 	smp_rmb();
 
+	op->state = FSCACHE_OP_ST_PENDING;
 	if (fscache_object_is_active(object)) {
 		op->object = object;
 		object->n_ops++;
@@ -225,12 +237,15 @@
 		   object->state == FSCACHE_OBJECT_LC_DYING ||
 		   object->state == FSCACHE_OBJECT_WITHDRAWING) {
 		fscache_stat(&fscache_n_op_rejected);
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	} else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
 		fscache_report_unexpected_submission(object, op, ostate);
 		ASSERT(!fscache_object_is_active(object));
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	} else {
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	}
 
@@ -283,20 +298,28 @@
 /*
  * cancel an operation that's pending on an object
  */
-int fscache_cancel_op(struct fscache_operation *op)
+int fscache_cancel_op(struct fscache_operation *op,
+		      void (*do_cancel)(struct fscache_operation *))
 {
 	struct fscache_object *object = op->object;
 	int ret;
 
 	_enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id);
 
+	ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING);
+	ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED);
+	ASSERTCMP(atomic_read(&op->usage), >, 0);
+
 	spin_lock(&object->lock);
 
 	ret = -EBUSY;
-	if (!list_empty(&op->pend_link)) {
+	if (op->state == FSCACHE_OP_ST_PENDING) {
+		ASSERT(!list_empty(&op->pend_link));
 		fscache_stat(&fscache_n_op_cancelled);
 		list_del_init(&op->pend_link);
-		object->n_ops--;
+		if (do_cancel)
+			do_cancel(op);
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
 			object->n_exclusive--;
 		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
@@ -311,6 +334,70 @@
 }
 
 /*
+ * Cancel all pending operations on an object
+ */
+void fscache_cancel_all_ops(struct fscache_object *object)
+{
+	struct fscache_operation *op;
+
+	_enter("OBJ%x", object->debug_id);
+
+	spin_lock(&object->lock);
+
+	while (!list_empty(&object->pending_ops)) {
+		op = list_entry(object->pending_ops.next,
+				struct fscache_operation, pend_link);
+		fscache_stat(&fscache_n_op_cancelled);
+		list_del_init(&op->pend_link);
+
+		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
+		op->state = FSCACHE_OP_ST_CANCELLED;
+
+		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
+			object->n_exclusive--;
+		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
+			wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
+		fscache_put_operation(op);
+		cond_resched_lock(&object->lock);
+	}
+
+	spin_unlock(&object->lock);
+	_leave("");
+}
+
+/*
+ * Record the completion or cancellation of an in-progress operation.
+ */
+void fscache_op_complete(struct fscache_operation *op, bool cancelled)
+{
+	struct fscache_object *object = op->object;
+
+	_enter("OBJ%x", object->debug_id);
+
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
+	ASSERTCMP(object->n_in_progress, >, 0);
+	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags),
+		    object->n_exclusive, >, 0);
+	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags),
+		    object->n_in_progress, ==, 1);
+
+	spin_lock(&object->lock);
+
+	op->state = cancelled ?
+		FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE;
+
+	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
+		object->n_exclusive--;
+	object->n_in_progress--;
+	if (object->n_in_progress == 0)
+		fscache_start_operations(object);
+
+	spin_unlock(&object->lock);
+	_leave("");
+}
+EXPORT_SYMBOL(fscache_op_complete);
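+
Cache backends are expected to call this exactly once per operation that reached the in-progress state, whether the work finished or was abandoned. A minimal hedged sketch of a backend processor (my_cache_do_op and the I/O it performs are hypothetical):

	static void my_cache_do_op(struct fscache_operation *op)
	{
		bool cancelled = false;

		/* ... perform the cache I/O; set cancelled on abandonment ... */

		/* Drops n_in_progress and may restart queued ops. */
		fscache_op_complete(op, cancelled);
	}
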
+
+/*
  * release an operation
  * - queues pending ops if this is the last in-progress op
  */
@@ -328,8 +415,9 @@
 		return;
 
 	_debug("PUT OP");
-	if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
-		BUG();
+	ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE,
+		    op->state, ==, FSCACHE_OP_ST_CANCELLED);
+	op->state = FSCACHE_OP_ST_DEAD;
 
 	fscache_stat(&fscache_n_op_release);
 
@@ -340,8 +428,14 @@
 
 	object = op->object;
 
-	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags))
-		atomic_dec(&object->n_reads);
+	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) {
+		if (atomic_dec_and_test(&object->n_reads)) {
+			clear_bit(FSCACHE_COOKIE_WAITING_ON_READS,
+				  &object->cookie->flags);
+			wake_up_bit(&object->cookie->flags,
+				    FSCACHE_COOKIE_WAITING_ON_READS);
+		}
+	}
 
 	/* now... we may get called with the object spinlock held, so we
 	 * complete the cleanup here only if we can immediately acquire the
@@ -359,16 +453,6 @@
 		return;
 	}
 
-	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
-		ASSERTCMP(object->n_exclusive, >, 0);
-		object->n_exclusive--;
-	}
-
-	ASSERTCMP(object->n_in_progress, >, 0);
-	object->n_in_progress--;
-	if (object->n_in_progress == 0)
-		fscache_start_operations(object);
-
 	ASSERTCMP(object->n_ops, >, 0);
 	object->n_ops--;
 	if (object->n_ops == 0)
@@ -407,23 +491,14 @@
 		spin_unlock(&cache->op_gc_list_lock);
 
 		object = op->object;
+		spin_lock(&object->lock);
 
 		_debug("GC DEFERRED REL OBJ%x OP%x",
 		       object->debug_id, op->debug_id);
 		fscache_stat(&fscache_n_op_gc);
 
 		ASSERTCMP(atomic_read(&op->usage), ==, 0);
-
-		spin_lock(&object->lock);
-		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
-			ASSERTCMP(object->n_exclusive, >, 0);
-			object->n_exclusive--;
-		}
-
-		ASSERTCMP(object->n_in_progress, >, 0);
-		object->n_in_progress--;
-		if (object->n_in_progress == 0)
-			fscache_start_operations(object);
+		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD);
 
 		ASSERTCMP(object->n_ops, >, 0);
 		object->n_ops--;
@@ -431,6 +506,7 @@
 			fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);
 
 		spin_unlock(&object->lock);
+		kfree(op);
 
 	} while (count++ < 20);
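
Taken together, the operation.c changes give every operation an explicit lifecycle, policed by the ASSERTCMP()s sprinkled above. A hedged summary of the intended transitions (names from the FSCACHE_OP_ST_* constants):

	INITIALISED --submit--> PENDING --run--> IN_PROGRESS --complete--> COMPLETE
	PENDING --fscache_cancel_op--> CANCELLED
	IN_PROGRESS --fscache_op_complete(cancelled)--> CANCELLED
	COMPLETE or CANCELLED --final put--> DEAD
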
 
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 3f7a59b..ff000e5 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -56,6 +56,7 @@
 
 	_enter("%p,%p,%x", cookie, page, gfp);
 
+try_again:
 	rcu_read_lock();
 	val = radix_tree_lookup(&cookie->stores, page->index);
 	if (!val) {
@@ -104,11 +105,19 @@
 	return true;
 
 page_busy:
-	/* we might want to wait here, but that could deadlock the allocator as
-	 * the work threads writing to the cache may all end up sleeping
-	 * on memory allocation */
-	fscache_stat(&fscache_n_store_vmscan_busy);
-	return false;
+	/* We will wait here if we're allowed to, but that could deadlock the
+	 * allocator as the work threads writing to the cache may all end up
+	 * sleeping on memory allocation, so we may need to impose a timeout
+	 * too. */
+	if (!(gfp & __GFP_WAIT)) {
+		fscache_stat(&fscache_n_store_vmscan_busy);
+		return false;
+	}
+
+	fscache_stat(&fscache_n_store_vmscan_wait);
+	__fscache_wait_on_page_write(cookie, page);
+	gfp &= ~__GFP_WAIT;
+	goto try_again;
 }
 EXPORT_SYMBOL(__fscache_maybe_release_page);
 
@@ -162,6 +171,7 @@
 			fscache_abort_object(object);
 	}
 
+	fscache_op_complete(op, true);
 	_leave("");
 }
 
@@ -223,6 +233,8 @@
 
 	_enter("{OP%x}", op->op.debug_id);
 
+	ASSERTCMP(op->n_pages, ==, 0);
+
 	fscache_hist(fscache_retrieval_histogram, op->start_time);
 	if (op->context)
 		fscache_put_context(op->op.object->cookie, op->context);
@@ -291,6 +303,17 @@
 }
 
 /*
+ * Handle cancellation of a pending retrieval op
+ */
+static void fscache_do_cancel_retrieval(struct fscache_operation *_op)
+{
+	struct fscache_retrieval *op =
+		container_of(_op, struct fscache_retrieval, op);
+
+	op->n_pages = 0;
+}
+
+/*
  * wait for an object to become active (or dead)
  */
 static int fscache_wait_for_retrieval_activation(struct fscache_object *object,
@@ -307,8 +330,8 @@
 	fscache_stat(stat_op_waits);
 	if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
 			fscache_wait_bit_interruptible,
-			TASK_INTERRUPTIBLE) < 0) {
-		ret = fscache_cancel_op(&op->op);
+			TASK_INTERRUPTIBLE) != 0) {
+		ret = fscache_cancel_op(&op->op, fscache_do_cancel_retrieval);
 		if (ret == 0)
 			return -ERESTARTSYS;
 
@@ -320,7 +343,14 @@
 	_debug("<<< GO");
 
 check_if_dead:
+	if (op->op.state == FSCACHE_OP_ST_CANCELLED) {
+		fscache_stat(stat_object_dead);
+		_leave(" = -ENOBUFS [cancelled]");
+		return -ENOBUFS;
+	}
 	if (unlikely(fscache_object_is_dead(object))) {
+		pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->op.state);
+		fscache_cancel_op(&op->op, fscache_do_cancel_retrieval);
 		fscache_stat(stat_object_dead);
 		return -ENOBUFS;
 	}
@@ -353,6 +383,11 @@
 	if (hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(page, !=, NULL);
 
@@ -364,6 +399,7 @@
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
+	op->n_pages = 1;
 
 	spin_lock(&cookie->lock);
 
@@ -375,10 +411,10 @@
 	ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);
 
 	atomic_inc(&object->n_reads);
-	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
 	if (fscache_submit_op(object, &op->op) < 0)
-		goto nobufs_unlock;
+		goto nobufs_unlock_dec;
 	spin_unlock(&cookie->lock);
 
 	fscache_stat(&fscache_n_retrieval_ops);
@@ -425,6 +461,8 @@
 	_leave(" = %d", ret);
 	return ret;
 
+nobufs_unlock_dec:
+	atomic_dec(&object->n_reads);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
 	kfree(op);
@@ -472,6 +510,11 @@
 	if (hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(*nr_pages, >, 0);
 	ASSERT(!list_empty(pages));
@@ -482,6 +525,7 @@
 	op = fscache_alloc_retrieval(mapping, end_io_func, context);
 	if (!op)
 		return -ENOMEM;
+	op->n_pages = *nr_pages;
 
 	spin_lock(&cookie->lock);
 
@@ -491,10 +535,10 @@
 			     struct fscache_object, cookie_link);
 
 	atomic_inc(&object->n_reads);
-	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
 	if (fscache_submit_op(object, &op->op) < 0)
-		goto nobufs_unlock;
+		goto nobufs_unlock_dec;
 	spin_unlock(&cookie->lock);
 
 	fscache_stat(&fscache_n_retrieval_ops);
@@ -541,6 +585,8 @@
 	_leave(" = %d", ret);
 	return ret;
 
+nobufs_unlock_dec:
+	atomic_dec(&object->n_reads);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
 	kfree(op);
@@ -577,12 +623,18 @@
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(page, !=, NULL);
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	if (fscache_wait_for_deferred_lookup(cookie) < 0)
 		return -ERESTARTSYS;
 
 	op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
 	if (!op)
 		return -ENOMEM;
+	op->n_pages = 1;
 
 	spin_lock(&cookie->lock);
 
@@ -658,9 +710,27 @@
 	spin_lock(&object->lock);
 	cookie = object->cookie;
 
-	if (!fscache_object_is_active(object) || !cookie) {
+	if (!fscache_object_is_active(object)) {
+		/* If we get here, then the on-disk cache object likely no
+		 * longer exists, so we should just cancel this write operation.
+		 */
 		spin_unlock(&object->lock);
-		_leave("");
+		fscache_op_complete(&op->op, false);
+		_leave(" [inactive]");
+		return;
+	}
+
+	if (!cookie) {
+		/* If we get here, then the cookie belonging to the object was
+		 * detached, probably by the cookie being withdrawn due to
+		 * memory pressure, which means that the pages from which we
+		 * might write to the cache no longer exist - therefore, we can
+		 * cancel this write operation.
+		 */
+		spin_unlock(&object->lock);
+		fscache_op_complete(&op->op, false);
+		_leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}",
+		       _op->flags, _op->state, object->state, object->flags);
 		return;
 	}
 
@@ -696,6 +766,7 @@
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
 		fscache_abort_object(object);
+		fscache_op_complete(&op->op, true);
 	} else {
 		fscache_enqueue_operation(&op->op);
 	}
@@ -710,6 +781,38 @@
 	spin_unlock(&cookie->stores_lock);
 	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 	spin_unlock(&object->lock);
+	fscache_op_complete(&op->op, true);
+	_leave("");
+}
+
+/*
+ * Clear the pages pending writing for invalidation
+ */
+void fscache_invalidate_writes(struct fscache_cookie *cookie)
+{
+	struct page *page;
+	void *results[16];
+	int n, i;
+
+	_enter("");
+
+	while (spin_lock(&cookie->stores_lock),
+	       n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0,
+					      ARRAY_SIZE(results),
+					      FSCACHE_COOKIE_PENDING_TAG),
+	       n > 0) {
+		for (i = n - 1; i >= 0; i--) {
+			page = results[i];
+			radix_tree_delete(&cookie->stores, page->index);
+		}
+
+		spin_unlock(&cookie->stores_lock);
+
+		for (i = n - 1; i >= 0; i--)
+			page_cache_release(results[i]);
+	}
+
+	spin_unlock(&cookie->stores_lock);
 	_leave("");
 }
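The while condition in fscache_invalidate_writes() above hides a comma
expression that is easy to misread: the lock is re-taken on every pass and
only the final n > 0 is the loop test.  An equivalent, more explicit form
of the same body:

	for (;;) {
		spin_lock(&cookie->stores_lock);
		n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0,
					       ARRAY_SIZE(results),
					       FSCACHE_COOKIE_PENDING_TAG);
		if (n <= 0)
			break;		/* exit with the lock still held */

		for (i = n - 1; i >= 0; i--)
			radix_tree_delete(&cookie->stores,
					  ((struct page *)results[i])->index);
		spin_unlock(&cookie->stores_lock);

		/* drop the page refs only after releasing the lock */
		for (i = n - 1; i >= 0; i--)
			page_cache_release(results[i]);
	}
	spin_unlock(&cookie->stores_lock);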
 
@@ -759,7 +862,12 @@
 
 	fscache_stat(&fscache_n_stores);
 
-	op = kzalloc(sizeof(*op), GFP_NOIO);
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
+	op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY);
 	if (!op)
 		goto nomem;
 
@@ -915,6 +1023,40 @@
 EXPORT_SYMBOL(__fscache_uncache_page);
 
 /**
+ * fscache_mark_page_cached - Mark a page as being cached
+ * @op: The retrieval op pages are being marked for
+ * @page: The page to be marked
+ *
+ * Mark a netfs page as being cached.  After this is called, the netfs
+ * must call fscache_uncache_page() to remove the mark.
+ */
+void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page)
+{
+	struct fscache_cookie *cookie = op->op.object->cookie;
+
+#ifdef CONFIG_FSCACHE_STATS
+	atomic_inc(&fscache_n_marks);
+#endif
+
+	_debug("- mark %p{%lx}", page, page->index);
+	if (TestSetPageFsCache(page)) {
+		static bool once_only;
+		if (!once_only) {
+			once_only = true;
+			printk(KERN_WARNING "FS-Cache:"
+			       " Cookie type %s marked page %lx"
+			       " multiple times\n",
+			       cookie->def->name, page->index);
+		}
+	}
+
+	if (cookie->def->mark_page_cached)
+		cookie->def->mark_page_cached(cookie->netfs_data,
+					      op->mapping, page);
+}
+EXPORT_SYMBOL(fscache_mark_page_cached);
+
+/**
  * fscache_mark_pages_cached - Mark pages as being cached
  * @op: The retrieval op pages are being marked for
  * @pagevec: The pages to be marked
@@ -925,32 +1067,11 @@
 void fscache_mark_pages_cached(struct fscache_retrieval *op,
 			       struct pagevec *pagevec)
 {
-	struct fscache_cookie *cookie = op->op.object->cookie;
 	unsigned long loop;
 
-#ifdef CONFIG_FSCACHE_STATS
-	atomic_add(pagevec->nr, &fscache_n_marks);
-#endif
+	for (loop = 0; loop < pagevec->nr; loop++)
+		fscache_mark_page_cached(op, pagevec->pages[loop]);
 
-	for (loop = 0; loop < pagevec->nr; loop++) {
-		struct page *page = pagevec->pages[loop];
-
-		_debug("- mark %p{%lx}", page, page->index);
-		if (TestSetPageFsCache(page)) {
-			static bool once_only;
-			if (!once_only) {
-				once_only = true;
-				printk(KERN_WARNING "FS-Cache:"
-				       " Cookie type %s marked page %lx"
-				       " multiple times\n",
-				       cookie->def->name, page->index);
-			}
-		}
-	}
-
-	if (cookie->def->mark_pages_cached)
-		cookie->def->mark_pages_cached(cookie->netfs_data,
-					       op->mapping, pagevec);
 	pagevec_reinit(pagevec);
 }
 EXPORT_SYMBOL(fscache_mark_pages_cached);
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 4765190..8179e8b 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -69,6 +69,7 @@
 atomic_t fscache_n_store_vmscan_gone;
 atomic_t fscache_n_store_vmscan_busy;
 atomic_t fscache_n_store_vmscan_cancelled;
+atomic_t fscache_n_store_vmscan_wait;
 
 atomic_t fscache_n_marks;
 atomic_t fscache_n_uncaches;
@@ -80,6 +81,9 @@
 atomic_t fscache_n_acquires_nobufs;
 atomic_t fscache_n_acquires_oom;
 
+atomic_t fscache_n_invalidates;
+atomic_t fscache_n_invalidates_run;
+
 atomic_t fscache_n_updates;
 atomic_t fscache_n_updates_null;
 atomic_t fscache_n_updates_run;
@@ -112,6 +116,7 @@
 atomic_t fscache_n_cop_lookup_object;
 atomic_t fscache_n_cop_lookup_complete;
 atomic_t fscache_n_cop_grab_object;
+atomic_t fscache_n_cop_invalidate_object;
 atomic_t fscache_n_cop_update_object;
 atomic_t fscache_n_cop_drop_object;
 atomic_t fscache_n_cop_put_object;
@@ -168,6 +173,10 @@
 		   atomic_read(&fscache_n_object_created),
 		   atomic_read(&fscache_n_object_lookups_timed_out));
 
+	seq_printf(m, "Invals : n=%u run=%u\n",
+		   atomic_read(&fscache_n_invalidates),
+		   atomic_read(&fscache_n_invalidates_run));
+
 	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
 		   atomic_read(&fscache_n_updates),
 		   atomic_read(&fscache_n_updates_null),
@@ -224,11 +233,12 @@
 		   atomic_read(&fscache_n_store_radix_deletes),
 		   atomic_read(&fscache_n_store_pages_over_limit));
 
-	seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n",
+	seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n",
 		   atomic_read(&fscache_n_store_vmscan_not_storing),
 		   atomic_read(&fscache_n_store_vmscan_gone),
 		   atomic_read(&fscache_n_store_vmscan_busy),
-		   atomic_read(&fscache_n_store_vmscan_cancelled));
+		   atomic_read(&fscache_n_store_vmscan_cancelled),
+		   atomic_read(&fscache_n_store_vmscan_wait));
 
 	seq_printf(m, "Ops    : pend=%u run=%u enq=%u can=%u rej=%u\n",
 		   atomic_read(&fscache_n_op_pend),
@@ -246,7 +256,8 @@
 		   atomic_read(&fscache_n_cop_lookup_object),
 		   atomic_read(&fscache_n_cop_lookup_complete),
 		   atomic_read(&fscache_n_cop_grab_object));
-	seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n",
+	seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n",
+		   atomic_read(&fscache_n_cop_invalidate_object),
 		   atomic_read(&fscache_n_cop_update_object),
 		   atomic_read(&fscache_n_cop_drop_object),
 		   atomic_read(&fscache_n_cop_put_object),
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c23fa7..c163353 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -92,8 +92,8 @@
 
 static void fuse_req_init_context(struct fuse_req *req)
 {
-	req->in.h.uid = current_fsuid();
-	req->in.h.gid = current_fsgid();
+	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
+	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
 	req->in.h.pid = current->pid;
 }
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 324bc08..b7c09f9 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,8 +818,8 @@
 	stat->ino = attr->ino;
 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 	stat->nlink = attr->nlink;
-	stat->uid = attr->uid;
-	stat->gid = attr->gid;
+	stat->uid = make_kuid(&init_user_ns, attr->uid);
+	stat->gid = make_kgid(&init_user_ns, attr->gid);
 	stat->rdev = inode->i_rdev;
 	stat->atime.tv_sec = attr->atime;
 	stat->atime.tv_nsec = attr->atimensec;
@@ -1007,12 +1007,12 @@
 	rcu_read_lock();
 	ret = 0;
 	cred = __task_cred(task);
-	if (cred->euid == fc->user_id &&
-	    cred->suid == fc->user_id &&
-	    cred->uid  == fc->user_id &&
-	    cred->egid == fc->group_id &&
-	    cred->sgid == fc->group_id &&
-	    cred->gid  == fc->group_id)
+	if (uid_eq(cred->euid, fc->user_id) &&
+	    uid_eq(cred->suid, fc->user_id) &&
+	    uid_eq(cred->uid,  fc->user_id) &&
+	    gid_eq(cred->egid, fc->group_id) &&
+	    gid_eq(cred->sgid, fc->group_id) &&
+	    gid_eq(cred->gid,  fc->group_id))
 		ret = 1;
 	rcu_read_unlock();
 
@@ -1306,9 +1306,9 @@
 	if (ivalid & ATTR_MODE)
 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
 	if (ivalid & ATTR_UID)
-		arg->valid |= FATTR_UID,    arg->uid = iattr->ia_uid;
+		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
 	if (ivalid & ATTR_GID)
-		arg->valid |= FATTR_GID,    arg->gid = iattr->ia_gid;
+		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
 	if (ivalid & ATTR_SIZE)
 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
 	if (ivalid & ATTR_ATIME) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 78d2837..e21d4d8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1599,19 +1599,19 @@
 	return err ? 0 : outarg.block;
 }
 
-static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t retval;
 	struct inode *inode = file->f_path.dentry->d_inode;
 
 	/* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
-	if (origin == SEEK_CUR || origin == SEEK_SET)
-		return generic_file_llseek(file, offset, origin);
+	if (whence == SEEK_CUR || whence == SEEK_SET)
+		return generic_file_llseek(file, offset, whence);
 
 	mutex_lock(&inode->i_mutex);
 	retval = fuse_update_attributes(inode, NULL, file, NULL);
 	if (!retval)
-		retval = generic_file_llseek(file, offset, origin);
+		retval = generic_file_llseek(file, offset, whence);
 	mutex_unlock(&inode->i_mutex);
 
 	return retval;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e24dd74..e105a53 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -333,10 +333,10 @@
 	atomic_t count;
 
 	/** The user id for this mount */
-	uid_t user_id;
+	kuid_t user_id;
 
 	/** The group id for this mount */
-	gid_t group_id;
+	kgid_t group_id;
 
 	/** The fuse mount flags for this mount */
 	unsigned flags;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f0eda12..73ca6b7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -60,8 +60,8 @@
 struct fuse_mount_data {
 	int fd;
 	unsigned rootmode;
-	unsigned user_id;
-	unsigned group_id;
+	kuid_t user_id;
+	kgid_t group_id;
 	unsigned fd_present:1;
 	unsigned rootmode_present:1;
 	unsigned user_id_present:1;
@@ -164,8 +164,8 @@
 	inode->i_ino     = fuse_squash_ino(attr->ino);
 	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 	set_nlink(inode, attr->nlink);
-	inode->i_uid     = attr->uid;
-	inode->i_gid     = attr->gid;
+	inode->i_uid     = make_kuid(&init_user_ns, attr->uid);
+	inode->i_gid     = make_kgid(&init_user_ns, attr->gid);
 	inode->i_blocks  = attr->blocks;
 	inode->i_atime.tv_sec   = attr->atime;
 	inode->i_atime.tv_nsec  = attr->atimensec;
@@ -492,14 +492,18 @@
 		case OPT_USER_ID:
 			if (match_int(&args[0], &value))
 				return 0;
-			d->user_id = value;
+			d->user_id = make_kuid(current_user_ns(), value);
+			if (!uid_valid(d->user_id))
+				return 0;
 			d->user_id_present = 1;
 			break;
 
 		case OPT_GROUP_ID:
 			if (match_int(&args[0], &value))
 				return 0;
-			d->group_id = value;
+			d->group_id = make_kgid(current_user_ns(), value);
+			if (!gid_valid(d->group_id))
+				return 0;
 			d->group_id_present = 1;
 			break;
 
@@ -540,8 +544,8 @@
 	struct super_block *sb = root->d_sb;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
-	seq_printf(m, ",user_id=%u", fc->user_id);
-	seq_printf(m, ",group_id=%u", fc->group_id);
+	seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
+	seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
 		seq_puts(m, ",default_permissions");
 	if (fc->flags & FUSE_ALLOW_OTHER)
@@ -989,7 +993,8 @@
 	if (!file)
 		goto err;
 
-	if (file->f_op != &fuse_dev_operations)
+	if ((file->f_op != &fuse_dev_operations) ||
+	    (file->f_cred->user_ns != &init_user_ns))
 		goto err_fput;
 
 	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
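The fuse hunks above follow the standard kuid_t/kgid_t conversion recipe:
keep kernel-internal ids in fuse_conn, translate at the userspace boundary
with make_kuid()/from_kuid_munged(), and compare with uid_eq()/gid_eq().
A condensed sketch of the round trip (parse_owner() is illustrative):

	static int parse_owner(struct fuse_mount_data *d, unsigned value)
	{
		d->user_id = make_kuid(current_user_ns(), value);
		if (!uid_valid(d->user_id))
			return -EINVAL;		/* no mapping in this user ns */
		return 0;
	}

	/* ...and back out again when showing mount options: */
	seq_printf(m, ",user_id=%u",
		   from_kuid_munged(&init_user_ns, fc->user_id));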
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index dfe2d8c..991ab2d 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -44,7 +44,7 @@
  * gfs2_llseek - seek to a location in a file
  * @file: the file
  * @offset: the offset
- * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
+ * @whence: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
  *
  * SEEK_END requires the glock for the file because it references the
  * file's size.
@@ -52,26 +52,26 @@
  * Returns: The new offset, or errno
  */
 
-static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
+static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	struct gfs2_holder i_gh;
 	loff_t error;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END: /* These reference inode->i_size */
 	case SEEK_DATA:
 	case SEEK_HOLE:
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
-			error = generic_file_llseek(file, offset, origin);
+			error = generic_file_llseek(file, offset, whence);
 			gfs2_glock_dq_uninit(&i_gh);
 		}
 		break;
 	case SEEK_CUR:
 	case SEEK_SET:
-		error = generic_file_llseek(file, offset, origin);
+		error = generic_file_llseek(file, offset, whence);
 		break;
 	default:
 		error = -EINVAL;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 0b35903..d47f116 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -35,6 +35,16 @@
 	return block_read_full_page(page, hfs_get_block);
 }
 
+static void hfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		hfs_file_truncate(inode);
+	}
+}
+
 static int hfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -45,11 +55,8 @@
 	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hfs_get_block,
 				&HFS_I(mapping->host)->phys_size);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		hfs_write_failed(mapping, pos + len);
 
 	return ret;
 }
@@ -120,6 +127,7 @@
 		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
 	ssize_t ret;
 
@@ -135,7 +143,7 @@
 		loff_t end = offset + iov_length(iov, nr_segs);
 
 		if (end > isize)
-			vmtruncate(inode, isize);
+			hfs_write_failed(mapping, end);
 	}
 
 	return ret;
@@ -617,9 +625,12 @@
 	    attr->ia_size != i_size_read(inode)) {
 		inode_dio_wait(inode);
 
-		error = vmtruncate(inode, attr->ia_size);
+		error = inode_newsize_ok(inode, attr->ia_size);
 		if (error)
 			return error;
+
+		truncate_setsize(inode, attr->ia_size);
+		hfs_file_truncate(inode);
 	}
 
 	setattr_copy(inode, attr);
@@ -668,7 +679,6 @@
 
 static const struct inode_operations hfs_file_inode_operations = {
 	.lookup		= hfs_file_lookup,
-	.truncate	= hfs_file_truncate,
 	.setattr	= hfs_inode_setattr,
 	.setxattr	= hfs_setxattr,
 	.getxattr	= hfs_getxattr,
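This hfs conversion is the template that the hfsplus, hpfs, jfs and minix
hunks below repeat: delete the ->truncate inode operation and open-code
what vmtruncate() used to do.  The recurring shape, with the per-fs
truncate routine as a placeholder name:

	/* in ->setattr, replacing the old vmtruncate() call */
	error = inode_newsize_ok(inode, attr->ia_size);
	if (error)
		return error;
	truncate_setsize(inode, attr->ia_size);	/* i_size + pagecache */
	fs_file_truncate(inode);		/* placeholder: free the blocks */

plus a small *_write_failed() helper, shown in full in each file's diff,
that trims the pagecache back to i_size when write_begin fails.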
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index 4cfbe2e..6feefc0 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -176,12 +176,14 @@
 	dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count);
 	/* are all of the bits in range? */
 	if ((offset + count) > sbi->total_blocks)
-		return -2;
+		return -ENOENT;
 
 	mutex_lock(&sbi->alloc_mutex);
 	mapping = sbi->alloc_file->i_mapping;
 	pnr = offset / PAGE_CACHE_BITS;
 	page = read_mapping_page(mapping, pnr, NULL);
+	if (IS_ERR(page))
+		goto kaboom;
 	pptr = kmap(page);
 	curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
 	end = pptr + PAGE_CACHE_BITS / 32;
@@ -214,6 +216,8 @@
 		set_page_dirty(page);
 		kunmap(page);
 		page = read_mapping_page(mapping, ++pnr, NULL);
+		if (IS_ERR(page))
+			goto kaboom;
 		pptr = kmap(page);
 		curr = pptr;
 		end = pptr + PAGE_CACHE_BITS / 32;
@@ -232,4 +236,11 @@
 	mutex_unlock(&sbi->alloc_mutex);
 
 	return 0;
+
+kaboom:
+	printk(KERN_CRIT "hfsplus: unable to mark blocks free: error %ld\n",
+			PTR_ERR(page));
+	mutex_unlock(&sbi->alloc_mutex);
+
+	return -EIO;
 }
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 21023d9..685d07d 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -159,7 +159,7 @@
 	kfree(tree);
 }
 
-void hfs_btree_write(struct hfs_btree *tree)
+int hfs_btree_write(struct hfs_btree *tree)
 {
 	struct hfs_btree_header_rec *head;
 	struct hfs_bnode *node;
@@ -168,7 +168,7 @@
 	node = hfs_bnode_find(tree, 0);
 	if (IS_ERR(node))
 		/* panic? */
-		return;
+		return -EIO;
 	/* Load the header */
 	page = node->page[0];
 	head = (struct hfs_btree_header_rec *)(kmap(page) +
@@ -186,6 +186,7 @@
 	kunmap(page);
 	set_page_dirty(page);
 	hfs_bnode_put(node);
+	return 0;
 }
 
 static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx)
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 5849e3e..eba76ea 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -329,6 +329,7 @@
 {
 	u32 count, start;
 	int i;
+	int err = 0;
 
 	hfsplus_dump_extent(extent);
 	for (i = 0; i < 8; extent++, i++) {
@@ -345,18 +346,33 @@
 	for (;;) {
 		start = be32_to_cpu(extent->start_block);
 		if (count <= block_nr) {
-			hfsplus_block_free(sb, start, count);
+			err = hfsplus_block_free(sb, start, count);
+			if (err) {
+				printk(KERN_ERR "hfs: can't free extent\n");
+				dprint(DBG_EXTENT, " start: %u count: %u\n",
+					start, count);
+			}
 			extent->block_count = 0;
 			extent->start_block = 0;
 			block_nr -= count;
 		} else {
 			count -= block_nr;
-			hfsplus_block_free(sb, start + count, block_nr);
+			err = hfsplus_block_free(sb, start + count, block_nr);
+			if (err) {
+				printk(KERN_ERR "hfs: can't free extent\n");
+				dprint(DBG_EXTENT, " start: %u count: %u\n",
+					start, count);
+			}
 			extent->block_count = cpu_to_be32(count);
 			block_nr = 0;
 		}
-		if (!block_nr || !i)
-			return 0;
+		if (!block_nr || !i) {
+			/*
+			 * Try to free all extents and
+			 * return only the last error
+			 */
+			return err;
+		}
 		i--;
 		extent--;
 		count = be32_to_cpu(extent->block_count);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index c571de2..a6da86b 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -335,7 +335,7 @@
 /* btree.c */
 struct hfs_btree *hfs_btree_open(struct super_block *, u32);
 void hfs_btree_close(struct hfs_btree *);
-void hfs_btree_write(struct hfs_btree *);
+int hfs_btree_write(struct hfs_btree *);
 struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *);
 void hfs_bmap_free(struct hfs_bnode *);
 
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 2172aa5..799b336 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -28,6 +28,16 @@
 	return block_write_full_page(page, hfsplus_get_block, wbc);
 }
 
+static void hfsplus_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		hfsplus_file_truncate(inode);
+	}
+}
+
 static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -38,11 +48,8 @@
 	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hfsplus_get_block,
 				&HFSPLUS_I(mapping->host)->phys_size);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		hfsplus_write_failed(mapping, pos + len);
 
 	return ret;
 }
@@ -116,6 +123,7 @@
 		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
 	ssize_t ret;
 
@@ -131,7 +139,7 @@
 		loff_t end = offset + iov_length(iov, nr_segs);
 
 		if (end > isize)
-			vmtruncate(inode, isize);
+			hfsplus_write_failed(mapping, end);
 	}
 
 	return ret;
@@ -300,10 +308,8 @@
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size != i_size_read(inode)) {
 		inode_dio_wait(inode);
-
-		error = vmtruncate(inode, attr->ia_size);
-		if (error)
-			return error;
+		truncate_setsize(inode, attr->ia_size);
+		hfsplus_file_truncate(inode);
 	}
 
 	setattr_copy(inode, attr);
@@ -358,7 +364,6 @@
 
 static const struct inode_operations hfsplus_file_inode_operations = {
 	.lookup		= hfsplus_file_lookup,
-	.truncate	= hfsplus_file_truncate,
 	.setattr	= hfsplus_setattr,
 	.setxattr	= hfsplus_setxattr,
 	.getxattr	= hfsplus_getxattr,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 811a84d..796198d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -127,8 +127,14 @@
 		hfsplus_mark_mdb_dirty(inode->i_sb);
 	}
 	hfsplus_inode_write_fork(inode, fork);
-	if (tree)
-		hfs_btree_write(tree);
+	if (tree) {
+		int err = hfs_btree_write(tree);
+		if (err) {
+			printk(KERN_ERR "hfs: b-tree write err: %d, ino %lu\n",
+					err, inode->i_ino);
+			return err;
+		}
+	}
 	return 0;
 }
 
@@ -226,6 +232,7 @@
 
 static void delayed_sync_fs(struct work_struct *work)
 {
+	int err;
 	struct hfsplus_sb_info *sbi;
 
 	sbi = container_of(work, struct hfsplus_sb_info, sync_work.work);
@@ -234,7 +241,9 @@
 	sbi->work_queued = 0;
 	spin_unlock(&sbi->work_lock);
 
-	hfsplus_sync_fs(sbi->alloc_file->i_sb, 1);
+	err = hfsplus_sync_fs(sbi->alloc_file->i_sb, 1);
+	if (err)
+		printk(KERN_ERR "hfs: delayed sync fs err %d\n", err);
 }
 
 void hfsplus_mark_mdb_dirty(struct super_block *sb)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 89d2a58..fbfe2df 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -50,7 +50,7 @@
 	return disk_secno;
 }
 
-static void hpfs_truncate(struct inode *i)
+void hpfs_truncate(struct inode *i)
 {
 	if (IS_IMMUTABLE(i)) return /*-EPERM*/;
 	hpfs_lock_assert(i->i_sb);
@@ -105,6 +105,16 @@
 	return block_read_full_page(page,hpfs_get_block);
 }
 
+static void hpfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		hpfs_truncate(inode);
+	}
+}
+
 static int hpfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -115,11 +125,8 @@
 	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hpfs_get_block,
 				&hpfs_i(mapping->host)->mmu_private);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		hpfs_write_failed(mapping, pos + len);
 
 	return ret;
 }
@@ -166,6 +173,5 @@
 
 const struct inode_operations hpfs_file_iops =
 {
-	.truncate	= hpfs_truncate,
 	.setattr	= hpfs_setattr,
 };
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 7102aae..b7ae286 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -252,6 +252,7 @@
 /* file.c */
 
 int hpfs_file_fsync(struct file *, loff_t, loff_t, int);
+void hpfs_truncate(struct inode *);
 extern const struct file_operations hpfs_file_ops;
 extern const struct inode_operations hpfs_file_iops;
 extern const struct address_space_operations hpfs_aops;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 804a9a8..5dc06c8 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -277,9 +277,12 @@
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
+		error = inode_newsize_ok(inode, attr->ia_size);
 		if (error)
 			goto out_unlock;
+
+		truncate_setsize(inode, attr->ia_size);
+		hpfs_truncate(inode);
 	}
 
 	setattr_copy(inode, attr);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 78f21f8..43b315f 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -710,7 +710,7 @@
 	struct vfsmount *proc_mnt;
 	int err = -ENOENT;
 
-	proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt);
+	proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt);
 	if (IS_ERR(proc_mnt))
 		goto out;
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 484b8d1..dbf41f9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -60,7 +60,6 @@
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
 EXPORT_SYMBOL(jbd2_journal_set_triggers);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
-EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);
 #if 0
 EXPORT_SYMBOL(journal_sync_buffer);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d8da40e..42f6615 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1207,17 +1207,6 @@
 	return ret;
 }
 
-/*
- * jbd2_journal_release_buffer: undo a get_write_access without any buffer
- * updates, if the update decided in the end that it didn't need access.
- *
- */
-void
-jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
-{
-	BUFFER_TRACE(bh, "entry");
-}
-
 /**
  * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
  * @handle: transaction handle
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 0c96eb5..0331072 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -417,14 +417,16 @@
 			spin_unlock(&c->erase_completion_lock);
 
 			ret = jffs2_prealloc_raw_node_refs(c, jeb, 1);
-			if (ret)
-				return ret;
+
 			/* Just lock it again and continue. Nothing much can change because
 			   we hold c->alloc_sem anyway. In fact, it's not entirely clear why
 			   we hold c->erase_completion_lock in the majority of this function...
 			   but that's a question for another (more caffeine-rich) day. */
 			spin_lock(&c->erase_completion_lock);
 
+			if (ret)
+				return ret;
+
 			waste = jeb->free_size;
 			jffs2_link_node_ref(c, jeb,
 					    (jeb->offset + c->sector_size - waste) | REF_OBSOLETE,
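The jffs2 reorder above is a lock-balance fix rather than a behaviour
change: the function must return to its caller with
c->erase_completion_lock held, so the error check has to move after the
lock is re-taken.  The shape in general (sleepable_work() is a placeholder
for the prealloc call):

	spin_unlock(&c->erase_completion_lock);
	ret = sleepable_work(c, jeb);
	spin_lock(&c->erase_completion_lock);	/* restore the invariant first... */
	if (ret)
		return ret;	/* ...so the caller's unlock stays balanced */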
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 9d3afd1..dd7442c 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -119,9 +119,12 @@
 	    iattr->ia_size != i_size_read(inode)) {
 		inode_dio_wait(inode);
 
-		rc = vmtruncate(inode, iattr->ia_size);
+		rc = inode_newsize_ok(inode, iattr->ia_size);
 		if (rc)
 			return rc;
+
+		truncate_setsize(inode, iattr->ia_size);
+		jfs_truncate(inode);
 	}
 
 	setattr_copy(inode, iattr);
@@ -133,7 +136,6 @@
 }
 
 const struct inode_operations jfs_file_inode_operations = {
-	.truncate	= jfs_truncate,
 	.setxattr	= jfs_setxattr,
 	.getxattr	= jfs_getxattr,
 	.listxattr	= jfs_listxattr,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 4692bf3..b7dc47b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -300,6 +300,16 @@
 	return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);
 }
 
+static void jfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		jfs_truncate(inode);
+	}
+}
+
 static int jfs_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
@@ -308,11 +318,8 @@
 
 	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
 				jfs_get_block);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		jfs_write_failed(mapping, pos + len);
 
 	return ret;
 }
@@ -326,6 +333,7 @@
 	const struct iovec *iov, loff_t offset, unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
@@ -341,7 +349,7 @@
 		loff_t end = offset + iov_length(iov, nr_segs);
 
 		if (end > isize)
-			vmtruncate(inode, isize);
+			jfs_write_failed(mapping, end);
 	}
 
 	return ret;
diff --git a/fs/libfs.c b/fs/libfs.c
index 7cc37ca..916da8c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -81,11 +81,11 @@
 	return 0;
 }
 
-loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
+loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	mutex_lock(&dentry->d_inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
@@ -369,8 +369,6 @@
 	struct inode *inode = dentry->d_inode;
 	int error;
 
-	WARN_ON_ONCE(inode->i_op->truncate);
-
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 13ad153..00ec0b9 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -64,10 +64,6 @@
 {
 	const struct file_lock *fl = &lock->fl;
 
-	BUG_ON(fl->fl_start > NLM4_OFFSET_MAX);
-	BUG_ON(fl->fl_end > NLM4_OFFSET_MAX &&
-				fl->fl_end != OFFSET_MAX);
-
 	*l_offset = loff_t_to_s64(fl->fl_start);
 	if (fl->fl_end == OFFSET_MAX)
 		*l_len = 0;
@@ -122,7 +118,6 @@
 {
 	__be32 *p;
 
-	BUG_ON(length > XDR_MAX_NETOBJ);
 	p = xdr_reserve_space(xdr, 4 + length);
 	xdr_encode_opaque(p, data, length);
 }
@@ -156,7 +151,6 @@
 static void encode_cookie(struct xdr_stream *xdr,
 			  const struct nlm_cookie *cookie)
 {
-	BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
 	encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
 }
 
@@ -198,7 +192,6 @@
  */
 static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
 {
-	BUG_ON(fh->size > NFS3_FHSIZE);
 	encode_netobj(xdr, (u8 *)&fh->data, fh->size);
 }
 
@@ -336,7 +329,6 @@
 	u32 length = strlen(name);
 	__be32 *p;
 
-	BUG_ON(length > NLM_MAXSTRLEN);
 	p = xdr_reserve_space(xdr, 4 + length);
 	xdr_encode_opaque(p, name, length);
 }
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 05d2912..54f9e6c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -141,7 +141,7 @@
 
 static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 {
-	BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
+	WARN_ON_ONCE(req->a_args.lock.fl.fl_ops != NULL);
 }
 
 /**
@@ -465,7 +465,6 @@
 
 static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host)
 {
-	BUG_ON(fl->fl_ops != NULL);
 	fl->fl_u.nfs_fl.state = 0;
 	fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
 	INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 982d267..9a55797 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -60,10 +60,6 @@
 {
 	const struct file_lock *fl = &lock->fl;
 
-	BUG_ON(fl->fl_start > NLM_OFFSET_MAX);
-	BUG_ON(fl->fl_end > NLM_OFFSET_MAX &&
-				fl->fl_end != OFFSET_MAX);
-
 	*l_offset = loff_t_to_s32(fl->fl_start);
 	if (fl->fl_end == OFFSET_MAX)
 		*l_len = 0;
@@ -119,7 +115,6 @@
 {
 	__be32 *p;
 
-	BUG_ON(length > XDR_MAX_NETOBJ);
 	p = xdr_reserve_space(xdr, 4 + length);
 	xdr_encode_opaque(p, data, length);
 }
@@ -153,7 +148,6 @@
 static void encode_cookie(struct xdr_stream *xdr,
 			  const struct nlm_cookie *cookie)
 {
-	BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
 	encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
 }
 
@@ -195,7 +189,6 @@
  */
 static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
 {
-	BUG_ON(fh->size != NFS2_FHSIZE);
 	encode_netobj(xdr, (u8 *)&fh->data, NFS2_FHSIZE);
 }
 
@@ -330,7 +323,6 @@
 	u32 length = strlen(name);
 	__be32 *p;
 
-	BUG_ON(length > NLM_MAXSTRLEN);
 	p = xdr_reserve_space(xdr, 4 + length);
 	xdr_encode_opaque(p, name, length);
 }
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f9b22e5..0e17090 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -177,9 +177,6 @@
 
 	dprintk("lockd: destroy host %s\n", host->h_name);
 
-	BUG_ON(!list_empty(&host->h_lockowners));
-	BUG_ON(atomic_read(&host->h_count));
-
 	hlist_del_init(&host->h_hash);
 
 	nsm_unmonitor(host);
@@ -289,13 +286,12 @@
 
 	dprintk("lockd: release client host %s\n", host->h_name);
 
-	BUG_ON(atomic_read(&host->h_count) < 0);
-	BUG_ON(host->h_server);
+	WARN_ON_ONCE(host->h_server);
 
 	if (atomic_dec_and_test(&host->h_count)) {
-		BUG_ON(!list_empty(&host->h_lockowners));
-		BUG_ON(!list_empty(&host->h_granted));
-		BUG_ON(!list_empty(&host->h_reclaim));
+		WARN_ON_ONCE(!list_empty(&host->h_lockowners));
+		WARN_ON_ONCE(!list_empty(&host->h_granted));
+		WARN_ON_ONCE(!list_empty(&host->h_reclaim));
 
 		mutex_lock(&nlm_host_mutex);
 		nlm_destroy_host_locked(host);
@@ -412,8 +408,7 @@
 
 	dprintk("lockd: release server host %s\n", host->h_name);
 
-	BUG_ON(atomic_read(&host->h_count) < 0);
-	BUG_ON(!host->h_server);
+	WARN_ON_ONCE(!host->h_server);
 	atomic_dec(&host->h_count);
 }
 
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3d7e09b..3c2cfc6 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -154,8 +154,6 @@
 		.rpc_resp	= res,
 	};
 
-	BUG_ON(clnt == NULL);
-
 	memset(res, 0, sizeof(*res));
 
 	msg.rpc_proc = &clnt->cl_procinfo[proc];
@@ -466,7 +464,6 @@
 	const u32 len = strlen(string);
 	__be32 *p;
 
-	BUG_ON(len > SM_MAXSTRLEN);
 	p = xdr_reserve_space(xdr, 4 + len);
 	xdr_encode_opaque(p, string, len);
 }
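The lockd hunks replace BUG_ON() sanity checks, which take the whole
machine down, with WARN_ON_ONCE(), which logs one stack trace and lets the
code continue.  Because WARN_ON_ONCE() evaluates to its condition, such a
check can still gate a graceful bail-out, e.g. (illustrative, not from the
patch):

	if (WARN_ON_ONCE(host->h_server))
		return;		/* complain once, refuse the call, don't oops */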
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index e1a3b6b..9a59cba 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1887,9 +1887,15 @@
 		logfs_put_wblocks(sb, NULL, 1);
 	}
 
-	if (!err)
-		err = vmtruncate(inode, target);
+	if (!err) {
+		err = inode_newsize_ok(inode, target);
+		if (err)
+			goto out;
 
+		truncate_setsize(inode, target);
+	}
+
+ out:
 	/* I don't trust error recovery yet. */
 	WARN_ON(err);
 	return err;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 4493ce6..adc6f54 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -34,9 +34,12 @@
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
+		error = inode_newsize_ok(inode, attr->ia_size);
 		if (error)
 			return error;
+
+		truncate_setsize(inode, attr->ia_size);
+		minix_truncate(inode);
 	}
 
 	setattr_copy(inode, attr);
@@ -45,7 +48,6 @@
 }
 
 const struct inode_operations minix_file_inode_operations = {
-	.truncate	= minix_truncate,
 	.setattr	= minix_setattr,
 	.getattr	= minix_getattr,
 };
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 4fc5f8a..99541cc 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -395,6 +395,16 @@
 	return __block_write_begin(page, pos, len, minix_get_block);
 }
 
+static void minix_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		minix_truncate(inode);
+	}
+}
+
 static int minix_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -403,11 +413,8 @@
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep,
 				minix_get_block);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		minix_write_failed(mapping, pos + len);
 
 	return ret;
 }
diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9..cd50079 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,8 +4,11 @@
 
 struct mnt_namespace {
 	atomic_t		count;
+	unsigned int		proc_inum;
 	struct mount *	root;
 	struct list_head	list;
+	struct user_namespace	*user_ns;
+	u64			seq;	/* Sequence number to prevent loops */
 	wait_queue_head_t poll;
 	int event;
 };
diff --git a/fs/namei.c b/fs/namei.c
index 5f4cdf3..43a97ee 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1275,9 +1275,7 @@
 	*need_lookup = false;
 	dentry = d_lookup(dir, name);
 	if (dentry) {
-		if (d_need_lookup(dentry)) {
-			*need_lookup = true;
-		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
 			error = d_revalidate(dentry, flags);
 			if (unlikely(error <= 0)) {
 				if (error < 0) {
@@ -1383,8 +1381,6 @@
 			return -ECHILD;
 		nd->seq = seq;
 
-		if (unlikely(d_need_lookup(dentry)))
-			goto unlazy;
 		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
 			status = d_revalidate(dentry, nd->flags);
 			if (unlikely(status <= 0)) {
@@ -1410,11 +1406,6 @@
 	if (unlikely(!dentry))
 		goto need_lookup;
 
-	if (unlikely(d_need_lookup(dentry))) {
-		dput(dentry);
-		goto need_lookup;
-	}
-
 	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
 		status = d_revalidate(dentry, nd->flags);
 	if (unlikely(status <= 0)) {
@@ -1859,7 +1850,7 @@
 	if (flags & LOOKUP_ROOT) {
 		struct inode *inode = nd->root.dentry->d_inode;
 		if (*name) {
-			if (!inode->i_op->lookup)
+			if (!can_lookup(inode))
 				return -ENOTDIR;
 			retval = inode_permission(inode, MAY_EXEC);
 			if (retval)
@@ -1903,6 +1894,7 @@
 			get_fs_pwd(current->fs, &nd->path);
 		}
 	} else {
+		/* Caller must check execute permissions on the starting path component */
 		struct fd f = fdget_raw(dfd);
 		struct dentry *dentry;
 
@@ -1912,16 +1904,10 @@
 		dentry = f.file->f_path.dentry;
 
 		if (*name) {
-			if (!S_ISDIR(dentry->d_inode->i_mode)) {
+			if (!can_lookup(dentry->d_inode)) {
 				fdput(f);
 				return -ENOTDIR;
 			}
-
-			retval = inode_permission(dentry->d_inode, MAY_EXEC);
-			if (retval) {
-				fdput(f);
-				return retval;
-			}
 		}
 
 		nd->path = f.file->f_path;
@@ -2189,15 +2175,19 @@
  *     path-walking is complete.
  */
 static struct filename *
-user_path_parent(int dfd, const char __user *path, struct nameidata *nd)
+user_path_parent(int dfd, const char __user *path, struct nameidata *nd,
+		 unsigned int flags)
 {
 	struct filename *s = getname(path);
 	int error;
 
+	/* only LOOKUP_REVAL is allowed in extra flags */
+	flags &= LOOKUP_REVAL;
+
 	if (IS_ERR(s))
 		return s;
 
-	error = filename_lookup(dfd, s, LOOKUP_PARENT, nd);
+	error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd);
 	if (error) {
 		putname(s);
 		return ERR_PTR(error);
@@ -3044,12 +3034,22 @@
 	return file;
 }
 
-struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
+struct dentry *kern_path_create(int dfd, const char *pathname,
+				struct path *path, unsigned int lookup_flags)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 	struct nameidata nd;
 	int err2;
-	int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+	int error;
+	bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
+
+	/*
+	 * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
+	 * other flags passed in are ignored!
+	 */
+	lookup_flags &= LOOKUP_REVAL;
+
+	error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);
 	if (error)
 		return ERR_PTR(error);
 
@@ -3113,13 +3113,14 @@
 }
 EXPORT_SYMBOL(done_path_create);
 
-struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
+struct dentry *user_path_create(int dfd, const char __user *pathname,
+				struct path *path, unsigned int lookup_flags)
 {
 	struct filename *tmp = getname(pathname);
 	struct dentry *res;
 	if (IS_ERR(tmp))
 		return ERR_CAST(tmp);
-	res = kern_path_create(dfd, tmp->name, path, is_dir);
+	res = kern_path_create(dfd, tmp->name, path, lookup_flags);
 	putname(tmp);
 	return res;
 }
@@ -3175,12 +3176,13 @@
 	struct dentry *dentry;
 	struct path path;
 	int error;
+	unsigned int lookup_flags = 0;
 
 	error = may_mknod(mode);
 	if (error)
 		return error;
-
-	dentry = user_path_create(dfd, filename, &path, 0);
+retry:
+	dentry = user_path_create(dfd, filename, &path, lookup_flags);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -3203,6 +3205,10 @@
 	}
 out:
 	done_path_create(&path, dentry);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -3241,8 +3247,10 @@
 	struct dentry *dentry;
 	struct path path;
 	int error;
+	unsigned int lookup_flags = LOOKUP_DIRECTORY;
 
-	dentry = user_path_create(dfd, pathname, &path, 1);
+retry:
+	dentry = user_path_create(dfd, pathname, &path, lookup_flags);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -3252,6 +3260,10 @@
 	if (!error)
 		error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
 	done_path_create(&path, dentry);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -3327,8 +3339,9 @@
 	struct filename *name;
 	struct dentry *dentry;
 	struct nameidata nd;
-
-	name = user_path_parent(dfd, pathname, &nd);
+	unsigned int lookup_flags = 0;
+retry:
+	name = user_path_parent(dfd, pathname, &nd, lookup_flags);
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
@@ -3370,6 +3383,10 @@
 exit1:
 	path_put(&nd.path);
 	putname(name);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -3423,8 +3440,9 @@
 	struct dentry *dentry;
 	struct nameidata nd;
 	struct inode *inode = NULL;
-
-	name = user_path_parent(dfd, pathname, &nd);
+	unsigned int lookup_flags = 0;
+retry:
+	name = user_path_parent(dfd, pathname, &nd, lookup_flags);
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
@@ -3462,6 +3480,11 @@
 exit1:
 	path_put(&nd.path);
 	putname(name);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		inode = NULL;
+		goto retry;
+	}
 	return error;
 
 slashes:
@@ -3513,12 +3536,13 @@
 	struct filename *from;
 	struct dentry *dentry;
 	struct path path;
+	unsigned int lookup_flags = 0;
 
 	from = getname(oldname);
 	if (IS_ERR(from))
 		return PTR_ERR(from);
-
-	dentry = user_path_create(newdfd, newname, &path, 0);
+retry:
+	dentry = user_path_create(newdfd, newname, &path, lookup_flags);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
 		goto out_putname;
@@ -3527,6 +3551,10 @@
 	if (!error)
 		error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
 	done_path_create(&path, dentry);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 out_putname:
 	putname(from);
 	return error;
@@ -3613,12 +3641,13 @@
 
 	if (flags & AT_SYMLINK_FOLLOW)
 		how |= LOOKUP_FOLLOW;
-
+retry:
 	error = user_path_at(olddfd, oldname, how, &old_path);
 	if (error)
 		return error;
 
-	new_dentry = user_path_create(newdfd, newname, &new_path, 0);
+	new_dentry = user_path_create(newdfd, newname, &new_path,
+					(how & LOOKUP_REVAL));
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
 		goto out;
@@ -3635,6 +3664,10 @@
 	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
 out_dput:
 	done_path_create(&new_path, new_dentry);
+	if (retry_estale(error, how)) {
+		how |= LOOKUP_REVAL;
+		goto retry;
+	}
 out:
 	path_put(&old_path);
 
@@ -3807,15 +3840,17 @@
 	struct nameidata oldnd, newnd;
 	struct filename *from;
 	struct filename *to;
+	unsigned int lookup_flags = 0;
+	bool should_retry = false;
 	int error;
-
-	from = user_path_parent(olddfd, oldname, &oldnd);
+retry:
+	from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
 	if (IS_ERR(from)) {
 		error = PTR_ERR(from);
 		goto exit;
 	}
 
-	to = user_path_parent(newdfd, newname, &newnd);
+	to = user_path_parent(newdfd, newname, &newnd, lookup_flags);
 	if (IS_ERR(to)) {
 		error = PTR_ERR(to);
 		goto exit1;
@@ -3887,11 +3922,18 @@
 	unlock_rename(new_dir, old_dir);
 	mnt_drop_write(oldnd.path.mnt);
 exit2:
+	if (retry_estale(error, lookup_flags))
+		should_retry = true;
 	path_put(&newnd.path);
 	putname(to);
 exit1:
 	path_put(&oldnd.path);
 	putname(from);
+	if (should_retry) {
+		should_retry = false;
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 exit:
 	return error;
 }
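Every syscall path touched in namei.c above gains the same ESTALE retry
scaffolding: perform the lookup once without LOOKUP_REVAL, and if the
operation fails with -ESTALE, repeat it exactly once with revalidation
forced.  Reduced to its moving parts (the operation helper is a
placeholder):

	unsigned int lookup_flags = 0;
retry:
	error = lookup_and_do_operation(dfd, name, lookup_flags);
	if (retry_estale(error, lookup_flags)) {
		/* true only for -ESTALE when LOOKUP_REVAL wasn't set */
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;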
diff --git a/fs/namespace.c b/fs/namespace.c
index 2496062..55605c5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
 #include <linux/export.h>
 #include <linux/capability.h>
 #include <linux/mnt_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/idr.h>
@@ -20,6 +21,7 @@
 #include <linux/fs_struct.h>	/* get_fs_root et.al. */
 #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
 #include <linux/uaccess.h>
+#include <linux/proc_fs.h>
 #include "pnode.h"
 #include "internal.h"
 
@@ -311,7 +313,7 @@
 	 * incremented count after it has set MNT_WRITE_HOLD.
 	 */
 	smp_mb();
-	while (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
+	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
 		cpu_relax();
 	/*
 	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
@@ -784,7 +786,7 @@
 	if (!mnt)
 		return ERR_PTR(-ENOMEM);
 
-	if (flag & (CL_SLAVE | CL_PRIVATE))
+	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
 		mnt->mnt_group_id = 0; /* not a peer of original */
 	else
 		mnt->mnt_group_id = old->mnt_group_id;
@@ -805,7 +807,8 @@
 	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
 	br_write_unlock(&vfsmount_lock);
 
-	if (flag & CL_SLAVE) {
+	if ((flag & CL_SLAVE) ||
+	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
 		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
 		mnt->mnt_master = old;
 		CLEAR_MNT_SHARED(mnt);
@@ -1266,7 +1269,7 @@
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(mnt, flags);
@@ -1292,7 +1295,7 @@
 
 static int mount_is_safe(struct path *path)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -1308,6 +1311,26 @@
 #endif
 }
 
+static bool mnt_ns_loop(struct path *path)
+{
+	/* Could bind mounting the mount namespace inode cause a
+	 * mount namespace loop?
+	 */
+	struct inode *inode = path->dentry->d_inode;
+	struct proc_inode *ei;
+	struct mnt_namespace *mnt_ns;
+
+	if (!proc_ns_inode(inode))
+		return false;
+
+	ei = PROC_I(inode);
+	if (ei->ns_ops != &mntns_operations)
+		return false;
+
+	mnt_ns = ei->ns;
+	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+}
+
 struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 					int flag)
 {
@@ -1610,7 +1633,7 @@
 	int type;
 	int err = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (path->dentry != path->mnt->mnt_root)
@@ -1655,6 +1678,10 @@
 	if (err)
 		return err;
 
+	err = -EINVAL;
+	if (mnt_ns_loop(&old_path))
+		goto out;
+
 	err = lock_mount(path);
 	if (err)
 		goto out;
@@ -1770,7 +1797,7 @@
 	struct mount *p;
 	struct mount *old;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -1857,21 +1884,6 @@
 	return ERR_PTR(err);
 }
 
-static struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
-{
-	struct file_system_type *type = get_fs_type(fstype);
-	struct vfsmount *mnt;
-	if (!type)
-		return ERR_PTR(-ENODEV);
-	mnt = vfs_kern_mount(type, flags, name, data);
-	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
-	    !mnt->mnt_sb->s_subtype)
-		mnt = fs_set_subtype(mnt, fstype);
-	put_filesystem(type);
-	return mnt;
-}
-
 /*
  * add a mount into a namespace's mount tree
  */
@@ -1917,20 +1929,46 @@
  * create a new mount for userspace and request it to be added into the
  * namespace's tree
  */
-static int do_new_mount(struct path *path, const char *type, int flags,
+static int do_new_mount(struct path *path, const char *fstype, int flags,
 			int mnt_flags, const char *name, void *data)
 {
+	struct file_system_type *type;
+	struct user_namespace *user_ns;
 	struct vfsmount *mnt;
 	int err;
 
-	if (!type)
+	if (!fstype)
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	mnt = do_kern_mount(type, flags, name, data);
+	type = get_fs_type(fstype);
+	if (!type)
+		return -ENODEV;
+
+	if (user_ns != &init_user_ns) {
+		if (!(type->fs_flags & FS_USERNS_MOUNT)) {
+			put_filesystem(type);
+			return -EPERM;
+		}
+		/* Only in special cases allow devices from mounts
+		 * created outside the initial user namespace.
+		 */
+		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+			flags |= MS_NODEV;
+			mnt_flags |= MNT_NODEV;
+		}
+	}
+
+	mnt = vfs_kern_mount(type, flags, name, data);
+	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+	    !mnt->mnt_sb->s_subtype)
+		mnt = fs_set_subtype(mnt, fstype);
+
+	put_filesystem(type);
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
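The do_new_mount() rework above is also where mounts from non-initial user
namespaces are constrained: only filesystems flagged FS_USERNS_MOUNT may be
mounted there at all, and unless FS_USERNS_DEV_MOUNT is set as well,
MS_NODEV/MNT_NODEV are forced on.  A filesystem opts in at registration
time, roughly (example_mount() is hypothetical):

	static struct file_system_type example_fs_type = {
		.name		= "examplefs",
		.mount		= example_mount,
		.kill_sb	= kill_anon_super,
		.fs_flags	= FS_USERNS_MOUNT,	/* but not ..._DEV_MOUNT */
	};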
 
@@ -2261,18 +2299,42 @@
 	return retval;
 }
 
-static struct mnt_namespace *alloc_mnt_ns(void)
+static void free_mnt_ns(struct mnt_namespace *ns)
+{
+	proc_free_inum(ns->proc_inum);
+	put_user_ns(ns->user_ns);
+	kfree(ns);
+}
+
+/*
+ * Assign a sequence number so we can detect when we attempt to bind
+ * mount a reference to an older mount namespace into the current
+ * mount namespace, preventing reference counting loops.  A 64bit
+ * number incrementing even at 10GHz would take about 58 years to wrap,
+ * and namespaces are created far more slowly than that, so we can
+ * ignore the possibility.
+ */
+static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
+
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 {
 	struct mnt_namespace *new_ns;
+	int ret;
 
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return ERR_PTR(-ENOMEM);
+	ret = proc_alloc_inum(&new_ns->proc_inum);
+	if (ret) {
+		kfree(new_ns);
+		return ERR_PTR(ret);
+	}
+	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
 	atomic_set(&new_ns->count, 1);
 	new_ns->root = NULL;
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
+	new_ns->user_ns = get_user_ns(user_ns);
 	return new_ns;
 }
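Together, the seq assignment above and mnt_ns_loop() encode one ordering
rule: a namespace file may only be bind-mounted into a namespace whose own
sequence number is strictly smaller, so containment always points from
older to newer and a reference cycle is impossible.  As a predicate
(illustrative; it is just the negation of the mnt_ns_loop() test):

	static bool may_bind_ns_file(struct mnt_namespace *cur,
				     struct mnt_namespace *target)
	{
		return cur->seq < target->seq;
	}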
 
@@ -2281,24 +2343,28 @@
  * copied from the namespace of the passed in task structure.
  */
 static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
-		struct fs_struct *fs)
+		struct user_namespace *user_ns, struct fs_struct *fs)
 {
 	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
 	struct mount *p, *q;
 	struct mount *old = mnt_ns->root;
 	struct mount *new;
+	int copy_flags;
 
-	new_ns = alloc_mnt_ns();
+	new_ns = alloc_mnt_ns(user_ns);
 	if (IS_ERR(new_ns))
 		return new_ns;
 
 	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
-	new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
+	copy_flags = CL_COPY_ALL | CL_EXPIRE;
+	if (user_ns != mnt_ns->user_ns)
+		copy_flags |= CL_SHARED_TO_SLAVE;
+	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
 	if (IS_ERR(new)) {
 		up_write(&namespace_sem);
-		kfree(new_ns);
+		free_mnt_ns(new_ns);
 		return ERR_CAST(new);
 	}
 	new_ns->root = new;
@@ -2339,7 +2405,7 @@
 }
 
 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
-		struct fs_struct *new_fs)
+		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2349,7 +2415,7 @@
 	if (!(flags & CLONE_NEWNS))
 		return ns;
 
-	new_ns = dup_mnt_ns(ns, new_fs);
+	new_ns = dup_mnt_ns(ns, user_ns, new_fs);
 
 	put_mnt_ns(ns);
 	return new_ns;
@@ -2361,7 +2427,7 @@
  */
 static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 {
-	struct mnt_namespace *new_ns = alloc_mnt_ns();
+	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
 	if (!IS_ERR(new_ns)) {
 		struct mount *mnt = real_mount(m);
 		mnt->mnt_ns = new_ns;
@@ -2501,7 +2567,7 @@
 	struct mount *new_mnt, *root_mnt;
 	int error;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	error = user_path_dir(new_root, &new);
@@ -2583,8 +2649,13 @@
 	struct vfsmount *mnt;
 	struct mnt_namespace *ns;
 	struct path root;
+	struct file_system_type *type;
 
-	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
+	type = get_fs_type("rootfs");
+	if (!type)
+		panic("Can't find rootfs type");
+	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
+	put_filesystem(type);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
 
@@ -2647,7 +2718,7 @@
 	br_write_unlock(&vfsmount_lock);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
-	kfree(ns);
+	free_mnt_ns(ns);
 }
 
 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
@@ -2681,3 +2752,72 @@
 {
 	return check_mnt(real_mount(mnt));
 }
+
+static void *mntns_get(struct task_struct *task)
+{
+	struct mnt_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	rcu_read_lock();
+	nsproxy = task_nsproxy(task);
+	if (nsproxy) {
+		ns = nsproxy->mnt_ns;
+		get_mnt_ns(ns);
+	}
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void mntns_put(void *ns)
+{
+	put_mnt_ns(ns);
+}
+
+static int mntns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct fs_struct *fs = current->fs;
+	struct mnt_namespace *mnt_ns = ns;
+	struct path root;
+
+	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_CHROOT) ||
+	    !nsown_capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (fs->users != 1)
+		return -EINVAL;
+
+	get_mnt_ns(mnt_ns);
+	put_mnt_ns(nsproxy->mnt_ns);
+	nsproxy->mnt_ns = mnt_ns;
+
+	/* Find the root */
+	root.mnt    = &mnt_ns->root->mnt;
+	root.dentry = mnt_ns->root->mnt.mnt_root;
+	path_get(&root);
+	while (d_mountpoint(root.dentry) && follow_down_one(&root))
+		;
+
+	/* Update the pwd and root */
+	set_fs_pwd(fs, &root);
+	set_fs_root(fs, &root);
+
+	path_put(&root);
+	return 0;
+}
+
+static unsigned int mntns_inum(void *ns)
+{
+	struct mnt_namespace *mnt_ns = ns;
+	return mnt_ns->proc_inum;
+}
+
+const struct proc_ns_operations mntns_operations = {
+	.name		= "mnt",
+	.type		= CLONE_NEWNS,
+	.get		= mntns_get,
+	.put		= mntns_put,
+	.install	= mntns_install,
+	.inum		= mntns_inum,
+};
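(The proc_ns_operations table above is what exposes /proc/<pid>/ns/mnt and
lets setns(2) enter another task's mount namespace via mntns_install().  A
minimal userspace sketch of the new capability -- error handling trimmed,
and the caller needs CAP_SYS_ADMIN plus CAP_SYS_CHROOT as enforced above:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <sched.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <unistd.h>

	int join_mntns(pid_t pid)
	{
		char path[64];
		int fd, ret;

		snprintf(path, sizeof(path), "/proc/%d/ns/mnt", (int)pid);
		fd = open(path, O_RDONLY);
		if (fd < 0)
			return -1;
		ret = setns(fd, CLONE_NEWNS);	/* invokes mntns_install() */
		close(fd);
		return ret;
	}
)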
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d7e9fe7..1acdad7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -976,9 +976,7 @@
 			goto out;
 
 		if (attr->ia_size != i_size_read(inode)) {
-			result = vmtruncate(inode, attr->ia_size);
-			if (result)
-				goto out;
+			truncate_setsize(inode, attr->ia_size);
 			mark_inode_dirty(inode);
 		}
 	}
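(vmtruncate() is being removed tree-wide; truncate_setsize() both updates
i_size and truncates the page cache, and it cannot fail, so the error branch
disappears.  The resulting setattr idiom, sketched generically:

	if ((attr->ia_valid & ATTR_SIZE) &&
	    attr->ia_size != i_size_read(inode)) {
		truncate_setsize(inode, attr->ia_size);	/* no return value */
		mark_inode_dirty(inode);
	}
)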
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b7db608..cce2c05 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -24,7 +24,7 @@
 	  delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
 	  nfs4namespace.o nfs4getroot.o nfs4client.o
 nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o
-nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o
+nfsv4-$(CONFIG_NFS_V4_1)	+= nfs4session.o pnfs.o pnfs_dev.o
 
 obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
 nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f1027b0..4fa788c 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -40,6 +40,7 @@
 #include <linux/pagevec.h>
 
 #include "../pnfs.h"
+#include "../nfs4session.h"
 #include "../internal.h"
 #include "blocklayout.h"
 
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index dded263..862a2f1 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -118,7 +118,6 @@
 	struct dentry *dir;
 
 	dir = rpc_d_lookup_sb(sb, "cache");
-	BUG_ON(dir == NULL);
 	ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd);
 	dput(dir);
 	return ret;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 4251c2ae..efd54f0a 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -142,7 +142,7 @@
 
 struct cb_recallslotargs {
 	struct sockaddr	*crsa_addr;
-	uint32_t	crsa_target_max_slots;
+	uint32_t	crsa_target_highest_slotid;
 };
 extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
 					 void *dummy,
@@ -167,8 +167,6 @@
 	struct cb_layoutrecallargs *args,
 	void *dummy, struct cb_process_state *cps);
 
-extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
-
 struct cb_devicenotifyitem {
 	uint32_t		cbd_notify_type;
 	uint32_t		cbd_layout_type;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 76b4a7a..c89b26b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -14,6 +14,7 @@
 #include "delegation.h"
 #include "internal.h"
 #include "pnfs.h"
+#include "nfs4session.h"
 
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -216,7 +217,6 @@
 			}
 			pnfs_get_layout_hdr(lo);
 			spin_unlock(&ino->i_lock);
-			BUG_ON(!list_empty(&lo->plh_bulk_recall));
 			list_add(&lo->plh_bulk_recall, &recall_list);
 		}
 	}
@@ -562,23 +562,16 @@
 	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 
-	dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
+	dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %d\n",
 		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
-		args->crsa_target_max_slots);
+		args->crsa_target_highest_slotid);
 
 	fc_tbl = &cps->clp->cl_session->fc_slot_table;
 
-	status = htonl(NFS4ERR_BAD_HIGH_SLOT);
-	if (args->crsa_target_max_slots > fc_tbl->max_slots ||
-	    args->crsa_target_max_slots < 1)
-		goto out;
-
 	status = htonl(NFS4_OK);
-	if (args->crsa_target_max_slots == fc_tbl->max_slots)
-		goto out;
 
-	fc_tbl->target_max_slots = args->crsa_target_max_slots;
-	nfs41_handle_recall_slot(cps->clp);
+	nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid);
+	nfs41_server_notify_target_slotid_update(cps->clp);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 742ff4f..59461c9 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -16,6 +16,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "internal.h"
+#include "nfs4session.h"
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
@@ -520,7 +521,7 @@
 	p = read_buf(xdr, 4);
 	if (unlikely(p == NULL))
 		return htonl(NFS4ERR_BADXDR);
-	args->crsa_target_max_slots = ntohl(*p++);
+	args->crsa_target_highest_slotid = ntohl(*p++);
 	return 0;
 }
 
@@ -762,7 +763,7 @@
 	 * A single slot, so highest used slotid is either 0 or -1
 	 */
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	nfs4_check_drain_bc_complete(session);
+	nfs4_session_drain_complete(session, tbl);
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8b39a42..9f3c664 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -277,7 +277,7 @@
 		nfs_cb_idr_remove_locked(clp);
 		spin_unlock(&nn->nfs_client_lock);
 
-		BUG_ON(!list_empty(&clp->cl_superblocks));
+		WARN_ON_ONCE(!list_empty(&clp->cl_superblocks));
 
 		clp->rpc_ops->free_client(clp);
 	}
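(A pattern repeated throughout this pull: sanity checks whose failure the
code can survive are downgraded from BUG_ON(), which oopses and typically
takes the machine down, to WARN_ON_ONCE(), which prints a single backtrace
and lets the caller carry on.  Roughly:

	BUG_ON(!list_empty(&clp->cl_superblocks));	/* old: guaranteed crash */
	WARN_ON_ONCE(!list_empty(&clp->cl_superblocks));/* new: one loud warning,
							   then attempt to limp on */
)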
@@ -615,8 +615,7 @@
  */
 static void nfs_destroy_server(struct nfs_server *server)
 {
-	if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
-			!(server->flags & NFS_MOUNT_LOCAL_FCNTL))
+	if (server->nlm_host)
 		nlmclnt_done(server->nlm_host);
 }
 
@@ -1061,10 +1060,6 @@
 	if (error < 0)
 		goto error;
 
-	BUG_ON(!server->nfs_client);
-	BUG_ON(!server->nfs_client->rpc_ops);
-	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
 	/* Probe the root fh to retrieve its FSID */
 	error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr);
 	if (error < 0)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b9e66b7..32e6c53 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -871,7 +871,7 @@
 	return res;
 }
 
-static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
+static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
@@ -880,10 +880,10 @@
 	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name,
-			offset, origin);
+			offset, whence);
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += filp->f_pos;
 		case 0:
@@ -979,10 +979,11 @@
  * particular file and the "nocto" mount flag is not set.
  *
  */
-static inline
+static
 int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
+	int ret;
 
 	if (IS_AUTOMOUNT(inode))
 		return 0;
@@ -993,9 +994,13 @@
 	if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) &&
 	    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
 		goto out_force;
-	return 0;
+out:
+	return (inode->i_nlink == 0) ? -ENOENT : 0;
 out_force:
-	return __nfs_revalidate_inode(server, inode);
+	ret = __nfs_revalidate_inode(server, inode);
+	if (ret != 0)
+		return ret;
+	goto out;
 }
 
 /*
@@ -1156,11 +1161,14 @@
 
 }
 
+/* Ensure that we revalidate inode->i_nlink */
 static void nfs_drop_nlink(struct inode *inode)
 {
 	spin_lock(&inode->i_lock);
-	if (inode->i_nlink > 0)
-		drop_nlink(inode);
+	/* drop the inode if we're reasonably sure this is the last link */
+	if (inode->i_nlink == 1)
+		clear_nlink(inode);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
 	spin_unlock(&inode->i_lock);
 }
 
@@ -1175,8 +1183,8 @@
 		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
 
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
-		drop_nlink(inode);
 		nfs_complete_unlink(dentry, inode);
+		nfs_drop_nlink(inode);
 	}
 	iput(inode);
 }
@@ -1647,10 +1655,8 @@
 	if (inode != NULL) {
 		NFS_PROTO(inode)->return_delegation(inode);
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
-		/* The VFS may want to delete this inode */
 		if (error == 0)
 			nfs_drop_nlink(inode);
-		nfs_mark_for_revalidate(inode);
 	} else
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 	if (error == -ENOENT)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index cae26cb..0bd7a55 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,21 +266,8 @@
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
 
-		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
-			if (bytes > hdr->good_bytes)
-				zero_user(page, 0, PAGE_SIZE);
-			else if (hdr->good_bytes - bytes < PAGE_SIZE)
-				zero_user_segment(page,
-					hdr->good_bytes & ~PAGE_MASK,
-					PAGE_SIZE);
-		}
-		if (!PageCompound(page)) {
-			if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
-				if (bytes < hdr->good_bytes)
-					set_page_dirty(page);
-			} else
-				set_page_dirty(page);
-		}
+		if (!PageCompound(page) && bytes < hdr->good_bytes)
+			set_page_dirty(page);
 		bytes += req->wb_bytes;
 		nfs_list_remove_request(req);
 		nfs_direct_readpage_release(req);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 582bb88..3c2b893 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -119,18 +119,18 @@
 	return __nfs_revalidate_inode(server, inode);
 }
 
-loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
+loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
 {
 	dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
 			filp->f_path.dentry->d_parent->d_name.name,
 			filp->f_path.dentry->d_name.name,
-			offset, origin);
+			offset, whence);
 
 	/*
-	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
+	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
 	 * the cached file length
 	 */
-	if (origin != SEEK_SET && origin != SEEK_CUR) {
+	if (whence != SEEK_SET && whence != SEEK_CUR) {
 		struct inode *inode = filp->f_mapping->host;
 
 		int retval = nfs_revalidate_file_size(inode, filp);
@@ -138,7 +138,7 @@
 			return (loff_t)retval;
 	}
 
-	return generic_file_llseek(filp, offset, origin);
+	return generic_file_llseek(filp, offset, whence);
 }
 EXPORT_SYMBOL_GPL(nfs_file_llseek);
 
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index c817787..24d1d1c 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -307,6 +307,7 @@
 		nfs_fscache_inode_unlock(inode);
 	}
 }
+EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie);
 
 /*
  * Replace a per-inode cookie due to revalidation detecting a file having
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index c5b11b5..4ecb766 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -153,6 +153,22 @@
 }
 
 /*
+ * Invalidate the contents of fscache for this inode.  This will not sleep.
+ */
+static inline void nfs_fscache_invalidate(struct inode *inode)
+{
+	fscache_invalidate(NFS_I(inode)->fscache);
+}
+
+/*
+ * Wait for an object to finish being invalidated.
+ */
+static inline void nfs_fscache_wait_on_invalidate(struct inode *inode)
+{
+	fscache_wait_on_invalidate(NFS_I(inode)->fscache);
+}
+
+/*
  * indicate the client caching state as readable text
  */
 static inline const char *nfs_server_fscache_state(struct nfs_server *server)
@@ -162,7 +178,6 @@
 	return "no ";
 }
 
-
 #else /* CONFIG_NFS_FSCACHE */
 static inline int nfs_fscache_register(void) { return 0; }
 static inline void nfs_fscache_unregister(void) {}
@@ -205,6 +220,10 @@
 static inline void nfs_readpage_to_fscache(struct inode *inode,
 					   struct page *page, int sync) {}
 
+
+static inline void nfs_fscache_invalidate(struct inode *inode) {}
+static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) {}
+
 static inline const char *nfs_server_fscache_state(struct nfs_server *server)
 {
 	return "no ";
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 9cc4a3f..bc3968fa 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -193,19 +193,15 @@
 	if (!cred)
 		return -ENOMEM;
 
-	keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred,
-			     (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-			     KEY_USR_VIEW | KEY_USR_READ,
-			     KEY_ALLOC_NOT_IN_QUOTA);
+	keyring = keyring_alloc(".id_resolver", 0, 0, cred,
+				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
+				KEY_USR_VIEW | KEY_USR_READ,
+				KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto failed_put_cred;
 	}
 
-	ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
-	if (ret < 0)
-		goto failed_put_key;
-
 	ret = register_key_type(&key_type_id_resolver);
 	if (ret < 0)
 		goto failed_put_key;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6fa01ae..ebeb94c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -107,13 +107,19 @@
 	return ino;
 }
 
+int nfs_drop_inode(struct inode *inode)
+{
+	return NFS_STALE(inode) || generic_drop_inode(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_drop_inode);
+
 void nfs_clear_inode(struct inode *inode)
 {
 	/*
 	 * The following should never happen...
 	 */
-	BUG_ON(nfs_have_writebacks(inode));
-	BUG_ON(!list_empty(&NFS_I(inode)->open_files));
+	WARN_ON_ONCE(nfs_have_writebacks(inode));
+	WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files));
 	nfs_zap_acl_cache(inode);
 	nfs_access_zap_cache(inode);
 	nfs_fscache_release_inode_cookie(inode);
@@ -155,10 +161,12 @@
 	nfsi->attrtimeo_timestamp = jiffies;
 
 	memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
-	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
+	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
-	else
+		nfs_fscache_invalidate(inode);
+	} else {
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+	}
 }
 
 void nfs_zap_caches(struct inode *inode)
@@ -173,6 +181,7 @@
 	if (mapping->nrpages != 0) {
 		spin_lock(&inode->i_lock);
 		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
+		nfs_fscache_invalidate(inode);
 		spin_unlock(&inode->i_lock);
 	}
 }
@@ -875,7 +884,7 @@
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
 	spin_unlock(&inode->i_lock);
 	nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
-	nfs_fscache_reset_inode_cookie(inode);
+	nfs_fscache_wait_on_invalidate(inode);
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
 			inode->i_sb->s_id, (long long)NFS_FILEID(inode));
 	return 0;
@@ -951,6 +960,10 @@
 		i_size_write(inode, nfs_size_to_loff_t(fattr->size));
 		ret |= NFS_INO_INVALID_ATTR;
 	}
+
+	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+		nfs_fscache_invalidate(inode);
+
 	return ret;
 }
 
@@ -1199,8 +1212,10 @@
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
-	if (S_ISDIR(inode->i_mode))
+	if (S_ISDIR(inode->i_mode)) {
 		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+		nfs_fscache_invalidate(inode);
+	}
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 		return 0;
 	return nfs_refresh_inode_locked(inode, fattr);
@@ -1488,6 +1503,9 @@
 			(save_cache_validity & NFS_INO_REVAL_FORCED))
 		nfsi->cache_validity |= invalid;
 
+	if (invalid & NFS_INO_INVALID_DATA)
+		nfs_fscache_invalidate(inode);
+
 	return 0;
  out_err:
 	/*
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 05521ca..f0e6c7d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -18,27 +18,6 @@
  */
 #define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
 
-/*
- * Determine if sessions are in use.
- */
-static inline int nfs4_has_session(const struct nfs_client *clp)
-{
-#ifdef CONFIG_NFS_V4_1
-	if (clp->cl_session)
-		return 1;
-#endif /* CONFIG_NFS_V4_1 */
-	return 0;
-}
-
-static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
-{
-#ifdef CONFIG_NFS_V4_1
-	if (nfs4_has_session(clp))
-		return (clp->cl_session->flags & SESSION4_PERSIST);
-#endif /* CONFIG_NFS_V4_1 */
-	return 0;
-}
-
 static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
 {
 	if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
@@ -276,8 +255,6 @@
 extern struct rpc_procinfo nfs4_procedures[];
 #endif
 
-extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
-
 /* proc.c */
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
@@ -319,6 +296,7 @@
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
 extern int nfs_write_inode(struct inode *, struct writeback_control *);
+extern int nfs_drop_inode(struct inode *);
 extern void nfs_clear_inode(struct inode *);
 extern void nfs_evict_inode(struct inode *);
 void nfs_zap_acl_cache(struct inode *inode);
@@ -386,9 +364,6 @@
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
 			      struct nfs_pgio_header *hdr);
-extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode,
-			const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
@@ -411,9 +386,6 @@
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
-extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode, int ioflags,
-			const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
@@ -474,18 +446,6 @@
 			    const struct rpc_timeout *timeparms,
 			    const char *ip_addr,
 			    rpc_authflavor_t authflavour);
-extern int _nfs4_call_sync(struct rpc_clnt *clnt,
-			   struct nfs_server *server,
-			   struct rpc_message *msg,
-			   struct nfs4_sequence_args *args,
-			   struct nfs4_sequence_res *res,
-			   int cache_reply);
-extern int _nfs4_call_sync_session(struct rpc_clnt *clnt,
-				   struct nfs_server *server,
-				   struct rpc_message *msg,
-				   struct nfs4_sequence_args *args,
-				   struct nfs4_sequence_res *res,
-				   int cache_reply);
 extern int nfs40_walk_client_list(struct nfs_client *clp,
 				struct nfs_client **result,
 				struct rpc_cred *cred);
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 015f71f..91a6faf 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -169,6 +169,9 @@
 		(info->hostname ? info->hostname : "server"),
 			info->dirpath);
 
+	if (strlen(info->dirpath) > MNTPATHLEN)
+		return -ENAMETOOLONG;
+
 	if (info->noresvport)
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
@@ -242,6 +245,9 @@
 	struct rpc_clnt *clnt;
 	int status;
 
+	if (strlen(info->dirpath) > MNTPATHLEN)
+		return;
+
 	if (info->noresvport)
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
@@ -283,7 +289,6 @@
 	const u32 pathname_len = strlen(pathname);
 	__be32 *p;
 
-	BUG_ON(pathname_len > MNTPATHLEN);
 	p = xdr_reserve_space(xdr, 4 + pathname_len);
 	xdr_encode_opaque(p, pathname, pathname_len);
 }
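(Validating the path length once at the entry points lets the XDR encoder
drop its BUG_ON(): an oversized export path now fails the mount cleanly with
-ENAMETOOLONG instead of crashing.  The same check guards the umount path,
which simply returns, since it has no way to report failure:

	if (strlen(info->dirpath) > MNTPATHLEN)
		return -ENAMETOOLONG;	/* reject before any RPC is built */
)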
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index d04f0df..06b9df4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -195,7 +195,6 @@
 {
 	__be32 *p;
 
-	BUG_ON(fh->size != NFS2_FHSIZE);
 	p = xdr_reserve_space(xdr, NFS2_FHSIZE);
 	memcpy(p, fh->data, NFS2_FHSIZE);
 }
@@ -388,7 +387,7 @@
 {
 	__be32 *p;
 
-	BUG_ON(length > NFS2_MAXNAMLEN);
+	WARN_ON_ONCE(length > NFS2_MAXNAMLEN);
 	p = xdr_reserve_space(xdr, 4 + length);
 	xdr_encode_opaque(p, name, length);
 }
@@ -428,7 +427,6 @@
 {
 	__be32 *p;
 
-	BUG_ON(length > NFS2_MAXPATHLEN);
 	p = xdr_reserve_space(xdr, 4);
 	*p = cpu_to_be32(length);
 	xdr_write_pages(xdr, pages, 0, length);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 6932209..70efb63 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -24,14 +24,14 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
-/* A wrapper to handle the EJUKEBOX and EKEYEXPIRED error messages */
+/* A wrapper to handle the EJUKEBOX error messages */
 static int
 nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 {
 	int res;
 	do {
 		res = rpc_call_sync(clnt, msg, flags);
-		if (res != -EJUKEBOX && res != -EKEYEXPIRED)
+		if (res != -EJUKEBOX)
 			break;
 		freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
@@ -44,7 +44,7 @@
 static int
 nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
 {
-	if (task->tk_status != -EJUKEBOX && task->tk_status != -EKEYEXPIRED)
+	if (task->tk_status != -EJUKEBOX)
 		return 0;
 	if (task->tk_status == -EJUKEBOX)
 		nfs_inc_stats(inode, NFSIOS_DELAY);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 6cbe894..bffc324 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -198,7 +198,7 @@
 {
 	__be32 *p;
 
-	BUG_ON(length > NFS3_MAXNAMLEN);
+	WARN_ON_ONCE(length > NFS3_MAXNAMLEN);
 	p = xdr_reserve_space(xdr, 4 + length);
 	xdr_encode_opaque(p, name, length);
 }
@@ -238,7 +238,6 @@
 static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
 			    const u32 length)
 {
-	BUG_ON(length > NFS3_MAXPATHLEN);
 	encode_uint32(xdr, length);
 	xdr_write_pages(xdr, pages, 0, length);
 }
@@ -388,7 +387,6 @@
  */
 static void encode_ftype3(struct xdr_stream *xdr, const u32 type)
 {
-	BUG_ON(type > NF3FIFO);
 	encode_uint32(xdr, type);
 }
 
@@ -443,7 +441,7 @@
 {
 	__be32 *p;
 
-	BUG_ON(fh->size > NFS3_FHSIZE);
+	WARN_ON_ONCE(fh->size > NFS3_FHSIZE);
 	p = xdr_reserve_space(xdr, 4 + fh->size);
 	xdr_encode_opaque(p, fh->data, fh->size);
 }
@@ -1339,6 +1337,7 @@
 	error = nfsacl_encode(xdr->buf, base, args->inode,
 			    (args->mask & NFS_ACL) ?
 			    args->acl_access : NULL, 1, 0);
+	/* FIXME: this is just broken */
 	BUG_ON(error < 0);
 	error = nfsacl_encode(xdr->buf, base + error, args->inode,
 			    (args->mask & NFS_DFACL) ?
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a525fde..a3f488b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -11,6 +11,8 @@
 
 #if IS_ENABLED(CONFIG_NFS_V4)
 
+#define NFS4_MAX_LOOP_ON_RECOVER (10)
+
 struct idmap;
 
 enum nfs4_client_state {
@@ -21,18 +23,12 @@
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
 	NFS4CLNT_SESSION_RESET,
-	NFS4CLNT_RECALL_SLOT,
 	NFS4CLNT_LEASE_CONFIRM,
 	NFS4CLNT_SERVER_SCOPE_MISMATCH,
 	NFS4CLNT_PURGE_STATE,
 	NFS4CLNT_BIND_CONN_TO_SESSION,
 };
 
-enum nfs4_session_state {
-	NFS4_SESSION_INITING,
-	NFS4_SESSION_DRAINING,
-};
-
 #define NFS4_RENEW_TIMEOUT		0x01
 #define NFS4_RENEW_DELEGATION_CB	0x02
 
@@ -43,8 +39,7 @@
 			struct nfs_server *server,
 			struct rpc_message *msg,
 			struct nfs4_sequence_args *args,
-			struct nfs4_sequence_res *res,
-			int cache_reply);
+			struct nfs4_sequence_res *res);
 	bool	(*match_stateid)(const nfs4_stateid *,
 			const nfs4_stateid *);
 	int	(*find_root_sec)(struct nfs_server *, struct nfs_fh *,
@@ -241,18 +236,14 @@
 	return server->nfs_client->cl_session;
 }
 
-extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy);
 extern int nfs4_setup_sequence(const struct nfs_server *server,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
 		struct rpc_task *task);
 extern int nfs41_setup_sequence(struct nfs4_session *session,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
 		struct rpc_task *task);
-extern void nfs4_destroy_session(struct nfs4_session *session);
-extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
 extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
-extern int nfs4_init_session(struct nfs_server *server);
 extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
 		struct nfs_fsinfo *fsinfo);
 extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
@@ -280,11 +271,7 @@
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
 		struct rpc_task *task)
 {
-	return 0;
-}
-
-static inline int nfs4_init_session(struct nfs_server *server)
-{
+	rpc_call_start(task);
 	return 0;
 }
 
@@ -321,17 +308,20 @@
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp);
+struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
 struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
 int nfs4_discover_server_trunking(struct nfs_client *clp,
 			struct nfs_client **);
 int nfs40_discover_server_trunking(struct nfs_client *clp,
 			struct nfs_client **, struct rpc_cred *);
 #if defined(CONFIG_NFS_V4_1)
-struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
 struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
 int nfs41_discover_server_trunking(struct nfs_client *clp,
 			struct nfs_client **, struct rpc_cred *);
 extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
+extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp);
+extern void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp);
+
 #else
 static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
 {
@@ -349,11 +339,12 @@
 extern void nfs_inode_find_state_and_recover(struct inode *inode,
 		const nfs4_stateid *stateid);
 extern void nfs4_schedule_lease_recovery(struct nfs_client *);
+extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
+extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
 extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
-extern void nfs41_handle_recall_slot(struct nfs_client *clp);
 extern void nfs41_handle_server_scope(struct nfs_client *,
 				      struct nfs41_server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 6bacfde..acc3472 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -12,6 +12,7 @@
 #include "internal.h"
 #include "callback.h"
 #include "delegation.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
 
@@ -713,10 +714,6 @@
 	struct nfs_fattr *fattr;
 	int error;
 
-	BUG_ON(!server->nfs_client);
-	BUG_ON(!server->nfs_client->rpc_ops);
-	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
 	/* data servers support only a subset of NFSv4.1 */
 	if (is_ds_only_client(server->nfs_client))
 		return -EPROTONOSUPPORT;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index afddd66..08ddccc 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -5,6 +5,7 @@
  */
 #include <linux/nfs_fs.h>
 #include "internal.h"
+#include "fscache.h"
 #include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
@@ -20,7 +21,6 @@
 	struct iattr attr;
 	int err;
 
-	BUG_ON(inode != dentry->d_inode);
 	/*
 	 * If no cached dentry exists or if it's negative, NFSv4 handled the
 	 * opens in ->lookup() or ->create().
@@ -75,6 +75,7 @@
 
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	nfs_file_set_open_context(filp, ctx);
+	nfs_fscache_set_inode_cookie(inode, filp);
 	err = 0;
 
 out_put_ctx:
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 2e45fd9..194c484 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -35,6 +35,7 @@
 
 #include <linux/sunrpc/metrics.h>
 
+#include "nfs4session.h"
 #include "internal.h"
 #include "delegation.h"
 #include "nfs4filelayout.h"
@@ -178,7 +179,6 @@
 		break;
 	case -NFS4ERR_DELAY:
 	case -NFS4ERR_GRACE:
-	case -EKEYEXPIRED:
 		rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
 		break;
 	case -NFS4ERR_RETRY_UNCACHED_REP:
@@ -306,12 +306,10 @@
 	}
 	rdata->read_done_cb = filelayout_read_done_cb;
 
-	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
-				&rdata->args.seq_args, &rdata->res.seq_res,
-				task))
-		return;
-
-	rpc_call_start(task);
+	nfs41_setup_sequence(rdata->ds_clp->cl_session,
+			&rdata->args.seq_args,
+			&rdata->res.seq_res,
+			task);
 }
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
@@ -408,12 +406,10 @@
 		rpc_exit(task, 0);
 		return;
 	}
-	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
-				&wdata->args.seq_args, &wdata->res.seq_res,
-				task))
-		return;
-
-	rpc_call_start(task);
+	nfs41_setup_sequence(wdata->ds_clp->cl_session,
+			&wdata->args.seq_args,
+			&wdata->res.seq_res,
+			task);
 }
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
@@ -449,12 +445,10 @@
 {
 	struct nfs_commit_data *wdata = data;
 
-	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
-				&wdata->args.seq_args, &wdata->res.seq_res,
-				task))
-		return;
-
-	rpc_call_start(task);
+	nfs41_setup_sequence(wdata->ds_clp->cl_session,
+			&wdata->args.seq_args,
+			&wdata->res.seq_res,
+			task);
 }
 
 static void filelayout_write_commit_done(struct rpc_task *task, void *data)
@@ -512,7 +506,6 @@
 	loff_t offset = data->args.offset;
 	u32 j, idx;
 	struct nfs_fh *fh;
-	int status;
 
 	dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
 		__func__, hdr->inode->i_ino,
@@ -538,9 +531,8 @@
 	data->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
+	nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
 				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
-	BUG_ON(status != 0);
 	return PNFS_ATTEMPTED;
 }
 
@@ -554,7 +546,6 @@
 	loff_t offset = data->args.offset;
 	u32 j, idx;
 	struct nfs_fh *fh;
-	int status;
 
 	/* Retrieve the correct rpc_client for the byte range */
 	j = nfs4_fl_calc_j_index(lseg, offset);
@@ -579,10 +570,9 @@
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
+	nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
 				    &filelayout_write_call_ops, sync,
 				    RPC_TASK_SOFTCONN);
-	BUG_ON(status != 0);
 	return PNFS_ATTEMPTED;
 }
 
@@ -909,7 +899,7 @@
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 			struct nfs_page *req)
 {
-	BUG_ON(pgio->pg_lseg != NULL);
+	WARN_ON_ONCE(pgio->pg_lseg != NULL);
 
 	if (req->wb_offset != req->wb_pgbase) {
 		/*
@@ -939,7 +929,7 @@
 	struct nfs_commit_info cinfo;
 	int status;
 
-	BUG_ON(pgio->pg_lseg != NULL);
+	WARN_ON_ONCE(pgio->pg_lseg != NULL);
 
 	if (req->wb_offset != req->wb_pgbase)
 		goto out_mds;
@@ -1187,7 +1177,6 @@
 	 */
 	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
 		if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
-			BUG_ON(!list_empty(&b->written));
 			pnfs_put_lseg(b->wlseg);
 			b->wlseg = NULL;
 		}
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index a8eaa9b..b720064 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 
 #include "internal.h"
+#include "nfs4session.h"
 #include "nfs4filelayout.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
@@ -162,8 +163,6 @@
 	dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
 		mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
 
-	BUG_ON(list_empty(&ds->ds_addrs));
-
 	list_for_each_entry(da, &ds->ds_addrs, da_node) {
 		dprintk("%s: DS %s: trying address %s\n",
 			__func__, ds->ds_remotestr, da->da_remotestr);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5eec442..5d864fb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -52,7 +52,6 @@
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/nfs_idmap.h>
-#include <linux/sunrpc/bc_xprt.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
@@ -64,14 +63,14 @@
 #include "callback.h"
 #include "pnfs.h"
 #include "netns.h"
+#include "nfs4session.h"
+#include "fscache.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 #define NFS4_POLL_RETRY_MIN	(HZ/10)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
-#define NFS4_MAX_LOOP_ON_RECOVER (10)
-
 struct nfs4_opendata;
 static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
@@ -206,7 +205,6 @@
 {
 	__be32 *start, *p;
 
-	BUG_ON(readdir->count < 80);
 	if (cookie > 2) {
 		readdir->cookie = cookie;
 		memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
@@ -256,22 +254,6 @@
 	kunmap_atomic(start);
 }
 
-static int nfs4_wait_clnt_recover(struct nfs_client *clp)
-{
-	int res;
-
-	might_sleep();
-
-	res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
-			nfs_wait_bit_killable, TASK_KILLABLE);
-	if (res)
-		return res;
-
-	if (clp->cl_cons_state < 0)
-		return clp->cl_cons_state;
-	return 0;
-}
-
 static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 {
 	int res = 0;
@@ -351,7 +333,6 @@
 			}
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
-		case -EKEYEXPIRED:
 			ret = nfs4_delay(server->client, &exception->timeout);
 			if (ret != 0)
 				break;
@@ -397,144 +378,136 @@
 
 #if defined(CONFIG_NFS_V4_1)
 
-/*
- * nfs4_free_slot - free a slot and efficiently update slot table.
- *
- * freeing a slot is trivially done by clearing its respective bit
- * in the bitmap.
- * If the freed slotid equals highest_used_slotid we want to update it
- * so that the server would be able to size down the slot table if needed,
- * otherwise we know that the highest_used_slotid is still in use.
- * When updating highest_used_slotid there may be "holes" in the bitmap
- * so we need to scan down from highest_used_slotid to 0 looking for the now
- * highest slotid in use.
- * If none found, highest_used_slotid is set to NFS4_NO_SLOT.
- *
- * Must be called while holding tbl->slot_tbl_lock
- */
-static void
-nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid)
-{
-	BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE);
-	/* clear used bit in bitmap */
-	__clear_bit(slotid, tbl->used_slots);
-
-	/* update highest_used_slotid when it is freed */
-	if (slotid == tbl->highest_used_slotid) {
-		slotid = find_last_bit(tbl->used_slots, tbl->max_slots);
-		if (slotid < tbl->max_slots)
-			tbl->highest_used_slotid = slotid;
-		else
-			tbl->highest_used_slotid = NFS4_NO_SLOT;
-	}
-	dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
-		slotid, tbl->highest_used_slotid);
-}
-
-bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	return true;
-}
-
-/*
- * Signal state manager thread if session fore channel is drained
- */
-static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
-{
-	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
-		rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq,
-				nfs4_set_task_privileged, NULL);
-		return;
-	}
-
-	if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
-		return;
-
-	dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
-	complete(&ses->fc_slot_table.complete);
-}
-
-/*
- * Signal state manager thread if session back channel is drained
- */
-void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
-{
-	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
-	    ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
-		return;
-	dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
-	complete(&ses->bc_slot_table.complete);
-}
-
 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 {
+	struct nfs4_session *session;
 	struct nfs4_slot_table *tbl;
+	bool send_new_highest_used_slotid = false;
 
-	tbl = &res->sr_session->fc_slot_table;
 	if (!res->sr_slot) {
 		/* just wake up the next guy waiting since
 		 * we may have not consumed a slot after all */
 		dprintk("%s: No slot\n", __func__);
 		return;
 	}
+	tbl = res->sr_slot->table;
+	session = tbl->session;
 
 	spin_lock(&tbl->slot_tbl_lock);
-	nfs4_free_slot(tbl, res->sr_slot - tbl->slots);
-	nfs4_check_drain_fc_complete(res->sr_session);
+	/* Be nice to the server: try to ensure that the last transmitted
+	 * value for highest_used_slotid <= target_highest_slotid
+	 */
+	if (tbl->highest_used_slotid > tbl->target_highest_slotid)
+		send_new_highest_used_slotid = true;
+
+	if (nfs41_wake_and_assign_slot(tbl, res->sr_slot)) {
+		send_new_highest_used_slotid = false;
+		goto out_unlock;
+	}
+	nfs4_free_slot(tbl, res->sr_slot);
+
+	if (tbl->highest_used_slotid != NFS4_NO_SLOT)
+		send_new_highest_used_slotid = false;
+out_unlock:
 	spin_unlock(&tbl->slot_tbl_lock);
 	res->sr_slot = NULL;
+	if (send_new_highest_used_slotid)
+		nfs41_server_notify_highest_slotid_update(session->clp);
 }
 
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
-	unsigned long timestamp;
+	struct nfs4_session *session;
+	struct nfs4_slot *slot;
 	struct nfs_client *clp;
-
-	/*
-	 * sr_status remains 1 if an RPC level error occurred. The server
-	 * may or may not have processed the sequence operation..
-	 * Proceed as if the server received and processed the sequence
-	 * operation.
-	 */
-	if (res->sr_status == 1)
-		res->sr_status = NFS_OK;
+	bool interrupted = false;
+	int ret = 1;
 
 	/* don't increment the sequence number if the task wasn't sent */
 	if (!RPC_WAS_SENT(task))
 		goto out;
 
+	slot = res->sr_slot;
+	session = slot->table->session;
+
+	if (slot->interrupted) {
+		slot->interrupted = 0;
+		interrupted = true;
+	}
+
 	/* Check the SEQUENCE operation status */
 	switch (res->sr_status) {
 	case 0:
 		/* Update the slot's sequence and clientid lease timer */
-		++res->sr_slot->seq_nr;
-		timestamp = res->sr_renewal_time;
-		clp = res->sr_session->clp;
-		do_renew_lease(clp, timestamp);
+		++slot->seq_nr;
+		clp = session->clp;
+		do_renew_lease(clp, res->sr_timestamp);
 		/* Check sequence flags */
 		if (res->sr_status_flags != 0)
 			nfs4_schedule_lease_recovery(clp);
+		nfs41_update_target_slotid(slot->table, slot, res);
 		break;
+	case 1:
+		/*
+		 * sr_status remains 1 if an RPC level error occurred.
+		 * The server may or may not have processed the sequence
+		 * operation.
+		 * Mark the slot as having hosted an interrupted RPC call.
+		 */
+		slot->interrupted = 1;
+		goto out;
 	case -NFS4ERR_DELAY:
 		/* The server detected a resend of the RPC call and
 		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
 		 * of RFC5661.
 		 */
-		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
+		dprintk("%s: slot=%u seq=%u: Operation in progress\n",
 			__func__,
-			res->sr_slot - res->sr_session->fc_slot_table.slots,
-			res->sr_slot->seq_nr);
+			slot->slot_nr,
+			slot->seq_nr);
 		goto out_retry;
+	case -NFS4ERR_BADSLOT:
+		/*
+		 * The slot id we used was probably retired. Try again
+		 * using a different slot id.
+		 */
+		goto retry_nowait;
+	case -NFS4ERR_SEQ_MISORDERED:
+		/*
+		 * Was the last operation on this sequence interrupted?
+		 * If so, retry after bumping the sequence number.
+		 */
+		if (interrupted) {
+			++slot->seq_nr;
+			goto retry_nowait;
+		}
+		/*
+		 * Could this slot have been previously retired?
+		 * If so, then the server may be expecting seq_nr = 1!
+		 */
+		if (slot->seq_nr != 1) {
+			slot->seq_nr = 1;
+			goto retry_nowait;
+		}
+		break;
+	case -NFS4ERR_SEQ_FALSE_RETRY:
+		++slot->seq_nr;
+		goto retry_nowait;
 	default:
 		/* Just update the slot sequence no. */
-		++res->sr_slot->seq_nr;
+		++slot->seq_nr;
 	}
 out:
 	/* The session may be reset by one of the error handlers. */
 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
 	nfs41_sequence_free_slot(res);
-	return 1;
+	return ret;
+retry_nowait:
+	if (rpc_restart_call_prepare(task)) {
+		task->tk_status = 0;
+		ret = 0;
+	}
+	goto out;
 out_retry:
 	if (!rpc_restart_call(task))
 		goto out;
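(Condensed view of the SEQUENCE status handling introduced above -- a
summary of the switch, not additional behaviour:

	/*
	 *  0                       bump seq_nr, renew lease, update target slotid
	 *  1 (no RPC-level reply)  mark slot ->interrupted, then free the slot
	 *  NFS4ERR_DELAY           retry the same slot/seq after a delay
	 *  NFS4ERR_BADSLOT         slot was probably retired; retry on a new slot
	 *  NFS4ERR_SEQ_MISORDERED  if the previous call on this slot was
	 *                          interrupted, bump seq_nr and retry; else try
	 *                          seq_nr = 1 in case the slot was retired;
	 *                          otherwise give up
	 *  NFS4ERR_SEQ_FALSE_RETRY bump seq_nr and retry
	 *  anything else           just bump seq_nr and free the slot
	 */
)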
@@ -545,55 +518,27 @@
 static int nfs4_sequence_done(struct rpc_task *task,
 			       struct nfs4_sequence_res *res)
 {
-	if (res->sr_session == NULL)
+	if (res->sr_slot == NULL)
 		return 1;
 	return nfs41_sequence_done(task, res);
 }
 
-/*
- * nfs4_find_slot - efficiently look for a free slot
- *
- * nfs4_find_slot looks for an unset bit in the used_slots bitmap.
- * If found, we mark the slot as used, update the highest_used_slotid,
- * and respectively set up the sequence operation args.
- * The slot number is returned if found, or NFS4_NO_SLOT otherwise.
- *
- * Note: must be called with under the slot_tbl_lock.
- */
-static u32
-nfs4_find_slot(struct nfs4_slot_table *tbl)
-{
-	u32 slotid;
-	u32 ret_id = NFS4_NO_SLOT;
-
-	dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
-		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
-		tbl->max_slots);
-	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
-	if (slotid >= tbl->max_slots)
-		goto out;
-	__set_bit(slotid, tbl->used_slots);
-	if (slotid > tbl->highest_used_slotid ||
-			tbl->highest_used_slotid == NFS4_NO_SLOT)
-		tbl->highest_used_slotid = slotid;
-	ret_id = slotid;
-out:
-	dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
-		__func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id);
-	return ret_id;
-}
-
 static void nfs41_init_sequence(struct nfs4_sequence_args *args,
 		struct nfs4_sequence_res *res, int cache_reply)
 {
-	args->sa_session = NULL;
+	args->sa_slot = NULL;
 	args->sa_cache_this = 0;
+	args->sa_privileged = 0;
 	if (cache_reply)
 		args->sa_cache_this = 1;
-	res->sr_session = NULL;
 	res->sr_slot = NULL;
 }
 
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+	args->sa_privileged = 1;
+}
+
 int nfs41_setup_sequence(struct nfs4_session *session,
 				struct nfs4_sequence_args *args,
 				struct nfs4_sequence_res *res,
@@ -601,59 +546,59 @@
 {
 	struct nfs4_slot *slot;
 	struct nfs4_slot_table *tbl;
-	u32 slotid;
 
 	dprintk("--> %s\n", __func__);
 	/* slot already allocated? */
 	if (res->sr_slot != NULL)
-		return 0;
+		goto out_success;
 
 	tbl = &session->fc_slot_table;
 
+	task->tk_timeout = 0;
+
 	spin_lock(&tbl->slot_tbl_lock);
 	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
-	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+	    !args->sa_privileged) {
 		/* The state manager will wait until the slot table is empty */
-		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
-		spin_unlock(&tbl->slot_tbl_lock);
 		dprintk("%s session is draining\n", __func__);
-		return -EAGAIN;
+		goto out_sleep;
 	}
 
-	if (!rpc_queue_empty(&tbl->slot_tbl_waitq) &&
-	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
-		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
-		spin_unlock(&tbl->slot_tbl_lock);
-		dprintk("%s enforce FIFO order\n", __func__);
-		return -EAGAIN;
-	}
-
-	slotid = nfs4_find_slot(tbl);
-	if (slotid == NFS4_NO_SLOT) {
-		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
-		spin_unlock(&tbl->slot_tbl_lock);
+	slot = nfs4_alloc_slot(tbl);
+	if (IS_ERR(slot)) {
+		/* If out of memory, try again in 1/4 second */
+		if (slot == ERR_PTR(-ENOMEM))
+			task->tk_timeout = HZ >> 2;
 		dprintk("<-- %s: no free slots\n", __func__);
-		return -EAGAIN;
+		goto out_sleep;
 	}
 	spin_unlock(&tbl->slot_tbl_lock);
 
-	rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
-	slot = tbl->slots + slotid;
-	args->sa_session = session;
-	args->sa_slotid = slotid;
+	args->sa_slot = slot;
 
-	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
+	dprintk("<-- %s slotid=%d seqid=%d\n", __func__,
+			slot->slot_nr, slot->seq_nr);
 
-	res->sr_session = session;
 	res->sr_slot = slot;
-	res->sr_renewal_time = jiffies;
+	res->sr_timestamp = jiffies;
 	res->sr_status_flags = 0;
 	/*
 	 * sr_status is only set in decode_sequence, and so will remain
 	 * set to 1 if an rpc level failure occurs.
 	 */
 	res->sr_status = 1;
+out_success:
+	rpc_call_start(task);
 	return 0;
+out_sleep:
+	/* Privileged tasks are queued with top priority */
+	if (args->sa_privileged)
+		rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
+				NULL, RPC_PRIORITY_PRIVILEGED);
+	else
+		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+	spin_unlock(&tbl->slot_tbl_lock);
+	return -EAGAIN;
 }
 EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
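(The net effect on callers: nfs4{,1}_setup_sequence() now calls
rpc_call_start() itself once a slot is assigned, or queues the task and
returns -EAGAIN, so every ->rpc_call_prepare callback loses its
"if (...) return; rpc_call_start(task);" boilerplate.  Sketch of the new
shape -- example_data and example_rpc_prepare are illustrative names, not
from this patch:

	static void example_rpc_prepare(struct rpc_task *task, void *calldata)
	{
		struct example_data *data = calldata;

		nfs4_setup_sequence(data->server,
				    &data->args.seq_args,
				    &data->res.seq_res,
				    task);	/* starts or queues the task */
	}
)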
 
@@ -665,12 +610,14 @@
 	struct nfs4_session *session = nfs4_get_session(server);
 	int ret = 0;
 
-	if (session == NULL)
+	if (session == NULL) {
+		rpc_call_start(task);
 		goto out;
+	}
 
-	dprintk("--> %s clp %p session %p sr_slot %td\n",
+	dprintk("--> %s clp %p session %p sr_slot %d\n",
 		__func__, session->clp, session, res->sr_slot ?
-			res->sr_slot - session->fc_slot_table.slots : -1);
+			res->sr_slot->slot_nr : -1);
 
 	ret = nfs41_setup_sequence(session, args, res, task);
 out:
@@ -687,19 +634,11 @@
 static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs41_call_sync_data *data = calldata;
+	struct nfs4_session *session = nfs4_get_session(data->seq_server);
 
 	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
 
-	if (nfs4_setup_sequence(data->seq_server, data->seq_args,
-				data->seq_res, task))
-		return;
-	rpc_call_start(task);
-}
-
-static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs41_call_sync_prepare(task, calldata);
+	nfs41_setup_sequence(session, data->seq_args, data->seq_res, task);
 }
 
 static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
@@ -714,17 +653,11 @@
 	.rpc_call_done = nfs41_call_sync_done,
 };
 
-static const struct rpc_call_ops nfs41_call_priv_sync_ops = {
-	.rpc_call_prepare = nfs41_call_priv_sync_prepare,
-	.rpc_call_done = nfs41_call_sync_done,
-};
-
 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 				   struct nfs_server *server,
 				   struct rpc_message *msg,
 				   struct nfs4_sequence_args *args,
-				   struct nfs4_sequence_res *res,
-				   int privileged)
+				   struct nfs4_sequence_res *res)
 {
 	int ret;
 	struct rpc_task *task;
@@ -740,8 +673,6 @@
 		.callback_data = &data
 	};
 
-	if (privileged)
-		task_setup.callback_ops = &nfs41_call_priv_sync_ops;
 	task = rpc_run_task(&task_setup);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
@@ -752,24 +683,18 @@
 	return ret;
 }
 
-int _nfs4_call_sync_session(struct rpc_clnt *clnt,
-			    struct nfs_server *server,
-			    struct rpc_message *msg,
-			    struct nfs4_sequence_args *args,
-			    struct nfs4_sequence_res *res,
-			    int cache_reply)
-{
-	nfs41_init_sequence(args, res, cache_reply);
-	return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0);
-}
-
 #else
-static inline
+static
 void nfs41_init_sequence(struct nfs4_sequence_args *args,
 		struct nfs4_sequence_res *res, int cache_reply)
 {
 }
 
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+}
+
 static int nfs4_sequence_done(struct rpc_task *task,
 			       struct nfs4_sequence_res *res)
 {
@@ -777,18 +702,17 @@
 }
 #endif /* CONFIG_NFS_V4_1 */
 
+static
 int _nfs4_call_sync(struct rpc_clnt *clnt,
 		    struct nfs_server *server,
 		    struct rpc_message *msg,
 		    struct nfs4_sequence_args *args,
-		    struct nfs4_sequence_res *res,
-		    int cache_reply)
+		    struct nfs4_sequence_res *res)
 {
-	nfs41_init_sequence(args, res, cache_reply);
 	return rpc_call_sync(clnt, msg, 0);
 }
 
-static inline
+static
 int nfs4_call_sync(struct rpc_clnt *clnt,
 		   struct nfs_server *server,
 		   struct rpc_message *msg,
@@ -796,8 +720,9 @@
 		   struct nfs4_sequence_res *res,
 		   int cache_reply)
 {
+	nfs41_init_sequence(args, res, cache_reply);
 	return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
-						args, res, cache_reply);
+						args, res);
 }
 
 static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
@@ -809,6 +734,7 @@
 	if (!cinfo->atomic || cinfo->before != dir->i_version)
 		nfs_force_lookup_revalidate(dir);
 	dir->i_version = cinfo->after;
+	nfs_fscache_invalidate(dir);
 	spin_unlock(&dir->i_lock);
 }
 
@@ -1445,13 +1371,6 @@
 				nfs_inode_find_state_and_recover(state->inode,
 						stateid);
 				nfs4_schedule_stateid_recovery(server, state);
-			case -EKEYEXPIRED:
-				/*
-				 * User RPCSEC_GSS context has expired.
-				 * We cannot recover this stateid now, so
-				 * skip it and allow recovery thread to
-				 * proceed.
-				 */
 			case -ENOMEM:
 				err = 0;
 				goto out;
@@ -1574,20 +1493,12 @@
 				&data->o_res.seq_res,
 				task) != 0)
 		nfs_release_seqid(data->o_arg.seqid);
-	else
-		rpc_call_start(task);
 	return;
 unlock_no_action:
 	rcu_read_unlock();
 out_no_action:
 	task->tk_action = NULL;
-
-}
-
-static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs4_open_prepare(task, calldata);
+	nfs4_sequence_done(task, &data->o_res.seq_res);
 }
 
 static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -1648,12 +1559,6 @@
 	.rpc_release = nfs4_open_release,
 };
 
-static const struct rpc_call_ops nfs4_recover_open_ops = {
-	.rpc_call_prepare = nfs4_recover_open_prepare,
-	.rpc_call_done = nfs4_open_done,
-	.rpc_release = nfs4_open_release,
-};
-
 static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 {
 	struct inode *dir = data->dir->d_inode;
@@ -1683,7 +1588,7 @@
 	data->rpc_status = 0;
 	data->cancelled = 0;
 	if (isrecover)
-		task_setup_data.callback_ops = &nfs4_recover_open_ops;
+		nfs4_set_sequence_privileged(&o_arg->seq_args);
 	task = rpc_run_task(&task_setup_data);
         if (IS_ERR(task))
                 return PTR_ERR(task);
@@ -1789,24 +1694,6 @@
 	return 0;
 }
 
-static int nfs4_client_recover_expired_lease(struct nfs_client *clp)
-{
-	unsigned int loop;
-	int ret;
-
-	for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
-		ret = nfs4_wait_clnt_recover(clp);
-		if (ret != 0)
-			break;
-		if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
-		    !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
-			break;
-		nfs4_schedule_state_manager(clp);
-		ret = -EIO;
-	}
-	return ret;
-}
-
 static int nfs4_recover_expired_lease(struct nfs_server *server)
 {
 	return nfs4_client_recover_expired_lease(server->nfs_client);
@@ -2282,6 +2169,7 @@
 	if (!call_close) {
 		/* Note: exit _without_ calling nfs4_close_done */
 		task->tk_action = NULL;
+		nfs4_sequence_done(task, &calldata->res.seq_res);
 		goto out;
 	}
 
@@ -2299,8 +2187,6 @@
 				&calldata->res.seq_res,
 				task) != 0)
 		nfs_release_seqid(calldata->arg.seqid);
-	else
-		rpc_call_start(task);
 out:
 	dprintk("%s: done!\n", __func__);
 }
@@ -2533,7 +2419,8 @@
 	rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS];
 
 	len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array));
-	BUG_ON(len < 0);
+	if (len < 0)
+		return len;
 
 	for (i = 0; i < len; i++) {
 		/* AUTH_UNIX is the default flavor if none was specified,
@@ -3038,12 +2925,10 @@
 
 static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->dir),
-				&data->args.seq_args,
-				&data->res.seq_res,
-				task))
-		return;
-	rpc_call_start(task);
+	nfs4_setup_sequence(NFS_SERVER(data->dir),
+			&data->args.seq_args,
+			&data->res.seq_res,
+			task);
 }
 
 static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
@@ -3071,12 +2956,10 @@
 
 static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->old_dir),
-				&data->args.seq_args,
-				&data->res.seq_res,
-				task))
-		return;
-	rpc_call_start(task);
+	nfs4_setup_sequence(NFS_SERVER(data->old_dir),
+			&data->args.seq_args,
+			&data->res.seq_res,
+			task);
 }
 
 static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
@@ -3362,9 +3245,6 @@
 	int mode = sattr->ia_mode;
 	int status = -ENOMEM;
 
-	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
-	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
-
 	data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK);
 	if (data == NULL)
 		goto out;
@@ -3380,10 +3260,13 @@
 		data->arg.ftype = NF4CHR;
 		data->arg.u.device.specdata1 = MAJOR(rdev);
 		data->arg.u.device.specdata2 = MINOR(rdev);
+	} else if (!S_ISSOCK(mode)) {
+		status = -EINVAL;
+		goto out_free;
 	}
 	
 	status = nfs4_do_create(dir, dentry, data);
-
+out_free:
 	nfs4_free_createdata(data);
 out:
 	return status;
@@ -3565,12 +3448,10 @@
 
 static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
-				&data->args.seq_args,
-				&data->res.seq_res,
-				task))
-		return;
-	rpc_call_start(task);
+	nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+			&data->args.seq_args,
+			&data->res.seq_res,
+			task);
 }
 
 static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
@@ -3631,22 +3512,18 @@
 
 static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
-				&data->args.seq_args,
-				&data->res.seq_res,
-				task))
-		return;
-	rpc_call_start(task);
+	nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+			&data->args.seq_args,
+			&data->res.seq_res,
+			task);
 }
 
 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
-				&data->args.seq_args,
-				&data->res.seq_res,
-				task))
-		return;
-	rpc_call_start(task);
+	nfs4_setup_sequence(NFS_SERVER(data->inode),
+			&data->args.seq_args,
+			&data->res.seq_res,
+			task);
 }
 
 static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
@@ -3937,8 +3814,13 @@
 		goto out_free;
 	}
 	nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
-	if (buf)
+	if (buf) {
+		if (res.acl_len > buflen) {
+			ret = -ERANGE;
+			goto out_free;
+		}
 		_copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
+	}
 out_ok:
 	ret = res.acl_len;
 out_free:
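(With the new length check, a caller whose buffer is too small gets -ERANGE
rather than a silently truncated copy, matching the usual getxattr contract:
probe with a NULL buffer for the size, then fetch.  From userspace that looks
roughly like:

	ssize_t len = getxattr(path, "system.nfs4_acl", NULL, 0);
	if (len > 0) {
		void *buf = malloc(len);
		if (buf && getxattr(path, "system.nfs4_acl", buf, len) < 0)
			perror("getxattr");	/* -ERANGE if the ACL grew */
		free(buf);
	}
)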
@@ -4085,7 +3967,6 @@
 		case -NFS4ERR_DELAY:
 			nfs_inc_server_stats(server, NFSIOS_DELAY);
 		case -NFS4ERR_GRACE:
-		case -EKEYEXPIRED:
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 			task->tk_status = 0;
 			return -EAGAIN;
@@ -4293,11 +4174,10 @@
 
 	d_data = (struct nfs4_delegreturndata *)data;
 
-	if (nfs4_setup_sequence(d_data->res.server,
-				&d_data->args.seq_args,
-				&d_data->res.seq_res, task))
-		return;
-	rpc_call_start(task);
+	nfs4_setup_sequence(d_data->res.server,
+			&d_data->args.seq_args,
+			&d_data->res.seq_res,
+			task);
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -4543,6 +4423,7 @@
 	if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
 		/* Note: exit _without_ running nfs4_locku_done */
 		task->tk_action = NULL;
+		nfs4_sequence_done(task, &calldata->res.seq_res);
 		return;
 	}
 	calldata->timestamp = jiffies;
@@ -4551,8 +4432,6 @@
 				&calldata->res.seq_res,
 				task) != 0)
 		nfs_release_seqid(calldata->arg.seqid);
-	else
-		rpc_call_start(task);
 }
 
 static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4696,8 +4575,9 @@
 		return;
 	/* Do we need to do an open_to_lock_owner? */
 	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
-		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
+		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
 			goto out_release_lock_seqid;
+		}
 		data->arg.open_stateid = &state->stateid;
 		data->arg.new_lock_owner = 1;
 		data->res.open_seqid = data->arg.open_seqid;
@@ -4707,20 +4587,12 @@
 	if (nfs4_setup_sequence(data->server,
 				&data->arg.seq_args,
 				&data->res.seq_res,
-				task) == 0) {
-		rpc_call_start(task);
+				task) == 0)
 		return;
-	}
 	nfs_release_seqid(data->arg.open_seqid);
 out_release_lock_seqid:
 	nfs_release_seqid(data->arg.lock_seqid);
-	dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
-}
-
-static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs4_lock_prepare(task, calldata);
+	dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
 }
 
 static void nfs4_lock_done(struct rpc_task *task, void *calldata)
@@ -4775,12 +4647,6 @@
 	.rpc_release = nfs4_lock_release,
 };
 
-static const struct rpc_call_ops nfs4_recover_lock_ops = {
-	.rpc_call_prepare = nfs4_recover_lock_prepare,
-	.rpc_call_done = nfs4_lock_done,
-	.rpc_release = nfs4_lock_release,
-};
-
 static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
 {
 	switch (error) {
@@ -4823,15 +4689,15 @@
 		return -ENOMEM;
 	if (IS_SETLKW(cmd))
 		data->arg.block = 1;
-	if (recovery_type > NFS_LOCK_NEW) {
-		if (recovery_type == NFS_LOCK_RECLAIM)
-			data->arg.reclaim = NFS_LOCK_RECLAIM;
-		task_setup_data.callback_ops = &nfs4_recover_lock_ops;
-	}
 	nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
 	msg.rpc_argp = &data->arg;
 	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
+	if (recovery_type > NFS_LOCK_NEW) {
+		if (recovery_type == NFS_LOCK_RECLAIM)
+			data->arg.reclaim = NFS_LOCK_RECLAIM;
+		nfs4_set_sequence_privileged(&data->arg.seq_args);
+	}
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
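
Lock recovery no longer needs the dedicated nfs4_recover_lock_ops vector
whose prepare callback raised the rpc_task to RPC_PRIORITY_PRIVILEGED;
the privilege now travels with the sequence arguments. Judging by the
sa_privileged test in nfs41_assign_slot() (added below in
fs/nfs/nfs4session.c), the helper is most likely just this one-liner
(a sketch, not quoted from the patch):

	void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
	{
		args->sa_privileged = 1;	/* bypass the session-drain barrier */
	}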
@@ -5100,15 +4966,6 @@
 				nfs4_schedule_stateid_recovery(server, state);
 				err = 0;
 				goto out;
-			case -EKEYEXPIRED:
-				/*
-				 * User RPCSEC_GSS context has expired.
-				 * We cannot recover this stateid now, so
-				 * skip it and allow recovery thread to
-				 * proceed.
-				 */
-				err = 0;
-				goto out;
 			case -ENOMEM:
 			case -NFS4ERR_DENIED:
 				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
@@ -5357,7 +5214,6 @@
 	};
 
 	dprintk("--> %s\n", __func__);
-	BUG_ON(clp == NULL);
 
 	res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
 	if (unlikely(res.session == NULL)) {
@@ -5569,20 +5425,16 @@
 static void nfs4_get_lease_time_prepare(struct rpc_task *task,
 					void *calldata)
 {
-	int ret;
 	struct nfs4_get_lease_time_data *data =
 			(struct nfs4_get_lease_time_data *)calldata;
 
 	dprintk("--> %s\n", __func__);
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 	/* just setup sequence, do not trigger session recovery
 	   since we're invoked within one */
-	ret = nfs41_setup_sequence(data->clp->cl_session,
-				   &data->args->la_seq_args,
-				   &data->res->lr_seq_res, task);
-
-	BUG_ON(ret == -EAGAIN);
-	rpc_call_start(task);
+	nfs41_setup_sequence(data->clp->cl_session,
+			&data->args->la_seq_args,
+			&data->res->lr_seq_res,
+			task);
 	dprintk("<-- %s\n", __func__);
 }
 
@@ -5644,6 +5496,7 @@
 	int status;
 
 	nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
+	nfs4_set_sequence_privileged(&args.la_seq_args);
 	dprintk("--> %s\n", __func__);
 	task = rpc_run_task(&task_setup);
 
@@ -5658,145 +5511,6 @@
 	return status;
 }
 
-static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
-{
-	return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags);
-}
-
-static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
-		struct nfs4_slot *new,
-		u32 max_slots,
-		u32 ivalue)
-{
-	struct nfs4_slot *old = NULL;
-	u32 i;
-
-	spin_lock(&tbl->slot_tbl_lock);
-	if (new) {
-		old = tbl->slots;
-		tbl->slots = new;
-		tbl->max_slots = max_slots;
-	}
-	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	for (i = 0; i < tbl->max_slots; i++)
-		tbl->slots[i].seq_nr = ivalue;
-	spin_unlock(&tbl->slot_tbl_lock);
-	kfree(old);
-}
-
-/*
- * (re)Initialise a slot table
- */
-static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
-				 u32 ivalue)
-{
-	struct nfs4_slot *new = NULL;
-	int ret = -ENOMEM;
-
-	dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
-		max_reqs, tbl->max_slots);
-
-	/* Does the newly negotiated max_reqs match the existing slot table? */
-	if (max_reqs != tbl->max_slots) {
-		new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
-		if (!new)
-			goto out;
-	}
-	ret = 0;
-
-	nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
-	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
-		tbl, tbl->slots, tbl->max_slots);
-out:
-	dprintk("<-- %s: return %d\n", __func__, ret);
-	return ret;
-}
-
-/* Destroy the slot table */
-static void nfs4_destroy_slot_tables(struct nfs4_session *session)
-{
-	if (session->fc_slot_table.slots != NULL) {
-		kfree(session->fc_slot_table.slots);
-		session->fc_slot_table.slots = NULL;
-	}
-	if (session->bc_slot_table.slots != NULL) {
-		kfree(session->bc_slot_table.slots);
-		session->bc_slot_table.slots = NULL;
-	}
-	return;
-}
-
-/*
- * Initialize or reset the forechannel and backchannel tables
- */
-static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
-{
-	struct nfs4_slot_table *tbl;
-	int status;
-
-	dprintk("--> %s\n", __func__);
-	/* Fore channel */
-	tbl = &ses->fc_slot_table;
-	status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
-	if (status) /* -ENOMEM */
-		return status;
-	/* Back channel */
-	tbl = &ses->bc_slot_table;
-	status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
-	if (status && tbl->slots == NULL)
-		/* Fore and back channel share a connection so get
-		 * both slot tables or neither */
-		nfs4_destroy_slot_tables(ses);
-	return status;
-}
-
-struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
-{
-	struct nfs4_session *session;
-	struct nfs4_slot_table *tbl;
-
-	session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
-	if (!session)
-		return NULL;
-
-	tbl = &session->fc_slot_table;
-	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	spin_lock_init(&tbl->slot_tbl_lock);
-	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
-	init_completion(&tbl->complete);
-
-	tbl = &session->bc_slot_table;
-	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	spin_lock_init(&tbl->slot_tbl_lock);
-	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
-	init_completion(&tbl->complete);
-
-	session->session_state = 1<<NFS4_SESSION_INITING;
-
-	session->clp = clp;
-	return session;
-}
-
-void nfs4_destroy_session(struct nfs4_session *session)
-{
-	struct rpc_xprt *xprt;
-	struct rpc_cred *cred;
-
-	cred = nfs4_get_exchange_id_cred(session->clp);
-	nfs4_proc_destroy_session(session, cred);
-	if (cred)
-		put_rpccred(cred);
-
-	rcu_read_lock();
-	xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
-	rcu_read_unlock();
-	dprintk("%s Destroy backchannel for xprt %p\n",
-		__func__, xprt);
-	xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
-	nfs4_destroy_slot_tables(session);
-	kfree(session);
-}
-
 /*
  * Initialize the values to be used by the client in CREATE_SESSION
  * If nfs4_init_session set the fore channel request and response sizes,
@@ -5809,8 +5523,8 @@
 static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
 {
 	struct nfs4_session *session = args->client->cl_session;
-	unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz,
-		     mxresp_sz = session->fc_attrs.max_resp_sz;
+	unsigned int mxrqst_sz = session->fc_target_max_rqst_sz,
+		     mxresp_sz = session->fc_target_max_resp_sz;
 
 	if (mxrqst_sz == 0)
 		mxrqst_sz = NFS_MAX_FILE_IO_SIZE;
@@ -5919,10 +5633,9 @@
 
 	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 
-	if (!status)
+	if (!status) {
 		/* Verify the session's negotiated channel_attrs values */
 		status = nfs4_verify_channel_attrs(&args, session);
-	if (!status) {
 		/* Increment the clientid slot sequence id */
 		clp->cl_seqid++;
 	}
@@ -5992,83 +5705,6 @@
 }
 
 /*
- * With sessions, the client is not marked ready until after a
- * successful EXCHANGE_ID and CREATE_SESSION.
- *
- * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
- * other versions of NFS can be tried.
- */
-static int nfs41_check_session_ready(struct nfs_client *clp)
-{
-	int ret;
-	
-	if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
-		ret = nfs4_client_recover_expired_lease(clp);
-		if (ret)
-			return ret;
-	}
-	if (clp->cl_cons_state < NFS_CS_READY)
-		return -EPROTONOSUPPORT;
-	smp_rmb();
-	return 0;
-}
-
-int nfs4_init_session(struct nfs_server *server)
-{
-	struct nfs_client *clp = server->nfs_client;
-	struct nfs4_session *session;
-	unsigned int rsize, wsize;
-
-	if (!nfs4_has_session(clp))
-		return 0;
-
-	session = clp->cl_session;
-	spin_lock(&clp->cl_lock);
-	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
-
-		rsize = server->rsize;
-		if (rsize == 0)
-			rsize = NFS_MAX_FILE_IO_SIZE;
-		wsize = server->wsize;
-		if (wsize == 0)
-			wsize = NFS_MAX_FILE_IO_SIZE;
-
-		session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
-		session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
-	}
-	spin_unlock(&clp->cl_lock);
-
-	return nfs41_check_session_ready(clp);
-}
-
-int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
-{
-	struct nfs4_session *session = clp->cl_session;
-	int ret;
-
-	spin_lock(&clp->cl_lock);
-	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
-		/*
-		 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the
-		 * DS lease to be equal to the MDS lease.
-		 */
-		clp->cl_lease_time = lease_time;
-		clp->cl_last_renewal = jiffies;
-	}
-	spin_unlock(&clp->cl_lock);
-
-	ret = nfs41_check_session_ready(clp);
-	if (ret)
-		return ret;
-	/* Test for the DS role */
-	if (!is_ds_client(clp))
-		return -ENODEV;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
-
-
-/*
  * Renew the cl_session lease.
  */
 struct nfs4_sequence_data {
@@ -6133,9 +5769,7 @@
 	args = task->tk_msg.rpc_argp;
 	res = task->tk_msg.rpc_resp;
 
-	if (nfs41_setup_sequence(clp->cl_session, args, res, task))
-		return;
-	rpc_call_start(task);
+	nfs41_setup_sequence(clp->cl_session, args, res, task);
 }
 
 static const struct rpc_call_ops nfs41_sequence_ops = {
@@ -6144,7 +5778,9 @@
 	.rpc_release = nfs41_sequence_release,
 };
 
-static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
+		struct rpc_cred *cred,
+		bool is_privileged)
 {
 	struct nfs4_sequence_data *calldata;
 	struct rpc_message msg = {
@@ -6166,6 +5802,8 @@
 		return ERR_PTR(-ENOMEM);
 	}
 	nfs41_init_sequence(&calldata->args, &calldata->res, 0);
+	if (is_privileged)
+		nfs4_set_sequence_privileged(&calldata->args);
 	msg.rpc_argp = &calldata->args;
 	msg.rpc_resp = &calldata->res;
 	calldata->clp = clp;
@@ -6181,7 +5819,7 @@
 
 	if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
 		return 0;
-	task = _nfs41_proc_sequence(clp, cred);
+	task = _nfs41_proc_sequence(clp, cred, false);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
 	else
@@ -6195,7 +5833,7 @@
 	struct rpc_task *task;
 	int ret;
 
-	task = _nfs41_proc_sequence(clp, cred);
+	task = _nfs41_proc_sequence(clp, cred, true);
 	if (IS_ERR(task)) {
 		ret = PTR_ERR(task);
 		goto out;
@@ -6224,13 +5862,10 @@
 {
 	struct nfs4_reclaim_complete_data *calldata = data;
 
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	if (nfs41_setup_sequence(calldata->clp->cl_session,
-				&calldata->arg.seq_args,
-				&calldata->res.seq_res, task))
-		return;
-
-	rpc_call_start(task);
+	nfs41_setup_sequence(calldata->clp->cl_session,
+			&calldata->arg.seq_args,
+			&calldata->res.seq_res,
+			task);
 }
 
 static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
@@ -6307,6 +5942,7 @@
 	calldata->arg.one_fs = 0;
 
 	nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
+	nfs4_set_sequence_privileged(&calldata->arg.seq_args);
 	msg.rpc_argp = &calldata->arg;
 	msg.rpc_resp = &calldata->res;
 	task_setup_data.callback_data = calldata;
@@ -6330,6 +5966,7 @@
 {
 	struct nfs4_layoutget *lgp = calldata;
 	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+	struct nfs4_session *session = nfs4_get_session(server);
 
 	dprintk("--> %s\n", __func__);
 	/* Note there is a race here, where a CB_LAYOUTRECALL can come in
@@ -6337,16 +5974,14 @@
 	 * However, that is not so catastrophic, and there seems
 	 * to be no way to prevent it completely.
 	 */
-	if (nfs4_setup_sequence(server, &lgp->args.seq_args,
+	if (nfs41_setup_sequence(session, &lgp->args.seq_args,
 				&lgp->res.seq_res, task))
 		return;
 	if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
 					  NFS_I(lgp->args.inode)->layout,
 					  lgp->args.ctx->state)) {
 		rpc_exit(task, NFS4_OK);
-		return;
 	}
-	rpc_call_start(task);
 }
 
 static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
@@ -6359,7 +5994,7 @@
 
 	dprintk("--> %s\n", __func__);
 
-	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
+	if (!nfs41_sequence_done(task, &lgp->res.seq_res))
 		goto out;
 
 	switch (task->tk_status) {
@@ -6510,10 +6145,10 @@
 	struct nfs4_layoutreturn *lrp = calldata;
 
 	dprintk("--> %s\n", __func__);
-	if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
-				&lrp->res.seq_res, task))
-		return;
-	rpc_call_start(task);
+	nfs41_setup_sequence(lrp->clp->cl_session,
+			&lrp->args.seq_args,
+			&lrp->res.seq_res,
+			task);
 }
 
 static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
@@ -6523,7 +6158,7 @@
 
 	dprintk("--> %s\n", __func__);
 
-	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
+	if (!nfs41_sequence_done(task, &lrp->res.seq_res))
 		return;
 
 	server = NFS_SERVER(lrp->args.inode);
@@ -6672,11 +6307,12 @@
 {
 	struct nfs4_layoutcommit_data *data = calldata;
 	struct nfs_server *server = NFS_SERVER(data->args.inode);
+	struct nfs4_session *session = nfs4_get_session(server);
 
-	if (nfs4_setup_sequence(server, &data->args.seq_args,
-				&data->res.seq_res, task))
-		return;
-	rpc_call_start(task);
+	nfs41_setup_sequence(session,
+			&data->args.seq_args,
+			&data->res.seq_res,
+			task);
 }
 
 static void
@@ -6685,7 +6321,7 @@
 	struct nfs4_layoutcommit_data *data = calldata;
 	struct nfs_server *server = NFS_SERVER(data->args.inode);
 
-	if (!nfs4_sequence_done(task, &data->res.seq_res))
+	if (!nfs41_sequence_done(task, &data->res.seq_res))
 		return;
 
 	switch (task->tk_status) { /* Just ignore these failures */
@@ -6873,7 +6509,9 @@
 
 	dprintk("NFS call  test_stateid %p\n", stateid);
 	nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
-	status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
+	nfs4_set_sequence_privileged(&args.seq_args);
+	status = nfs4_call_sync_sequence(server->client, server, &msg,
+			&args.seq_args, &res.seq_res);
 	if (status != NFS_OK) {
 		dprintk("NFS reply test_stateid: failed, %d\n", status);
 		return status;
@@ -6920,8 +6558,9 @@
 
 	dprintk("NFS call  free_stateid %p\n", stateid);
 	nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_set_sequence_privileged(&args.seq_args);
 	status = nfs4_call_sync_sequence(server->client, server, &msg,
-					 &args.seq_args, &res.seq_res, 1);
+			&args.seq_args, &res.seq_res);
 	dprintk("NFS reply free_stateid: %d\n", status);
 	return status;
 }
@@ -7041,7 +6680,7 @@
 #if defined(CONFIG_NFS_V4_1)
 static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
 	.minor_version = 1,
-	.call_sync = _nfs4_call_sync_session,
+	.call_sync = nfs4_call_sync_sequence,
 	.match_stateid = nfs41_match_stateid,
 	.find_root_sec = nfs41_find_root_sec,
 	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
new file mode 100644
index 0000000..ebda5f4
--- /dev/null
+++ b/fs/nfs/nfs4session.c
@@ -0,0 +1,552 @@
+/*
+ * fs/nfs/nfs4session.c
+ *
+ * Copyright (c) 2012 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/bc_xprt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/module.h>
+
+#include "nfs4_fs.h"
+#include "internal.h"
+#include "nfs4session.h"
+#include "callback.h"
+
+#define NFSDBG_FACILITY		NFSDBG_STATE
+
+/*
+ * nfs4_shrink_slot_table - free retired slots from the slot table
+ */
+static void nfs4_shrink_slot_table(struct nfs4_slot_table  *tbl, u32 newsize)
+{
+	struct nfs4_slot **p;
+	if (newsize >= tbl->max_slots)
+		return;
+
+	p = &tbl->slots;
+	while (newsize--)
+		p = &(*p)->next;
+	while (*p) {
+		struct nfs4_slot *slot = *p;
+
+		*p = slot->next;
+		kfree(slot);
+		tbl->max_slots--;
+	}
+}
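
The slot table is now a singly linked list kept in slot_nr order, so
shrinking means walking past the first newsize entries and freeing the
tail. A small worked example of the code above:

	/* Example: slots 0 -> 1 -> 2 -> 3, tbl->max_slots = 4.
	 * nfs4_shrink_slot_table(tbl, 2):
	 *   the first loop advances p past slots 0 and 1;
	 *   the second unlinks and kfree()s slots 2 and 3;
	 *   tbl->max_slots ends at 2.
	 * newsize >= max_slots is a no-op: growth is handled by
	 * nfs4_find_or_create_slot() further down.
	 */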
+
+/*
+ * nfs4_free_slot - free a slot and efficiently update slot table.
+ *
+ * freeing a slot is trivially done by clearing its respective bit
+ * in the bitmap.
+ * If the freed slotid equals highest_used_slotid we want to update it
+ * so that the server would be able to size down the slot table if needed,
+ * otherwise we know that the highest_used_slotid is still in use.
+ * When updating highest_used_slotid there may be "holes" in the bitmap
+ * so we need to scan down from highest_used_slotid to 0 looking for the now
+ * highest slotid in use.
+ * If none found, highest_used_slotid is set to NFS4_NO_SLOT.
+ *
+ * Must be called while holding tbl->slot_tbl_lock
+ */
+void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
+{
+	u32 slotid = slot->slot_nr;
+
+	/* clear used bit in bitmap */
+	__clear_bit(slotid, tbl->used_slots);
+
+	/* update highest_used_slotid when it is freed */
+	if (slotid == tbl->highest_used_slotid) {
+		u32 new_max = find_last_bit(tbl->used_slots, slotid);
+		if (new_max < slotid)
+			tbl->highest_used_slotid = new_max;
+		else {
+			tbl->highest_used_slotid = NFS4_NO_SLOT;
+			nfs4_session_drain_complete(tbl->session, tbl);
+		}
+	}
+	dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
+		slotid, tbl->highest_used_slotid);
+}
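
A worked run of the scan-down described above:

	/* Example: used_slots = 1011b (slots 0, 1, 3), highest_used = 3.
	 * nfs4_free_slot(tbl, slot 3):
	 *   __clear_bit(3)        -> used_slots = 0011b
	 *   find_last_bit(.., 3)  -> 1, which is < 3
	 *   highest_used_slotid   =  1
	 * Freeing the final used slot leaves find_last_bit() >= slotid,
	 * so highest_used_slotid becomes NFS4_NO_SLOT and
	 * nfs4_session_drain_complete() is signalled.
	 */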
+
+static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table  *tbl,
+		u32 slotid, u32 seq_init, gfp_t gfp_mask)
+{
+	struct nfs4_slot *slot;
+
+	slot = kzalloc(sizeof(*slot), gfp_mask);
+	if (slot) {
+		slot->table = tbl;
+		slot->slot_nr = slotid;
+		slot->seq_nr = seq_init;
+	}
+	return slot;
+}
+
+static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table  *tbl,
+		u32 slotid, u32 seq_init, gfp_t gfp_mask)
+{
+	struct nfs4_slot **p, *slot;
+
+	p = &tbl->slots;
+	for (;;) {
+		if (*p == NULL) {
+			*p = nfs4_new_slot(tbl, tbl->max_slots,
+					seq_init, gfp_mask);
+			if (*p == NULL)
+				break;
+			tbl->max_slots++;
+		}
+		slot = *p;
+		if (slot->slot_nr == slotid)
+			return slot;
+		p = &slot->next;
+	}
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * nfs4_alloc_slot - efficiently look for a free slot
+ *
+ * nfs4_alloc_slot looks for an unset bit in the used_slots bitmap.
+ * If found, we mark the slot as used, update the highest_used_slotid,
+ * and set up the sequence operation args accordingly.
+ *
+ * Note: must be called while holding the slot_tbl_lock.
+ */
+struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
+{
+	struct nfs4_slot *ret = ERR_PTR(-EBUSY);
+	u32 slotid;
+
+	dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
+		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
+		tbl->max_slotid + 1);
+	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
+	if (slotid > tbl->max_slotid)
+		goto out;
+	ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+	if (IS_ERR(ret))
+		goto out;
+	__set_bit(slotid, tbl->used_slots);
+	if (slotid > tbl->highest_used_slotid ||
+			tbl->highest_used_slotid == NFS4_NO_SLOT)
+		tbl->highest_used_slotid = slotid;
+	ret->generation = tbl->generation;
+
+out:
+	dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
+		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
+		!IS_ERR(ret) ? ret->slot_nr : -1);
+	return ret;
+}
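
And the allocation side, continuing the same example:

	/* Example: used_slots = 1011b, max_slotid = 9, highest_used = 3.
	 * nfs4_alloc_slot():
	 *   find_first_zero_bit(.., 10) -> 2 (<= max_slotid, so usable)
	 *   __set_bit(2)                -> used_slots = 1111b
	 *   highest_used_slotid stays 3 (2 is not greater)
	 * With every slotid in 0..max_slotid set, the function returns
	 * ERR_PTR(-EBUSY) and the caller waits on slot_tbl_waitq.
	 */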
+
+static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl,
+		 u32 max_reqs, u32 ivalue)
+{
+	if (max_reqs <= tbl->max_slots)
+		return 0;
+	if (!IS_ERR(nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS)))
+		return 0;
+	return -ENOMEM;
+}
+
+static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl,
+		u32 server_highest_slotid,
+		u32 ivalue)
+{
+	struct nfs4_slot **p;
+
+	nfs4_shrink_slot_table(tbl, server_highest_slotid + 1);
+	p = &tbl->slots;
+	while (*p) {
+		(*p)->seq_nr = ivalue;
+		(*p)->interrupted = 0;
+		p = &(*p)->next;
+	}
+	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	tbl->target_highest_slotid = server_highest_slotid;
+	tbl->server_highest_slotid = server_highest_slotid;
+	tbl->d_target_highest_slotid = 0;
+	tbl->d2_target_highest_slotid = 0;
+	tbl->max_slotid = server_highest_slotid;
+}
+
+/*
+ * (re)Initialise a slot table
+ */
+static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl,
+		u32 max_reqs, u32 ivalue)
+{
+	int ret;
+
+	dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
+		max_reqs, tbl->max_slots);
+
+	if (max_reqs > NFS4_MAX_SLOT_TABLE)
+		max_reqs = NFS4_MAX_SLOT_TABLE;
+
+	ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue);
+	if (ret)
+		goto out;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue);
+	spin_unlock(&tbl->slot_tbl_lock);
+
+	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
+		tbl, tbl->slots, tbl->max_slots);
+out:
+	dprintk("<-- %s: return %d\n", __func__, ret);
+	return ret;
+}
+
+/* Destroy the slot table */
+static void nfs4_destroy_slot_tables(struct nfs4_session *session)
+{
+	nfs4_shrink_slot_table(&session->fc_slot_table, 0);
+	nfs4_shrink_slot_table(&session->bc_slot_table, 0);
+}
+
+static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
+{
+	struct nfs4_sequence_args *args = task->tk_msg.rpc_argp;
+	struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
+	struct nfs4_slot *slot = pslot;
+	struct nfs4_slot_table *tbl = slot->table;
+
+	if (nfs4_session_draining(tbl->session) && !args->sa_privileged)
+		return false;
+	slot->generation = tbl->generation;
+	args->sa_slot = slot;
+	res->sr_timestamp = jiffies;
+	res->sr_slot = slot;
+	res->sr_status_flags = 0;
+	res->sr_status = 1;
+	return true;
+}
+
+static bool __nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot)
+{
+	if (rpc_wake_up_first(&tbl->slot_tbl_waitq, nfs41_assign_slot, slot))
+		return true;
+	return false;
+}
+
+bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot)
+{
+	if (slot->slot_nr > tbl->max_slotid)
+		return false;
+	return __nfs41_wake_and_assign_slot(tbl, slot);
+}
+
+static bool nfs41_try_wake_next_slot_table_entry(struct nfs4_slot_table *tbl)
+{
+	struct nfs4_slot *slot = nfs4_alloc_slot(tbl);
+	if (!IS_ERR(slot)) {
+		bool ret = __nfs41_wake_and_assign_slot(tbl, slot);
+		if (ret)
+			return ret;
+		nfs4_free_slot(tbl, slot);
+	}
+	return false;
+}
+
+void nfs41_wake_slot_table(struct nfs4_slot_table *tbl)
+{
+	for (;;) {
+		if (!nfs41_try_wake_next_slot_table_entry(tbl))
+			break;
+	}
+}
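
The wake-up path hands a pre-allocated slot straight to a waiting task:
nfs41_assign_slot() runs as the rpc_wake_up_first() callback, fills in
the task's sequence args and result before the task ever runs, and
refuses non-privileged tasks while the session is draining. Schematically
(comment-only restatement of the loop above, no new API assumed):

	/* nfs41_wake_slot_table(tbl):
	 *   slot = nfs4_alloc_slot(tbl);        // ERR_PTR(-EBUSY) when full
	 *   if (IS_ERR(slot))                   -> stop
	 *   if (!rpc_wake_up_first(&tbl->slot_tbl_waitq,
	 *                          nfs41_assign_slot, slot))
	 *           { nfs4_free_slot(tbl, slot); -> stop }  // nobody waiting
	 *   else repeat
	 */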
+
+static void nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl,
+		u32 target_highest_slotid)
+{
+	u32 max_slotid;
+
+	max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, target_highest_slotid);
+	if (max_slotid > tbl->server_highest_slotid)
+		max_slotid = tbl->server_highest_slotid;
+	if (max_slotid > tbl->target_highest_slotid)
+		max_slotid = tbl->target_highest_slotid;
+	tbl->max_slotid = max_slotid;
+	nfs41_wake_slot_table(tbl);
+}
+
+/* Update the client's idea of target_highest_slotid */
+static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
+		u32 target_highest_slotid)
+{
+	if (tbl->target_highest_slotid == target_highest_slotid)
+		return;
+	tbl->target_highest_slotid = target_highest_slotid;
+	tbl->generation++;
+}
+
+void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
+		u32 target_highest_slotid)
+{
+	spin_lock(&tbl->slot_tbl_lock);
+	nfs41_set_target_slotid_locked(tbl, target_highest_slotid);
+	tbl->d_target_highest_slotid = 0;
+	tbl->d2_target_highest_slotid = 0;
+	nfs41_set_max_slotid_locked(tbl, target_highest_slotid);
+	spin_unlock(&tbl->slot_tbl_lock);
+}
+
+static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl,
+		u32 highest_slotid)
+{
+	if (tbl->server_highest_slotid == highest_slotid)
+		return;
+	if (tbl->highest_used_slotid > highest_slotid)
+		return;
+	/* Deallocate slots */
+	nfs4_shrink_slot_table(tbl, highest_slotid + 1);
+	tbl->server_highest_slotid = highest_slotid;
+}
+
+static s32 nfs41_derivative_target_slotid(s32 s1, s32 s2)
+{
+	s1 -= s2;
+	if (s1 == 0)
+		return 0;
+	if (s1 < 0)
+		return (s1 - 1) >> 1;
+	return (s1 + 1) >> 1;
+}
+
+static int nfs41_sign_s32(s32 s1)
+{
+	if (s1 > 0)
+		return 1;
+	if (s1 < 0)
+		return -1;
+	return 0;
+}
+
+static bool nfs41_same_sign_or_zero_s32(s32 s1, s32 s2)
+{
+	if (!s1 || !s2)
+		return true;
+	return nfs41_sign_s32(s1) == nfs41_sign_s32(s2);
+}
+
+/* Try to eliminate outliers by checking for sharp changes in the
+ * first and second derivatives
+ */
+static bool nfs41_is_outlier_target_slotid(struct nfs4_slot_table *tbl,
+		u32 new_target)
+{
+	s32 d_target, d2_target;
+	bool ret = true;
+
+	d_target = nfs41_derivative_target_slotid(new_target,
+			tbl->target_highest_slotid);
+	d2_target = nfs41_derivative_target_slotid(d_target,
+			tbl->d_target_highest_slotid);
+	/* Is first derivative same sign? */
+	if (nfs41_same_sign_or_zero_s32(d_target, tbl->d_target_highest_slotid))
+		ret = false;
+	/* Is second derivative same sign? */
+	if (nfs41_same_sign_or_zero_s32(d2_target, tbl->d2_target_highest_slotid))
+		ret = false;
+	tbl->d_target_highest_slotid = d_target;
+	tbl->d2_target_highest_slotid = d2_target;
+	return ret;
+}
+
+void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot,
+		struct nfs4_sequence_res *res)
+{
+	spin_lock(&tbl->slot_tbl_lock);
+	if (!nfs41_is_outlier_target_slotid(tbl, res->sr_target_highest_slotid))
+		nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid);
+	if (tbl->generation == slot->generation)
+		nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid);
+	nfs41_set_max_slotid_locked(tbl, res->sr_target_highest_slotid);
+	spin_unlock(&tbl->slot_tbl_lock);
+}
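
A concrete run of the outlier filter, using the halving arithmetic of
nfs41_derivative_target_slotid():

	/* History: target 10 -> 20 -> 1000
	 *   after 20:   d_target =   5, d2_target =   3
	 *   after 1000: d_target = 490, d2_target = 243  (accepted: d kept its sign)
	 * New target 15:
	 *   d  = deriv(15 - 1000)  = -493   (sign flip vs +490)
	 *   d2 = deriv(-493 - 490) = -492   (sign flip vs +243)
	 * Both derivatives change sign, so ret stays true: the dip is
	 * discarded as an outlier and the target stays 1000. Because d/d2
	 * are still recorded, a sustained drop to 15 matches signs on the
	 * very next reply and is accepted then.
	 */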
+
+/*
+ * Initialize or reset the forechannel and backchannel tables
+ */
+int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
+{
+	struct nfs4_slot_table *tbl;
+	int status;
+
+	dprintk("--> %s\n", __func__);
+	/* Fore channel */
+	tbl = &ses->fc_slot_table;
+	tbl->session = ses;
+	status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
+	if (status) /* -ENOMEM */
+		return status;
+	/* Back channel */
+	tbl = &ses->bc_slot_table;
+	tbl->session = ses;
+	status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
+	if (status && tbl->slots == NULL)
+		/* Fore and back channel share a connection so get
+		 * both slot tables or neither */
+		nfs4_destroy_slot_tables(ses);
+	return status;
+}
+
+struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
+{
+	struct nfs4_session *session;
+	struct nfs4_slot_table *tbl;
+
+	session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+	if (!session)
+		return NULL;
+
+	tbl = &session->fc_slot_table;
+	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	spin_lock_init(&tbl->slot_tbl_lock);
+	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+	init_completion(&tbl->complete);
+
+	tbl = &session->bc_slot_table;
+	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	spin_lock_init(&tbl->slot_tbl_lock);
+	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+	init_completion(&tbl->complete);
+
+	session->session_state = 1<<NFS4_SESSION_INITING;
+
+	session->clp = clp;
+	return session;
+}
+
+void nfs4_destroy_session(struct nfs4_session *session)
+{
+	struct rpc_xprt *xprt;
+	struct rpc_cred *cred;
+
+	cred = nfs4_get_exchange_id_cred(session->clp);
+	nfs4_proc_destroy_session(session, cred);
+	if (cred)
+		put_rpccred(cred);
+
+	rcu_read_lock();
+	xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
+	rcu_read_unlock();
+	dprintk("%s Destroy backchannel for xprt %p\n",
+		__func__, xprt);
+	xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
+	nfs4_destroy_slot_tables(session);
+	kfree(session);
+}
+
+/*
+ * With sessions, the client is not marked ready until after a
+ * successful EXCHANGE_ID and CREATE_SESSION.
+ *
+ * Map cl_cons_state errors to EPROTONOSUPPORT to indicate
+ * other versions of NFS can be tried.
+ */
+static int nfs41_check_session_ready(struct nfs_client *clp)
+{
+	int ret;
+	
+	if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
+		ret = nfs4_client_recover_expired_lease(clp);
+		if (ret)
+			return ret;
+	}
+	if (clp->cl_cons_state < NFS_CS_READY)
+		return -EPROTONOSUPPORT;
+	smp_rmb();
+	return 0;
+}
+
+int nfs4_init_session(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_session *session;
+	unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE;
+	unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE;
+
+	if (!nfs4_has_session(clp))
+		return 0;
+
+	if (server->rsize != 0)
+		target_max_resp_sz = server->rsize;
+	target_max_resp_sz += nfs41_maxread_overhead;
+
+	if (server->wsize != 0)
+		target_max_rqst_sz = server->wsize;
+	target_max_rqst_sz += nfs41_maxwrite_overhead;
+
+	session = clp->cl_session;
+	spin_lock(&clp->cl_lock);
+	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
+		/* Initialise targets and channel attributes */
+		session->fc_target_max_rqst_sz = target_max_rqst_sz;
+		session->fc_attrs.max_rqst_sz = target_max_rqst_sz;
+		session->fc_target_max_resp_sz = target_max_resp_sz;
+		session->fc_attrs.max_resp_sz = target_max_resp_sz;
+	} else {
+		/* Just adjust the targets */
+		if (target_max_rqst_sz > session->fc_target_max_rqst_sz) {
+			session->fc_target_max_rqst_sz = target_max_rqst_sz;
+			set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+		}
+		if (target_max_resp_sz > session->fc_target_max_resp_sz) {
+			session->fc_target_max_resp_sz = target_max_resp_sz;
+			set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+
+	if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+		nfs4_schedule_lease_recovery(clp);
+
+	return nfs41_check_session_ready(clp);
+}
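
The request/response size targets now live on the session, so a later
mount with a larger rsize/wsize can grow them and force renegotiation.
The arithmetic, restated from the code above:

	/* rqst target = (wsize ? wsize : NFS_MAX_FILE_IO_SIZE)
	 *               + nfs41_maxwrite_overhead;
	 * resp target = (rsize ? rsize : NFS_MAX_FILE_IO_SIZE)
	 *               + nfs41_maxread_overhead;
	 * If either exceeds the session's current fc_target_max_*_sz,
	 * NFS4CLNT_SESSION_RESET is set and lease recovery is scheduled,
	 * letting CREATE_SESSION renegotiate the channel attributes.
	 */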
+
+int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
+{
+	struct nfs4_session *session = clp->cl_session;
+	int ret;
+
+	spin_lock(&clp->cl_lock);
+	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
+		/*
+		 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the
+		 * DS lease to be equal to the MDS lease.
+		 */
+		clp->cl_lease_time = lease_time;
+		clp->cl_last_renewal = jiffies;
+	}
+	spin_unlock(&clp->cl_lock);
+
+	ret = nfs41_check_session_ready(clp);
+	if (ret)
+		return ret;
+	/* Test for the DS role */
+	if (!is_ds_client(clp))
+		return -ENODEV;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
+
+
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
new file mode 100644
index 0000000..6f3cb39
--- /dev/null
+++ b/fs/nfs/nfs4session.h
@@ -0,0 +1,142 @@
+/*
+ * fs/nfs/nfs4session.h
+ *
+ * Copyright (c) 2012 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ */
+#ifndef __LINUX_FS_NFS_NFS4SESSION_H
+#define __LINUX_FS_NFS_NFS4SESSION_H
+
+/* maximum number of slots to use */
+#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
+#define NFS4_MAX_SLOT_TABLE (1024U)
+#define NFS4_NO_SLOT ((u32)-1)
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+
+/* Sessions slot seqid */
+struct nfs4_slot {
+	struct nfs4_slot_table	*table;
+	struct nfs4_slot	*next;
+	unsigned long		generation;
+	u32			slot_nr;
+	u32		 	seq_nr;
+	unsigned int		interrupted : 1;
+};
+
+/* Sessions */
+#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
+struct nfs4_slot_table {
+	struct nfs4_session *session;		/* Parent session */
+	struct nfs4_slot *slots;		/* seqid per slot */
+	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
+	spinlock_t	slot_tbl_lock;
+	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
+	u32		max_slots;		/* # slots in table */
+	u32		max_slotid;		/* Max allowed slotid value */
+	u32		highest_used_slotid;	/* sent to server on each SEQ.
+						 * op for dynamic resizing */
+	u32		target_highest_slotid;	/* Server max_slot target */
+	u32		server_highest_slotid;	/* Server highest slotid */
+	s32		d_target_highest_slotid; /* Derivative */
+	s32		d2_target_highest_slotid; /* 2nd derivative */
+	unsigned long	generation;		/* Generation counter for
+						   target_highest_slotid */
+	struct completion complete;
+};
+
+/*
+ * Session related parameters
+ */
+struct nfs4_session {
+	struct nfs4_sessionid		sess_id;
+	u32				flags;
+	unsigned long			session_state;
+	u32				hash_alg;
+	u32				ssv_len;
+
+	/* The fore and back channel */
+	struct nfs4_channel_attrs	fc_attrs;
+	struct nfs4_slot_table		fc_slot_table;
+	struct nfs4_channel_attrs	bc_attrs;
+	struct nfs4_slot_table		bc_slot_table;
+	struct nfs_client		*clp;
+	/* Create session arguments */
+	unsigned int			fc_target_max_rqst_sz;
+	unsigned int			fc_target_max_resp_sz;
+};
+
+enum nfs4_session_state {
+	NFS4_SESSION_INITING,
+	NFS4_SESSION_DRAINING,
+};
+
+#if defined(CONFIG_NFS_V4_1)
+extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
+extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
+
+extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
+		u32 target_highest_slotid);
+extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot,
+		struct nfs4_sequence_res *res);
+
+extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses);
+
+extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
+extern void nfs4_destroy_session(struct nfs4_session *session);
+extern int nfs4_init_session(struct nfs_server *server);
+extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
+
+extern void nfs4_session_drain_complete(struct nfs4_session *session,
+		struct nfs4_slot_table *tbl);
+
+static inline bool nfs4_session_draining(struct nfs4_session *session)
+{
+	return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state);
+}
+
+bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot);
+void nfs41_wake_slot_table(struct nfs4_slot_table *tbl);
+
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+	if (clp->cl_session)
+		return 1;
+	return 0;
+}
+
+static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
+{
+	if (nfs4_has_session(clp))
+		return (clp->cl_session->flags & SESSION4_PERSIST);
+	return 0;
+}
+
+#else /* defined(CONFIG_NFS_V4_1) */
+
+static inline int nfs4_init_session(struct nfs_server *server)
+{
+	return 0;
+}
+
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+	return 0;
+}
+
+static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
+{
+	return 0;
+}
+
+#endif /* defined(CONFIG_NFS_V4_1) */
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+#endif /* __LINUX_FS_NFS_NFS4SESSION_H */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c351e6b..9448c57 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
 
@@ -66,7 +67,6 @@
 
 const nfs4_stateid zero_stateid;
 static DEFINE_MUTEX(nfs_clid_init_mutex);
-static LIST_HEAD(nfs4_clientid_list);
 
 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
@@ -254,24 +254,27 @@
 {
 	struct nfs4_session *ses = clp->cl_session;
 	struct nfs4_slot_table *tbl;
-	int max_slots;
 
 	if (ses == NULL)
 		return;
 	tbl = &ses->fc_slot_table;
 	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
 		spin_lock(&tbl->slot_tbl_lock);
-		max_slots = tbl->max_slots;
-		while (max_slots--) {
-			if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
-						nfs4_set_task_privileged,
-						NULL) == NULL)
-				break;
-		}
+		nfs41_wake_slot_table(tbl);
 		spin_unlock(&tbl->slot_tbl_lock);
 	}
 }
 
+/*
+ * Signal state manager thread if session fore channel is drained
+ */
+void nfs4_session_drain_complete(struct nfs4_session *session,
+		struct nfs4_slot_table *tbl)
+{
+	if (nfs4_session_draining(session))
+		complete(&tbl->complete);
+}
+
 static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
 {
 	spin_lock(&tbl->slot_tbl_lock);
@@ -303,7 +306,6 @@
 	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
 	/* create_session negotiated new slot table */
-	clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
 	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
 	nfs41_setup_state_renewal(clp);
 }
@@ -1086,7 +1088,6 @@
  */
 static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 {
-	BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid);
 	switch (status) {
 		case 0:
 			break;
@@ -1209,6 +1210,40 @@
 }
 EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
 
+int nfs4_wait_clnt_recover(struct nfs_client *clp)
+{
+	int res;
+
+	might_sleep();
+
+	res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
+			nfs_wait_bit_killable, TASK_KILLABLE);
+	if (res)
+		return res;
+
+	if (clp->cl_cons_state < 0)
+		return clp->cl_cons_state;
+	return 0;
+}
+
+int nfs4_client_recover_expired_lease(struct nfs_client *clp)
+{
+	unsigned int loop;
+	int ret;
+
+	for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
+		ret = nfs4_wait_clnt_recover(clp);
+		if (ret != 0)
+			break;
+		if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
+		    !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
+			break;
+		nfs4_schedule_state_manager(clp);
+		ret = -EIO;
+	}
+	return ret;
+}
+
 /*
  * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
  * @clp: client to process
@@ -1401,14 +1436,6 @@
 				/* Mark the file as being 'closed' */
 				state->state = 0;
 				break;
-			case -EKEYEXPIRED:
-				/*
-				 * User RPCSEC_GSS context has expired.
-				 * We cannot recover this stateid now, so
-				 * skip it and allow recovery thread to
-				 * proceed.
-				 */
-				break;
 			case -NFS4ERR_ADMIN_REVOKED:
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_BAD_STATEID:
@@ -1561,14 +1588,6 @@
 	nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
 }
 
-static void nfs4_warn_keyexpired(const char *s)
-{
-	printk_ratelimited(KERN_WARNING "Error: state manager"
-			" encountered RPCSEC_GSS session"
-			" expired against NFSv4 server %s.\n",
-			s);
-}
-
 static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 {
 	switch (error) {
@@ -1602,10 +1621,6 @@
 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
 			set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
 			break;
-		case -EKEYEXPIRED:
-			/* Nothing we can do */
-			nfs4_warn_keyexpired(clp->cl_hostname);
-			break;
 		default:
 			dprintk("%s: failed to handle error %d for server %s\n",
 					__func__, error, clp->cl_hostname);
@@ -1722,8 +1737,6 @@
 		dprintk("%s: exit with error %d for server %s\n",
 				__func__, -EPROTONOSUPPORT, clp->cl_hostname);
 		return -EPROTONOSUPPORT;
-	case -EKEYEXPIRED:
-		nfs4_warn_keyexpired(clp->cl_hostname);
 	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
 				 * in nfs4_exchange_id */
 	default:
@@ -1876,7 +1889,6 @@
 		break;
 
 	case -EKEYEXPIRED:
-		nfs4_warn_keyexpired(clp->cl_hostname);
 	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
 				 * in nfs4_exchange_id */
 		status = -EKEYEXPIRED;
@@ -1907,14 +1919,23 @@
 }
 EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
 
-void nfs41_handle_recall_slot(struct nfs_client *clp)
+static void nfs41_ping_server(struct nfs_client *clp)
 {
-	set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
-	dprintk("%s: scheduling slot recall for server %s\n", __func__,
-			clp->cl_hostname);
+	/* Use CHECK_LEASE to ping the server with a SEQUENCE */
+	set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
 	nfs4_schedule_state_manager(clp);
 }
 
+void nfs41_server_notify_target_slotid_update(struct nfs_client *clp)
+{
+	nfs41_ping_server(clp);
+}
+
+void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp)
+{
+	nfs41_ping_server(clp);
+}
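
Both notifications now collapse into a lease check: instead of copying
the slot table synchronously (the nfs4_recall_slot() machinery removed
below), the client marks NFS4CLNT_CHECK_LEASE and lets the state manager
issue a SEQUENCE; the reply then resizes the table lazily. The full path,
all within this series:

	/* server notification (CB_RECALL_SLOT or SEQUENCE status flags)
	 *   -> nfs41_server_notify_{target,highest}_slotid_update(clp)
	 *   -> nfs41_ping_server(clp)        // sets NFS4CLNT_CHECK_LEASE
	 *   -> state manager sends SEQUENCE
	 *   -> decode_sequence() records sr_highest_slotid and
	 *      sr_target_highest_slotid (fs/nfs/nfs4xdr.c below)
	 *   -> nfs41_update_target_slotid() grows/shrinks the slot table
	 */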
+
 static void nfs4_reset_all_state(struct nfs_client *clp)
 {
 	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
@@ -2024,35 +2045,6 @@
 	return status;
 }
 
-static int nfs4_recall_slot(struct nfs_client *clp)
-{
-	struct nfs4_slot_table *fc_tbl;
-	struct nfs4_slot *new, *old;
-	int i;
-
-	if (!nfs4_has_session(clp))
-		return 0;
-	nfs4_begin_drain_session(clp);
-	fc_tbl = &clp->cl_session->fc_slot_table;
-	new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
-		      GFP_NOFS);
-        if (!new)
-		return -ENOMEM;
-
-	spin_lock(&fc_tbl->slot_tbl_lock);
-	for (i = 0; i < fc_tbl->target_max_slots; i++)
-		new[i].seq_nr = fc_tbl->slots[i].seq_nr;
-	old = fc_tbl->slots;
-	fc_tbl->slots = new;
-	fc_tbl->max_slots = fc_tbl->target_max_slots;
-	fc_tbl->target_max_slots = 0;
-	clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
-	spin_unlock(&fc_tbl->slot_tbl_lock);
-
-	kfree(old);
-	return 0;
-}
-
 static int nfs4_bind_conn_to_session(struct nfs_client *clp)
 {
 	struct rpc_cred *cred;
@@ -2083,7 +2075,6 @@
 #else /* CONFIG_NFS_V4_1 */
 static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
 static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
-static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
 
 static int nfs4_bind_conn_to_session(struct nfs_client *clp)
 {
@@ -2115,15 +2106,6 @@
 			continue;
 		}
 
-		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
-			section = "check lease";
-			status = nfs4_check_lease(clp);
-			if (status < 0)
-				goto out_error;
-			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
-				continue;
-		}
-
 		/* Initialize or reset the session */
 		if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) {
 			section = "reset session";
@@ -2144,10 +2126,9 @@
 			continue;
 		}
 
-		/* Recall session slots */
-		if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)) {
-			section = "recall slot";
-			status = nfs4_recall_slot(clp);
+		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+			section = "check lease";
+			status = nfs4_check_lease(clp);
 			if (status < 0)
 				goto out_error;
 			continue;
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index bd61221..84d2e9e 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -51,6 +51,7 @@
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs4_write_inode,
+	.drop_inode	= nfs_drop_inode,
 	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.evict_inode	= nfs4_evict_inode,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 40836ee..26b1439 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -56,6 +56,7 @@
 
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
 
@@ -270,6 +271,8 @@
 
 #if defined(CONFIG_NFS_V4_1)
 #define NFS4_MAX_MACHINE_NAME_LEN (64)
+#define IMPL_NAME_LIMIT (sizeof(utsname()->sysname) + sizeof(utsname()->release) + \
+			 sizeof(utsname()->version) + sizeof(utsname()->machine) + 8)
 
 #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
 				encode_verifier_maxsz + \
@@ -282,7 +285,7 @@
 				1 /* nii_domain */ + \
 				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
 				1 /* nii_name */ + \
-				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+				XDR_QUADLEN(IMPL_NAME_LIMIT) + \
 				3 /* nii_date */)
 #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
 				2 /* eir_clientid */ + \
@@ -936,7 +939,7 @@
 	 * but this is not required as a MUST for the server to do so. */
 	hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
 
-	BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
+	WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN);
 	encode_string(xdr, hdr->taglen, hdr->tag);
 	p = reserve_space(xdr, 8);
 	*p++ = cpu_to_be32(hdr->minorversion);
@@ -955,7 +958,7 @@
 
 static void encode_nops(struct compound_hdr *hdr)
 {
-	BUG_ON(hdr->nops > NFS4_MAX_OPS);
+	WARN_ON_ONCE(hdr->nops > NFS4_MAX_OPS);
 	*hdr->nops_p = htonl(hdr->nops);
 }
 
@@ -1403,7 +1406,6 @@
 		*p = cpu_to_be32(NFS4_OPEN_NOCREATE);
 		break;
 	default:
-		BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
 		*p = cpu_to_be32(NFS4_OPEN_CREATE);
 		encode_createmode(xdr, arg);
 	}
@@ -1621,7 +1623,6 @@
 	p = reserve_space(xdr, 2*4);
 	*p++ = cpu_to_be32(1);
 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
-	BUG_ON(arg->acl_len % 4);
 	p = reserve_space(xdr, 4);
 	*p = cpu_to_be32(arg->acl_len);
 	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
@@ -1713,7 +1714,7 @@
 			       struct compound_hdr *hdr)
 {
 	__be32 *p;
-	char impl_name[NFS4_OPAQUE_LIMIT];
+	char impl_name[IMPL_NAME_LIMIT];
 	int len = 0;
 
 	encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
@@ -1728,7 +1729,7 @@
 	if (send_implementation_id &&
 	    sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 &&
 	    sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN)
-		<= NFS4_OPAQUE_LIMIT + 1)
+		<= sizeof(impl_name) + 1)
 		len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s",
 			       utsname()->sysname, utsname()->release,
 			       utsname()->version, utsname()->machine);
@@ -1835,18 +1836,16 @@
 			    struct compound_hdr *hdr)
 {
 #if defined(CONFIG_NFS_V4_1)
-	struct nfs4_session *session = args->sa_session;
+	struct nfs4_session *session;
 	struct nfs4_slot_table *tp;
-	struct nfs4_slot *slot;
+	struct nfs4_slot *slot = args->sa_slot;
 	__be32 *p;
 
-	if (!session)
+	if (slot == NULL)
 		return;
 
-	tp = &session->fc_slot_table;
-
-	WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
-	slot = tp->slots + args->sa_slotid;
+	tp = slot->table;
+	session = tp->session;
 
 	encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
 
@@ -1860,12 +1859,12 @@
 		((u32 *)session->sess_id.data)[1],
 		((u32 *)session->sess_id.data)[2],
 		((u32 *)session->sess_id.data)[3],
-		slot->seq_nr, args->sa_slotid,
+		slot->seq_nr, slot->slot_nr,
 		tp->highest_used_slotid, args->sa_cache_this);
 	p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16);
 	p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
 	*p++ = cpu_to_be32(slot->seq_nr);
-	*p++ = cpu_to_be32(args->sa_slotid);
+	*p++ = cpu_to_be32(slot->slot_nr);
 	*p++ = cpu_to_be32(tp->highest_used_slotid);
 	*p = cpu_to_be32(args->sa_cache_this);
 #endif /* CONFIG_NFS_V4_1 */
@@ -2027,8 +2026,9 @@
 static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
 {
 #if defined(CONFIG_NFS_V4_1)
-	if (args->sa_session)
-		return args->sa_session->clp->cl_mvops->minor_version;
+
+	if (args->sa_slot)
+		return args->sa_slot->table->session->clp->cl_mvops->minor_version;
 #endif /* CONFIG_NFS_V4_1 */
 	return 0;
 }
@@ -5509,12 +5509,13 @@
 			   struct rpc_rqst *rqstp)
 {
 #if defined(CONFIG_NFS_V4_1)
+	struct nfs4_session *session;
 	struct nfs4_sessionid id;
 	u32 dummy;
 	int status;
 	__be32 *p;
 
-	if (!res->sr_session)
+	if (res->sr_slot == NULL)
 		return 0;
 
 	status = decode_op_hdr(xdr, OP_SEQUENCE);
@@ -5528,8 +5529,9 @@
 	 * sequence number, the server is looney tunes.
 	 */
 	status = -EREMOTEIO;
+	session = res->sr_slot->table->session;
 
-	if (memcmp(id.data, res->sr_session->sess_id.data,
+	if (memcmp(id.data, session->sess_id.data,
 		   NFS4_MAX_SESSIONID_LEN)) {
 		dprintk("%s Invalid session id\n", __func__);
 		goto out_err;
@@ -5547,14 +5549,14 @@
 	}
 	/* slot id */
 	dummy = be32_to_cpup(p++);
-	if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
+	if (dummy != res->sr_slot->slot_nr) {
 		dprintk("%s Invalid slot id\n", __func__);
 		goto out_err;
 	}
-	/* highest slot id - currently not processed */
-	dummy = be32_to_cpup(p++);
-	/* target highest slot id - currently not processed */
-	dummy = be32_to_cpup(p++);
+	/* highest slot id */
+	res->sr_highest_slotid = be32_to_cpup(p++);
+	/* target highest slot id */
+	res->sr_target_highest_slotid = be32_to_cpup(p++);
 	/* result flags */
 	res->sr_status_flags = be32_to_cpup(p);
 	status = 0;
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 8746135..a9ebd81 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -148,17 +148,6 @@
 	return end >= start ? end : NFS4_MAX_UINT64;
 }
 
-/* last octet in a range */
-static inline u64
-last_byte_offset(u64 start, u64 len)
-{
-	u64 end;
-
-	BUG_ON(!len);
-	end = start + len;
-	return end > start ? end - 1 : NFS4_MAX_UINT64;
-}
-
 static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
 			   struct page ***p_pages, unsigned *p_pgbase,
 			   u64 offset, unsigned long count)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2878f97..e7165d9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -369,17 +369,6 @@
 	return end >= start ? end : NFS4_MAX_UINT64;
 }
 
-/* last octet in a range */
-static inline u64
-last_byte_offset(u64 start, u64 len)
-{
-	u64 end;
-
-	BUG_ON(!len);
-	end = start + len;
-	return end > start ? end - 1 : NFS4_MAX_UINT64;
-}
-
 /*
  * is l2 fully contained in l1?
  *   start1                             end1
@@ -645,7 +634,6 @@
 
 	dprintk("--> %s\n", __func__);
 
-	BUG_ON(ctx == NULL);
 	lgp = kzalloc(sizeof(*lgp), gfp_flags);
 	if (lgp == NULL)
 		return NULL;
@@ -1126,7 +1114,6 @@
 		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
 		 */
 		spin_lock(&clp->cl_lock);
-		BUG_ON(!list_empty(&lo->plh_layouts));
 		list_add_tail(&lo->plh_layouts, &server->layouts);
 		spin_unlock(&clp->cl_lock);
 	}
@@ -1222,7 +1209,7 @@
 {
 	u64 rd_size = req->wb_bytes;
 
-	BUG_ON(pgio->pg_lseg != NULL);
+	WARN_ON_ONCE(pgio->pg_lseg != NULL);
 
 	if (req->wb_offset != req->wb_pgbase) {
 		nfs_pageio_reset_read_mds(pgio);
@@ -1251,7 +1238,7 @@
 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			   struct nfs_page *req, u64 wb_size)
 {
-	BUG_ON(pgio->pg_lseg != NULL);
+	WARN_ON_ONCE(pgio->pg_lseg != NULL);
 
 	if (req->wb_offset != req->wb_pgbase) {
 		nfs_pageio_reset_write_mds(pgio);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 50a88c3..f084dac 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -47,39 +47,6 @@
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 /*
- * wrapper to handle the -EKEYEXPIRED error message. This should generally
- * only happen if using krb5 auth and a user's TGT expires. NFSv2 doesn't
- * support the NFSERR_JUKEBOX error code, but we handle this situation in the
- * same way that we handle that error with NFSv3.
- */
-static int
-nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
-{
-	int res;
-	do {
-		res = rpc_call_sync(clnt, msg, flags);
-		if (res != -EKEYEXPIRED)
-			break;
-		freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
-		res = -ERESTARTSYS;
-	} while (!fatal_signal_pending(current));
-	return res;
-}
-
-#define rpc_call_sync(clnt, msg, flags)	nfs_rpc_wrapper(clnt, msg, flags)
-
-static int
-nfs_async_handle_expired_key(struct rpc_task *task)
-{
-	if (task->tk_status != -EKEYEXPIRED)
-		return 0;
-	task->tk_status = 0;
-	rpc_restart_call(task);
-	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
-	return 1;
-}
-
-/*
  * Bare-bones access to getattr: this is for nfs_read_super.
  */
 static int
@@ -364,8 +331,6 @@
 
 static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 {
-	if (nfs_async_handle_expired_key(task))
-		return 0;
 	nfs_mark_for_revalidate(dir);
 	return 1;
 }
@@ -385,8 +350,6 @@
 nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
 		     struct inode *new_dir)
 {
-	if (nfs_async_handle_expired_key(task))
-		return 0;
 	nfs_mark_for_revalidate(old_dir);
 	nfs_mark_for_revalidate(new_dir);
 	return 1;
@@ -642,9 +605,6 @@
 {
 	struct inode *inode = data->header->inode;
 
-	if (nfs_async_handle_expired_key(task))
-		return -EAGAIN;
-
 	nfs_invalidate_atime(inode);
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(inode, data->res.fattr);
@@ -671,9 +631,6 @@
 {
 	struct inode *inode = data->header->inode;
 
-	if (nfs_async_handle_expired_key(task))
-		return -EAGAIN;
-
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
 	return 0;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 652d3f7..c25cadf8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -64,6 +64,7 @@
 #include "iostat.h"
 #include "internal.h"
 #include "fscache.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "nfs.h"
 
@@ -307,6 +308,7 @@
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs_write_inode,
+	.drop_inode	= nfs_drop_inode,
 	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.evict_inode	= nfs_evict_inode,
@@ -2373,19 +2375,30 @@
 				 struct nfs_parsed_mount_data *parsed,
 				 struct nfs_clone_mount *cloned)
 {
+	struct nfs_server *nfss = NFS_SB(sb);
 	char *uniq = NULL;
 	int ulen = 0;
 
-	if (parsed && parsed->fscache_uniq) {
-		uniq = parsed->fscache_uniq;
-		ulen = strlen(parsed->fscache_uniq);
+	nfss->fscache_key = NULL;
+	nfss->fscache = NULL;
+
+	if (parsed) {
+		if (!(parsed->options & NFS_OPTION_FSCACHE))
+			return;
+		if (parsed->fscache_uniq) {
+			uniq = parsed->fscache_uniq;
+			ulen = strlen(parsed->fscache_uniq);
+		}
 	} else if (cloned) {
 		struct nfs_server *mnt_s = NFS_SB(cloned->sb);
+		if (!(mnt_s->options & NFS_OPTION_FSCACHE))
+			return;
 		if (mnt_s->fscache_key) {
 			uniq = mnt_s->fscache_key->key.uniquifier;
 			ulen = mnt_s->fscache_key->key.uniq_len;
 		};
-	}
+	} else
+		return;
 
 	nfs_fscache_get_super_cookie(sb, uniq, ulen);
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9347ab7..b673be3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -202,7 +202,6 @@
 /* A writeback failed: mark the page as bad, and invalidate the page cache */
 static void nfs_set_pageerror(struct page *page)
 {
-	SetPageError(page);
 	nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
 }
 
@@ -239,21 +238,18 @@
 #define NFS_CONGESTION_OFF_THRESH	\
 	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
 
-static int nfs_set_page_writeback(struct page *page)
+static void nfs_set_page_writeback(struct page *page)
 {
+	struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host);
 	int ret = test_set_page_writeback(page);
 
-	if (!ret) {
-		struct inode *inode = page_file_mapping(page)->host;
-		struct nfs_server *nfss = NFS_SERVER(inode);
+	WARN_ON_ONCE(ret != 0);
 
-		if (atomic_long_inc_return(&nfss->writeback) >
-				NFS_CONGESTION_ON_THRESH) {
-			set_bdi_congested(&nfss->backing_dev_info,
-						BLK_RW_ASYNC);
-		}
+	if (atomic_long_inc_return(&nfss->writeback) >
+			NFS_CONGESTION_ON_THRESH) {
+		set_bdi_congested(&nfss->backing_dev_info,
+					BLK_RW_ASYNC);
 	}
-	return ret;
 }
 
 static void nfs_end_page_writeback(struct page *page)
@@ -315,10 +311,10 @@
 	if (IS_ERR(req))
 		goto out;
 
-	ret = nfs_set_page_writeback(page);
-	BUG_ON(ret != 0);
-	BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));
+	nfs_set_page_writeback(page);
+	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
 
+	ret = 0;
 	if (!nfs_pageio_add_request(pgio, req)) {
 		nfs_redirty_request(req);
 		ret = pgio->pg_error;
@@ -451,8 +447,6 @@
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	BUG_ON (!NFS_WBACK_BUSY(req));
-
 	spin_lock(&inode->i_lock);
 	if (likely(!PageSwapCache(req->wb_page))) {
 		set_page_private(req->wb_page, 0);
@@ -884,7 +878,7 @@
 {
 	if (nfs_have_delegated_attributes(inode))
 		goto out;
-	if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
+	if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
 		return false;
 out:
 	return PageUptodate(page) != 0;
@@ -1727,7 +1721,6 @@
 	struct nfs_page *req;
 	int ret = 0;
 
-	BUG_ON(!PageLocked(page));
 	for (;;) {
 		wait_on_page_writeback(page);
 		req = nfs_page_find_request(page);
@@ -1801,7 +1794,8 @@
 	if (PagePrivate(page))
 		return -EBUSY;
 
-	nfs_fscache_release_page(page, GFP_KERNEL);
+	if (!nfs_fscache_release_page(page, GFP_KERNEL))
+		return -EBUSY;
 
 	return migrate_page(mapping, newpage, page, mode);
 }
@@ -1829,7 +1823,7 @@
 		goto out_destroy_write_mempool;
 
 	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
-						      nfs_wdata_cachep);
+						      nfs_cdata_cachep);
 	if (nfs_commit_mempool == NULL)
 		goto out_destroy_commit_cache;
 
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index e6c3815..e761ee9 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -8,61 +8,144 @@
 #include <linux/fs.h>
 #include <linux/debugfs.h>
 #include <linux/module.h>
+#include <linux/nsproxy.h>
+#include <linux/sunrpc/clnt.h>
+#include <asm/uaccess.h>
 
 #include "state.h"
-#include "fault_inject.h"
+#include "netns.h"
 
 struct nfsd_fault_inject_op {
 	char *file;
-	void (*func)(u64);
+	u64 (*forget)(struct nfs4_client *, u64);
+	u64 (*print)(struct nfs4_client *, u64);
 };
 
 static struct nfsd_fault_inject_op inject_ops[] = {
 	{
 		.file   = "forget_clients",
-		.func   = nfsd_forget_clients,
+		.forget = nfsd_forget_client,
+		.print  = nfsd_print_client,
 	},
 	{
 		.file   = "forget_locks",
-		.func   = nfsd_forget_locks,
+		.forget = nfsd_forget_client_locks,
+		.print  = nfsd_print_client_locks,
 	},
 	{
 		.file   = "forget_openowners",
-		.func   = nfsd_forget_openowners,
+		.forget = nfsd_forget_client_openowners,
+		.print  = nfsd_print_client_openowners,
 	},
 	{
 		.file   = "forget_delegations",
-		.func   = nfsd_forget_delegations,
+		.forget = nfsd_forget_client_delegations,
+		.print  = nfsd_print_client_delegations,
 	},
 	{
 		.file   = "recall_delegations",
-		.func   = nfsd_recall_delegations,
+		.forget = nfsd_recall_client_delegations,
+		.print  = nfsd_print_client_delegations,
 	},
 };
 
 static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
 static struct dentry *debug_dir;
 
-static int nfsd_inject_set(void *op_ptr, u64 val)
+static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val)
 {
-	struct nfsd_fault_inject_op *op = op_ptr;
+	u64 count = 0;
 
 	if (val == 0)
 		printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
 	else
 		printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
 
-	op->func(val);
-	return 0;
+	nfs4_lock_state();
+	count = nfsd_for_n_state(val, op->forget);
+	nfs4_unlock_state();
+	printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
 }
 
-static int nfsd_inject_get(void *data, u64 *val)
+static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op,
+				   struct sockaddr_storage *addr,
+				   size_t addr_size)
 {
-	*val = 0;
-	return 0;
+	char buf[INET6_ADDRSTRLEN];
+	struct nfs4_client *clp;
+	u64 count;
+
+	nfs4_lock_state();
+	clp = nfsd_find_client(addr, addr_size);
+	if (clp) {
+		count = op->forget(clp, 0);
+		rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
+		printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count);
+	}
+	nfs4_unlock_state();
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n");
+static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val)
+{
+	nfs4_lock_state();
+	*val = nfsd_for_n_state(0, op->print);
+	nfs4_unlock_state();
+}
+
+static ssize_t fault_inject_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	static u64 val;
+	char read_buf[25];
+	size_t size, ret;
+	loff_t pos = *ppos;
+
+	if (!pos)
+		nfsd_inject_get(file->f_dentry->d_inode->i_private, &val);
+	size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= size || !len)
+		return 0;
+	if (len > size - pos)
+		len = size - pos;
+	ret = copy_to_user(buf, read_buf + pos, len);
+	if (ret == len)
+		return -EFAULT;
+	len -= ret;
+	*ppos = pos + len;
+	return len;
+}
+
+static ssize_t fault_inject_write(struct file *file, const char __user *buf,
+				  size_t len, loff_t *ppos)
+{
+	char write_buf[INET6_ADDRSTRLEN];
+	size_t size = min(sizeof(write_buf) - 1, len);
+	struct net *net = current->nsproxy->net_ns;
+	struct sockaddr_storage sa;
+	u64 val;
+
+	if (copy_from_user(write_buf, buf, size))
+		return -EFAULT;
+	write_buf[size] = '\0';
+
+	size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
+	if (size > 0)
+		nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size);
+	else {
+		val = simple_strtoll(write_buf, NULL, 0);
+		nfsd_inject_set(file->f_dentry->d_inode->i_private, val);
+	}
+	return len; /* on success, claim we got the whole input */
+}
+
+static const struct file_operations fops_nfsd = {
+	.owner   = THIS_MODULE,
+	.read    = fault_inject_read,
+	.write   = fault_inject_write,
+};
 
 void nfsd_fault_inject_cleanup(void)
 {
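
The rewritten file now accepts two kinds of input: a plain number (forget state for up to n clients) or a client IP address (forget state for that client only), and reading it back reports how many matching state objects the print callback found. A hypothetical userspace probe, assuming the usual debugfs mount point:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[32];
	ssize_t n;
	/* path assumes debugfs is mounted at /sys/kernel/debug */
	int fd = open("/sys/kernel/debug/nfsd/forget_locks", O_RDWR);

	if (fd < 0)
		return 1;
	/* an address targets a single client via rpc_pton()... */
	write(fd, "192.168.1.50", strlen("192.168.1.50"));
	/* ...anything else is parsed as a count by simple_strtoll() */
	write(fd, "10", 2);
	/* reading reports the number of state objects found */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("forget_locks: %s", buf);
	}
	close(fd);
	return 0;
}
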
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h
deleted file mode 100644
index 90bd057..0000000
--- a/fs/nfsd/fault_inject.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
- *
- * Function definitions for fault injection
- */
-
-#ifndef LINUX_NFSD_FAULT_INJECT_H
-#define LINUX_NFSD_FAULT_INJECT_H
-
-#ifdef CONFIG_NFSD_FAULT_INJECTION
-int nfsd_fault_inject_init(void);
-void nfsd_fault_inject_cleanup(void);
-void nfsd_forget_clients(u64);
-void nfsd_forget_locks(u64);
-void nfsd_forget_openowners(u64);
-void nfsd_forget_delegations(u64);
-void nfsd_recall_delegations(u64);
-#else /* CONFIG_NFSD_FAULT_INJECTION */
-static inline int nfsd_fault_inject_init(void) { return 0; }
-static inline void nfsd_fault_inject_cleanup(void) {}
-static inline void nfsd_forget_clients(u64 num) {}
-static inline void nfsd_forget_locks(u64 num) {}
-static inline void nfsd_forget_openowners(u64 num) {}
-static inline void nfsd_forget_delegations(u64 num) {}
-static inline void nfsd_recall_delegations(u64 num) {}
-#endif /* CONFIG_NFSD_FAULT_INJECTION */
-
-#endif /* LINUX_NFSD_FAULT_INJECT_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 65c2431..1051beb 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -24,7 +24,18 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
+/* Hash tables for nfs4_clientid state */
+#define CLIENT_HASH_BITS                 4
+#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
+#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
+
+#define LOCKOWNER_INO_HASH_BITS		8
+#define LOCKOWNER_INO_HASH_SIZE		(1 << LOCKOWNER_INO_HASH_BITS)
+
+#define SESSION_HASH_SIZE	512
+
 struct cld_net;
+struct nfsd4_client_tracking_ops;
 
 struct nfsd_net {
 	struct cld_net *cld_net;
@@ -38,7 +49,62 @@
 	struct lock_manager nfsd4_manager;
 	bool grace_ended;
 	time_t boot_time;
+
+	/*
+	 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot,
+	 * used in reboot/reset lease grace period processing.
+	 *
+	 * conf_id_hashtbl[] and conf_name_tree hold confirmed
+	 * setclientid_confirmed info.
+	 *
+	 * unconf_id_hashtbl[] and unconf_name_tree hold unconfirmed
+	 * setclientid info.
+	 */
+	struct list_head *reclaim_str_hashtbl;
+	int reclaim_str_hashtbl_size;
+	struct list_head *conf_id_hashtbl;
+	struct rb_root conf_name_tree;
+	struct list_head *unconf_id_hashtbl;
+	struct rb_root unconf_name_tree;
+	struct list_head *ownerstr_hashtbl;
+	struct list_head *lockowner_ino_hashtbl;
+	struct list_head *sessionid_hashtbl;
+	/*
+	 * client_lru holds client queue ordered by nfs4_client.cl_time
+	 * for lease renewal.
+	 *
+	 * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
+	 * for last close replay.
+	 *
+	 * All of the above fields are protected by the client_mutex.
+	 */
+	struct list_head client_lru;
+	struct list_head close_lru;
+
+	struct delayed_work laundromat_work;
+
+	/* client_lock protects the client lru list and session hash table */
+	spinlock_t client_lock;
+
+	struct file *rec_file;
+	bool in_grace;
+	struct nfsd4_client_tracking_ops *client_tracking_ops;
+
+	time_t nfsd4_lease;
+	time_t nfsd4_grace;
+
+	bool nfsd_net_up;
+
+	/*
+	 * Time of server startup
+	 */
+	struct timeval nfssvc_boot;
+
+	struct svc_serv *nfsd_serv;
 };
 
+/* Simple check to find out if a given net was properly initialized */
+#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
+
 extern int nfsd_net_id;
 #endif /* __NFSD_NETNS_H__ */
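
Most of the nfsd hunks that follow repeat one conversion: a file-scope global becomes a struct nfsd_net field, reached through net_generic(). A minimal sketch of the lookup idiom, using only fields introduced above:

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "netns.h"

/* Sketch only: per-net replacement for what used to be a global. */
static time_t example_get_lease(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	if (!nfsd_netns_ready(nn))	/* hash tables not yet allocated */
		return 0;
	return nn->nfsd4_lease;
}
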
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index b314888..9170861 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -253,7 +253,7 @@
 		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 	while (w > 0) {
-		if (!rqstp->rq_respages[rqstp->rq_resused++])
+		if (!*(rqstp->rq_next_page++))
 			return 0;
 		w -= PAGE_SIZE;
 	}
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index a596e9d..9cbc1a8 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -184,7 +184,7 @@
 			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 		while (w > 0) {
-			if (!rqstp->rq_respages[rqstp->rq_resused++])
+			if (!*(rqstp->rq_next_page++))
 				return 0;
 			w -= PAGE_SIZE;
 		}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 97d90d1..1fc02df 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -460,7 +460,7 @@
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
-	int	i;
+	struct page **p;
 	caddr_t	page_addr = NULL;
 
 	dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
@@ -484,8 +484,8 @@
 				     &resp->common,
 				     nfs3svc_encode_entry_plus);
 	memcpy(resp->verf, argp->verf, 8);
-	for (i=1; i<rqstp->rq_resused ; i++) {
-		page_addr = page_address(rqstp->rq_respages[i]);
+	for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+		page_addr = page_address(*p);
 
 		if (((caddr_t)resp->buffer >= page_addr) &&
 		    ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 43f46cd..324c0ba 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -7,8 +7,10 @@
  */
 
 #include <linux/namei.h>
+#include <linux/sunrpc/svc_xprt.h>
 #include "xdr3.h"
 #include "auth.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
@@ -323,7 +325,7 @@
 					struct nfsd3_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -338,8 +340,9 @@
 	/* set up the kvec */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -461,8 +464,7 @@
 	len = ntohl(*p++);
 	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
 		return 0;
-	args->tname = new =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->tname = new = page_address(*(rqstp->rq_next_page++));
 	args->tlen = len;
 	/* first copy and check from the first page */
 	old = (char*)p;
@@ -533,8 +535,7 @@
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -565,8 +566,7 @@
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -575,7 +575,7 @@
 nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirargs *args)
 {
-	int len, pn;
+	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -590,9 +590,9 @@
 	args->count = len;
 
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
+		struct page *p = *(rqstp->rq_next_page++);
 		if (!args->buffer)
-			args->buffer = page_address(rqstp->rq_respages[pn]);
+			args->buffer = page_address(p);
 		len -= PAGE_SIZE;
 	}
 
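The nfs3xdr.c hunks above and below all apply the same conversion: the rq_resused integer index into rq_respages[] is replaced by the rq_next_page cursor, so consuming a response page becomes a single pointer post-increment:

#include <linux/sunrpc/svc.h>

/* Sketch only: claim the next unused response page. */
static struct page *take_next_resp_page(struct svc_rqst *rqstp)
{
	/* was: rqstp->rq_respages[rqstp->rq_resused++] */
	return *(rqstp->rq_next_page++);
}

The number of pages in use then falls out as the pointer difference rq_next_page - rq_respages, which is what the readdir encoders below iterate over.
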
@@ -720,12 +720,14 @@
 nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_writeres *resp)
 {
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
 		*p++ = htonl(resp->committed);
-		*p++ = htonl(nfssvc_boot.tv_sec);
-		*p++ = htonl(nfssvc_boot.tv_usec);
+		*p++ = htonl(nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_usec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
@@ -876,7 +878,7 @@
 		       					common);
 	__be32		*p = cd->buffer;
 	caddr_t		curr_page_addr = NULL;
-	int		pn;		/* current page number */
+	struct page **	page;
 	int		slen;		/* string (name) length */
 	int		elen;		/* estimated entry length in words */
 	int		num_entry_words = 0;	/* actual number of words */
@@ -913,8 +915,9 @@
 	}
 
 	/* determine which page in rq_respages[] we are currently filling */
-	for (pn=1; pn < cd->rqstp->rq_resused; pn++) {
-		curr_page_addr = page_address(cd->rqstp->rq_respages[pn]);
+	for (page = cd->rqstp->rq_respages + 1;
+				page < cd->rqstp->rq_next_page; page++) {
+		curr_page_addr = page_address(*page);
 
 		if (((caddr_t)cd->buffer >= curr_page_addr) &&
 		    ((caddr_t)cd->buffer <  curr_page_addr + PAGE_SIZE))
@@ -929,14 +932,14 @@
 		if (plus)
 			p = encode_entryplus_baggage(cd, p, name, namlen);
 		num_entry_words = p - cd->buffer;
-	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
+	} else if (*(page+1) != NULL) {
 		/* temporarily encode entry into next page, then move back to
 		 * current and next page in rq_respages[] */
 		__be32 *p1, *tmp;
 		int len1, len2;
 
 		/* grab next page for temporary storage of entry */
-		p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]);
+		p1 = tmp = page_address(*(page+1));
 
 		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
 
@@ -1082,11 +1085,13 @@
 nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_commitres *resp)
 {
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	/* Write verifier */
 	if (resp->status == 0) {
-		*p++ = htonl(nfssvc_boot.tv_sec);
-		*p++ = htonl(nfssvc_boot.tv_usec);
+		*p++ = htonl(nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_usec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index bdf29c9..99bc85f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include "nfsd.h"
 #include "state.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -625,20 +626,46 @@
 	.pipe_dir_name		= "nfsd4_cb",
 };
 
-static int max_cb_time(void)
+static int max_cb_time(struct net *net)
 {
-	return max(nfsd4_lease/10, (time_t)1) * HZ;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
 }
 
+static struct rpc_cred *callback_cred;
+
+int set_callback_cred(void)
+{
+	if (callback_cred)
+		return 0;
+	callback_cred = rpc_lookup_machine_cred("nfs");
+	if (!callback_cred)
+		return -ENOMEM;
+	return 0;
+}
+
+static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
+{
+	if (clp->cl_minorversion == 0) {
+		return get_rpccred(callback_cred);
+	} else {
+		struct rpc_auth *auth = client->cl_auth;
+		struct auth_cred acred = {};
+
+		acred.uid = ses->se_cb_sec.uid;
+		acred.gid = ses->se_cb_sec.gid;
+		return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0);
+	}
+}
 
 static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
 {
 	struct rpc_timeout	timeparms = {
-		.to_initval	= max_cb_time(),
+		.to_initval	= max_cb_time(clp->net),
 		.to_retries	= 0,
 	};
 	struct rpc_create_args args = {
-		.net		= &init_net,
+		.net		= clp->net,
 		.address	= (struct sockaddr *) &conn->cb_addr,
 		.addrsize	= conn->cb_addrlen,
 		.saddress	= (struct sockaddr *) &conn->cb_saddr,
@@ -648,6 +675,7 @@
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
 	};
 	struct rpc_clnt *client;
+	struct rpc_cred *cred;
 
 	if (clp->cl_minorversion == 0) {
 		if (!clp->cl_cred.cr_principal &&
@@ -666,7 +694,7 @@
 		args.bc_xprt = conn->cb_xprt;
 		args.prognumber = clp->cl_cb_session->se_cb_prog;
 		args.protocol = XPRT_TRANSPORT_BC_TCP;
-		args.authflavor = RPC_AUTH_UNIX;
+		args.authflavor = ses->se_cb_sec.flavor;
 	}
 	/* Create RPC client */
 	client = rpc_create(&args);
@@ -675,9 +703,14 @@
 			PTR_ERR(client));
 		return PTR_ERR(client);
 	}
+	cred = get_backchannel_cred(clp, client, ses);
+	if (IS_ERR(cred)) {
+		rpc_shutdown_client(client);
+		return PTR_ERR(cred);
+	}
 	clp->cl_cb_client = client;
+	clp->cl_cb_cred = cred;
 	return 0;
-
 }
 
 static void warn_no_callback_path(struct nfs4_client *clp, int reason)
@@ -714,18 +747,6 @@
 	.rpc_call_done = nfsd4_cb_probe_done,
 };
 
-static struct rpc_cred *callback_cred;
-
-int set_callback_cred(void)
-{
-	if (callback_cred)
-		return 0;
-	callback_cred = rpc_lookup_machine_cred("nfs");
-	if (!callback_cred)
-		return -ENOMEM;
-	return 0;
-}
-
 static struct workqueue_struct *callback_wq;
 
 static void run_nfsd4_cb(struct nfsd4_callback *cb)
@@ -743,7 +764,6 @@
 	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
 	cb->cb_msg.rpc_argp = NULL;
 	cb->cb_msg.rpc_resp = NULL;
-	cb->cb_msg.rpc_cred = callback_cred;
 
 	cb->cb_ops = &nfsd4_cb_probe_ops;
 
@@ -962,6 +982,8 @@
 	if (clp->cl_cb_client) {
 		rpc_shutdown_client(clp->cl_cb_client);
 		clp->cl_cb_client = NULL;
+		put_rpccred(clp->cl_cb_cred);
+		clp->cl_cb_cred = NULL;
 	}
 	if (clp->cl_cb_conn.cb_xprt) {
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
@@ -995,7 +1017,7 @@
 		run_nfsd4_cb(cb);
 }
 
-void nfsd4_do_callback_rpc(struct work_struct *w)
+static void nfsd4_do_callback_rpc(struct work_struct *w)
 {
 	struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
 	struct nfs4_client *clp = cb->cb_clp;
@@ -1010,10 +1032,16 @@
 		nfsd4_release_cb(cb);
 		return;
 	}
+	cb->cb_msg.rpc_cred = clp->cl_cb_cred;
 	rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
 			cb->cb_ops, cb);
 }
 
+void nfsd4_init_callback(struct nfsd4_callback *cb)
+{
+	INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
+}
+
 void nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfsd4_callback *cb = &dp->dl_recall;
@@ -1025,7 +1053,6 @@
 	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
 	cb->cb_msg.rpc_argp = cb;
 	cb->cb_msg.rpc_resp = cb;
-	cb->cb_msg.rpc_cred = callback_cred;
 
 	cb->cb_ops = &nfsd4_cb_recall_ops;
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6c9a4b2..9d1c5db 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -40,6 +40,7 @@
 #include "xdr4.h"
 #include "vfs.h"
 #include "current_stateid.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
@@ -194,6 +195,7 @@
 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
 {
 	struct svc_fh *resfh;
+	int accmode;
 	__be32 status;
 
 	resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
@@ -253,9 +255,10 @@
 	/* set reply cache */
 	fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
 			&resfh->fh_handle);
-	if (!open->op_created)
-		status = do_open_permission(rqstp, resfh, open,
-					    NFSD_MAY_NOP);
+	accmode = NFSD_MAY_NOP;
+	if (open->op_created)
+		accmode |= NFSD_MAY_OWNER_OVERRIDE;
+	status = do_open_permission(rqstp, resfh, open, accmode);
 	set_change_info(&open->op_cinfo, current_fh);
 	fh_dup2(current_fh, resfh);
 out:
@@ -304,6 +307,8 @@
 {
 	__be32 status;
 	struct nfsd4_compoundres *resp;
+	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
 		(int)open->op_fname.len, open->op_fname.data,
@@ -331,7 +336,7 @@
 
 	/* check seqid for replay. set nfs4_owner */
 	resp = rqstp->rq_resp;
-	status = nfsd4_process_open1(&resp->cstate, open);
+	status = nfsd4_process_open1(&resp->cstate, open, nn);
 	if (status == nfserr_replay_me) {
 		struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay;
 		fh_put(&cstate->current_fh);
@@ -354,10 +359,10 @@
 	/* Openowner is now set, so sequence id will get bumped.  Now we need
 	 * these checks before we do any creates: */
 	status = nfserr_grace;
-	if (locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+	if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 	status = nfserr_no_grace;
-	if (!locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+	if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 
 	switch (open->op_claim_type) {
@@ -370,7 +375,9 @@
 			break;
 		case NFS4_OPEN_CLAIM_PREVIOUS:
 			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-			status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion);
+			status = nfs4_check_open_reclaim(&open->op_clientid,
+							 cstate->minorversion,
+							 nn);
 			if (status)
 				goto out;
 		case NFS4_OPEN_CLAIM_FH:
@@ -490,12 +497,13 @@
 			   &access->ac_supported);
 }
 
-static void gen_boot_verifier(nfs4_verifier *verifier)
+static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 {
 	__be32 verf[2];
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	verf[0] = (__be32)nfssvc_boot.tv_sec;
-	verf[1] = (__be32)nfssvc_boot.tv_usec;
+	verf[0] = (__be32)nn->nfssvc_boot.tv_sec;
+	verf[1] = (__be32)nn->nfssvc_boot.tv_usec;
 	memcpy(verifier->data, verf, sizeof(verifier->data));
 }
 
@@ -503,7 +511,7 @@
 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_commit *commit)
 {
-	gen_boot_verifier(&commit->co_verf);
+	gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
 	return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
 			     commit->co_count);
 }
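
gen_boot_verifier() makes the write verifier nothing more than the (now per-net) server boot time. The point of the verifier is reboot detection: purely as an illustration of the client side of the contract (this is not client code from this series), a client must resend its uncommitted writes whenever COMMIT returns a different verifier than the earlier unstable WRITEs did:

#include <stdbool.h>
#include <string.h>

#define EXAMPLE_VERIFIER_SIZE 8	/* NFS4_VERIFIER_SIZE in the real headers */

static bool must_resend_writes(const char write_verf[EXAMPLE_VERIFIER_SIZE],
			       const char commit_verf[EXAMPLE_VERIFIER_SIZE])
{
	/* a mismatch means the server rebooted and may have lost data */
	return memcmp(write_verf, commit_verf, EXAMPLE_VERIFIER_SIZE) != 0;
}
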
@@ -684,6 +692,17 @@
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;
 
+	/*
+	 * If we do a zero copy read, then a client will see read data
+	 * that reflects the state of the file *after* performing the
+	 * following compound.
+	 *
+	 * To ensure proper ordering, we therefore turn off zero copy if
+	 * the client wants us to do more in this compound:
+	 */
+	if (!nfsd4_last_compound_op(rqstp))
+		rqstp->rq_splice_ok = false;
+
 	nfs4_lock_state();
 	/* check stateid */
 	if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
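
The comment in the hunk above carries the reasoning: a spliced (zero-copy) READ hands page references to the transport, so its data is only snapshotted when the reply is sent, after any later ops in the compound have run. A sketch of the guard it relies on, assuming (as in this tree) that nfsd4_last_compound_op() is a simple op-count comparison:

#include "xdr4.h"	/* nfsd4_compoundargs, nfsd4_compoundres */

/* Sketch only: READ is the last op iff every decoded op has a result. */
static bool example_last_compound_op(struct svc_rqst *rqstp)
{
	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
	struct nfsd4_compoundres *resp = rqstp->rq_resp;

	return argp->opcnt == resp->opcnt;
}
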
@@ -876,6 +895,24 @@
 	return status;
 }
 
+static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
+{
+	int i = 1;
+	int buflen = write->wr_buflen;
+
+	vec[0].iov_base = write->wr_head.iov_base;
+	vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
+	buflen -= vec[0].iov_len;
+
+	while (buflen) {
+		vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
+		vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
+		buflen -= vec[i].iov_len;
+		i++;
+	}
+	return i;
+}
+
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    struct nfsd4_write *write)
@@ -884,6 +921,7 @@
 	struct file *filp = NULL;
 	__be32 status = nfs_ok;
 	unsigned long cnt;
+	int nvecs;
 
 	/* no need to check permission - this will be done in nfsd_write() */
 
@@ -904,10 +942,13 @@
 
 	cnt = write->wr_buflen;
 	write->wr_how_written = write->wr_stable_how;
-	gen_boot_verifier(&write->wr_verifier);
+	gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
+
+	nvecs = fill_in_write_vector(rqstp->rq_vec, write);
+	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
-			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+			     write->wr_offset, rqstp->rq_vec, nvecs,
 			     &cnt, &write->wr_how_written);
 	if (filp)
 		fput(filp);
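
fill_in_write_vector(), added above, splits the decoded write buffer into the RPC head fragment plus whole pages. A worked example with assumed sizes shows where the returned vector count comes from:

/*
 * Assume PAGE_SIZE = 4096, wr_head.iov_len = 500, wr_buflen = 9000:
 *
 *	vec[0] = wr_head,        500 bytes   (min(9000, 500))
 *	vec[1] = wr_pagelist[0], 4096 bytes  (8500 remaining)
 *	vec[2] = wr_pagelist[1], 4096 bytes  (4404 remaining)
 *	vec[3] = wr_pagelist[2], 308 bytes   (remainder)
 *
 * nvecs = 4, and the WARN_ON_ONCE() above only fires if this count
 * ever exceeds ARRAY_SIZE(rqstp->rq_vec).
 */
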
@@ -1666,6 +1707,12 @@
 		.op_name = "OP_EXCHANGE_ID",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
 	},
+	[OP_BACKCHANNEL_CTL] = {
+		.op_func = (nfsd4op_func)nfsd4_backchannel_ctl,
+		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
+		.op_name = "OP_BACKCHANNEL_CTL",
+		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+	},
 	[OP_BIND_CONN_TO_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
@@ -1719,6 +1766,7 @@
 		.op_func = (nfsd4op_func)nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
+		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
 };
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 43295d4..ba6fdd4 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -58,13 +58,11 @@
 	void (*create)(struct nfs4_client *);
 	void (*remove)(struct nfs4_client *);
 	int (*check)(struct nfs4_client *);
-	void (*grace_done)(struct net *, time_t);
+	void (*grace_done)(struct nfsd_net *, time_t);
 };
 
 /* Globals */
-static struct file *rec_file;
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
-static struct nfsd4_client_tracking_ops *client_tracking_ops;
 
 static int
 nfs4_save_creds(const struct cred **original_creds)
@@ -102,33 +100,39 @@
 	*out = '\0';
 }
 
-__be32
-nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
+static int
+nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
 {
 	struct xdr_netobj cksum;
 	struct hash_desc desc;
 	struct scatterlist sg;
-	__be32 status = nfserr_jukebox;
+	int status;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
 	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 	desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(desc.tfm))
+	if (IS_ERR(desc.tfm)) {
+		status = PTR_ERR(desc.tfm);
 		goto out_no_tfm;
+	}
+
 	cksum.len = crypto_hash_digestsize(desc.tfm);
 	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
-	if (cksum.data == NULL)
+	if (cksum.data == NULL) {
+		status = -ENOMEM;
  		goto out;
+	}
 
 	sg_init_one(&sg, clname->data, clname->len);
 
-	if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data))
+	status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data);
+	if (status)
 		goto out;
 
 	md5_to_hex(dname, cksum.data);
 
-	status = nfs_ok;
+	status = 0;
 out:
 	kfree(cksum.data);
 	crypto_free_hash(desc.tfm);
@@ -136,29 +140,61 @@
 	return status;
 }
 
+/*
+ * If we had an error generating the recdir name for the legacy tracker
+ * then warn the admin. If the error doesn't appear to be transient,
+ * then disable recovery tracking.
+ */
+static void
+legacy_recdir_name_error(int error)
+{
+	printk(KERN_ERR "NFSD: unable to generate recoverydir "
+			"name (%d).\n", error);
+
+	/*
+	 * if the algorithm just doesn't exist, then disable the recovery
+	 * tracker altogether. The crypto libs will generally return this if
+	 * FIPS is enabled as well.
+	 */
+	if (error == -ENOENT) {
+		printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
+			"Reboot recovery will not function correctly!\n");
+
+		/* the argument is ignored by the legacy exit function */
+		nfsd4_client_tracking_exit(NULL);
+	}
+}
+
 static void
 nfsd4_create_clid_dir(struct nfs4_client *clp)
 {
 	const struct cred *original_cred;
-	char *dname = clp->cl_recdir;
+	char dname[HEXDIR_LEN];
 	struct dentry *dir, *dentry;
+	struct nfs4_client_reclaim *crp;
 	int status;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
 
 	if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
-	if (!rec_file)
+	if (!nn->rec_file)
 		return;
+
+	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+	if (status)
+		return legacy_recdir_name_error(status);
+
+	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0)
 		return;
 
-	status = mnt_want_write_file(rec_file);
+	status = mnt_want_write_file(nn->rec_file);
 	if (status)
 		return;
 
-	dir = rec_file->f_path.dentry;
+	dir = nn->rec_file->f_path.dentry;
 	/* lock the parent */
 	mutex_lock(&dir->d_inode->i_mutex);
 
@@ -182,18 +218,24 @@
 	dput(dentry);
 out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
-	if (status == 0)
-		vfs_fsync(rec_file, 0);
-	else
+	if (status == 0) {
+		if (nn->in_grace) {
+			crp = nfs4_client_to_reclaim(dname, nn);
+			if (crp)
+				crp->cr_clp = clp;
+		}
+		vfs_fsync(nn->rec_file, 0);
+	} else {
 		printk(KERN_ERR "NFSD: failed to write recovery record"
 				" (err %d); please check that %s exists"
 				" and is writeable", status,
 				user_recovery_dirname);
-	mnt_drop_write_file(rec_file);
+	}
+	mnt_drop_write_file(nn->rec_file);
 	nfs4_reset_creds(original_cred);
 }
 
-typedef int (recdir_func)(struct dentry *, struct dentry *);
+typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *);
 
 struct name_list {
 	char name[HEXDIR_LEN];
@@ -219,10 +261,10 @@
 }
 
 static int
-nfsd4_list_rec_dir(recdir_func *f)
+nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 {
 	const struct cred *original_cred;
-	struct dentry *dir = rec_file->f_path.dentry;
+	struct dentry *dir = nn->rec_file->f_path.dentry;
 	LIST_HEAD(names);
 	int status;
 
@@ -230,13 +272,13 @@
 	if (status < 0)
 		return status;
 
-	status = vfs_llseek(rec_file, 0, SEEK_SET);
+	status = vfs_llseek(nn->rec_file, 0, SEEK_SET);
 	if (status < 0) {
 		nfs4_reset_creds(original_cred);
 		return status;
 	}
 
-	status = vfs_readdir(rec_file, nfsd4_build_namelist, &names);
+	status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names);
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	while (!list_empty(&names)) {
 		struct name_list *entry;
@@ -248,7 +290,7 @@
 				status = PTR_ERR(dentry);
 				break;
 			}
-			status = f(dir, dentry);
+			status = f(dir, dentry, nn);
 			dput(dentry);
 		}
 		list_del(&entry->list);
@@ -260,14 +302,14 @@
 }
 
 static int
-nfsd4_unlink_clid_dir(char *name, int namlen)
+nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
 {
 	struct dentry *dir, *dentry;
 	int status;
 
 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
-	dir = rec_file->f_path.dentry;
+	dir = nn->rec_file->f_path.dentry;
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_one_len(name, dir, namlen);
 	if (IS_ERR(dentry)) {
@@ -289,37 +331,52 @@
 nfsd4_remove_clid_dir(struct nfs4_client *clp)
 {
 	const struct cred *original_cred;
+	struct nfs4_client_reclaim *crp;
+	char dname[HEXDIR_LEN];
 	int status;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+	if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
 
-	status = mnt_want_write_file(rec_file);
+	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+	if (status)
+		return legacy_recdir_name_error(status);
+
+	status = mnt_want_write_file(nn->rec_file);
 	if (status)
 		goto out;
 	clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0)
-		goto out;
+		goto out_drop_write;
 
-	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
+	status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
 	nfs4_reset_creds(original_cred);
-	if (status == 0)
-		vfs_fsync(rec_file, 0);
-	mnt_drop_write_file(rec_file);
+	if (status == 0) {
+		vfs_fsync(nn->rec_file, 0);
+		if (nn->in_grace) {
+			/* remove reclaim record */
+			crp = nfsd4_find_reclaim_client(dname, nn);
+			if (crp)
+				nfs4_remove_reclaim_record(crp, nn);
+		}
+	}
+out_drop_write:
+	mnt_drop_write_file(nn->rec_file);
 out:
 	if (status)
 		printk("NFSD: Failed to remove expired client state directory"
-				" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
+				" %.*s\n", HEXDIR_LEN, dname);
 }
 
 static int
-purge_old(struct dentry *parent, struct dentry *child)
+purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
 	int status;
 
-	if (nfs4_has_reclaimed_state(child->d_name.name, false))
+	if (nfs4_has_reclaimed_state(child->d_name.name, nn))
 		return 0;
 
 	status = vfs_rmdir(parent->d_inode, child);
@@ -331,27 +388,29 @@
 }
 
 static void
-nfsd4_recdir_purge_old(struct net *net, time_t boot_time)
+nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time)
 {
 	int status;
 
-	if (!rec_file)
+	nn->in_grace = false;
+	if (!nn->rec_file)
 		return;
-	status = mnt_want_write_file(rec_file);
+	status = mnt_want_write_file(nn->rec_file);
 	if (status)
 		goto out;
-	status = nfsd4_list_rec_dir(purge_old);
+	status = nfsd4_list_rec_dir(purge_old, nn);
 	if (status == 0)
-		vfs_fsync(rec_file, 0);
-	mnt_drop_write_file(rec_file);
+		vfs_fsync(nn->rec_file, 0);
+	mnt_drop_write_file(nn->rec_file);
 out:
+	nfs4_release_reclaim(nn);
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
-			" directory %s\n", rec_file->f_path.dentry->d_name.name);
+			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
 }
 
 static int
-load_recdir(struct dentry *parent, struct dentry *child)
+load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
 	if (child->d_name.len != HEXDIR_LEN - 1) {
 		printk("nfsd4: illegal name %s in recovery directory\n",
@@ -359,21 +418,22 @@
 		/* Keep trying; maybe the others are OK: */
 		return 0;
 	}
-	nfs4_client_to_reclaim(child->d_name.name);
+	nfs4_client_to_reclaim(child->d_name.name, nn);
 	return 0;
 }
 
 static int
-nfsd4_recdir_load(void) {
+nfsd4_recdir_load(struct net *net) {
 	int status;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	if (!rec_file)
+	if (!nn->rec_file)
 		return 0;
 
-	status = nfsd4_list_rec_dir(load_recdir);
+	status = nfsd4_list_rec_dir(load_recdir, nn);
 	if (status)
 		printk("nfsd4: failed loading clients from recovery"
-			" directory %s\n", rec_file->f_path.dentry->d_name.name);
+			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
 	return status;
 }
 
@@ -382,15 +442,16 @@
  */
 
 static int
-nfsd4_init_recdir(void)
+nfsd4_init_recdir(struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	const struct cred *original_cred;
 	int status;
 
 	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
 			user_recovery_dirname);
 
-	BUG_ON(rec_file);
+	BUG_ON(nn->rec_file);
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0) {
@@ -400,23 +461,65 @@
 		return status;
 	}
 
-	rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
-	if (IS_ERR(rec_file)) {
+	nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
+	if (IS_ERR(nn->rec_file)) {
 		printk("NFSD: unable to find recovery directory %s\n",
 				user_recovery_dirname);
-		status = PTR_ERR(rec_file);
-		rec_file = NULL;
+		status = PTR_ERR(nn->rec_file);
+		nn->rec_file = NULL;
 	}
 
 	nfs4_reset_creds(original_cred);
+	if (!status)
+		nn->in_grace = true;
 	return status;
 }
 
+
+static int
+nfs4_legacy_state_init(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int i;
+
+	nn->reclaim_str_hashtbl = kmalloc(sizeof(struct list_head) *
+					  CLIENT_HASH_SIZE, GFP_KERNEL);
+	if (!nn->reclaim_str_hashtbl)
+		return -ENOMEM;
+
+	for (i = 0; i < CLIENT_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
+	nn->reclaim_str_hashtbl_size = 0;
+
+	return 0;
+}
+
+static void
+nfs4_legacy_state_shutdown(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	kfree(nn->reclaim_str_hashtbl);
+}
+
 static int
 nfsd4_load_reboot_recovery_data(struct net *net)
 {
 	int status;
 
+	status = nfsd4_init_recdir(net);
+	if (!status)
+		status = nfsd4_recdir_load(net);
+	if (status)
+		printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
+	return status;
+}
+
+static int
+nfsd4_legacy_tracking_init(struct net *net)
+{
+	int status;
+
 	/* XXX: The legacy code won't work in a container */
 	if (net != &init_net) {
 		WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client "
@@ -424,30 +527,37 @@
 		return -EINVAL;
 	}
 
-	nfs4_lock_state();
-	status = nfsd4_init_recdir();
-	if (!status)
-		status = nfsd4_recdir_load();
-	nfs4_unlock_state();
+	status = nfs4_legacy_state_init(net);
 	if (status)
-		printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
+		return status;
+
+	status = nfsd4_load_reboot_recovery_data(net);
+	if (status)
+		goto err;
+	return 0;
+
+err:
+	nfs4_legacy_state_shutdown(net);
 	return status;
 }
 
 static void
-nfsd4_shutdown_recdir(void)
+nfsd4_shutdown_recdir(struct nfsd_net *nn)
 {
-	if (!rec_file)
+	if (!nn->rec_file)
 		return;
-	fput(rec_file);
-	rec_file = NULL;
+	fput(nn->rec_file);
+	nn->rec_file = NULL;
 }
 
 static void
 nfsd4_legacy_tracking_exit(struct net *net)
 {
-	nfs4_release_reclaim();
-	nfsd4_shutdown_recdir();
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	nfs4_release_reclaim(nn);
+	nfsd4_shutdown_recdir(nn);
+	nfs4_legacy_state_shutdown(net);
 }
 
 /*
@@ -480,13 +590,26 @@
 static int
 nfsd4_check_legacy_client(struct nfs4_client *clp)
 {
+	int status;
+	char dname[HEXDIR_LEN];
+	struct nfs4_client_reclaim *crp;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
 	/* did we already find that this client is stable? */
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return 0;
 
+	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+	if (status) {
+		legacy_recdir_name_error(status);
+		return status;
+	}
+
 	/* look for it in the reclaim hashtable otherwise */
-	if (nfsd4_find_reclaim_client(clp)) {
+	crp = nfsd4_find_reclaim_client(dname, nn);
+	if (crp) {
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+		crp->cr_clp = clp;
 		return 0;
 	}
 
@@ -494,7 +617,7 @@
 }
 
 static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
-	.init		= nfsd4_load_reboot_recovery_data,
+	.init		= nfsd4_legacy_tracking_init,
 	.exit		= nfsd4_legacy_tracking_exit,
 	.create		= nfsd4_create_clid_dir,
 	.remove		= nfsd4_remove_clid_dir,
@@ -785,8 +908,7 @@
 {
 	int ret;
 	struct cld_upcall *cup;
-	/* FIXME: determine net from clp */
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	/* Don't upcall if it's already stored */
@@ -823,8 +945,7 @@
 {
 	int ret;
 	struct cld_upcall *cup;
-	/* FIXME: determine net from clp */
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	/* Don't upcall if it's already removed */
@@ -861,8 +982,7 @@
 {
 	int ret;
 	struct cld_upcall *cup;
-	/* FIXME: determine net from clp */
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	/* Don't upcall if one was already stored during this grace pd */
@@ -892,11 +1012,10 @@
 }
 
 static void
-nfsd4_cld_grace_done(struct net *net, time_t boot_time)
+nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time)
 {
 	int ret;
 	struct cld_upcall *cup;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
 	cup = alloc_cld_upcall(cn);
@@ -926,28 +1045,261 @@
 	.grace_done	= nfsd4_cld_grace_done,
 };
 
+/* upcall via usermodehelper */
+static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack";
+module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
+			S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program");
+
+static bool cltrack_legacy_disable;
+module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cltrack_legacy_disable,
+		"Disable legacy recoverydir conversion. Default: false");
+
+#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
+#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
+
+static char *
+nfsd4_cltrack_legacy_topdir(void)
+{
+	int copied;
+	size_t len;
+	char *result;
+
+	if (cltrack_legacy_disable)
+		return NULL;
+
+	len = strlen(LEGACY_TOPDIR_ENV_PREFIX) +
+		strlen(nfs4_recoverydir()) + 1;
+
+	result = kmalloc(len, GFP_KERNEL);
+	if (!result)
+		return result;
+
+	copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s",
+				nfs4_recoverydir());
+	if (copied >= len) {
+		/* just return nothing if output was truncated */
+		kfree(result);
+		return NULL;
+	}
+
+	return result;
+}
+
+static char *
+nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
+{
+	int copied;
+	size_t len;
+	char *result;
+
+	if (cltrack_legacy_disable)
+		return NULL;
+
+	/* +1 is for '/' between "topdir" and "recdir" */
+	len = strlen(LEGACY_RECDIR_ENV_PREFIX) +
+		strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN;
+
+	result = kmalloc(len, GFP_KERNEL);
+	if (!result)
+		return result;
+
+	copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/",
+				nfs4_recoverydir());
+	if (copied > (len - HEXDIR_LEN)) {
+		/* just return nothing if output will be truncated */
+		kfree(result);
+		return NULL;
+	}
+
+	copied = nfs4_make_rec_clidname(result + copied, name);
+	if (copied) {
+		kfree(result);
+		return NULL;
+	}
+
+	return result;
+}
+
+static int
+nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy)
+{
+	char *envp[2];
+	char *argv[4];
+	int ret;
+
+	if (unlikely(!cltrack_prog[0])) {
+		dprintk("%s: cltrack_prog is disabled\n", __func__);
+		return -EACCES;
+	}
+
+	dprintk("%s: cmd: %s\n", __func__, cmd);
+	dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
+	dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)");
+
+	envp[0] = legacy;
+	envp[1] = NULL;
+
+	argv[0] = (char *)cltrack_prog;
+	argv[1] = cmd;
+	argv[2] = arg;
+	argv[3] = NULL;
+
+	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+	/*
+	 * Disable the upcall mechanism if we're getting an ENOENT or EACCES
+	 * error. The admin can re-enable it on the fly by using sysfs
+	 * once the problem has been fixed.
+	 */
+	if (ret == -ENOENT || ret == -EACCES) {
+		dprintk("NFSD: %s was not found or isn't executable (%d). "
+			"Setting cltrack_prog to blank string!",
+			cltrack_prog, ret);
+		cltrack_prog[0] = '\0';
+	}
+	dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret);
+
+	return ret;
+}
+
+static char *
+bin_to_hex_dup(const unsigned char *src, int srclen)
+{
+	int i;
+	char *buf, *hex;
+
+	/* +1 for terminating NULL */
+	buf = kmalloc((srclen * 2) + 1, GFP_KERNEL);
+	if (!buf)
+		return buf;
+
+	hex = buf;
+	for (i = 0; i < srclen; i++) {
+		sprintf(hex, "%2.2x", *src++);
+		hex += 2;
+	}
+	return buf;
+}
+
+static int
+nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net)
+{
+	return nfsd4_umh_cltrack_upcall("init", NULL, NULL);
+}
+
+static void
+nfsd4_umh_cltrack_create(struct nfs4_client *clp)
+{
+	char *hexid;
+
+	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+	if (!hexid) {
+		dprintk("%s: can't allocate memory for upcall!\n", __func__);
+		return;
+	}
+	nfsd4_umh_cltrack_upcall("create", hexid, NULL);
+	kfree(hexid);
+}
+
+static void
+nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
+{
+	char *hexid;
+
+	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+	if (!hexid) {
+		dprintk("%s: can't allocate memory for upcall!\n", __func__);
+		return;
+	}
+	nfsd4_umh_cltrack_upcall("remove", hexid, NULL);
+	kfree(hexid);
+}
+
+static int
+nfsd4_umh_cltrack_check(struct nfs4_client *clp)
+{
+	int ret;
+	char *hexid, *legacy;
+
+	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+	if (!hexid) {
+		dprintk("%s: can't allocate memory for upcall!\n", __func__);
+		return -ENOMEM;
+	}
+	legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
+	ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy);
+	kfree(legacy);
+	kfree(hexid);
+	return ret;
+}
+
+static void
+nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn,
+				time_t boot_time)
+{
+	char *legacy;
+	char timestr[22]; /* FIXME: better way to determine max size? */
+
+	sprintf(timestr, "%ld", boot_time);
+	legacy = nfsd4_cltrack_legacy_topdir();
+	nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy);
+	kfree(legacy);
+}
+
+static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
+	.init		= nfsd4_umh_cltrack_init,
+	.exit		= NULL,
+	.create		= nfsd4_umh_cltrack_create,
+	.remove		= nfsd4_umh_cltrack_remove,
+	.check		= nfsd4_umh_cltrack_check,
+	.grace_done	= nfsd4_umh_cltrack_grace_done,
+};
+
 int
 nfsd4_client_tracking_init(struct net *net)
 {
 	int status;
 	struct path path;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	if (!client_tracking_ops) {
-		client_tracking_ops = &nfsd4_cld_tracking_ops;
-		status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
-		if (!status) {
-			if (S_ISDIR(path.dentry->d_inode->i_mode))
-				client_tracking_ops =
-						&nfsd4_legacy_tracking_ops;
-			path_put(&path);
-		}
+	/* just run the init if the method is already decided */
+	if (nn->client_tracking_ops)
+		goto do_init;
+
+	/*
+	 * First, try a UMH upcall. It should succeed or fail quickly, so
+	 * there's little harm in trying that first.
+	 */
+	nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
+	status = nn->client_tracking_ops->init(net);
+	if (!status)
+		return status;
+
+	/*
+	 * See if the recoverydir exists and is a directory. If it is,
+	 * then use the legacy ops.
+	 */
+	nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
+	status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
+	if (!status) {
+		status = S_ISDIR(path.dentry->d_inode->i_mode);
+		path_put(&path);
+		if (status)
+			goto do_init;
 	}
 
-	status = client_tracking_ops->init(net);
+	/* Finally, try to use nfsdcld */
+	nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+	printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be "
+			"removed in 3.10. Please transition to using "
+			"nfsdcltrack.\n");
+do_init:
+	status = nn->client_tracking_ops->init(net);
 	if (status) {
 		printk(KERN_WARNING "NFSD: Unable to initialize client "
 				    "recovery tracking! (%d)\n", status);
-		client_tracking_ops = NULL;
+		nn->client_tracking_ops = NULL;
 	}
 	return status;
 }
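
nfsd4_client_tracking_init() now probes three trackers in a fixed order, and the order is itself the design decision:

/*
 * Probe order implemented above (the ops tables are the ones defined
 * in this file; error handling elided):
 *
 *	1. nfsd4_umh_tracking_ops    - /sbin/nfsdcltrack upcall; succeeds
 *	                               or fails quickly, so it is tried first.
 *	2. nfsd4_legacy_tracking_ops - chosen only if nfs4_recoverydir()
 *	                               exists and is a directory.
 *	3. nfsd4_cld_tracking_ops    - the nfsdcld pipe, kept as a
 *	                               deprecated fallback (removal
 *	                               announced above for 3.10).
 */
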
@@ -955,40 +1307,49 @@
 void
 nfsd4_client_tracking_exit(struct net *net)
 {
-	if (client_tracking_ops) {
-		client_tracking_ops->exit(net);
-		client_tracking_ops = NULL;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (nn->client_tracking_ops) {
+		if (nn->client_tracking_ops->exit)
+			nn->client_tracking_ops->exit(net);
+		nn->client_tracking_ops = NULL;
 	}
 }
 
 void
 nfsd4_client_record_create(struct nfs4_client *clp)
 {
-	if (client_tracking_ops)
-		client_tracking_ops->create(clp);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	if (nn->client_tracking_ops)
+		nn->client_tracking_ops->create(clp);
 }
 
 void
 nfsd4_client_record_remove(struct nfs4_client *clp)
 {
-	if (client_tracking_ops)
-		client_tracking_ops->remove(clp);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	if (nn->client_tracking_ops)
+		nn->client_tracking_ops->remove(clp);
 }
 
 int
 nfsd4_client_record_check(struct nfs4_client *clp)
 {
-	if (client_tracking_ops)
-		return client_tracking_ops->check(clp);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	if (nn->client_tracking_ops)
+		return nn->client_tracking_ops->check(clp);
 
 	return -EOPNOTSUPP;
 }
 
 void
-nfsd4_record_grace_done(struct net *net, time_t boot_time)
+nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time)
 {
-	if (client_tracking_ops)
-		client_tracking_ops->grace_done(net, boot_time);
+	if (nn->client_tracking_ops)
+		nn->client_tracking_ops->grace_done(nn, boot_time);
 }
 
 static int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d0237f8..ac8ed96 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -44,16 +44,11 @@
 #include "xdr4.h"
 #include "vfs.h"
 #include "current_stateid.h"
-#include "fault_inject.h"
 
 #include "netns.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
-/* Globals */
-time_t nfsd4_lease = 90;     /* default lease time */
-time_t nfsd4_grace = 90;
-
 #define all_ones {{~0,~0},~0}
 static const stateid_t one_stateid = {
 	.si_generation = ~0,
@@ -176,8 +171,6 @@
 	return ret & OWNER_HASH_MASK;
 }
 
-static struct list_head	ownerstr_hashtbl[OWNER_HASH_SIZE];
-
 /* hash table for nfs4_file */
 #define FILE_HASH_BITS                   8
 #define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
@@ -192,7 +185,7 @@
 
 static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
 {
-	BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
+	WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
 	atomic_inc(&fp->fi_access[oflag]);
 }
 
@@ -251,7 +244,7 @@
 	 * preallocations that can exist at a time, but the state lock
 	 * prevents anyone from using ours before we get here:
 	 */
-	BUG_ON(error);
+	WARN_ON_ONCE(error);
 	/*
 	 * It shouldn't be a problem to reuse an opaque stateid value.
 	 * I don't think it is for 4.1.  But with 4.0 I worry that, for
@@ -340,7 +333,7 @@
 	fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
+	nfsd4_init_callback(&dp->dl_recall);
 	return dp;
 }
 
@@ -390,14 +383,6 @@
  * SETCLIENTID state 
  */
 
-/* client_lock protects the client lru list and session hash table */
-static DEFINE_SPINLOCK(client_lock);
-
-/* Hash tables for nfs4_clientid state */
-#define CLIENT_HASH_BITS                 4
-#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
-#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
-
 static unsigned int clientid_hashval(u32 id)
 {
 	return id & CLIENT_HASH_MASK;
@@ -409,31 +394,6 @@
 }
 
 /*
- * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
- * used in reboot/reset lease grace period processing
- *
- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
- * setclientid_confirmed info. 
- *
- * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
- * setclientid info.
- *
- * client_lru holds client queue ordered by nfs4_client.cl_time
- * for lease renewal.
- *
- * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
- * for last close replay.
- */
-static struct list_head	reclaim_str_hashtbl[CLIENT_HASH_SIZE];
-static int reclaim_str_hashtbl_size = 0;
-static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
-static struct list_head	conf_str_hashtbl[CLIENT_HASH_SIZE];
-static struct list_head	unconf_str_hashtbl[CLIENT_HASH_SIZE];
-static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
-static struct list_head client_lru;
-static struct list_head close_lru;
-
-/*
  * We store the NONE, READ, WRITE, and BOTH bits separately in the
  * st_{access,deny}_bmap field of the stateid, in order to track not
  * only what share bits are currently in force, but also what
@@ -526,7 +486,8 @@
 	case NFS4_SHARE_ACCESS_BOTH:
 		return O_RDWR;
 	}
-	BUG();
+	WARN_ON_ONCE(1);
+	return O_RDONLY;
 }
 
 /* release all access and file references for a given stateid */
@@ -652,9 +613,6 @@
 	nfs4_free_openowner(oo);
 }
 
-#define SESSION_HASH_SIZE	512
-static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
-
 static inline int
 hash_sessionid(struct nfs4_sessionid *sessionid)
 {
@@ -785,9 +743,12 @@
 	return NULL;
 }
 
-static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
+static void init_forechannel_attrs(struct nfsd4_channel_attrs *new,
+				   struct nfsd4_channel_attrs *req,
+				   int numslots, int slotsize,
+				   struct nfsd_net *nn)
 {
-	u32 maxrpc = nfsd_serv->sv_max_mesg;
+	u32 maxrpc = nn->nfsd_serv->sv_max_mesg;
 
 	new->maxreqs = numslots;
 	new->maxresp_cached = min_t(u32, req->maxresp_cached,
@@ -906,21 +867,27 @@
 static void free_session(struct kref *kref)
 {
 	struct nfsd4_session *ses;
+	struct nfsd_net *nn;
 
-	lockdep_assert_held(&client_lock);
 	ses = container_of(kref, struct nfsd4_session, se_ref);
+	nn = net_generic(ses->se_client->net, nfsd_net_id);
+
+	lockdep_assert_held(&nn->client_lock);
 	nfsd4_del_conns(ses);
 	__free_session(ses);
 }
 
 void nfsd4_put_session(struct nfsd4_session *ses)
 {
-	spin_lock(&client_lock);
+	struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
 	nfsd4_put_session_locked(ses);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
-static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan)
+static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan,
+					   struct nfsd_net *nn)
 {
 	struct nfsd4_session *new;
 	int numslots, slotsize;
@@ -941,13 +908,14 @@
 		nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
 		return NULL;
 	}
-	init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
+	init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn);
 	return new;
 }
 
-static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
+static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	int idx;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	new->se_client = clp;
 	gen_sessionid(new);
@@ -957,14 +925,15 @@
 	new->se_cb_seq_nr = 1;
 	new->se_flags = cses->flags;
 	new->se_cb_prog = cses->callback_prog;
+	new->se_cb_sec = cses->cb_sec;
 	kref_init(&new->se_ref);
 	idx = hash_sessionid(&new->se_sessionid);
-	spin_lock(&client_lock);
-	list_add(&new->se_hash, &sessionid_hashtbl[idx]);
+	spin_lock(&nn->client_lock);
+	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
 	spin_lock(&clp->cl_lock);
 	list_add(&new->se_perclnt, &clp->cl_sessions);
 	spin_unlock(&clp->cl_lock);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 
 	if (cses->flags & SESSION4_BACK_CHAN) {
 		struct sockaddr *sa = svc_addr(rqstp);
@@ -978,20 +947,20 @@
 		rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
 		clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
 	}
-	return new;
 }
 
 /* caller must hold client_lock */
 static struct nfsd4_session *
-find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
+find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
 {
 	struct nfsd4_session *elem;
 	int idx;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dump_sessionid(__func__, sessionid);
 	idx = hash_sessionid(sessionid);
 	/* Search in the appropriate list */
-	list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
+	list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) {
 		if (!memcmp(elem->se_sessionid.data, sessionid->data,
 			    NFS4_MAX_SESSIONID_LEN)) {
 			return elem;
@@ -1016,6 +985,8 @@
 static inline void
 renew_client_locked(struct nfs4_client *clp)
 {
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
 	if (is_client_expired(clp)) {
 		WARN_ON(1);
 		printk("%s: client (clientid %08x/%08x) already expired\n",
@@ -1028,16 +999,18 @@
 	dprintk("renewing client (clientid %08x/%08x)\n", 
 			clp->cl_clientid.cl_boot, 
 			clp->cl_clientid.cl_id);
-	list_move_tail(&clp->cl_lru, &client_lru);
+	list_move_tail(&clp->cl_lru, &nn->client_lru);
 	clp->cl_time = get_seconds();
 }
 
 static inline void
 renew_client(struct nfs4_client *clp)
 {
-	spin_lock(&client_lock);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
 	renew_client_locked(clp);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
 /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -1075,7 +1048,9 @@
 static inline void
 free_client(struct nfs4_client *clp)
 {
-	lockdep_assert_held(&client_lock);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	lockdep_assert_held(&nn->client_lock);
 	while (!list_empty(&clp->cl_sessions)) {
 		struct nfsd4_session *ses;
 		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
@@ -1092,15 +1067,16 @@
 release_session_client(struct nfsd4_session *session)
 {
 	struct nfs4_client *clp = session->se_client;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
+	if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
 		return;
 	if (is_client_expired(clp)) {
 		free_client(clp);
 		session->se_client = NULL;
 	} else
 		renew_client_locked(clp);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
 /* must be called under the client_lock */
@@ -1123,6 +1099,7 @@
 	struct nfs4_openowner *oo;
 	struct nfs4_delegation *dp;
 	struct list_head reaplist;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
@@ -1144,12 +1121,15 @@
 	if (clp->cl_cb_conn.cb_xprt)
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 	list_del(&clp->cl_idhash);
-	list_del(&clp->cl_strhash);
-	spin_lock(&client_lock);
+	if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
+		rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
+	else
+		rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
+	spin_lock(&nn->client_lock);
 	unhash_client_locked(clp);
 	if (atomic_read(&clp->cl_refcount) == 0)
 		free_client(clp);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 }
 
 static void expire_client(struct nfs4_client *clp)
@@ -1187,6 +1167,17 @@
 	return 0;
 }
 
+static long long
+compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
+{
+	long long res;
+
+	res = o1->len - o2->len;
+	if (res)
+		return res;
+	return (long long)memcmp(o1->data, o2->data, o1->len);
+}
+
 static int same_name(const char *n1, const char *n2)
 {
 	return 0 == memcmp(n1, n2, HEXDIR_LEN);
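The new compare_blob() above gives the client-name trees a total order over xdr_netobj blobs: lengths are compared first, with memcmp() on the bytes as the tie-break. The same contract restated as a freestanding userspace sketch (struct blob stands in for struct xdr_netobj; the explicit widening to long long is this sketch's way of keeping the sign of the length difference):

#include <string.h>

struct blob {				/* stand-in for struct xdr_netobj */
	unsigned int len;
	unsigned char *data;
};

static long long blob_cmp(const struct blob *a, const struct blob *b)
{
	/* widen before subtracting so a negative difference survives */
	long long res = (long long)a->len - (long long)b->len;

	if (res)
		return res;
	return memcmp(a->data, b->data, a->len);
}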
@@ -1247,10 +1238,9 @@
 	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
 }
 
-static void gen_clid(struct nfs4_client *clp)
+static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
 {
 	static u32 current_clientid = 1;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	clp->cl_clientid.cl_boot = nn->boot_time;
 	clp->cl_clientid.cl_id = current_clientid++; 
@@ -1283,12 +1273,14 @@
 	return NULL;
 }
 
-static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
+static struct nfs4_client *create_client(struct xdr_netobj name,
 		struct svc_rqst *rqstp, nfs4_verifier *verf)
 {
 	struct nfs4_client *clp;
 	struct sockaddr *sa = svc_addr(rqstp);
 	int ret;
+	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	clp = alloc_client(name);
 	if (clp == NULL)
@@ -1297,23 +1289,21 @@
 	INIT_LIST_HEAD(&clp->cl_sessions);
 	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
 	if (ret) {
-		spin_lock(&client_lock);
+		spin_lock(&nn->client_lock);
 		free_client(clp);
-		spin_unlock(&client_lock);
+		spin_unlock(&nn->client_lock);
 		return NULL;
 	}
 	idr_init(&clp->cl_stateids);
-	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_refcount, 0);
 	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
 	INIT_LIST_HEAD(&clp->cl_idhash);
-	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_lru);
 	INIT_LIST_HEAD(&clp->cl_callbacks);
 	spin_lock_init(&clp->cl_lock);
-	INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
+	nfsd4_init_callback(&clp->cl_cb_null);
 	clp->cl_time = get_seconds();
 	clear_bit(0, &clp->cl_cb_slot_busy);
 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
@@ -1321,17 +1311,60 @@
 	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
 	gen_confirm(clp);
 	clp->cl_cb_session = NULL;
+	clp->net = net;
 	return clp;
 }
 
 static void
-add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
+add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct nfs4_client *clp;
+
+	while (*new) {
+		clp = rb_entry(*new, struct nfs4_client, cl_namenode);
+		parent = *new;
+
+		if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&new_clp->cl_namenode, parent, new);
+	rb_insert_color(&new_clp->cl_namenode, root);
+}
+
+static struct nfs4_client *
+find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
+{
+	long long cmp;
+	struct rb_node *node = root->rb_node;
+	struct nfs4_client *clp;
+
+	while (node) {
+		clp = rb_entry(node, struct nfs4_client, cl_namenode);
+		cmp = compare_blob(&clp->cl_name, name);
+		if (cmp > 0)
+			node = node->rb_left;
+		else if (cmp < 0)
+			node = node->rb_right;
+		else
+			return clp;
+	}
+	return NULL;
+}
+
+static void
+add_to_unconfirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
+	add_clp_to_name_tree(clp, &nn->unconf_name_tree);
 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
-	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+	list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
 	renew_client(clp);
 }
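add_clp_to_name_tree() and find_clp_in_name_tree() are the stock <linux/rbtree.h> idiom: walk down to a leaf while remembering the parent, then rb_link_node() plus rb_insert_color(); lookup repeats the same comparisons without the relinking. The same idiom with a plain integer key, as a sketch (struct item and item_insert() are hypothetical):

#include <linux/rbtree.h>

struct item {
	struct rb_node node;
	int key;
};

static void item_insert(struct rb_root *root, struct item *new)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct item *cur = rb_entry(*p, struct item, node);

		parent = *p;
		if (new->key < cur->key)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}
	rb_link_node(&new->node, parent, p);	/* link in as a leaf */
	rb_insert_color(&new->node, root);	/* then rebalance */
}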
 
@@ -1339,22 +1372,23 @@
 move_to_confirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
-	unsigned int strhashval;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
-	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
-	strhashval = clientstr_hashval(clp->cl_recdir);
-	list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
+	rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
+	add_clp_to_name_tree(clp, &nn->conf_name_tree);
+	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
 	renew_client(clp);
 }
 
 static struct nfs4_client *
-find_confirmed_client(clientid_t *clid, bool sessions)
+find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
 	unsigned int idhashval = clientid_hashval(clid->cl_id);
 
-	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
+	list_for_each_entry(clp, &nn->conf_id_hashtbl[idhashval], cl_idhash) {
 		if (same_clid(&clp->cl_clientid, clid)) {
 			if ((bool)clp->cl_minorversion != sessions)
 				return NULL;
@@ -1366,12 +1400,12 @@
 }
 
 static struct nfs4_client *
-find_unconfirmed_client(clientid_t *clid, bool sessions)
+find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
 	unsigned int idhashval = clientid_hashval(clid->cl_id);
 
-	list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
+	list_for_each_entry(clp, &nn->unconf_id_hashtbl[idhashval], cl_idhash) {
 		if (same_clid(&clp->cl_clientid, clid)) {
 			if ((bool)clp->cl_minorversion != sessions)
 				return NULL;
@@ -1387,27 +1421,15 @@
 } 
 
 static struct nfs4_client *
-find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
 {
-	struct nfs4_client *clp;
-
-	list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
-		if (same_name(clp->cl_recdir, dname))
-			return clp;
-	}
-	return NULL;
+	return find_clp_in_name_tree(name, &nn->conf_name_tree);
 }
 
 static struct nfs4_client *
-find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
 {
-	struct nfs4_client *clp;
-
-	list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
-		if (same_name(clp->cl_recdir, dname))
-			return clp;
-	}
-	return NULL;
+	return find_clp_in_name_tree(name, &nn->unconf_name_tree);
 }
 
 static void
@@ -1428,7 +1450,7 @@
 	else
 		goto out_err;
 
-	conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val,
+	conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val,
 					    se->se_callback_addr_len,
 					    (struct sockaddr *)&conn->cb_addr,
 					    sizeof(conn->cb_addr));
@@ -1572,12 +1594,11 @@
 {
 	struct nfs4_client *unconf, *conf, *new;
 	__be32 status;
-	unsigned int		strhashval;
-	char			dname[HEXDIR_LEN];
 	char			addr_str[INET6_ADDRSTRLEN];
 	nfs4_verifier		verf = exid->verifier;
 	struct sockaddr		*sa = svc_addr(rqstp);
 	bool	update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A;
+	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	rpc_ntop(sa, addr_str, sizeof(addr_str));
 	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
@@ -1592,24 +1613,16 @@
 	switch (exid->spa_how) {
 	case SP4_NONE:
 		break;
+	default:				/* checked by xdr code */
+		WARN_ON_ONCE(1);
 	case SP4_SSV:
-		return nfserr_serverfault;
-	default:
-		BUG();				/* checked by xdr code */
 	case SP4_MACH_CRED:
 		return nfserr_serverfault;	/* no excuse :-/ */
 	}
 
-	status = nfs4_make_rec_clidname(dname, &exid->clname);
-
-	if (status)
-		return status;
-
-	strhashval = clientstr_hashval(dname);
-
 	/* Cases below refer to rfc 5661 section 18.35.4: */
 	nfs4_lock_state();
-	conf = find_confirmed_client_by_str(dname, strhashval);
+	conf = find_confirmed_client_by_name(&exid->clname, nn);
 	if (conf) {
 		bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
 		bool verfs_match = same_verf(&verf, &conf->cl_verifier);
@@ -1654,21 +1667,21 @@
 		goto out;
 	}
 
-	unconf  = find_unconfirmed_client_by_str(dname, strhashval);
+	unconf  = find_unconfirmed_client_by_name(&exid->clname, nn);
 	if (unconf) /* case 4, possible retry or client restart */
 		expire_client(unconf);
 
 	/* case 1 (normal case) */
 out_new:
-	new = create_client(exid->clname, dname, rqstp, &verf);
+	new = create_client(exid->clname, rqstp, &verf);
 	if (new == NULL) {
 		status = nfserr_jukebox;
 		goto out;
 	}
 	new->cl_minorversion = 1;
 
-	gen_clid(new);
-	add_to_unconfirmed(new, strhashval);
+	gen_clid(new, nn);
+	add_to_unconfirmed(new);
 out_copy:
 	exid->clientid.cl_boot = new->cl_clientid.cl_boot;
 	exid->clientid.cl_id = new->cl_clientid.cl_id;
@@ -1761,12 +1774,13 @@
 	struct nfsd4_conn *conn;
 	struct nfsd4_clid_slot *cs_slot = NULL;
 	__be32 status = 0;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
 		return nfserr_inval;
 	if (check_forechannel_attrs(cr_ses->fore_channel))
 		return nfserr_toosmall;
-	new = alloc_session(&cr_ses->fore_channel);
+	new = alloc_session(&cr_ses->fore_channel, nn);
 	if (!new)
 		return nfserr_jukebox;
 	status = nfserr_jukebox;
@@ -1775,8 +1789,8 @@
 		goto out_free_session;
 
 	nfs4_lock_state();
-	unconf = find_unconfirmed_client(&cr_ses->clientid, true);
-	conf = find_confirmed_client(&cr_ses->clientid, true);
+	unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
+	conf = find_confirmed_client(&cr_ses->clientid, true, nn);
 
 	if (conf) {
 		cs_slot = &conf->cl_cs_slot;
@@ -1789,7 +1803,6 @@
 			goto out_free_conn;
 		}
 	} else if (unconf) {
-		unsigned int hash;
 		struct nfs4_client *old;
 		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
 		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
@@ -1803,8 +1816,7 @@
 			status = nfserr_seq_misordered;
 			goto out_free_conn;
 		}
-		hash = clientstr_hashval(unconf->cl_recdir);
-		old = find_confirmed_client_by_str(unconf->cl_recdir, hash);
+		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
 		if (old)
 			expire_client(old);
 		move_to_confirmed(unconf);
@@ -1843,14 +1855,6 @@
 	goto out;
 }
 
-static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
-{
-	struct nfsd4_compoundres *resp = rqstp->rq_resp;
-	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
-
-	return argp->opcnt == resp->opcnt;
-}
-
 static __be32 nfsd4_map_bcts_dir(u32 *dir)
 {
 	switch (*dir) {
@@ -1865,24 +1869,40 @@
 	return nfserr_inval;
 }
 
+__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
+{
+	struct nfsd4_session *session = cstate->session;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
+	session->se_cb_prog = bc->bc_cb_program;
+	session->se_cb_sec = bc->bc_cb_sec;
+	spin_unlock(&nn->client_lock);
+
+	nfsd4_probe_callback(session->se_client);
+
+	return nfs_ok;
+}
+
 __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
 		     struct nfsd4_bind_conn_to_session *bcts)
 {
 	__be32 status;
 	struct nfsd4_conn *conn;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (!nfsd4_last_compound_op(rqstp))
 		return nfserr_not_only_op;
-	spin_lock(&client_lock);
-	cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid);
+	spin_lock(&nn->client_lock);
+	cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp));
 	/* Sorta weird: we only need the refcnt'ing because new_conn acquires
 	 * client_lock itself: */
 	if (cstate->session) {
 		nfsd4_get_session(cstate->session);
 		atomic_inc(&cstate->session->se_client->cl_refcount);
 	}
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	if (!cstate->session)
 		return nfserr_badsession;
 
@@ -1910,6 +1930,7 @@
 {
 	struct nfsd4_session *ses;
 	__be32 status = nfserr_badsession;
+	struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id);
 
 	/* Notes:
 	 * - The confirmed nfs4_client->cl_sessionid holds destroyed sessionid
@@ -1923,24 +1944,24 @@
 			return nfserr_not_only_op;
 	}
 	dump_sessionid(__func__, &sessionid->sessionid);
-	spin_lock(&client_lock);
-	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
+	spin_lock(&nn->client_lock);
+	ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r));
 	if (!ses) {
-		spin_unlock(&client_lock);
+		spin_unlock(&nn->client_lock);
 		goto out;
 	}
 
 	unhash_session(ses);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 
 	nfs4_lock_state();
 	nfsd4_probe_callback_sync(ses->se_client);
 	nfs4_unlock_state();
 
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	nfsd4_del_conns(ses);
 	nfsd4_put_session_locked(ses);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	status = nfs_ok;
 out:
 	dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -2006,6 +2027,7 @@
 	struct nfsd4_slot *slot;
 	struct nfsd4_conn *conn;
 	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (resp->opcnt != 1)
 		return nfserr_sequence_pos;
@@ -2018,9 +2040,9 @@
 	if (!conn)
 		return nfserr_jukebox;
 
-	spin_lock(&client_lock);
+	spin_lock(&nn->client_lock);
 	status = nfserr_badsession;
-	session = find_in_sessionid_hashtbl(&seq->sessionid);
+	session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp));
 	if (!session)
 		goto out;
 
@@ -2094,7 +2116,7 @@
 		}
 	}
 	kfree(conn);
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	dprintk("%s: return %d\n", __func__, ntohl(status));
 	return status;
 }
@@ -2104,10 +2126,11 @@
 {
 	struct nfs4_client *conf, *unconf, *clp;
 	__be32 status = 0;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	nfs4_lock_state();
-	unconf = find_unconfirmed_client(&dc->clientid, true);
-	conf = find_confirmed_client(&dc->clientid, true);
+	unconf = find_unconfirmed_client(&dc->clientid, true, nn);
+	conf = find_confirmed_client(&dc->clientid, true, nn);
 
 	if (conf) {
 		clp = conf;
@@ -2181,20 +2204,13 @@
 {
 	struct xdr_netobj 	clname = setclid->se_name;
 	nfs4_verifier		clverifier = setclid->se_verf;
-	unsigned int 		strhashval;
 	struct nfs4_client	*conf, *unconf, *new;
 	__be32 			status;
-	char                    dname[HEXDIR_LEN];
-	
-	status = nfs4_make_rec_clidname(dname, &clname);
-	if (status)
-		return status;
-
-	strhashval = clientstr_hashval(dname);
+	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	/* Cases below refer to rfc 3530 section 14.2.33: */
 	nfs4_lock_state();
-	conf = find_confirmed_client_by_str(dname, strhashval);
+	conf = find_confirmed_client_by_name(&clname, nn);
 	if (conf) {
 		/* case 0: */
 		status = nfserr_clid_inuse;
@@ -2209,21 +2225,21 @@
 			goto out;
 		}
 	}
-	unconf = find_unconfirmed_client_by_str(dname, strhashval);
+	unconf = find_unconfirmed_client_by_name(&clname, nn);
 	if (unconf)
 		expire_client(unconf);
 	status = nfserr_jukebox;
-	new = create_client(clname, dname, rqstp, &clverifier);
+	new = create_client(clname, rqstp, &clverifier);
 	if (new == NULL)
 		goto out;
 	if (conf && same_verf(&conf->cl_verifier, &clverifier))
 		/* case 1: probable callback update */
 		copy_clid(new, conf);
 	else /* case 4 (new client) or cases 2, 3 (client reboot): */
-		gen_clid(new);
+		gen_clid(new, nn);
 	new->cl_minorversion = 0;
 	gen_callback(new, setclid, rqstp);
-	add_to_unconfirmed(new, strhashval);
+	add_to_unconfirmed(new);
 	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
 	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
 	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
@@ -2243,14 +2259,14 @@
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net	*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (STALE_CLIENTID(clid, nn))
 		return nfserr_stale_clientid;
 	nfs4_lock_state();
 
-	conf = find_confirmed_client(clid, false);
-	unconf = find_unconfirmed_client(clid, false);
+	conf = find_confirmed_client(clid, false, nn);
+	unconf = find_unconfirmed_client(clid, false, nn);
 	/*
 	 * We try hard to give out unique clientid's, so if we get an
 	 * attempt to confirm the same clientid with a different cred,
@@ -2276,9 +2292,7 @@
 		nfsd4_probe_callback(conf);
 		expire_client(unconf);
 	} else { /* case 3: normal case; new or rebooted client */
-		unsigned int hash = clientstr_hashval(unconf->cl_recdir);
-
-		conf = find_confirmed_client_by_str(unconf->cl_recdir, hash);
+		conf = find_confirmed_client_by_name(&unconf->cl_name, nn);
 		if (conf)
 			expire_client(conf);
 		move_to_confirmed(unconf);
@@ -2340,7 +2354,7 @@
 	if (openowner_slab == NULL)
 		goto out_nomem;
 	lockowner_slab = kmem_cache_create("nfsd4_lockowners",
-			sizeof(struct nfs4_openowner), 0, 0, NULL);
+			sizeof(struct nfs4_lockowner), 0, 0, NULL);
 	if (lockowner_slab == NULL)
 		goto out_nomem;
 	file_slab = kmem_cache_create("nfsd4_files",
@@ -2404,7 +2418,9 @@
 
 static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
 {
-	list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]);
 	list_add(&oo->oo_perclient, &clp->cl_openowners);
 }
 
@@ -2444,11 +2460,13 @@
 }
 
 static void
-move_to_close_lru(struct nfs4_openowner *oo)
+move_to_close_lru(struct nfs4_openowner *oo, struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
 
-	list_move_tail(&oo->oo_close_lru, &close_lru);
+	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
 	oo->oo_time = get_seconds();
 }
 
@@ -2462,13 +2480,14 @@
 }
 
 static struct nfs4_openowner *
-find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, bool sessions)
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
+			bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_stateowner *so;
 	struct nfs4_openowner *oo;
 	struct nfs4_client *clp;
 
-	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+	list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) {
 		if (!so->so_is_open_owner)
 			continue;
 		if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
@@ -2555,9 +2574,14 @@
 	struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
 	struct nfs4_delegation *dp;
 
-	BUG_ON(!fp);
-	/* We assume break_lease is only called once per lease: */
-	BUG_ON(fp->fi_had_conflict);
+	if (!fp) {
+		WARN(1, "(%p)->fl_owner NULL\n", fl);
+		return;
+	}
+	if (fp->fi_had_conflict) {
+		WARN(1, "duplicate break on %p\n", fp);
+		return;
+	}
 	/*
 	 * We don't want the locks code to timeout the lease for us;
 	 * we'll remove it ourselves if a delegation isn't returned
@@ -2599,14 +2623,13 @@
 
 __be32
 nfsd4_process_open1(struct nfsd4_compound_state *cstate,
-		    struct nfsd4_open *open)
+		    struct nfsd4_open *open, struct nfsd_net *nn)
 {
 	clientid_t *clientid = &open->op_clientid;
 	struct nfs4_client *clp = NULL;
 	unsigned int strhashval;
 	struct nfs4_openowner *oo = NULL;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	if (STALE_CLIENTID(&open->op_clientid, nn))
 		return nfserr_stale_clientid;
@@ -2619,10 +2642,11 @@
 		return nfserr_jukebox;
 
 	strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
-	oo = find_openstateowner_str(strhashval, open, cstate->minorversion);
+	oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn);
 	open->op_openowner = oo;
 	if (!oo) {
-		clp = find_confirmed_client(clientid, cstate->minorversion);
+		clp = find_confirmed_client(clientid, cstate->minorversion,
+					    nn);
 		if (clp == NULL)
 			return nfserr_expired;
 		goto new_owner;
@@ -2891,7 +2915,7 @@
 			open->op_why_no_deleg = WND4_CANCELLED;
 			break;
 		case NFS4_SHARE_WANT_NO_DELEG:
-			BUG();	/* not supposed to get here */
+			WARN_ON_ONCE(1);
 		}
 	}
 }
@@ -2959,6 +2983,7 @@
 	}
 	return;
 out_free:
+	unhash_stid(&dp->dl_stid);
 	nfs4_put_delegation(dp);
 out_no_deleg:
 	flag = NFS4_OPEN_DELEGATE_NONE;
@@ -3104,27 +3129,32 @@
 		free_generic_stateid(open->op_stp);
 }
 
+static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp)
+{
+	struct nfs4_client *found;
+
+	if (STALE_CLIENTID(clid, nn))
+		return nfserr_stale_clientid;
+	found = find_confirmed_client(clid, session, nn);
+	if (clp)
+		*clp = found;
+	return found ? nfs_ok : nfserr_expired;
+}
+
 __be32
 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    clientid_t *clid)
 {
 	struct nfs4_client *clp;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	nfs4_lock_state();
 	dprintk("process_renew(%08x/%08x): starting\n", 
 			clid->cl_boot, clid->cl_id);
-	status = nfserr_stale_clientid;
-	if (STALE_CLIENTID(clid, nn))
+	status = lookup_clientid(clid, cstate->minorversion, nn, &clp);
+	if (status)
 		goto out;
-	clp = find_confirmed_client(clid, cstate->minorversion);
-	status = nfserr_expired;
-	if (clp == NULL) {
-		/* We assume the client took too long to RENEW. */
-		dprintk("nfsd4_renew: clientid not found!\n");
-		goto out;
-	}
 	status = nfserr_cb_path_down;
 	if (!list_empty(&clp->cl_delegations)
 			&& clp->cl_cb_state != NFSD4_CB_UP)
@@ -3136,44 +3166,42 @@
 }
 
 static void
-nfsd4_end_grace(struct net *net)
+nfsd4_end_grace(struct nfsd_net *nn)
 {
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
 	/* do nothing if grace period already ended */
 	if (nn->grace_ended)
 		return;
 
 	dprintk("NFSD: end of grace period\n");
 	nn->grace_ended = true;
-	nfsd4_record_grace_done(net, nn->boot_time);
+	nfsd4_record_grace_done(nn, nn->boot_time);
 	locks_end_grace(&nn->nfsd4_manager);
 	/*
 	 * Now that every NFSv4 client has had the chance to recover and
 	 * to see the (possibly new, possibly shorter) lease time, we
 	 * can safely set the next grace time to the current lease time:
 	 */
-	nfsd4_grace = nfsd4_lease;
+	nn->nfsd4_grace = nn->nfsd4_lease;
 }
 
 static time_t
-nfs4_laundromat(void)
+nfs4_laundromat(struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
 	struct nfs4_openowner *oo;
 	struct nfs4_delegation *dp;
 	struct list_head *pos, *next, reaplist;
-	time_t cutoff = get_seconds() - nfsd4_lease;
-	time_t t, clientid_val = nfsd4_lease;
-	time_t u, test_val = nfsd4_lease;
+	time_t cutoff = get_seconds() - nn->nfsd4_lease;
+	time_t t, clientid_val = nn->nfsd4_lease;
+	time_t u, test_val = nn->nfsd4_lease;
 
 	nfs4_lock_state();
 
 	dprintk("NFSD: laundromat service - starting\n");
-	nfsd4_end_grace(&init_net);
+	nfsd4_end_grace(nn);
 	INIT_LIST_HEAD(&reaplist);
-	spin_lock(&client_lock);
-	list_for_each_safe(pos, next, &client_lru) {
+	spin_lock(&nn->client_lock);
+	list_for_each_safe(pos, next, &nn->client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
 			t = clp->cl_time - cutoff;
@@ -3189,7 +3217,7 @@
 		unhash_client_locked(clp);
 		list_add(&clp->cl_lru, &reaplist);
 	}
-	spin_unlock(&client_lock);
+	spin_unlock(&nn->client_lock);
 	list_for_each_safe(pos, next, &reaplist) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
@@ -3199,6 +3227,8 @@
 	spin_lock(&recall_lock);
 	list_for_each_safe(pos, next, &del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
+			continue;
 		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
 			u = dp->dl_time - cutoff;
 			if (test_val > u)
@@ -3212,8 +3242,8 @@
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		unhash_delegation(dp);
 	}
-	test_val = nfsd4_lease;
-	list_for_each_safe(pos, next, &close_lru) {
+	test_val = nn->nfsd4_lease;
+	list_for_each_safe(pos, next, &nn->close_lru) {
 		oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
 		if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
 			u = oo->oo_time - cutoff;
@@ -3231,16 +3261,19 @@
 
 static struct workqueue_struct *laundry_wq;
 static void laundromat_main(struct work_struct *);
-static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
 
 static void
-laundromat_main(struct work_struct *not_used)
+laundromat_main(struct work_struct *laundry)
 {
 	time_t t;
+	struct delayed_work *dwork = container_of(laundry, struct delayed_work,
+						  work);
+	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+					   laundromat_work);
 
-	t = nfs4_laundromat();
+	t = nfs4_laundromat(nn);
 	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
-	queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
+	queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
 }
 
 static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
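Since the laundromat is now one delayed work item per net rather than a single static one, laundromat_main() has to recover its nfsd_net from the work pointer with container_of() before requeueing itself. The self-rearming shape of that pattern in isolation (struct foo, foo_work_fn() and the ten second period are made up):

#include <linux/workqueue.h>

struct foo {				/* hypothetical owner of the work */
	struct delayed_work work;
};

static void foo_work_fn(struct work_struct *w)
{
	struct foo *f = container_of(to_delayed_work(w), struct foo, work);

	/* ... one round of periodic work on f ... */
	schedule_delayed_work(&f->work, 10 * HZ);	/* rearm */
}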
@@ -3385,16 +3418,17 @@
 	return nfs_ok;
 }
 
-static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, bool sessions)
+static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
+				   struct nfs4_stid **s, bool sessions,
+				   struct nfsd_net *nn)
 {
 	struct nfs4_client *cl;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
 		return nfserr_bad_stateid;
 	if (STALE_STATEID(stateid, nn))
 		return nfserr_stale_stateid;
-	cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions);
+	cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions, nn);
 	if (!cl)
 		return nfserr_expired;
 	*s = find_stateid_by_type(cl, stateid, typemask);
@@ -3416,6 +3450,7 @@
 	struct nfs4_delegation *dp = NULL;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	__be32 status;
 
 	if (filpp)
@@ -3427,7 +3462,8 @@
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
 		return check_special_stateids(net, current_fh, stateid, flags);
 
-	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion);
+	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+				      &s, cstate->minorversion, nn);
 	if (status)
 		return status;
 	status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
@@ -3441,7 +3477,11 @@
 			goto out;
 		if (filpp) {
 			*filpp = dp->dl_file->fi_deleg_file;
-			BUG_ON(!*filpp);
+			if (!*filpp) {
+				WARN_ON_ONCE(1);
+				status = nfserr_serverfault;
+				goto out;
+			}
 		}
 		break;
 	case NFS4_OPEN_STID:
@@ -3568,7 +3608,8 @@
 static __be32
 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 			 stateid_t *stateid, char typemask,
-			 struct nfs4_ol_stateid **stpp)
+			 struct nfs4_ol_stateid **stpp,
+			 struct nfsd_net *nn)
 {
 	__be32 status;
 	struct nfs4_stid *s;
@@ -3577,7 +3618,8 @@
 		seqid, STATEID_VAL(stateid));
 
 	*stpp = NULL;
-	status = nfsd4_lookup_stateid(stateid, typemask, &s, cstate->minorversion);
+	status = nfsd4_lookup_stateid(stateid, typemask, &s,
+				      cstate->minorversion, nn);
 	if (status)
 		return status;
 	*stpp = openlockstateid(s);
@@ -3586,13 +3628,14 @@
 	return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp);
 }
 
-static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp)
+static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
+						 stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn)
 {
 	__be32 status;
 	struct nfs4_openowner *oo;
 
 	status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
-						NFS4_OPEN_STID, stpp);
+						NFS4_OPEN_STID, stpp, nn);
 	if (status)
 		return status;
 	oo = openowner((*stpp)->st_stateowner);
@@ -3608,6 +3651,7 @@
 	__be32 status;
 	struct nfs4_openowner *oo;
 	struct nfs4_ol_stateid *stp;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
 			(int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3621,7 +3665,7 @@
 
 	status = nfs4_preprocess_seqid_op(cstate,
 					oc->oc_seqid, &oc->oc_req_stateid,
-					NFS4_OPEN_STID, &stp);
+					NFS4_OPEN_STID, &stp, nn);
 	if (status)
 		goto out;
 	oo = openowner(stp->st_stateowner);
@@ -3664,7 +3708,7 @@
 	case NFS4_SHARE_ACCESS_BOTH:
 		break;
 	default:
-		BUG();
+		WARN_ON_ONCE(1);
 	}
 }
 
@@ -3685,6 +3729,7 @@
 {
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 
 			(int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3697,7 +3742,7 @@
 
 	nfs4_lock_state();
 	status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
-					&od->od_stateid, &stp);
+					&od->od_stateid, &stp, nn);
 	if (status)
 		goto out; 
 	status = nfserr_inval;
@@ -3760,6 +3805,8 @@
 	__be32 status;
 	struct nfs4_openowner *oo;
 	struct nfs4_ol_stateid *stp;
+	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_close on file %.*s\n", 
 			(int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3769,7 +3816,7 @@
 	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
 					&close->cl_stateid,
 					NFS4_OPEN_STID|NFS4_CLOSED_STID,
-					&stp);
+					&stp, nn);
 	if (status)
 		goto out; 
 	oo = openowner(stp->st_stateowner);
@@ -3791,7 +3838,7 @@
 			 * little while to handle CLOSE replay.
 			 */
 			if (list_empty(&oo->oo_owner.so_stateids))
-				move_to_close_lru(oo);
+				move_to_close_lru(oo, SVC_NET(rqstp));
 		}
 	}
 out:
@@ -3807,15 +3854,15 @@
 	struct nfs4_delegation *dp;
 	stateid_t *stateid = &dr->dr_stateid;
 	struct nfs4_stid *s;
-	struct inode *inode;
 	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
 		return status;
-	inode = cstate->current_fh.fh_dentry->d_inode;
 
 	nfs4_lock_state();
-	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, cstate->minorversion);
+	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s,
+				      cstate->minorversion, nn);
 	if (status)
 		goto out;
 	dp = delegstateid(s);
@@ -3833,8 +3880,6 @@
 
 #define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
 
-#define LOCKOWNER_INO_HASH_BITS 8
-#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
 #define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
 
 static inline u64
@@ -3852,7 +3897,7 @@
 {
 	u64 end;
 
-	BUG_ON(!len);
+	WARN_ON_ONCE(!len);
 	end = start + len;
 	return end > start ? end - 1: NFS4_MAX_UINT64;
 }
@@ -3864,8 +3909,6 @@
 		& LOCKOWNER_INO_HASH_MASK;
 }
 
-static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
-
 /*
  * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
  * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
@@ -3931,12 +3974,12 @@
 
 static struct nfs4_lockowner *
 find_lockowner_str(struct inode *inode, clientid_t *clid,
-		struct xdr_netobj *owner)
+		   struct xdr_netobj *owner, struct nfsd_net *nn)
 {
 	unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
 	struct nfs4_lockowner *lo;
 
-	list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
+	list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
 		if (same_lockowner_ino(lo, inode, clid, owner))
 			return lo;
 	}
@@ -3948,9 +3991,10 @@
 	struct inode *inode = open_stp->st_file->fi_inode;
 	unsigned int inohash = lockowner_ino_hashval(inode,
 			clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 
-	list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
-	list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
+	list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]);
+	list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]);
 	list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
 }
 
@@ -4024,8 +4068,10 @@
 	struct nfs4_client *cl = oo->oo_owner.so_client;
 	struct nfs4_lockowner *lo;
 	unsigned int strhashval;
+	struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id);
 
-	lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
+	lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid,
+				&lock->v.new.owner, nn);
 	if (lo) {
 		if (!cstate->minorversion)
 			return nfserr_bad_seqid;
@@ -4065,7 +4111,8 @@
 	bool new_state = false;
 	int lkflg;
 	int err;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct net *net = SVC_NET(rqstp);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
 		(long long) lock->lk_offset,
@@ -4099,7 +4146,7 @@
 		status = nfs4_preprocess_confirmed_seqid_op(cstate,
 				        lock->lk_new_open_seqid,
 		                        &lock->lk_new_open_stateid,
-					&open_stp);
+					&open_stp, nn);
 		if (status)
 			goto out;
 		open_sop = openowner(open_stp->st_stateowner);
@@ -4113,7 +4160,7 @@
 		status = nfs4_preprocess_seqid_op(cstate,
 				       lock->lk_old_lock_seqid,
 				       &lock->lk_old_lock_stateid,
-				       NFS4_LOCK_STID, &lock_stp);
+				       NFS4_LOCK_STID, &lock_stp, nn);
 	if (status)
 		goto out;
 	lock_sop = lockowner(lock_stp->st_stateowner);
@@ -4124,10 +4171,10 @@
 		goto out;
 
 	status = nfserr_grace;
-	if (locks_in_grace(SVC_NET(rqstp)) && !lock->lk_reclaim)
+	if (locks_in_grace(net) && !lock->lk_reclaim)
 		goto out;
 	status = nfserr_no_grace;
-	if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim)
+	if (!locks_in_grace(net) && lock->lk_reclaim)
 		goto out;
 
 	file_lock = locks_alloc_lock();
@@ -4238,7 +4285,7 @@
 	struct file_lock *file_lock = NULL;
 	struct nfs4_lockowner *lo;
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	if (locks_in_grace(SVC_NET(rqstp)))
 		return nfserr_grace;
@@ -4248,9 +4295,11 @@
 
 	nfs4_lock_state();
 
-	status = nfserr_stale_clientid;
-	if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid, nn))
-		goto out;
+	if (!nfsd4_has_session(cstate)) {
+		status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL);
+		if (status)
+			goto out;
+	}
 
 	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
 		goto out;
@@ -4278,7 +4327,7 @@
 		goto out;
 	}
 
-	lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner);
+	lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn);
 	if (lo)
 		file_lock->fl_owner = (fl_owner_t)lo;
 	file_lock->fl_pid = current->tgid;
@@ -4313,7 +4362,8 @@
 	struct file_lock *file_lock = NULL;
 	__be32 status;
 	int err;
-						        
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
 		(long long) locku->lu_offset,
 		(long long) locku->lu_length);
@@ -4324,7 +4374,8 @@
 	nfs4_lock_state();
 									        
 	status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
-					&locku->lu_stateid, NFS4_LOCK_STID, &stp);
+					&locku->lu_stateid, NFS4_LOCK_STID,
+					&stp, nn);
 	if (status)
 		goto out;
 	filp = find_any_file(stp->st_file);
@@ -4414,23 +4465,21 @@
 	struct list_head matches;
 	unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
 	__be32 status;
-	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
 		clid->cl_boot, clid->cl_id);
 
-	/* XXX check for lease expiration */
-
-	status = nfserr_stale_clientid;
-	if (STALE_CLIENTID(clid, nn))
-		return status;
-
 	nfs4_lock_state();
 
+	status = lookup_clientid(clid, cstate->minorversion, nn, NULL);
+	if (status)
+		goto out;
+
 	status = nfserr_locks_held;
 	INIT_LIST_HEAD(&matches);
 
-	list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
+	list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) {
 		if (sop->so_is_open_owner)
 			continue;
 		if (!same_owner_str(sop, owner, clid))
@@ -4466,73 +4515,74 @@
 	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
 }
 
-int
-nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
+bool
+nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
 {
-	unsigned int strhashval = clientstr_hashval(name);
-	struct nfs4_client *clp;
+	struct nfs4_client_reclaim *crp;
 
-	clp = find_confirmed_client_by_str(name, strhashval);
-	if (!clp)
-		return 0;
-	return test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+	crp = nfsd4_find_reclaim_client(name, nn);
+	return (crp && crp->cr_clp);
 }
 
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-int
-nfs4_client_to_reclaim(const char *name)
+struct nfs4_client_reclaim *
+nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn)
 {
 	unsigned int strhashval;
-	struct nfs4_client_reclaim *crp = NULL;
+	struct nfs4_client_reclaim *crp;
 
 	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
 	crp = alloc_reclaim();
-	if (!crp)
-		return 0;
-	strhashval = clientstr_hashval(name);
-	INIT_LIST_HEAD(&crp->cr_strhash);
-	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
-	memcpy(crp->cr_recdir, name, HEXDIR_LEN);
-	reclaim_str_hashtbl_size++;
-	return 1;
+	if (crp) {
+		strhashval = clientstr_hashval(name);
+		INIT_LIST_HEAD(&crp->cr_strhash);
+		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
+		memcpy(crp->cr_recdir, name, HEXDIR_LEN);
+		crp->cr_clp = NULL;
+		nn->reclaim_str_hashtbl_size++;
+	}
+	return crp;
 }
 
 void
-nfs4_release_reclaim(void)
+nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
+{
+	list_del(&crp->cr_strhash);
+	kfree(crp);
+	nn->reclaim_str_hashtbl_size--;
+}
+
+void
+nfs4_release_reclaim(struct nfsd_net *nn)
 {
 	struct nfs4_client_reclaim *crp = NULL;
 	int i;
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		while (!list_empty(&reclaim_str_hashtbl[i])) {
-			crp = list_entry(reclaim_str_hashtbl[i].next,
+		while (!list_empty(&nn->reclaim_str_hashtbl[i])) {
+			crp = list_entry(nn->reclaim_str_hashtbl[i].next,
 			                struct nfs4_client_reclaim, cr_strhash);
-			list_del(&crp->cr_strhash);
-			kfree(crp);
-			reclaim_str_hashtbl_size--;
+			nfs4_remove_reclaim_record(crp, nn);
 		}
 	}
-	BUG_ON(reclaim_str_hashtbl_size);
+	WARN_ON_ONCE(nn->reclaim_str_hashtbl_size);
 }
 
 /*
  * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
 struct nfs4_client_reclaim *
-nfsd4_find_reclaim_client(struct nfs4_client *clp)
+nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
 
-	dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
-		            clp->cl_name.len, clp->cl_name.data,
-			    clp->cl_recdir);
+	dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);
 
-	/* find clp->cl_name in reclaim_str_hashtbl */
-	strhashval = clientstr_hashval(clp->cl_recdir);
-	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
-		if (same_name(crp->cr_recdir, clp->cl_recdir)) {
+	strhashval = clientstr_hashval(recdir);
+	list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
+		if (same_name(crp->cr_recdir, recdir)) {
 			return crp;
 		}
 	}
@@ -4543,12 +4593,12 @@
 * Called from OPEN. Look for clientid in reclaim list.
 */
 __be32
-nfs4_check_open_reclaim(clientid_t *clid, bool sessions)
+nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 {
 	struct nfs4_client *clp;
 
 	/* find clientid in conf_id_hashtbl */
-	clp = find_confirmed_client(clid, sessions);
+	clp = find_confirmed_client(clid, sessions, nn);
 	if (clp == NULL)
 		return nfserr_reclaim_bad;
 
@@ -4557,83 +4607,41 @@
 
 #ifdef CONFIG_NFSD_FAULT_INJECTION
 
-void nfsd_forget_clients(u64 num)
+u64 nfsd_forget_client(struct nfs4_client *clp, u64 max)
 {
-	struct nfs4_client *clp, *next;
-	int count = 0;
-
-	nfs4_lock_state();
-	list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
-		expire_client(clp);
-		if (++count == num)
-			break;
-	}
-	nfs4_unlock_state();
-
-	printk(KERN_INFO "NFSD: Forgot %d clients", count);
+	expire_client(clp);
+	return 1;
 }
 
-static void release_lockowner_sop(struct nfs4_stateowner *sop)
+u64 nfsd_print_client(struct nfs4_client *clp, u64 num)
 {
-	release_lockowner(lockowner(sop));
+	char buf[INET6_ADDRSTRLEN];
+	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
+	printk(KERN_INFO "NFS Client: %s\n", buf);
+	return 1;
 }
 
-static void release_openowner_sop(struct nfs4_stateowner *sop)
+static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
+			     const char *type)
 {
-	release_openowner(openowner(sop));
+	char buf[INET6_ADDRSTRLEN];
+	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
+	printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
 }
 
-static int nfsd_release_n_owners(u64 num, bool is_open_owner,
-				void (*release_sop)(struct nfs4_stateowner *))
+static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *))
 {
-	int i, count = 0;
-	struct nfs4_stateowner *sop, *next;
+	struct nfs4_openowner *oop;
+	struct nfs4_lockowner *lop, *lo_next;
+	struct nfs4_ol_stateid *stp, *st_next;
+	u64 count = 0;
 
-	for (i = 0; i < OWNER_HASH_SIZE; i++) {
-		list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
-			if (sop->so_is_open_owner != is_open_owner)
-				continue;
-			release_sop(sop);
-			if (++count == num)
-				return count;
-		}
-	}
-	return count;
-}
-
-void nfsd_forget_locks(u64 num)
-{
-	int count;
-
-	nfs4_lock_state();
-	count = nfsd_release_n_owners(num, false, release_lockowner_sop);
-	nfs4_unlock_state();
-
-	printk(KERN_INFO "NFSD: Forgot %d locks", count);
-}
-
-void nfsd_forget_openowners(u64 num)
-{
-	int count;
-
-	nfs4_lock_state();
-	count = nfsd_release_n_owners(num, true, release_openowner_sop);
-	nfs4_unlock_state();
-
-	printk(KERN_INFO "NFSD: Forgot %d open owners", count);
-}
-
-static int nfsd_process_n_delegations(u64 num, struct list_head *list)
-{
-	int i, count = 0;
-	struct nfs4_file *fp, *fnext;
-	struct nfs4_delegation *dp, *dnext;
-
-	for (i = 0; i < FILE_HASH_SIZE; i++) {
-		list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
-			list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
-				list_move(&dp->dl_recall_lru, list);
-				if (++count == num)
+	list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
+		list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) {
+			list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) {
+				if (func)
+					func(lop);
+				if (++count == max)
 					return count;
 			}
 		}
@@ -4642,39 +4650,134 @@
 	return count;
 }
 
-void nfsd_forget_delegations(u64 num)
+u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max)
 {
-	unsigned int count;
-	LIST_HEAD(victims);
-	struct nfs4_delegation *dp, *dnext;
-
-	spin_lock(&recall_lock);
-	count = nfsd_process_n_delegations(num, &victims);
-	spin_unlock(&recall_lock);
-
-	nfs4_lock_state();
-	list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru)
-		unhash_delegation(dp);
-	nfs4_unlock_state();
-
-	printk(KERN_INFO "NFSD: Forgot %d delegations", count);
+	return nfsd_foreach_client_lock(clp, max, release_lockowner);
 }
 
-void nfsd_recall_delegations(u64 num)
+u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max)
 {
-	unsigned int count;
+	u64 count = nfsd_foreach_client_lock(clp, max, NULL);
+	nfsd_print_count(clp, count, "locked files");
+	return count;
+}
+
+static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *))
+{
+	struct nfs4_openowner *oop, *next;
+	u64 count = 0;
+
+	list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
+		if (func)
+			func(oop);
+		if (++count == max)
+			break;
+	}
+
+	return count;
+}
+
+u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max)
+{
+	return nfsd_foreach_client_open(clp, max, release_openowner);
+}
+
+u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max)
+{
+	u64 count = nfsd_foreach_client_open(clp, max, NULL);
+	nfsd_print_count(clp, count, "open files");
+	return count;
+}
+
+static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
+				     struct list_head *victims)
+{
+	struct nfs4_delegation *dp, *next;
+	u64 count = 0;
+
+	list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
+		if (victims)
+			list_move(&dp->dl_recall_lru, victims);
+		if (++count == max)
+			break;
+	}
+	return count;
+}
+
+u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max)
+{
+	struct nfs4_delegation *dp, *next;
 	LIST_HEAD(victims);
-	struct nfs4_delegation *dp, *dnext;
+	u64 count;
 
 	spin_lock(&recall_lock);
-	count = nfsd_process_n_delegations(num, &victims);
-	list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) {
-		list_del(&dp->dl_recall_lru);
-		nfsd_break_one_deleg(dp);
-	}
+	count = nfsd_find_all_delegations(clp, max, &victims);
 	spin_unlock(&recall_lock);
 
-	printk(KERN_INFO "NFSD: Recalled %d delegations", count);
+	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
+		unhash_delegation(dp);
+
+	return count;
+}
+
+u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max)
+{
+	struct nfs4_delegation *dp, *next;
+	LIST_HEAD(victims);
+	u64 count;
+
+	spin_lock(&recall_lock);
+	count = nfsd_find_all_delegations(clp, max, &victims);
+	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
+		nfsd_break_one_deleg(dp);
+	spin_unlock(&recall_lock);
+
+	return count;
+}
+
+u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max)
+{
+	u64 count = 0;
+
+	spin_lock(&recall_lock);
+	count = nfsd_find_all_delegations(clp, max, NULL);
+	spin_unlock(&recall_lock);
+
+	nfsd_print_count(clp, count, "delegations");
+	return count;
+}
+
+u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
+{
+	struct nfs4_client *clp, *next;
+	u64 count = 0;
+	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
+
+	if (!nfsd_netns_ready(nn))
+		return 0;
+
+	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
+		count += func(clp, max - count);
+		if ((max != 0) && (count >= max))
+			break;
+	}
+
+	return count;
+}
+
+struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
+{
+	struct nfs4_client *clp;
+	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
+
+	if (!nfsd_netns_ready(nn))
+		return NULL;
+
+	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+		if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
+			return clp;
+	}
+	return NULL;
 }
 
 #endif /* CONFIG_NFSD_FAULT_INJECTION */
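The fault-injection rework above replaces the old "forget N objects globally" helpers with per-client callbacks that nfsd_for_n_state() applies across nn->client_lru until a count is reached. A sketch of how a caller might drive it, using only functions introduced here (forget_n_clients() itself is illustrative, not part of the patch):

/* illustrative caller only */
static void forget_n_clients(u64 n)
{
	u64 count;

	nfs4_lock_state();
	count = nfsd_for_n_state(n, nfsd_forget_client);
	nfs4_unlock_state();

	printk(KERN_INFO "NFSD: forgot %llu clients\n", count);
}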
@@ -4686,27 +4789,10 @@
 {
 	int i;
 
-	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
-		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
-		INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
-		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
-		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
-	}
-	for (i = 0; i < SESSION_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
 	for (i = 0; i < FILE_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&file_hashtbl[i]);
 	}
-	for (i = 0; i < OWNER_HASH_SIZE; i++) {
-		INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
-	}
-	for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
-	INIT_LIST_HEAD(&close_lru);
-	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
-	reclaim_str_hashtbl_size = 0;
 }
 
 /*
@@ -4730,12 +4816,100 @@
 	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
 }
 
-/* initialization to perform when the nfsd service is started: */
+static int nfs4_state_create_net(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int i;
+
+	nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) *
+			CLIENT_HASH_SIZE, GFP_KERNEL);
+	if (!nn->conf_id_hashtbl)
+		goto err;
+	nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) *
+			CLIENT_HASH_SIZE, GFP_KERNEL);
+	if (!nn->unconf_id_hashtbl)
+		goto err_unconf_id;
+	nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
+			OWNER_HASH_SIZE, GFP_KERNEL);
+	if (!nn->ownerstr_hashtbl)
+		goto err_ownerstr;
+	nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) *
+			LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL);
+	if (!nn->lockowner_ino_hashtbl)
+		goto err_lockowner_ino;
+	nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) *
+			SESSION_HASH_SIZE, GFP_KERNEL);
+	if (!nn->sessionid_hashtbl)
+		goto err_sessionid;
+
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
+		INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
+	}
+	for (i = 0; i < OWNER_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]);
+	for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]);
+	for (i = 0; i < SESSION_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
+	nn->conf_name_tree = RB_ROOT;
+	nn->unconf_name_tree = RB_ROOT;
+	INIT_LIST_HEAD(&nn->client_lru);
+	INIT_LIST_HEAD(&nn->close_lru);
+	spin_lock_init(&nn->client_lock);
+
+	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+	get_net(net);
+
+	return 0;
+
+err_sessionid:
+	kfree(nn->lockowner_ino_hashtbl);
+err_lockowner_ino:
+	kfree(nn->ownerstr_hashtbl);
+err_ownerstr:
+	kfree(nn->unconf_id_hashtbl);
+err_unconf_id:
+	kfree(nn->conf_id_hashtbl);
+err:
+	return -ENOMEM;
+}
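nfs4_state_create_net() above uses the usual goto-unwind error style: each allocation gets a label, and a failure at step N walks back through the labels freeing steps N-1 down to 1. Compressed to two allocations (struct ctx and ctx_setup() are hypothetical):

#include <linux/slab.h>

struct ctx {
	void *a;
	void *b;
};

static int ctx_setup(struct ctx *c)
{
	c->a = kmalloc(64, GFP_KERNEL);
	if (!c->a)
		goto err;
	c->b = kmalloc(64, GFP_KERNEL);
	if (!c->b)
		goto err_free_a;
	return 0;

err_free_a:
	kfree(c->a);
err:
	return -ENOMEM;
}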
+
+static void
+nfs4_state_destroy_net(struct net *net)
+{
+	int i;
+	struct nfs4_client *clp = NULL;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct rb_node *node, *tmp;
+
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+		while (!list_empty(&nn->conf_id_hashtbl[i])) {
+			clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
+			destroy_client(clp);
+		}
+	}
+
+	node = rb_first(&nn->unconf_name_tree);
+	while (node != NULL) {
+		tmp = node;
+		node = rb_next(tmp);
+		clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
+		rb_erase(tmp, &nn->unconf_name_tree);
+		destroy_client(clp);
+	}
+
+	kfree(nn->sessionid_hashtbl);
+	kfree(nn->lockowner_ino_hashtbl);
+	kfree(nn->ownerstr_hashtbl);
+	kfree(nn->unconf_id_hashtbl);
+	kfree(nn->conf_id_hashtbl);
+	put_net(net);
+}
 
 int
-nfs4_state_start(void)
+nfs4_state_start_net(struct net *net)
 {
-	struct net *net = &init_net;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
 
@@ -4746,18 +4920,32 @@
 	 * to that instead and then do most of the rest of this on a per-net
 	 * basis.
 	 */
-	get_net(net);
+	if (net != &init_net)
+		return -EINVAL;
+
+	ret = nfs4_state_create_net(net);
+	if (ret)
+		return ret;
 	nfsd4_client_tracking_init(net);
 	nn->boot_time = get_seconds();
 	locks_start_grace(net, &nn->nfsd4_manager);
 	nn->grace_ended = false;
-	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
-	       nfsd4_grace);
+	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
+	       nn->nfsd4_grace, net);
+	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
+	return 0;
+}
+
+/* initialization to perform when the nfsd service is started: */
+
+int
+nfs4_state_start(void)
+{
+	int ret;
+
 	ret = set_callback_cred();
-	if (ret) {
-		ret = -ENOMEM;
-		goto out_recovery;
-	}
+	if (ret)
+		return -ENOMEM;
 	laundry_wq = create_singlethread_workqueue("nfsd4");
 	if (laundry_wq == NULL) {
 		ret = -ENOMEM;
@@ -4766,39 +4954,34 @@
 	ret = nfsd4_create_callback_queue();
 	if (ret)
 		goto out_free_laundry;
-	queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
+
 	set_max_delegations();
+
 	return 0;
+
 out_free_laundry:
 	destroy_workqueue(laundry_wq);
 out_recovery:
-	nfsd4_client_tracking_exit(net);
-	put_net(net);
 	return ret;
 }
 
-static void
-__nfs4_state_shutdown(void)
+/* should be called with the state lock held */
+void
+nfs4_state_shutdown_net(struct net *net)
 {
-	int i;
-	struct nfs4_client *clp = NULL;
 	struct nfs4_delegation *dp = NULL;
 	struct list_head *pos, *next, reaplist;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		while (!list_empty(&conf_id_hashtbl[i])) {
-			clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
-			destroy_client(clp);
-		}
-		while (!list_empty(&unconf_str_hashtbl[i])) {
-			clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash);
-			destroy_client(clp);
-		}
-	}
+	cancel_delayed_work_sync(&nn->laundromat_work);
+	locks_end_grace(&nn->nfsd4_manager);
+
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
 	list_for_each_safe(pos, next, &del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		if (dp->dl_stid.sc_client->net != net)
+			continue;
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&recall_lock);
@@ -4807,22 +4990,14 @@
 		unhash_delegation(dp);
 	}
 
-	nfsd4_client_tracking_exit(&init_net);
-	put_net(&init_net);
+	nfsd4_client_tracking_exit(net);
+	nfs4_state_destroy_net(net);
 }
 
 void
 nfs4_state_shutdown(void)
 {
-	struct net *net = &init_net;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
-	cancel_delayed_work_sync(&laundromat_work);
 	destroy_workqueue(laundry_wq);
-	locks_end_grace(&nn->nfsd4_manager);
-	nfs4_lock_state();
-	__nfs4_state_shutdown();
-	nfs4_unlock_state();
 	nfsd4_destroy_callback_queue();
 }
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fd548d1..0dc1158 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -53,6 +53,7 @@
 #include "vfs.h"
 #include "state.h"
 #include "cache.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
@@ -65,17 +66,17 @@
 #define NFS4_REFERRAL_FSID_MINOR	0x8000000ULL
 
 static __be32
-check_filename(char *str, int len, __be32 err)
+check_filename(char *str, int len)
 {
 	int i;
 
 	if (len == 0)
 		return nfserr_inval;
 	if (isdotent(str, len))
-		return err;
+		return nfserr_badname;
 	for (i = 0; i < len; i++)
 		if (str[i] == '/')
-			return err;
+			return nfserr_badname;
 	return 0;
 }
 
@@ -422,6 +423,86 @@
 	DECODE_TAIL;
 }
 
+static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+{
+	DECODE_HEAD;
+	u32 dummy, uid, gid;
+	char *machine_name;
+	int i;
+	int nr_secflavs;
+
+	/* callback_sec_params4 */
+	READ_BUF(4);
+	READ32(nr_secflavs);
+	cbs->flavor = (u32)(-1);
+	for (i = 0; i < nr_secflavs; ++i) {
+		READ_BUF(4);
+		READ32(dummy);
+		switch (dummy) {
+		case RPC_AUTH_NULL:
+			/* Nothing to read */
+			if (cbs->flavor == (u32)(-1))
+				cbs->flavor = RPC_AUTH_NULL;
+			break;
+		case RPC_AUTH_UNIX:
+			READ_BUF(8);
+			/* stamp */
+			READ32(dummy);
+
+			/* machine name */
+			READ32(dummy);
+			READ_BUF(dummy);
+			SAVEMEM(machine_name, dummy);
+
+			/* uid, gid */
+			READ_BUF(8);
+			READ32(uid);
+			READ32(gid);
+
+			/* more gids */
+			READ_BUF(4);
+			READ32(dummy);
+			READ_BUF(dummy * 4);
+			if (cbs->flavor == (u32)(-1)) {
+				cbs->uid = uid;
+				cbs->gid = gid;
+				cbs->flavor = RPC_AUTH_UNIX;
+			}
+			break;
+		case RPC_AUTH_GSS:
+			dprintk("RPC_AUTH_GSS callback secflavor "
+				"not supported!\n");
+			READ_BUF(8);
+			/* gcbp_service */
+			READ32(dummy);
+			/* gcbp_handle_from_server */
+			READ32(dummy);
+			READ_BUF(dummy);
+			p += XDR_QUADLEN(dummy);
+			/* gcbp_handle_from_client */
+			READ_BUF(4);
+			READ32(dummy);
+			READ_BUF(dummy);
+			break;
+		default:
+			dprintk("Illegal callback secflavor\n");
+			return nfserr_inval;
+		}
+	}
+	DECODE_TAIL;
+}
+
+static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(bc->bc_cb_program);
+	nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+
+	DECODE_TAIL;
+}
+
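Both decoders above lean on the READ_BUF()/READ32() macros, which move a cursor through the XDR stream and bail out with xdr_error when fewer bytes remain than requested (the kernel macros also hop page boundaries, which this sketch omits). A standalone model of that bounds-checked cursor, with xdr_cursor and be32_get as our illustrative names:

#include <stdint.h>
#include <string.h>
#include <stddef.h>
#include <arpa/inet.h>

struct xdr_cursor {
	const uint8_t *p;		/* current position */
	const uint8_t *end;		/* one past the last valid byte */
};

/* Like READ_BUF(len): fail unless len bytes remain. */
static int xdr_ensure(struct xdr_cursor *c, size_t len)
{
	return (size_t)(c->end - c->p) < len ? -1 : 0;
}

/* Like READ32(x): pull one big-endian 32-bit word. */
static int be32_get(struct xdr_cursor *c, uint32_t *out)
{
	uint32_t raw;

	if (xdr_ensure(c, 4))
		return -1;
	memcpy(&raw, c->p, 4);		/* avoid unaligned access */
	*out = ntohl(raw);
	c->p += 4;
	return 0;
}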
 static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
 {
 	DECODE_HEAD;
@@ -490,7 +571,7 @@
 	READ32(create->cr_namelen);
 	READ_BUF(create->cr_namelen);
 	SAVEMEM(create->cr_name, create->cr_namelen);
-	if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
+	if ((status = check_filename(create->cr_name, create->cr_namelen)))
 		return status;
 
 	status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
@@ -522,7 +603,7 @@
 	READ32(link->li_namelen);
 	READ_BUF(link->li_namelen);
 	SAVEMEM(link->li_name, link->li_namelen);
-	if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval)))
+	if ((status = check_filename(link->li_name, link->li_namelen)))
 		return status;
 
 	DECODE_TAIL;
@@ -616,7 +697,7 @@
 	READ32(lookup->lo_len);
 	READ_BUF(lookup->lo_len);
 	SAVEMEM(lookup->lo_name, lookup->lo_len);
-	if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent)))
+	if ((status = check_filename(lookup->lo_name, lookup->lo_len)))
 		return status;
 
 	DECODE_TAIL;
@@ -780,7 +861,7 @@
 		READ32(open->op_fname.len);
 		READ_BUF(open->op_fname.len);
 		SAVEMEM(open->op_fname.data, open->op_fname.len);
-		if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
 			return status;
 		break;
 	case NFS4_OPEN_CLAIM_PREVIOUS:
@@ -795,7 +876,7 @@
 		READ32(open->op_fname.len);
 		READ_BUF(open->op_fname.len);
 		SAVEMEM(open->op_fname.data, open->op_fname.len);
-		if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
 			return status;
 		break;
 	case NFS4_OPEN_CLAIM_FH:
@@ -907,7 +988,7 @@
 	READ32(remove->rm_namelen);
 	READ_BUF(remove->rm_namelen);
 	SAVEMEM(remove->rm_name, remove->rm_namelen);
-	if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent)))
+	if ((status = check_filename(remove->rm_name, remove->rm_namelen)))
 		return status;
 
 	DECODE_TAIL;
@@ -925,9 +1006,9 @@
 	READ32(rename->rn_tnamelen);
 	READ_BUF(rename->rn_tnamelen);
 	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
-	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent)))
+	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen)))
 		return status;
-	if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval)))
+	if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen)))
 		return status;
 
 	DECODE_TAIL;
@@ -954,8 +1035,7 @@
 	READ32(secinfo->si_namelen);
 	READ_BUF(secinfo->si_namelen);
 	SAVEMEM(secinfo->si_name, secinfo->si_namelen);
-	status = check_filename(secinfo->si_name, secinfo->si_namelen,
-								nfserr_noent);
+	status = check_filename(secinfo->si_name, secinfo->si_namelen);
 	if (status)
 		return status;
 	DECODE_TAIL;
@@ -1026,31 +1106,14 @@
 static __be32
 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
 {
-#if 0
-	struct nfsd4_compoundargs save = {
-		.p = argp->p,
-		.end = argp->end,
-		.rqstp = argp->rqstp,
-	};
-	u32             ve_bmval[2];
-	struct iattr    ve_iattr;           /* request */
-	struct nfs4_acl *ve_acl;            /* request */
-#endif
 	DECODE_HEAD;
 
 	if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
 		goto out;
 
 	/* For convenience's sake, we compare raw xdr'd attributes in
-	 * nfsd4_proc_verify; however we still decode here just to return
-	 * correct error in case of bad xdr. */
-#if 0
-	status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl);
-	if (status == nfserr_inval) {
-		status = nfserrno(status);
-		goto out;
-	}
-#endif
+	 * nfsd4_proc_verify */
+
 	READ_BUF(4);
 	READ32(verify->ve_attrlen);
 	READ_BUF(verify->ve_attrlen);
@@ -1063,7 +1126,6 @@
 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 {
 	int avail;
-	int v;
 	int len;
 	DECODE_HEAD;
 
@@ -1087,27 +1149,26 @@
 				__FILE__, __LINE__);
 		goto xdr_error;
 	}
-	argp->rqstp->rq_vec[0].iov_base = p;
-	argp->rqstp->rq_vec[0].iov_len = avail;
-	v = 0;
-	len = write->wr_buflen;
-	while (len > argp->rqstp->rq_vec[v].iov_len) {
-		len -= argp->rqstp->rq_vec[v].iov_len;
-		v++;
-		argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
-		argp->pagelist++;
-		if (argp->pagelen >= PAGE_SIZE) {
-			argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
-			argp->pagelen -= PAGE_SIZE;
-		} else {
-			argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
-			argp->pagelen -= len;
-		}
+	write->wr_head.iov_base = p;
+	write->wr_head.iov_len = avail;
+	WARN_ON(avail != (XDR_QUADLEN(avail) << 2));
+	write->wr_pagelist = argp->pagelist;
+
+	len = XDR_QUADLEN(write->wr_buflen) << 2;
+	if (len >= avail) {
+		int pages;
+
+		len -= avail;
+
+		pages = len >> PAGE_SHIFT;
+		argp->pagelist += pages;
+		argp->pagelen -= pages * PAGE_SIZE;
+		len -= pages * PAGE_SIZE;
+
+		argp->p = (__be32 *)page_address(argp->pagelist[0]);
+		argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
 	}
-	argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
-	argp->p = (__be32*)  (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
-	argp->rqstp->rq_vec[v].iov_len = len;
-	write->wr_vlen = v+1;
+	argp->p += XDR_QUADLEN(len);
 
 	DECODE_TAIL;
 }
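The rewritten write decoder thinks in XDR quad-words instead of filling rq_vec. XDR_QUADLEN() (from include/linux/sunrpc/xdr.h) rounds a byte count up to 4-byte units, so XDR_QUADLEN(len) << 2 is the padded wire length the code steps over:

#define XDR_QUADLEN(l)	(((l) + 3) >> 2)

/* XDR_QUADLEN(5) == 2: 5 payload bytes occupy 8 bytes on the wire. */
/* XDR_QUADLEN(8) == 2: an already-aligned length gains no padding. */
/* Hence the WARN_ON above: the head iovec is expected quad-aligned. */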
@@ -1237,11 +1298,7 @@
 			    struct nfsd4_create_session *sess)
 {
 	DECODE_HEAD;
-
 	u32 dummy;
-	char *machine_name;
-	int i;
-	int nr_secflavs;
 
 	READ_BUF(16);
 	COPYMEM(&sess->clientid, 8);
@@ -1282,58 +1339,9 @@
 		goto xdr_error;
 	}
 
-	READ_BUF(8);
+	READ_BUF(4);
 	READ32(sess->callback_prog);
-
-	/* callback_sec_params4 */
-	READ32(nr_secflavs);
-	for (i = 0; i < nr_secflavs; ++i) {
-		READ_BUF(4);
-		READ32(dummy);
-		switch (dummy) {
-		case RPC_AUTH_NULL:
-			/* Nothing to read */
-			break;
-		case RPC_AUTH_UNIX:
-			READ_BUF(8);
-			/* stamp */
-			READ32(dummy);
-
-			/* machine name */
-			READ32(dummy);
-			READ_BUF(dummy);
-			SAVEMEM(machine_name, dummy);
-
-			/* uid, gid */
-			READ_BUF(8);
-			READ32(sess->uid);
-			READ32(sess->gid);
-
-			/* more gids */
-			READ_BUF(4);
-			READ32(dummy);
-			READ_BUF(dummy * 4);
-			break;
-		case RPC_AUTH_GSS:
-			dprintk("RPC_AUTH_GSS callback secflavor "
-				"not supported!\n");
-			READ_BUF(8);
-			/* gcbp_service */
-			READ32(dummy);
-			/* gcbp_handle_from_server */
-			READ32(dummy);
-			READ_BUF(dummy);
-			p += XDR_QUADLEN(dummy);
-			/* gcbp_handle_from_client */
-			READ_BUF(4);
-			READ32(dummy);
-			READ_BUF(dummy);
-			break;
-		default:
-			dprintk("Illegal callback secflavor\n");
-			return nfserr_inval;
-		}
-	}
+	nfsd4_decode_cb_sec(argp, &sess->cb_sec);
 	DECODE_TAIL;
 }
 
@@ -1528,7 +1536,7 @@
 	[OP_RELEASE_LOCKOWNER]	= (nfsd4_dec)nfsd4_decode_notsupp,
 
 	/* new operations for NFSv4.1 */
-	[OP_BACKCHANNEL_CTL]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_BACKCHANNEL_CTL]	= (nfsd4_dec)nfsd4_decode_backchannel_ctl,
 	[OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
 	[OP_EXCHANGE_ID]	= (nfsd4_dec)nfsd4_decode_exchange_id,
 	[OP_CREATE_SESSION]	= (nfsd4_dec)nfsd4_decode_create_session,
@@ -1568,12 +1576,6 @@
 	bool cachethis = false;
 	int i;
 
-	/*
-	 * XXX: According to spec, we should check the tag
-	 * for UTF-8 compliance.  I'm postponing this for
-	 * now because it seems that some clients do use
-	 * binary tags.
-	 */
 	READ_BUF(4);
 	READ32(argp->taglen);
 	READ_BUF(argp->taglen + 8);
@@ -1603,38 +1605,8 @@
 		op = &argp->ops[i];
 		op->replay = NULL;
 
-		/*
-		 * We can't use READ_BUF() here because we need to handle
-		 * a missing opcode as an OP_WRITE + 1. So we need to check
-		 * to see if we're truly at the end of our buffer or if there
-		 * is another page we need to flip to.
-		 */
-
-		if (argp->p == argp->end) {
-			if (argp->pagelen < 4) {
-				/* There isn't an opcode still on the wire */
-				op->opnum = OP_WRITE + 1;
-				op->status = nfserr_bad_xdr;
-				argp->opcnt = i+1;
-				break;
-			}
-
-			/*
-			 * False alarm. We just hit a page boundary, but there
-			 * is still data available.  Move pointer across page
-			 * boundary.  *snip from READ_BUF*
-			 */
-			argp->p = page_address(argp->pagelist[0]);
-			argp->pagelist++;
-			if (argp->pagelen < PAGE_SIZE) {
-				argp->end = argp->p + (argp->pagelen>>2);
-				argp->pagelen = 0;
-			} else {
-				argp->end = argp->p + (PAGE_SIZE>>2);
-				argp->pagelen -= PAGE_SIZE;
-			}
-		}
-		op->opnum = ntohl(*argp->p++);
+		READ_BUF(4);
+		READ32(op->opnum);
 
 		if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP)
 			op->status = ops->decoders[op->opnum](argp, &op->u);
@@ -2014,6 +1986,22 @@
 	return 0;
 }
 
+
+static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
+{
+	struct path path = exp->ex_path;
+	int err;
+
+	path_get(&path);
+	while (follow_up(&path)) {
+		if (path.dentry != path.mnt->mnt_root)
+			break;
+	}
+	err = vfs_getattr(path.mnt, path.dentry, stat);
+	path_put(&path);
+	return err;
+}
+
 /*
  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
  * ourselves.
@@ -2048,6 +2036,7 @@
 		.mnt	= exp->ex_path.mnt,
 		.dentry	= dentry,
 	};
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
 	BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
@@ -2208,7 +2197,7 @@
 	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32(nfsd4_lease);
+		WRITE32(nn->nfsd4_lease);
 	}
 	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
 		if ((buflen -= 4) < 0)
@@ -2430,18 +2419,8 @@
 		 * and this is the root of a cross-mounted filesystem.
 		 */
 		if (ignore_crossmnt == 0 &&
-		    dentry == exp->ex_path.mnt->mnt_root) {
-			struct path path = exp->ex_path;
-			path_get(&path);
-			while (follow_up(&path)) {
-				if (path.dentry != path.mnt->mnt_root)
-					break;
-			}
-			err = vfs_getattr(path.mnt, path.dentry, &stat);
-			path_put(&path);
-			if (err)
-				goto out_nfserr;
-		}
+		    dentry == exp->ex_path.mnt->mnt_root)
+			get_parent_attributes(exp, &stat);
 		WRITE64(stat.ino);
 	}
 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
@@ -2927,7 +2906,8 @@
 		  struct nfsd4_read *read)
 {
 	u32 eof;
-	int v, pn;
+	int v;
+	struct page *page;
 	unsigned long maxcount; 
 	long len;
 	__be32 *p;
@@ -2946,11 +2926,15 @@
 	len = maxcount;
 	v = 0;
 	while (len > 0) {
-		pn = resp->rqstp->rq_resused++;
-		resp->rqstp->rq_vec[v].iov_base =
-			page_address(resp->rqstp->rq_respages[pn]);
+		page = *(resp->rqstp->rq_next_page);
+		if (!page) { /* ran out of pages */
+			maxcount -= len;
+			break;
+		}
+		resp->rqstp->rq_vec[v].iov_base = page_address(page);
 		resp->rqstp->rq_vec[v].iov_len =
 			len < PAGE_SIZE ? len : PAGE_SIZE;
+		resp->rqstp->rq_next_page++;
 		v++;
 		len -= PAGE_SIZE;
 	}
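The loop above hands out one response page per PAGE_SIZE of read payload and now stops cleanly when rq_next_page is exhausted, clamping maxcount to what was actually mapped. The number of pages it consumes is a plain round-up division; a quick model (pages_needed is an illustrative helper, not kernel code):

#include <stddef.h>

#define DEMO_PAGE_SIZE	4096UL

static size_t pages_needed(size_t maxcount)
{
	return (maxcount + DEMO_PAGE_SIZE - 1) / DEMO_PAGE_SIZE;
}

/* pages_needed(4096) == 1, pages_needed(4097) == 2 */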
@@ -2996,8 +2980,10 @@
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
+	if (!*resp->rqstp->rq_next_page)
+		return nfserr_resource;
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 
 	maxcount = PAGE_SIZE;
 	RESERVE_SPACE(4);
@@ -3045,6 +3031,8 @@
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
+	if (!*resp->rqstp->rq_next_page)
+		return nfserr_resource;
 
 	RESERVE_SPACE(NFS4_VERIFIER_SIZE);
 	savep = p;
@@ -3071,7 +3059,7 @@
 		goto err_no_verf;
 	}
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 	readdir->common.err = 0;
 	readdir->buflen = maxcount;
 	readdir->buffer = page;
@@ -3094,8 +3082,8 @@
 	p = readdir->buffer;
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xbuf->page_len = ((char*)p) - (char*)page_address(
-		resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	resp->xbuf->page_len = ((char*)p) -
+		(char*)page_address(*(resp->rqstp->rq_next_page-1));
 
 	/* Use rest of head for padding and remaining ops: */
 	resp->xbuf->tail[0].iov_base = tailbase;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index dab350d..7493428 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -19,7 +19,7 @@
 #include "idmap.h"
 #include "nfsd.h"
 #include "cache.h"
-#include "fault_inject.h"
+#include "state.h"
 #include "netns.h"
 
 /*
@@ -186,9 +186,6 @@
 };
 #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
 
-extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
-extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
-
 static const struct file_operations pool_stats_operations = {
 	.open		= nfsd_pool_stats_open,
 	.read		= seq_read,
@@ -399,6 +396,8 @@
 {
 	char *mesg = buf;
 	int rv;
+	struct net *net = &init_net;
+
 	if (size > 0) {
 		int newthreads;
 		rv = get_int(&mesg, &newthreads);
@@ -406,11 +405,11 @@
 			return rv;
 		if (newthreads < 0)
 			return -EINVAL;
-		rv = nfsd_svc(newthreads);
+		rv = nfsd_svc(newthreads, net);
 		if (rv < 0)
 			return rv;
 	} else
-		rv = nfsd_nrthreads();
+		rv = nfsd_nrthreads(net);
 
 	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv);
 }
@@ -448,9 +447,10 @@
 	int len;
 	int npools;
 	int *nthreads;
+	struct net *net = &init_net;
 
 	mutex_lock(&nfsd_mutex);
-	npools = nfsd_nrpools();
+	npools = nfsd_nrpools(net);
 	if (npools == 0) {
 		/*
 		 * NFS is shut down.  The admin can start it by
@@ -478,12 +478,12 @@
 			if (nthreads[i] < 0)
 				goto out_free;
 		}
-		rv = nfsd_set_nrthreads(i, nthreads);
+		rv = nfsd_set_nrthreads(i, nthreads, net);
 		if (rv)
 			goto out_free;
 	}
 
-	rv = nfsd_get_nrthreads(npools, nthreads);
+	rv = nfsd_get_nrthreads(npools, nthreads, net);
 	if (rv)
 		goto out_free;
 
@@ -510,11 +510,13 @@
 	unsigned minor;
 	ssize_t tlen = 0;
 	char *sep;
+	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	if (size>0) {
-		if (nfsd_serv)
+		if (nn->nfsd_serv)
 			/* Cannot change versions without updating
-			 * nfsd_serv->sv_xdrsize, and reallocing
+			 * nn->nfsd_serv->sv_xdrsize, and reallocing
 			 * rq_argp and rq_resp
 			 */
 			return -EBUSY;
@@ -645,11 +647,13 @@
  * Zero-length write.  Return a list of NFSD's current listener
  * transports.
  */
-static ssize_t __write_ports_names(char *buf)
+static ssize_t __write_ports_names(char *buf, struct net *net)
 {
-	if (nfsd_serv == NULL)
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (nn->nfsd_serv == NULL)
 		return 0;
-	return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
+	return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
 }
 
 /*
@@ -657,28 +661,28 @@
  * a socket of a supported family/protocol, and we use it as an
  * nfsd listener.
  */
-static ssize_t __write_ports_addfd(char *buf)
+static ssize_t __write_ports_addfd(char *buf, struct net *net)
 {
 	char *mesg = buf;
 	int fd, err;
-	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	err = get_int(&mesg, &fd);
 	if (err != 0 || fd < 0)
 		return -EINVAL;
 
-	err = nfsd_create_serv();
+	err = nfsd_create_serv(net);
 	if (err != 0)
 		return err;
 
-	err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
+	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
 	if (err < 0) {
 		nfsd_destroy(net);
 		return err;
 	}
 
 	/* Decrease the count, but don't shut down the service */
-	nfsd_serv->sv_nrthreads--;
+	nn->nfsd_serv->sv_nrthreads--;
 	return err;
 }
 
@@ -686,12 +690,12 @@
 * A transport listener is added by writing its transport name and
  * a port number.
  */
-static ssize_t __write_ports_addxprt(char *buf)
+static ssize_t __write_ports_addxprt(char *buf, struct net *net)
 {
 	char transport[16];
 	struct svc_xprt *xprt;
 	int port, err;
-	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	if (sscanf(buf, "%15s %5u", transport, &port) != 2)
 		return -EINVAL;
@@ -699,25 +703,25 @@
 	if (port < 1 || port > USHRT_MAX)
 		return -EINVAL;
 
-	err = nfsd_create_serv();
+	err = nfsd_create_serv(net);
 	if (err != 0)
 		return err;
 
-	err = svc_create_xprt(nfsd_serv, transport, net,
+	err = svc_create_xprt(nn->nfsd_serv, transport, net,
 				PF_INET, port, SVC_SOCK_ANONYMOUS);
 	if (err < 0)
 		goto out_err;
 
-	err = svc_create_xprt(nfsd_serv, transport, net,
+	err = svc_create_xprt(nn->nfsd_serv, transport, net,
 				PF_INET6, port, SVC_SOCK_ANONYMOUS);
 	if (err < 0 && err != -EAFNOSUPPORT)
 		goto out_close;
 
 	/* Decrease the count, but don't shut down the service */
-	nfsd_serv->sv_nrthreads--;
+	nn->nfsd_serv->sv_nrthreads--;
 	return 0;
 out_close:
-	xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port);
+	xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
 	if (xprt != NULL) {
 		svc_close_xprt(xprt);
 		svc_xprt_put(xprt);
@@ -727,16 +731,17 @@
 	return err;
 }
 
-static ssize_t __write_ports(struct file *file, char *buf, size_t size)
+static ssize_t __write_ports(struct file *file, char *buf, size_t size,
+			     struct net *net)
 {
 	if (size == 0)
-		return __write_ports_names(buf);
+		return __write_ports_names(buf, net);
 
 	if (isdigit(buf[0]))
-		return __write_ports_addfd(buf);
+		return __write_ports_addfd(buf, net);
 
 	if (isalpha(buf[0]))
-		return __write_ports_addxprt(buf);
+		return __write_ports_addxprt(buf, net);
 
 	return -EINVAL;
 }
@@ -787,9 +792,10 @@
 static ssize_t write_ports(struct file *file, char *buf, size_t size)
 {
 	ssize_t rv;
+	struct net *net = &init_net;
 
 	mutex_lock(&nfsd_mutex);
-	rv = __write_ports(file, buf, size);
+	rv = __write_ports(file, buf, size, net);
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
@@ -821,6 +827,9 @@
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
+	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	if (size > 0) {
 		int bsize;
 		int rv = get_int(&mesg, &bsize);
@@ -835,7 +844,7 @@
 			bsize = NFSSVC_MAXBLKSIZE;
 		bsize &= ~(1024-1);
 		mutex_lock(&nfsd_mutex);
-		if (nfsd_serv) {
+		if (nn->nfsd_serv) {
 			mutex_unlock(&nfsd_mutex);
 			return -EBUSY;
 		}
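The mask in this hunk snaps the requested block size down to a 1 KiB multiple: ~(1024-1) clears the low ten bits. A standalone check of the arithmetic:

#include <stdio.h>

int main(void)
{
	int bsize = 5000;

	bsize &= ~(1024 - 1);		/* 5000 -> 4096 */
	printf("%d\n", bsize);
	return 0;
}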
@@ -848,13 +857,14 @@
 }
 
 #ifdef CONFIG_NFSD_V4
-static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
+				  time_t *time, struct nfsd_net *nn)
 {
 	char *mesg = buf;
 	int rv, i;
 
 	if (size > 0) {
-		if (nfsd_serv)
+		if (nn->nfsd_serv)
 			return -EBUSY;
 		rv = get_int(&mesg, &i);
 		if (rv)
@@ -879,12 +889,13 @@
 	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
 }
 
-static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
+				time_t *time, struct nfsd_net *nn)
 {
 	ssize_t rv;
 
 	mutex_lock(&nfsd_mutex);
-	rv = __nfsd4_write_time(file, buf, size, time);
+	rv = __nfsd4_write_time(file, buf, size, time, nn);
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
@@ -912,7 +923,8 @@
  */
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 {
-	return nfsd4_write_time(file, buf, size, &nfsd4_lease);
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn);
 }
 
 /**
@@ -927,17 +939,19 @@
  */
 static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
 {
-	return nfsd4_write_time(file, buf, size, &nfsd4_grace);
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+	return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn);
 }
 
-static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
+static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
+				   struct nfsd_net *nn)
 {
 	char *mesg = buf;
 	char *recdir;
 	int len, status;
 
 	if (size > 0) {
-		if (nfsd_serv)
+		if (nn->nfsd_serv)
 			return -EBUSY;
 		if (size > PATH_MAX || buf[size-1] != '\n')
 			return -EINVAL;
@@ -981,9 +995,10 @@
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
 {
 	ssize_t rv;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	mutex_lock(&nfsd_mutex);
-	rv = __write_recoverydir(file, buf, size);
+	rv = __write_recoverydir(file, buf, size, nn);
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
@@ -1063,6 +1078,7 @@
 static __net_init int nfsd_init_net(struct net *net)
 {
 	int retval;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	retval = nfsd_export_init(net);
 	if (retval)
@@ -1070,6 +1086,8 @@
 	retval = nfsd_idmap_init(net);
 	if (retval)
 		goto out_idmap_error;
+	nn->nfsd4_lease = 90;	/* default lease time */
+	nn->nfsd4_grace = 90;
 	return 0;
 
 out_idmap_error:
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 80d5ce4..de23db2 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -55,7 +55,6 @@
 				nfsd_version4;
 extern u32			nfsd_supported_minorversion;
 extern struct mutex		nfsd_mutex;
-extern struct svc_serv		*nfsd_serv;
 extern spinlock_t		nfsd_drc_lock;
 extern unsigned int		nfsd_drc_max_mem;
 extern unsigned int		nfsd_drc_mem_used;
@@ -65,26 +64,17 @@
 /*
  * Function prototypes.
  */
-int		nfsd_svc(int nrservs);
+int		nfsd_svc(int nrservs, struct net *net);
 int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
 
-int		nfsd_nrthreads(void);
-int		nfsd_nrpools(void);
-int		nfsd_get_nrthreads(int n, int *);
-int		nfsd_set_nrthreads(int n, int *);
+int		nfsd_nrthreads(struct net *);
+int		nfsd_nrpools(struct net *);
+int		nfsd_get_nrthreads(int n, int *, struct net *);
+int		nfsd_set_nrthreads(int n, int *, struct net *);
 int		nfsd_pool_stats_open(struct inode *, struct file *);
 int		nfsd_pool_stats_release(struct inode *, struct file *);
 
-static inline void nfsd_destroy(struct net *net)
-{
-	int destroy = (nfsd_serv->sv_nrthreads == 1);
-
-	if (destroy)
-		svc_shutdown_net(nfsd_serv, net);
-	svc_destroy(nfsd_serv);
-	if (destroy)
-		nfsd_serv = NULL;
-}
+void		nfsd_destroy(struct net *net);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
@@ -103,7 +93,7 @@
 int nfsd_vers(int vers, enum vers_op change);
 int nfsd_minorversion(u32 minorversion, enum vers_op change);
 void nfsd_reset_versions(void);
-int nfsd_create_serv(void);
+int nfsd_create_serv(struct net *net);
 
 extern int nfsd_max_blksize;
 
@@ -121,7 +111,9 @@
 int nfsd4_init_slabs(void);
 void nfsd4_free_slabs(void);
 int nfs4_state_start(void);
+int nfs4_state_start_net(struct net *net);
 void nfs4_state_shutdown(void);
+void nfs4_state_shutdown_net(struct net *net);
 void nfs4_reset_lease(time_t leasetime);
 int nfs4_reset_recoverydir(char *recdir);
 char * nfs4_recoverydir(void);
@@ -130,7 +122,9 @@
 static inline int nfsd4_init_slabs(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
 static inline int nfs4_state_start(void) { return 0; }
+static inline int nfs4_state_start_net(struct net *net) { return 0; }
 static inline void nfs4_state_shutdown(void) { }
+static inline void nfs4_state_shutdown_net(struct net *net) { }
 static inline void nfs4_reset_lease(time_t leasetime) { }
 static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
 static inline char * nfs4_recoverydir(void) {return NULL; }
@@ -265,16 +259,8 @@
 /* Check for dir entries '.' and '..' */
 #define isdotent(n, l)	(l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
 
-/*
- * Time of server startup
- */
-extern struct timeval	nfssvc_boot;
-
 #ifdef CONFIG_NFSD_V4
 
-extern time_t nfsd4_lease;
-extern time_t nfsd4_grace;
-
 /* before processing a COMPOUND operation, we have to check that there
  * is enough space in the buffer for XDR encode to succeed.  otherwise,
  * we might process an operation with side effects, and be unable to
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 032af38..814afaa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -572,7 +572,7 @@
 
 		if (inode)
 			_fh_update(fhp, exp, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255) {
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
 			fh_put(fhp);
 			return nfserr_opnotsupp;
 		}
@@ -603,7 +603,7 @@
 			goto out;
 
 		_fh_update(fhp, fhp->fh_export, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255)
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
 			return nfserr_opnotsupp;
 	}
 out:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2013aa00..cee62ab 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/fs_struct.h>
 #include <linux/swap.h>
-#include <linux/nsproxy.h>
 
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
@@ -22,19 +21,19 @@
 #include "nfsd.h"
 #include "cache.h"
 #include "vfs.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
 extern struct svc_program	nfsd_program;
 static int			nfsd(void *vrqstp);
-struct timeval			nfssvc_boot;
 
 /*
- * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
  * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
  * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
  *
- * If (out side the lock) nfsd_serv is non-NULL, then it must point to a
+ * If (outside the lock) nn->nfsd_serv is non-NULL, then it must point to a
  * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
 * of nfsd threads must exist and each must be listed in ->sp_all_threads in each
  * entry of ->sv_pools[].
@@ -52,7 +51,6 @@
  *	nfsd_versions
  */
 DEFINE_MUTEX(nfsd_mutex);
-struct svc_serv 		*nfsd_serv;
 
 /*
  * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
@@ -173,28 +171,32 @@
  */
 #define	NFSD_MAXSERVS		8192
 
-int nfsd_nrthreads(void)
+int nfsd_nrthreads(struct net *net)
 {
 	int rv = 0;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	mutex_lock(&nfsd_mutex);
-	if (nfsd_serv)
-		rv = nfsd_serv->sv_nrthreads;
+	if (nn->nfsd_serv)
+		rv = nn->nfsd_serv->sv_nrthreads;
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
 
-static int nfsd_init_socks(void)
+static int nfsd_init_socks(struct net *net)
 {
 	int error;
-	if (!list_empty(&nfsd_serv->sv_permsocks))
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
 		return 0;
 
-	error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, NFS_PORT,
+	error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
 					SVC_SOCK_DEFAULTS);
 	if (error < 0)
 		return error;
 
-	error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, NFS_PORT,
+	error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
 					SVC_SOCK_DEFAULTS);
 	if (error < 0)
 		return error;
@@ -202,14 +204,15 @@
 	return 0;
 }
 
-static bool nfsd_up = false;
+static int nfsd_users = 0;
 
-static int nfsd_startup(int nrservs)
+static int nfsd_startup_generic(int nrservs)
 {
 	int ret;
 
-	if (nfsd_up)
+	if (nfsd_users++)
 		return 0;
+
 	/*
 	 * Readahead param cache - will no-op if it already exists.
 	 * (Note therefore results will be suboptimal if number of
@@ -218,43 +221,79 @@
 	ret = nfsd_racache_init(2*nrservs);
 	if (ret)
 		return ret;
-	ret = nfsd_init_socks();
-	if (ret)
-		goto out_racache;
-	ret = lockd_up(&init_net);
-	if (ret)
-		goto out_racache;
 	ret = nfs4_state_start();
 	if (ret)
-		goto out_lockd;
-	nfsd_up = true;
+		goto out_racache;
 	return 0;
-out_lockd:
-	lockd_down(&init_net);
+
 out_racache:
 	nfsd_racache_shutdown();
 	return ret;
 }
 
-static void nfsd_shutdown(void)
+static void nfsd_shutdown_generic(void)
 {
+	if (--nfsd_users)
+		return;
+
+	nfs4_state_shutdown();
+	nfsd_racache_shutdown();
+}
+
+static int nfsd_startup_net(int nrservs, struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int ret;
+
+	if (nn->nfsd_net_up)
+		return 0;
+
+	ret = nfsd_startup_generic(nrservs);
+	if (ret)
+		return ret;
+	ret = nfsd_init_socks(net);
+	if (ret)
+		goto out_socks;
+	ret = lockd_up(net);
+	if (ret)
+		goto out_socks;
+	ret = nfs4_state_start_net(net);
+	if (ret)
+		goto out_lockd;
+
+	nn->nfsd_net_up = true;
+	return 0;
+
+out_lockd:
+	lockd_down(net);
+out_socks:
+	nfsd_shutdown_generic();
+	return ret;
+}
+
+static void nfsd_shutdown_net(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	nfs4_state_shutdown_net(net);
+	lockd_down(net);
+	nn->nfsd_net_up = false;
+	nfsd_shutdown_generic();
+}
+
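nfsd_users turns the old nfsd_up boolean into a reference count: the first net namespace to start nfsd pays for the global pieces (readahead cache, NFSv4 state), and the last one down tears them apart. The idiom, stripped to its core with demo_* placeholder names, and with an error path that undoes the count:

static int demo_users;

static int demo_global_init(void) { return 0; }	/* stand-in */
static void demo_global_teardown(void) { }		/* stand-in */

static int demo_get(void)
{
	if (demo_users++)
		return 0;		/* someone already initialized */
	if (demo_global_init()) {
		demo_users--;		/* failed first init: undo the count */
		return -1;
	}
	return 0;
}

static void demo_put(void)
{
	if (--demo_users)
		return;			/* still referenced elsewhere */
	demo_global_teardown();		/* last user cleans up */
}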
+static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	/*
 	 * write_ports can create the server without actually starting
 	 * any threads--if we get shut down before any threads are
 	 * started, then nfsd_last_thread will be run before any of this
 	 * other initialization has been done.
 	 */
-	if (!nfsd_up)
+	if (!nn->nfsd_net_up)
 		return;
-	nfs4_state_shutdown();
-	lockd_down(&init_net);
-	nfsd_racache_shutdown();
-	nfsd_up = false;
-}
-
-static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
-{
-	nfsd_shutdown();
+	nfsd_shutdown_net(net);
 
 	svc_rpcb_cleanup(serv, net);
 
@@ -327,69 +366,84 @@
 	return ret;
 }
 
-int nfsd_create_serv(void)
+int nfsd_create_serv(struct net *net)
 {
 	int error;
-	struct net *net = current->nsproxy->net_ns;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
-	if (nfsd_serv) {
-		svc_get(nfsd_serv);
+	if (nn->nfsd_serv) {
+		svc_get(nn->nfsd_serv);
 		return 0;
 	}
 	if (nfsd_max_blksize == 0)
 		nfsd_max_blksize = nfsd_get_default_max_blksize();
 	nfsd_reset_versions();
-	nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+	nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
 				      nfsd_last_thread, nfsd, THIS_MODULE);
-	if (nfsd_serv == NULL)
+	if (nn->nfsd_serv == NULL)
 		return -ENOMEM;
 
-	error = svc_bind(nfsd_serv, net);
+	error = svc_bind(nn->nfsd_serv, net);
 	if (error < 0) {
-		svc_destroy(nfsd_serv);
+		svc_destroy(nn->nfsd_serv);
 		return error;
 	}
 
 	set_max_drc();
-	do_gettimeofday(&nfssvc_boot);		/* record boot time */
+	do_gettimeofday(&nn->nfssvc_boot);		/* record boot time */
 	return 0;
 }
 
-int nfsd_nrpools(void)
+int nfsd_nrpools(struct net *net)
 {
-	if (nfsd_serv == NULL)
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	if (nn->nfsd_serv == NULL)
 		return 0;
 	else
-		return nfsd_serv->sv_nrpools;
+		return nn->nfsd_serv->sv_nrpools;
 }
 
-int nfsd_get_nrthreads(int n, int *nthreads)
+int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
 {
 	int i = 0;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	if (nfsd_serv != NULL) {
-		for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++)
-			nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads;
+	if (nn->nfsd_serv != NULL) {
+		for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++)
+			nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads;
 	}
 
 	return 0;
 }
 
-int nfsd_set_nrthreads(int n, int *nthreads)
+void nfsd_destroy(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
+
+	if (destroy)
+		svc_shutdown_net(nn->nfsd_serv, net);
+	svc_destroy(nn->nfsd_serv);
+	if (destroy)
+		nn->nfsd_serv = NULL;
+}
+
+int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
 {
 	int i = 0;
 	int tot = 0;
 	int err = 0;
-	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 
-	if (nfsd_serv == NULL || n <= 0)
+	if (nn->nfsd_serv == NULL || n <= 0)
 		return 0;
 
-	if (n > nfsd_serv->sv_nrpools)
-		n = nfsd_serv->sv_nrpools;
+	if (n > nn->nfsd_serv->sv_nrpools)
+		n = nn->nfsd_serv->sv_nrpools;
 
 	/* enforce a global maximum number of threads */
 	tot = 0;
@@ -419,9 +473,9 @@
 		nthreads[0] = 1;
 
 	/* apply the new numbers */
-	svc_get(nfsd_serv);
+	svc_get(nn->nfsd_serv);
 	for (i = 0; i < n; i++) {
-		err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
+		err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i],
 				    	  nthreads[i]);
 		if (err)
 			break;
@@ -436,11 +490,11 @@
  * this is the first time nrservs is nonzero.
  */
 int
-nfsd_svc(int nrservs)
+nfsd_svc(int nrservs, struct net *net)
 {
 	int	error;
 	bool	nfsd_up_before;
-	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	mutex_lock(&nfsd_mutex);
 	dprintk("nfsd: creating service\n");
@@ -449,29 +503,29 @@
 	if (nrservs > NFSD_MAXSERVS)
 		nrservs = NFSD_MAXSERVS;
 	error = 0;
-	if (nrservs == 0 && nfsd_serv == NULL)
+	if (nrservs == 0 && nn->nfsd_serv == NULL)
 		goto out;
 
-	error = nfsd_create_serv();
+	error = nfsd_create_serv(net);
 	if (error)
 		goto out;
 
-	nfsd_up_before = nfsd_up;
+	nfsd_up_before = nn->nfsd_net_up;
 
-	error = nfsd_startup(nrservs);
+	error = nfsd_startup_net(nrservs, net);
 	if (error)
 		goto out_destroy;
-	error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
+	error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
 	if (error)
 		goto out_shutdown;
-	/* We are holding a reference to nfsd_serv which
+	/* We are holding a reference to nn->nfsd_serv which
 	 * we don't want to count in the return value,
 	 * so subtract 1
 	 */
-	error = nfsd_serv->sv_nrthreads - 1;
+	error = nn->nfsd_serv->sv_nrthreads - 1;
 out_shutdown:
 	if (error < 0 && !nfsd_up_before)
-		nfsd_shutdown();
+		nfsd_shutdown_net(net);
 out_destroy:
 	nfsd_destroy(net);		/* Release server */
 out:
@@ -487,6 +541,8 @@
 nfsd(void *vrqstp)
 {
 	struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
+	struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
+	struct net *net = perm_sock->xpt_net;
 	int err;
 
 	/* Lock module and set up kernel thread */
@@ -551,7 +607,7 @@
 	/* Release the thread */
 	svc_exit_thread(rqstp);
 
-	nfsd_destroy(&init_net);
+	nfsd_destroy(net);
 
 	/* Release module */
 	mutex_unlock(&nfsd_mutex);
@@ -640,21 +696,24 @@
 	}
 
 	/* Store reply in cache. */
-	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
+	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
 	return 1;
 }
 
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 {
 	int ret;
+	struct net *net = &init_net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	mutex_lock(&nfsd_mutex);
-	if (nfsd_serv == NULL) {
+	if (nn->nfsd_serv == NULL) {
 		mutex_unlock(&nfsd_mutex);
 		return -ENODEV;
 	}
 	/* bump up the pseudo refcount while traversing */
-	svc_get(nfsd_serv);
-	ret = svc_pool_stats_open(nfsd_serv, file);
+	svc_get(nn->nfsd_serv);
+	ret = svc_pool_stats_open(nn->nfsd_serv, file);
 	mutex_unlock(&nfsd_mutex);
 	return ret;
 }
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 65ec595..979b421 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -246,7 +246,7 @@
 					struct nfsd_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
 
@@ -262,8 +262,9 @@
 	 */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -355,7 +356,7 @@
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -396,7 +397,7 @@
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e036894..d1c229f 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -150,6 +150,12 @@
 	u32		rdma_attrs;
 };
 
+struct nfsd4_cb_sec {
+	u32	flavor; /* (u32)(-1) used to mean "no valid flavor" */
+	u32	uid;
+	u32	gid;
+};
+
 struct nfsd4_create_session {
 	clientid_t			clientid;
 	struct nfs4_sessionid		sessionid;
@@ -158,8 +164,12 @@
 	struct nfsd4_channel_attrs	fore_channel;
 	struct nfsd4_channel_attrs	back_channel;
 	u32				callback_prog;
-	u32				uid;
-	u32				gid;
+	struct nfsd4_cb_sec		cb_sec;
+};
+
+struct nfsd4_backchannel_ctl {
+	u32	bc_cb_program;
+	struct nfsd4_cb_sec		bc_cb_sec;
 };
 
 struct nfsd4_bind_conn_to_session {
@@ -192,6 +202,7 @@
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;
 	struct nfsd4_channel_attrs se_bchannel;
+	struct nfsd4_cb_sec	se_cb_sec;
 	struct list_head	se_conns;
 	u32			se_cb_prog;
 	u32			se_cb_seq_nr;
@@ -221,13 +232,12 @@
  */
 struct nfs4_client {
 	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
-	struct list_head	cl_strhash; 	/* hash by cl_name */
+	struct rb_node		cl_namenode;	/* link into by-name trees */
 	struct list_head	cl_openowners;
 	struct idr		cl_stateids;	/* stateid lookup */
 	struct list_head	cl_delegations;
 	struct list_head        cl_lru;         /* tail queue */
 	struct xdr_netobj	cl_name; 	/* id generated by client */
-	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
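Replacing cl_strhash with cl_namenode means clients are now looked up by name through red-black trees rather than hash chains. A sketch of the lookup such a tree supports; compare_blob and the tree root are assumed helpers, not shown in this hunk:

#include <linux/rbtree.h>

static struct nfs4_client *
demo_find_client_by_name(struct rb_root *root, struct xdr_netobj *name)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct nfs4_client *clp =
			rb_entry(node, struct nfs4_client, cl_namenode);
		long cmp = compare_blob(&clp->cl_name, name);	/* assumed */

		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else
			return clp;	/* exact name match */
	}
	return NULL;
}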
@@ -242,9 +252,11 @@
 #define NFSD4_CLIENT_CB_KILL		(1)
 #define NFSD4_CLIENT_STABLE		(2)	/* client on stable storage */
 #define NFSD4_CLIENT_RECLAIM_COMPLETE	(3)	/* reclaim_complete done */
+#define NFSD4_CLIENT_CONFIRMED		(4)	/* client is confirmed */
 #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
 					 1 << NFSD4_CLIENT_CB_KILL)
 	unsigned long		cl_flags;
+	struct rpc_cred		*cl_cb_cred;
 	struct rpc_clnt		*cl_cb_client;
 	u32			cl_cb_ident;
 #define NFSD4_CB_UP		0
@@ -271,6 +283,7 @@
 	unsigned long		cl_cb_slot_busy;
 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
 						/* wait here for slots */
+	struct net		*net;
 };
 
 static inline void
@@ -292,6 +305,7 @@
  */
 struct nfs4_client_reclaim {
 	struct list_head	cr_strhash;	/* hash by cr_name */
+	struct nfs4_client	*cr_clp;	/* pointer to associated clp */
 	char			cr_recdir[HEXDIR_LEN]; /* recover dir */
 };
 
@@ -452,25 +466,26 @@
 		stateid_t *stateid, int flags, struct file **filp);
 extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
-extern int nfs4_in_grace(void);
-extern void nfs4_release_reclaim(void);
-extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp);
-extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions);
+void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
+extern void nfs4_release_reclaim(struct nfsd_net *);
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
+							struct nfsd_net *nn);
+extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn);
 extern void nfs4_free_openowner(struct nfs4_openowner *);
 extern void nfs4_free_lockowner(struct nfs4_lockowner *);
 extern int set_callback_cred(void);
+extern void nfsd4_init_callback(struct nfsd4_callback *);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
 extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
-extern void nfsd4_do_callback_rpc(struct work_struct *);
 extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
 extern int nfsd4_create_callback_queue(void);
 extern void nfsd4_destroy_callback_queue(void);
 extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
-extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
-extern int nfs4_client_to_reclaim(const char *name);
-extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
+extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
+							struct nfsd_net *nn);
+extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
 extern void release_session_client(struct nfsd4_session *);
 extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
 
@@ -480,5 +495,28 @@
 extern void nfsd4_client_record_create(struct nfs4_client *clp);
 extern void nfsd4_client_record_remove(struct nfs4_client *clp);
 extern int nfsd4_client_record_check(struct nfs4_client *clp);
-extern void nfsd4_record_grace_done(struct net *net, time_t boot_time);
+extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
+
+/* nfs fault injection functions */
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_cleanup(void);
+u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64));
+struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t);
+
+u64 nfsd_forget_client(struct nfs4_client *, u64);
+u64 nfsd_forget_client_locks(struct nfs4_client*, u64);
+u64 nfsd_forget_client_openowners(struct nfs4_client *, u64);
+u64 nfsd_forget_client_delegations(struct nfs4_client *, u64);
+u64 nfsd_recall_client_delegations(struct nfs4_client *, u64);
+
+u64 nfsd_print_client(struct nfs4_client *, u64);
+u64 nfsd_print_client_locks(struct nfs4_client *, u64);
+u64 nfsd_print_client_openowners(struct nfs4_client *, u64);
+u64 nfsd_print_client_delegations(struct nfs4_client *, u64);
+#else /* CONFIG_NFSD_FAULT_INJECTION */
+static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_cleanup(void) {}
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
 #endif   /* NFSD4_STATE_H */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c120b48..f0a6d88 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -886,7 +886,7 @@
 		  struct splice_desc *sd)
 {
 	struct svc_rqst *rqstp = sd->u.data;
-	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+	struct page **pp = rqstp->rq_next_page;
 	struct page *page = buf->page;
 	size_t size;
 
@@ -894,17 +894,15 @@
 
 	if (rqstp->rq_res.page_len == 0) {
 		get_page(page);
-		put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_base = buf->offset;
 		rqstp->rq_res.page_len = size;
 	} else if (page != pp[-1]) {
 		get_page(page);
-		if (*pp)
-			put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		if (*rqstp->rq_next_page)
+			put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_len += size;
 	} else
 		rqstp->rq_res.page_len += size;
@@ -936,7 +934,8 @@
 			.u.data		= rqstp,
 		};
 
-		rqstp->rq_resused = 1;
+		WARN_ON_ONCE(rqstp->rq_next_page != rqstp->rq_respages + 1);
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
 		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	} else {
 		oldfs = get_fs();
@@ -1020,28 +1019,10 @@
 	inode = dentry->d_inode;
 	exp   = fhp->fh_export;
 
-	/*
-	 * Request sync writes if
-	 *  -	the sync export option has been set, or
-	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
-	 *  -   The file system doesn't support fsync().
-	 * When NFSv2 gathered writes have been configured for this volume,
-	 * flushing the data to disk is handled separately below.
-	 */
 	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
 
-	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
-	       stable = 2;
-	       *stablep = 2; /* FILE_SYNC */
-	}
-
 	if (!EX_ISSYNC(exp))
 		stable = 0;
-	if (stable && !use_wgather) {
-		spin_lock(&file->f_lock);
-		file->f_flags |= O_SYNC;
-		spin_unlock(&file->f_lock);
-	}
 
 	/* Write the data. */
 	oldfs = get_fs(); set_fs(KERNEL_DS);
@@ -1057,8 +1038,12 @@
 	if (inode->i_mode & (S_ISUID | S_ISGID))
 		kill_suid(dentry);
 
-	if (stable && use_wgather)
-		host_err = wait_for_concurrent_writes(file);
+	if (stable) {
+		if (use_wgather)
+			host_err = wait_for_concurrent_writes(file);
+		else
+			host_err = vfs_fsync_range(file, offset, offset+*cnt, 0);
+	}
 
 out_nfserr:
 	dprintk("nfsd: write complete host_err=%d\n", host_err);
@@ -1485,13 +1470,19 @@
 		case NFS3_CREATE_EXCLUSIVE:
 			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
 			    && dchild->d_inode->i_atime.tv_sec == v_atime
-			    && dchild->d_inode->i_size  == 0 )
+			    && dchild->d_inode->i_size  == 0 ) {
+				if (created)
+					*created = 1;
 				break;
+			}
 		case NFS4_CREATE_EXCLUSIVE4_1:
 			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
 			    && dchild->d_inode->i_atime.tv_sec == v_atime
-			    && dchild->d_inode->i_size  == 0 )
+			    && dchild->d_inode->i_size  == 0 ) {
+				if (created)
+					*created = 1;
 				goto set_attr;
+			}
 			 /* fallthru */
 		case NFS3_CREATE_GUARDED:
 			err = nfserr_exist;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index acd127d..0889bfb 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -385,7 +385,8 @@
 	u64		wr_offset;          /* request */
 	u32		wr_stable_how;      /* request */
 	u32		wr_buflen;          /* request */
-	int		wr_vlen;
+	struct kvec	wr_head;
+	struct page **	wr_pagelist;        /* request */
 
 	u32		wr_bytes_written;   /* response */
 	u32		wr_how_written;     /* response */
@@ -462,6 +463,7 @@
 
 		/* NFSv4.1 */
 		struct nfsd4_exchange_id	exchange_id;
+		struct nfsd4_backchannel_ctl	backchannel_ctl;
 		struct nfsd4_bind_conn_to_session bind_conn_to_session;
 		struct nfsd4_create_session	create_session;
 		struct nfsd4_destroy_session	destroy_session;
@@ -526,6 +528,14 @@
 		|| nfsd4_is_solo_sequence(resp);
 }
 
+static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
+{
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
+	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+	return argp->opcnt == resp->opcnt;
+}
+
 #define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
 
 static inline void
@@ -566,6 +576,7 @@
 		struct nfsd4_sequence *seq);
 extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
+extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
 extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *);
 extern __be32 nfsd4_create_session(struct svc_rqst *,
 		struct nfsd4_compound_state *,
@@ -579,7 +590,7 @@
 extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *);
 __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
 extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
-		struct nfsd4_open *open);
+		struct nfsd4_open *open, struct nfsd_net *nn);
 extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
 		struct svc_fh *current_fh, struct nfsd4_open *open);
 extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 16f35f7..6194688 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -167,7 +167,6 @@
 };
 
 const struct inode_operations nilfs_file_inode_operations = {
-	.truncate	= nilfs_truncate,
 	.setattr	= nilfs_setattr,
 	.permission     = nilfs_permission,
 	.fiemap		= nilfs_fiemap,
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 4d31d2c..6b49f14 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -213,6 +213,16 @@
 	return ret;
 }
 
+void nilfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		nilfs_truncate(inode);
+	}
+}
+
 static int nilfs_write_begin(struct file *file, struct address_space *mapping,
 			     loff_t pos, unsigned len, unsigned flags,
 			     struct page **pagep, void **fsdata)
@@ -227,10 +237,7 @@
 	err = block_write_begin(mapping, pos, len, flags, pagep,
 				nilfs_get_block);
 	if (unlikely(err)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-
+		nilfs_write_failed(mapping, pos + len);
 		nilfs_transaction_abort(inode->i_sb);
 	}
 	return err;
@@ -259,6 +266,7 @@
 		loff_t offset, unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t size;
 
@@ -278,7 +286,7 @@
 		loff_t end = offset + iov_length(iov, nr_segs);
 
 		if (end > isize)
-			vmtruncate(inode, isize);
+			nilfs_write_failed(mapping, end);
 	}
 
 	return size;
@@ -786,10 +794,8 @@
 	if ((iattr->ia_valid & ATTR_SIZE) &&
 	    iattr->ia_size != i_size_read(inode)) {
 		inode_dio_wait(inode);
-
-		err = vmtruncate(inode, iattr->ia_size);
-		if (unlikely(err))
-			goto out_err;
+		truncate_setsize(inode, iattr->ia_size);
+		nilfs_truncate(inode);
 	}
 
 	setattr_copy(inode, iattr);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 74cece8..9bc72de 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -277,6 +277,7 @@
 extern void nilfs_truncate(struct inode *);
 extern void nilfs_evict_inode(struct inode *);
 extern int nilfs_setattr(struct dentry *, struct iattr *);
+extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
 int nilfs_permission(struct inode *inode, int mask);
 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
 extern int nilfs_inode_dirty(struct inode *);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index f1626f5..ff00a0b 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -527,7 +527,8 @@
 		if (unlikely(err)) {
 			loff_t isize = inode->i_size;
 			if (pos + blocksize > isize)
-				vmtruncate(inode, isize);
+				nilfs_write_failed(inode->i_mapping,
+							pos + blocksize);
 			goto failed_inode;
 		}
 
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index ae5f33a..96d3420 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o \
-				   mark.o vfsmount_mark.o
+				   mark.o vfsmount_mark.o fdinfo.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 3344bdd..08b886f 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -201,7 +201,7 @@
 
 	/* nothing else could have found us thanks to the dnotify_mark_mutex */
 	if (dn_mark->dn == NULL)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
 
@@ -385,7 +385,7 @@
 	spin_unlock(&fsn_mark->lock);
 
 	if (destroy)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
 	fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index a506360..0c2f912 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -18,6 +18,12 @@
 	    old->tgid == new->tgid) {
 		switch (old->data_type) {
 		case (FSNOTIFY_EVENT_PATH):
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+			/* don't merge two permission events */
+			if ((old->mask & FAN_ALL_PERM_EVENTS) &&
+			    (new->mask & FAN_ALL_PERM_EVENTS))
+				return false;
+#endif
 			if ((old->path.mnt == new->path.mnt) &&
 			    (old->path.dentry == new->path.dentry))
 				return true;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6fcaeb8..9ff4a5e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -17,6 +17,7 @@
 #include <asm/ioctls.h>
 
 #include "../../mount.h"
+#include "../fdinfo.h"
 
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
 #define FANOTIFY_DEFAULT_MAX_MARKS	8192
@@ -396,8 +397,12 @@
 
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
+
+	if (file->f_flags & FASYNC)
+		fsnotify_fasync(-1, file, 0);
+
 	/* matches the fanotify_init->fsnotify_alloc_group */
-	fsnotify_put_group(group);
+	fsnotify_destroy_group(group);
 
 	return 0;
 }
@@ -428,6 +433,7 @@
 }
 
 static const struct file_operations fanotify_fops = {
+	.show_fdinfo	= fanotify_show_fdinfo,
 	.poll		= fanotify_poll,
 	.read		= fanotify_read,
 	.write		= fanotify_write,
@@ -491,7 +497,8 @@
 
 static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
 					    __u32 mask,
-					    unsigned int flags)
+					    unsigned int flags,
+					    int *destroy)
 {
 	__u32 oldmask;
 
@@ -505,8 +512,7 @@
 	}
 	spin_unlock(&fsn_mark->lock);
 
-	if (!(oldmask & ~mask))
-		fsnotify_destroy_mark(fsn_mark);
+	*destroy = !(oldmask & ~mask);
 
 	return mask & oldmask;
 }
@@ -517,12 +523,17 @@
 {
 	struct fsnotify_mark *fsn_mark = NULL;
 	__u32 removed;
+	int destroy_mark;
 
 	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
 	if (!fsn_mark)
 		return -ENOENT;
 
-	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
+						 &destroy_mark);
+	if (destroy_mark)
+		fsnotify_destroy_mark(fsn_mark, group);
+
 	fsnotify_put_mark(fsn_mark);
 	if (removed & real_mount(mnt)->mnt_fsnotify_mask)
 		fsnotify_recalc_vfsmount_mask(mnt);
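The destroy decision moves out of fanotify_mark_remove_from_mask() because that helper runs under fsn_mark->lock (a spinlock) while fsnotify_destroy_mark() may block; the new *destroy out-parameter lets callers act once the lock is dropped. The general shape of the idiom, with demo_* placeholder names:

#include <linux/spinlock.h>

struct demo_obj {
	spinlock_t lock;
	unsigned int flags;
};

static void demo_obj_destroy(struct demo_obj *o);	/* may sleep */

static void demo_remove_flags(struct demo_obj *o, unsigned int flags)
{
	bool destroy;

	spin_lock(&o->lock);
	o->flags &= ~flags;
	destroy = (o->flags == 0);	/* only decide under the lock */
	spin_unlock(&o->lock);

	if (destroy)
		demo_obj_destroy(o);	/* blocking work runs unlocked */
}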
@@ -536,12 +547,16 @@
 {
 	struct fsnotify_mark *fsn_mark = NULL;
 	__u32 removed;
+	int destroy_mark;
 
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return -ENOENT;
 
-	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
+						 &destroy_mark);
+	if (destroy_mark)
+		fsnotify_destroy_mark(fsn_mark, group);
 	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
 	if (removed & inode->i_fsnotify_mask)
@@ -708,13 +723,13 @@
 		break;
 	default:
 		fd = -EINVAL;
-		goto out_put_group;
+		goto out_destroy_group;
 	}
 
 	if (flags & FAN_UNLIMITED_QUEUE) {
 		fd = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
-			goto out_put_group;
+			goto out_destroy_group;
 		group->max_events = UINT_MAX;
 	} else {
 		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
@@ -723,7 +738,7 @@
 	if (flags & FAN_UNLIMITED_MARKS) {
 		fd = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
-			goto out_put_group;
+			goto out_destroy_group;
 		group->fanotify_data.max_marks = UINT_MAX;
 	} else {
 		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
@@ -731,12 +746,12 @@
 
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
 	if (fd < 0)
-		goto out_put_group;
+		goto out_destroy_group;
 
 	return fd;
 
-out_put_group:
-	fsnotify_put_group(group);
+out_destroy_group:
+	fsnotify_destroy_group(group);
 	return fd;
 }
 
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
new file mode 100644
index 0000000..238a593
--- /dev/null
+++ b/fs/notify/fdinfo.c
@@ -0,0 +1,179 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/inotify.h>
+#include <linux/fanotify.h>
+#include <linux/kernel.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/exportfs.h>
+
+#include "inotify/inotify.h"
+#include "../fs/mount.h"
+
+#if defined(CONFIG_PROC_FS)
+
+#if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY)
+
+static int show_fdinfo(struct seq_file *m, struct file *f,
+		       int (*show)(struct seq_file *m, struct fsnotify_mark *mark))
+{
+	struct fsnotify_group *group = f->private_data;
+	struct fsnotify_mark *mark;
+	int ret = 0;
+
+	mutex_lock(&group->mark_mutex);
+	list_for_each_entry(mark, &group->marks_list, g_list) {
+		ret = show(m, mark);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&group->mark_mutex);
+	return ret;
+}
+
+#if defined(CONFIG_EXPORTFS)
+static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
+{
+	struct {
+		struct file_handle handle;
+		u8 pad[64];
+	} f;
+	int size, ret, i;
+
+	f.handle.handle_bytes = sizeof(f.pad);
+	size = f.handle.handle_bytes >> 2;
+
+	ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
+	if ((ret == 255) || (ret == -ENOSPC)) {
+		WARN_ONCE(1, "Can't encode file handle for inotify: %d\n", ret);
+		return 0;
+	}
+
+	f.handle.handle_type = ret;
+	f.handle.handle_bytes = size * sizeof(u32);
+
+	ret = seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:",
+			 f.handle.handle_bytes, f.handle.handle_type);
+
+	for (i = 0; i < f.handle.handle_bytes; i++)
+		ret |= seq_printf(m, "%02x", (int)f.handle.f_handle[i]);
+
+	return ret;
+}
+#else
+static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_INOTIFY_USER
+
+static int inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+{
+	struct inotify_inode_mark *inode_mark;
+	struct inode *inode;
+	int ret = 0;
+
+	if (!(mark->flags & (FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_INODE)))
+		return 0;
+
+	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
+	inode = igrab(mark->i.inode);
+	if (inode) {
+		ret = seq_printf(m, "inotify wd:%x ino:%lx sdev:%x "
+				 "mask:%x ignored_mask:%x ",
+				 inode_mark->wd, inode->i_ino,
+				 inode->i_sb->s_dev,
+				 mark->mask, mark->ignored_mask);
+		ret |= show_mark_fhandle(m, inode);
+		ret |= seq_putc(m, '\n');
+		iput(inode);
+	}
+
+	return ret;
+}
+
+int inotify_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	return show_fdinfo(m, f, inotify_fdinfo);
+}
+
+#endif /* CONFIG_INOTIFY_USER */
+
+#ifdef CONFIG_FANOTIFY
+
+static int fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+{
+	unsigned int mflags = 0;
+	struct inode *inode;
+	int ret = 0;
+
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
+		return 0;
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
+		mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+		inode = igrab(mark->i.inode);
+		if (!inode)
+			goto out;
+		ret = seq_printf(m, "fanotify ino:%lx sdev:%x "
+				 "mflags:%x mask:%x ignored_mask:%x ",
+				 inode->i_ino, inode->i_sb->s_dev,
+				 mflags, mark->mask, mark->ignored_mask);
+		ret |= show_mark_fhandle(m, inode);
+		ret |= seq_putc(m, '\n');
+		iput(inode);
+	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) {
+		struct mount *mnt = real_mount(mark->m.mnt);
+
+		ret = seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x "
+				 "ignored_mask:%x\n", mnt->mnt_id, mflags,
+				 mark->mask, mark->ignored_mask);
+	}
+out:
+	return ret;
+}
+
+int fanotify_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct fsnotify_group *group = f->private_data;
+	unsigned int flags = 0;
+
+	switch (group->priority) {
+	case FS_PRIO_0:
+		flags |= FAN_CLASS_NOTIF;
+		break;
+	case FS_PRIO_1:
+		flags |= FAN_CLASS_CONTENT;
+		break;
+	case FS_PRIO_2:
+		flags |= FAN_CLASS_PRE_CONTENT;
+		break;
+	}
+
+	if (group->max_events == UINT_MAX)
+		flags |= FAN_UNLIMITED_QUEUE;
+
+	if (group->fanotify_data.max_marks == UINT_MAX)
+		flags |= FAN_UNLIMITED_MARKS;
+
+	seq_printf(m, "fanotify flags:%x event-flags:%x\n",
+		   flags, group->fanotify_data.f_flags);
+
+	return show_fdinfo(m, f, fanotify_fdinfo);
+}
+
+#endif /* CONFIG_FANOTIFY */
+
+#endif /* CONFIG_INOTIFY_USER || CONFIG_FANOTIFY */
+
+#endif /* CONFIG_PROC_FS */
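
The new fdinfo lines are plain text in exactly the formats the seq_printf()
calls above emit. A minimal userspace reader, as a sketch (the wd/ino/mask
values in the sample comment are illustrative):

#include <stdio.h>
#include <sys/inotify.h>

int main(void)
{
	char path[64], line[256];
	int fd = inotify_init();
	FILE *f;

	if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_CREATE) < 0) {
		perror("inotify");
		return 1;
	}
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
	f = fopen(path, "r");
	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	/*
	 * Expect something like (values illustrative):
	 *   pos:	0
	 *   flags:	00
	 *   inotify wd:1 ino:6b sdev:800001 mask:100 ignored_mask:0 fhandle-bytes:8 fhandle-type:1 f_handle:...
	 */
	fclose(f);
	return 0;
}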
diff --git a/fs/notify/fdinfo.h b/fs/notify/fdinfo.h
new file mode 100644
index 0000000..556afda
--- /dev/null
+++ b/fs/notify/fdinfo.h
@@ -0,0 +1,27 @@
+#ifndef __FSNOTIFY_FDINFO_H__
+#define __FSNOTIFY_FDINFO_H__
+
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+
+struct seq_file;
+struct file;
+
+#ifdef CONFIG_PROC_FS
+
+#ifdef CONFIG_INOTIFY_USER
+extern int inotify_show_fdinfo(struct seq_file *m, struct file *f);
+#endif
+
+#ifdef CONFIG_FANOTIFY
+extern int fanotify_show_fdinfo(struct seq_file *m, struct file *f);
+#endif
+
+#else /* CONFIG_PROC_FS */
+
+#define inotify_show_fdinfo	NULL
+#define fanotify_show_fdinfo	NULL
+
+#endif /* CONFIG_PROC_FS */
+
+#endif /* __FSNOTIFY_FDINFO_H__ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 63fc294..bd2625b 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -33,9 +33,6 @@
  */
 void fsnotify_final_destroy_group(struct fsnotify_group *group)
 {
-	/* clear the notification queue of all events */
-	fsnotify_flush_notify(group);
-
 	if (group->ops->free_group_priv)
 		group->ops->free_group_priv(group);
 
@@ -43,23 +40,30 @@
 }
 
 /*
- * Trying to get rid of a group.  We need to first get rid of any outstanding
- * allocations and then free the group.  Remember that fsnotify_clear_marks_by_group
- * could miss marks that are being freed by inode and those marks could still
- * hold a reference to this group (via group->num_marks)  If we get into that
- * situtation, the fsnotify_final_destroy_group will get called when that final
- * mark is freed.
+ * Trying to get rid of a group. Remove all marks, flush all events and release
+ * the group reference.
+ * Note that another thread calling fsnotify_clear_marks_by_group() may still
+ * hold a ref to the group.
  */
-static void fsnotify_destroy_group(struct fsnotify_group *group)
+void fsnotify_destroy_group(struct fsnotify_group *group)
 {
 	/* clear all inode marks for this group */
 	fsnotify_clear_marks_by_group(group);
 
 	synchronize_srcu(&fsnotify_mark_srcu);
 
-	/* past the point of no return, matches the initial value of 1 */
-	if (atomic_dec_and_test(&group->num_marks))
-		fsnotify_final_destroy_group(group);
+	/* clear the notification queue of all events */
+	fsnotify_flush_notify(group);
+
+	fsnotify_put_group(group);
+}
+
+/*
+ * Get reference to a group.
+ */
+void fsnotify_get_group(struct fsnotify_group *group)
+{
+	atomic_inc(&group->refcnt);
 }
 
 /*
@@ -68,7 +72,7 @@
 void fsnotify_put_group(struct fsnotify_group *group)
 {
 	if (atomic_dec_and_test(&group->refcnt))
-		fsnotify_destroy_group(group);
+		fsnotify_final_destroy_group(group);
 }
 
 /*
@@ -84,21 +88,24 @@
 
 	/* set to 0 when there are no external references to this group */
 	atomic_set(&group->refcnt, 1);
-	/*
-	 * hits 0 when there are no external references AND no marks for
-	 * this group
-	 */
-	atomic_set(&group->num_marks, 1);
+	atomic_set(&group->num_marks, 0);
 
 	mutex_init(&group->notification_mutex);
 	INIT_LIST_HEAD(&group->notification_list);
 	init_waitqueue_head(&group->notification_waitq);
 	group->max_events = UINT_MAX;
 
-	spin_lock_init(&group->mark_lock);
+	mutex_init(&group->mark_mutex);
 	INIT_LIST_HEAD(&group->marks_list);
 
 	group->ops = ops;
 
 	return group;
 }
+
+int fsnotify_fasync(int fd, struct file *file, int on)
+{
+	struct fsnotify_group *group = file->private_data;
+
+	return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO;
+}
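
With this rework group->refcnt is the only counter that frees the group:
fsnotify_destroy_group() tears down the marks and the event queue and drops
the initial reference, and the final fsnotify_put_group() runs
fsnotify_final_destroy_group(). A hypothetical backend's open/release pair
under the new rules ("myfs" names are invented for illustration):

#include <linux/fs.h>
#include <linux/fsnotify_backend.h>

static const struct fsnotify_ops myfs_fsnotify_ops;	/* hypothetical */

static int myfs_watch_open(struct inode *inode, struct file *file)
{
	struct fsnotify_group *group;

	group = fsnotify_alloc_group(&myfs_fsnotify_ops);
	if (IS_ERR(group))
		return PTR_ERR(group);

	file->private_data = group;	/* holds the initial reference */
	return 0;
}

static int myfs_watch_release(struct inode *inode, struct file *file)
{
	struct fsnotify_group *group = file->private_data;

	/* marks, queued events and the initial ref go away together */
	fsnotify_destroy_group(group);
	return 0;
}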
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index b13c00a..f31e90f 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -63,8 +63,8 @@
 {
 	struct inode *inode = mark->i.inode;
 
+	BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&mark->group->mark_lock);
 
 	spin_lock(&inode->i_lock);
 
@@ -99,8 +99,16 @@
 	spin_unlock(&inode->i_lock);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
-		fsnotify_destroy_mark(mark);
+		struct fsnotify_group *group;
+
+		spin_lock(&mark->lock);
+		fsnotify_get_group(mark->group);
+		group = mark->group;
+		spin_unlock(&mark->lock);
+
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
+		fsnotify_put_group(group);
 	}
 }
 
@@ -116,8 +124,9 @@
  * given a group and inode, find the mark associated with that combination.
  * if found take a reference to that mark and return it, else return NULL
  */
-struct fsnotify_mark *fsnotify_find_inode_mark_locked(struct fsnotify_group *group,
-						      struct inode *inode)
+static struct fsnotify_mark *fsnotify_find_inode_mark_locked(
+		struct fsnotify_group *group,
+		struct inode *inode)
 {
 	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
@@ -191,8 +200,8 @@
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
 
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&group->mark_lock);
 
 	spin_lock(&inode->i_lock);
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index e3cbd74..871569c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -118,6 +118,7 @@
 
 	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
 
+	fsnotify_get_group(group);
 	fsn_event_priv->group = group;
 	event_priv->wd = wd;
 
@@ -131,7 +132,7 @@
 	}
 
 	if (inode_mark->mask & IN_ONESHOT)
-		fsnotify_destroy_mark(inode_mark);
+		fsnotify_destroy_mark(inode_mark, group);
 
 	return ret;
 }
@@ -210,6 +211,7 @@
 	event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
 				  fsnotify_event_priv_data);
 
+	fsnotify_put_group(fsn_event_priv->group);
 	kmem_cache_free(event_priv_cachep, event_priv);
 }
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index c311dda..228a2c2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -40,6 +40,7 @@
 #include <linux/wait.h>
 
 #include "inotify.h"
+#include "../fdinfo.h"
 
 #include <asm/ioctls.h>
 
@@ -264,7 +265,7 @@
 		ret = -EAGAIN;
 		if (file->f_flags & O_NONBLOCK)
 			break;
-		ret = -EINTR;
+		ret = -ERESTARTSYS;
 		if (signal_pending(current))
 			break;
 
@@ -280,23 +281,17 @@
 	return ret;
 }
 
-static int inotify_fasync(int fd, struct file *file, int on)
-{
-	struct fsnotify_group *group = file->private_data;
-
-	return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
-}
-
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
-	fsnotify_clear_marks_by_group(group);
+	if (file->f_flags & FASYNC)
+		fsnotify_fasync(-1, file, 0);
 
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
-	fsnotify_put_group(group);
+	fsnotify_destroy_group(group);
 
 	return 0;
 }
@@ -335,9 +330,10 @@
 }
 
 static const struct file_operations inotify_fops = {
+	.show_fdinfo	= inotify_show_fdinfo,
 	.poll		= inotify_poll,
 	.read		= inotify_read,
-	.fasync		= inotify_fasync,
+	.fasync		= fsnotify_fasync,
 	.release	= inotify_release,
 	.unlocked_ioctl	= inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
@@ -519,13 +515,13 @@
 	struct fsnotify_event_private_data *fsn_event_priv;
 	int ret;
 
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
+
 	ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
 					      FSNOTIFY_EVENT_NONE, NULL, 0,
 					      GFP_NOFS);
 	if (!ignored_event)
-		return;
-
-	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
+		goto skip_send_ignore;
 
 	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
 	if (unlikely(!event_priv))
@@ -533,6 +529,7 @@
 
 	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
 
+	fsnotify_get_group(group);
 	fsn_event_priv->group = group;
 	event_priv->wd = i_mark->wd;
 
@@ -546,9 +543,9 @@
 	}
 
 skip_send_ignore:
-
 	/* matches the reference taken when the event was created */
-	fsnotify_put_event(ignored_event);
+	if (ignored_event)
+		fsnotify_put_event(ignored_event);
 
 	/* remove this mark from the idr */
 	inotify_remove_from_idr(group, i_mark);
@@ -707,12 +704,11 @@
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
 	group->inotify_data.last_wd = 0;
-	group->inotify_data.fa = NULL;
 	group->inotify_data.user = get_current_user();
 
 	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
 	    inotify_max_user_instances) {
-		fsnotify_put_group(group);
+		fsnotify_destroy_group(group);
 		return ERR_PTR(-EMFILE);
 	}
 
@@ -741,7 +737,7 @@
 	ret = anon_inode_getfd("inotify", &inotify_fops, group,
 				  O_RDONLY | flags);
 	if (ret < 0)
-		fsnotify_put_group(group);
+		fsnotify_destroy_group(group);
 
 	return ret;
 }
@@ -817,7 +813,7 @@
 
 	ret = 0;
 
-	fsnotify_destroy_mark(&i_mark->fsn_mark);
+	fsnotify_destroy_mark(&i_mark->fsn_mark, group);
 
 	/* match ref taken by inotify_idr_find */
 	fsnotify_put_mark(&i_mark->fsn_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index f104d56..fc6b49b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,8 +109,11 @@
 
 void fsnotify_put_mark(struct fsnotify_mark *mark)
 {
-	if (atomic_dec_and_test(&mark->refcnt))
+	if (atomic_dec_and_test(&mark->refcnt)) {
+		if (mark->group)
+			fsnotify_put_group(mark->group);
 		mark->free_mark(mark);
+	}
 }
 
 /*
@@ -118,14 +121,14 @@
  * The caller had better be holding a reference to this mark so we don't actually
  * do the final put under the mark->lock
  */
-void fsnotify_destroy_mark(struct fsnotify_mark *mark)
+void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
+				  struct fsnotify_group *group)
 {
-	struct fsnotify_group *group;
 	struct inode *inode = NULL;
 
-	spin_lock(&mark->lock);
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
-	group = mark->group;
+	spin_lock(&mark->lock);
 
 	/* something else already called this function on this mark */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
@@ -135,8 +138,6 @@
 
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 
-	spin_lock(&group->mark_lock);
-
 	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
 		inode = mark->i.inode;
 		fsnotify_destroy_inode_mark(mark);
@@ -147,13 +148,22 @@
 
 	list_del_init(&mark->g_list);
 
-	spin_unlock(&group->mark_lock);
 	spin_unlock(&mark->lock);
 
+	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
+		iput(inode);
+	/* release lock temporarily */
+	mutex_unlock(&group->mark_mutex);
+
 	spin_lock(&destroy_lock);
 	list_add(&mark->destroy_list, &destroy_list);
 	spin_unlock(&destroy_lock);
 	wake_up(&destroy_waitq);
+	/*
+	 * We don't necessarily have a ref on mark from caller so the above destroy
+	 * may have actually freed it, unless this group provides a 'freeing_mark'
+	 * function which must be holding a reference.
+	 */
 
 	/*
 	 * Some groups like to know that marks are being freed.  This is a
@@ -175,21 +185,17 @@
 	 * is just a lazy update (and could be a perf win...)
 	 */
 
-	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
-		iput(inode);
+	atomic_dec(&group->num_marks);
 
-	/*
-	 * We don't necessarily have a ref on mark from caller so the above iput
-	 * may have already destroyed it.  Don't touch from now on.
-	 */
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+}
 
-	/*
-	 * it's possible that this group tried to destroy itself, but this
-	 * this mark was simultaneously being freed by inode.  If that's the
-	 * case, we finish freeing the group here.
-	 */
-	if (unlikely(atomic_dec_and_test(&group->num_marks)))
-		fsnotify_final_destroy_group(group);
+void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+			   struct fsnotify_group *group)
+{
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+	fsnotify_destroy_mark_locked(mark, group);
+	mutex_unlock(&group->mark_mutex);
 }
 
 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
@@ -214,26 +220,26 @@
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group.
  */
-int fsnotify_add_mark(struct fsnotify_mark *mark,
-		      struct fsnotify_group *group, struct inode *inode,
-		      struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+			     struct fsnotify_group *group, struct inode *inode,
+			     struct vfsmount *mnt, int allow_dups)
 {
 	int ret = 0;
 
 	BUG_ON(inode && mnt);
 	BUG_ON(!inode && !mnt);
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
 	/*
 	 * LOCKING ORDER!!!!
+	 * group->mark_mutex
 	 * mark->lock
-	 * group->mark_lock
 	 * inode->i_lock
 	 */
 	spin_lock(&mark->lock);
-	spin_lock(&group->mark_lock);
-
 	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
 
+	fsnotify_get_group(group);
 	mark->group = group;
 	list_add(&mark->g_list, &group->marks_list);
 	atomic_inc(&group->num_marks);
@@ -251,11 +257,8 @@
 		BUG();
 	}
 
-	spin_unlock(&group->mark_lock);
-
 	/* this will pin the object if appropriate */
 	fsnotify_set_mark_mask_locked(mark, mark->mask);
-
 	spin_unlock(&mark->lock);
 
 	if (inode)
@@ -265,10 +268,10 @@
 err:
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 	list_del_init(&mark->g_list);
+	fsnotify_put_group(group);
 	mark->group = NULL;
 	atomic_dec(&group->num_marks);
 
-	spin_unlock(&group->mark_lock);
 	spin_unlock(&mark->lock);
 
 	spin_lock(&destroy_lock);
@@ -279,6 +282,16 @@
 	return ret;
 }
 
+int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
+		      struct inode *inode, struct vfsmount *mnt, int allow_dups)
+{
+	int ret;
+	mutex_lock(&group->mark_mutex);
+	ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
+	mutex_unlock(&group->mark_mutex);
+	return ret;
+}
+
 /*
  * clear any marks in a group in which mark->flags & flags is true
  */
@@ -286,22 +299,16 @@
 					 unsigned int flags)
 {
 	struct fsnotify_mark *lmark, *mark;
-	LIST_HEAD(free_list);
 
-	spin_lock(&group->mark_lock);
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 		if (mark->flags & flags) {
-			list_add(&mark->free_g_list, &free_list);
-			list_del_init(&mark->g_list);
 			fsnotify_get_mark(mark);
+			fsnotify_destroy_mark_locked(mark, group);
+			fsnotify_put_mark(mark);
 		}
 	}
-	spin_unlock(&group->mark_lock);
-
-	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
-		fsnotify_destroy_mark(mark);
-		fsnotify_put_mark(mark);
-	}
+	mutex_unlock(&group->mark_mutex);
 }
 
 /*
@@ -317,6 +324,8 @@
 	assert_spin_locked(&old->lock);
 	new->i.inode = old->i.inode;
 	new->m.mnt = old->m.mnt;
+	if (old->group)
+		fsnotify_get_group(old->group);
 	new->group = old->group;
 	new->mask = old->mask;
 	new->free_mark = old->free_mark;
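
The locking order documented above is group->mark_mutex, then mark->lock,
then inode->i_lock, and the _locked variants let a caller batch work under
one mark_mutex acquisition. A hypothetical example (note that
fsnotify_destroy_mark_locked() drops and re-takes the mutex internally, as
the hunk above shows):

static int add_two_marks(struct fsnotify_group *group,
			 struct fsnotify_mark *a, struct inode *ia,
			 struct fsnotify_mark *b, struct inode *ib)
{
	int ret;

	mutex_lock(&group->mark_mutex);
	ret = fsnotify_add_mark_locked(a, group, ia, NULL, 0);
	if (!ret) {
		ret = fsnotify_add_mark_locked(b, group, ib, NULL, 0);
		if (ret)
			fsnotify_destroy_mark_locked(a, group);
	}
	mutex_unlock(&group->mark_mutex);
	return ret;
}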
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 48cb994..7b51b05 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -225,6 +225,7 @@
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
+	kill_fasync(&group->fsn_fa, SIGIO, POLL_IN);
 	return return_event;
 }
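
Together with the generic fsnotify_fasync() added above, this kill_fasync()
call means an inotify descriptor put into O_ASYNC mode raises SIGIO whenever
an event is queued. A small userspace demonstration:

#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/inotify.h>

static volatile sig_atomic_t got_sigio;

static void on_sigio(int sig)
{
	got_sigio = 1;
}

int main(void)
{
	int fd = inotify_init();

	signal(SIGIO, on_sigio);
	inotify_add_watch(fd, "/tmp", IN_CREATE);
	fcntl(fd, F_SETOWN, getpid());
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);

	while (!got_sigio)
		pause();
	puts("event queued, SIGIO delivered");
	return 0;
}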
 
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index b7b4b0e..4df58b8 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -46,8 +46,16 @@
 	spin_unlock(&mnt->mnt_root->d_lock);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
-		fsnotify_destroy_mark(mark);
+		struct fsnotify_group *group;
+
+		spin_lock(&mark->lock);
+		fsnotify_get_group(mark->group);
+		group = mark->group;
+		spin_unlock(&mark->lock);
+
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
+		fsnotify_put_group(group);
 	}
 }
 
@@ -88,8 +96,8 @@
 {
 	struct vfsmount *mnt = mark->m.mnt;
 
+	BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&mark->group->mark_lock);
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
@@ -151,8 +159,8 @@
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
 
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&group->mark_lock);
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1ecf464..5b2d4f0 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1762,6 +1762,16 @@
 	return err;
 }
 
+static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		ntfs_truncate_vfs(inode);
+	}
+}
+
 /**
  * ntfs_file_buffered_write -
  *
@@ -2022,8 +2032,9 @@
 				 * allocated space, which is not a disaster.
 				 */
 				i_size = i_size_read(vi);
-				if (pos + bytes > i_size)
-					vmtruncate(vi, i_size);
+				if (pos + bytes > i_size) {
+					ntfs_write_failed(mapping, pos + bytes);
+				}
 				break;
 			}
 		}
@@ -2227,7 +2238,6 @@
 
 const struct inode_operations ntfs_file_inode_ops = {
 #ifdef NTFS_RW
-	.truncate	= ntfs_truncate_vfs,
 	.setattr	= ntfs_setattr,
 #endif /* NTFS_RW */
 };
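
This ntfs_write_failed() helper is one instance of the pattern this series
uses to remove vmtruncate(): when a write fails or comes up short past
i_size, drop the pagecache beyond the old size and let the filesystem trim
whatever blocks it instantiated. The generic shape, as a sketch with a
hypothetical "myfs" (truncate_pagecache() is the three-argument form of this
kernel version):

static void myfs_truncate(struct inode *inode);	/* fs-specific trimming */

static void myfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		/* throw away pagecache beyond the old size ... */
		truncate_pagecache(inode, to, inode->i_size);
		/* ... and free any blocks the failed write allocated */
		myfs_truncate(inode);
	}
}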
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 1d27331..d3e118c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2866,9 +2866,11 @@
  *
  * See ntfs_truncate() description above for details.
  */
+#ifdef NTFS_RW
 void ntfs_truncate_vfs(struct inode *vi) {
 	ntfs_truncate(vi);
 }
+#endif
 
 /**
  * ntfs_setattr - called from notify_change() when an attribute is being changed
@@ -2914,8 +2916,10 @@
 						NInoCompressed(ni) ?
 						"compressed" : "encrypted");
 				err = -EOPNOTSUPP;
-			} else
-				err = vmtruncate(vi, attr->ia_size);
+			} else {
+				truncate_setsize(vi, attr->ia_size);
+				ntfs_truncate_vfs(vi);
+			}
 			if (err || ia_valid == ATTR_SIZE)
 				goto out;
 		} else {
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index db29695..76b6cfb 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -316,6 +316,10 @@
 	return;
 }
 
+#else
+
+static inline void ntfs_truncate_vfs(struct inode *vi) {}
+
 #endif /* NTFS_RW */
 
 #endif /* _LINUX_NTFS_INODE_H */
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 70b5863..f487aa3 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -832,7 +832,7 @@
 	return ret;
 }
 
-int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
+int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
@@ -843,7 +843,7 @@
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_extent_rec rec;
 
-	BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE);
+	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
 
 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
 	if (ret) {
@@ -859,7 +859,7 @@
 	}
 
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
-		if (origin == SEEK_HOLE)
+		if (whence == SEEK_HOLE)
 			*offset = inode->i_size;
 		goto out_unlock;
 	}
@@ -888,8 +888,8 @@
 			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 		}
 
-		if ((!is_data && origin == SEEK_HOLE) ||
-		    (is_data && origin == SEEK_DATA)) {
+		if ((!is_data && whence == SEEK_HOLE) ||
+		    (is_data && whence == SEEK_DATA)) {
 			if (extoff > *offset)
 				*offset = extoff;
 			goto out_unlock;
@@ -899,7 +899,7 @@
 			cpos += clen;
 	}
 
-	if (origin == SEEK_HOLE) {
+	if (whence == SEEK_HOLE) {
 		extoff = cpos;
 		extoff <<= cs_bits;
 		extlen = clen;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index dda0898..37d313e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1218,24 +1218,6 @@
 		}
 	}
 
-	/*
-	 * This will intentionally not wind up calling truncate_setsize(),
-	 * since all the work for a size change has been done above.
-	 * Otherwise, we could get into problems with truncate as
-	 * ip_alloc_sem is used there to protect against i_size
-	 * changes.
-	 *
-	 * XXX: this means the conditional below can probably be removed.
-	 */
-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode)) {
-		status = vmtruncate(inode, attr->ia_size);
-		if (status) {
-			mlog_errno(status);
-			goto bail_commit;
-		}
-	}
-
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
 
@@ -2637,14 +2619,14 @@
 }
 
 /* Refer generic_file_llseek_unlocked() */
-static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret = 0;
 
 	mutex_lock(&inode->i_mutex);
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_SET:
 		break;
 	case SEEK_END:
@@ -2659,7 +2641,7 @@
 		break;
 	case SEEK_DATA:
 	case SEEK_HOLE:
-		ret = ocfs2_seek_data_hole_offset(file, &offset, origin);
+		ret = ocfs2_seek_data_hole_offset(file, &offset, whence);
 		if (ret)
 			goto out;
 		break;
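
For reference, the userspace interface these ocfs2 hooks serve (the
origin-to-whence rename aside) is lseek(2) with SEEK_DATA and SEEK_HOLE. A
minimal probe; on a file with no holes, SEEK_HOLE reports the implicit hole
at end of file:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd;
	off_t data, hole;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0) {
		perror("open");
		return 1;
	}
	data = lseek(fd, 0, SEEK_DATA);	/* first data at/after offset 0 */
	hole = lseek(fd, 0, SEEK_HOLE);	/* first hole at/after offset 0 */
	printf("first data: %lld, first hole: %lld\n",
	       (long long)data, (long long)hole);
	close(fd);
	return 0;
}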
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 77e3cb2..e0d9b3e 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -306,6 +306,16 @@
 	return mpage_writepages(mapping, wbc, omfs_get_block);
 }
 
+static void omfs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		omfs_truncate(inode);
+	}
+}
+
 static int omfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -314,11 +324,8 @@
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep,
 				omfs_get_block);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		omfs_write_failed(mapping, pos + len);
 
 	return ret;
 }
@@ -350,9 +357,11 @@
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
+		error = inode_newsize_ok(inode, attr->ia_size);
 		if (error)
 			return error;
+		truncate_setsize(inode, attr->ia_size);
+		omfs_truncate(inode);
 	}
 
 	setattr_copy(inode, attr);
@@ -362,7 +371,6 @@
 
 const struct inode_operations omfs_file_inops = {
 	.setattr = omfs_setattr,
-	.truncate = omfs_truncate
 };
 
 const struct address_space_operations omfs_aops = {
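
The omfs hunks show the ->setattr side of dropping vmtruncate(): validate
the new size, update i_size and trim the pagecache with truncate_setsize(),
then run the filesystem's own truncate. As a generic sketch (hypothetical
"myfs", using this kernel's inode_change_ok()):

static int myfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error = inode_change_ok(inode, attr);

	if (error)
		return error;

	if ((attr->ia_valid & ATTR_SIZE) &&
	    attr->ia_size != i_size_read(inode)) {
		error = inode_newsize_ok(inode, attr->ia_size);
		if (error)
			return error;
		truncate_setsize(inode, attr->ia_size);
		myfs_truncate(inode);	/* fs-specific block trimming */
	}

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
}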
diff --git a/fs/open.c b/fs/open.c
index 59071f5..9b33c0cb 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -61,33 +61,22 @@
 	return ret;
 }
 
-static long do_sys_truncate(const char __user *pathname, loff_t length)
+long vfs_truncate(struct path *path, loff_t length)
 {
-	struct path path;
 	struct inode *inode;
-	int error;
+	long error;
 
-	error = -EINVAL;
-	if (length < 0)	/* sorry, but loff_t says... */
-		goto out;
-
-	error = user_path(pathname, &path);
-	if (error)
-		goto out;
-	inode = path.dentry->d_inode;
+	inode = path->dentry->d_inode;
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
-	error = -EISDIR;
 	if (S_ISDIR(inode->i_mode))
-		goto dput_and_out;
-
-	error = -EINVAL;
+		return -EISDIR;
 	if (!S_ISREG(inode->i_mode))
-		goto dput_and_out;
+		return -EINVAL;
 
-	error = mnt_want_write(path.mnt);
+	error = mnt_want_write(path->mnt);
 	if (error)
-		goto dput_and_out;
+		goto out;
 
 	error = inode_permission(inode, MAY_WRITE);
 	if (error)
@@ -111,19 +100,40 @@
 
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error)
-		error = security_path_truncate(&path);
+		error = security_path_truncate(path);
 	if (!error)
-		error = do_truncate(path.dentry, length, 0, NULL);
+		error = do_truncate(path->dentry, length, 0, NULL);
 
 put_write_and_out:
 	put_write_access(inode);
 mnt_drop_write_and_out:
-	mnt_drop_write(path.mnt);
-dput_and_out:
-	path_put(&path);
+	mnt_drop_write(path->mnt);
 out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(vfs_truncate);
+
+static long do_sys_truncate(const char __user *pathname, loff_t length)
+{
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+	struct path path;
+	int error;
+
+	if (length < 0)	/* sorry, but loff_t says... */
+		return -EINVAL;
+
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
+	if (!error) {
+		error = vfs_truncate(&path, length);
+		path_put(&path);
+	}
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
+	return error;
+}
 
 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 {
@@ -306,6 +316,7 @@
 	struct path path;
 	struct inode *inode;
 	int res;
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
 
 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
 		return -EINVAL;
@@ -328,8 +339,8 @@
 	}
 
 	old_cred = override_creds(override_cred);
-
-	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
+retry:
+	res = user_path_at(dfd, filename, lookup_flags, &path);
 	if (res)
 		goto out;
 
@@ -364,6 +375,10 @@
 
 out_path_release:
 	path_put(&path);
+	if (retry_estale(res, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 out:
 	revert_creds(old_cred);
 	put_cred(override_cred);
@@ -379,8 +394,9 @@
 {
 	struct path path;
 	int error;
-
-	error = user_path_dir(filename, &path);
+	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+retry:
+	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
 	if (error)
 		goto out;
 
@@ -392,6 +408,10 @@
 
 dput_and_out:
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 out:
 	return error;
 }
@@ -425,8 +445,9 @@
 {
 	struct path path;
 	int error;
-
-	error = user_path_dir(filename, &path);
+	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+retry:
+	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
 	if (error)
 		goto out;
 
@@ -435,7 +456,7 @@
 		goto dput_and_out;
 
 	error = -EPERM;
-	if (!capable(CAP_SYS_CHROOT))
+	if (!nsown_capable(CAP_SYS_CHROOT))
 		goto dput_and_out;
 	error = security_path_chroot(&path);
 	if (error)
@@ -445,6 +466,10 @@
 	error = 0;
 dput_and_out:
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 out:
 	return error;
 }
@@ -489,11 +514,16 @@
 {
 	struct path path;
 	int error;
-
-	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+retry:
+	error = user_path_at(dfd, filename, lookup_flags, &path);
 	if (!error) {
 		error = chmod_common(&path, mode);
 		path_put(&path);
+		if (retry_estale(error, lookup_flags)) {
+			lookup_flags |= LOOKUP_REVAL;
+			goto retry;
+		}
 	}
 	return error;
 }
@@ -552,6 +582,7 @@
 	lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
 	if (flag & AT_EMPTY_PATH)
 		lookup_flags |= LOOKUP_EMPTY;
+retry:
 	error = user_path_at(dfd, filename, lookup_flags, &path);
 	if (error)
 		goto out;
@@ -562,6 +593,10 @@
 	mnt_drop_write(path.mnt);
 out_release:
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 out:
 	return error;
 }
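
Every syscall touched in fs/open.c above gains the same shape: one lookup
with the normal flags and, if the operation returns -ESTALE, a single retry
with LOOKUP_REVAL so cached dentries are revalidated (mainly for NFS).
Condensed into a sketch around a hypothetical callback:

static long do_path_op(const char __user *pathname,
		       long (*op)(struct path *))
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	long error;

retry:
	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
	if (!error) {
		error = op(&path);
		path_put(&path);
	}
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}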
diff --git a/fs/pnode.h b/fs/pnode.h
index 65c6097..19b853a3 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@
 #define CL_COPY_ALL 		0x04
 #define CL_MAKE_SHARED 		0x08
 #define CL_PRIVATE 		0x10
+#define CL_SHARED_TO_SLAVE	0x20
 
 static inline void set_mnt_shared(struct mount *mnt)
 {
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 99349ef..981b056 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@
 proc-y	+= version.o
 proc-y	+= softirqs.o
 proc-y	+= namespaces.o
+proc-y	+= self.o
 proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
 proc-$(CONFIG_NET)		+= proc_net.o
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d369670..6a91e6f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@
 static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *p)
 {
-	struct user_namespace *user_ns = current_user_ns();
+	struct user_namespace *user_ns = seq_user_ns(m);
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
@@ -212,7 +212,7 @@
 	group_info = cred->group_info;
 	task_unlock(p);
 
-	for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
+	for (g = 0; g < group_info->ngroups; g++)
 		seq_printf(m, "%d ",
 			   from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
 	put_cred(cred);
@@ -220,7 +220,7 @@
 	seq_putc(m, '\n');
 }
 
-static void render_sigset_t(struct seq_file *m, const char *header,
+void render_sigset_t(struct seq_file *m, const char *header,
 				sigset_t *set)
 {
 	int i;
@@ -308,6 +308,10 @@
 	seq_putc(m, '\n');
 }
 
+/* Remove non-existent capabilities */
+#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
+				CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
+
 static inline void task_cap(struct seq_file *m, struct task_struct *p)
 {
 	const struct cred *cred;
@@ -321,12 +325,24 @@
 	cap_bset	= cred->cap_bset;
 	rcu_read_unlock();
 
+	NORM_CAPS(cap_inheritable);
+	NORM_CAPS(cap_permitted);
+	NORM_CAPS(cap_effective);
+	NORM_CAPS(cap_bset);
+
 	render_cap_t(m, "CapInh:\t", &cap_inheritable);
 	render_cap_t(m, "CapPrm:\t", &cap_permitted);
 	render_cap_t(m, "CapEff:\t", &cap_effective);
 	render_cap_t(m, "CapBnd:\t", &cap_bset);
 }
 
+static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+	seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+#endif
+}
+
 static inline void task_context_switch_counts(struct seq_file *m,
 						struct task_struct *p)
 {
@@ -360,6 +376,7 @@
 	}
 	task_sig(m, task);
 	task_cap(m, task);
+	task_seccomp(m, task);
 	task_cpus_allowed(m, task);
 	cpuset_task_status_allowed(m, task);
 	task_context_switch_counts(m, task);
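
Net effect on /proc/<pid>/status from these hunks: Groups: is no longer cut
off at NGROUPS_SMALL, the capability sets are masked down to the bits up to
CAP_LAST_CAP, and a Seccomp: line appears when CONFIG_SECCOMP is set.
Illustrative output (mask values vary by kernel and configuration):

	Groups:	4 24 27 30 46 1000
	CapInh:	0000000000000000
	CapPrm:	0000000fffffffff
	CapEff:	0000000fffffffff
	CapBnd:	0000000fffffffff
	Seccomp:	0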
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa63d25..9b43ff77 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -542,13 +542,6 @@
 	if (error)
 		return error;
 
-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
-		if (error)
-			return error;
-	}
-
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
 	return 0;
@@ -2345,146 +2338,6 @@
 };
 #endif
 
-/*
- * /proc/self:
- */
-static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
-			      int buflen)
-{
-	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
-	pid_t tgid = task_tgid_nr_ns(current, ns);
-	char tmp[PROC_NUMBUF];
-	if (!tgid)
-		return -ENOENT;
-	sprintf(tmp, "%d", tgid);
-	return vfs_readlink(dentry,buffer,buflen,tmp);
-}
-
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
-	pid_t tgid = task_tgid_nr_ns(current, ns);
-	char *name = ERR_PTR(-ENOENT);
-	if (tgid) {
-		/* 11 for max length of signed int in decimal + NULL term */
-		name = kmalloc(12, GFP_KERNEL);
-		if (!name)
-			name = ERR_PTR(-ENOMEM);
-		else
-			sprintf(name, "%d", tgid);
-	}
-	nd_set_link(nd, name);
-	return NULL;
-}
-
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
-				void *cookie)
-{
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s))
-		kfree(s);
-}
-
-static const struct inode_operations proc_self_inode_operations = {
-	.readlink	= proc_self_readlink,
-	.follow_link	= proc_self_follow_link,
-	.put_link	= proc_self_put_link,
-};
-
-/*
- * proc base
- *
- * These are the directory entries in the root directory of /proc
- * that properly belong to the /proc filesystem, as they describe
- * describe something that is process related.
- */
-static const struct pid_entry proc_base_stuff[] = {
-	NOD("self", S_IFLNK|S_IRWXUGO,
-		&proc_self_inode_operations, NULL, {}),
-};
-
-static struct dentry *proc_base_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-	const struct pid_entry *p = ptr;
-	struct inode *inode;
-	struct proc_inode *ei;
-	struct dentry *error;
-
-	/* Allocate the inode */
-	error = ERR_PTR(-ENOMEM);
-	inode = new_inode(dir->i_sb);
-	if (!inode)
-		goto out;
-
-	/* Initialize the inode */
-	ei = PROC_I(inode);
-	inode->i_ino = get_next_ino();
-	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
-	/*
-	 * grab the reference to the task.
-	 */
-	ei->pid = get_task_pid(task, PIDTYPE_PID);
-	if (!ei->pid)
-		goto out_iput;
-
-	inode->i_mode = p->mode;
-	if (S_ISDIR(inode->i_mode))
-		set_nlink(inode, 2);
-	if (S_ISLNK(inode->i_mode))
-		inode->i_size = 64;
-	if (p->iop)
-		inode->i_op = p->iop;
-	if (p->fop)
-		inode->i_fop = p->fop;
-	ei->op = p->op;
-	d_add(dentry, inode);
-	error = NULL;
-out:
-	return error;
-out_iput:
-	iput(inode);
-	goto out;
-}
-
-static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
-{
-	struct dentry *error;
-	struct task_struct *task = get_proc_task(dir);
-	const struct pid_entry *p, *last;
-
-	error = ERR_PTR(-ENOENT);
-
-	if (!task)
-		goto out_no_task;
-
-	/* Lookup the directory entry */
-	last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
-	for (p = proc_base_stuff; p <= last; p++) {
-		if (p->len != dentry->d_name.len)
-			continue;
-		if (!memcmp(dentry->d_name.name, p->name, p->len))
-			break;
-	}
-	if (p > last)
-		goto out;
-
-	error = proc_base_instantiate(dir, dentry, task, p);
-
-out:
-	put_task_struct(task);
-out_no_task:
-	return error;
-}
-
-static int proc_base_fill_cache(struct file *filp, void *dirent,
-	filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
-{
-	return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
-				proc_base_instantiate, task, p);
-}
-
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 {
@@ -2839,10 +2692,6 @@
 		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
 					tgid->numbers[i].nr);
 	}
-
-	upid = &pid->numbers[pid->level];
-	if (upid->nr == 1)
-		pid_ns_release_proc(upid->ns);
 }
 
 static struct dentry *proc_pid_instantiate(struct inode *dir,
@@ -2876,15 +2725,11 @@
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
-	struct dentry *result;
+	struct dentry *result = NULL;
 	struct task_struct *task;
 	unsigned tgid;
 	struct pid_namespace *ns;
 
-	result = proc_base_lookup(dir, dentry);
-	if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
-		goto out;
-
 	tgid = name_to_int(dentry);
 	if (tgid == ~0U)
 		goto out;
@@ -2947,7 +2792,7 @@
 	return iter;
 }
 
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
 
 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	struct tgid_iter iter)
@@ -2967,25 +2812,12 @@
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	unsigned int nr;
-	struct task_struct *reaper;
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
 	filldir_t __filldir;
 
 	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
-		goto out_no_task;
-	nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-
-	reaper = get_proc_task(filp->f_path.dentry->d_inode);
-	if (!reaper)
-		goto out_no_task;
-
-	for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
-		const struct pid_entry *p = &proc_base_stuff[nr];
-		if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
-			goto out;
-	}
+		goto out;
 
 	ns = filp->f_dentry->d_sb->s_fs_info;
 	iter.task = NULL;
@@ -3006,8 +2838,6 @@
 	}
 	filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
 out:
-	put_task_struct(reaper);
-out_no_task:
 	return 0;
 }
 
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index f28a875..d7a4a28 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -50,6 +50,8 @@
 	if (!ret) {
                 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
 			   (long long)file->f_pos, f_flags);
+		if (file->f_op->show_fdinfo)
+			ret = file->f_op->show_fdinfo(m, file);
 		fput(file);
 	}
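
With this hook in place, any file_operations owner can append its own state
to the generic pos/flags lines. A hypothetical user as a sketch ("mydrv"
names are invented; in this kernel the hook returns int):

struct mydrv_ctx {
	unsigned int state;
};

static int mydrv_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct mydrv_ctx *ctx = f->private_data;

	return seq_printf(m, "mydrv-state:\t%u\n", ctx->state);
}

static const struct file_operations mydrv_fops = {
	.show_fdinfo	= mydrv_show_fdinfo,
	/* .open, .read, .release, ... */
};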
 
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef..e064f56 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -261,16 +261,9 @@
 	if (error)
 		return error;
 
-	if ((iattr->ia_valid & ATTR_SIZE) &&
-	    iattr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, iattr->ia_size);
-		if (error)
-			return error;
-	}
-
 	setattr_copy(inode, iattr);
 	mark_inode_dirty(inode);
-	
+
 	de->uid = inode->i_uid;
 	de->gid = inode->i_gid;
 	de->mode = inode->i_mode;
@@ -350,37 +343,38 @@
  * Return an inode number between PROC_DYNAMIC_FIRST and
  * 0xffffffff, or zero on failure.
  */
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
 {
 	unsigned int i;
 	int error;
 
 retry:
-	if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
-		return 0;
+	if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+		return -ENOMEM;
 
-	spin_lock(&proc_inum_lock);
+	spin_lock_bh(&proc_inum_lock);
 	error = ida_get_new(&proc_inum_ida, &i);
-	spin_unlock(&proc_inum_lock);
+	spin_unlock_bh(&proc_inum_lock);
 	if (error == -EAGAIN)
 		goto retry;
 	else if (error)
-		return 0;
+		return error;
 
 	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
-		spin_lock(&proc_inum_lock);
+		spin_lock_bh(&proc_inum_lock);
 		ida_remove(&proc_inum_ida, i);
-		spin_unlock(&proc_inum_lock);
-		return 0;
+		spin_unlock_bh(&proc_inum_lock);
+		return -ENOSPC;
 	}
-	return PROC_DYNAMIC_FIRST + i;
+	*inum = PROC_DYNAMIC_FIRST + i;
+	return 0;
 }
 
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
 {
-	spin_lock(&proc_inum_lock);
+	spin_lock_bh(&proc_inum_lock);
 	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
-	spin_unlock(&proc_inum_lock);
+	spin_unlock_bh(&proc_inum_lock);
 }
 
 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
@@ -554,13 +548,12 @@
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
 {
-	unsigned int i;
 	struct proc_dir_entry *tmp;
+	int ret;
 	
-	i = get_inode_number();
-	if (i == 0)
-		return -EAGAIN;
-	dp->low_ino = i;
+	ret = proc_alloc_inum(&dp->low_ino);
+	if (ret)
+		return ret;
 
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
@@ -764,7 +757,7 @@
 
 static void free_proc_entry(struct proc_dir_entry *de)
 {
-	release_inode_number(de->low_ino);
+	proc_free_inum(de->low_ino);
 
 	if (S_ISLNK(de->mode))
 		kfree(de->data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3b22bbd..439ae688 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -31,6 +31,7 @@
 	struct proc_dir_entry *de;
 	struct ctl_table_header *head;
 	const struct proc_ns_operations *ns_ops;
+	void *ns;
 
 	truncate_inode_pages(&inode->i_data, 0);
 	clear_inode(inode);
@@ -49,8 +50,9 @@
 	}
 	/* Release any associated namespace */
 	ns_ops = PROC_I(inode)->ns_ops;
-	if (ns_ops && ns_ops->put)
-		ns_ops->put(PROC_I(inode)->ns);
+	ns = PROC_I(inode)->ns;
+	if (ns_ops && ns)
+		ns_ops->put(ns);
 }
 
 static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 43973b0..252544c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -15,6 +15,7 @@
 struct  mempolicy;
 
 extern struct proc_dir_entry proc_root;
+extern void proc_self_init(void);
 #ifdef CONFIG_PROC_SYSCTL
 extern int proc_sys_init(void);
 extern void sysctl_head_put(struct ctl_table_header *head);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed7..b7a4719 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
 #include <net/net_namespace.h>
 #include <linux/ipc_namespace.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include "internal.h"
 
 
@@ -24,12 +25,168 @@
 #ifdef CONFIG_IPC_NS
 	&ipcns_operations,
 #endif
+#ifdef CONFIG_PID_NS
+	&pidns_operations,
+#endif
+#ifdef CONFIG_USER_NS
+	&userns_operations,
+#endif
+	&mntns_operations,
 };
 
 static const struct file_operations ns_file_operations = {
 	.llseek		= no_llseek,
 };
 
+static const struct inode_operations ns_inode_operations = {
+	.setattr	= proc_setattr,
+};
+
+static int ns_delete_dentry(const struct dentry *dentry)
+{
+	/* Don't cache namespace inodes when not in use */
+	return 1;
+}
+
+static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	struct inode *inode = dentry->d_inode;
+	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
+
+	return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
+		ns_ops->name, inode->i_ino);
+}
+
+const struct dentry_operations ns_dentry_operations =
+{
+	.d_delete	= ns_delete_dentry,
+	.d_dname	= ns_dname,
+};
+
+static struct dentry *proc_ns_get_dentry(struct super_block *sb,
+	struct task_struct *task, const struct proc_ns_operations *ns_ops)
+{
+	struct dentry *dentry, *result;
+	struct inode *inode;
+	struct proc_inode *ei;
+	struct qstr qname = { .name = "", };
+	void *ns;
+
+	ns = ns_ops->get(task);
+	if (!ns)
+		return ERR_PTR(-ENOENT);
+
+	dentry = d_alloc_pseudo(sb, &qname);
+	if (!dentry) {
+		ns_ops->put(ns);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	inode = iget_locked(sb, ns_ops->inum(ns));
+	if (!inode) {
+		dput(dentry);
+		ns_ops->put(ns);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ei = PROC_I(inode);
+	if (inode->i_state & I_NEW) {
+		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+		inode->i_op = &ns_inode_operations;
+		inode->i_mode = S_IFREG | S_IRUGO;
+		inode->i_fop = &ns_file_operations;
+		ei->ns_ops = ns_ops;
+		ei->ns = ns;
+		unlock_new_inode(inode);
+	} else {
+		ns_ops->put(ns);
+	}
+
+	d_set_d_op(dentry, &ns_dentry_operations);
+	result = d_instantiate_unique(dentry, inode);
+	if (result) {
+		dput(dentry);
+		dentry = result;
+	}
+
+	return dentry;
+}
+
+static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	struct proc_inode *ei = PROC_I(inode);
+	struct task_struct *task;
+	struct dentry *ns_dentry;
+	void *error = ERR_PTR(-EACCES);
+
+	task = get_proc_task(inode);
+	if (!task)
+		goto out;
+
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto out_put_task;
+
+	ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
+	if (IS_ERR(ns_dentry)) {
+		error = ERR_CAST(ns_dentry);
+		goto out_put_task;
+	}
+
+	dput(nd->path.dentry);
+	nd->path.dentry = ns_dentry;
+	error = NULL;
+
+out_put_task:
+	put_task_struct(task);
+out:
+	return error;
+}
+
+static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+	struct inode *inode = dentry->d_inode;
+	struct proc_inode *ei = PROC_I(inode);
+	const struct proc_ns_operations *ns_ops = ei->ns_ops;
+	struct task_struct *task;
+	void *ns;
+	char name[50];
+	int len = -EACCES;
+
+	task = get_proc_task(inode);
+	if (!task)
+		goto out;
+
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto out_put_task;
+
+	len = -ENOENT;
+	ns = ns_ops->get(task);
+	if (!ns)
+		goto out_put_task;
+
+	snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
+	len = strlen(name);
+
+	if (len > buflen)
+		len = buflen;
+	if (copy_to_user(buffer, name, len))
+		len = -EFAULT;
+
+	ns_ops->put(ns);
+out_put_task:
+	put_task_struct(task);
+out:
+	return len;
+}
+
+static const struct inode_operations proc_ns_link_inode_operations = {
+	.readlink	= proc_ns_readlink,
+	.follow_link	= proc_ns_follow_link,
+	.setattr	= proc_setattr,
+};
+
 static struct dentry *proc_ns_instantiate(struct inode *dir,
 	struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
@@ -37,21 +194,15 @@
 	struct inode *inode;
 	struct proc_inode *ei;
 	struct dentry *error = ERR_PTR(-ENOENT);
-	void *ns;
 
 	inode = proc_pid_make_inode(dir->i_sb, task);
 	if (!inode)
 		goto out;
 
-	ns = ns_ops->get(task);
-	if (!ns)
-		goto out_iput;
-
 	ei = PROC_I(inode);
-	inode->i_mode = S_IFREG|S_IRUSR;
-	inode->i_fop  = &ns_file_operations;
-	ei->ns_ops    = ns_ops;
-	ei->ns	      = ns;
+	inode->i_mode = S_IFLNK|S_IRWXUGO;
+	inode->i_op = &proc_ns_link_inode_operations;
+	ei->ns_ops = ns_ops;
 
 	d_set_d_op(dentry, &pid_dentry_operations);
 	d_add(dentry, inode);
@@ -60,9 +211,6 @@
 		error = NULL;
 out:
 	return error;
-out_iput:
-	iput(inode);
-	goto out;
 }
 
 static int proc_ns_fill_cache(struct file *filp, void *dirent,
@@ -89,10 +237,6 @@
 	if (!task)
 		goto out_no_task;
 
-	ret = -EPERM;
-	if (!ptrace_may_access(task, PTRACE_MODE_READ))
-		goto out;
-
 	ret = 0;
 	i = filp->f_pos;
 	switch (i) {
@@ -152,10 +296,6 @@
 	if (!task)
 		goto out_no_task;
 
-	error = ERR_PTR(-EPERM);
-	if (!ptrace_may_access(task, PTRACE_MODE_READ))
-		goto out;
-
 	last = &ns_entries[ARRAY_SIZE(ns_entries)];
 	for (entry = ns_entries; entry < last; entry++) {
 		if (strlen((*entry)->name) != len)
@@ -163,7 +303,6 @@
 		if (!memcmp(dentry->d_name.name, (*entry)->name, len))
 			break;
 	}
-	error = ERR_PTR(-ENOENT);
 	if (entry == last)
 		goto out;
 
@@ -198,3 +337,7 @@
 	return ERR_PTR(-EINVAL);
 }
 
+bool proc_ns_inode(struct inode *inode)
+{
+	return inode->i_fop == &ns_file_operations;
+}
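
Since the ns entries are now symlinks whose target is rendered by
ns_dname()/proc_ns_readlink() as "<name>:[<inum>]", userspace can compare
namespaces without opening them: two tasks share a namespace iff the link
targets match. A minimal check (the inode number in the comment is
illustrative):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n = readlink("/proc/self/ns/mnt", buf, sizeof(buf) - 1);

	if (n < 0) {
		perror("readlink");
		return 1;
	}
	buf[n] = '\0';
	printf("%s\n", buf);	/* e.g. "mnt:[4026531840]" */
	return 0;
}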
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index df7dd08..de20ec4 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -195,11 +195,7 @@
 	set_node_proc_entry(np, de);
 	for (child = NULL; (child = of_get_next_child(np, child));) {
 		/* Use everything after the last slash, or the full name */
-		p = strrchr(child->full_name, '/');
-		if (!p)
-			p = child->full_name;
-		else
-			++p;
+		p = kbasename(child->full_name);
 
 		if (duplicate_name(de, p))
 			p = fixup_name(np, de, p);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 701580d..1827d88 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -736,13 +736,6 @@
 	if (error)
 		return error;
 
-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
-		if (error)
-			return error;
-	}
-
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
 	return 0;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9889a92..c6e9fac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -100,14 +100,13 @@
 	int err;
 	struct super_block *sb;
 	struct pid_namespace *ns;
-	struct proc_inode *ei;
 	char *options;
 
 	if (flags & MS_KERNMOUNT) {
 		ns = (struct pid_namespace *)data;
 		options = NULL;
 	} else {
-		ns = current->nsproxy->pid_ns;
+		ns = task_active_pid_ns(current);
 		options = data;
 	}
 
@@ -130,13 +129,6 @@
 		sb->s_flags |= MS_ACTIVE;
 	}
 
-	ei = PROC_I(sb->s_root->d_inode);
-	if (!ei->pid) {
-		rcu_read_lock();
-		ei->pid = get_pid(find_pid_ns(1, ns));
-		rcu_read_unlock();
-	}
-
 	return dget(sb->s_root);
 }
 
@@ -153,6 +145,7 @@
 	.name		= "proc",
 	.mount		= proc_mount,
 	.kill_sb	= proc_kill_sb,
+	.fs_flags	= FS_USERNS_MOUNT,
 };
 
 void __init proc_root_init(void)
@@ -163,12 +156,8 @@
 	err = register_filesystem(&proc_fs_type);
 	if (err)
 		return;
-	err = pid_ns_prepare_proc(&init_pid_ns);
-	if (err) {
-		unregister_filesystem(&proc_fs_type);
-		return;
-	}
 
+	proc_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	proc_net_init();
diff --git a/fs/proc/self.c b/fs/proc/self.c
new file mode 100644
index 0000000..aa5cc3b
--- /dev/null
+++ b/fs/proc/self.c
@@ -0,0 +1,59 @@
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+/*
+ * /proc/self:
+ */
+static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
+			      int buflen)
+{
+	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+	pid_t tgid = task_tgid_nr_ns(current, ns);
+	char tmp[PROC_NUMBUF];
+	if (!tgid)
+		return -ENOENT;
+	sprintf(tmp, "%d", tgid);
+	return vfs_readlink(dentry,buffer,buflen,tmp);
+}
+
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+	pid_t tgid = task_tgid_nr_ns(current, ns);
+	char *name = ERR_PTR(-ENOENT);
+	if (tgid) {
+		/* 11 for max length of signed int in decimal + NULL term */
+		name = kmalloc(12, GFP_KERNEL);
+		if (!name)
+			name = ERR_PTR(-ENOMEM);
+		else
+			sprintf(name, "%d", tgid);
+	}
+	nd_set_link(nd, name);
+	return NULL;
+}
+
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+				void *cookie)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		kfree(s);
+}
+
+static const struct inode_operations proc_self_inode_operations = {
+	.readlink	= proc_self_readlink,
+	.follow_link	= proc_self_follow_link,
+	.put_link	= proc_self_put_link,
+};
+
+void __init proc_self_init(void)
+{
+	struct proc_dir_entry *proc_self_symlink;
+	mode_t mode;
+
+	mode = S_IFLNK | S_IRWXUGO;
+	proc_self_symlink = proc_create("self", mode, NULL, NULL);
+	proc_self_symlink->proc_iops = &proc_self_inode_operations;
+}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4877562..448455b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -526,6 +526,57 @@
 	return 0;
 }
 
+static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
+{
+	/*
+	 * Don't forget to update Documentation/ on changes.
+	 */
+	static const char mnemonics[BITS_PER_LONG][2] = {
+		/*
+		 * In case we meet a flag we don't know about.
+		 */
+		[0 ... (BITS_PER_LONG-1)] = "??",
+
+		[ilog2(VM_READ)]	= "rd",
+		[ilog2(VM_WRITE)]	= "wr",
+		[ilog2(VM_EXEC)]	= "ex",
+		[ilog2(VM_SHARED)]	= "sh",
+		[ilog2(VM_MAYREAD)]	= "mr",
+		[ilog2(VM_MAYWRITE)]	= "mw",
+		[ilog2(VM_MAYEXEC)]	= "me",
+		[ilog2(VM_MAYSHARE)]	= "ms",
+		[ilog2(VM_GROWSDOWN)]	= "gd",
+		[ilog2(VM_PFNMAP)]	= "pf",
+		[ilog2(VM_DENYWRITE)]	= "dw",
+		[ilog2(VM_LOCKED)]	= "lo",
+		[ilog2(VM_IO)]		= "io",
+		[ilog2(VM_SEQ_READ)]	= "sr",
+		[ilog2(VM_RAND_READ)]	= "rr",
+		[ilog2(VM_DONTCOPY)]	= "dc",
+		[ilog2(VM_DONTEXPAND)]	= "de",
+		[ilog2(VM_ACCOUNT)]	= "ac",
+		[ilog2(VM_NORESERVE)]	= "nr",
+		[ilog2(VM_HUGETLB)]	= "ht",
+		[ilog2(VM_NONLINEAR)]	= "nl",
+		[ilog2(VM_ARCH_1)]	= "ar",
+		[ilog2(VM_DONTDUMP)]	= "dd",
+		[ilog2(VM_MIXEDMAP)]	= "mm",
+		[ilog2(VM_HUGEPAGE)]	= "hg",
+		[ilog2(VM_NOHUGEPAGE)]	= "nh",
+		[ilog2(VM_MERGEABLE)]	= "mg",
+	};
+	size_t i;
+
+	seq_puts(m, "VmFlags: ");
+	for (i = 0; i < BITS_PER_LONG; i++) {
+		if (vma->vm_flags & (1UL << i)) {
+			seq_printf(m, "%c%c ",
+				   mnemonics[i][0], mnemonics[i][1]);
+		}
+	}
+	seq_putc(m, '\n');
+}
+
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
 	struct proc_maps_private *priv = m->private;
@@ -581,6 +632,8 @@
 		seq_printf(m, "Nonlinear:      %8lu kB\n",
 				mss.nonlinear >> 10);
 
+	show_smap_vma_flags(m, vma);
+
 	if (m->count < m->size)  /* vma is copied successfully */
 		m->version = (vma != get_gate_vma(task->mm))
 			? vma->vm_start : 0;
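
Each set vm_flags bit contributes its two-letter mnemonic, space-separated,
so a typical private executable mapping in /proc/<pid>/smaps now ends with a
line like the following (illustrative):

	VmFlags: rd ex mr mw me dw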
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ed1d8c7..67de74c 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -151,13 +151,13 @@
 	return 0;
 }
 
-static loff_t pstore_file_llseek(struct file *file, loff_t off, int origin)
+static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
 {
 	struct seq_file *sf = file->private_data;
 
 	if (sf->op)
-		return seq_lseek(file, off, origin);
-	return default_llseek(file, off, origin);
+		return seq_lseek(file, off, whence);
+	return default_llseek(file, off, whence);
 }
 
 static const struct file_operations pstore_file_operations = {
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index af1661f..c7314f1 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -307,6 +307,8 @@
 	}
 }
 
+#ifdef CONFIG_BLOCK
+
 /* Return 1 if 'cmd' will block on frozen filesystem */
 static int quotactl_cmd_write(int cmd)
 {
@@ -322,6 +324,8 @@
 	return 1;
 }
 
+#endif /* CONFIG_BLOCK */
+
 /*
  * look up a superblock on which quota ops will be performed
  * - use the name of a block device to find the superblock thereon
diff --git a/fs/read_write.c b/fs/read_write.c
index d065348..bb34af3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -54,7 +54,7 @@
  * generic_file_llseek_size - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  * @size:	max size of this file in file system
  * @eof:	offset used for SEEK_END position
  *
@@ -67,12 +67,12 @@
  * read/writes behave like SEEK_SET against seeks.
  */
 loff_t
-generic_file_llseek_size(struct file *file, loff_t offset, int origin,
+generic_file_llseek_size(struct file *file, loff_t offset, int whence,
 		loff_t maxsize, loff_t eof)
 {
 	struct inode *inode = file->f_mapping->host;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += eof;
 		break;
@@ -122,17 +122,17 @@
  * generic_file_llseek - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  *
  * This is a generic implemenation of ->llseek useable for all normal local
  * filesystems.  It just updates the file offset to the value specified by
- * @offset and @origin under i_mutex.
+ * @offset and @whence under i_mutex.
  */
-loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
+loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 
-	return generic_file_llseek_size(file, offset, origin,
+	return generic_file_llseek_size(file, offset, whence,
 					inode->i_sb->s_maxbytes,
 					i_size_read(inode));
 }
@@ -142,32 +142,32 @@
  * noop_llseek - No Operation Performed llseek implementation
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  *
 * This is an implementation of ->llseek usable for the rare special case when
  * userspace expects the seek to succeed but the (device) file is actually not
  * able to perform the seek. In this case you use noop_llseek() instead of
  * falling back to the default implementation of ->llseek.
  */
-loff_t noop_llseek(struct file *file, loff_t offset, int origin)
+loff_t noop_llseek(struct file *file, loff_t offset, int whence)
 {
 	return file->f_pos;
 }
 EXPORT_SYMBOL(noop_llseek);
 
-loff_t no_llseek(struct file *file, loff_t offset, int origin)
+loff_t no_llseek(struct file *file, loff_t offset, int whence)
 {
 	return -ESPIPE;
 }
 EXPORT_SYMBOL(no_llseek);
 
-loff_t default_llseek(struct file *file, loff_t offset, int origin)
+loff_t default_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	loff_t retval;
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case SEEK_END:
 			offset += i_size_read(inode);
 			break;
@@ -216,7 +216,7 @@
 }
 EXPORT_SYMBOL(default_llseek);
 
-loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
+loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t (*fn)(struct file *, loff_t, int);
 
@@ -225,11 +225,11 @@
 		if (file->f_op && file->f_op->llseek)
 			fn = file->f_op->llseek;
 	}
-	return fn(file, offset, origin);
+	return fn(file, offset, whence);
 }
 EXPORT_SYMBOL(vfs_llseek);
 
-SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
+SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
 {
 	off_t retval;
 	struct fd f = fdget(fd);
@@ -237,8 +237,8 @@
 		return -EBADF;
 
 	retval = -EINVAL;
-	if (origin <= SEEK_MAX) {
-		loff_t res = vfs_llseek(f.file, offset, origin);
+	if (whence <= SEEK_MAX) {
+		loff_t res = vfs_llseek(f.file, offset, whence);
 		retval = res;
 		if (res != (loff_t)retval)
 			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
@@ -250,7 +250,7 @@
 #ifdef __ARCH_WANT_SYS_LLSEEK
 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		unsigned long, offset_low, loff_t __user *, result,
-		unsigned int, origin)
+		unsigned int, whence)
 {
 	int retval;
 	struct fd f = fdget(fd);
@@ -260,11 +260,11 @@
 		return -EBADF;
 
 	retval = -EINVAL;
-	if (origin > SEEK_MAX)
+	if (whence > SEEK_MAX)
 		goto out_putf;
 
 	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
-			origin);
+			whence);
 
 	retval = (int)offset;
 	if (offset >= 0) {
@@ -935,6 +935,8 @@
 	if (retval > 0) {
 		add_rchar(current, retval);
 		add_wchar(current, retval);
+		fsnotify_access(in.file);
+		fsnotify_modify(out.file);
 	}
 
 	inc_syscr(current);
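
The origin -> whence rename throughout fs/read_write.c above matches the parameter name lseek(2) has always used in userspace; the semantics are unchanged. A small demo of the three standard whence values the VFS dispatches on (plain userspace code; the file path is an arbitrary example):

/* Demonstrates the three standard `whence` values the VFS dispatches on.
 * Purely userspace; nothing here depends on the kernel changes above. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);	/* any readable file */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	printf("SEEK_SET 0 -> %ld\n", (long)lseek(fd, 0, SEEK_SET));
	printf("SEEK_CUR 5 -> %ld\n", (long)lseek(fd, 5, SEEK_CUR));
	printf("SEEK_END 0 -> %ld\n", (long)lseek(fd, 0, SEEK_END)); /* file size */
	close(fd);
	return 0;
}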
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 8375c92..50302d6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -126,7 +126,7 @@
 	return err;
 }
 
-static void reiserfs_vfs_truncate_file(struct inode *inode)
+void reiserfs_vfs_truncate_file(struct inode *inode)
 {
 	mutex_lock(&(REISERFS_I(inode)->tailpack));
 	reiserfs_truncate_file(inode, 1);
@@ -312,7 +312,6 @@
 };
 
 const struct inode_operations reiserfs_file_inode_operations = {
-	.truncate = reiserfs_vfs_truncate_file,
 	.setattr = reiserfs_setattr,
 	.setxattr = reiserfs_setxattr,
 	.getxattr = reiserfs_getxattr,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d83736f..95d7680 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3085,8 +3085,10 @@
 		loff_t isize = i_size_read(inode);
 		loff_t end = offset + iov_length(iov, nr_segs);
 
-		if (end > isize)
-			vmtruncate(inode, isize);
+		if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
+			truncate_setsize(inode, isize);
+			reiserfs_vfs_truncate_file(inode);
+		}
 	}
 
 	return ret;
@@ -3200,8 +3202,13 @@
 	 */
 	reiserfs_write_unlock_once(inode->i_sb, depth);
 	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode))
-		error = vmtruncate(inode, attr->ia_size);
+	    attr->ia_size != i_size_read(inode)) {
+		error = inode_newsize_ok(inode, attr->ia_size);
+		if (!error) {
+			truncate_setsize(inode, attr->ia_size);
+			reiserfs_vfs_truncate_file(inode);
+		}
+	}
 
 	if (!error) {
 		setattr_copy(inode, attr);
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 33215f5..157e474 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -2455,6 +2455,7 @@
 								    *,
 								    int count);
 int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *);
+void reiserfs_vfs_truncate_file(struct inode *inode);
 int reiserfs_commit_page(struct inode *inode, struct page *page,
 			 unsigned from, unsigned to);
 void reiserfs_flush_old_commits(struct super_block *);
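
The reiserfs changes above follow the ongoing removal of vmtruncate() and the ->truncate inode operation: callers now validate the new size with inode_newsize_ok(), apply it with truncate_setsize(), and then invoke the filesystem's own truncate helper. A compilable stub sketch of that ordering; every type and helper below is a stand-in, not kernel code:

/* Stub sketch of the new truncation sequence: validate the size first,
 * then update i_size and the page cache, then let the filesystem free
 * blocks.  Everything here is a stand-in, not kernel code. */
#include <stdio.h>

struct inode { long long i_size; };

static int inode_newsize_ok(struct inode *inode, long long newsize)
{
	return newsize < 0 ? -22 /* -EINVAL, stand-in check */ : 0;
}

static void truncate_setsize(struct inode *inode, long long newsize)
{
	inode->i_size = newsize;	/* kernel also trims the page cache */
}

static void fs_truncate(struct inode *inode)
{
	printf("filesystem frees blocks past %lld\n", inode->i_size);
}

static int do_setattr_size(struct inode *inode, long long newsize)
{
	int error = inode_newsize_ok(inode, newsize);

	if (error)
		return error;
	truncate_setsize(inode, newsize);
	fs_truncate(inode);
	return 0;
}

int main(void)
{
	struct inode inode = { .i_size = 4096 };

	return do_setattr_size(&inode, 1024);
}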
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 99dffab..9d863fb 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -300,14 +300,14 @@
  *
  *	Ready-made ->f_op->llseek()
  */
-loff_t seq_lseek(struct file *file, loff_t offset, int origin)
+loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct seq_file *m = file->private_data;
 	loff_t retval = -EINVAL;
 
 	mutex_lock(&m->lock);
 	m->version = file->f_version;
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8bee4e5..b534869 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -29,6 +29,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
 
 void signalfd_cleanup(struct sighand_struct *sighand)
 {
@@ -227,7 +228,24 @@
 	return total ? total: ret;
 }
 
+#ifdef CONFIG_PROC_FS
+static int signalfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct signalfd_ctx *ctx = f->private_data;
+	sigset_t sigmask;
+
+	sigmask = ctx->sigmask;
+	signotset(&sigmask);
+	render_sigset_t(m, "sigmask:\t", &sigmask);
+
+	return 0;
+}
+#endif
+
 static const struct file_operations signalfd_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= signalfd_show_fdinfo,
+#endif
 	.release	= signalfd_release,
 	.poll		= signalfd_poll,
 	.read		= signalfd_read,
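
With the show_fdinfo hook above, /proc/<pid>/fdinfo/<fd> for a signalfd gains a "sigmask:" line. A small demo that creates a signalfd and dumps its fdinfo; the extra line only appears on kernels carrying this patch:

/* Creates a signalfd and dumps its fdinfo; the "sigmask:" line only
 * appears on kernels carrying the show_fdinfo hook added above. */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/signalfd.h>

int main(void)
{
	sigset_t mask;
	char path[64], line[256];
	FILE *f;
	int fd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	fd = signalfd(-1, &mask, 0);
	if (fd < 0) {
		perror("signalfd");
		return 1;
	}
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
	f = fopen(path, "r");
	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	close(fd);
	return 0;
}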
diff --git a/fs/stat.c b/fs/stat.c
index eae4946..14f4545 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -74,7 +74,7 @@
 {
 	struct path path;
 	int error = -EINVAL;
-	int lookup_flags = 0;
+	unsigned int lookup_flags = 0;
 
 	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
 		      AT_EMPTY_PATH)) != 0)
@@ -84,13 +84,17 @@
 		lookup_flags |= LOOKUP_FOLLOW;
 	if (flag & AT_EMPTY_PATH)
 		lookup_flags |= LOOKUP_EMPTY;
-
+retry:
 	error = user_path_at(dfd, filename, lookup_flags, &path);
 	if (error)
 		goto out;
 
 	error = vfs_getattr(path.mnt, path.dentry, stat);
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 out:
 	return error;
 }
@@ -296,11 +300,13 @@
 	struct path path;
 	int error;
 	int empty = 0;
+	unsigned int lookup_flags = LOOKUP_EMPTY;
 
 	if (bufsiz <= 0)
 		return -EINVAL;
 
-	error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
+retry:
+	error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty);
 	if (!error) {
 		struct inode *inode = path.dentry->d_inode;
 
@@ -314,6 +320,10 @@
 			}
 		}
 		path_put(&path);
+		if (retry_estale(error, lookup_flags)) {
+			lookup_flags |= LOOKUP_REVAL;
+			goto retry;
+		}
 	}
 	return error;
 }
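
This hunk, and the statfs, utimes and xattr hunks that follow, all grow the same loop: if the operation fails with ESTALE on the first pass, redo the path lookup exactly once with LOOKUP_REVAL so stale (typically NFS) dentries get revalidated. A stubbed, compilable sketch of the control flow; the helpers are stand-ins, and the LOOKUP_REVAL value mirrors include/linux/namei.h:

/* Stubbed illustration of the retry_estale() control flow repeated in
 * the following hunks; the lookup helper is a stand-in. */
#include <stdio.h>

#define ESTALE		116
#define LOOKUP_REVAL	0x0020	/* value mirrors include/linux/namei.h */

static int fake_lookup(unsigned int flags)
{
	/* First pass hits a stale dentry; the REVAL pass succeeds. */
	return (flags & LOOKUP_REVAL) ? 0 : -ESTALE;
}

static int retry_estale(int error, unsigned int flags)
{
	return error == -ESTALE && !(flags & LOOKUP_REVAL);
}

int main(void)
{
	unsigned int lookup_flags = 0;
	int error;
retry:
	error = fake_lookup(lookup_flags);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;	/* exactly one retry: REVAL is now set */
	}
	printf("final error: %d\n", error);
	return 0;
}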
diff --git a/fs/statfs.c b/fs/statfs.c
index f8e832e..c219e733 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -77,10 +77,17 @@
 int user_statfs(const char __user *pathname, struct kstatfs *st)
 {
 	struct path path;
-	int error = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
+	int error;
+	unsigned int lookup_flags = LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (!error) {
 		error = vfs_statfs(&path, st);
 		path_put(&path);
+		if (retry_estale(error, lookup_flags)) {
+			lookup_flags |= LOOKUP_REVAL;
+			goto retry;
+		}
 	}
 	return error;
 }
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 71eb7e2..db940a9 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -149,6 +149,7 @@
 	.name		= "sysfs",
 	.mount		= sysfs_mount,
 	.kill_sb	= sysfs_kill_sb,
+	.fs_flags	= FS_USERNS_MOUNT,
 };
 
 int __init sysfs_init(void)
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0a65939..9d4dc683 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -41,9 +41,11 @@
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
+		error = inode_newsize_ok(inode, attr->ia_size);
 		if (error)
 			return error;
+		truncate_setsize(inode, attr->ia_size);
+		sysv_truncate(inode);
 	}
 
 	setattr_copy(inode, attr);
@@ -52,7 +54,6 @@
 }
 
 const struct inode_operations sysv_file_inode_operations = {
-	.truncate	= sysv_truncate,
 	.setattr	= sysv_setattr,
 	.getattr	= sysv_getattr,
 };
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 90b54b4..c1a591a 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -464,6 +464,16 @@
 	return __block_write_begin(page, pos, len, get_block);
 }
 
+static void sysv_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, to, inode->i_size);
+		sysv_truncate(inode);
+	}
+}
+
 static int sysv_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -471,11 +481,8 @@
 	int ret;
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep, get_block);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		sysv_write_failed(mapping, pos + len);
 
 	return ret;
 }
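
sysv here, and ufs further down, replace the open-coded vmtruncate() fallback with a small *_write_failed() helper: if a failed write_begin had already instantiated pages past i_size, trim the page cache back, and let block-allocating filesystems truncate the blocks too. A stubbed sketch of that rollback; all names below are stand-ins:

/* Stubbed rollback logic mirroring sysv_write_failed()/ufs_write_failed():
 * trim anything a failed write_begin instantiated past i_size. */
#include <stdio.h>

struct inode { long long i_size; };

static void truncate_pagecache(struct inode *inode, long long to,
			       long long newsize)
{
	printf("drop page cache from %lld back to %lld\n", to, newsize);
}

static void write_failed(struct inode *inode, long long to)
{
	if (to > inode->i_size)
		truncate_pagecache(inode, to, inode->i_size);
	/* block-allocating filesystems also call their truncate here */
}

int main(void)
{
	struct inode inode = { .i_size = 1024 };

	write_failed(&inode, 4096);	/* failed write past EOF */
	write_failed(&inode, 512);	/* within i_size: nothing to undo */
	return 0;
}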
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 6291163..12817ff 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2560,7 +2560,7 @@
 static int corrupt_data(const struct ubifs_info *c, const void *buf,
 			unsigned int len)
 {
-	unsigned int from, to, i, ffs = chance(1, 2);
+	unsigned int from, to, ffs = chance(1, 2);
 	unsigned char *p = (void *)buf;
 
 	from = random32() % (len + 1);
@@ -2571,11 +2571,9 @@
 		   ffs ? "0xFFs" : "random data");
 
 	if (ffs)
-		for (i = from; i < to; i++)
-			p[i] = 0xFF;
+		memset(p + from, 0xFF, to - from);
 	else
-		for (i = from; i < to; i++)
-			p[i] = random32() % 0x100;
+		prandom_bytes(p + from, to - from);
 
 	return to;
 }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e271fba..8a57477 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -453,11 +453,11 @@
 }
 
 /* Seeking in a directory invalidates saved readdir() state, so free it */
-static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	kfree(file->private_data);
 	file->private_data = NULL;
-	return generic_file_llseek(file, offset, origin);
+	return generic_file_llseek(file, offset, whence);
 }
 
 /* Free saved readdir() state when the directory is closed */
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index df88b95..cbae1ed 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -587,7 +587,6 @@
 static sector_t inode_getblk(struct inode *inode, sector_t block,
 			     int *err, int *new)
 {
-	static sector_t last_block;
 	struct kernel_long_ad laarr[EXTENT_MERGE_SIZE];
 	struct extent_position prev_epos, cur_epos, next_epos;
 	int count = 0, startnum = 0, endnum = 0;
@@ -601,6 +600,7 @@
 	struct udf_inode_info *iinfo = UDF_I(inode);
 	int goal = 0, pgoal = iinfo->i_location.logicalBlockNum;
 	int lastblock = 0;
+	bool isBeyondEOF;
 
 	*err = 0;
 	*new = 0;
@@ -676,11 +676,10 @@
 		return newblock;
 	}
 
-	last_block = block;
 	/* Are we beyond EOF? */
 	if (etype == -1) {
 		int ret;
-
+		isBeyondEOF = 1;
 		if (count) {
 			if (c)
 				laarr[0] = laarr[1];
@@ -718,11 +717,11 @@
 			memset(&laarr[c].extLocation, 0x00,
 				sizeof(struct kernel_lb_addr));
 			count++;
-			endnum++;
 		}
 		endnum = c + 1;
 		lastblock = 1;
 	} else {
+		isBeyondEOF = 0;
 		endnum = startnum = ((count > 2) ? 2 : count);
 
 		/* if the current extent is in position 0,
@@ -765,10 +764,13 @@
 				goal, err);
 		if (!newblocknum) {
 			brelse(prev_epos.bh);
+			brelse(cur_epos.bh);
+			brelse(next_epos.bh);
 			*err = -ENOSPC;
 			return 0;
 		}
-		iinfo->i_lenExtents += inode->i_sb->s_blocksize;
+		if (isBeyondEOF)
+			iinfo->i_lenExtents += inode->i_sb->s_blocksize;
 	}
 
 	/* if the extent the requested block is located in contains multiple
@@ -795,6 +797,8 @@
 	udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
 
 	brelse(prev_epos.bh);
+	brelse(cur_epos.bh);
+	brelse(next_epos.bh);
 
 	newblock = udf_get_pblock(inode->i_sb, newblocknum,
 				iinfo->i_location.partitionReferenceNum, 0);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index eb6d0b7..ff24e44 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -526,6 +526,14 @@
 	return __block_write_begin(page, pos, len, ufs_getfrag_block);
 }
 
+static void ufs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size)
+		truncate_pagecache(inode, to, inode->i_size);
+}
+
 static int ufs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -534,11 +542,8 @@
 
 	ret = block_write_begin(mapping, pos, len, flags, pagep,
 				ufs_getfrag_block);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		ufs_write_failed(mapping, pos + len);
 
 	return ret;
 }
diff --git a/fs/utimes.c b/fs/utimes.c
index bb0696a..f4fb7ec 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -158,13 +158,17 @@
 
 		if (!(flags & AT_SYMLINK_NOFOLLOW))
 			lookup_flags |= LOOKUP_FOLLOW;
-
+retry:
 		error = user_path_at(dfd, filename, lookup_flags, &path);
 		if (error)
 			goto out;
 
 		error = utimes_common(&path, times);
 		path_put(&path);
+		if (retry_estale(error, lookup_flags)) {
+			lookup_flags |= LOOKUP_REVAL;
+			goto retry;
+		}
 	}
 
 out:
diff --git a/fs/xattr.c b/fs/xattr.c
index e21c119..3377dff 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -370,8 +370,9 @@
 {
 	struct path path;
 	int error;
-
-	error = user_path(pathname, &path);
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = mnt_want_write(path.mnt);
@@ -380,6 +381,10 @@
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -389,8 +394,9 @@
 {
 	struct path path;
 	int error;
-
-	error = user_lpath(pathname, &path);
+	unsigned int lookup_flags = 0;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = mnt_want_write(path.mnt);
@@ -399,6 +405,10 @@
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -476,12 +486,17 @@
 {
 	struct path path;
 	ssize_t error;
-
-	error = user_path(pathname, &path);
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = getxattr(path.dentry, name, value, size);
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -490,12 +505,17 @@
 {
 	struct path path;
 	ssize_t error;
-
-	error = user_lpath(pathname, &path);
+	unsigned int lookup_flags = 0;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = getxattr(path.dentry, name, value, size);
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -556,12 +576,17 @@
 {
 	struct path path;
 	ssize_t error;
-
-	error = user_path(pathname, &path);
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = listxattr(path.dentry, list, size);
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -570,12 +595,17 @@
 {
 	struct path path;
 	ssize_t error;
-
-	error = user_lpath(pathname, &path);
+	unsigned int lookup_flags = 0;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = listxattr(path.dentry, list, size);
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -615,8 +645,9 @@
 {
 	struct path path;
 	int error;
-
-	error = user_path(pathname, &path);
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = mnt_want_write(path.mnt);
@@ -625,6 +656,10 @@
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
@@ -633,8 +668,9 @@
 {
 	struct path path;
 	int error;
-
-	error = user_lpath(pathname, &path);
+	unsigned int lookup_flags = 0;
+retry:
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
 	error = mnt_want_write(path.mnt);
@@ -643,6 +679,10 @@
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
+	if (retry_estale(error, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
+	}
 	return error;
 }
 
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 9e0ebe0..33bbbae 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -12,7 +12,6 @@
 #define __ASM_GENERIC_IO_H
 
 #include <asm/page.h> /* I/O is all done through memory accesses */
-#include <asm/cacheflush.h>
 #include <linux/types.h>
 
 #ifdef CONFIG_GENERIC_IOMAP
@@ -154,7 +153,7 @@
 	if (count) {
 		u8 *buf = buffer;
 		do {
-			u8 x = inb(addr);
+			u8 x = __raw_readb(addr + PCI_IOBASE);
 			*buf++ = x;
 		} while (--count);
 	}
@@ -167,7 +166,7 @@
 	if (count) {
 		u16 *buf = buffer;
 		do {
-			u16 x = inw(addr);
+			u16 x = __raw_readw(addr + PCI_IOBASE);
 			*buf++ = x;
 		} while (--count);
 	}
@@ -180,7 +179,7 @@
 	if (count) {
 		u32 *buf = buffer;
 		do {
-			u32 x = inl(addr);
+			u32 x = __raw_readl(addr + PCI_IOBASE);
 			*buf++ = x;
 		} while (--count);
 	}
@@ -193,7 +192,7 @@
 	if (count) {
 		const u8 *buf = buffer;
 		do {
-			outb(*buf++, addr);
+			__raw_writeb(*buf++, addr + PCI_IOBASE);
 		} while (--count);
 	}
 }
@@ -205,7 +204,7 @@
 	if (count) {
 		const u16 *buf = buffer;
 		do {
-			outw(*buf++, addr);
+			__raw_writew(*buf++, addr + PCI_IOBASE);
 		} while (--count);
 	}
 }
@@ -217,42 +216,12 @@
 	if (count) {
 		const u32 *buf = buffer;
 		do {
-			outl(*buf++, addr);
+			__raw_writel(*buf++, addr + PCI_IOBASE);
 		} while (--count);
 	}
 }
 #endif
 
-static inline void readsl(const void __iomem *addr, void *buf, int len)
-{
-	insl(addr - PCI_IOBASE, buf, len);
-}
-
-static inline void readsw(const void __iomem *addr, void *buf, int len)
-{
-	insw(addr - PCI_IOBASE, buf, len);
-}
-
-static inline void readsb(const void __iomem *addr, void *buf, int len)
-{
-	insb(addr - PCI_IOBASE, buf, len);
-}
-
-static inline void writesl(const void __iomem *addr, const void *buf, int len)
-{
-	outsl(addr - PCI_IOBASE, buf, len);
-}
-
-static inline void writesw(const void __iomem *addr, const void *buf, int len)
-{
-	outsw(addr - PCI_IOBASE, buf, len);
-}
-
-static inline void writesb(const void __iomem *addr, const void *buf, int len)
-{
-	outsb(addr - PCI_IOBASE, buf, len);
-}
-
 #ifndef CONFIG_GENERIC_IOMAP
 #define ioread8(addr)		readb(addr)
 #define ioread16(addr)		readw(addr)
diff --git a/include/asm-generic/mmu.h b/include/asm-generic/mmu.h
index 4f4aa56..0ed3f1c 100644
--- a/include/asm-generic/mmu.h
+++ b/include/asm-generic/mmu.h
@@ -7,8 +7,12 @@
  */
 #ifndef __ASSEMBLY__
 typedef struct {
-	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
+
+#ifdef CONFIG_BINFMT_ELF_FDPIC
+	unsigned long		exec_fdpic_loadmap;
+	unsigned long		interp_fdpic_loadmap;
+#endif
 } mm_context_t;
 #endif
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 284e808..701beab 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -219,6 +219,10 @@
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef pte_accessible
+# define pte_accessible(pte)		((void)(pte),1)
+#endif
+
 #ifndef flush_tlb_fix_spurious_fault
 #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
 #endif
@@ -580,6 +584,112 @@
 #endif
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+/*
+ * _PAGE_NUMA works identically to _PAGE_PROTNONE (it's actually the
+ * same bit too). It's set only when _PAGE_PRESENT is not set and it's
+ * never set if _PAGE_PRESENT is set.
+ *
+ * pte/pmd_present() returns true if pte/pmd_numa returns true. Page
+ * fault triggers on those regions if pte/pmd_numa returns true
+ * (because _PAGE_PRESENT is not set).
+ */
+#ifndef pte_numa
+static inline int pte_numa(pte_t pte)
+{
+	return (pte_flags(pte) &
+		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+#ifndef pmd_numa
+static inline int pmd_numa(pmd_t pmd)
+{
+	return (pmd_flags(pmd) &
+		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+/*
+ * pte/pmd_mknuma set the _PAGE_ACCESSED bitflag automatically
+ * because they're called by the NUMA hinting minor page fault. If we
+ * didn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
+ * would be forced to set it later while filling the TLB after we
+ * return to userland. That would trigger a second write to memory
+ * that we optimize away by setting _PAGE_ACCESSED here.
+ */
+#ifndef pte_mknonnuma
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+	pte = pte_clear_flags(pte, _PAGE_NUMA);
+	return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pmd_mknonnuma
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+	pmd = pmd_clear_flags(pmd, _PAGE_NUMA);
+	return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pte_mknuma
+static inline pte_t pte_mknuma(pte_t pte)
+{
+	pte = pte_set_flags(pte, _PAGE_NUMA);
+	return pte_clear_flags(pte, _PAGE_PRESENT);
+}
+#endif
+
+#ifndef pmd_mknuma
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+	pmd = pmd_set_flags(pmd, _PAGE_NUMA);
+	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+}
+#endif
+#else
+extern int pte_numa(pte_t pte);
+extern int pmd_numa(pmd_t pmd);
+extern pte_t pte_mknonnuma(pte_t pte);
+extern pmd_t pmd_mknonnuma(pmd_t pmd);
+extern pte_t pte_mknuma(pte_t pte);
+extern pmd_t pmd_mknuma(pmd_t pmd);
+#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+#else
+static inline int pmd_numa(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline int pte_numa(pte_t pte)
+{
+	return 0;
+}
+
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+	return pmd;
+}
+
+static inline pte_t pte_mknuma(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+	return pmd;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 #endif /* CONFIG_MMU */
 
 #endif /* !__ASSEMBLY__ */
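
The _PAGE_NUMA encoding above boils down to one invariant: a pte counts as a NUMA-hinting pte exactly when the NUMA bit is set and the present bit is clear, which is what lets the hinting fault be told apart from an ordinary access fault. A standalone model of the predicate; the bit values are invented for illustration, only the relationship matters:

/* Standalone model of the pte_numa() predicate; the bit values are
 * made up for illustration. */
#include <assert.h>

#define _PAGE_PRESENT	0x001UL
#define _PAGE_NUMA	0x100UL		/* hypothetical placement */

static int pte_numa(unsigned long pte_flags)
{
	return (pte_flags & (_PAGE_NUMA | _PAGE_PRESENT)) == _PAGE_NUMA;
}

int main(void)
{
	assert(pte_numa(_PAGE_NUMA));			/* NUMA hinting pte */
	assert(!pte_numa(_PAGE_NUMA | _PAGE_PRESENT));	/* normal present pte */
	assert(!pte_numa(0));				/* not present, no hint */
	return 0;
}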
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 3fd8280..fad21c9 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1431,6 +1431,8 @@
 extern u32 drm_vblank_count(struct drm_device *dev, int crtc);
 extern u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
 				     struct timeval *vblanktime);
+extern void drm_send_vblank_event(struct drm_device *dev, int crtc,
+				     struct drm_pending_vblank_event *e);
 extern bool drm_handle_vblank(struct drm_device *dev, int crtc);
 extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
@@ -1503,6 +1505,7 @@
 
 extern unsigned int drm_vblank_offdelay;
 extern unsigned int drm_timestamp_precision;
+extern unsigned int drm_timestamp_monotonic;
 
 extern struct class *drm_class;
 extern struct proc_dir_entry *drm_proc_root;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 3fa18b7..00d78b5 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -792,6 +792,7 @@
 
 	/* output poll support */
 	bool poll_enabled;
+	bool poll_running;
 	struct delayed_work output_poll_work;
 
 	/* pointers to standard properties */
@@ -887,14 +888,14 @@
 extern void drm_mode_copy(struct drm_display_mode *dst, const struct drm_display_mode *src);
 extern struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
 						   const struct drm_display_mode *mode);
-extern void drm_mode_debug_printmodeline(struct drm_display_mode *mode);
+extern void drm_mode_debug_printmodeline(const struct drm_display_mode *mode);
 extern void drm_mode_config_init(struct drm_device *dev);
 extern void drm_mode_config_reset(struct drm_device *dev);
 extern void drm_mode_config_cleanup(struct drm_device *dev);
 extern void drm_mode_set_name(struct drm_display_mode *mode);
-extern bool drm_mode_equal(struct drm_display_mode *mode1, struct drm_display_mode *mode2);
-extern int drm_mode_width(struct drm_display_mode *mode);
-extern int drm_mode_height(struct drm_display_mode *mode);
+extern bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2);
+extern int drm_mode_width(const struct drm_display_mode *mode);
+extern int drm_mode_height(const struct drm_display_mode *mode);
 
 /* for use by the fb module */
 extern int drm_mode_attachmode_crtc(struct drm_device *dev,
@@ -919,12 +920,6 @@
 extern void drm_mode_connector_list_update(struct drm_connector *connector);
 extern int drm_mode_connector_update_edid_property(struct drm_connector *connector,
 						struct edid *edid);
-extern int drm_connector_property_set_value(struct drm_connector *connector,
-					 struct drm_property *property,
-					 uint64_t value);
-extern int drm_connector_property_get_value(struct drm_connector *connector,
-					 struct drm_property *property,
-					 uint64_t *value);
 extern int drm_object_property_set_value(struct drm_mode_object *obj,
 					 struct drm_property *property,
 					 uint64_t val);
@@ -946,8 +941,6 @@
 extern void drm_crtc_probe_connector_modes(struct drm_device *dev, int maxX, int maxY);
 extern bool drm_crtc_in_use(struct drm_crtc *crtc);
 
-extern void drm_connector_attach_property(struct drm_connector *connector,
-					  struct drm_property *property, uint64_t init_val);
 extern void drm_object_attach_property(struct drm_mode_object *obj,
 				       struct drm_property *property,
 				       uint64_t init_val);
@@ -1037,6 +1030,7 @@
 extern int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 				    void *data, struct drm_file *file_priv);
 extern u8 *drm_find_cea_extension(struct edid *edid);
+extern u8 drm_match_cea_mode(struct drm_display_mode *to_match);
 extern bool drm_detect_hdmi_monitor(struct edid *edid);
 extern bool drm_detect_monitor_audio(struct edid *edid);
 extern int drm_mode_page_flip_ioctl(struct drm_device *dev,
@@ -1053,6 +1047,7 @@
 				int GTF_2C, int GTF_K, int GTF_2J);
 extern int drm_add_modes_noedid(struct drm_connector *connector,
 				int hdisplay, int vdisplay);
+extern uint8_t drm_mode_cea_vic(const struct drm_display_mode *mode);
 
 extern int drm_edid_header_is_valid(const u8 *raw_edid);
 extern bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid);
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index e01cc80..f43d556 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -137,6 +137,8 @@
 
 extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode);
 
+extern void drm_helper_move_panel_connectors_to_head(struct drm_device *);
+
 extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
 					  struct drm_mode_fb_cmd2 *mode_cmd);
 
@@ -162,6 +164,7 @@
 extern void drm_kms_helper_poll_init(struct drm_device *dev);
 extern void drm_kms_helper_poll_fini(struct drm_device *dev);
 extern void drm_helper_hpd_irq_event(struct drm_device *dev);
+extern void drm_kms_helper_hotplug_event(struct drm_device *dev);
 
 extern void drm_kms_helper_poll_disable(struct drm_device *dev);
 extern void drm_kms_helper_poll_enable(struct drm_device *dev);
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index fe06148..e8e1417 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -25,6 +25,7 @@
 
 #include <linux/types.h>
 #include <linux/i2c.h>
+#include <linux/delay.h>
 
 /*
  * Unless otherwise noted, all values are from the DP 1.1a spec.  Note that
@@ -311,6 +312,14 @@
 #define MODE_I2C_READ	4
 #define MODE_I2C_STOP	8
 
+/**
+ * struct i2c_algo_dp_aux_data - driver interface structure for i2c over dp
+ * 				 aux algorithm
+ * @running: set by the algo indicating whether an i2c is ongoing or whether
+ * 	     the i2c bus is quiescent
+ * @address: i2c target address for the currently ongoing transfer
+ * @aux_ch: driver callback to transfer a single byte of the i2c payload
+ */
 struct i2c_algo_dp_aux_data {
 	bool running;
 	u16 address;
@@ -322,4 +331,34 @@
 int
 i2c_dp_aux_add_bus(struct i2c_adapter *adapter);
 
+
+#define DP_LINK_STATUS_SIZE	   6
+bool drm_dp_channel_eq_ok(u8 link_status[DP_LINK_STATUS_SIZE],
+			  int lane_count);
+bool drm_dp_clock_recovery_ok(u8 link_status[DP_LINK_STATUS_SIZE],
+			      int lane_count);
+u8 drm_dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE],
+				     int lane);
+u8 drm_dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE],
+					  int lane);
+
+#define DP_RECEIVER_CAP_SIZE	0xf
+void drm_dp_link_train_clock_recovery_delay(u8 dpcd[DP_RECEIVER_CAP_SIZE]);
+void drm_dp_link_train_channel_eq_delay(u8 dpcd[DP_RECEIVER_CAP_SIZE]);
+
+u8 drm_dp_link_rate_to_bw_code(int link_rate);
+int drm_dp_bw_code_to_link_rate(u8 link_bw);
+
+static inline int
+drm_dp_max_link_rate(u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+	return drm_dp_bw_code_to_link_rate(dpcd[DP_MAX_LINK_RATE]);
+}
+
+static inline u8
+drm_dp_max_lane_count(u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+	return dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK;
+}
+
 #endif /* _DRM_DP_HELPER_H_ */
diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h
index 3650d5d..fce2ef3 100644
--- a/include/drm/drm_hashtab.h
+++ b/include/drm/drm_hashtab.h
@@ -61,5 +61,19 @@
 extern int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item);
 extern void drm_ht_remove(struct drm_open_hash *ht);
 
+/*
+ * RCU-safe interface
+ *
+ * The user of this API needs to make sure that two or more instances of the
+ * hash table manipulation functions are never run simultaneously.
+ * The lookup function drm_ht_find_item_rcu may, however, run simultaneously
+ * with any of the manipulation functions as long as it's called from within
+ * an RCU read-locked section.
+ */
+#define drm_ht_insert_item_rcu drm_ht_insert_item
+#define drm_ht_just_insert_please_rcu drm_ht_just_insert_please
+#define drm_ht_remove_key_rcu drm_ht_remove_key
+#define drm_ht_remove_item_rcu drm_ht_remove_item
+#define drm_ht_find_item_rcu drm_ht_find_item
 
 #endif
diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h
index 3c13a3a..808dad2 100644
--- a/include/drm/exynos_drm.h
+++ b/include/drm/exynos_drm.h
@@ -85,4 +85,30 @@
 	int (*get_hpd)(void);
 };
 
+/**
+ * Platform Specific Structure for DRM based IPP.
+ *
+ * @inv_pclk: if set to 1, invert the pixel clock
+ * @inv_vsync: if set to 1, invert the vsync signal for wb
+ * @inv_href: if set to 1, invert the href signal
+ * @inv_hsync: if set to 1, invert the hsync signal for wb
+ */
+struct exynos_drm_ipp_pol {
+	unsigned int inv_pclk;
+	unsigned int inv_vsync;
+	unsigned int inv_href;
+	unsigned int inv_hsync;
+};
+
+/**
+ * Platform Specific Structure for DRM based FIMC.
+ *
+ * @pol: current hardware block polarity settings.
+ * @clk_rate: current hardware clock rate.
+ */
+struct exynos_drm_fimc_pdata {
+	struct exynos_drm_ipp_pol pol;
+	int clk_rate;
+};
+
 #endif	/* _EXYNOS_DRM_H_ */
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 2e37e9f..6eb76a1 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -3,7 +3,7 @@
 #ifndef _DRM_INTEL_GTT_H
 #define	_DRM_INTEL_GTT_H
 
-const struct intel_gtt {
+struct intel_gtt {
 	/* Size of memory reserved for graphics by the BIOS */
 	unsigned int stolen_size;
 	/* Total number of gtt entries. */
@@ -17,6 +17,7 @@
 	unsigned int do_idle_maps : 1;
 	/* Share the scratch page dma with ppgtts. */
 	dma_addr_t scratch_page_dma;
+	struct page *scratch_page;
 	/* for ppgtt PDE access */
 	u32 __iomem *gtt;
 	/* needed for ioremap in drm/i915 */
@@ -39,10 +40,6 @@
 #define AGP_DCACHE_MEMORY	1
 #define AGP_PHYS_MEMORY		2
 
-/* New caching attributes for gen6/sandybridge */
-#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2)
-#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4)
-
 /* flag for GFDT type */
 #define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
 
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index e8028ad..3cb5d84 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -141,8 +141,6 @@
  * struct ttm_buffer_object
  *
  * @bdev: Pointer to the buffer object device structure.
- * @buffer_start: The virtual user-space start address of ttm_bo_type_user
- * buffers.
  * @type: The bo type.
  * @destroy: Destruction function. If NULL, kfree is used.
  * @num_pages: Actual number of pages.
@@ -172,7 +170,6 @@
  * @seq_valid: The value of @val_seq is valid. This value is protected by
  * the bo_device::lru_lock.
  * @reserved: Deadlock-free lock used for synchronization state transitions.
- * @sync_obj_arg: Opaque argument to synchronization object function.
  * @sync_obj: Pointer to a synchronization object.
  * @priv_flags: Flags describing buffer object internal state.
  * @vm_rb: Rb node for the vm rb tree.
@@ -200,7 +197,6 @@
 
 	struct ttm_bo_global *glob;
 	struct ttm_bo_device *bdev;
-	unsigned long buffer_start;
 	enum ttm_bo_type type;
 	void (*destroy) (struct ttm_buffer_object *);
 	unsigned long num_pages;
@@ -255,7 +251,6 @@
 	 * checking NULL while reserved but not holding the mentioned lock.
 	 */
 
-	void *sync_obj_arg;
 	void *sync_obj;
 	unsigned long priv_flags;
 
@@ -342,7 +337,6 @@
  * @bo: The buffer object.
  * @placement: Proposed placement for the buffer object.
  * @interruptible: Sleep interruptible if sleeping.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  *
  * Changes placement and caching policy of the buffer object
@@ -355,7 +349,7 @@
  */
 extern int ttm_bo_validate(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement,
-				bool interruptible, bool no_wait_reserve,
+				bool interruptible,
 				bool no_wait_gpu);
 
 /**
@@ -429,8 +423,9 @@
  * @no_wait: Return immediately if buffer is busy.
  *
  * Synchronizes a buffer object for CPU RW access. This means
- * blocking command submission that affects the buffer and
- * waiting for buffer idle. This lock is recursive.
+ * command submission that affects the buffer will return -EBUSY
+ * until ttm_bo_synccpu_write_release is called.
+ *
  * Returns
  * -EBUSY if the buffer is busy and no_wait is true.
  * -ERESTARTSYS if interrupted by a signal.
@@ -472,8 +467,6 @@
  * @type: Requested type of buffer object.
  * @flags: Initial placement flags.
  * @page_alignment: Data alignment in pages.
- * @buffer_start: Virtual address of user space data backing a
- * user buffer object.
  * @interruptible: If needing to sleep to wait for GPU resources,
  * sleep interruptible.
  * @persistent_swap_storage: Usually the swap storage is deleted for buffers
@@ -505,7 +498,6 @@
 			enum ttm_bo_type type,
 			struct ttm_placement *placement,
 			uint32_t page_alignment,
-			unsigned long buffer_start,
 			bool interruptible,
 			struct file *persistent_swap_storage,
 			size_t acc_size,
@@ -521,8 +513,6 @@
  * @type: Requested type of buffer object.
  * @flags: Initial placement flags.
  * @page_alignment: Data alignment in pages.
- * @buffer_start: Virtual address of user space data backing a
- * user buffer object.
  * @interruptible: If needing to sleep while waiting for GPU resources,
  * sleep interruptible.
  * @persistent_swap_storage: Usually the swap storage is deleted for buffers
@@ -545,7 +535,6 @@
 				enum ttm_bo_type type,
 				struct ttm_placement *placement,
 				uint32_t page_alignment,
-				unsigned long buffer_start,
 				bool interruptible,
 				struct file *persistent_swap_storage,
 				struct ttm_buffer_object **p_bo);
@@ -736,4 +725,18 @@
 
 extern void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
 
+/**
+ * ttm_bo_is_reserved - return an indication if a ttm buffer object is reserved
+ *
+ * @bo:     The buffer object to check.
+ *
+ * This function returns an indication of whether a bo is reserved, and
+ * should only be used to print an error when the bo unexpectedly is not
+ * reserved (i.e. on incorrect API usage), since there's no guarantee that
+ * it is the caller that is holding the reservation.
+ */
+static inline bool ttm_bo_is_reserved(struct ttm_buffer_object *bo)
+{
+	return atomic_read(&bo->reserved);
+}
+
 #endif
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d803b92..e3a43a4 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -394,7 +394,7 @@
 	 */
 	int (*move) (struct ttm_buffer_object *bo,
 		     bool evict, bool interruptible,
-		     bool no_wait_reserve, bool no_wait_gpu,
+		     bool no_wait_gpu,
 		     struct ttm_mem_reg *new_mem);
 
 	/**
@@ -422,10 +422,10 @@
 	 * documentation.
 	 */
 
-	bool (*sync_obj_signaled) (void *sync_obj, void *sync_arg);
-	int (*sync_obj_wait) (void *sync_obj, void *sync_arg,
+	bool (*sync_obj_signaled) (void *sync_obj);
+	int (*sync_obj_wait) (void *sync_obj,
 			      bool lazy, bool interruptible);
-	int (*sync_obj_flush) (void *sync_obj, void *sync_arg);
+	int (*sync_obj_flush) (void *sync_obj);
 	void (*sync_obj_unref) (void **sync_obj);
 	void *(*sync_obj_ref) (void *sync_obj);
 
@@ -521,8 +521,6 @@
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @val_seq: Current validation sequence.
- * @nice_mode: Try nicely to wait for buffer idle when cleaning a manager.
- * If a GPU lockup has been detected, this is forced to 0.
  * @dev_mapping: A pointer to the struct address_space representing the
  * device address space.
  * @wq: Work queue structure for the delayed delete workqueue.
@@ -556,7 +554,6 @@
 	 * Protected by load / firstopen / lastclose /unload sync.
 	 */
 
-	bool nice_mode;
 	struct address_space *dev_mapping;
 
 	/*
@@ -706,7 +703,6 @@
  * @proposed_placement: Proposed new placement for the buffer object.
  * @mem: A struct ttm_mem_reg.
 * @interruptible: Sleep interruptible if sleeping.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  *
  * Allocate memory space for the buffer object pointed to by @bo, using
@@ -722,27 +718,13 @@
 				struct ttm_placement *placement,
 				struct ttm_mem_reg *mem,
 				bool interruptible,
-				bool no_wait_reserve, bool no_wait_gpu);
+				bool no_wait_gpu);
 
 extern void ttm_bo_mem_put(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *mem);
 extern void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
 				  struct ttm_mem_reg *mem);
 
-/**
- * ttm_bo_wait_for_cpu
- *
- * @bo: Pointer to a struct ttm_buffer_object.
- * @no_wait: Don't sleep while waiting.
- *
- * Wait until a buffer object is no longer sync'ed for CPU access.
- * Returns:
- * -EBUSY: Buffer object was sync'ed for CPU access. (only if no_wait == 1).
- * -ERESTARTSYS: An interruptible sleep was interrupted by a signal.
- */
-
-extern int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait);
-
 extern void ttm_bo_global_release(struct drm_global_reference *ref);
 extern int ttm_bo_global_init(struct drm_global_reference *ref);
 
@@ -918,7 +900,6 @@
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @evict: 1: This is an eviction. Don't try to pipeline.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -933,15 +914,14 @@
  */
 
 extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-			   bool evict, bool no_wait_reserve,
-			   bool no_wait_gpu, struct ttm_mem_reg *new_mem);
+			   bool evict, bool no_wait_gpu,
+			   struct ttm_mem_reg *new_mem);
 
 /**
  * ttm_bo_move_memcpy
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @evict: 1: This is an eviction. Don't try to pipeline.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -956,8 +936,8 @@
  */
 
 extern int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-			      bool evict, bool no_wait_reserve,
-			      bool no_wait_gpu, struct ttm_mem_reg *new_mem);
+			      bool evict, bool no_wait_gpu,
+			      struct ttm_mem_reg *new_mem);
 
 /**
  * ttm_bo_free_old_node
@@ -973,10 +953,7 @@
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @sync_obj: A sync object that signals when moving is complete.
- * @sync_obj_arg: An argument to pass to the sync object idle / wait
- * functions.
  * @evict: This is an evict move. Don't return until the buffer is idle.
- * @no_wait_reserve: Return immediately if other buffers are busy.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -990,9 +967,7 @@
 
 extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 				     void *sync_obj,
-				     void *sync_obj_arg,
-				     bool evict, bool no_wait_reserve,
-				     bool no_wait_gpu,
+				     bool evict, bool no_wait_gpu,
 				     struct ttm_mem_reg *new_mem);
 /**
  * ttm_io_prot
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 1926cae..547e19f 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -39,8 +39,6 @@
  *
  * @head:           list head for thread-private list.
  * @bo:             refcounted buffer object pointer.
- * @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once
- * adding a new sync object.
  * @reserved:       Indicates whether @bo has been reserved for validation.
  * @removed:        Indicates whether @bo has been removed from lru lists.
  * @put_count:      Number of outstanding references on bo::list_kref.
@@ -50,7 +48,6 @@
 struct ttm_validate_buffer {
 	struct list_head head;
 	struct ttm_buffer_object *bo;
-	void *new_sync_obj_arg;
 	bool reserved;
 	bool removed;
 	int put_count;
diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h
index d6d1da4..72dcbe8 100644
--- a/include/drm/ttm/ttm_memory.h
+++ b/include/drm/ttm/ttm_memory.h
@@ -60,7 +60,6 @@
  * for the GPU, and this will otherwise block other workqueue tasks(?)
  * At this point we use only a single-threaded workqueue.
  * @work: The workqueue callback for the shrink queue.
- * @queue: Wait queue for processes suspended waiting for memory.
  * @lock: Lock to protect the @shrink - and the memory accounting members,
  * that is, essentially the whole structure with some exceptions.
  * @zones: Array of pointers to accounting zones.
@@ -80,7 +79,6 @@
 	struct ttm_mem_shrink *shrink;
 	struct workqueue_struct *swap_queue;
 	struct work_struct work;
-	wait_queue_head_t queue;
 	spinlock_t lock;
 	struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES];
 	unsigned int num_zones;
diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h
index b01c563..fc0cf06 100644
--- a/include/drm/ttm/ttm_object.h
+++ b/include/drm/ttm/ttm_object.h
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <drm/drm_hashtab.h>
 #include <linux/kref.h>
+#include <linux/rcupdate.h>
 #include <ttm/ttm_memory.h>
 
 /**
@@ -120,6 +121,7 @@
  */
 
 struct ttm_base_object {
+	struct rcu_head rhead;
 	struct drm_hash_item hash;
 	enum ttm_object_type object_type;
 	bool shareable;
@@ -268,4 +270,6 @@
 
 extern void ttm_object_device_release(struct ttm_object_device **p_tdev);
 
+#define ttm_base_object_kfree(__object, __base)\
+	kfree_rcu(__object, __base.rhead)
 #endif
diff --git a/include/linux/asn1.h b/include/linux/asn1.h
index 5c3f4e4..eed6982 100644
--- a/include/linux/asn1.h
+++ b/include/linux/asn1.h
@@ -64,4 +64,6 @@
 	ASN1_LONG_TAG	= 31	/* Long form tag */
 };
 
+#define ASN1_INDEFINITE_LENGTH 0x80
+
 #endif /* _LINUX_ASN1_H */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2a9a9ab..12731a19 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -114,6 +114,7 @@
 int bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
+__printf(3, 4)
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 5ffc6dd..da9a082 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -134,4 +134,14 @@
 	void (*kick_battery)(void);
 };
 
+#ifdef CONFIG_OF
+struct backlight_device *of_find_backlight_by_node(struct device_node *node);
+#else
+static inline struct backlight_device *
+of_find_backlight_by_node(struct device_node *node)
+{
+	return NULL;
+}
+#endif
+
 #endif
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 93b1e09..e0ce311 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -350,6 +350,7 @@
 				    enum bcma_clkmode clkmode);
 extern void bcma_core_pll_ctl(struct bcma_device *core, u32 req, u32 status,
 			      bool on);
+extern u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset);
 #define BCMA_DMA_TRANSLATION_MASK	0xC0000000
 #define  BCMA_DMA_TRANSLATION_NONE	0x00000000
 #define  BCMA_DMA_TRANSLATION_DMA32_CMT	0x40000000 /* Client Mode Translation for 32-bit DMA */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2630c9b..0530b98 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -54,8 +54,6 @@
 #define BINPRM_FLAGS_EXECFD_BIT 1
 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT)
 
-#define BINPRM_MAX_RECURSION 4
-
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
 	siginfo_t *siginfo;
@@ -114,6 +112,7 @@
 			   unsigned long stack_top,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
+extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc, const char *const *argv,
 			       struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
@@ -121,8 +120,4 @@
 extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
-#ifdef __ARCH_WANT_KERNEL_EXECVE
-extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn;
-#endif
-
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1756001..f94bc83 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -378,6 +378,12 @@
 
 	unsigned int		nr_sorted;
 	unsigned int		in_flight[2];
+	/*
+	 * Number of active block driver functions for which blk_drain_queue()
+	 * must wait. Must be incremented around functions that unlock the
+	 * queue_lock internally, e.g. scsi_request_fn().
+	 */
+	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
 	struct timer_list	timeout;
@@ -437,7 +443,7 @@
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
 #define	QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
-#define QUEUE_FLAG_DEAD		5	/* queue being torn down */
+#define QUEUE_FLAG_DYING	5	/* queue being torn down */
 #define QUEUE_FLAG_BYPASS	6	/* act as dumb FIFO queue */
 #define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES     8	/* disable merge attempts */
@@ -452,6 +458,7 @@
 #define QUEUE_FLAG_ADD_RANDOM  16	/* Contributes to random pool */
 #define QUEUE_FLAG_SECDISCARD  17	/* supports SECDISCARD */
 #define QUEUE_FLAG_SAME_FORCE  18	/* force complete on same CPU */
+#define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -521,6 +528,7 @@
 
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
@@ -1180,13 +1188,25 @@
 
 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 {
-	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+	unsigned int alignment, granularity, offset;
 
 	if (!lim->max_discard_sectors)
 		return 0;
 
-	return (lim->discard_granularity + lim->discard_alignment - alignment)
-		& (lim->discard_granularity - 1);
+	/* Why are these in bytes, not sectors? */
+	alignment = lim->discard_alignment >> 9;
+	granularity = lim->discard_granularity >> 9;
+	if (!granularity)
+		return 0;
+
+	/* Offset of the partition start in 'granularity' sectors */
+	offset = sector_div(sector, granularity);
+
+	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
+	offset = (granularity + alignment - offset) % granularity;
+
+	/* Turn it back into bytes, gaah */
+	return offset << 9;
 }
 
 static inline int bdev_discard_alignment(struct block_device *bdev)
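
The rewritten queue_limit_discard_alignment() converts the byte-based discard limits to sectors, finds how far the given sector sits into a granularity interval, and returns the distance (biased by the reported alignment) to the next boundary, converted back to bytes. A userspace replica of the arithmetic, with sector_div() replaced by a plain modulus for illustration:

/* Userspace replica of the new queue_limit_discard_alignment() math;
 * sector_div() is replaced by plain % for illustration. */
#include <stdio.h>

static unsigned long long discard_alignment_bytes(unsigned int granularity_bytes,
						  unsigned int alignment_bytes,
						  unsigned long long sector)
{
	unsigned long long granularity = granularity_bytes >> 9; /* to sectors */
	unsigned long long alignment = alignment_bytes >> 9;
	unsigned long long offset;

	if (!granularity)
		return 0;
	offset = sector % granularity;
	offset = (granularity + alignment - offset) % granularity;
	return offset << 9;	/* back to bytes */
}

int main(void)
{
	/* 1 MiB granularity, zero alignment, partition at sector 2048:
	 * the partition is aligned, so the offset comes out 0. */
	printf("%llu\n", discard_alignment_bytes(1 << 20, 0, 2048));
	return 0;
}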
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 4d0fb3d..a226652 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -67,6 +67,5 @@
 int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
-void bsg_goose_queue(struct request_queue *q);
 
 #endif
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 6470792..084d3c6 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -43,7 +43,6 @@
 	struct ceph_entity_addr my_addr;
 	int mount_timeout;
 	int osd_idle_ttl;
-	int osd_timeout;
 	int osd_keepalive_timeout;
 
 	/*
@@ -63,7 +62,6 @@
  * defaults
  */
 #define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
 #define CEPH_OSD_KEEPALIVE_DEFAULT  5
 #define CEPH_OSD_IDLE_TTL_DEFAULT    60
 
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e37acbe..10a417f 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -123,6 +123,7 @@
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 				struct ceph_pg pgid);
 
+extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
 
 #endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index de91fbd..2c04afe 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -87,6 +87,8 @@
  *
  *  lpgp_num -- as above.
  */
+#define CEPH_NOPOOL  ((__u64) (-1))  /* pool id not defined */
+
 #define CEPH_PG_TYPE_REP     1
 #define CEPH_PG_TYPE_RAID4   2
 #define CEPH_PG_POOL_VERSION 2
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 784ebfe..dec7e2d1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -23,6 +23,61 @@
 #define COMPAT_USE_64BIT_TIME 0
 #endif
 
+#ifndef __SC_DELOUSE
+#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v))
+#endif
+
+#define __SC_CCAST1(t1, a1)      __SC_DELOUSE(t1,a1)
+#define __SC_CCAST2(t2, a2, ...) __SC_DELOUSE(t2,a2), __SC_CCAST1(__VA_ARGS__)
+#define __SC_CCAST3(t3, a3, ...) __SC_DELOUSE(t3,a3), __SC_CCAST2(__VA_ARGS__)
+#define __SC_CCAST4(t4, a4, ...) __SC_DELOUSE(t4,a4), __SC_CCAST3(__VA_ARGS__)
+#define __SC_CCAST5(t5, a5, ...) __SC_DELOUSE(t5,a5), __SC_CCAST4(__VA_ARGS__)
+#define __SC_CCAST6(t6, a6, ...) __SC_DELOUSE(t6,a6), __SC_CCAST5(__VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE1(name, ...) \
+        COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE2(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE3(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE4(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE5(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE6(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
+
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__));	\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__));	\
+	asmlinkage long compat_SyS##name(__SC_LONG##x(__VA_ARGS__))	\
+	{								\
+		return (long) C_SYSC##name(__SC_CCAST##x(__VA_ARGS__));	\
+	}								\
+	SYSCALL_ALIAS(compat_sys##name, compat_SyS##name);		\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__))
+
+#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__))
+
+#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
+#ifndef compat_user_stack_pointer
+#define compat_user_stack_pointer() current_user_stack_pointer()
+#endif
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+#ifndef compat_sigaltstack	/* we'll need that for MIPS */
+typedef struct compat_sigaltstack {
+	compat_uptr_t			ss_sp;
+	int				ss_flags;
+	compat_size_t			ss_size;
+} compat_stack_t;
+#endif
+#endif
+
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
@@ -587,6 +642,16 @@
 
 asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
 				    compat_off_t __user *offset, compat_size_t count);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
+				       compat_stack_t __user *uoss_ptr);
+
+int compat_restore_altstack(const compat_stack_t __user *uss);
+int __compat_save_altstack(compat_stack_t __user *, unsigned long);
+#endif
+
+asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+						 struct compat_timespec __user *interval);
 
 #else
 
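The __SC_DELOUSE cast at the top of the new COMPAT_SYSCALL_DEFINEx machinery launders every argument through unsigned long so that a 32-bit value arriving in a 64-bit register is narrowed to its declared type before the handler sees it. A standalone demonstration of the cast; the register contents below are invented:

/* Standalone demo of the __SC_DELOUSE cast used by the new
 * COMPAT_SYSCALL_DEFINEx machinery: each argument is laundered
 * through unsigned long before being narrowed to its real type. */
#include <stdio.h>

#define __SC_DELOUSE(t, v) ((t)(unsigned long)(v))

int main(void)
{
	/* Pretend a 64-bit register carries junk in its upper half. */
	unsigned long long raw = 0x123456780000002aULL;
	unsigned int arg = __SC_DELOUSE(unsigned int, raw);

	printf("deloused argument: %u\n", arg);	/* 42 */
	return 0;
}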
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c..662fd1b 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -31,6 +31,8 @@
 
 #define __linktime_error(message) __attribute__((__error__(message)))
 
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
 #if __GNUC_MINOR__ >= 5
 /*
  * Mark a position in code as unreachable.  This can be used to
@@ -63,3 +65,13 @@
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#if __GNUC_MINOR__ >= 4
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#endif
+#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index d8e636e..973ce10 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -29,3 +29,10 @@
 #endif
 
 #define uninitialized_var(x) x
+
+#ifndef __HAVE_BUILTIN_BSWAP16__
+/* icc has this, but it's called _bswap16 */
+#define __HAVE_BUILTIN_BSWAP16__
+#define __builtin_bswap16 _bswap16
+#endif
+
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f430e41..dd852b7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -10,6 +10,7 @@
 # define __force	__attribute__((force))
 # define __nocast	__attribute__((nocast))
 # define __iomem	__attribute__((noderef, address_space(2)))
+# define __must_hold(x)	__attribute__((context(x,1,1)))
 # define __acquires(x)	__attribute__((context(x,0,1)))
 # define __releases(x)	__attribute__((context(x,1,0)))
 # define __acquire(x)	__context__(x,1)
@@ -33,6 +34,7 @@
 # define __chk_user_ptr(x) (void)0
 # define __chk_io_ptr(x) (void)0
 # define __builtin_warning(x, y...) (1)
+# define __must_hold(x)
 # define __acquires(x)
 # define __releases(x)
 # define __acquire(x) (void)0
@@ -42,6 +44,10 @@
 # define __rcu
 #endif
 
+/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+
 #ifdef __KERNEL__
 
 #ifdef __GNUC__
@@ -164,6 +170,11 @@
     (typeof(ptr)) (__ptr + (off)); })
 #endif
 
+/* Not-quite-unique ID. */
+#ifndef __UNIQUE_ID
+# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
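To illustrate the pasting machinery: __UNIQUE_ID() mints a fresh identifier per expansion, so a macro can declare file-scope symbols without colliding when used twice. An illustrative expansion, not from this patch:

	/* With the gcc4 __COUNTER__ variant:
	 *	static int __UNIQUE_ID(foo);	expands to	static int __UNIQUE_ID_foo0;
	 *	static int __UNIQUE_ID(foo);	expands to	static int __UNIQUE_ID_foo1;
	 * The __LINE__ fallback is only "not quite unique": two expansions
	 * on the same source line would still collide. */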
diff --git a/include/linux/cred.h b/include/linux/cred.h
index ebbed2c..abb2cd5 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -77,21 +77,6 @@
 extern int in_egroup_p(kgid_t);
 
 /*
- * The common credentials for a thread group
- * - shared by CLONE_THREAD
- */
-#ifdef CONFIG_KEYS
-struct thread_group_cred {
-	atomic_t	usage;
-	pid_t		tgid;			/* thread group process ID */
-	spinlock_t	lock;
-	struct key __rcu *session_keyring;	/* keyring inherited over fork */
-	struct key	*process_keyring;	/* keyring private to this process */
-	struct rcu_head	rcu;			/* RCU deletion hook */
-};
-#endif
-
-/*
  * The security context of a task
  *
  * The parts of the context break down into two categories:
@@ -139,6 +124,8 @@
 #ifdef CONFIG_KEYS
 	unsigned char	jit_keyring;	/* default keyring to attach requested
 					 * keys to */
+	struct key __rcu *session_keyring; /* keyring inherited over fork */
+	struct key	*process_keyring; /* keyring private to this process */
 	struct key	*thread_keyring; /* keyring private to this thread */
 	struct key	*request_key_auth; /* assumed request_key authority */
 	struct thread_group_cred *tgcred; /* thread-group shared credentials */
@@ -357,10 +344,8 @@
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
 #define current_user_ns()	(current_cred_xxx(user_ns))
-#define task_user_ns(task)	(task_cred_xxx((task), user_ns))
 #else
 #define current_user_ns()	(&init_user_ns)
-#define task_user_ns(task)	(&init_user_ns)
 #endif
 
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 5920079..c1754b5 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -202,7 +202,6 @@
 #define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
 #define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
 #define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
-#define DCACHE_NEED_LOOKUP	0x80000 /* dentry requires i_op->lookup */
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
@@ -408,13 +407,6 @@
 	return dentry->d_flags & DCACHE_MOUNTED;
 }
 
-static inline bool d_need_lookup(struct dentry *dentry)
-{
-	return dentry->d_flags & DCACHE_NEED_LOOKUP;
-}
-
-extern void d_clear_need_lookup(struct dentry *dentry);
-
 extern int sysctl_vfs_cache_pressure;
 
 #endif	/* __LINUX_DCACHE_H */
diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h
index f83f793..c8e1831 100644
--- a/include/linux/dma-attrs.h
+++ b/include/linux/dma-attrs.h
@@ -17,6 +17,7 @@
 	DMA_ATTR_NON_CONSISTENT,
 	DMA_ATTR_NO_KERNEL_MAPPING,
 	DMA_ATTR_SKIP_CPU_SYNC,
+	DMA_ATTR_FORCE_CONTIGUOUS,
 	DMA_ATTR_MAX,
 };
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index eb48f38..bd2e52c 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -156,7 +156,6 @@
 	get_file(dmabuf->file);
 }
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 							struct device *dev);
 void dma_buf_detach(struct dma_buf *dmabuf,
@@ -184,103 +183,5 @@
 		 unsigned long);
 void *dma_buf_vmap(struct dma_buf *);
 void dma_buf_vunmap(struct dma_buf *, void *vaddr);
-#else
-
-static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-							struct device *dev)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_detach(struct dma_buf *dmabuf,
-				  struct dma_buf_attachment *dmabuf_attach)
-{
-	return;
-}
-
-static inline struct dma_buf *dma_buf_export(void *priv,
-					     const struct dma_buf_ops *ops,
-					     size_t size, int flags)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline int dma_buf_fd(struct dma_buf *dmabuf, int flags)
-{
-	return -ENODEV;
-}
-
-static inline struct dma_buf *dma_buf_get(int fd)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_put(struct dma_buf *dmabuf)
-{
-	return;
-}
-
-static inline struct sg_table *dma_buf_map_attachment(
-	struct dma_buf_attachment *attach, enum dma_data_direction write)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
-			struct sg_table *sg, enum dma_data_direction dir)
-{
-	return;
-}
-
-static inline int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-					   size_t start, size_t len,
-					   enum dma_data_direction dir)
-{
-	return -ENODEV;
-}
-
-static inline void dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-					  size_t start, size_t len,
-					  enum dma_data_direction dir)
-{
-}
-
-static inline void *dma_buf_kmap_atomic(struct dma_buf *dmabuf,
-					unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap_atomic(struct dma_buf *dmabuf,
-					 unsigned long pnum, void *vaddr)
-{
-}
-
-static inline void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap(struct dma_buf *dmabuf,
-				  unsigned long pnum, void *vaddr)
-{
-}
-
-static inline int dma_buf_mmap(struct dma_buf *dmabuf,
-			       struct vm_area_struct *vma,
-			       unsigned long pgoff)
-{
-	return -ENODEV;
-}
-
-static inline void *dma_buf_vmap(struct dma_buf *dmabuf)
-{
-	return NULL;
-}
-
-static inline void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
-{
-}
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 
 #endif /* __DMA_BUF_H__ */
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 171ad8a..fc0e34c 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -39,6 +39,8 @@
 			       int direction, dma_addr_t dma_addr,
 			       bool map_single);
 
+extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
 extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 				 size_t size, int direction, bool map_single);
 
@@ -105,6 +107,11 @@
 {
 }
 
+static inline void debug_dma_mapping_error(struct device *dev,
+					  dma_addr_t dma_addr)
+{
+}
+
 static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 					size_t size, int direction,
 					bool map_single)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 47e3d48..0c5a18e 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -51,12 +51,11 @@
 
 #endif
 
-
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.13"
-#define API_VERSION 88
+#define REL_VERSION "8.4.2"
+#define API_VERSION 1
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 96
+#define PRO_VERSION_MAX 101
 
 
 enum drbd_io_error_p {
@@ -66,7 +65,8 @@
 };
 
 enum drbd_fencing_p {
-	FP_DONT_CARE,
+	FP_NOT_AVAIL = -1, /* Not a policy */
+	FP_DONT_CARE = 0,
 	FP_RESOURCE,
 	FP_STONITH
 };
@@ -102,6 +102,20 @@
 	OC_DISCONNECT,
 };
 
+enum drbd_read_balancing {
+	RB_PREFER_LOCAL,
+	RB_PREFER_REMOTE,
+	RB_ROUND_ROBIN,
+	RB_LEAST_PENDING,
+	RB_CONGESTED_REMOTE,
+	RB_32K_STRIPING,
+	RB_64K_STRIPING,
+	RB_128K_STRIPING,
+	RB_256K_STRIPING,
+	RB_512K_STRIPING,
+	RB_1M_STRIPING,
+};
+
 /* KEEP the order, do not delete or insert. Only append. */
 enum drbd_ret_code {
 	ERR_CODE_BASE		= 100,
@@ -122,7 +136,7 @@
 	ERR_AUTH_ALG		= 120,
 	ERR_AUTH_ALG_ND		= 121,
 	ERR_NOMEM		= 122,
-	ERR_DISCARD		= 123,
+	ERR_DISCARD_IMPOSSIBLE	= 123,
 	ERR_DISK_CONFIGURED	= 124,
 	ERR_NET_CONFIGURED	= 125,
 	ERR_MANDATORY_TAG	= 126,
@@ -130,8 +144,8 @@
 	ERR_INTR		= 129, /* EINTR */
 	ERR_RESIZE_RESYNC	= 130,
 	ERR_NO_PRIMARY		= 131,
-	ERR_SYNC_AFTER		= 132,
-	ERR_SYNC_AFTER_CYCLE	= 133,
+	ERR_RESYNC_AFTER	= 132,
+	ERR_RESYNC_AFTER_CYCLE	= 133,
 	ERR_PAUSE_IS_SET	= 134,
 	ERR_PAUSE_IS_CLEAR	= 135,
 	ERR_PACKET_NR		= 137,
@@ -155,6 +169,14 @@
 	ERR_CONG_NOT_PROTO_A	= 155,
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
+	ERR_RES_NOT_KNOWN	= 158,
+	ERR_RES_IN_USE		= 159,
+	ERR_MINOR_CONFIGURED    = 160,
+	ERR_MINOR_EXISTS	= 161,
+	ERR_INVALID_REQUEST	= 162,
+	ERR_NEED_APV_100	= 163,
+	ERR_NEED_ALLOW_TWO_PRI  = 164,
+	ERR_MD_UNCLEAN          = 165,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -296,7 +318,8 @@
 	SS_NOT_SUPPORTED = -17,      /* drbd-8.2 only */
 	SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */
 	SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! */
-	SS_AFTER_LAST_ERROR = -20,    /* Keep this at bottom */
+	SS_O_VOL_PEER_PRI = -20,
+	SS_AFTER_LAST_ERROR = -21,    /* Keep this at bottom */
 };
 
 /* from drbd_strings.c */
@@ -313,7 +336,9 @@
 #define MDF_FULL_SYNC		(1 << 3)
 #define MDF_WAS_UP_TO_DATE	(1 << 4)
 #define MDF_PEER_OUT_DATED	(1 << 5)
-#define MDF_CRASHED_PRIMARY     (1 << 6)
+#define MDF_CRASHED_PRIMARY	(1 << 6)
+#define MDF_AL_CLEAN		(1 << 7)
+#define MDF_AL_DISABLED		(1 << 8)
 
 enum drbd_uuid_index {
 	UI_CURRENT,
@@ -333,37 +358,23 @@
 
 #define UUID_JUST_CREATED ((__u64)4)
 
+/* magic numbers used in meta data and network packets */
 #define DRBD_MAGIC 0x83740267
-#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
 #define DRBD_MAGIC_BIG 0x835a
-#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
+#define DRBD_MAGIC_100 0x8620ec20
+
+#define DRBD_MD_MAGIC_07   (DRBD_MAGIC+3)
+#define DRBD_MD_MAGIC_08   (DRBD_MAGIC+4)
+#define DRBD_MD_MAGIC_84_UNCLEAN	(DRBD_MAGIC+5)
+
+
+/* How did I come up with this magic?
+ * base64 decode "actlog==" ;) */
+#define DRBD_AL_MAGIC 0x69cb65a2
 
 /* these are of type "int" */
 #define DRBD_MD_INDEX_INTERNAL -1
 #define DRBD_MD_INDEX_FLEX_EXT -2
 #define DRBD_MD_INDEX_FLEX_INT -3
 
-/* Start of the new netlink/connector stuff */
-
-#define DRBD_NL_CREATE_DEVICE 0x01
-#define DRBD_NL_SET_DEFAULTS  0x02
-
-
-/* For searching a vacant cn_idx value */
-#define CN_IDX_STEP			6977
-
-struct drbd_nl_cfg_req {
-	int packet_type;
-	unsigned int drbd_minor;
-	int flags;
-	unsigned short tag_list[];
-};
-
-struct drbd_nl_cfg_reply {
-	int packet_type;
-	unsigned int minor;
-	int ret_code; /* enum ret_code or set_st_err_t */
-	unsigned short tag_list[]; /* only used with get_* calls */
-};
-
 #endif
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
new file mode 100644
index 0000000..d0d8fac
--- /dev/null
+++ b/include/linux/drbd_genl.h
@@ -0,0 +1,378 @@
+/*
+ * General overview:
+ * full generic netlink message:
+ * |nlmsghdr|genlmsghdr|<payload>
+ *
+ * payload:
+ * |optional fixed size family header|<sequence of netlink attributes>
+ *
+ * sequence of netlink attributes:
+ * I chose to have all "top level" attributes NLA_NESTED,
+ * corresponding to some real struct.
+ * So we have a sequence of |tla, len|<nested nla sequence>
+ *
+ * nested nla sequence:
+ * may be empty, or contain a sequence of netlink attributes
+ * representing the struct fields.
+ *
+ * The tag number of any field (regardless of containing struct)
+ * will be available as T_ ## field_name,
+ * so you cannot have the same field name in two different structs.
+ *
+ * The tag numbers themselves are per struct, though,
+ * so they should always begin at 1 (not 0; that is the special "NLA_UNSPEC" type,
+ * which we won't use here).
+ * The tag numbers are used as index in the respective nla_policy array.
+ *
+ * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
+ *	genl_magic_struct.h
+ *		generates the struct declaration,
+ *		generates an entry in the tla enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static tla policy
+ *		with .type = NLA_NESTED
+ *		generates the static <struct_name>_nl_policy definition,
+ *		and static conversion functions
+ *
+ *	genl_magic_func.h
+ *
+ * GENL_mc_group(group)
+ *	genl_magic_struct.h
+ *		does nothing
+ *	genl_magic_func.h
+ *		defines and registers the mcast group,
+ *		and provides a send helper
+ *
+ * GENL_notification(op_name, op_num, mcast_group, tla list)
+ *	These are notifications to userspace.
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		does nothing
+ *
+ *	mcast group: the name of the mcast group this notification should be
+ *	expected on
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ *
+ * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
+ *	These are requests from userspace.
+ *
+ *	_op and _notification share the same "number space",
+ *	op_nr will be assigned to "genlmsghdr->cmd"
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static genl_ops array,
+ *		and static register/unregister functions to
+ *		genl_register_family_with_ops().
+ *
+ *	flags and handler:
+ *		GENL_op_init( .doit = x, .dumpit = y, .flags = something)
+ *		GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ */
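The two-pass scheme described above is a classic X-macro technique: the same GENL_struct()/GENL_op() list is included once by genl_magic_struct.h and once by genl_magic_func.h, each time with different macro definitions. A minimal self-contained sketch of the idea, with hypothetical names (not the actual drbd macros):

	/* Hypothetical "fields.h", the single list, included once per pass:
	 *
	 *	FIELD(__u32, ctx_volume)
	 *	FIELD(__u32, ctx_flags)
	 */

	/* pass 1: emit the struct declaration */
	#define FIELD(type, name)	type name;
	struct example_ctx {
	#include "fields.h"
	};
	#undef FIELD

	/* pass 2: emit a parallel table of field names */
	#define FIELD(type, name)	#name,
	static const char * const example_ctx_names[] = {
	#include "fields.h"
	};
	#undef FIELD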
+
+/*
+ * STRUCTS
+ */
+
+/* this is sent kernel -> userland on various error conditions, and contains
+ * informational text, which is supposedly human readable.
+ * The machine-readable return code is in the drbd_genlmsghdr.
+ */
+GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
+		/* "arbitrary" size strings, nla_policy.len = 0 */
+	__str_field(1, DRBD_GENLA_F_MANDATORY,	info_text, 0)
+)
+
+/* Configuration requests typically need a context to operate on.
+ * Possible keys are device minor (fits in the drbd_genlmsghdr),
+ * the replication link (aka connection) name,
+ * and/or the replication group (aka resource) name,
+ * and the volume id within the resource. */
+GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
+	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128)
+	__bin_field(3, DRBD_GENLA_F_MANDATORY,	ctx_my_addr, 128)
+	__bin_field(4, DRBD_GENLA_F_MANDATORY,	ctx_peer_addr, 128)
+)
+
+GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
+	__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	backing_dev,	128)
+	__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev,	128)
+	__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev_idx)
+
+	/* use the resize command to try and change the disk_size */
+	__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	disk_size)
+	/* we could change the max_bio_bvecs,
+	 * but it won't propagate through the stack */
+	__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	max_bio_bvecs)
+
+	__u32_field_def(6, DRBD_GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
+	__u32_field_def(7, DRBD_GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
+
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
+	__s32_field_def(9,	DRBD_GENLA_F_MANDATORY,	resync_after, DRBD_MINOR_NUMBER_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
+
+	__flg_field_def(16, DRBD_GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
+	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
+	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
+	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
+	__u32_field_def(21,	0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF)
+	/* 9: __u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
+	__flg_field_def(23,     0 /* OPTIONAL */,	al_updates, DRBD_AL_UPDATES_DEF)
+)
+
+GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field_def(2,	DRBD_GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
+)
+
+GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
+						shared_secret,	SHARED_SECRET_MAX)
+	__str_field_def(2,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(3,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(6,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(7,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(24, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
+	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
+	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
+	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	/* 9: __u32_field_def(30,	DRBD_GENLA_F_MANDATORY,	fencing_policy, DRBD_FENCING_DEF) */
+)
+
+GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	assume_uptodate)
+)
+
+GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
+	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
+	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
+)
+
+GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
+	/* the reason of the broadcast,
+	 * if this is an event triggered broadcast. */
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	sib_reason)
+	__u32_field(2, DRBD_F_REQUIRED,	current_state)
+	__u64_field(3, DRBD_GENLA_F_MANDATORY,	capacity)
+	__u64_field(4, DRBD_GENLA_F_MANDATORY,	ed_uuid)
+
+	/* These are for broadcast from after state change work.
+	 * prev_state and new_state are from the moment the state change took
+	 * place; new_state is not necessarily the same as current_state,
+	 * as there may have been more state changes since.  Those will be
+	 * broadcast soon, in their respective after-state-change work.  */
+	__u32_field(5, DRBD_GENLA_F_MANDATORY,	prev_state)
+	__u32_field(6, DRBD_GENLA_F_MANDATORY,	new_state)
+
+	/* if we have a local disk: */
+	__bin_field(7, DRBD_GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
+	__u32_field(8, DRBD_GENLA_F_MANDATORY,	disk_flags)
+	__u64_field(9, DRBD_GENLA_F_MANDATORY,	bits_total)
+	__u64_field(10, DRBD_GENLA_F_MANDATORY,	bits_oos)
+	/* and in case resync or online verify is active */
+	__u64_field(11, DRBD_GENLA_F_MANDATORY,	bits_rs_total)
+	__u64_field(12, DRBD_GENLA_F_MANDATORY,	bits_rs_failed)
+
+	/* for pre and post notifications of helper execution */
+	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32)
+	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code)
+
+	__u64_field(15,                      0, send_cnt)
+	__u64_field(16,                      0, recv_cnt)
+	__u64_field(17,                      0, read_cnt)
+	__u64_field(18,                      0, writ_cnt)
+	__u64_field(19,                      0, al_writ_cnt)
+	__u64_field(20,                      0, bm_writ_cnt)
+	__u32_field(21,                      0, ap_bio_cnt)
+	__u32_field(22,                      0, ap_pending_cnt)
+	__u32_field(23,                      0, rs_pending_cnt)
+)
+
+GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector)
+	__u64_field(2, DRBD_GENLA_F_MANDATORY,	ov_stop_sector)
+)
+
+GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
+)
+
+GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
+	__u32_field(1,	DRBD_F_REQUIRED,	timeout_type)
+)
+
+GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
+)
+
+GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_detach)
+)
+
+/*
+ * Notifications and commands (genlmsghdr->cmd)
+ */
+GENL_mc_group(events)
+
+	/* kernel -> userspace announcement of changes */
+GENL_notification(
+	DRBD_EVENT, 1, events,
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
+)
+
+	/* query kernel for specific or all info */
+GENL_op(
+	DRBD_ADM_GET_STATUS, 2,
+	GENL_op_init(
+		.doit = drbd_adm_get_status,
+		.dumpit = drbd_adm_get_status_all,
+		/* anyone may ask for the status,
+	 * it is broadcast anyway */
+	),
+	/* To select the object .doit.
+	 * Or a subset of objects in .dumpit. */
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+)
+
+	/* add DRBD minor devices as volumes to resources */
+GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+	/* add or delete resources */
+GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
+	GENL_doit(drbd_adm_resource_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_CONNECT, 10,
+	GENL_doit(drbd_adm_connect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_CHG_NET_OPTS, 29,
+	GENL_doit(drbd_adm_net_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_ATTACH, 12,
+	GENL_doit(drbd_adm_attach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
+	GENL_doit(drbd_adm_disk_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_RESIZE, 13,
+	GENL_doit(drbd_adm_resize),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_PRIMARY, 14,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_SECONDARY, 15,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_NEW_C_UUID, 16,
+	GENL_doit(drbd_adm_new_c_uuid),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_START_OV, 17,
+	GENL_doit(drbd_adm_start_ov),
+	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_PAUSE_SYNC,	21, GENL_doit(drbd_adm_pause_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_SYNC,	22, GENL_doit(drbd_adm_resume_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_SUSPEND_IO,	23, GENL_doit(drbd_adm_suspend_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_IO,	24, GENL_doit(drbd_adm_resume_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DOWN,		27, GENL_doit(drbd_adm_down),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h
new file mode 100644
index 0000000..9ef50d5
--- /dev/null
+++ b/include/linux/drbd_genl_api.h
@@ -0,0 +1,55 @@
+#ifndef DRBD_GENL_STRUCT_H
+#define DRBD_GENL_STRUCT_H
+
+/**
+ * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
+ * @minor:
+ *     For admin requests (user -> kernel): which minor device to operate on.
+ *     For (unicast) replies or informational (broadcast) messages
+ *     (kernel -> user): which minor device the information is about.
+ *     If we do not operate on minors, but on connections or resources,
+ *     the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
+ *     is used instead.
+ * @flags: possible operation modifiers (relevant only for user->kernel):
+ *     DRBD_GENL_F_SET_DEFAULTS
+ * @volume:
+ *     When creating a new minor (adding it to a resource), the resource needs
+ *     to know which volume number within the resource this is supposed to be.
+ *     The volume number corresponds to the same volume number on the remote side,
+ *     whereas the minor number on the remote side may be different
+ *     (union with flags).
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags);
+ */
+struct drbd_genlmsghdr {
+	__u32 minor;
+	union {
+	__u32 flags;
+	__s32 ret_code;
+	};
+};
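A hedged sketch of how a configuration tool would fill this header before appending DRBD_NLA_* attributes; the netlink plumbing around it is omitted, and the helper name is made up:

	/* Illustrative only: address a command either at a minor device or,
	 * with minor == (~0), at a resource/connection named by the
	 * DRBD_NLA_CFG_CONTEXT attribute instead. */
	static void example_fill_drbd_hdr(struct drbd_genlmsghdr *dhdr,
					  __u32 minor, bool set_defaults)
	{
		dhdr->minor = minor;		/* or -1U for non-minor commands */
		dhdr->flags = set_defaults ? DRBD_GENL_F_SET_DEFAULTS : 0;
	}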
+
+/* To be used in drbd_genlmsghdr.flags */
+enum {
+	DRBD_GENL_F_SET_DEFAULTS = 1,
+};
+
+enum drbd_state_info_bcast_reason {
+	SIB_GET_STATUS_REPLY = 1,
+	SIB_STATE_CHANGE = 2,
+	SIB_HELPER_PRE = 3,
+	SIB_HELPER_POST = 4,
+	SIB_SYNC_PROGRESS = 5,
+};
+
+/* hack around predefined gcc/cpp "linux=1",
+ * we cannot possibly include <1/drbd_genl.h> */
+#undef linux
+
+#include <linux/drbd.h>
+#define GENL_MAGIC_VERSION	API_VERSION
+#define GENL_MAGIC_FAMILY	drbd
+#define GENL_MAGIC_FAMILY_HDRSZ	sizeof(struct drbd_genlmsghdr)
+#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
+#include <linux/genl_magic_struct.h>
+
+#endif
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index fb670bf..1fa19c5 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -16,29 +16,37 @@
 #define DEBUG_RANGE_CHECK 0
 
 #define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX 256
+#define DRBD_MINOR_COUNT_MAX 255
 #define DRBD_MINOR_COUNT_DEF 32
+#define DRBD_MINOR_COUNT_SCALE '1'
+
+#define DRBD_VOLUME_MAX 65535
 
 #define DRBD_DIALOG_REFRESH_MIN 0
 #define DRBD_DIALOG_REFRESH_MAX 600
+#define DRBD_DIALOG_REFRESH_SCALE '1'
 
 /* valid port number */
 #define DRBD_PORT_MIN 1
 #define DRBD_PORT_MAX 0xffff
+#define DRBD_PORT_SCALE '1'
 
 /* startup { */
   /* if you want more than 3.4 days, disable */
 #define DRBD_WFC_TIMEOUT_MIN 0
 #define DRBD_WFC_TIMEOUT_MAX 300000
 #define DRBD_WFC_TIMEOUT_DEF 0
+#define DRBD_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_DEGR_WFC_TIMEOUT_MIN 0
 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0
+#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
+#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
 /* }*/
 
 /* net { */
@@ -47,75 +55,91 @@
 #define DRBD_TIMEOUT_MIN 1
 #define DRBD_TIMEOUT_MAX 600
 #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
+#define DRBD_TIMEOUT_SCALE '1'
 
  /* If backing disk takes longer than disk_timeout, mark the disk as failed */
 #define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */
 #define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
 #define DRBD_DISK_TIMEOUT_DEF 0    /* disabled */
+#define DRBD_DISK_TIMEOUT_SCALE '1'
 
   /* active connection retries when C_WF_CONNECTION */
 #define DRBD_CONNECT_INT_MIN 1
 #define DRBD_CONNECT_INT_MAX 120
 #define DRBD_CONNECT_INT_DEF 10   /* seconds */
+#define DRBD_CONNECT_INT_SCALE '1'
 
   /* keep-alive probes when idle */
 #define DRBD_PING_INT_MIN 1
 #define DRBD_PING_INT_MAX 120
 #define DRBD_PING_INT_DEF 10
+#define DRBD_PING_INT_SCALE '1'
 
  /* timeout for the ping packets.*/
 #define DRBD_PING_TIMEO_MIN  1
 #define DRBD_PING_TIMEO_MAX  300
 #define DRBD_PING_TIMEO_DEF  5
+#define DRBD_PING_TIMEO_SCALE '1'
 
   /* max number of write requests between write barriers */
 #define DRBD_MAX_EPOCH_SIZE_MIN 1
 #define DRBD_MAX_EPOCH_SIZE_MAX 20000
 #define DRBD_MAX_EPOCH_SIZE_DEF 2048
+#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
 
   /* I don't think that a tcp send buffer of more than 10M is useful */
 #define DRBD_SNDBUF_SIZE_MIN  0
 #define DRBD_SNDBUF_SIZE_MAX  (10<<20)
 #define DRBD_SNDBUF_SIZE_DEF  0
+#define DRBD_SNDBUF_SIZE_SCALE '1'
 
 #define DRBD_RCVBUF_SIZE_MIN  0
 #define DRBD_RCVBUF_SIZE_MAX  (10<<20)
 #define DRBD_RCVBUF_SIZE_DEF  0
+#define DRBD_RCVBUF_SIZE_SCALE '1'
 
   /* @4k PageSize -> 128kB - 512MB */
 #define DRBD_MAX_BUFFERS_MIN  32
 #define DRBD_MAX_BUFFERS_MAX  131072
 #define DRBD_MAX_BUFFERS_DEF  2048
+#define DRBD_MAX_BUFFERS_SCALE '1'
 
   /* @4k PageSize -> 4kB - 512MB */
 #define DRBD_UNPLUG_WATERMARK_MIN  1
 #define DRBD_UNPLUG_WATERMARK_MAX  131072
 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
+#define DRBD_UNPLUG_WATERMARK_SCALE '1'
 
   /* 0 is disabled.
    * 200 should be more than enough even for very short timeouts */
 #define DRBD_KO_COUNT_MIN  0
 #define DRBD_KO_COUNT_MAX  200
-#define DRBD_KO_COUNT_DEF  0
+#define DRBD_KO_COUNT_DEF  7
+#define DRBD_KO_COUNT_SCALE '1'
 /* } */
 
 /* syncer { */
   /* FIXME allow rate to be zero? */
-#define DRBD_RATE_MIN 1
+#define DRBD_RESYNC_RATE_MIN 1
 /* channel bonding 10 GbE, or other hardware */
-#define DRBD_RATE_MAX (4 << 20)
-#define DRBD_RATE_DEF 250  /* kb/second */
+#define DRBD_RESYNC_RATE_MAX (4 << 20)
+#define DRBD_RESYNC_RATE_DEF 250
+#define DRBD_RESYNC_RATE_SCALE 'k'  /* kilobytes */
 
   /* less than 7 would hit performance unnecessarily.
-   * 3833 is the largest prime that still does fit
-   * into 64 sectors of activity log */
+   * 919 slots of context information per transaction,
+   * 32k activity log, 4k transaction size,
+   * one transaction in flight:
+   * 919 * 7 = 6433 */
 #define DRBD_AL_EXTENTS_MIN  7
-#define DRBD_AL_EXTENTS_MAX  3833
-#define DRBD_AL_EXTENTS_DEF  127
+#define DRBD_AL_EXTENTS_MAX  6433
+#define DRBD_AL_EXTENTS_DEF  1237
+#define DRBD_AL_EXTENTS_SCALE '1'
 
-#define DRBD_AFTER_MIN  -1
-#define DRBD_AFTER_MAX  255
-#define DRBD_AFTER_DEF  -1
+#define DRBD_MINOR_NUMBER_MIN  -1
+#define DRBD_MINOR_NUMBER_MAX  ((1 << 20) - 1)
+#define DRBD_MINOR_NUMBER_DEF  -1
+#define DRBD_MINOR_NUMBER_SCALE '1'
 
 /* } */
 
@@ -124,11 +148,12 @@
  * the upper limit with 64bit kernel, enough ram and flexible meta data
  * is 1 PiB, currently. */
 /* DRBD_MAX_SECTORS */
-#define DRBD_DISK_SIZE_SECT_MIN  0
-#define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40))
-#define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_MIN  0
+#define DRBD_DISK_SIZE_MAX  (1 * (2LLU << 40))
+#define DRBD_DISK_SIZE_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_SCALE 's'  /* sectors */
 
-#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
+#define DRBD_ON_IO_ERROR_DEF EP_DETACH
 #define DRBD_FENCING_DEF FP_DONT_CARE
 #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
 #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
@@ -136,38 +161,59 @@
 #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
 #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
 #define DRBD_ON_CONGESTION_DEF OC_BLOCK
+#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
 
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
 #define DRBD_MAX_BIO_BVECS_DEF 0
+#define DRBD_MAX_BIO_BVECS_SCALE '1'
 
 #define DRBD_C_PLAN_AHEAD_MIN  0
 #define DRBD_C_PLAN_AHEAD_MAX  300
-#define DRBD_C_PLAN_AHEAD_DEF  0 /* RS rate controller disabled by default */
+#define DRBD_C_PLAN_AHEAD_DEF  20
+#define DRBD_C_PLAN_AHEAD_SCALE '1'
 
 #define DRBD_C_DELAY_TARGET_MIN 1
 #define DRBD_C_DELAY_TARGET_MAX 100
 #define DRBD_C_DELAY_TARGET_DEF 10
+#define DRBD_C_DELAY_TARGET_SCALE '1'
 
 #define DRBD_C_FILL_TARGET_MIN 0
 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
-#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
+#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
+#define DRBD_C_FILL_TARGET_SCALE 's'  /* sectors */
 
-#define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */
+#define DRBD_C_MAX_RATE_MIN     250
 #define DRBD_C_MAX_RATE_MAX     (4 << 20)
 #define DRBD_C_MAX_RATE_DEF     102400
+#define DRBD_C_MAX_RATE_SCALE	'k'  /* kilobytes */
 
-#define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */
+#define DRBD_C_MIN_RATE_MIN     0
 #define DRBD_C_MIN_RATE_MAX     (4 << 20)
-#define DRBD_C_MIN_RATE_DEF     4096
+#define DRBD_C_MIN_RATE_DEF     250
+#define DRBD_C_MIN_RATE_SCALE	'k'  /* kilobytes */
 
 #define DRBD_CONG_FILL_MIN	0
 #define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */
 #define DRBD_CONG_FILL_DEF	0
+#define DRBD_CONG_FILL_SCALE	's'  /* sectors */
 
 #define DRBD_CONG_EXTENTS_MIN	DRBD_AL_EXTENTS_MIN
 #define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX
 #define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF
+#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
 
-#undef RANGE
+#define DRBD_PROTOCOL_DEF DRBD_PROT_C
+
+#define DRBD_DISK_BARRIER_DEF	0
+#define DRBD_DISK_FLUSHES_DEF	1
+#define DRBD_DISK_DRAIN_DEF	1
+#define DRBD_MD_FLUSHES_DEF	1
+#define DRBD_TCP_CORK_DEF	1
+#define DRBD_AL_UPDATES_DEF     1
+
+#define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
+#define DRBD_ALWAYS_ASBP_DEF	0
+#define DRBD_USE_RLE_DEF	1
+
 #endif
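The new *_SCALE characters record the unit each limit is expressed in: '1' is unit-less, 'k' is kilobytes, 's' is 512-byte sectors (per the comments above). A small illustrative helper, not part of the patch:

	/* Illustrative: interpret a DRBD_*_SCALE character.  Returns bytes,
	 * or the raw value for unit-less ('1') limits. */
	static unsigned long long example_scale_to_bytes(unsigned long long v,
							 char scale)
	{
		switch (scale) {
		case 'k':	return v << 10;	/* kilobytes */
		case 's':	return v << 9;	/* 512-byte sectors */
		default:	return v;	/* '1': plain count */
		}
	}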
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
deleted file mode 100644
index a8706f0..0000000
--- a/include/linux/drbd_nl.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
-   PAKET( name,
-	  TYPE ( pn, pr, member )
-	  ...
-   )
-
-   You may never reissue one of the pn arguments
-*/
-
-#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
-#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
-#endif
-
-NL_PACKET(primary, 1,
-       NL_BIT(		1,	T_MAY_IGNORE,	primary_force)
-)
-
-NL_PACKET(secondary, 2, )
-
-NL_PACKET(disk_conf, 3,
-	NL_INT64(	2,	T_MAY_IGNORE,	disk_size)
-	NL_STRING(	3,	T_MANDATORY,	backing_dev,	128)
-	NL_STRING(	4,	T_MANDATORY,	meta_dev,	128)
-	NL_INTEGER(	5,	T_MANDATORY,	meta_dev_idx)
-	NL_INTEGER(	6,	T_MAY_IGNORE,	on_io_error)
-	NL_INTEGER(	7,	T_MAY_IGNORE,	fencing)
-	NL_BIT(		37,	T_MAY_IGNORE,	use_bmbv)
-	NL_BIT(		53,	T_MAY_IGNORE,	no_disk_flush)
-	NL_BIT(		54,	T_MAY_IGNORE,	no_md_flush)
-	  /*  55 max_bio_size was available in 8.2.6rc2 */
-	NL_INTEGER(	56,	T_MAY_IGNORE,	max_bio_bvecs)
-	NL_BIT(		57,	T_MAY_IGNORE,	no_disk_barrier)
-	NL_BIT(		58,	T_MAY_IGNORE,	no_disk_drain)
-	NL_INTEGER(	89,	T_MAY_IGNORE,	disk_timeout)
-)
-
-NL_PACKET(detach, 4,
-	NL_BIT(		88,	T_MANDATORY,	detach_force)
-)
-
-NL_PACKET(net_conf, 5,
-	NL_STRING(	8,	T_MANDATORY,	my_addr,	128)
-	NL_STRING(	9,	T_MANDATORY,	peer_addr,	128)
-	NL_STRING(	10,	T_MAY_IGNORE,	shared_secret,	SHARED_SECRET_MAX)
-	NL_STRING(	11,	T_MAY_IGNORE,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	NL_STRING(	44,	T_MAY_IGNORE,	integrity_alg,	SHARED_SECRET_MAX)
-	NL_INTEGER(	14,	T_MAY_IGNORE,	timeout)
-	NL_INTEGER(	15,	T_MANDATORY,	wire_protocol)
-	NL_INTEGER(	16,	T_MAY_IGNORE,	try_connect_int)
-	NL_INTEGER(	17,	T_MAY_IGNORE,	ping_int)
-	NL_INTEGER(	18,	T_MAY_IGNORE,	max_epoch_size)
-	NL_INTEGER(	19,	T_MAY_IGNORE,	max_buffers)
-	NL_INTEGER(	20,	T_MAY_IGNORE,	unplug_watermark)
-	NL_INTEGER(	21,	T_MAY_IGNORE,	sndbuf_size)
-	NL_INTEGER(	22,	T_MAY_IGNORE,	ko_count)
-	NL_INTEGER(	24,	T_MAY_IGNORE,	after_sb_0p)
-	NL_INTEGER(	25,	T_MAY_IGNORE,	after_sb_1p)
-	NL_INTEGER(	26,	T_MAY_IGNORE,	after_sb_2p)
-	NL_INTEGER(	39,	T_MAY_IGNORE,	rr_conflict)
-	NL_INTEGER(	40,	T_MAY_IGNORE,	ping_timeo)
-	NL_INTEGER(	67,	T_MAY_IGNORE,	rcvbuf_size)
-	NL_INTEGER(	81,	T_MAY_IGNORE,	on_congestion)
-	NL_INTEGER(	82,	T_MAY_IGNORE,	cong_fill)
-	NL_INTEGER(	83,	T_MAY_IGNORE,	cong_extents)
-	  /* 59 addr_family was available in GIT, never released */
-	NL_BIT(		60,	T_MANDATORY,	mind_af)
-	NL_BIT(		27,	T_MAY_IGNORE,	want_lose)
-	NL_BIT(		28,	T_MAY_IGNORE,	two_primaries)
-	NL_BIT(		41,	T_MAY_IGNORE,	always_asbp)
-	NL_BIT(		61,	T_MAY_IGNORE,	no_cork)
-	NL_BIT(		62,	T_MANDATORY,	auto_sndbuf_size)
-	NL_BIT(		70,	T_MANDATORY,	dry_run)
-)
-
-NL_PACKET(disconnect, 6,
-	NL_BIT(		84,	T_MAY_IGNORE,	force)
-)
-
-NL_PACKET(resize, 7,
-	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
-	NL_BIT(			68,	T_MAY_IGNORE,	resize_force)
-	NL_BIT(			69,	T_MANDATORY,	no_resync)
-)
-
-NL_PACKET(syncer_conf, 8,
-	NL_INTEGER(	30,	T_MAY_IGNORE,	rate)
-	NL_INTEGER(	31,	T_MAY_IGNORE,	after)
-	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents)
-/*	NL_INTEGER(     71,	T_MAY_IGNORE,	dp_volume)
- *	NL_INTEGER(     72,	T_MAY_IGNORE,	dp_interval)
- *	NL_INTEGER(     73,	T_MAY_IGNORE,	throttle_th)
- *	NL_INTEGER(     74,	T_MAY_IGNORE,	hold_off_th)
- * feature will be reimplemented differently with 8.3.9 */
-	NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX)
-	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
-	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)
-	NL_BIT(         65,     T_MAY_IGNORE,   use_rle)
-	NL_INTEGER(	75,	T_MAY_IGNORE,	on_no_data)
-	NL_INTEGER(	76,	T_MAY_IGNORE,	c_plan_ahead)
-	NL_INTEGER(     77,	T_MAY_IGNORE,	c_delay_target)
-	NL_INTEGER(     78,	T_MAY_IGNORE,	c_fill_target)
-	NL_INTEGER(     79,	T_MAY_IGNORE,	c_max_rate)
-	NL_INTEGER(     80,	T_MAY_IGNORE,	c_min_rate)
-)
-
-NL_PACKET(invalidate, 9, )
-NL_PACKET(invalidate_peer, 10, )
-NL_PACKET(pause_sync, 11, )
-NL_PACKET(resume_sync, 12, )
-NL_PACKET(suspend_io, 13, )
-NL_PACKET(resume_io, 14, )
-NL_PACKET(outdate, 15, )
-NL_PACKET(get_config, 16, )
-NL_PACKET(get_state, 17,
-	NL_INTEGER(	33,	T_MAY_IGNORE,	state_i)
-)
-
-NL_PACKET(get_uuids, 18,
-	NL_STRING(	34,	T_MAY_IGNORE,	uuids,	(UI_SIZE*sizeof(__u64)))
-	NL_INTEGER(	35,	T_MAY_IGNORE,	uuids_flags)
-)
-
-NL_PACKET(get_timeout_flag, 19,
-	NL_BIT(		36,	T_MAY_IGNORE,	use_degraded)
-)
-
-NL_PACKET(call_helper, 20,
-	NL_STRING(	38,	T_MAY_IGNORE,	helper,		32)
-)
-
-/* Tag nr 42 already allocated in drbd-8.1 development. */
-
-NL_PACKET(sync_progress, 23,
-	NL_INTEGER(	43,	T_MAY_IGNORE,	sync_progress)
-)
-
-NL_PACKET(dump_ee, 24,
-	NL_STRING(	45,	T_MAY_IGNORE,	dump_ee_reason, 32)
-	NL_STRING(	46,	T_MAY_IGNORE,	seen_digest, SHARED_SECRET_MAX)
-	NL_STRING(	47,	T_MAY_IGNORE,	calc_digest, SHARED_SECRET_MAX)
-	NL_INT64(	48,	T_MAY_IGNORE,	ee_sector)
-	NL_INT64(	49,	T_MAY_IGNORE,	ee_block_id)
-	NL_STRING(	50,	T_MAY_IGNORE,	ee_data,	32 << 10)
-)
-
-NL_PACKET(start_ov, 25,
-	NL_INT64(	66,	T_MAY_IGNORE,	start_sector)
-)
-
-NL_PACKET(new_c_uuid, 26,
-       NL_BIT(		63,	T_MANDATORY,	clear_bm)
-)
-
-#ifdef NL_RESPONSE
-NL_RESPONSE(return_code_only, 27)
-#endif
-
-#undef NL_PACKET
-#undef NL_INTEGER
-#undef NL_INT64
-#undef NL_BIT
-#undef NL_STRING
-#undef NL_RESPONSE
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
deleted file mode 100644
index 82de1f9..0000000
--- a/include/linux/drbd_tag_magic.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef DRBD_TAG_MAGIC_H
-#define DRBD_TAG_MAGIC_H
-
-#define TT_END     0
-#define TT_REMOVED 0xE000
-
-/* declare packet_type enums */
-enum packet_types {
-#define NL_PACKET(name, number, fields) P_ ## name = number,
-#define NL_RESPONSE(name, number) P_ ## name = number,
-#define NL_INTEGER(pn, pr, member)
-#define NL_INT64(pn, pr, member)
-#define NL_BIT(pn, pr, member)
-#define NL_STRING(pn, pr, member, len)
-#include <linux/drbd_nl.h>
-	P_nl_after_last_packet,
-};
-
-/* These struct are used to deduce the size of the tag lists: */
-#define NL_PACKET(name, number, fields)	\
-	struct name ## _tag_len_struct { fields };
-#define NL_INTEGER(pn, pr, member)		\
-	int member; int tag_and_len ## member;
-#define NL_INT64(pn, pr, member)		\
-	__u64 member; int tag_and_len ## member;
-#define NL_BIT(pn, pr, member)		\
-	unsigned char member:1; int tag_and_len ## member;
-#define NL_STRING(pn, pr, member, len)	\
-	unsigned char member[len]; int member ## _len; \
-	int tag_and_len ## member;
-#include <linux/drbd_nl.h>
-
-/* declare tag-list-sizes */
-static const int tag_list_sizes[] = {
-#define NL_PACKET(name, number, fields) 2 fields ,
-#define NL_INTEGER(pn, pr, member)      + 4 + 4
-#define NL_INT64(pn, pr, member)        + 4 + 8
-#define NL_BIT(pn, pr, member)          + 4 + 1
-#define NL_STRING(pn, pr, member, len)  + 4 + (len)
-#include <linux/drbd_nl.h>
-};
-
-/* The two highest bits are used for the tag type */
-#define TT_MASK      0xC000
-#define TT_INTEGER   0x0000
-#define TT_INT64     0x4000
-#define TT_BIT       0x8000
-#define TT_STRING    0xC000
-/* The next bit indicates if processing of the tag is mandatory */
-#define T_MANDATORY  0x2000
-#define T_MAY_IGNORE 0x0000
-#define TN_MASK      0x1fff
-/* The remaining 13 bits are used to enumerate the tags */
-
-#define tag_type(T)   ((T) & TT_MASK)
-#define tag_number(T) ((T) & TN_MASK)
-
-/* declare tag enums */
-#define NL_PACKET(name, number, fields) fields
-enum drbd_tags {
-#define NL_INTEGER(pn, pr, member)     T_ ## member = pn | TT_INTEGER | pr ,
-#define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr ,
-#define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr ,
-#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr ,
-#include <linux/drbd_nl.h>
-};
-
-struct tag {
-	const char *name;
-	int type_n_flags;
-	int max_len;
-};
-
-/* declare tag names */
-#define NL_PACKET(name, number, fields) fields
-static const struct tag tag_descriptions[] = {
-#define NL_INTEGER(pn, pr, member)     [ pn ] = { #member, TT_INTEGER | pr, sizeof(int)   },
-#define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
-#define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   },
-#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         },
-#include <linux/drbd_nl.h>
-};
-
-#endif
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 02a6941..8b84916 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -587,6 +587,8 @@
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
+extern unsigned long efi_get_time(void);
+extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 12291a7..5b9b5b3 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -83,6 +83,11 @@
 	 * 64 bit parent inode number.
 	 */
 	FILEID_NILFS_WITH_PARENT = 0x62,
+
+	/*
+	 * Filesystems must not use 0xff file ID.
+	 */
+	FILEID_INVALID = 0xff,
 };
 
 struct fid {
@@ -177,6 +182,8 @@
 	int (*commit_metadata)(struct inode *inode);
 };
 
+extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
+				    int *max_len, struct inode *parent);
 extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
 	int *max_len, int connectable);
 extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 0000000..f9a12f6
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,413 @@
+/**
+ * include/linux/f2fs_fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_FS_H
+#define _LINUX_F2FS_FS_H
+
+#include <linux/pagemap.h>
+#include <linux/types.h>
+
+#define F2FS_SUPER_OFFSET		1024	/* byte-size offset */
+#define F2FS_LOG_SECTOR_SIZE		9	/* 9 bits for 512 byte */
+#define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
+#define F2FS_BLKSIZE			4096	/* support only 4KB block */
+#define F2FS_MAX_EXTENSION		64	/* # of extension entries */
+
+#define NULL_ADDR		0x0U
+#define NEW_ADDR		-1U
+
+#define F2FS_ROOT_INO(sbi)	(sbi->root_ino_num)
+#define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
+#define F2FS_META_INO(sbi)	(sbi->meta_ino_num)
+
+/* This flag is used by node and meta inodes, and by recovery */
+#define GFP_F2FS_ZERO	(GFP_NOFS | __GFP_ZERO)
+
+/*
+ * For further optimization of multi-head logs, the on-disk layout supports
+ * a maximum of 16 logs by default. The number 16 is expected to cover all
+ * cases sufficiently. The implementation currently uses no more than 6 logs.
+ * Half the logs are used for nodes, and the other half are used for data.
+ */
+#define MAX_ACTIVE_LOGS	16
+#define MAX_ACTIVE_NODE_LOGS	8
+#define MAX_ACTIVE_DATA_LOGS	8
+
+/*
+ * For superblock
+ */
+struct f2fs_super_block {
+	__le32 magic;			/* Magic Number */
+	__le16 major_ver;		/* Major Version */
+	__le16 minor_ver;		/* Minor Version */
+	__le32 log_sectorsize;		/* log2 sector size in bytes */
+	__le32 log_sectors_per_block;	/* log2 # of sectors per block */
+	__le32 log_blocksize;		/* log2 block size in bytes */
+	__le32 log_blocks_per_seg;	/* log2 # of blocks per segment */
+	__le32 segs_per_sec;		/* # of segments per section */
+	__le32 secs_per_zone;		/* # of sections per zone */
+	__le32 checksum_offset;		/* checksum offset inside super block */
+	__le64 block_count;		/* total # of user blocks */
+	__le32 section_count;		/* total # of sections */
+	__le32 segment_count;		/* total # of segments */
+	__le32 segment_count_ckpt;	/* # of segments for checkpoint */
+	__le32 segment_count_sit;	/* # of segments for SIT */
+	__le32 segment_count_nat;	/* # of segments for NAT */
+	__le32 segment_count_ssa;	/* # of segments for SSA */
+	__le32 segment_count_main;	/* # of segments for main area */
+	__le32 segment0_blkaddr;	/* start block address of segment 0 */
+	__le32 cp_blkaddr;		/* start block address of checkpoint */
+	__le32 sit_blkaddr;		/* start block address of SIT */
+	__le32 nat_blkaddr;		/* start block address of NAT */
+	__le32 ssa_blkaddr;		/* start block address of SSA */
+	__le32 main_blkaddr;		/* start block address of main area */
+	__le32 root_ino;		/* root inode number */
+	__le32 node_ino;		/* node inode number */
+	__le32 meta_ino;		/* meta inode number */
+	__u8 uuid[16];			/* 128-bit uuid for volume */
+	__le16 volume_name[512];	/* volume name */
+	__le32 extension_count;		/* # of extensions below */
+	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
+} __packed;
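As a hedged userspace sketch, reading this superblock means seeking to F2FS_SUPER_OFFSET and byte-swapping the little-endian fields. The magic constant shown is an assumption; it is defined elsewhere in the f2fs sources, not in this header:

	/* Userspace sketch (assumes glibc's le32toh()/le16toh()/le64toh()). */
	#include <endian.h>
	#include <stdio.h>

	static int example_dump_f2fs_sb(FILE *dev)
	{
		struct f2fs_super_block sb;

		if (fseek(dev, F2FS_SUPER_OFFSET, SEEK_SET) != 0 ||
		    fread(&sb, sizeof(sb), 1, dev) != 1)
			return -1;
		if (le32toh(sb.magic) != 0xF2F52010U)	/* assumed magic value */
			return -1;
		printf("f2fs v%u.%u, %llu user blocks\n",
		       le16toh(sb.major_ver), le16toh(sb.minor_ver),
		       (unsigned long long)le64toh(sb.block_count));
		return 0;
	}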
+
+/*
+ * For checkpoint
+ */
+#define CP_ERROR_FLAG		0x00000008
+#define CP_COMPACT_SUM_FLAG	0x00000004
+#define CP_ORPHAN_PRESENT_FLAG	0x00000002
+#define CP_UMOUNT_FLAG		0x00000001
+
+struct f2fs_checkpoint {
+	__le64 checkpoint_ver;		/* checkpoint block version number */
+	__le64 user_block_count;	/* # of user blocks */
+	__le64 valid_block_count;	/* # of valid blocks in main area */
+	__le32 rsvd_segment_count;	/* # of reserved segments for gc */
+	__le32 overprov_segment_count;	/* # of overprovision segments */
+	__le32 free_segment_count;	/* # of free segments in main area */
+
+	/* information of current node segments */
+	__le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS];
+	__le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS];
+	/* information of current data segments */
+	__le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS];
+	__le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS];
+	__le32 ckpt_flags;		/* Flags : umount and journal_present */
+	__le32 cp_pack_total_block_count;	/* total # of one cp pack */
+	__le32 cp_pack_start_sum;	/* start block number of data summary */
+	__le32 valid_node_count;	/* Total number of valid nodes */
+	__le32 valid_inode_count;	/* Total number of valid inodes */
+	__le32 next_free_nid;		/* Next free node number */
+	__le32 sit_ver_bitmap_bytesize;	/* Default value 64 */
+	__le32 nat_ver_bitmap_bytesize; /* Default value 256 */
+	__le32 checksum_offset;		/* checksum offset inside cp block */
+	__le64 elapsed_time;		/* mounted time */
+	/* allocation type of current segment */
+	unsigned char alloc_type[MAX_ACTIVE_LOGS];
+
+	/* SIT and NAT version bitmap */
+	unsigned char sit_nat_version_bitmap[1];
+} __packed;
+
+/*
+ * For orphan inode management
+ */
+#define F2FS_ORPHANS_PER_BLOCK	1020
+
+struct f2fs_orphan_block {
+	__le32 ino[F2FS_ORPHANS_PER_BLOCK];	/* inode numbers */
+	__le32 reserved;	/* reserved */
+	__le16 blk_addr;	/* block index in current CP */
+	__le16 blk_count;	/* Number of orphan inode blocks in CP */
+	__le32 entry_count;	/* Total number of orphan nodes in current CP */
+	__le32 check_sum;	/* CRC32 for orphan inode block */
+} __packed;
+
+/*
+ * For NODE structure
+ */
+struct f2fs_extent {
+	__le32 fofs;		/* start file offset of the extent */
+	__le32 blk_addr;	/* start block address of the extent */
+	__le32 len;		/* length of the extent */
+} __packed;
+
+#define F2FS_MAX_NAME_LEN	256
+#define ADDRS_PER_INODE         923	/* Address Pointers in an Inode */
+#define ADDRS_PER_BLOCK         1018	/* Address Pointers in a Direct Block */
+#define NIDS_PER_BLOCK          1018	/* Node IDs in an Indirect Block */
+
+struct f2fs_inode {
+	__le16 i_mode;			/* file mode */
+	__u8 i_advise;			/* file hints */
+	__u8 i_reserved;		/* reserved */
+	__le32 i_uid;			/* user ID */
+	__le32 i_gid;			/* group ID */
+	__le32 i_links;			/* links count */
+	__le64 i_size;			/* file size in bytes */
+	__le64 i_blocks;		/* file size in blocks */
+	__le64 i_atime;			/* access time */
+	__le64 i_ctime;			/* change time */
+	__le64 i_mtime;			/* modification time */
+	__le32 i_atime_nsec;		/* access time in nano scale */
+	__le32 i_ctime_nsec;		/* change time in nano scale */
+	__le32 i_mtime_nsec;		/* modification time in nano scale */
+	__le32 i_generation;		/* file version (for NFS) */
+	__le32 i_current_depth;		/* only for directory depth */
+	__le32 i_xattr_nid;		/* nid to save xattr */
+	__le32 i_flags;			/* file attributes */
+	__le32 i_pino;			/* parent inode number */
+	__le32 i_namelen;		/* file name length */
+	__u8 i_name[F2FS_MAX_NAME_LEN];	/* file name for SPOR */
+
+	struct f2fs_extent i_ext;	/* caching a largest extent */
+
+	__le32 i_addr[ADDRS_PER_INODE];	/* Pointers to data blocks */
+
+	__le32 i_nid[5];		/* direct(2), indirect(2),
+						double_indirect(1) node id */
+} __packed;
+
+struct direct_node {
+	__le32 addr[ADDRS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+struct indirect_node {
+	__le32 nid[NIDS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+enum {
+	COLD_BIT_SHIFT = 0,
+	FSYNC_BIT_SHIFT,
+	DENT_BIT_SHIFT,
+	OFFSET_BIT_SHIFT
+};
+
+struct node_footer {
+	__le32 nid;		/* node id */
+	__le32 ino;		/* inode number */
+	__le32 flag;		/* include cold/fsync/dentry marks and offset */
+	__le64 cp_ver;		/* checkpoint version */
+	__le32 next_blkaddr;	/* next node page block address */
+} __packed;
+
+struct f2fs_node {
+	/* can be one of three types: inode, direct, and indirect types */
+	union {
+		struct f2fs_inode i;
+		struct direct_node dn;
+		struct indirect_node in;
+	};
+	struct node_footer footer;
+} __packed;
+
+/*
+ * For NAT entries
+ */
+#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
+
+struct f2fs_nat_entry {
+	__u8 version;		/* latest version of cached nat entry */
+	__le32 ino;		/* inode number */
+	__le32 block_addr;	/* block address */
+} __packed;
+
+struct f2fs_nat_block {
+	struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For SIT entries
+ *
+ * Each segment is 2MB in size by default, so the bitmap recording the
+ * validity of its blocks occupies 64 bytes (512 bits).
+ * This is not allowed to change.
+ */
+#define SIT_VBLOCK_MAP_SIZE 64
+#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
+
+/*
+ * Note that f2fs_sit_entry->vblocks has the following bit-field information.
+ * [15:10] : allocation type such as CURSEG_XXXX_TYPE
+ * [9:0] : valid block count
+ */
+#define SIT_VBLOCKS_SHIFT	10
+#define SIT_VBLOCKS_MASK	((1 << SIT_VBLOCKS_SHIFT) - 1)
+#define GET_SIT_VBLOCKS(raw_sit)				\
+	(le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK)
+#define GET_SIT_TYPE(raw_sit)					\
+	((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK)	\
+	 >> SIT_VBLOCKS_SHIFT)
+
+struct f2fs_sit_entry {
+	__le16 vblocks;				/* reference above */
+	__u8 valid_map[SIT_VBLOCK_MAP_SIZE];	/* bitmap for valid blocks */
+	__le64 mtime;				/* segment age for cleaning */
+} __packed;
+
+struct f2fs_sit_block {
+	struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK];
+} __packed;
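A worked example for the vblocks bit-field layout described above:

	/*	vblocks = 0x0C80 = 0000 1100 1000 0000b
	 *	GET_SIT_VBLOCKS() -> 0x0C80 & 0x03FF          = 0x080 = 128 valid blocks
	 *	GET_SIT_TYPE()    -> (0x0C80 & ~0x03FF) >> 10 = 0x3   = allocation type 3
	 */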
+
+/*
+ * For segment summary
+ *
+ * One summary block contains exactly 512 summary entries, which represent
+ * exactly one 2MB segment by default. These basic units are not allowed
+ * to change.
+ *
+ * NOTE: For initializing fields, you must use set_summary
+ *
+ * - If data page, nid represents dnode's nid
+ * - If node page, nid represents the node page's nid.
+ *
+ * The ofs_in_node is used only by data pages. It represents the offset
+ * from the beginning of the node page, used to get a data block address.
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
+ */
+#define ENTRIES_IN_SUM		512
+#define	SUMMARY_SIZE		(7)	/* sizeof(struct f2fs_summary) */
+#define	SUM_FOOTER_SIZE		(5)	/* sizeof(struct summary_footer) */
+#define SUM_ENTRY_SIZE		(SUMMARY_SIZE * ENTRIES_IN_SUM)
+
+/* a summary entry for a 4KB-sized block in a segment */
+struct f2fs_summary {
+	__le32 nid;		/* parent node id */
+	union {
+		__u8 reserved[3];
+		struct {
+			__u8 version;		/* node version number */
+			__le16 ofs_in_node;	/* block index in parent node */
+		} __packed;
+	};
+} __packed;
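
To make the ofs_in_node note concrete, a hedged sketch of resolving a data block address through a summary entry, assuming the parent node page has already been read in and is an inode block (a direct node would index addr[] instead of i_addr[]):

	static __le32 blkaddr_from_summary(const struct f2fs_node *parent,
					   const struct f2fs_summary *sum)
	{
		unsigned int ofs = le16_to_cpu(sum->ofs_in_node);

		/* ofs indexes the data-pointer array inside the parent node */
		return parent->i.i_addr[ofs];
	}
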
+
+/* summary block type, node or data, is stored to the summary_footer */
+#define SUM_TYPE_NODE		(1)
+#define SUM_TYPE_DATA		(0)
+
+struct summary_footer {
+	unsigned char entry_type;	/* SUM_TYPE_XXX */
+	__u32 check_sum;		/* summary checksum */
+} __packed;
+
+#define SUM_JOURNAL_SIZE	(F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
+				SUM_ENTRY_SIZE)
+#define NAT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct nat_journal_entry))
+#define NAT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct nat_journal_entry))
+#define SIT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct sit_journal_entry))
+#define SIT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct sit_journal_entry))
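
Assuming the default 4KB block (F2FS_BLKSIZE == 4096), the arithmetic works out to SUM_JOURNAL_SIZE = 4096 - 5 - 7*512 = 507 bytes; a packed nat_journal_entry is 13 bytes, giving (507 - 2)/13 = 38 NAT entries with 11 bytes reserved, and a packed sit_journal_entry is 78 bytes, giving (507 - 2)/78 = 6 SIT entries with 37 bytes reserved. A compile-time check of that layout might look like:

	static inline void f2fs_check_summary_layout(void)
	{
		BUILD_BUG_ON(sizeof(struct nat_journal_entry) != 13);
		BUILD_BUG_ON(sizeof(struct sit_journal_entry) != 78);
		/* entries + n_nats/n_sits + journal + footer == one 4KB block */
		BUILD_BUG_ON(sizeof(struct f2fs_summary_block) != 4096);
	}
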
+/*
+ * frequently updated NAT/SIT entries can be stored in the spare area in
+ * summary blocks
+ */
+enum {
+	NAT_JOURNAL = 0,
+	SIT_JOURNAL
+};
+
+struct nat_journal_entry {
+	__le32 nid;
+	struct f2fs_nat_entry ne;
+} __packed;
+
+struct nat_journal {
+	struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
+	__u8 reserved[NAT_JOURNAL_RESERVED];
+} __packed;
+
+struct sit_journal_entry {
+	__le32 segno;
+	struct f2fs_sit_entry se;
+} __packed;
+
+struct sit_journal {
+	struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
+	__u8 reserved[SIT_JOURNAL_RESERVED];
+} __packed;
+
+/* 4KB-sized summary block structure */
+struct f2fs_summary_block {
+	struct f2fs_summary entries[ENTRIES_IN_SUM];
+	union {
+		__le16 n_nats;
+		__le16 n_sits;
+	};
+	/* spare area is used by NAT or SIT journals */
+	union {
+		struct nat_journal nat_j;
+		struct sit_journal sit_j;
+	};
+	struct summary_footer footer;
+} __packed;
+
+/*
+ * For directory operations
+ */
+#define F2FS_DOT_HASH		0
+#define F2FS_DDOT_HASH		F2FS_DOT_HASH
+#define F2FS_MAX_HASH		(~((0x3ULL) << 62))
+#define F2FS_HASH_COL_BIT	((0x1ULL) << 63)
+
+typedef __le32	f2fs_hash_t;
+
+/* One directory entry slot covers an 8-byte-long file name */
+#define F2FS_NAME_LEN		8
+#define F2FS_NAME_LEN_BITS	3
+
+#define GET_DENTRY_SLOTS(x)	((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS)
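
A quick worked example of the slot computation (name lengths in bytes):

	/* GET_DENTRY_SLOTS(8)  -> (8 + 7)  >> 3 = 1 slot  */
	/* GET_DENTRY_SLOTS(11) -> (11 + 7) >> 3 = 2 slots */
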
+
+/* the number of dentries in a block */
+#define NR_DENTRY_IN_BLOCK	214
+
+/* MAX level for dir lookup */
+#define MAX_DIR_HASH_DEPTH	63
+
+#define SIZE_OF_DIR_ENTRY	11	/* by byte */
+#define SIZE_OF_DENTRY_BITMAP	((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
+					BITS_PER_BYTE)
+#define SIZE_OF_RESERVED	(PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
+				F2FS_NAME_LEN) * \
+				NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
+
+/* One directory entry slot representing F2FS_NAME_LEN-sized file name */
+struct f2fs_dir_entry {
+	__le32 hash_code;	/* hash code of file name */
+	__le32 ino;		/* inode number */
+	__le16 name_len;	/* length of file name */
+	__u8 file_type;		/* file type */
+} __packed;
+
+/* 4KB-sized directory entry block */
+struct f2fs_dentry_block {
+	/* validity bitmap for directory entries in each block */
+	__u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];
+	__u8 reserved[SIZE_OF_RESERVED];
+	struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK];
+	__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN];
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+	F2FS_FT_UNKNOWN,
+	F2FS_FT_REG_FILE,
+	F2FS_FT_DIR,
+	F2FS_FT_CHRDEV,
+	F2FS_FT_BLKDEV,
+	F2FS_FT_FIFO,
+	F2FS_FT_SOCK,
+	F2FS_FT_SYMLINK,
+	F2FS_FT_MAX
+};
+
+#endif  /* _LINUX_F2FS_FS_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 408fb1e..7617ee0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -44,6 +44,7 @@
 struct vfsmount;
 struct cred;
 struct swap_info_struct;
+struct seq_file;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -1444,10 +1445,6 @@
 
 extern bool inode_owner_or_capable(const struct inode *inode);
 
-/* not quite ready to be deprecated, but... */
-extern void lock_super(struct super_block *);
-extern void unlock_super(struct super_block *);
-
 /*
  * VFS helper functions..
  */
@@ -1543,6 +1540,7 @@
 	int (*setlease)(struct file *, long, struct file_lock **);
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
+	int (*show_fdinfo)(struct seq_file *m, struct file *f);
 };
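
For illustration, a minimal sketch of a driver filling in the new hook; everything prefixed my_ is hypothetical:

	static int my_show_fdinfo(struct seq_file *m, struct file *f)
	{
		struct my_dev *dev = f->private_data;

		seq_printf(m, "my-dev-state:\t%d\n", dev->state);
		return 0;
	}

	static const struct file_operations my_fops = {
		.owner		= THIS_MODULE,
		.show_fdinfo	= my_show_fdinfo,
		/* .open/.read/.release elided */
	};
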
 
 struct inode_operations {
@@ -1563,7 +1561,6 @@
 	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
-	void (*truncate) (struct inode *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1578,8 +1575,6 @@
 			   umode_t create_mode, int *opened);
 } ____cacheline_aligned;
 
-struct seq_file;
-
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
@@ -1810,6 +1805,8 @@
 #define FS_REQUIRES_DEV		1 
 #define FS_BINARY_MOUNTDATA	2
 #define FS_HAS_SUBTYPE		4
+#define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
 #define FS_REVAL_DOT		16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
@@ -1997,6 +1994,7 @@
 	bool			separate; /* should "name" be freed? */
 };
 
+extern long vfs_truncate(struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
 extern int do_fallocate(struct file *file, int mode, loff_t offset,
@@ -2286,9 +2284,9 @@
 #include <linux/err.h>
 
 /* needed for stackable file system support */
-extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
 
-extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
 
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
@@ -2396,11 +2394,11 @@
 
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
+extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
+extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
-		int origin, loff_t maxsize, loff_t eof);
+		int whence, loff_t maxsize, loff_t eof);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index ce31408..5dfa0aa 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -75,6 +75,16 @@
 typedef void (*fscache_operation_release_t)(struct fscache_operation *op);
 typedef void (*fscache_operation_processor_t)(struct fscache_operation *op);
 
+enum fscache_operation_state {
+	FSCACHE_OP_ST_BLANK,		/* Op is not yet submitted */
+	FSCACHE_OP_ST_INITIALISED,	/* Op is initialised */
+	FSCACHE_OP_ST_PENDING,		/* Op is blocked from running */
+	FSCACHE_OP_ST_IN_PROGRESS,	/* Op is in progress */
+	FSCACHE_OP_ST_COMPLETE,		/* Op is complete */
+	FSCACHE_OP_ST_CANCELLED,	/* Op has been cancelled */
+	FSCACHE_OP_ST_DEAD		/* Op is now dead */
+};
+
 struct fscache_operation {
 	struct work_struct	work;		/* record for async ops */
 	struct list_head	pend_link;	/* link in object->pending_ops */
@@ -86,10 +96,10 @@
 #define FSCACHE_OP_MYTHREAD	0x0002	/* - processing is done by the issuing thread, not the pool */
 #define FSCACHE_OP_WAITING	4	/* cleared when op is woken */
 #define FSCACHE_OP_EXCLUSIVE	5	/* exclusive op, other ops must wait */
-#define FSCACHE_OP_DEAD		6	/* op is now dead */
-#define FSCACHE_OP_DEC_READ_CNT	7	/* decrement object->n_reads on destruction */
-#define FSCACHE_OP_KEEP_FLAGS	0xc0	/* flags to keep when repurposing an op */
+#define FSCACHE_OP_DEC_READ_CNT	6	/* decrement object->n_reads on destruction */
+#define FSCACHE_OP_KEEP_FLAGS	0x0070	/* flags to keep when repurposing an op */
 
+	enum fscache_operation_state state;
 	atomic_t		usage;
 	unsigned		debug_id;	/* debugging ID */
 
@@ -106,6 +116,7 @@
 extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
+extern void fscache_op_complete(struct fscache_operation *, bool);
 extern void fscache_put_operation(struct fscache_operation *);
 
 /**
@@ -122,6 +133,7 @@
 {
 	INIT_WORK(&op->work, fscache_op_work_func);
 	atomic_set(&op->usage, 1);
+	op->state = FSCACHE_OP_ST_INITIALISED;
 	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
 	op->processor = processor;
 	op->release = release;
@@ -138,6 +150,7 @@
 	void			*context;	/* netfs read context (pinned) */
 	struct list_head	to_do;		/* list of things to be done by the backend */
 	unsigned long		start_time;	/* time at which retrieval started */
+	unsigned		n_pages;	/* number of pages to be retrieved */
 };
 
 typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
@@ -174,8 +187,22 @@
 }
 
 /**
+ * fscache_retrieval_complete - Record (partial) completion of a retrieval
+ * @op: The retrieval operation affected
+ * @n_pages: The number of pages to account for
+ */
+static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
+					      int n_pages)
+{
+	op->n_pages -= n_pages;
+	if (op->n_pages <= 0)
+		fscache_op_complete(&op->op, true);
+}
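
A hedged sketch of how a cache backend's per-page read completion might use the new accounting helper (my_backend_read_done is hypothetical; fscache_mark_page_cached is the per-page marker added elsewhere in this series):

	static void my_backend_read_done(struct fscache_retrieval *op,
					 struct page *page, int error)
	{
		if (!error)
			fscache_mark_page_cached(op, page);
		/* account one page; the op completes once n_pages hits zero */
		fscache_retrieval_complete(op, 1);
	}
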
+
+/**
  * fscache_put_retrieval - Drop a reference to a retrieval operation
  * @op: The retrieval operation affected
  *
  * Drop a reference to a retrieval operation.
  */
@@ -227,6 +254,9 @@
 	/* store the updated auxiliary data on an object */
 	void (*update_object)(struct fscache_object *object);
 
+	/* Invalidate an object */
+	void (*invalidate_object)(struct fscache_operation *op);
+
 	/* discard the resources pinned by an object and effect retirement if
 	 * necessary */
 	void (*drop_object)(struct fscache_object *object);
@@ -301,11 +331,30 @@
 #define FSCACHE_COOKIE_PENDING_FILL	3	/* T if pending initial fill on object */
 #define FSCACHE_COOKIE_FILLING		4	/* T if filling object incrementally */
 #define FSCACHE_COOKIE_UNAVAILABLE	5	/* T if cookie is unavailable (error, etc) */
+#define FSCACHE_COOKIE_WAITING_ON_READS	6	/* T if cookie is waiting on reads */
+#define FSCACHE_COOKIE_INVALIDATING	7	/* T if cookie is being invalidated */
 };
 
 extern struct fscache_cookie fscache_fsdef_index;
 
 /*
+ * Event list for fscache_object::{event_mask,events}
+ */
+enum {
+	FSCACHE_OBJECT_EV_REQUEUE,	/* T if object should be requeued */
+	FSCACHE_OBJECT_EV_UPDATE,	/* T if object should be updated */
+	FSCACHE_OBJECT_EV_INVALIDATE,	/* T if cache requested object invalidation */
+	FSCACHE_OBJECT_EV_CLEARED,	/* T if accessors all gone */
+	FSCACHE_OBJECT_EV_ERROR,	/* T if fatal error occurred during processing */
+	FSCACHE_OBJECT_EV_RELEASE,	/* T if netfs requested object release */
+	FSCACHE_OBJECT_EV_RETIRE,	/* T if netfs requested object retirement */
+	FSCACHE_OBJECT_EV_WITHDRAW,	/* T if cache requested object withdrawal */
+	NR_FSCACHE_OBJECT_EVENTS
+};
+
+#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1)
+
+/*
  * on-disk cache file or index handle
  */
 struct fscache_object {
@@ -317,6 +366,7 @@
 		/* active states */
 		FSCACHE_OBJECT_AVAILABLE,	/* cleaning up object after creation */
 		FSCACHE_OBJECT_ACTIVE,		/* object is usable */
+		FSCACHE_OBJECT_INVALIDATING,	/* object is invalidating */
 		FSCACHE_OBJECT_UPDATING,	/* object is updating */
 
 		/* terminal states */
@@ -332,10 +382,10 @@
 
 	int			debug_id;	/* debugging ID */
 	int			n_children;	/* number of child objects */
-	int			n_ops;		/* number of ops outstanding on object */
+	int			n_ops;		/* number of extant ops on object */
 	int			n_obj_ops;	/* number of object ops outstanding on object */
 	int			n_in_progress;	/* number of ops in progress */
-	int			n_exclusive;	/* number of exclusive ops queued */
+	int			n_exclusive;	/* number of exclusive ops queued or in progress */
 	atomic_t		n_reads;	/* number of read ops in progress */
 	spinlock_t		lock;		/* state and operations lock */
 
@@ -343,14 +393,6 @@
 	unsigned long		event_mask;	/* events this object is interested in */
 	unsigned long		events;		/* events to be processed by this object
 						 * (order is important - using fls) */
-#define FSCACHE_OBJECT_EV_REQUEUE	0	/* T if object should be requeued */
-#define FSCACHE_OBJECT_EV_UPDATE	1	/* T if object should be updated */
-#define FSCACHE_OBJECT_EV_CLEARED	2	/* T if accessors all gone */
-#define FSCACHE_OBJECT_EV_ERROR		3	/* T if fatal error occurred during processing */
-#define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
-#define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
-#define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
-#define FSCACHE_OBJECT_EVENTS_MASK	0x7f	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
@@ -504,6 +546,9 @@
 
 extern void fscache_io_error(struct fscache_cache *cache);
 
+extern void fscache_mark_page_cached(struct fscache_retrieval *op,
+				     struct page *page);
+
 extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
 				      struct pagevec *pagevec);
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9ec20de..7a08623 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -135,14 +135,14 @@
 	 */
 	void (*put_context)(void *cookie_netfs_data, void *context);
 
-	/* indicate pages that now have cache metadata retained
-	 * - this function should mark the specified pages as now being cached
-	 * - the pages will have been marked with PG_fscache before this is
+	/* indicate a page that now has cache metadata retained
+	 * - this function should mark the specified page as now being cached
+	 * - the page will have been marked with PG_fscache before this is
 	 *   called, so this is optional
 	 */
-	void (*mark_pages_cached)(void *cookie_netfs_data,
-				  struct address_space *mapping,
-				  struct pagevec *cached_pvec);
+	void (*mark_page_cached)(void *cookie_netfs_data,
+				 struct address_space *mapping,
+				 struct page *page);
 
 	/* indicate the cookie is no longer cached
 	 * - this function is called when the backing store currently caching
@@ -185,6 +185,8 @@
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, int);
 extern void __fscache_update_cookie(struct fscache_cookie *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
+extern void __fscache_invalidate(struct fscache_cookie *);
+extern void __fscache_wait_on_invalidate(struct fscache_cookie *);
 extern int __fscache_read_or_alloc_page(struct fscache_cookie *,
 					struct page *,
 					fscache_rw_complete_t,
@@ -390,6 +392,42 @@
 }
 
 /**
+ * fscache_invalidate - Notify cache that an object needs invalidation
+ * @cookie: The cookie representing the cache object
+ *
+ * Notify the cache that an object needs to be invalidated and that it
+ * should abort any retrievals or stores it is doing on the cache.  The object
+ * is then marked non-caching until such time as the invalidation is complete.
+ *
+ * This can be called with spinlocks held.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_invalidate(cookie);
+}
+
+/**
+ * fscache_wait_on_invalidate - Wait for invalidation to complete
+ * @cookie: The cookie representing the cache object
+ *
+ * Wait for the invalidation of an object to complete.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_wait_on_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_wait_on_invalidate(cookie);
+}
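
Taken together, a netfs might use the pair like this when the server reports a third-party change (the my_netfs_* names and cookie field are hypothetical):

	static void my_netfs_note_change(struct my_netfs_inode *ni)
	{
		fscache_invalidate(ni->cookie);		/* may be called under spinlocks */
	}

	static void my_netfs_before_reuse(struct my_netfs_inode *ni)
	{
		fscache_wait_on_invalidate(ni->cookie);	/* sleepable context only */
	}
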
+
+/**
  * fscache_reserve_space - Reserve data space for a cached object
  * @cookie: The cookie representing the cache object
  * @i_size: The amount of space to be reserved
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 63d966d..d5b0910 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -88,9 +88,10 @@
  *		if the group is interested in this event.
  * handle_event - main call for a group to handle an fs event
  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
- * freeing-mark - this means that a mark has been flagged to die when everything
- *		finishes using it.  The function is supplied with what must be a
- *		valid group and inode to use to clean up.
+ * freeing_mark - called when a mark is being destroyed for some reason.  The group
+ * 		MUST be holding a reference on each mark and that reference must be
+ * 		dropped in this function.  inotify uses this function to send
+ * 		userspace messages that marks have been removed.
  */
 struct fsnotify_ops {
 	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
@@ -141,12 +142,14 @@
 	unsigned int priority;
 
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
-	spinlock_t mark_lock;		/* protect marks_list */
+	struct mutex mark_mutex;	/* protect marks_list */
 	atomic_t num_marks;		/* 1 for each mark and 1 for not being
 					 * past the point of no return when freeing
 					 * a group */
 	struct list_head marks_list;	/* all inode marks for this group */
 
+	struct fasync_struct    *fsn_fa;    /* async notification */
+
 	/* groups can define private fields here or use the void *private */
 	union {
 		void *private;
@@ -155,7 +158,6 @@
 			spinlock_t	idr_lock;
 			struct idr      idr;
 			u32             last_wd;
-			struct fasync_struct    *fa;    /* async notification */
 			struct user_struct      *user;
 		} inotify_data;
 #endif
@@ -287,7 +289,6 @@
 		struct fsnotify_inode_mark i;
 		struct fsnotify_vfsmount_mark m;
 	};
-	struct list_head free_g_list;	/* tmp list used when freeing this mark */
 	__u32 ignored_mask;		/* events types to ignore */
 #define FSNOTIFY_MARK_FLAG_INODE		0x01
 #define FSNOTIFY_MARK_FLAG_VFSMOUNT		0x02
@@ -360,11 +361,16 @@
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
 
-/* get a reference to an existing or create a new group */
+/* create a new group */
 extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
+/* get reference to a group */
+extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
-
+/* destroy group */
+extern void fsnotify_destroy_group(struct fsnotify_group *group);
+/* fasync handler function */
+extern int fsnotify_fasync(int fd, struct file *file, int on);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
@@ -405,8 +411,13 @@
 /* attach the mark to both the group and the inode */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 			     struct inode *inode, struct vfsmount *mnt, int allow_dups);
-/* given a mark, flag it to be freed when all references are dropped */
-extern void fsnotify_destroy_mark(struct fsnotify_mark *mark);
+extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group,
+				    struct inode *inode, struct vfsmount *mnt, int allow_dups);
+/* given a group and a mark, flag mark to be freed when all references are dropped */
+extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+				  struct fsnotify_group *group);
+extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
+					 struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a52f2f4..92691d8 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -394,7 +394,7 @@
 			    size_t cnt, loff_t *ppos);
 ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 			     size_t cnt, loff_t *ppos);
-loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence);
 int ftrace_regex_release(struct inode *inode, struct file *file);
 
 void __init
@@ -559,7 +559,7 @@
 			    size_t cnt, loff_t *ppos) { return -ENODEV; }
 static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 			     size_t cnt, loff_t *ppos) { return -ENODEV; }
-static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
 {
 	return -ENODEV;
 }
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4f440b3..79b8bba 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -88,10 +88,14 @@
 };
 
 #define PARTITION_META_INFO_VOLNAMELTH	64
-#define PARTITION_META_INFO_UUIDLTH	16
+/*
+ * Enough for the string representation of any kind of UUID plus a trailing NUL.
+ * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
+ */
+#define PARTITION_META_INFO_UUIDLTH	37
 
 struct partition_meta_info {
-	u8 uuid[PARTITION_META_INFO_UUIDLTH];	/* always big endian */
+	char uuid[PARTITION_META_INFO_UUIDLTH];
 	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
 };
 
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
new file mode 100644
index 0000000..023bc34
--- /dev/null
+++ b/include/linux/genl_magic_func.h
@@ -0,0 +1,422 @@
+#ifndef GENL_MAGIC_FUNC_H
+#define GENL_MAGIC_FUNC_H
+
+#include <linux/genl_magic_struct.h>
+
+/*
+ * Magic: declare tla policy						{{{1
+ * Magic: declare nested policies
+ *									{{{2
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+	[tag_name] = { .type = NLA_NESTED },
+
+static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
+{ s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, _type, __get,	\
+		 __put, __is_signed)					\
+	[attr_nr] = { .type = nla_type },
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
+		__get, __put, __is_signed)				\
+	[attr_nr] = { .type = nla_type,					\
+		      .len = maxlen - (nla_type == NLA_NUL_STRING) },
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#ifndef __KERNEL__
+#ifndef pr_info
+#define pr_info(args...)	fprintf(stderr, args);
+#endif
+#endif
+
+#ifdef GENL_MAGIC_DEBUG
+static void dprint_field(const char *dir, int nla_type,
+		const char *name, void *valp)
+{
+	__u64 val = valp ? *(__u32 *)valp : 1;
+	switch (nla_type) {
+	case NLA_U8:  val = (__u8)val;	/* fall through */
+	case NLA_U16: val = (__u16)val;	/* fall through */
+	case NLA_U32: val = (__u32)val;
+		pr_info("%s attr %s: %d 0x%08x\n", dir,
+			name, (int)val, (unsigned)val);
+		break;
+	case NLA_U64:
+		val = *(__u64*)valp;
+		pr_info("%s attr %s: %lld 0x%08llx\n", dir,
+			name, (long long)val, (unsigned long long)val);
+		break;
+	case NLA_FLAG:
+		if (val)
+			pr_info("%s attr %s: set\n", dir, name);
+		break;
+	}
+}
+
+static void dprint_array(const char *dir, int nla_type,
+		const char *name, const char *val, unsigned len)
+{
+	switch (nla_type) {
+	case NLA_NUL_STRING:
+		if (len && val[len-1] == '\0')
+			len--;
+		pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
+		break;
+	default:
+		/* we can always show 4 bytes,
+		 * that's what nlattrs are aligned to. */
+		pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
+			dir, name, len, val[0], val[1], val[2], val[3]);
+	}
+}
+
+#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b);
+
+/* Name is a member field name of the struct s.
+ * If s is NULL (only parsing, no copy requested in *_from_attrs()),
+ * nla is supposed to point to the attribute containing the information
+ * corresponding to that struct member. */
+#define DPRINT_FIELD(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_field(dir, nla_type, #name, &s->name);	\
+		else if (nla)						\
+			dprint_field(dir, nla_type, #name,		\
+				(nla_type == NLA_FLAG) ? NULL		\
+						: nla_data(nla));	\
+	} while (0)
+
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_array(dir, nla_type, #name,		\
+					s->name, s->name ## _len);	\
+		else if (nla)						\
+			dprint_array(dir, nla_type, #name,		\
+					nla_data(nla), nla_len(nla));	\
+	} while (0)
+#else
+#define DPRINT_TLA(a, op, b) do {} while (0)
+#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
+#endif
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate struct from attribute table:
+ *									{{{2
+ */
+
+/* processing of generic netlink messages is serialized.
+ * use one static buffer for parsing of nested attributes */
+static struct nlattr *nested_attr_tb[128];
+
+#ifndef BUILD_BUG_ON
+/* Force a compilation error if condition is true */
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or wherever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+#endif
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+/* *_from_attrs functions are static, but potentially unused */		\
+static int __ ## s_name ## _from_attrs(struct s_name *s,		\
+		struct genl_info *info, bool exclude_invariants)	\
+{									\
+	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\
+	struct nlattr *tla = info->attrs[tag_number];			\
+	struct nlattr **ntb = nested_attr_tb;				\
+	struct nlattr *nla;						\
+	int err;							\
+	BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb));	\
+	if (!tla)							\
+		return -ENOMSG;						\
+	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
+	err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy);	\
+	if (err)							\
+		return err;						\
+									\
+	s_fields							\
+	return 0;							\
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs(struct s_name *s,			\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, false);		\
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs_for_change(struct s_name *s,		\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, true);		\
+}					__attribute__((unused))		\
+
+#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\
+		nla = ntb[attr_nr];						\
+		if (nla) {						\
+			if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
+				pr_info("<< must not change invariant attr: %s\n", #name);	\
+				return -EEXIST;				\
+			}						\
+			assignment;					\
+		} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
+			/* attribute missing from payload, */		\
+			/* which was expected */			\
+		} else if ((attr_flag) & DRBD_F_REQUIRED) {		\
+			pr_info("<< missing attr: %s\n", #name);	\
+			return -ENOMSG;					\
+		}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
+			if (s)						\
+				s->name = __get(nla);			\
+			DPRINT_FIELD("<<", nla_type, name, s, nla))
+
+/* validate_nla() already checked nla_len <= maxlen appropriately. */
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
+			if (s)						\
+				s->name ## _len =			\
+					__get(s->name, nla, maxlen);	\
+			DPRINT_ARRAY("<<", nla_type, name, s, nla))
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+/*
+ * Magic: define op number to op name mapping				{{{1
+ *									{{{2
+ */
+const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
+{
+	switch (cmd) {
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+	case op_num: return #op_name;
+#include GENL_MAGIC_INCLUDE_FILE
+	default:
+		     return "unknown";
+	}
+}
+
+#ifdef __KERNEL__
+#include <linux/stringify.h>
+/*
+ * Magic: define genl_ops						{{{1
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+{								\
+	handler							\
+	.cmd = op_name,						\
+	.policy	= CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy),	\
+},
+
+#define ZZZ_genl_ops		CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
+static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+/*
+ * Define the genl_family, multicast groups,				{{{1
+ * and provide register/unregister functions.
+ *									{{{2
+ */
+#define ZZZ_genl_family		CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
+static struct genl_family ZZZ_genl_family __read_mostly = {
+	.id = GENL_ID_GENERATE,
+	.name = __stringify(GENL_MAGIC_FAMILY),
+	.version = GENL_MAGIC_VERSION,
+#ifdef GENL_MAGIC_FAMILY_HDRSZ
+	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
+#endif
+	.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+};
+
+/*
+ * Magic: define multicast groups
+ * Magic: define multicast group registration helper
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+static struct genl_multicast_group					\
+CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = {		\
+	.name = #group,							\
+};									\
+static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
+	struct sk_buff *skb, gfp_t flags)				\
+{									\
+	unsigned int group_id =						\
+		CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id;	\
+	if (!group_id)							\
+		return -EINVAL;						\
+	return genlmsg_multicast(skb, 0, group_id, flags);		\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
+{
+	int err = genl_register_family_with_ops(&ZZZ_genl_family,
+		ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
+	if (err)
+		return err;
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+	err = genl_register_mc_group(&ZZZ_genl_family,			\
+		&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group));		\
+	if (err)							\
+		goto fail;						\
+	else								\
+		pr_info("%s: mcg %s: %u\n", #group,			\
+			__stringify(GENL_MAGIC_FAMILY),			\
+			CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+	return 0;
+fail:
+	genl_unregister_family(&ZZZ_genl_family);
+	return err;
+}
+
+void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
+{
+	genl_unregister_family(&ZZZ_genl_family);
+}
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate skb from struct.
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s,	\
+		const bool exclude_sensitive)				\
+{									\
+	struct nlattr *tla = nla_nest_start(skb, tag_number);		\
+	if (!tla)							\
+		goto nla_put_failure;					\
+	DPRINT_TLA(#s_name, "-=>", #tag_name);				\
+	s_fields							\
+	nla_nest_end(skb, tla);						\
+	return 0;							\
+									\
+nla_put_failure:							\
+	if (tla)							\
+		nla_nest_cancel(skb, tla);				\
+        return -EMSGSIZE;						\
+}									\
+static inline int s_name ## _to_priv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 0);				\
+}									\
+static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 1);				\
+}
+
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
+		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
+		if (__put(skb, attr_nr, s->name))			\
+			goto nla_put_failure;				\
+	}
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
+		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
+		if (__put(skb, attr_nr, min_t(int, maxlen,		\
+			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
+						s->name))		\
+			goto nla_put_failure;				\
+	}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+
+/* Functions for initializing structs to default values.  */
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)
+#undef __u32_field_def
+#define __u32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __s32_field_def
+#define __s32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __flg_field_def
+#define __flg_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __str_field_def
+#define __str_field_def(attr_nr, attr_flag, name, maxlen)		\
+	memset(x->name, 0, sizeof(x->name));				\
+	x->name ## _len = 0;
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
+static void set_ ## s_name ## _defaults(struct s_name *x) {	\
+s_fields								\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#endif /* __KERNEL__ */
+
+/* }}}1 */
+#endif /* GENL_MAGIC_FUNC_H */
+/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
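
To show how the pair of magic headers is meant to be consumed, a hedged sketch of a definition file and its includer, loosely modelled on the DRBD usage that motivated these headers (all example_* names are hypothetical):

	/* example_genl.h -- multiply included by the magic headers */
	GENL_struct(EX_NLA_CONF, 1, example_conf,
		__u32_field(1, DRBD_GENLA_F_MANDATORY, watermark)
		__str_field(2, 0, name, 32)
	)
	GENL_op(EX_OP_SET_CONF, 1, GENL_doit(example_set_conf), )

	/* in the consumer: */
	#define GENL_MAGIC_FAMILY	example
	#define GENL_MAGIC_VERSION	1
	#define GENL_MAGIC_INCLUDE_FILE	"example_genl.h"
	#include <linux/genl_magic_struct.h>	/* declares struct example_conf, enums */
	#include <linux/genl_magic_func.h>	/* defines example_genl_register() etc. */
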
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
new file mode 100644
index 0000000..eecd19b
--- /dev/null
+++ b/include/linux/genl_magic_struct.h
@@ -0,0 +1,277 @@
+#ifndef GENL_MAGIC_STRUCT_H
+#define GENL_MAGIC_STRUCT_H
+
+#ifndef GENL_MAGIC_FAMILY
+# error "you need to define GENL_MAGIC_FAMILY before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_VERSION
+# error "you need to define GENL_MAGIC_VERSION before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_INCLUDE_FILE
+# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
+#endif
+
+#include <linux/genetlink.h>
+#include <linux/types.h>
+
+#define CONCAT__(a,b)	a ## b
+#define CONCAT_(a,b)	CONCAT__(a,b)
+
+extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
+extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
+
+/*
+ * Extension of genl attribute validation policies			{{{2
+ */
+
+/*
+ * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
+ * know about.  This flag can be set in nlattr->nla_type to indicate that this
+ * attribute must not be ignored.
+ *
+ * We check and remove this flag in drbd_nla_check_mandatory() before
+ * validating the attribute types and lengths via nla_parse_nested().
+ */
+#define DRBD_GENLA_F_MANDATORY (1 << 14)
+
+/*
+ * Flags specific to drbd and not visible at the netlink layer, used in
+ * <struct>_from_attrs and <struct>_to_skb:
+ *
+ * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
+ * invalid.
+ *
+ * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
+ * included in unprivileged get requests or broadcasts.
+ *
+ * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
+ * cannot subsequently be changed.
+ */
+#define DRBD_F_REQUIRED (1 << 0)
+#define DRBD_F_SENSITIVE (1 << 1)
+#define DRBD_F_INVARIANT (1 << 2)
+
+#define __nla_type(x)	((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
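
For instance, the receive side (drbd_nla_check_mandatory in DRBD) strips the flag and rejects unknown-but-mandatory attributes, roughly:

	static int check_mandatory(struct nlattr *nla, int maxtype)
	{
		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
			if (__nla_type(nla->nla_type) > maxtype)
				return -EOPNOTSUPP;	/* mandatory yet unknown */
		}
		return 0;
	}
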
+
+/*									}}}1
+ * MAGIC
+ * multi-include macro expansion magic starts here
+ */
+
+/* MAGIC helpers							{{{2 */
+
+/* possible field types */
+#define __flg_field(attr_nr, attr_flag, name) \
+	__field(attr_nr, attr_flag, name, NLA_U8, char, \
+			nla_get_u8, nla_put_u8, false)
+#define __u8_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
+			nla_get_u8, nla_put_u8, false)
+#define __u16_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
+			nla_get_u16, nla_put_u16, false)
+#define __u32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
+			nla_get_u32, nla_put_u32, false)
+#define __s32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
+			nla_get_u32, nla_put_u32, true)
+#define __u64_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
+			nla_get_u64, nla_put_u64, false)
+#define __str_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
+			nla_strlcpy, nla_put, false)
+#define __bin_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
+			nla_memcpy, nla_put, false)
+
+/* fields with default values */
+#define __flg_field_def(attr_nr, attr_flag, name, default) \
+	__flg_field(attr_nr, attr_flag, name)
+#define __u32_field_def(attr_nr, attr_flag, name, default) \
+	__u32_field(attr_nr, attr_flag, name)
+#define __s32_field_def(attr_nr, attr_flag, name, default) \
+	__s32_field(attr_nr, attr_flag, name)
+#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
+	__str_field(attr_nr, attr_flag, name, maxlen)
+
+#define GENL_op_init(args...)	args
+#define GENL_doit(handler)		\
+	.doit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+#define GENL_dumpit(handler)		\
+	.dumpit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+
+/*									}}}1
+ * Magic: define the enum symbols for genl_ops
+ * Magic: define the enum symbols for top level attributes
+ * Magic: define the enum symbols for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	op_name = op_num,
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)			\
+	op_name = op_num,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+		tag_name = tag_number,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)	\
+enum {								\
+	s_fields						\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type,	\
+		__get, __put, __is_signed)			\
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type,	\
+		maxlen, __get, __put, __is_signed)		\
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: compile time assert unique numbers for operations
+ * Magic: -"- unique numbers for top level attributes
+ * Magic: -"- unique numbers for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)	\
+	case op_name:
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	case op_name:
+
+static inline void ct_assert_unique_operations(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+		case tag_number:
+
+static inline void ct_assert_unique_top_level_attributes(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
+{									\
+	switch (0) {							\
+		s_fields						\
+			;						\
+	}								\
+}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	case attr_nr:
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	case attr_nr:
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: declare structs
+ * struct <name> {
+ *	fields
+ * };
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+struct s_name { s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	type name;
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	type name[maxlen];	\
+	__u32 name ## _len;
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+enum {									\
+	s_fields							\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		is_signed)						\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, is_signed)				\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/* }}}1 */
+#endif /* GENL_MAGIC_STRUCT_H */
+/* vim: set foldmethod=marker nofoldenable : */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f74856e..0f615eb 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,6 +30,7 @@
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
+#define ___GFP_KMEMCG		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -89,6 +90,7 @@
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
@@ -365,6 +367,9 @@
 extern void free_hot_cold_page(struct page *page, int cold);
 extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
+extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
+extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
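
A hedged sketch of the intended pairing: pages allocated with __GFP_KMEMCG are charged to the current memcg and must be released through the matching helpers so the charge is undone (my_* names are hypothetical):

	static struct page *my_alloc_accounted(unsigned int order)
	{
		return alloc_pages(GFP_KERNEL | __GFP_KMEMCG, order);
	}

	static void my_free_accounted(struct page *page, unsigned int order)
	{
		__free_memcg_kmem_pages(page, order);	/* uncharges, then frees */
	}
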
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 092dc53..1d76f8c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -31,7 +31,8 @@
 			 unsigned long new_addr, unsigned long old_end,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-			unsigned long addr, pgprot_t newprot);
+			unsigned long addr, pgprot_t newprot,
+			int prot_numa);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
@@ -111,7 +112,7 @@
 #define wait_split_huge_page(__anon_vma, __pmd)				\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		anon_vma_lock(__anon_vma);				\
+		anon_vma_lock_write(__anon_vma);			\
 		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
@@ -171,6 +172,10 @@
 	}
 	return page;
 }
+
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -209,6 +214,13 @@
 {
 	return 0;
 }
+
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+{
+	return 0;
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3e7fa1a..0c80d3f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,7 @@
 				pud_t *pud, int write);
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pmd);
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 #else /* !CONFIG_HUGETLB_PAGE */
@@ -132,7 +132,11 @@
 {
 }
 
-#define hugetlb_change_protection(vma, address, end, newprot)
+static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot)
+{
+	return 0;
+}
 
 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index d73878c..ce8217f 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -62,7 +62,7 @@
 					 struct page *page);
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
-extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 				   struct page *newhpage);
 
@@ -111,9 +111,8 @@
 	return;
 }
 
-static inline int __init hugetlb_cgroup_file_init(int idx)
+static inline void hugetlb_cgroup_file_init(void)
 {
-	return 0;
 }
 
 static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 92a0dc7..babe0cf 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -20,8 +20,6 @@
 #define OMAP_I2C_FLAG_NO_FIFO			BIT(0)
 #define OMAP_I2C_FLAG_SIMPLE_CLOCK		BIT(1)
 #define OMAP_I2C_FLAG_16BIT_DATA_REG		BIT(2)
-#define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE	BIT(3)
-#define OMAP_I2C_FLAG_APPLY_ERRATA_I207	BIT(4)
 #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK	BIT(5)
 #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK	BIT(6)
 /* how the CPU address bus must be translated for I2C unit access */
diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
index beda7081..06e3089 100644
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ b/include/linux/i2c/i2c-sh_mobile.h
@@ -5,6 +5,7 @@
 
 struct i2c_sh_mobile_platform_data {
 	unsigned long bus_speed;
+	unsigned int clks_per_count;
 };
 
 #endif /* __I2C_SH_MOBILE_H__ */
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 9a5e284..1ff54b1 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -39,52 +39,51 @@
  * address each module uses within a given i2c slave.
  */
 
-/* Slave 0 (i2c address 0x48) */
-#define TWL4030_MODULE_USB		0x00
+enum twl4030_module_ids {
+	TWL4030_MODULE_USB = 0,		/* Slave 0 (i2c address 0x48) */
+	TWL4030_MODULE_AUDIO_VOICE,	/* Slave 1 (i2c address 0x49) */
+	TWL4030_MODULE_GPIO,
+	TWL4030_MODULE_INTBR,
+	TWL4030_MODULE_PIH,
 
-/* Slave 1 (i2c address 0x49) */
-#define TWL4030_MODULE_AUDIO_VOICE	0x01
-#define TWL4030_MODULE_GPIO		0x02
-#define TWL4030_MODULE_INTBR		0x03
-#define TWL4030_MODULE_PIH		0x04
-#define TWL4030_MODULE_TEST		0x05
+	TWL4030_MODULE_TEST,
+	TWL4030_MODULE_KEYPAD,		/* Slave 2 (i2c address 0x4a) */
+	TWL4030_MODULE_MADC,
+	TWL4030_MODULE_INTERRUPTS,
+	TWL4030_MODULE_LED,
 
-/* Slave 2 (i2c address 0x4a) */
-#define TWL4030_MODULE_KEYPAD		0x06
-#define TWL4030_MODULE_MADC		0x07
-#define TWL4030_MODULE_INTERRUPTS	0x08
-#define TWL4030_MODULE_LED		0x09
-#define TWL4030_MODULE_MAIN_CHARGE	0x0A
-#define TWL4030_MODULE_PRECHARGE	0x0B
-#define TWL4030_MODULE_PWM0		0x0C
-#define TWL4030_MODULE_PWM1		0x0D
-#define TWL4030_MODULE_PWMA		0x0E
-#define TWL4030_MODULE_PWMB		0x0F
+	TWL4030_MODULE_MAIN_CHARGE,
+	TWL4030_MODULE_PRECHARGE,
+	TWL4030_MODULE_PWM0,
+	TWL4030_MODULE_PWM1,
+	TWL4030_MODULE_PWMA,
 
-#define TWL5031_MODULE_ACCESSORY	0x10
-#define TWL5031_MODULE_INTERRUPTS	0x11
+	TWL4030_MODULE_PWMB,
+	TWL5031_MODULE_ACCESSORY,
+	TWL5031_MODULE_INTERRUPTS,
+	TWL4030_MODULE_BACKUP,		/* Slave 3 (i2c address 0x4b) */
+	TWL4030_MODULE_INT,
 
-/* Slave 3 (i2c address 0x4b) */
-#define TWL4030_MODULE_BACKUP		0x12
-#define TWL4030_MODULE_INT		0x13
-#define TWL4030_MODULE_PM_MASTER	0x14
-#define TWL4030_MODULE_PM_RECEIVER	0x15
-#define TWL4030_MODULE_RTC		0x16
-#define TWL4030_MODULE_SECURED_REG	0x17
+	TWL4030_MODULE_PM_MASTER,
+	TWL4030_MODULE_PM_RECEIVER,
+	TWL4030_MODULE_RTC,
+	TWL4030_MODULE_SECURED_REG,
+	TWL4030_MODULE_LAST,
+};
 
+/* Similar functionalities implemented in TWL4030/6030 */
 #define TWL_MODULE_USB		TWL4030_MODULE_USB
-#define TWL_MODULE_AUDIO_VOICE	TWL4030_MODULE_AUDIO_VOICE
 #define TWL_MODULE_PIH		TWL4030_MODULE_PIH
-#define TWL_MODULE_MADC		TWL4030_MODULE_MADC
 #define TWL_MODULE_MAIN_CHARGE	TWL4030_MODULE_MAIN_CHARGE
 #define TWL_MODULE_PM_MASTER	TWL4030_MODULE_PM_MASTER
 #define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
 #define TWL_MODULE_RTC		TWL4030_MODULE_RTC
 #define TWL_MODULE_PWM		TWL4030_MODULE_PWM0
+#define TWL_MODULE_LED		TWL4030_MODULE_LED
 
-#define TWL6030_MODULE_ID0	0x0D
-#define TWL6030_MODULE_ID1	0x0E
-#define TWL6030_MODULE_ID2	0x0F
+#define TWL6030_MODULE_ID0	13
+#define TWL6030_MODULE_ID1	14
+#define TWL6030_MODULE_ID2	15
 
 #define GPIO_INTR_OFFSET	0
 #define KEYPAD_INTR_OFFSET	1
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 87259a4..de7e190 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -152,4 +152,15 @@
 
 void __init idr_init_cache(void);
 
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ */
+#define idr_for_each_entry(idp, entry, id)				\
+	for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
+	     entry != NULL;                                             \
+	     ++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+
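
Typical usage of the new iterator (my_idr and struct my_obj are hypothetical):

	struct my_obj *obj;
	int id;

	idr_for_each_entry(&my_idr, obj, id)
		pr_info("id %d -> obj %p\n", id, obj);
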
 #endif /* __IDR_H__ */
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 2c7223d..86c361e 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -18,6 +18,7 @@
 extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
+extern int ima_module_check(struct file *file);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -40,6 +41,11 @@
 	return 0;
 }
 
+static inline int ima_module_check(struct file *file)
+{
+	return 0;
+}
+
 #endif /* CONFIG_IMA_H */
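
The new hook is meant to be called on the module file before its contents are trusted; a sketch of such a call site (the loader path itself is not part of this hunk):

	static int my_check_module_file(struct file *file)
	{
		int err = ima_module_check(file);

		if (err)
			return err;	/* appraisal failed */
		/* ... go on to read and load the module image ... */
		return 0;
	}
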
 
 #ifdef CONFIG_IMA_APPRAISE
diff --git a/include/linux/init.h b/include/linux/init.h
index f63692d..a799273 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -182,16 +182,16 @@
  * can point at the same handler without causing duplicate-symbol build errors.
  */
 
-#define __define_initcall(level,fn,id) \
+#define __define_initcall(fn, id) \
 	static initcall_t __initcall_##fn##id __used \
-	__attribute__((__section__(".initcall" level ".init"))) = fn
+	__attribute__((__section__(".initcall" #id ".init"))) = fn
 
 /*
  * Early initcalls run before initializing SMP.
  *
  * Only for built-in code, not modules.
  */
-#define early_initcall(fn)		__define_initcall("early",fn,early)
+#define early_initcall(fn)		__define_initcall(fn, early)
 
 /*
  * A "pure" initcall has no dependencies on anything else, and purely
@@ -200,23 +200,23 @@
  * This only exists for built-in code, not for modules.
  * Keep main.c:initcall_level_names[] in sync.
  */
-#define pure_initcall(fn)		__define_initcall("0",fn,0)
+#define pure_initcall(fn)		__define_initcall(fn, 0)
 
-#define core_initcall(fn)		__define_initcall("1",fn,1)
-#define core_initcall_sync(fn)		__define_initcall("1s",fn,1s)
-#define postcore_initcall(fn)		__define_initcall("2",fn,2)
-#define postcore_initcall_sync(fn)	__define_initcall("2s",fn,2s)
-#define arch_initcall(fn)		__define_initcall("3",fn,3)
-#define arch_initcall_sync(fn)		__define_initcall("3s",fn,3s)
-#define subsys_initcall(fn)		__define_initcall("4",fn,4)
-#define subsys_initcall_sync(fn)	__define_initcall("4s",fn,4s)
-#define fs_initcall(fn)			__define_initcall("5",fn,5)
-#define fs_initcall_sync(fn)		__define_initcall("5s",fn,5s)
-#define rootfs_initcall(fn)		__define_initcall("rootfs",fn,rootfs)
-#define device_initcall(fn)		__define_initcall("6",fn,6)
-#define device_initcall_sync(fn)	__define_initcall("6s",fn,6s)
-#define late_initcall(fn)		__define_initcall("7",fn,7)
-#define late_initcall_sync(fn)		__define_initcall("7s",fn,7s)
+#define core_initcall(fn)		__define_initcall(fn, 1)
+#define core_initcall_sync(fn)		__define_initcall(fn, 1s)
+#define postcore_initcall(fn)		__define_initcall(fn, 2)
+#define postcore_initcall_sync(fn)	__define_initcall(fn, 2s)
+#define arch_initcall(fn)		__define_initcall(fn, 3)
+#define arch_initcall_sync(fn)		__define_initcall(fn, 3s)
+#define subsys_initcall(fn)		__define_initcall(fn, 4)
+#define subsys_initcall_sync(fn)	__define_initcall(fn, 4s)
+#define fs_initcall(fn)			__define_initcall(fn, 5)
+#define fs_initcall_sync(fn)		__define_initcall(fn, 5s)
+#define rootfs_initcall(fn)		__define_initcall(fn, rootfs)
+#define device_initcall(fn)		__define_initcall(fn, 6)
+#define device_initcall_sync(fn)	__define_initcall(fn, 6s)
+#define late_initcall(fn)		__define_initcall(fn, 7)
+#define late_initcall_sync(fn)		__define_initcall(fn, 7s)
 
 #define __initcall(fn) device_initcall(fn)
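
Callers are unaffected by the cleanup; a typical registration still reads:

	static int __init my_driver_init(void)
	{
		return 0;
	}
	/* expands to __define_initcall(my_driver_init, 6) */
	device_initcall(my_driver_init);
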
 
diff --git a/include/linux/input.h b/include/linux/input.h
index cab994b..82ce323 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -112,6 +112,11 @@
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
+ * @num_vals: number of values queued in the current frame
+ * @max_vals: maximum number of values queued in a frame
+ * @vals: array of values queued in the current frame
+ * @devres_managed: indicates that the device is managed with the devres
+ *	framework and need not be explicitly unregistered or freed.
  */
 struct input_dev {
 	const char *name;
@@ -180,6 +185,8 @@
 	unsigned int num_vals;
 	unsigned int max_vals;
 	struct input_value *vals;
+
+	bool devres_managed;
 };
 #define to_input_dev(d) container_of(d, struct input_dev, dev)
 
@@ -323,7 +330,8 @@
 	struct list_head	h_node;
 };
 
-struct input_dev *input_allocate_device(void);
+struct input_dev __must_check *input_allocate_device(void);
+struct input_dev __must_check *devm_input_allocate_device(struct device *);
 void input_free_device(struct input_dev *dev);
 
 static inline struct input_dev *input_get_device(struct input_dev *dev)
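
A minimal sketch of a probe routine using the new devres-managed allocator (driver and key names are hypothetical); no explicit input_free_device() or input_unregister_device() is needed on the error and remove paths:

	static int foo_probe(struct platform_device *pdev)
	{
		struct input_dev *input;

		/* freed automatically when the device is unbound */
		input = devm_input_allocate_device(&pdev->dev);
		if (!input)
			return -ENOMEM;

		input->name = "foo-keys";
		input_set_capability(input, EV_KEY, KEY_POWER);

		return input_register_device(input);
	}
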
diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h
index 05e0328..6230d76 100644
--- a/include/linux/input/bu21013.h
+++ b/include/linux/input/bu21013.h
@@ -9,13 +9,10 @@
 
 /**
  * struct bu21013_platform_device - Handle the platform data
- * @cs_en:	pointer to the cs enable function
- * @cs_dis:	pointer to the cs disable function
- * @irq_read_val:    pointer to read the pen irq value function
  * @touch_x_max: touch x max
  * @touch_y_max: touch y max
  * @cs_pin: chip select pin
- * @irq: irq pin
+ * @touch_pin: touch gpio pin
  * @ext_clk: external clock flag
  * @x_flip: x flip flag
  * @y_flip: y flip flag
@@ -24,13 +21,10 @@
  * This is used to handle the platform data
  */
 struct bu21013_platform_device {
-	int (*cs_en)(int reset_pin);
-	int (*cs_dis)(int reset_pin);
-	int (*irq_read_val)(void);
 	int touch_x_max;
 	int touch_y_max;
 	unsigned int cs_pin;
-	unsigned int irq;
+	unsigned int touch_pin;
 	bool ext_clk;
 	bool x_flip;
 	bool y_flip;
diff --git a/include/linux/input/ti_am335x_tsc.h b/include/linux/input/ti_am335x_tsc.h
new file mode 100644
index 0000000..49269a2
--- /dev/null
+++ b/include/linux/input/ti_am335x_tsc.h
@@ -0,0 +1,23 @@
+#ifndef __LINUX_TI_AM335X_TSC_H
+#define __LINUX_TI_AM335X_TSC_H
+
+/**
+ * struct tsc_data - Touchscreen wire configuration
+ * @wires:		Wires refer to application modes
+ *			i.e. 4/5/8 wire touchscreen support
+ *			on the platform.
+ * @x_plate_resistance:	X plate resistance.
+ * @steps_to_configure:	The sequencer supports a total of
+ *			16 programmable steps.
+ *			A step configured to read a single
+ *			coordinate value can be applied
+ *			multiple times for better results.
+ */
+
+struct tsc_data {
+	int wires;
+	int x_plate_resistance;
+	int steps_to_configure;
+};
+
+#endif
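
A board file would typically instantiate this as, for example (values are illustrative only):

	static struct tsc_data am335x_touchscreen_data = {
		.wires			= 4,
		.x_plate_resistance	= 200,
		.steps_to_configure	= 5,
	};
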
diff --git a/include/linux/input/ti_tscadc.h b/include/linux/input/ti_tscadc.h
deleted file mode 100644
index b10a527..0000000
--- a/include/linux/input/ti_tscadc.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __LINUX_TI_TSCADC_H
-#define __LINUX_TI_TSCADC_H
-
-/**
- * struct tsc_data	Touchscreen wire configuration
- * @wires:		Wires refer to application modes
- *			i.e. 4/5/8 wire touchscreen support
- *			on the platform.
- * @x_plate_resistance:	X plate resistance.
- */
-
-struct tsc_data {
-	int wires;
-	int x_plate_resistance;
-};
-
-#endif
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 5499c92..fe77197 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -67,6 +67,8 @@
 
 	/* user_ns which owns the ipc ns */
 	struct user_namespace *user_ns;
+
+	unsigned int	proc_inum;
 };
 
 extern struct ipc_namespace init_ipc_ns;
@@ -133,7 +135,8 @@
 
 #if defined(CONFIG_IPC_NS)
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
-				       struct task_struct *tsk);
+	struct user_namespace *user_ns, struct ipc_namespace *ns);
+
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 {
 	if (ns)
@@ -144,12 +147,12 @@
 extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
-					      struct task_struct *tsk)
+	struct user_namespace *user_ns, struct ipc_namespace *ns)
 {
 	if (flags & CLONE_NEWIPC)
 		return ERR_PTR(-EINVAL);
 
-	return tsk->nsproxy->ipc_ns;
+	return ns;
 }
 
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 3efc43f..1be23d9 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1096,7 +1096,6 @@
 void		 jbd2_journal_set_triggers(struct buffer_head *,
 					   struct jbd2_buffer_trigger_type *type);
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
-extern void	 jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
 extern void	 jbd2_journal_invalidatepage(journal_t *,
@@ -1303,15 +1302,21 @@
 
 extern int jbd_blocks_per_page(struct inode *inode);
 
+/* JBD uses a CRC32 checksum */
+#define JBD_MAX_CHECKSUM_SIZE 4
+
 static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
 			      const void *address, unsigned int length)
 {
 	struct {
 		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(journal->j_chksum_driver)];
+		char ctx[JBD_MAX_CHECKSUM_SIZE];
 	} desc;
 	int err;
 
+	BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
+		JBD_MAX_CHECKSUM_SIZE);
+
 	desc.shash.tfm = journal->j_chksum_driver;
 	desc.shash.flags = 0;
 	*(u32 *)desc.ctx = crc;
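
For reference, a caller-side sketch (journal, hdr and data are assumed to exist in the caller) chaining the helper over two buffers:

	u32 csum;

	csum = jbd2_chksum(journal, ~0, hdr, sizeof(*hdr));
	csum = jbd2_chksum(journal, csum, data, len);
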
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d97ed58..c566927 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -77,13 +77,15 @@
 
 /*
  * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors.
+ * to closest integer. Result is undefined for negative divisors and
+ * for negative dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) > 0 || (__x) > 0) ?		\
+	(((typeof(x))-1) > 0 ||				\
+	 ((typeof(divisor))-1) > 0 || (__x) > 0) ?	\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
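
An illustration of the caveat documented above (hypothetical values):

	int ok  = DIV_ROUND_CLOSEST(-7, 2);	/* -4: signed path taken */
	int bad = DIV_ROUND_CLOSEST(-7, 2U);	/* undefined: -7 is first
						   converted to a huge
						   unsigned value */
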
@@ -220,6 +222,23 @@
 
 int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
 int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive); if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoul. Return code must
+ * be checked.
+ */
 static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
 {
 	/*
@@ -233,6 +252,22 @@
 		return _kstrtoul(s, base, res);
 }
 
+/**
+ * kstrtol - convert a string to a long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive); if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtol. Return code must
+ * be checked.
+ */
 static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
 {
 	/*
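
A typical use, sketched as a hypothetical sysfs store handler (the return code must be checked, as the kernel-doc above says):

	static ssize_t foo_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
	{
		unsigned long val;
		int ret;

		ret = kstrtoul(buf, 0, &val);
		if (ret)
			return ret;
		/* ... use val ... */
		return count;
	}
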
diff --git a/include/linux/key.h b/include/linux/key.h
index 2393b1c..4dfde11 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -265,6 +265,7 @@
 
 extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
 				 const struct cred *cred,
+				 key_perm_t perm,
 				 unsigned long flags,
 				 struct key *dest);
 
diff --git a/include/linux/kref.h b/include/linux/kref.h
index 65af688..4972e6e 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -111,4 +111,25 @@
 	}
 	return 0;
 }
+
+/**
+ * kref_get_unless_zero - Increment refcount for object unless it is zero.
+ * @kref: object.
+ *
+ * Return non-zero if the increment succeeded. Otherwise return 0.
+ *
+ * This function is intended to simplify locking around refcounting for
+ * objects that can be looked up from a lookup structure, and which are
+ * removed from that lookup structure in the object destructor.
+ * Operations on such objects require at least a read lock around
+ * lookup + kref_get, and a write lock around kref_put + remove from lookup
+ * structure. Furthermore, RCU implementations become extremely tricky.
+ * With a lookup followed by a kref_get_unless_zero *with return value check*
+ * locking in the kref_put path can be deferred to the actual removal from
+ * the lookup structure and RCU lookups become trivial.
+ */
+static inline int __must_check kref_get_unless_zero(struct kref *kref)
+{
+	return atomic_add_unless(&kref->refcount, 1, 0);
+}
 #endif /* _KREF_H_ */
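
A sketch of the lookup pattern described in the comment (struct foo, its kref member and foo_tree are all hypothetical):

	struct foo *foo_lookup(unsigned long id)
	{
		struct foo *f;

		rcu_read_lock();
		f = radix_tree_lookup(&foo_tree, id);
		if (f && !kref_get_unless_zero(&f->kref))
			f = NULL;	/* already on its way out */
		rcu_read_unlock();

		return f;
	}
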
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 6492181..460b60f 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -53,10 +53,13 @@
 
 	spinlock_t		lo_lock;
 	struct bio_list		lo_bio_list;
+	unsigned int		lo_bio_count;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
 	wait_queue_head_t	lo_event;
+	/* wait queue for incoming requests */
+	wait_queue_head_t	lo_req_wait;
 
 	struct request_queue	*lo_queue;
 	struct gendisk		*lo_disk;
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index cafc7f9..4019013 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -166,9 +166,11 @@
 	/* if we want to track a larger set of objects,
 	 * it needs to become arch independent u64 */
 	unsigned lc_number;
-
 	/* special label when on free list */
 #define LC_FREE (~0U)
+
+	/* for pending changes */
+	unsigned lc_new_number;
 };
 
 struct lru_cache {
@@ -176,6 +178,7 @@
 	struct list_head lru;
 	struct list_head free;
 	struct list_head in_use;
+	struct list_head to_be_changed;
 
 	/* the pre-created kmem cache to allocate the objects from */
 	struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@
 	size_t element_off;
 
 	/* number of elements (indices) */
-	unsigned int  nr_elements;
+	unsigned int nr_elements;
 	/* Arbitrary limit on maximum tracked objects. Practical limit is much
 	 * lower due to allocation failures, probably. For typical use cases,
 	 * nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@
 	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
 #define LC_MAX_ACTIVE	(1<<24)
 
+	/* allow to accumulate a few (index:label) changes,
+	 * but no more than max_pending_changes */
+	unsigned int max_pending_changes;
+	/* number of elements currently on to_be_changed list */
+	unsigned int pending_changes;
+
 	/* statistics */
-	unsigned used; /* number of lelements currently on in_use list */
-	unsigned long hits, misses, starving, dirty, changed;
+	unsigned used; /* number of elements currently on in_use list */
+	unsigned long hits, misses, starving, locked, changed;
 
 	/* see below: flag-bits for lru_cache */
 	unsigned long flags;
 
-	/* when changing the label of an index element */
-	unsigned int  new_number;
-
-	/* for paranoia when changing the label of an index element */
-	struct lc_element *changing_element;
 
 	void  *lc_private;
 	const char *name;
@@ -221,10 +225,15 @@
 	/* debugging aid, to catch concurrent access early.
 	 * user needs to guarantee exclusive access by proper locking! */
 	__LC_PARANOIA,
-	/* if we need to change the set, but currently there is a changing
-	 * transaction pending, we are "dirty", and must deferr further
-	 * changing requests */
+
+	/* annotate that the set is "dirty", possibly accumulating further
+	 * changes, until a transaction is finally triggered */
 	__LC_DIRTY,
+
+	/* Locked, no further changes allowed.
+	 * Also used to serialize changing transactions. */
+	__LC_LOCKED,
+
 	/* if we need to change the set, but currently there is no free nor
 	 * unused element available, we are "starving", and must not give out
 	 * further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@
 };
 #define LC_PARANOIA (1<<__LC_PARANOIA)
 #define LC_DIRTY    (1<<__LC_DIRTY)
+#define LC_LOCKED   (1<<__LC_LOCKED)
 #define LC_STARVING (1<<__LC_STARVING)
 
 extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off);
 extern void lc_reset(struct lru_cache *lc);
 extern void lc_destroy(struct lru_cache *lc);
@@ -249,7 +260,7 @@
 extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
 extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
-extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+extern void lc_committed(struct lru_cache *lc);
 
 struct seq_file;
 extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,16 +269,28 @@
 				void (*detail) (struct seq_file *, struct lc_element *));
 
 /**
- * lc_try_lock - can be used to stop lc_get() from changing the tracked set
+ * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
+ * @lc: the lru cache to operate on
+ *
+ * Allows (expects) the set to be "dirty".  Note that the reference counts and
+ * order on the active and lru lists may still change.  Used to serialize
+ * changing transactions.  Returns true if we acquired the lock.
+ */
+static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
+{
+	return !test_and_set_bit(__LC_LOCKED, &lc->flags);
+}
+
+/**
+ * lc_try_lock - variant to stop lc_get() from changing the tracked set
  * @lc: the lru cache to operate on
  *
  * Note that the reference counts and order on the active and lru lists may
- * still change.  Returns true if we acquired the lock.
+ * still change.  Only works on a "clean" set.  Returns true if we acquired the
+ * lock, which means there are no pending changes, and any further attempt to
+ * change the set will not succeed until the next lc_unlock().
  */
-static inline int lc_try_lock(struct lru_cache *lc)
-{
-	return !test_and_set_bit(__LC_DIRTY, &lc->flags);
-}
+extern int lc_try_lock(struct lru_cache *lc);
 
 /**
  * lc_unlock - unlock @lc, allow lc_get() to change the set again
@@ -276,14 +299,10 @@
 static inline void lc_unlock(struct lru_cache *lc)
 {
 	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(__LC_LOCKED, &lc->flags);
 }
 
-static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
-{
-	struct lc_element *e = lc_find(lc, enr);
-	return e && e->refcnt;
-}
+extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
 
 #define lc_entry(ptr, type, member) \
 	container_of(ptr, type, member)
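
A sketch of the new transaction flow (lc and enr are assumed; error handling omitted):

	e = lc_get(lc, enr);
	if (!e && lc_try_lock_for_transaction(lc)) {
		/* write out the accumulated (index: label) changes ... */
		lc_committed(lc);
		lc_unlock(lc);
		e = lc_get(lc, enr);	/* retry */
	}
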
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e98a74c..0108a56 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,11 +21,14 @@
 #define _LINUX_MEMCONTROL_H
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
+#include <linux/hardirq.h>
+#include <linux/jump_label.h>
 
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
+struct kmem_cache;
 
 /* Stats that can be updated by kernel. */
 enum mem_cgroup_page_stat_item {
@@ -414,5 +417,211 @@
 {
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+extern struct static_key memcg_kmem_enabled_key;
+
+extern int memcg_limited_groups_array_size;
+
+/*
+ * Helper macro to loop through all memcg-specific caches. Callers must still
+ * check if the cache is valid (it is either valid or NULL).
+ * The slab_mutex must be held when looping through those caches.
+ */
+#define for_each_memcg_cache_index(_idx)	\
+	for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)
+
+static inline bool memcg_kmem_enabled(void)
+{
+	return static_key_false(&memcg_kmem_enabled_key);
+}
+
+/*
+ * In general, we'll do everything in our power to not incur any overhead
+ * for non-memcg users for the kmem functions. Not even a function call, if we
+ * can avoid it.
+ *
+ * Therefore, we'll inline all those functions so that in the best case, we'll
+ * see that kmemcg is off for everybody and proceed quickly.  If it is on,
+ * we'll still do most of the flag checking inline. We check a lot of
+ * conditions, but because they are pretty simple, they are expected to be
+ * fast.
+ */
+bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
+					int order);
+void __memcg_kmem_commit_charge(struct page *page,
+				       struct mem_cgroup *memcg, int order);
+void __memcg_kmem_uncharge_pages(struct page *page, int order);
+
+int memcg_cache_id(struct mem_cgroup *memcg);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache);
+void memcg_release_cache(struct kmem_cache *cachep);
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
+
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
+void memcg_update_array_size(int num_groups);
+
+struct kmem_cache *
+__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
+
+/**
+ * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * @gfp: the gfp allocation flags.
+ * @memcg: a pointer to the memcg this was charged against.
+ * @order: allocation order.
+ *
+ * Returns true if the memcg to which the current task belongs can hold this
+ * allocation.
+ *
+ * We return true automatically if this allocation is not to be accounted to
+ * any memcg.
+ */
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	if (!memcg_kmem_enabled())
+		return true;
+
+	/*
+	 * __GFP_NOFAIL allocations will move on even if charging is not
+	 * possible. Therefore we don't even try, and have this allocation
+	 * unaccounted. We could in theory charge it with
+	 * res_counter_charge_nofail, but we hope those allocations are rare,
+	 * and won't be worth the trouble.
+	 */
+	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+		return true;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return true;
+
+	/* If the task is dying, just let it go. */
+	if (unlikely(fatal_signal_pending(current)))
+		return true;
+
+	return __memcg_kmem_newpage_charge(gfp, memcg, order);
+}
+
+/**
+ * memcg_kmem_uncharge_pages: uncharge pages from memcg
+ * @page: pointer to struct page being freed
+ * @order: allocation order.
+ *
+ * There is no need to specify memcg here, since it is embedded in page_cgroup.
+ */
+static inline void
+memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge_pages(page, order);
+}
+
+/**
+ * memcg_kmem_commit_charge: embeds correct memcg in a page
+ * @page: pointer to struct page recently allocated
+ * @memcg: the memcg structure we charged against
+ * @order: allocation order.
+ *
+ * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
+ * failure of the allocation. If @page is NULL, this function will revert the
+ * charges. Otherwise, it will commit the memcg given by @memcg to the
+ * corresponding page_cgroup.
+ */
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+	if (memcg_kmem_enabled() && memcg)
+		__memcg_kmem_commit_charge(page, memcg, order);
+}
+
+/**
+ * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
+ * @cachep: the original global kmem cache
+ * @gfp: allocation flags.
+ *
+ * This function assumes that the task allocating, which determines the memcg
+ * in the page allocator, belongs to the same cgroup throughout the whole
+ * process.  Misaccounting can happen if the task calls memcg_kmem_get_cache()
+ * while belonging to a cgroup, and later on changes. This is considered
+ * acceptable, and should only happen upon task migration.
+ *
+ * Before the cache is created by the memcg core, there is also a possible
+ * imbalance: the task belongs to a memcg, but the cache being allocated from
+ * is the global cache, since the child cache is not yet guaranteed to be
+ * ready. This case is also fine, since in this case __GFP_KMEMCG will not be
+ * passed and the page allocator will not attempt any cgroup accounting.
+ */
+static __always_inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	if (!memcg_kmem_enabled())
+		return cachep;
+	if (gfp & __GFP_NOFAIL)
+		return cachep;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return cachep;
+	if (unlikely(fatal_signal_pending(current)))
+		return cachep;
+
+	return __memcg_kmem_get_cache(cachep, gfp);
+}
+#else
+#define for_each_memcg_cache_index(_idx)	\
+	for (; NULL; )
+
+static inline bool memcg_kmem_enabled(void)
+{
+	return false;
+}
+
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	return true;
+}
+
+static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+}
+
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+}
+
+static inline int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return -1;
+}
+
+static inline int
+memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+		     struct kmem_cache *root_cache)
+{
+	return 0;
+}
+
+static inline void memcg_release_cache(struct kmem_cache *cachep)
+{
+}
+
+static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
+					struct kmem_cache *s)
+{
+}
+
+static inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	return cachep;
+}
+
+static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
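The intended call sequence in an allocation path, as a simplified sketch (not the actual page allocator code):

	struct mem_cgroup *memcg = NULL;
	struct page *page;

	if (!memcg_kmem_newpage_charge(gfp, &memcg, order))
		return NULL;				/* over limit */

	page = alloc_pages(gfp, order);
	memcg_kmem_commit_charge(page, memcg, order);	/* NULL page reverts
							   the charge */
	/* ... and at free time: */
	memcg_kmem_uncharge_pages(page, order);
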
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index dbd2127..9adc270 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -188,6 +188,8 @@
 	return 1;
 }
 
+extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+
 #else
 
 struct mempolicy {};
@@ -307,5 +309,11 @@
 	return 0;
 }
 
+static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+				 unsigned long address)
+{
+	return -1; /* no node preference */
+}
+
 #endif /* CONFIG_NUMA */
 #endif
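
A sketch of the calling convention (fault-path caller assumed): -1 means the page is fine where it is, otherwise the return value is the preferred node:

	int target = mpol_misplaced(page, vma, address);

	if (target != -1)
		queue_for_migration(page, target);	/* hypothetical helper */
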
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index ba26e99..1f6fe31 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -981,6 +981,7 @@
 #define ARIZONA_DSP1_CLOCKING_1                  0x1101
 #define ARIZONA_DSP1_STATUS_1                    0x1104
 #define ARIZONA_DSP1_STATUS_2                    0x1105
+#define ARIZONA_DSP1_STATUS_3                    0x1106
 #define ARIZONA_DSP2_CONTROL_1                   0x1200
 #define ARIZONA_DSP2_CLOCKING_1                  0x1201
 #define ARIZONA_DSP2_STATUS_1                    0x1204
diff --git a/include/linux/mfd/as3711.h b/include/linux/mfd/as3711.h
new file mode 100644
index 0000000..38452ce
--- /dev/null
+++ b/include/linux/mfd/as3711.h
@@ -0,0 +1,126 @@
+/*
+ * AS3711 PMIC MFD driver header
+ *
+ * Copyright (C) 2012 Renesas Electronics Corporation
+ * Author: Guennadi Liakhovetski, <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License as
+ * published by the Free Software Foundation
+ */
+
+#ifndef MFD_AS3711_H
+#define MFD_AS3711_H
+
+/*
+ * Client data
+ */
+
+/* Register addresses */
+#define AS3711_SD_1_VOLTAGE		0	/* Digital Step-Down */
+#define AS3711_SD_2_VOLTAGE		1
+#define AS3711_SD_3_VOLTAGE		2
+#define AS3711_SD_4_VOLTAGE		3
+#define AS3711_LDO_1_VOLTAGE		4	/* Analog LDO */
+#define AS3711_LDO_2_VOLTAGE		5
+#define AS3711_LDO_3_VOLTAGE		6	/* Digital LDO */
+#define AS3711_LDO_4_VOLTAGE		7
+#define AS3711_LDO_5_VOLTAGE		8
+#define AS3711_LDO_6_VOLTAGE		9
+#define AS3711_LDO_7_VOLTAGE		0xa
+#define AS3711_LDO_8_VOLTAGE		0xb
+#define AS3711_SD_CONTROL		0x10
+#define AS3711_GPIO_SIGNAL_OUT		0x20
+#define AS3711_GPIO_SIGNAL_IN		0x21
+#define AS3711_SD_CONTROL_1		0x30
+#define AS3711_SD_CONTROL_2		0x31
+#define AS3711_CURR_CONTROL		0x40
+#define AS3711_CURR1_VALUE		0x43
+#define AS3711_CURR2_VALUE		0x44
+#define AS3711_CURR3_VALUE		0x45
+#define AS3711_STEPUP_CONTROL_1		0x50
+#define AS3711_STEPUP_CONTROL_2		0x51
+#define AS3711_STEPUP_CONTROL_4		0x53
+#define AS3711_STEPUP_CONTROL_5		0x54
+#define AS3711_REG_STATUS		0x73
+#define AS3711_INTERRUPT_STATUS_1	0x77
+#define AS3711_INTERRUPT_STATUS_2	0x78
+#define AS3711_INTERRUPT_STATUS_3	0x79
+#define AS3711_CHARGER_STATUS_1		0x86
+#define AS3711_CHARGER_STATUS_2		0x87
+#define AS3711_ASIC_ID_1		0x90
+#define AS3711_ASIC_ID_2		0x91
+
+#define AS3711_MAX_REGS			0x92
+
+/* Regulators */
+enum {
+	AS3711_REGULATOR_SD_1,
+	AS3711_REGULATOR_SD_2,
+	AS3711_REGULATOR_SD_3,
+	AS3711_REGULATOR_SD_4,
+	AS3711_REGULATOR_LDO_1,
+	AS3711_REGULATOR_LDO_2,
+	AS3711_REGULATOR_LDO_3,
+	AS3711_REGULATOR_LDO_4,
+	AS3711_REGULATOR_LDO_5,
+	AS3711_REGULATOR_LDO_6,
+	AS3711_REGULATOR_LDO_7,
+	AS3711_REGULATOR_LDO_8,
+
+	AS3711_REGULATOR_MAX,
+};
+
+struct device;
+struct regmap;
+
+struct as3711 {
+	struct device *dev;
+	struct regmap *regmap;
+};
+
+#define AS3711_MAX_STEPDOWN 4
+#define AS3711_MAX_STEPUP 2
+#define AS3711_MAX_LDO 8
+
+enum as3711_su2_feedback {
+	AS3711_SU2_VOLTAGE,
+	AS3711_SU2_CURR1,
+	AS3711_SU2_CURR2,
+	AS3711_SU2_CURR3,
+	AS3711_SU2_CURR_AUTO,
+};
+
+enum as3711_su2_fbprot {
+	AS3711_SU2_LX_SD4,
+	AS3711_SU2_GPIO2,
+	AS3711_SU2_GPIO3,
+	AS3711_SU2_GPIO4,
+};
+
+/*
+ * Platform data
+ */
+
+struct as3711_regulator_pdata {
+	struct regulator_init_data *init_data[AS3711_REGULATOR_MAX];
+};
+
+struct as3711_bl_pdata {
+	const char *su1_fb;
+	int su1_max_uA;
+	const char *su2_fb;
+	int su2_max_uA;
+	enum as3711_su2_feedback su2_feedback;
+	enum as3711_su2_fbprot su2_fbprot;
+	bool su2_auto_curr1;
+	bool su2_auto_curr2;
+	bool su2_auto_curr3;
+};
+
+struct as3711_platform_data {
+	struct as3711_regulator_pdata regulator;
+	struct as3711_bl_pdata backlight;
+};
+
+#endif
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 0507c4c..86dd93d 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -146,4 +146,14 @@
 
 extern struct regmap_config da9052_regmap_config;
 
+int da9052_irq_init(struct da9052 *da9052);
+int da9052_irq_exit(struct da9052 *da9052);
+int da9052_request_irq(struct da9052 *da9052, int irq, char *name,
+			   irq_handler_t handler, void *data);
+void da9052_free_irq(struct da9052 *da9052, int irq, void *data);
+
+int da9052_enable_irq(struct da9052 *da9052, int irq);
+int da9052_disable_irq(struct da9052 *da9052, int irq);
+int da9052_disable_irq_nosync(struct da9052 *da9052, int irq);
+
 #endif /* __MFD_DA9052_DA9052_H */
diff --git a/include/linux/mfd/da9055/core.h b/include/linux/mfd/da9055/core.h
index c96ad68..956afa4 100644
--- a/include/linux/mfd/da9055/core.h
+++ b/include/linux/mfd/da9055/core.h
@@ -1,4 +1,4 @@
-/*
+/*
  * da9055 declarations for DA9055 PMICs.
  *
  * Copyright(c) 2012 Dialog Semiconductor Ltd.
diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h
index f87a6c1..04e092b 100644
--- a/include/linux/mfd/da9055/pdata.h
+++ b/include/linux/mfd/da9055/pdata.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012 Dialog Semiconductor Ltd.
+/* Copyright (C) 2012 Dialog Semiconductor Ltd.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/include/linux/mfd/da9055/reg.h b/include/linux/mfd/da9055/reg.h
index df237ee..2b592e0 100644
--- a/include/linux/mfd/da9055/reg.h
+++ b/include/linux/mfd/da9055/reg.h
@@ -1,4 +1,4 @@
-/*
+/*
  * DA9055 declarations for DA9055 PMICs.
  *
  * Copyright(c) 2012 Dialog Semiconductor Ltd.
diff --git a/include/linux/mfd/rc5t583.h b/include/linux/mfd/rc5t583.h
index 36c242e..fd413cc 100644
--- a/include/linux/mfd/rc5t583.h
+++ b/include/linux/mfd/rc5t583.h
@@ -33,6 +33,7 @@
 /* Maximum number of main interrupts */
 #define MAX_MAIN_INTERRUPT		5
 #define RC5T583_MAX_GPEDGE_REG		2
+#define RC5T583_MAX_INTERRUPT_EN_REGS	8
 #define RC5T583_MAX_INTERRUPT_MASK_REGS	9
 
 /* Interrupt enable register */
@@ -304,7 +305,7 @@
 	uint8_t		intc_inten_reg;
 
 	/* For group interrupt bits and address */
-	uint8_t		irq_en_reg[RC5T583_MAX_INTERRUPT_MASK_REGS];
+	uint8_t		irq_en_reg[RC5T583_MAX_INTERRUPT_EN_REGS];
 
 	/* For gpio edge */
 	uint8_t		gpedge_reg[RC5T583_MAX_GPEDGE_REG];
diff --git a/include/linux/mfd/retu.h b/include/linux/mfd/retu.h
new file mode 100644
index 0000000..1e2715d
--- /dev/null
+++ b/include/linux/mfd/retu.h
@@ -0,0 +1,22 @@
+/*
+ * Retu MFD driver interface
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ */
+
+#ifndef __LINUX_MFD_RETU_H
+#define __LINUX_MFD_RETU_H
+
+struct retu_dev;
+
+int retu_read(struct retu_dev *, u8);
+int retu_write(struct retu_dev *, u8, u16);
+
+/* Registers */
+#define RETU_REG_WATCHDOG	0x17		/* Watchdog */
+#define RETU_REG_CC1		0x0d		/* Common control register 1 */
+#define RETU_REG_STATUS		0x16		/* Status register */
+
+#endif /* __LINUX_MFD_RETU_H */
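
A sub-device usage sketch ('rdev' is the parent's struct retu_dev; the period value written to the watchdog is illustrative):

	int status;

	retu_write(rdev, RETU_REG_WATCHDOG, 32);	/* kick the watchdog */
	status = retu_read(rdev, RETU_REG_STATUS);
	if (status < 0)
		return status;				/* I/O error */
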
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index d179227..9a855ac 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -26,6 +26,28 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 
+enum sta2x11_mfd_plat_dev {
+	sta2x11_sctl = 0,
+	sta2x11_gpio,
+	sta2x11_scr,
+	sta2x11_time,
+	sta2x11_apbreg,
+	sta2x11_apb_soc_regs,
+	sta2x11_vic,
+	sta2x11_n_mfd_plat_devs,
+};
+
+#define STA2X11_MFD_SCTL_NAME	       "sta2x11-sctl"
+#define STA2X11_MFD_GPIO_NAME	       "sta2x11-gpio"
+#define STA2X11_MFD_SCR_NAME	       "sta2x11-scr"
+#define STA2X11_MFD_TIME_NAME	       "sta2x11-time"
+#define STA2X11_MFD_APBREG_NAME	       "sta2x11-apbreg"
+#define STA2X11_MFD_APB_SOC_REGS_NAME  "sta2x11-apb-soc-regs"
+#define STA2X11_MFD_VIC_NAME	       "sta2x11-vic"
+
+extern u32
+__sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev);
+
 /*
  * The MFD PCI block includes the GPIO peripherals and other register blocks.
  * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".)
@@ -182,7 +204,11 @@
  * The APB bridge has its own registers, needed by our users as well.
  * They are accessed with the following read/mask/write function.
  */
-u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+static inline u32
+sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apbreg);
+}
 
 /* CAN and MLB */
 #define APBREG_BSR	0x00	/* Bridge Status Reg */
@@ -211,19 +237,45 @@
  * The system controller has its own registers. Some of these are accessed
  * by our users as well, using the following read/mask/write function
  */
-u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+static inline
+u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_sctl);
+}
 
 #define SCTL_SCCTL		0x00	/* System controller control register */
 #define SCTL_ARMCFG		0x04	/* ARM configuration register */
 #define SCTL_SCPLLCTL		0x08	/* PLL control status register */
+
+#define SCTL_SCPLLCTL_AUDIO_PLL_PD	     BIT(1)
+#define SCTL_SCPLLCTL_FRAC_CONTROL	     BIT(3)
+#define SCTL_SCPLLCTL_STRB_BYPASS	     BIT(6)
+#define SCTL_SCPLLCTL_STRB_INPUT	     BIT(8)
+
 #define SCTL_SCPLLFCTRL		0x0c	/* PLL frequency control register */
+
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_MASK	0xff
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_SHIFT	  10
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_MASK	   7
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_SHIFT	  21
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_MASK	   7
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_SHIFT	  18
+#define SCTL_SCPLLFCTRL_DITHER_DISABLE_MASK     0x03
+#define SCTL_SCPLLFCTRL_DITHER_DISABLE_SHIFT       4
+
+
 #define SCTL_SCRESFRACT		0x10	/* PLL fractional input register */
+
+#define SCTL_SCRESFRACT_MASK	0x0000ffff
+
+
 #define SCTL_SCRESCTRL1		0x14	/* Peripheral reset control 1 */
 #define SCTL_SCRESXTRL2		0x18	/* Peripheral reset control 2 */
 #define SCTL_SCPEREN0		0x1c	/* Peripheral clock enable register 0 */
 #define SCTL_SCPEREN1		0x20	/* Peripheral clock enable register 1 */
 #define SCTL_SCPEREN2		0x24	/* Peripheral clock enable register 2 */
 #define SCTL_SCGRST		0x28	/* Peripheral global reset */
+#define SCTL_SCPCIECSBRST       0x2c    /* PCIe PAB CSB reset status register */
 #define SCTL_SCPCIPMCR1		0x30	/* PCI power management control 1 */
 #define SCTL_SCPCIPMCR2		0x34	/* PCI power management control 2 */
 #define SCTL_SCPCIPMSR1		0x38	/* PCI power management status 1 */
@@ -321,4 +373,146 @@
 #define SCTL_SCPEREN1_I2C3		(1 << 16)
 #define SCTL_SCPEREN1_USB_PHY		(1 << 17)
 
+/*
+ * APB-SOC registers
+ */
+static inline
+u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apb_soc_regs);
+}
+
+#define PCIE_EP1_FUNC3_0_INTR_REG	0x000
+#define PCIE_EP1_FUNC7_4_INTR_REG	0x004
+#define PCIE_EP2_FUNC3_0_INTR_REG	0x008
+#define PCIE_EP2_FUNC7_4_INTR_REG	0x00c
+#define PCIE_EP3_FUNC3_0_INTR_REG	0x010
+#define PCIE_EP3_FUNC7_4_INTR_REG	0x014
+#define PCIE_EP4_FUNC3_0_INTR_REG	0x018
+#define PCIE_EP4_FUNC7_4_INTR_REG	0x01c
+#define PCIE_INTR_ENABLE0_REG		0x020
+#define PCIE_INTR_ENABLE1_REG		0x024
+#define PCIE_EP1_FUNC_TC_REG		0x028
+#define PCIE_EP2_FUNC_TC_REG		0x02c
+#define PCIE_EP3_FUNC_TC_REG		0x030
+#define PCIE_EP4_FUNC_TC_REG		0x034
+#define PCIE_EP1_FUNC_F_REG		0x038
+#define PCIE_EP2_FUNC_F_REG		0x03c
+#define PCIE_EP3_FUNC_F_REG		0x040
+#define PCIE_EP4_FUNC_F_REG		0x044
+#define PCIE_PAB_AMBA_SW_RST_REG	0x048
+#define PCIE_PM_STATUS_0_PORT_0_4	0x04c
+#define PCIE_PM_STATUS_7_0_EP1		0x050
+#define PCIE_PM_STATUS_7_0_EP2		0x054
+#define PCIE_PM_STATUS_7_0_EP3		0x058
+#define PCIE_PM_STATUS_7_0_EP4		0x05c
+#define PCIE_DEV_ID_0_EP1_REG		0x060
+#define PCIE_CC_REV_ID_0_EP1_REG	0x064
+#define PCIE_DEV_ID_1_EP1_REG		0x068
+#define PCIE_CC_REV_ID_1_EP1_REG	0x06c
+#define PCIE_DEV_ID_2_EP1_REG		0x070
+#define PCIE_CC_REV_ID_2_EP1_REG	0x074
+#define PCIE_DEV_ID_3_EP1_REG		0x078
+#define PCIE_CC_REV_ID_3_EP1_REG	0x07c
+#define PCIE_DEV_ID_4_EP1_REG		0x080
+#define PCIE_CC_REV_ID_4_EP1_REG	0x084
+#define PCIE_DEV_ID_5_EP1_REG		0x088
+#define PCIE_CC_REV_ID_5_EP1_REG	0x08c
+#define PCIE_DEV_ID_6_EP1_REG		0x090
+#define PCIE_CC_REV_ID_6_EP1_REG	0x094
+#define PCIE_DEV_ID_7_EP1_REG		0x098
+#define PCIE_CC_REV_ID_7_EP1_REG	0x09c
+#define PCIE_DEV_ID_0_EP2_REG		0x0a0
+#define PCIE_CC_REV_ID_0_EP2_REG	0x0a4
+#define PCIE_DEV_ID_1_EP2_REG		0x0a8
+#define PCIE_CC_REV_ID_1_EP2_REG	0x0ac
+#define PCIE_DEV_ID_2_EP2_REG		0x0b0
+#define PCIE_CC_REV_ID_2_EP2_REG	0x0b4
+#define PCIE_DEV_ID_3_EP2_REG		0x0b8
+#define PCIE_CC_REV_ID_3_EP2_REG	0x0bc
+#define PCIE_DEV_ID_4_EP2_REG		0x0c0
+#define PCIE_CC_REV_ID_4_EP2_REG	0x0c4
+#define PCIE_DEV_ID_5_EP2_REG		0x0c8
+#define PCIE_CC_REV_ID_5_EP2_REG	0x0cc
+#define PCIE_DEV_ID_6_EP2_REG		0x0d0
+#define PCIE_CC_REV_ID_6_EP2_REG	0x0d4
+#define PCIE_DEV_ID_7_EP2_REG		0x0d8
+#define PCIE_CC_REV_ID_7_EP2_REG	0x0dc
+#define PCIE_DEV_ID_0_EP3_REG		0x0e0
+#define PCIE_CC_REV_ID_0_EP3_REG	0x0e4
+#define PCIE_DEV_ID_1_EP3_REG		0x0e8
+#define PCIE_CC_REV_ID_1_EP3_REG	0x0ec
+#define PCIE_DEV_ID_2_EP3_REG		0x0f0
+#define PCIE_CC_REV_ID_2_EP3_REG	0x0f4
+#define PCIE_DEV_ID_3_EP3_REG		0x0f8
+#define PCIE_CC_REV_ID_3_EP3_REG	0x0fc
+#define PCIE_DEV_ID_4_EP3_REG		0x100
+#define PCIE_CC_REV_ID_4_EP3_REG	0x104
+#define PCIE_DEV_ID_5_EP3_REG		0x108
+#define PCIE_CC_REV_ID_5_EP3_REG	0x10c
+#define PCIE_DEV_ID_6_EP3_REG		0x110
+#define PCIE_CC_REV_ID_6_EP3_REG	0x114
+#define PCIE_DEV_ID_7_EP3_REG		0x118
+#define PCIE_CC_REV_ID_7_EP3_REG	0x11c
+#define PCIE_DEV_ID_0_EP4_REG		0x120
+#define PCIE_CC_REV_ID_0_EP4_REG	0x124
+#define PCIE_DEV_ID_1_EP4_REG		0x128
+#define PCIE_CC_REV_ID_1_EP4_REG	0x12c
+#define PCIE_DEV_ID_2_EP4_REG		0x130
+#define PCIE_CC_REV_ID_2_EP4_REG	0x134
+#define PCIE_DEV_ID_3_EP4_REG		0x138
+#define PCIE_CC_REV_ID_3_EP4_REG	0x13c
+#define PCIE_DEV_ID_4_EP4_REG		0x140
+#define PCIE_CC_REV_ID_4_EP4_REG	0x144
+#define PCIE_DEV_ID_5_EP4_REG		0x148
+#define PCIE_CC_REV_ID_5_EP4_REG	0x14c
+#define PCIE_DEV_ID_6_EP4_REG		0x150
+#define PCIE_CC_REV_ID_6_EP4_REG	0x154
+#define PCIE_DEV_ID_7_EP4_REG		0x158
+#define PCIE_CC_REV_ID_7_EP4_REG	0x15c
+#define PCIE_SUBSYS_VEN_ID_REG		0x160
+#define PCIE_COMMON_CLOCK_CONFIG_0_4_0	0x164
+#define PCIE_MIPHYP_SSC_EN_REG		0x168
+#define PCIE_MIPHYP_ADDR_REG		0x16c
+#define PCIE_L1_ASPM_READY_REG		0x170
+#define PCIE_EXT_CFG_RDY_REG		0x174
+#define PCIE_SoC_INT_ROUTER_STATUS0_REG 0x178
+#define PCIE_SoC_INT_ROUTER_STATUS1_REG 0x17c
+#define PCIE_SoC_INT_ROUTER_STATUS2_REG 0x180
+#define PCIE_SoC_INT_ROUTER_STATUS3_REG 0x184
+#define DMA_IP_CTRL_REG			0x324
+#define DISP_BRIDGE_PU_PD_CTRL_REG	0x328
+#define VIP_PU_PD_CTRL_REG		0x32c
+#define USB_MLB_PU_PD_CTRL_REG		0x330
+#define SDIO_PU_PD_MISCFUNC_CTRL_REG1	0x334
+#define SDIO_PU_PD_MISCFUNC_CTRL_REG2	0x338
+#define UART_PU_PD_CTRL_REG		0x33c
+#define ARM_Lock			0x340
+#define SYS_IO_CHAR_REG1		0x344
+#define SYS_IO_CHAR_REG2		0x348
+#define SATA_CORE_ID_REG		0x34c
+#define SATA_CTRL_REG			0x350
+#define I2C_HSFIX_MISC_REG		0x354
+#define SPARE2_RESERVED			0x358
+#define SPARE3_RESERVED			0x35c
+#define MASTER_LOCK_REG			0x368
+#define SYSTEM_CONFIG_STATUS_REG	0x36c
+#define MSP_CLK_CTRL_REG		0x39c
+#define COMPENSATION_REG1		0x3c4
+#define COMPENSATION_REG2		0x3c8
+#define COMPENSATION_REG3		0x3cc
+#define TEST_CTL_REG			0x3d0
+
+/*
+ * SECR (OTP) registers
+ */
+#define STA2X11_SECR_CR			0x00
+#define STA2X11_SECR_FVR0		0x10
+#define STA2X11_SECR_FVR1		0x14
+
+extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev,
+				     enum sta2x11_mfd_plat_dev index,
+				     void __iomem **regs,
+				     spinlock_t **lock);
+
 #endif /* __STA2X11_MFD_H */
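
For example, the audio PLL can be powered down through the sctl wrapper above (pdev is assumed to be the MFD's PCI device):

	sta2x11_sctl_mask(pdev, SCTL_SCPLLCTL,
			  SCTL_SCPLLCTL_AUDIO_PLL_PD,	/* bits to touch */
			  SCTL_SCPLLCTL_AUDIO_PLL_PD);	/* new value */
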
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index f8d5b4d..383ac15 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -62,6 +62,7 @@
  * @lock: lock protecting I/O operations
  * @irq_lock: IRQ bus lock
  * @dev: device, mostly for dev_dbg()
+ * @irq_domain: IRQ domain
  * @client: client - i2c or spi
  * @ci: client specific information
  * @partnum: part number
@@ -79,6 +80,7 @@
 	struct mutex lock;
 	struct mutex irq_lock;
 	struct device *dev;
+	struct irq_domain *domain;
 	void *client;
 	struct stmpe_client_info *ci;
 	enum stmpe_partnum partnum;
@@ -188,7 +190,6 @@
  * @id: device id to distinguish between multiple STMPEs on the same board
  * @blocks: bitmask of blocks to enable (use STMPE_BLOCK_*)
  * @irq_trigger: IRQ trigger to use for the interrupt to the host
- * @irq_invert_polarity: IRQ line is connected with reversed polarity
  * @autosleep: bool to enable/disable stmpe autosleep
  * @autosleep_timeout: inactivity timeout in milliseconds for autosleep
  * @irq_base: base IRQ number.  %STMPE_NR_IRQS irqs will be used, or
@@ -205,7 +206,6 @@
 	unsigned int blocks;
 	int irq_base;
 	unsigned int irq_trigger;
-	bool irq_invert_polarity;
 	bool autosleep;
 	bool irq_over_gpio;
 	int irq_gpio;
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
new file mode 100644
index 0000000..c79ad5d
--- /dev/null
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -0,0 +1,152 @@
+#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H
+#define __LINUX_TI_AM335X_TSCADC_MFD_H
+
+/*
+ * TI Touch Screen / ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mfd/core.h>
+
+#define REG_RAWIRQSTATUS	0x024
+#define REG_IRQSTATUS		0x028
+#define REG_IRQENABLE		0x02C
+#define REG_IRQCLR		0x030
+#define REG_IRQWAKEUP		0x034
+#define REG_CTRL		0x040
+#define REG_ADCFSM		0x044
+#define REG_CLKDIV		0x04C
+#define REG_SE			0x054
+#define REG_IDLECONFIG		0x058
+#define REG_CHARGECONFIG	0x05C
+#define REG_CHARGEDELAY		0x060
+#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
+#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
+#define REG_FIFO0CNT		0xE4
+#define REG_FIFO0THR		0xE8
+#define REG_FIFO1CNT		0xF0
+#define REG_FIFO1THR		0xF4
+#define REG_FIFO0		0x100
+#define REG_FIFO1		0x200
+
+/*	Register Bitfields	*/
+/* IRQ wakeup enable */
+#define IRQWKUP_ENB		BIT(0)
+
+/* Step Enable */
+#define STEPENB_MASK		(0x1FFFF << 0)
+#define STEPENB(val)		((val) << 0)
+#define STPENB_STEPENB		STEPENB(0x1FFFF)
+#define STPENB_STEPENB_TC	STEPENB(0x1FFF)
+
+/* IRQ enable */
+#define IRQENB_HW_PEN		BIT(0)
+#define IRQENB_FIFO0THRES	BIT(2)
+#define IRQENB_FIFO1THRES	BIT(5)
+#define IRQENB_PENUP		BIT(9)
+
+/* Step Configuration */
+#define STEPCONFIG_MODE_MASK	(3 << 0)
+#define STEPCONFIG_MODE(val)	((val) << 0)
+#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
+#define STEPCONFIG_AVG_MASK	(7 << 2)
+#define STEPCONFIG_AVG(val)	((val) << 2)
+#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
+#define STEPCONFIG_XPP		BIT(5)
+#define STEPCONFIG_XNN		BIT(6)
+#define STEPCONFIG_YPP		BIT(7)
+#define STEPCONFIG_YNN		BIT(8)
+#define STEPCONFIG_XNP		BIT(9)
+#define STEPCONFIG_YPN		BIT(10)
+#define STEPCONFIG_INM_MASK	(0xF << 15)
+#define STEPCONFIG_INM(val)	((val) << 15)
+#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
+#define STEPCONFIG_INP_MASK	(0xF << 19)
+#define STEPCONFIG_INP(val)	((val) << 19)
+#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
+#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
+#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
+#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
+#define STEPCONFIG_FIFO1	BIT(26)
+
+/* Delay register */
+#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
+#define STEPDELAY_OPEN(val)	((val) << 0)
+#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
+#define STEPDELAY_SAMPLE_MASK	(0xFF << 24)
+#define STEPDELAY_SAMPLE(val)	((val) << 24)
+#define STEPCONFIG_SAMPLEDLY	STEPDELAY_SAMPLE(0)
+
+/* Charge Config */
+#define STEPCHARGE_RFP_MASK	(7 << 12)
+#define STEPCHARGE_RFP(val)	((val) << 12)
+#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
+#define STEPCHARGE_INM_MASK	(0xF << 15)
+#define STEPCHARGE_INM(val)	((val) << 15)
+#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
+#define STEPCHARGE_INP_MASK	(0xF << 19)
+#define STEPCHARGE_INP(val)	((val) << 19)
+#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
+#define STEPCHARGE_RFM_MASK	(3 << 23)
+#define STEPCHARGE_RFM(val)	((val) << 23)
+#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
+
+/* Charge delay */
+#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
+#define CHARGEDLY_OPEN(val)	((val) << 0)
+#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
+
+/* Control register */
+#define CNTRLREG_TSCSSENB	BIT(0)
+#define CNTRLREG_STEPID		BIT(1)
+#define CNTRLREG_STEPCONFIGWRT	BIT(2)
+#define CNTRLREG_POWERDOWN	BIT(4)
+#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
+#define CNTRLREG_AFE_CTRL(val)	((val) << 5)
+#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
+#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
+#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
+#define CNTRLREG_TSCENB		BIT(7)
+
+#define ADC_CLK			3000000
+#define	MAX_CLK_DIV		7
+#define TOTAL_STEPS		16
+#define TOTAL_CHANNELS		8
+
+#define TSCADC_CELLS		2
+
+enum tscadc_cells {
+	TSC_CELL,
+	ADC_CELL,
+};
+
+struct mfd_tscadc_board {
+	struct tsc_data *tsc_init;
+	struct adc_data *adc_init;
+};
+
+struct ti_tscadc_dev {
+	struct device *dev;
+	struct regmap *regmap_tscadc;
+	void __iomem *tscadc_base;
+	int irq;
+	struct mfd_cell cells[TSCADC_CELLS];
+
+	/* tsc device */
+	struct titsc *tsc;
+
+	/* adc device */
+	struct adc_device *adc;
+};
+
+#endif
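
The step registers come in (config, delay) pairs spaced 8 bytes apart, starting at step 1, so for instance:

	REG_STEPCONFIG(1)	/* == 0x64 */
	REG_STEPDELAY(1)	/* == 0x68 */
	REG_STEPCONFIG(2)	/* == 0x6c */
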
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 804e280..6694cf4 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -23,6 +23,26 @@
 #define __LINUX_MFD_TPS65090_H
 
 #include <linux/irq.h>
+#include <linux/regmap.h>
+
+/* TPS65090 IRQs */
+enum {
+	TPS65090_IRQ_VAC_STATUS_CHANGE,
+	TPS65090_IRQ_VSYS_STATUS_CHANGE,
+	TPS65090_IRQ_BAT_STATUS_CHANGE,
+	TPS65090_IRQ_CHARGING_STATUS_CHANGE,
+	TPS65090_IRQ_CHARGING_COMPLETE,
+	TPS65090_IRQ_OVERLOAD_DCDC1,
+	TPS65090_IRQ_OVERLOAD_DCDC2,
+	TPS65090_IRQ_OVERLOAD_DCDC3,
+	TPS65090_IRQ_OVERLOAD_FET1,
+	TPS65090_IRQ_OVERLOAD_FET2,
+	TPS65090_IRQ_OVERLOAD_FET3,
+	TPS65090_IRQ_OVERLOAD_FET4,
+	TPS65090_IRQ_OVERLOAD_FET5,
+	TPS65090_IRQ_OVERLOAD_FET6,
+	TPS65090_IRQ_OVERLOAD_FET7,
+};
 
 /* TPS65090 Regulator ID */
 enum {
@@ -44,20 +64,9 @@
 };
 
 struct tps65090 {
-	struct mutex		lock;
 	struct device		*dev;
-	struct i2c_client	*client;
 	struct regmap		*rmap;
-	struct irq_chip		irq_chip;
-	struct mutex		irq_lock;
-	int			irq_base;
-	unsigned int		id;
-};
-
-struct tps65090_subdev_info {
-	int		id;
-	const char	*name;
-	void		*platform_data;
+	struct regmap_irq_chip_data *irq_data;
 };
 
 /*
@@ -77,8 +86,6 @@
 
 struct tps65090_platform_data {
 	int irq_base;
-	int num_subdevs;
-	struct tps65090_subdev_info *subdevs;
 	struct tps65090_regulator_plat_data *reg_pdata[TPS65090_REGULATOR_MAX];
 };
 
@@ -86,9 +93,39 @@
  * NOTE: the functions below are not intended for use outside
  * of the TPS65090 sub-device drivers
  */
-extern int tps65090_write(struct device *dev, int reg, uint8_t val);
-extern int tps65090_read(struct device *dev, int reg, uint8_t *val);
-extern int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num);
-extern int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num);
+static inline int tps65090_write(struct device *dev, int reg, uint8_t val)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_write(tps->rmap, reg, val);
+}
+
+static inline int tps65090_read(struct device *dev, int reg, uint8_t *val)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+	unsigned int temp_val;
+	int ret;
+
+	ret = regmap_read(tps->rmap, reg, &temp_val);
+	if (!ret)
+		*val = temp_val;
+	return ret;
+}
+
+static inline int tps65090_set_bits(struct device *dev, int reg,
+		uint8_t bit_num)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u);
+}
+
+static inline int tps65090_clr_bits(struct device *dev, int reg,
+		uint8_t bit_num)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u);
+}
 
 #endif /*__LINUX_MFD_TPS65090_H */
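
A sub-device usage sketch of the new inline helpers ('dev' is the MFD parent device holding the tps65090 drvdata; register and bit numbers are illustrative):

	uint8_t val;
	int ret;

	ret = tps65090_read(dev, 0x01, &val);
	if (!ret)
		tps65090_set_bits(dev, 0x0c, 0);	/* set bit 0 */
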
diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h
index f8da0e1..8799454 100644
--- a/include/linux/mfd/tps6586x.h
+++ b/include/linux/mfd/tps6586x.h
@@ -96,5 +96,6 @@
 extern int tps6586x_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
 extern int tps6586x_update(struct device *dev, int reg, uint8_t val,
 			   uint8_t mask);
+extern int tps6586x_irq_get_virq(struct device *dev, int irq);
 
 #endif /*__LINUX_MFD_TPS6586X_H */
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 02e894f..20e433e 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -572,6 +572,49 @@
 #define SPARE_SPARE_MASK				0xFF
 #define SPARE_SPARE_SHIFT				0
 
+#define TPS65910_INT_STS_RTC_PERIOD_IT_MASK			0x80
+#define TPS65910_INT_STS_RTC_PERIOD_IT_SHIFT			7
+#define TPS65910_INT_STS_RTC_ALARM_IT_MASK			0x40
+#define TPS65910_INT_STS_RTC_ALARM_IT_SHIFT			6
+#define TPS65910_INT_STS_HOTDIE_IT_MASK				0x20
+#define TPS65910_INT_STS_HOTDIE_IT_SHIFT			5
+#define TPS65910_INT_STS_PWRHOLD_F_IT_MASK			0x10
+#define TPS65910_INT_STS_PWRHOLD_F_IT_SHIFT			4
+#define TPS65910_INT_STS_PWRON_LP_IT_MASK			0x08
+#define TPS65910_INT_STS_PWRON_LP_IT_SHIFT			3
+#define TPS65910_INT_STS_PWRON_IT_MASK				0x04
+#define TPS65910_INT_STS_PWRON_IT_SHIFT				2
+#define TPS65910_INT_STS_VMBHI_IT_MASK				0x02
+#define TPS65910_INT_STS_VMBHI_IT_SHIFT				1
+#define TPS65910_INT_STS_VMBDCH_IT_MASK				0x01
+#define TPS65910_INT_STS_VMBDCH_IT_SHIFT			0
+
+#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK			0x80
+#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_SHIFT		7
+#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK			0x40
+#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_SHIFT			6
+#define TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK			0x20
+#define TPS65910_INT_MSK_HOTDIE_IT_MSK_SHIFT			5
+#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK			0x10
+#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_SHIFT			4
+#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK			0x08
+#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_SHIFT			3
+#define TPS65910_INT_MSK_PWRON_IT_MSK_MASK			0x04
+#define TPS65910_INT_MSK_PWRON_IT_MSK_SHIFT			2
+#define TPS65910_INT_MSK_VMBHI_IT_MSK_MASK			0x02
+#define TPS65910_INT_MSK_VMBHI_IT_MSK_SHIFT			1
+#define TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK			0x01
+#define TPS65910_INT_MSK_VMBDCH_IT_MSK_SHIFT			0
+
+#define TPS65910_INT_STS2_GPIO0_F_IT_SHIFT			2
+#define TPS65910_INT_STS2_GPIO0_F_IT_MASK			0x02
+#define TPS65910_INT_STS2_GPIO0_R_IT_SHIFT			1
+#define TPS65910_INT_STS2_GPIO0_R_IT_MASK			0x01
+
+#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_SHIFT			2
+#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK			0x02
+#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_SHIFT			1
+#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK			0x01
 
 /*Register INT_STS  (0x80) register.RegisterDescription */
 #define INT_STS_RTC_PERIOD_IT_MASK			0x80
@@ -580,16 +623,16 @@
 #define INT_STS_RTC_ALARM_IT_SHIFT			6
 #define INT_STS_HOTDIE_IT_MASK				0x20
 #define INT_STS_HOTDIE_IT_SHIFT				5
-#define INT_STS_PWRHOLD_IT_MASK				0x10
-#define INT_STS_PWRHOLD_IT_SHIFT			4
+#define INT_STS_PWRHOLD_R_IT_MASK			0x10
+#define INT_STS_PWRHOLD_R_IT_SHIFT			4
 #define INT_STS_PWRON_LP_IT_MASK			0x08
 #define INT_STS_PWRON_LP_IT_SHIFT			3
 #define INT_STS_PWRON_IT_MASK				0x04
 #define INT_STS_PWRON_IT_SHIFT				2
 #define INT_STS_VMBHI_IT_MASK				0x02
 #define INT_STS_VMBHI_IT_SHIFT				1
-#define INT_STS_VMBDCH_IT_MASK				0x01
-#define INT_STS_VMBDCH_IT_SHIFT				0
+#define INT_STS_PWRHOLD_F_IT_MASK			0x01
+#define INT_STS_PWRHOLD_F_IT_SHIFT			0
 
 
 /*Register INT_MSK  (0x80) register.RegisterDescription */
@@ -599,16 +642,16 @@
 #define INT_MSK_RTC_ALARM_IT_MSK_SHIFT			6
 #define INT_MSK_HOTDIE_IT_MSK_MASK			0x20
 #define INT_MSK_HOTDIE_IT_MSK_SHIFT			5
-#define INT_MSK_PWRHOLD_IT_MSK_MASK			0x10
-#define INT_MSK_PWRHOLD_IT_MSK_SHIFT			4
+#define INT_MSK_PWRHOLD_R_IT_MSK_MASK			0x10
+#define INT_MSK_PWRHOLD_R_IT_MSK_SHIFT			4
 #define INT_MSK_PWRON_LP_IT_MSK_MASK			0x08
 #define INT_MSK_PWRON_LP_IT_MSK_SHIFT			3
 #define INT_MSK_PWRON_IT_MSK_MASK			0x04
 #define INT_MSK_PWRON_IT_MSK_SHIFT			2
 #define INT_MSK_VMBHI_IT_MSK_MASK			0x02
 #define INT_MSK_VMBHI_IT_MSK_SHIFT			1
-#define INT_MSK_VMBDCH_IT_MSK_MASK			0x01
-#define INT_MSK_VMBDCH_IT_MSK_SHIFT			0
+#define INT_MSK_PWRHOLD_F_IT_MSK_MASK			0x01
+#define INT_MSK_PWRHOLD_F_IT_MSK_SHIFT			0
 
 
 /*Register INT_STS2  (0x80) register.RegisterDescription */
@@ -650,6 +693,14 @@
 
 
 /*Register INT_STS3  (0x80) register.RegisterDescription */
+#define INT_STS3_PWRDN_IT_MASK				0x80
+#define INT_STS3_PWRDN_IT_SHIFT				7
+#define INT_STS3_VMBCH2_L_IT_MASK			0x40
+#define INT_STS3_VMBCH2_L_IT_SHIFT			6
+#define INT_STS3_VMBCH2_H_IT_MASK			0x20
+#define INT_STS3_VMBCH2_H_IT_SHIFT			5
+#define INT_STS3_WTCHDG_IT_MASK				0x10
+#define INT_STS3_WTCHDG_IT_SHIFT			4
 #define INT_STS3_GPIO5_F_IT_MASK			0x08
 #define INT_STS3_GPIO5_F_IT_SHIFT			3
 #define INT_STS3_GPIO5_R_IT_MASK			0x04
@@ -661,6 +712,14 @@
 
 
 /*Register INT_MSK3  (0x80) register.RegisterDescription */
+#define INT_MSK3_PWRDN_IT_MSK_MASK			0x80
+#define INT_MSK3_PWRDN_IT_MSK_SHIFT			7
+#define INT_MSK3_VMBCH2_L_IT_MSK_MASK			0x40
+#define INT_MSK3_VMBCH2_L_IT_MSK_SHIFT			6
+#define INT_MSK3_VMBCH2_H_IT_MSK_MASK			0x20
+#define INT_MSK3_VMBCH2_H_IT_MSK_SHIFT			5
+#define INT_MSK3_WTCHDG_IT_MSK_MASK			0x10
+#define INT_MSK3_WTCHDG_IT_MSK_SHIFT			4
 #define INT_MSK3_GPIO5_F_IT_MSK_MASK			0x08
 #define INT_MSK3_GPIO5_F_IT_MSK_SHIFT			3
 #define INT_MSK3_GPIO5_R_IT_MSK_MASK			0x04
@@ -721,34 +780,32 @@
 #define TPS65910_IRQ_GPIO_F				9
 #define TPS65910_NUM_IRQ				10
 
-#define TPS65911_IRQ_VBAT_VMBDCH			0
-#define TPS65911_IRQ_VBAT_VMBDCH2L			1
-#define TPS65911_IRQ_VBAT_VMBDCH2H			2
-#define TPS65911_IRQ_VBAT_VMHI				3
-#define TPS65911_IRQ_PWRON				4
-#define TPS65911_IRQ_PWRON_LP				5
-#define TPS65911_IRQ_PWRHOLD_F				6
-#define TPS65911_IRQ_PWRHOLD_R				7
-#define TPS65911_IRQ_HOTDIE				8
-#define TPS65911_IRQ_RTC_ALARM				9
-#define TPS65911_IRQ_RTC_PERIOD				10
-#define TPS65911_IRQ_GPIO0_R				11
-#define TPS65911_IRQ_GPIO0_F				12
-#define TPS65911_IRQ_GPIO1_R				13
-#define TPS65911_IRQ_GPIO1_F				14
-#define TPS65911_IRQ_GPIO2_R				15
-#define TPS65911_IRQ_GPIO2_F				16
-#define TPS65911_IRQ_GPIO3_R				17
-#define TPS65911_IRQ_GPIO3_F				18
-#define TPS65911_IRQ_GPIO4_R				19
-#define TPS65911_IRQ_GPIO4_F				20
-#define TPS65911_IRQ_GPIO5_R				21
-#define TPS65911_IRQ_GPIO5_F				22
-#define TPS65911_IRQ_WTCHDG				23
-#define TPS65911_IRQ_PWRDN				24
+#define TPS65911_IRQ_PWRHOLD_F				0
+#define TPS65911_IRQ_VBAT_VMHI				1
+#define TPS65911_IRQ_PWRON				2
+#define TPS65911_IRQ_PWRON_LP				3
+#define TPS65911_IRQ_PWRHOLD_R				4
+#define TPS65911_IRQ_HOTDIE				5
+#define TPS65911_IRQ_RTC_ALARM				6
+#define TPS65911_IRQ_RTC_PERIOD				7
+#define TPS65911_IRQ_GPIO0_R				8
+#define TPS65911_IRQ_GPIO0_F				9
+#define TPS65911_IRQ_GPIO1_R				10
+#define TPS65911_IRQ_GPIO1_F				11
+#define TPS65911_IRQ_GPIO2_R				12
+#define TPS65911_IRQ_GPIO2_F				13
+#define TPS65911_IRQ_GPIO3_R				14
+#define TPS65911_IRQ_GPIO3_F				15
+#define TPS65911_IRQ_GPIO4_R				16
+#define TPS65911_IRQ_GPIO4_F				17
+#define TPS65911_IRQ_GPIO5_R				18
+#define TPS65911_IRQ_GPIO5_F				19
+#define TPS65911_IRQ_WTCHDG				20
+#define TPS65911_IRQ_VMBCH2_H				21
+#define TPS65911_IRQ_VMBCH2_L				22
+#define TPS65911_IRQ_PWRDN				23
 
-#define TPS65911_NUM_IRQ				25
-
+#define TPS65911_NUM_IRQ				24
 
 /* GPIO Register Definitions */
 #define TPS65910_GPIO_DEB				BIT(2)
@@ -836,7 +893,6 @@
 	struct device *dev;
 	struct i2c_client *i2c_client;
 	struct regmap *regmap;
-	struct mutex io_mutex;
 	unsigned int id;
 
 	/* Client devices */
@@ -848,12 +904,8 @@
 	struct tps65910_board *of_plat_data;
 
 	/* IRQ Handling */
-	struct mutex irq_lock;
 	int chip_irq;
-	int irq_base;
-	int irq_num;
-	u32 irq_mask;
-	struct irq_domain *domain;
+	struct regmap_irq_chip_data *irq_data;
 };
 
 struct tps65910_platform_data {
@@ -861,10 +913,6 @@
 	int irq_base;
 };
 
-int tps65910_irq_init(struct tps65910 *tps65910, int irq,
-		struct tps65910_platform_data *pdata);
-int tps65910_irq_exit(struct tps65910 *tps65910);
-
 static inline int tps65910_chip_id(struct tps65910 *tps65910)
 {
 	return tps65910->id;
@@ -900,4 +948,9 @@
 	return regmap_update_bits(tps65910->regmap, reg, mask, val);
 }
 
+static inline int tps65910_irq_get_virq(struct tps65910 *tps65910, int irq)
+{
+	return regmap_irq_get_virq(tps65910->irq_data, irq);
+}
+
 #endif /*  __LINUX_MFD_TPS65910_H */
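
A sketch of mapping a chip interrupt to a Linux virq with the new regmap-irq based helper and then requesting it (the handler and data pointer are assumed to exist in the caller):

	int virq = tps65910_irq_get_virq(tps65910, TPS65910_IRQ_RTC_ALARM);

	if (virq <= 0)
		return -ENODEV;
	ret = request_threaded_irq(virq, NULL, rtc_alarm_irq_handler,
				   IRQF_ONESHOT, "rtc-alarm", data);
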
diff --git a/include/linux/mfd/tps80031.h b/include/linux/mfd/tps80031.h
new file mode 100644
index 0000000..2c75c9c
--- /dev/null
+++ b/include/linux/mfd/tps80031.h
@@ -0,0 +1,637 @@
+/*
+ * tps80031.h -- TI TPS80031 and TI TPS80032 PMIC driver.
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#ifndef __LINUX_MFD_TPS80031_H
+#define __LINUX_MFD_TPS80031_H
+
+#include <linux/device.h>
+#include <linux/regmap.h>
+
+/* Pull-ups/Pull-downs */
+#define TPS80031_CFG_INPUT_PUPD1			0xF0
+#define TPS80031_CFG_INPUT_PUPD2			0xF1
+#define TPS80031_CFG_INPUT_PUPD3			0xF2
+#define TPS80031_CFG_INPUT_PUPD4			0xF3
+#define TPS80031_CFG_LDO_PD1				0xF4
+#define TPS80031_CFG_LDO_PD2				0xF5
+#define TPS80031_CFG_SMPS_PD				0xF6
+
+/* Real Time Clock */
+#define TPS80031_SECONDS_REG				0x00
+#define TPS80031_MINUTES_REG				0x01
+#define TPS80031_HOURS_REG				0x02
+#define TPS80031_DAYS_REG				0x03
+#define TPS80031_MONTHS_REG				0x04
+#define TPS80031_YEARS_REG				0x05
+#define TPS80031_WEEKS_REG				0x06
+#define TPS80031_ALARM_SECONDS_REG			0x08
+#define TPS80031_ALARM_MINUTES_REG			0x09
+#define TPS80031_ALARM_HOURS_REG			0x0A
+#define TPS80031_ALARM_DAYS_REG				0x0B
+#define TPS80031_ALARM_MONTHS_REG			0x0C
+#define TPS80031_ALARM_YEARS_REG			0x0D
+#define TPS80031_RTC_CTRL_REG				0x10
+#define TPS80031_RTC_STATUS_REG				0x11
+#define TPS80031_RTC_INTERRUPTS_REG			0x12
+#define TPS80031_RTC_COMP_LSB_REG			0x13
+#define TPS80031_RTC_COMP_MSB_REG			0x14
+#define TPS80031_RTC_RESET_STATUS_REG			0x16
+
+/* PMC Master Module */
+#define TPS80031_PHOENIX_START_CONDITION		0x1F
+#define TPS80031_PHOENIX_MSK_TRANSITION			0x20
+#define TPS80031_STS_HW_CONDITIONS			0x21
+#define TPS80031_PHOENIX_LAST_TURNOFF_STS		0x22
+#define TPS80031_VSYSMIN_LO_THRESHOLD			0x23
+#define TPS80031_VSYSMIN_HI_THRESHOLD			0x24
+#define TPS80031_PHOENIX_DEV_ON				0x25
+#define TPS80031_STS_PWR_GRP_STATE			0x27
+#define TPS80031_PH_CFG_VSYSLOW				0x28
+#define TPS80031_PH_STS_BOOT				0x29
+#define TPS80031_PHOENIX_SENS_TRANSITION		0x2A
+#define TPS80031_PHOENIX_SEQ_CFG			0x2B
+#define TPS80031_PRIMARY_WATCHDOG_CFG			0x2C
+#define TPS80031_KEY_PRESS_DUR_CFG			0x2D
+#define TPS80031_SMPS_LDO_SHORT_STS			0x2E
+
+/* PMC Slave Module - Broadcast */
+#define TPS80031_BROADCAST_ADDR_ALL			0x31
+#define TPS80031_BROADCAST_ADDR_REF			0x32
+#define TPS80031_BROADCAST_ADDR_PROV			0x33
+#define TPS80031_BROADCAST_ADDR_CLK_RST			0x34
+
+/* PMC Slave Module  SMPS Regulators */
+#define TPS80031_SMPS4_CFG_TRANS			0x41
+#define TPS80031_SMPS4_CFG_STATE			0x42
+#define TPS80031_SMPS4_CFG_VOLTAGE			0x44
+#define TPS80031_VIO_CFG_TRANS				0x47
+#define TPS80031_VIO_CFG_STATE				0x48
+#define TPS80031_VIO_CFG_FORCE				0x49
+#define TPS80031_VIO_CFG_VOLTAGE			0x4A
+#define TPS80031_VIO_CFG_STEP				0x48
+#define TPS80031_SMPS1_CFG_TRANS			0x53
+#define TPS80031_SMPS1_CFG_STATE			0x54
+#define TPS80031_SMPS1_CFG_FORCE			0x55
+#define TPS80031_SMPS1_CFG_VOLTAGE			0x56
+#define TPS80031_SMPS1_CFG_STEP				0x57
+#define TPS80031_SMPS2_CFG_TRANS			0x59
+#define TPS80031_SMPS2_CFG_STATE			0x5A
+#define TPS80031_SMPS2_CFG_FORCE			0x5B
+#define TPS80031_SMPS2_CFG_VOLTAGE			0x5C
+#define TPS80031_SMPS2_CFG_STEP				0x5D
+#define TPS80031_SMPS3_CFG_TRANS			0x65
+#define TPS80031_SMPS3_CFG_STATE			0x66
+#define TPS80031_SMPS3_CFG_VOLTAGE			0x68
+
+/* PMC Slave Module  LDO Regulators */
+#define TPS80031_VANA_CFG_TRANS				0x81
+#define TPS80031_VANA_CFG_STATE				0x82
+#define TPS80031_VANA_CFG_VOLTAGE			0x83
+#define TPS80031_LDO2_CFG_TRANS				0x85
+#define TPS80031_LDO2_CFG_STATE				0x86
+#define TPS80031_LDO2_CFG_VOLTAGE			0x87
+#define TPS80031_LDO4_CFG_TRANS				0x89
+#define TPS80031_LDO4_CFG_STATE				0x8A
+#define TPS80031_LDO4_CFG_VOLTAGE			0x8B
+#define TPS80031_LDO3_CFG_TRANS				0x8D
+#define TPS80031_LDO3_CFG_STATE				0x8E
+#define TPS80031_LDO3_CFG_VOLTAGE			0x8F
+#define TPS80031_LDO6_CFG_TRANS				0x91
+#define TPS80031_LDO6_CFG_STATE				0x92
+#define TPS80031_LDO6_CFG_VOLTAGE			0x93
+#define TPS80031_LDOLN_CFG_TRANS			0x95
+#define TPS80031_LDOLN_CFG_STATE			0x96
+#define TPS80031_LDOLN_CFG_VOLTAGE			0x97
+#define TPS80031_LDO5_CFG_TRANS				0x99
+#define TPS80031_LDO5_CFG_STATE				0x9A
+#define TPS80031_LDO5_CFG_VOLTAGE			0x9B
+#define TPS80031_LDO1_CFG_TRANS				0x9D
+#define TPS80031_LDO1_CFG_STATE				0x9E
+#define TPS80031_LDO1_CFG_VOLTAGE			0x9F
+#define TPS80031_LDOUSB_CFG_TRANS			0xA1
+#define TPS80031_LDOUSB_CFG_STATE			0xA2
+#define TPS80031_LDOUSB_CFG_VOLTAGE			0xA3
+#define TPS80031_LDO7_CFG_TRANS				0xA5
+#define TPS80031_LDO7_CFG_STATE				0xA6
+#define TPS80031_LDO7_CFG_VOLTAGE			0xA7
+
+/* PMC Slave Module  External Control */
+#define TPS80031_REGEN1_CFG_TRANS			0xAE
+#define TPS80031_REGEN1_CFG_STATE			0xAF
+#define TPS80031_REGEN2_CFG_TRANS			0xB1
+#define TPS80031_REGEN2_CFG_STATE			0xB2
+#define TPS80031_SYSEN_CFG_TRANS			0xB4
+#define TPS80031_SYSEN_CFG_STATE			0xB5
+
+/* PMC Slave Module  Internal Control */
+#define TPS80031_NRESPWRON_CFG_TRANS			0xB7
+#define TPS80031_NRESPWRON_CFG_STATE			0xB8
+#define TPS80031_CLK32KAO_CFG_TRANS			0xBA
+#define TPS80031_CLK32KAO_CFG_STATE			0xBB
+#define TPS80031_CLK32KG_CFG_TRANS			0xBD
+#define TPS80031_CLK32KG_CFG_STATE			0xBE
+#define TPS80031_CLK32KAUDIO_CFG_TRANS			0xC0
+#define TPS80031_CLK32KAUDIO_CFG_STATE			0xC1
+#define TPS80031_VRTC_CFG_TRANS				0xC3
+#define TPS80031_VRTC_CFG_STATE				0xC4
+#define TPS80031_BIAS_CFG_TRANS				0xC6
+#define TPS80031_BIAS_CFG_STATE				0xC7
+#define TPS80031_VSYSMIN_HI_CFG_TRANS			0xC9
+#define TPS80031_VSYSMIN_HI_CFG_STATE			0xCA
+#define TPS80031_RC6MHZ_CFG_TRANS			0xCC
+#define TPS80031_RC6MHZ_CFG_STATE			0xCD
+#define TPS80031_TMP_CFG_TRANS				0xCF
+#define TPS80031_TMP_CFG_STATE				0xD0
+
+/* PMC Slave Module  resources assignment */
+#define TPS80031_PREQ1_RES_ASS_A			0xD7
+#define TPS80031_PREQ1_RES_ASS_B			0xD8
+#define TPS80031_PREQ1_RES_ASS_C			0xD9
+#define TPS80031_PREQ2_RES_ASS_A			0xDA
+#define TPS80031_PREQ2_RES_ASS_B			0xDB
+#define TPS80031_PREQ2_RES_ASS_C			0xDC
+#define TPS80031_PREQ3_RES_ASS_A			0xDD
+#define TPS80031_PREQ3_RES_ASS_B			0xDE
+#define TPS80031_PREQ3_RES_ASS_C			0xDF
+
+/* PMC Slave Module  Miscellaneous */
+#define TPS80031_SMPS_OFFSET				0xE0
+#define TPS80031_SMPS_MULT				0xE3
+#define TPS80031_MISC1					0xE4
+#define TPS80031_MISC2					0xE5
+#define TPS80031_BBSPOR_CFG				0xE6
+#define TPS80031_TMP_CFG				0xE7
+
+/* Battery Charging Controller and Indicator LED */
+#define TPS80031_CONTROLLER_CTRL2			0xDA
+#define TPS80031_CONTROLLER_VSEL_COMP			0xDB
+#define TPS80031_CHARGERUSB_VSYSREG			0xDC
+#define TPS80031_CHARGERUSB_VICHRG_PC			0xDD
+#define TPS80031_LINEAR_CHRG_STS			0xDE
+#define TPS80031_CONTROLLER_INT_MASK			0xE0
+#define TPS80031_CONTROLLER_CTRL1			0xE1
+#define TPS80031_CONTROLLER_WDG				0xE2
+#define TPS80031_CONTROLLER_STAT1			0xE3
+#define TPS80031_CHARGERUSB_INT_STATUS			0xE4
+#define TPS80031_CHARGERUSB_INT_MASK			0xE5
+#define TPS80031_CHARGERUSB_STATUS_INT1			0xE6
+#define TPS80031_CHARGERUSB_STATUS_INT2			0xE7
+#define TPS80031_CHARGERUSB_CTRL1			0xE8
+#define TPS80031_CHARGERUSB_CTRL2			0xE9
+#define TPS80031_CHARGERUSB_CTRL3			0xEA
+#define TPS80031_CHARGERUSB_STAT1			0xEB
+#define TPS80031_CHARGERUSB_VOREG			0xEC
+#define TPS80031_CHARGERUSB_VICHRG			0xED
+#define TPS80031_CHARGERUSB_CINLIMIT			0xEE
+#define TPS80031_CHARGERUSB_CTRLLIMIT1			0xEF
+#define TPS80031_CHARGERUSB_CTRLLIMIT2			0xF0
+#define TPS80031_LED_PWM_CTRL1				0xF4
+#define TPS80031_LED_PWM_CTRL2				0xF5
+
+/* USB On-The-Go  */
+#define TPS80031_BACKUP_REG				0xFA
+#define TPS80031_USB_VENDOR_ID_LSB			0x00
+#define TPS80031_USB_VENDOR_ID_MSB			0x01
+#define TPS80031_USB_PRODUCT_ID_LSB			0x02
+#define TPS80031_USB_PRODUCT_ID_MSB			0x03
+#define TPS80031_USB_VBUS_CTRL_SET			0x04
+#define TPS80031_USB_VBUS_CTRL_CLR			0x05
+#define TPS80031_USB_ID_CTRL_SET			0x06
+#define TPS80031_USB_ID_CTRL_CLR			0x07
+#define TPS80031_USB_VBUS_INT_SRC			0x08
+#define TPS80031_USB_VBUS_INT_LATCH_SET			0x09
+#define TPS80031_USB_VBUS_INT_LATCH_CLR			0x0A
+#define TPS80031_USB_VBUS_INT_EN_LO_SET			0x0B
+#define TPS80031_USB_VBUS_INT_EN_LO_CLR			0x0C
+#define TPS80031_USB_VBUS_INT_EN_HI_SET			0x0D
+#define TPS80031_USB_VBUS_INT_EN_HI_CLR			0x0E
+#define TPS80031_USB_ID_INT_SRC				0x0F
+#define TPS80031_USB_ID_INT_LATCH_SET			0x10
+#define TPS80031_USB_ID_INT_LATCH_CLR			0x11
+#define TPS80031_USB_ID_INT_EN_LO_SET			0x12
+#define TPS80031_USB_ID_INT_EN_LO_CLR			0x13
+#define TPS80031_USB_ID_INT_EN_HI_SET			0x14
+#define TPS80031_USB_ID_INT_EN_HI_CLR			0x15
+#define TPS80031_USB_OTG_ADP_CTRL			0x16
+#define TPS80031_USB_OTG_ADP_HIGH			0x17
+#define TPS80031_USB_OTG_ADP_LOW			0x18
+#define TPS80031_USB_OTG_ADP_RISE			0x19
+#define TPS80031_USB_OTG_REVISION			0x1A
+
+/* Gas Gauge */
+#define TPS80031_FG_REG_00				0xC0
+#define TPS80031_FG_REG_01				0xC1
+#define TPS80031_FG_REG_02				0xC2
+#define TPS80031_FG_REG_03				0xC3
+#define TPS80031_FG_REG_04				0xC4
+#define TPS80031_FG_REG_05				0xC5
+#define TPS80031_FG_REG_06				0xC6
+#define TPS80031_FG_REG_07				0xC7
+#define TPS80031_FG_REG_08				0xC8
+#define TPS80031_FG_REG_09				0xC9
+#define TPS80031_FG_REG_10				0xCA
+#define TPS80031_FG_REG_11				0xCB
+
+/* General Purpose ADC */
+#define TPS80031_GPADC_CTRL				0x2E
+#define TPS80031_GPADC_CTRL2				0x2F
+#define TPS80031_RTSELECT_LSB				0x32
+#define TPS80031_RTSELECT_ISB				0x33
+#define TPS80031_RTSELECT_MSB				0x34
+#define TPS80031_GPSELECT_ISB				0x35
+#define TPS80031_CTRL_P1				0x36
+#define TPS80031_RTCH0_LSB				0x37
+#define TPS80031_RTCH0_MSB				0x38
+#define TPS80031_RTCH1_LSB				0x39
+#define TPS80031_RTCH1_MSB				0x3A
+#define TPS80031_GPCH0_LSB				0x3B
+#define TPS80031_GPCH0_MSB				0x3C
+
+/* SIM, MMC and Battery Detection */
+#define TPS80031_SIMDEBOUNCING				0xEB
+#define TPS80031_SIMCTRL				0xEC
+#define TPS80031_MMCDEBOUNCING				0xED
+#define TPS80031_MMCCTRL				0xEE
+#define TPS80031_BATDEBOUNCING				0xEF
+
+/* Vibrator Driver and PWMs */
+#define TPS80031_VIBCTRL				0x9B
+#define TPS80031_VIBMODE				0x9C
+#define TPS80031_PWM1ON					0xBA
+#define TPS80031_PWM1OFF				0xBB
+#define TPS80031_PWM2ON					0xBD
+#define TPS80031_PWM2OFF				0xBE
+
+/* Control Interface */
+#define TPS80031_INT_STS_A				0xD0
+#define TPS80031_INT_STS_B				0xD1
+#define TPS80031_INT_STS_C				0xD2
+#define TPS80031_INT_MSK_LINE_A				0xD3
+#define TPS80031_INT_MSK_LINE_B				0xD4
+#define TPS80031_INT_MSK_LINE_C				0xD5
+#define TPS80031_INT_MSK_STS_A				0xD6
+#define TPS80031_INT_MSK_STS_B				0xD7
+#define TPS80031_INT_MSK_STS_C				0xD8
+#define TPS80031_TOGGLE1				0x90
+#define TPS80031_TOGGLE2				0x91
+#define TPS80031_TOGGLE3				0x92
+#define TPS80031_PWDNSTATUS1				0x93
+#define TPS80031_PWDNSTATUS2				0x94
+#define TPS80031_VALIDITY0				0x17
+#define TPS80031_VALIDITY1				0x18
+#define TPS80031_VALIDITY2				0x19
+#define TPS80031_VALIDITY3				0x1A
+#define TPS80031_VALIDITY4				0x1B
+#define TPS80031_VALIDITY5				0x1C
+#define TPS80031_VALIDITY6				0x1D
+#define TPS80031_VALIDITY7				0x1E
+
+/* Version number related register */
+#define TPS80031_JTAGVERNUM				0x87
+#define TPS80031_EPROM_REV				0xDF
+
+/* GPADC Trimming Bits. */
+#define TPS80031_GPADC_TRIM0				0xCC
+#define TPS80031_GPADC_TRIM1				0xCD
+#define TPS80031_GPADC_TRIM2				0xCE
+#define TPS80031_GPADC_TRIM3				0xCF
+#define TPS80031_GPADC_TRIM4				0xD0
+#define TPS80031_GPADC_TRIM5				0xD1
+#define TPS80031_GPADC_TRIM6				0xD2
+#define TPS80031_GPADC_TRIM7				0xD3
+#define TPS80031_GPADC_TRIM8				0xD4
+#define TPS80031_GPADC_TRIM9				0xD5
+#define TPS80031_GPADC_TRIM10				0xD6
+#define TPS80031_GPADC_TRIM11				0xD7
+#define TPS80031_GPADC_TRIM12				0xD8
+#define TPS80031_GPADC_TRIM13				0xD9
+#define TPS80031_GPADC_TRIM14				0xDA
+#define TPS80031_GPADC_TRIM15				0xDB
+#define TPS80031_GPADC_TRIM16				0xDC
+#define TPS80031_GPADC_TRIM17				0xDD
+#define TPS80031_GPADC_TRIM18				0xDE
+
+/* TPS80031_CONTROLLER_STAT1 bit fields */
+#define TPS80031_CONTROLLER_STAT1_BAT_TEMP		0
+#define TPS80031_CONTROLLER_STAT1_BAT_REMOVED		1
+#define TPS80031_CONTROLLER_STAT1_VBUS_DET		2
+#define TPS80031_CONTROLLER_STAT1_VAC_DET		3
+#define TPS80031_CONTROLLER_STAT1_FAULT_WDG		4
+#define TPS80031_CONTROLLER_STAT1_LINCH_GATED		6
+/* TPS80031_CONTROLLER_INT_MASK bit field */
+#define TPS80031_CONTROLLER_INT_MASK_MVAC_DET		0
+#define TPS80031_CONTROLLER_INT_MASK_MVBUS_DET		1
+#define TPS80031_CONTROLLER_INT_MASK_MBAT_TEMP		2
+#define TPS80031_CONTROLLER_INT_MASK_MFAULT_WDG		3
+#define TPS80031_CONTROLLER_INT_MASK_MBAT_REMOVED	4
+#define TPS80031_CONTROLLER_INT_MASK_MLINCH_GATED	5
+
+#define TPS80031_CHARGE_CONTROL_SUB_INT_MASK		0x3F
+
+/* TPS80031_PHOENIX_DEV_ON bit field */
+#define TPS80031_DEVOFF					0x1
+
+#define TPS80031_EXT_CONTROL_CFG_TRANS			0
+#define TPS80031_EXT_CONTROL_CFG_STATE			1
+
+/* State register field */
+#define TPS80031_STATE_OFF				0x00
+#define TPS80031_STATE_ON				0x01
+#define TPS80031_STATE_MASK				0x03
+
+/* Trans register field */
+#define TPS80031_TRANS_ACTIVE_OFF			0x00
+#define TPS80031_TRANS_ACTIVE_ON			0x01
+#define TPS80031_TRANS_ACTIVE_MASK			0x03
+#define TPS80031_TRANS_SLEEP_OFF			0x00
+#define TPS80031_TRANS_SLEEP_ON				0x04
+#define TPS80031_TRANS_SLEEP_MASK			0x0C
+#define TPS80031_TRANS_OFF_OFF				0x00
+#define TPS80031_TRANS_OFF_ACTIVE			0x10
+#define TPS80031_TRANS_OFF_MASK				0x30
+
+#define TPS80031_EXT_PWR_REQ		(TPS80031_PWR_REQ_INPUT_PREQ1 | \
+					TPS80031_PWR_REQ_INPUT_PREQ2 | \
+					TPS80031_PWR_REQ_INPUT_PREQ3)
+
+/* TPS80031_BBSPOR_CFG bit field */
+#define TPS80031_BBSPOR_CHG_EN				0x8
+#define TPS80031_MAX_REGISTER				0xFF
+
+struct i2c_client;
+
+/* Supported chips */
+enum chips {
+	TPS80031 = 0x00000001,
+	TPS80032 = 0x00000002,
+};
+
+enum {
+	TPS80031_INT_PWRON,
+	TPS80031_INT_RPWRON,
+	TPS80031_INT_SYS_VLOW,
+	TPS80031_INT_RTC_ALARM,
+	TPS80031_INT_RTC_PERIOD,
+	TPS80031_INT_HOT_DIE,
+	TPS80031_INT_VXX_SHORT,
+	TPS80031_INT_SPDURATION,
+	TPS80031_INT_WATCHDOG,
+	TPS80031_INT_BAT,
+	TPS80031_INT_SIM,
+	TPS80031_INT_MMC,
+	TPS80031_INT_RES,
+	TPS80031_INT_GPADC_RT,
+	TPS80031_INT_GPADC_SW2_EOC,
+	TPS80031_INT_CC_AUTOCAL,
+	TPS80031_INT_ID_WKUP,
+	TPS80031_INT_VBUSS_WKUP,
+	TPS80031_INT_ID,
+	TPS80031_INT_VBUS,
+	TPS80031_INT_CHRG_CTRL,
+	TPS80031_INT_EXT_CHRG,
+	TPS80031_INT_INT_CHRG,
+	TPS80031_INT_RES2,
+	TPS80031_INT_BAT_TEMP_OVRANGE,
+	TPS80031_INT_BAT_REMOVED,
+	TPS80031_INT_VBUS_DET,
+	TPS80031_INT_VAC_DET,
+	TPS80031_INT_FAULT_WDG,
+	TPS80031_INT_LINCH_GATED,
+
+	/* Last interrupt id to get the end number */
+	TPS80031_INT_NR,
+};
+
+/* TPS80031 Slave IDs */
+#define TPS80031_NUM_SLAVES				4
+#define TPS80031_SLAVE_ID0				0
+#define TPS80031_SLAVE_ID1				1
+#define TPS80031_SLAVE_ID2				2
+#define TPS80031_SLAVE_ID3				3
+
+/* TPS80031 I2C addresses */
+#define TPS80031_I2C_ID0_ADDR				0x12
+#define TPS80031_I2C_ID1_ADDR				0x48
+#define TPS80031_I2C_ID2_ADDR				0x49
+#define TPS80031_I2C_ID3_ADDR				0x4A
+
+enum {
+	TPS80031_REGULATOR_VIO,
+	TPS80031_REGULATOR_SMPS1,
+	TPS80031_REGULATOR_SMPS2,
+	TPS80031_REGULATOR_SMPS3,
+	TPS80031_REGULATOR_SMPS4,
+	TPS80031_REGULATOR_VANA,
+	TPS80031_REGULATOR_LDO1,
+	TPS80031_REGULATOR_LDO2,
+	TPS80031_REGULATOR_LDO3,
+	TPS80031_REGULATOR_LDO4,
+	TPS80031_REGULATOR_LDO5,
+	TPS80031_REGULATOR_LDO6,
+	TPS80031_REGULATOR_LDO7,
+	TPS80031_REGULATOR_LDOLN,
+	TPS80031_REGULATOR_LDOUSB,
+	TPS80031_REGULATOR_VBUS,
+	TPS80031_REGULATOR_REGEN1,
+	TPS80031_REGULATOR_REGEN2,
+	TPS80031_REGULATOR_SYSEN,
+	TPS80031_REGULATOR_MAX,
+};
+
+/* Different configurations for the rails */
+enum {
+	/* USBLDO input selection */
+	TPS80031_USBLDO_INPUT_VSYS		= 0x00000001,
+	TPS80031_USBLDO_INPUT_PMID		= 0x00000002,
+
+	/* LDO3 output mode */
+	TPS80031_LDO3_OUTPUT_VIB		= 0x00000004,
+
+	/* VBUS configuration */
+	TPS80031_VBUS_DISCHRG_EN_PDN		= 0x00000004,
+	TPS80031_VBUS_SW_ONLY			= 0x00000008,
+	TPS80031_VBUS_SW_N_ID			= 0x00000010,
+};
+
+/* External controls requests */
+enum tps80031_ext_control {
+	TPS80031_PWR_REQ_INPUT_NONE		= 0x00000000,
+	TPS80031_PWR_REQ_INPUT_PREQ1		= 0x00000001,
+	TPS80031_PWR_REQ_INPUT_PREQ2		= 0x00000002,
+	TPS80031_PWR_REQ_INPUT_PREQ3		= 0x00000004,
+	TPS80031_PWR_OFF_ON_SLEEP		= 0x00000008,
+	TPS80031_PWR_ON_ON_SLEEP		= 0x00000010,
+};
+
+enum tps80031_pupd_pins {
+	TPS80031_PREQ1 = 0,
+	TPS80031_PREQ2A,
+	TPS80031_PREQ2B,
+	TPS80031_PREQ2C,
+	TPS80031_PREQ3,
+	TPS80031_NRES_WARM,
+	TPS80031_PWM_FORCE,
+	TPS80031_CHRG_EXT_CHRG_STATZ,
+	TPS80031_SIM,
+	TPS80031_MMC,
+	TPS80031_GPADC_START,
+	TPS80031_DVSI2C_SCL,
+	TPS80031_DVSI2C_SDA,
+	TPS80031_CTLI2C_SCL,
+	TPS80031_CTLI2C_SDA,
+};
+
+enum tps80031_pupd_settings {
+	TPS80031_PUPD_NORMAL,
+	TPS80031_PUPD_PULLDOWN,
+	TPS80031_PUPD_PULLUP,
+};
+
+struct tps80031 {
+	struct device		*dev;
+	unsigned long		chip_info;
+	int			es_version;
+	struct i2c_client	*clients[TPS80031_NUM_SLAVES];
+	struct regmap		*regmap[TPS80031_NUM_SLAVES];
+	struct regmap_irq_chip_data *irq_data;
+};
+
+struct tps80031_pupd_init_data {
+	int input_pin;
+	int setting;
+};
+
+/*
+ * struct tps80031_regulator_platform_data - tps80031 regulator platform data.
+ *
+ * @reg_init_data: The regulator init data.
+ * @ext_ctrl_flag: External control flag for sleep/power request control.
+ * @config_flags: Configuration flags for the rails; a bitwise OR of
+ *		  the configuration enums above.
+ */
+
+struct tps80031_regulator_platform_data {
+	struct regulator_init_data *reg_init_data;
+	unsigned int ext_ctrl_flag;
+	unsigned int config_flags;
+};
+
+struct tps80031_platform_data {
+	int irq_base;
+	bool use_power_off;
+	struct tps80031_pupd_init_data *pupd_init_data;
+	int pupd_init_data_size;
+	struct tps80031_regulator_platform_data
+			*regulator_pdata[TPS80031_REGULATOR_MAX];
+};
+
+static inline int tps80031_write(struct device *dev, int sid,
+		int reg, uint8_t val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_write(tps80031->regmap[sid], reg, val);
+}
+
+static inline int tps80031_writes(struct device *dev, int sid, int reg,
+		int len, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_bulk_write(tps80031->regmap[sid], reg, val, len);
+}
+
+static inline int tps80031_read(struct device *dev, int sid,
+		int reg, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+	unsigned int ival;
+	int ret;
+
+	ret = regmap_read(tps80031->regmap[sid], reg, &ival);
+	if (ret < 0) {
+		dev_err(dev, "failed reading from reg 0x%02x\n", reg);
+		return ret;
+	}
+
+	*val = ival;
+	return ret;
+}
+
+static inline int tps80031_reads(struct device *dev, int sid,
+		int reg, int len, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_bulk_read(tps80031->regmap[sid], reg, val, len);
+}
+
+static inline int tps80031_set_bits(struct device *dev, int sid,
+		int reg, uint8_t bit_mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg,
+				bit_mask, bit_mask);
+}
+
+static inline int tps80031_clr_bits(struct device *dev, int sid,
+		int reg, uint8_t bit_mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg, bit_mask, 0);
+}
+
+static inline int tps80031_update(struct device *dev, int sid,
+		int reg, uint8_t val, uint8_t mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg, mask, val);
+}
+
+static inline unsigned long tps80031_get_chip_info(struct device *dev)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return tps80031->chip_info;
+}
+
+static inline int tps80031_get_pmu_version(struct device *dev)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return tps80031->es_version;
+}
+
+static inline int tps80031_irq_get_virq(struct device *dev, int irq)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_irq_get_virq(tps80031->irq_data, irq);
+}
+
+extern int tps80031_ext_power_req_config(struct device *dev,
+		unsigned long ext_ctrl_flag, int preq_bit,
+		int state_reg_add, int trans_reg_add);
+#endif /* __LINUX_MFD_TPS80031_H */
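
As a hedged sketch of the accessor style above: a child driver reading the JTAG version register through the parent device. The slave-ID routing is an assumption; boards may wire it differently:

static int tps80031_print_version_sketch(struct device *dev)
{
	uint8_t ver;
	int ret;

	/* dev->parent is the MFD core device that holds the drvdata */
	ret = tps80031_read(dev->parent, TPS80031_SLAVE_ID1,
			    TPS80031_JTAGVERNUM, &ver);
	if (ret < 0)
		return ret;

	dev_info(dev, "TPS80031 JTAG version 0x%02x\n", ver);
	return 0;
}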
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index a8eff4a..94ac944 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -207,10 +207,12 @@
 };
 
 struct regmap;
+struct regmap_irq_chip_data;
 
 struct twl6040 {
 	struct device *dev;
 	struct regmap *regmap;
+	struct regmap_irq_chip_data *irq_data;
 	struct regulator_bulk_data supplies[2]; /* supplies for vio, v2v1 */
 	struct mutex mutex;
 	struct mutex irq_mutex;
@@ -228,9 +230,8 @@
 	unsigned int mclk;
 
 	unsigned int irq;
-	unsigned int irq_base;
-	u8 irq_masks_cur;
-	u8 irq_masks_cache;
+	unsigned int irq_ready;
+	unsigned int irq_th;
 };
 
 int twl6040_reg_read(struct twl6040 *twl6040, unsigned int reg);
@@ -245,8 +246,7 @@
 		    unsigned int freq_in, unsigned int freq_out);
 int twl6040_get_pll(struct twl6040 *twl6040);
 unsigned int twl6040_get_sysclk(struct twl6040 *twl6040);
-int twl6040_irq_init(struct twl6040 *twl6040);
-void twl6040_irq_exit(struct twl6040 *twl6040);
+
 /* Get the combined status of the vibra control register */
 int twl6040_get_vibralr_status(struct twl6040 *twl6040);
 
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
new file mode 100644
index 0000000..1934528
--- /dev/null
+++ b/include/linux/mfd/viperboard.h
@@ -0,0 +1,110 @@
+/*
+ *  include/linux/mfd/viperboard.h
+ *
+ *  Nano River Technologies viperboard definitions
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_VIPERBOARD_H__
+#define __MFD_VIPERBOARD_H__
+
+#include <linux/types.h>
+#include <linux/usb.h>
+
+#define VPRBRD_EP_OUT               0x02
+#define VPRBRD_EP_IN                0x86
+
+#define VPRBRD_I2C_MSG_LEN          512 /* max length of a msg on USB level */
+
+#define VPRBRD_I2C_FREQ_6MHZ        1                        /*   6 MBit/s */
+#define VPRBRD_I2C_FREQ_3MHZ        2                        /*   3 MBit/s */
+#define VPRBRD_I2C_FREQ_1MHZ        3                        /*   1 MBit/s */
+#define VPRBRD_I2C_FREQ_FAST        4                        /* 400 kbit/s */
+#define VPRBRD_I2C_FREQ_400KHZ      VPRBRD_I2C_FREQ_FAST
+#define VPRBRD_I2C_FREQ_200KHZ      5                        /* 200 kbit/s */
+#define VPRBRD_I2C_FREQ_STD         6                        /* 100 kbit/s */
+#define VPRBRD_I2C_FREQ_100KHZ      VPRBRD_I2C_FREQ_STD
+#define VPRBRD_I2C_FREQ_10KHZ       7                        /*  10 kbit/s */
+
+#define VPRBRD_I2C_CMD_WRITE        0x00
+#define VPRBRD_I2C_CMD_READ         0x01
+#define VPRBRD_I2C_CMD_ADDR         0x02
+
+#define VPRBRD_USB_TYPE_OUT	    0x40
+#define VPRBRD_USB_TYPE_IN	    0xc0
+#define VPRBRD_USB_TIMEOUT_MS       100
+#define VPRBRD_USB_REQUEST_I2C_FREQ 0xe6
+#define VPRBRD_USB_REQUEST_I2C      0xe9
+#define VPRBRD_USB_REQUEST_MAJOR    0xea
+#define VPRBRD_USB_REQUEST_MINOR    0xeb
+#define VPRBRD_USB_REQUEST_ADC      0xec
+#define VPRBRD_USB_REQUEST_GPIOA    0xed
+#define VPRBRD_USB_REQUEST_GPIOB    0xdd
+
+struct vprbrd_i2c_write_hdr {
+	u8 cmd;
+	u16 addr;
+	u8 len1;
+	u8 len2;
+	u8 last;
+	u8 chan;
+	u16 spi;
+} __packed;
+
+struct vprbrd_i2c_read_hdr {
+	u8 cmd;
+	u16 addr;
+	u8 len0;
+	u8 len1;
+	u8 len2;
+	u8 len3;
+	u8 len4;
+	u8 len5;
+	u16 tf1;                        /* transfer 1 length */
+	u16 tf2;                        /* transfer 2 length */
+} __packed;
+
+struct vprbrd_i2c_status {
+	u8 unknown[11];
+	u8 status;
+} __packed;
+
+struct vprbrd_i2c_write_msg {
+	struct vprbrd_i2c_write_hdr header;
+	u8 data[VPRBRD_I2C_MSG_LEN
+		- sizeof(struct vprbrd_i2c_write_hdr)];
+} __packed;
+
+struct vprbrd_i2c_read_msg {
+	struct vprbrd_i2c_read_hdr header;
+	u8 data[VPRBRD_I2C_MSG_LEN
+		- sizeof(struct vprbrd_i2c_read_hdr)];
+} __packed;
+
+struct vprbrd_i2c_addr_msg {
+	u8 cmd;
+	u8 addr;
+	u8 unknown1;
+	u16 len;
+	u8 unknown2;
+	u8 unknown3;
+} __packed;
+
+/* Structure to hold all device specific stuff */
+struct vprbrd {
+	struct usb_device *usb_dev; /* the usb device for this device */
+	struct mutex lock;
+	u8 buf[sizeof(struct vprbrd_i2c_write_msg)];
+	struct platform_device pdev;
+};
+
+#endif /* __MFD_VIPERBOARD_H__ */
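
A sketch of how the wire structures above might be filled for an address phase; the zero values for the unknown fields and the little-endian length are assumptions consistent with the struct layout, not documented protocol facts:

static void vprbrd_fill_addr_msg_sketch(struct vprbrd_i2c_addr_msg *amsg,
					u8 addr, u16 len)
{
	amsg->cmd = VPRBRD_I2C_CMD_ADDR;
	amsg->addr = addr;
	amsg->unknown1 = 0x00;
	amsg->len = cpu_to_le16(len);	/* byte order is an assumption */
	amsg->unknown2 = 0x00;
	amsg->unknown3 = 0x00;
}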
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 0b5865c6..1e9f627 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -23,6 +23,15 @@
 #define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
					   * successful migration case.
 					   */
+enum migrate_reason {
+	MR_COMPACTION,
+	MR_MEMORY_FAILURE,
+	MR_MEMORY_HOTPLUG,
+	MR_SYSCALL,		/* also applies to cpusets */
+	MR_MEMPOLICY_MBIND,
+	MR_NUMA_MISPLACED,
+	MR_CMA
+};
 
 #ifdef CONFIG_MIGRATION
 
@@ -32,7 +41,7 @@
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			enum migrate_mode mode);
+			enum migrate_mode mode, int reason);
 extern int migrate_huge_page(struct page *, new_page_t x,
 			unsigned long private, bool offlining,
 			enum migrate_mode mode);
@@ -54,7 +63,7 @@
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		enum migrate_mode mode) { return -ENOSYS; }
+		enum migrate_mode mode, int reason) { return -ENOSYS; }
 static inline int migrate_huge_page(struct page *page, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
@@ -83,4 +92,37 @@
 #define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_NUMA_BALANCING
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
+#else
+static inline int migrate_misplaced_page(struct page *page, int node)
+{
+	return -EAGAIN; /* can't migrate now */
+}
+static inline bool migrate_ratelimited(int node)
+{
+	return false;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+#else
+static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node)
+{
+	return -EAGAIN;
+}
+#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE */
+
 #endif /* _LINUX_MIGRATE_H */
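
Every migrate_pages() caller must now pass a reason. A sketch of the matching caller-side change, assuming the compaction call site in mm/compaction.c with its compact_control 'cc':

	err = migrate_pages(&cc->migratepages, compaction_alloc,
			    (unsigned long)cc, false,
			    cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
			    MR_COMPACTION);	/* reason is now explicit */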
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4af4f0b..6320407 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -693,6 +693,36 @@
 }
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return xchg(&page->_last_nid, nid);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page->_last_nid;
+}
+static inline void reset_page_last_nid(struct page *page)
+{
+	page->_last_nid = -1;
+}
+#else
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return page_to_nid(page);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page_to_nid(page);
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+}
+#endif
+
 static inline struct zone *page_zone(const struct page *page)
 {
 	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
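
A sketch of how the last_nid helpers are meant to be used from a NUMA hinting fault path; the surrounding policy is an assumption, only the helper itself comes from this patch:

/* Returns true when the page was last touched from a different node. */
static bool page_moved_node_sketch(struct page *page)
{
	int this_nid = numa_node_id();

	return page_xchg_last_nid(page, this_nid) != this_nid;
}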
@@ -977,7 +1007,6 @@
 
 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
-extern int vmtruncate(struct inode *inode, loff_t offset);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
@@ -1078,6 +1107,9 @@
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
+extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+			      unsigned long end, pgprot_t newprot,
+			      int dirty_accountable, int prot_numa);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
@@ -1579,6 +1611,11 @@
 }
 #endif
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+#endif
+
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
@@ -1600,6 +1637,7 @@
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
+#define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7ade273..f8f5162 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -128,10 +128,7 @@
 		};
 
 		struct list_head list;	/* slobs list of pages */
-		struct {		/* slab fields */
-			struct kmem_cache *slab_cache;
-			struct slab *slab_page;
-		};
+		struct slab *slab_page; /* slab fields */
 	};
 
 	/* Remainder is not double word aligned */
@@ -146,7 +143,7 @@
 #if USE_SPLIT_PTLOCKS
 		spinlock_t ptl;
 #endif
-		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
 	};
 
@@ -175,6 +172,10 @@
 	 */
 	void *shadow;
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	int _last_nid;
+#endif
 }
 /*
  * The struct page can be forced to be double word aligned so that atomic ops
@@ -411,9 +412,36 @@
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * numa_next_scan is the next time when the PTEs will be marked
+	 * pte_numa to gather statistics and migrate pages to new nodes
+	 * if necessary
+	 */
+	unsigned long numa_next_scan;
+
+	/* numa_next_reset is when the PTE scanner period will be reset */
+	unsigned long numa_next_reset;
+
+	/* Restart point for scanning and setting pte_numa */
+	unsigned long numa_scan_offset;
+
+	/* numa_scan_seq prevents two threads setting pte_numa */
+	int numa_scan_seq;
+
+	/*
+	 * The first node a task was scheduled on. If a task later runs
+	 * on a different node, the PTE scanner is started immediately.
+	 */
+	int first_nid;
+#endif
 	struct uprobes_state uprobes_state;
 };
 
+/* first nid will either be a valid NID or one of these values */
+#define NUMA_PTE_SCAN_INIT	-1
+#define NUMA_PTE_SCAN_ACTIVE	-2
+
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index cd55dad..4bec5be 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -735,6 +735,19 @@
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * Lock serializing the per destination node AutoNUMA memory
+	 * migration rate limiting data.
+	 */
+	spinlock_t numabalancing_migrate_lock;
+
+	/* Rate limiting time interval */
+	unsigned long numabalancing_migrate_next_window;
+
+	/* Number of pages migrated during the rate limiting time interval */
+	unsigned long numabalancing_migrate_nr_pages;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
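
A sketch of how the three new fields cooperate to rate-limit NUMA-balancing migrations per destination node; the window length and page budget here are illustrative assumptions:

static bool numa_migrate_limited_sketch(pg_data_t *pgdat,
					unsigned long nr_pages)
{
	/* open a fresh window once the previous one has expired */
	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
		spin_lock(&pgdat->numabalancing_migrate_lock);
		pgdat->numabalancing_migrate_nr_pages = 0;
		pgdat->numabalancing_migrate_next_window = jiffies +
			msecs_to_jiffies(100);		/* assumed window */
		spin_unlock(&pgdat->numabalancing_migrate_lock);
	}
	if (pgdat->numabalancing_migrate_nr_pages > 1024) /* assumed budget */
		return true;

	pgdat->numabalancing_migrate_nr_pages += nr_pages;
	return false;
}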
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 5a8e390..12b2ab5 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
 
 struct mnt_namespace;
 struct fs_struct;
+struct user_namespace;
 
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
-		struct fs_struct *);
+		struct user_namespace *, struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
 
 extern const struct file_operations proc_mounts_operations;
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index d6a5806..137b419 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -16,17 +16,15 @@
 /* Chosen so that structs with an unsigned long line up. */
 #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
 
-#define ___module_cat(a,b) __mod_ ## a ## b
-#define __module_cat(a,b) ___module_cat(a,b)
 #ifdef MODULE
 #define __MODULE_INFO(tag, name, info)					  \
-static const char __module_cat(name,__LINE__)[]				  \
+static const char __UNIQUE_ID(name)[]					  \
   __used __attribute__((section(".modinfo"), unused, aligned(1)))	  \
   = __stringify(tag) "=" info
 #else  /* !MODULE */
/* This struct is here for syntactic coherency; it is not used */
 #define __MODULE_INFO(tag, name, info)					  \
-  struct __module_cat(name,__LINE__) {}
+  struct __UNIQUE_ID(name) {}
 #endif
 #define __MODULE_PARM_TYPE(name, _type)					  \
   __MODULE_INFO(parmtype, name##type, #name ":" _type)
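
Module authors see no interface change from this; a tag declared the usual way simply expands through __UNIQUE_ID(), whose trailing counter comes from __COUNTER__ rather than __LINE__, so two expansions on the same source line no longer collide. The expansion shown is approximate:

MODULE_INFO(supported, "external");
/* expands, roughly, to:
 *   static const char __UNIQUE_ID_supported42[] __used
 *     __attribute__((section(".modinfo"), unused, aligned(1)))
 *     = "supported=external";
 */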
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index ed270bd..4eb0a50 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/kref.h>
 #include <linux/sysfs.h>
+#include <linux/workqueue.h>
 
 struct hd_geometry;
 struct mtd_info;
@@ -43,7 +44,8 @@
 	struct kref ref;
 	struct gendisk *disk;
 	struct attribute_group *disk_attributes;
-	struct task_struct *thread;
+	struct workqueue_struct *wq;
+	struct work_struct work;
 	struct request_queue *rq;
 	spinlock_t queue_lock;
 	void *priv;
diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h
index 0f6fea7..407d1e5 100644
--- a/include/linux/mtd/doc2000.h
+++ b/include/linux/mtd/doc2000.h
@@ -92,12 +92,26 @@
  * Others use readb/writeb
  */
 #if defined(__arm__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u32 *)(((unsigned long)adr)+((reg)<<2))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0)
+static inline u8 ReadDOC_(u32 __iomem *addr, unsigned long reg)
+{
+	return __raw_readl(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u32 __iomem *addr, unsigned long reg)
+{
+	__raw_writel(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x8000
 #elif defined(__ppc__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u16 *)(((unsigned long)adr)+((reg)<<1))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0)
+static inline u8 ReadDOC_(u16 __iomem *addr, unsigned long reg)
+{
+	return __raw_readw(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u16 __iomem *addr, unsigned long reg)
+{
+	__raw_writew(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x4000
 #else
 #define ReadDOC_(adr, reg)      readb((void __iomem *)(adr) + (reg))
diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index b200292..d6ed61e 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -155,9 +155,6 @@
 	unsigned int		width;
 	unsigned int		bank;
 
-	/* CLE, ALE offsets */
-	unsigned int		cle_off;
-	unsigned int		ale_off;
 	enum access_mode	mode;
 
 	void			(*select_bank)(uint32_t bank, uint32_t busw);
diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
deleted file mode 100644
index ed3c4e0..0000000
--- a/include/linux/mtd/gpmi-nand.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef __MACH_MXS_GPMI_NAND_H__
-#define __MACH_MXS_GPMI_NAND_H__
-
-/* The size of the resources is fixed. */
-#define GPMI_NAND_RES_SIZE	6
-
-/* Resource names for the GPMI NAND driver. */
-#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
-#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
-#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
-#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
-#define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
-#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
-
-/**
- * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
- *
- * This structure communicates platform-specific information to the GPMI NAND
- * driver that can't be expressed as resources.
- *
- * @platform_init:           A pointer to a function the driver will call to
- *                           initialize the platform (e.g., set up the pin mux).
- * @min_prop_delay_in_ns:    Minimum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_prop_delay_in_ns:    Maximum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_chip_count:          The maximum number of chips for which the driver
- *                           should configure the hardware. This value most
- *                           likely reflects the number of pins that are
- *                           connected to a NAND Flash device. If this is
- *                           greater than the SoC hardware can support, the
- *                           driver will print a message and fail to initialize.
- * @partitions:              An optional pointer to an array of partition
- *                           descriptions.
- * @partition_count:         The number of elements in the partitions array.
- */
-struct gpmi_nand_platform_data {
-	/* SoC hardware information. */
-	int		(*platform_init)(void);
-
-	/* NAND Flash information. */
-	unsigned int	min_prop_delay_in_ns;
-	unsigned int	max_prop_delay_in_ns;
-	unsigned int	max_chip_count;
-
-	/* Medium information. */
-	struct		mtd_partition *partitions;
-	unsigned	partition_count;
-};
-#endif
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 3595a02..f6eb433 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -328,7 +328,7 @@
 
 static inline map_word map_word_load(struct map_info *map, const void *ptr)
 {
-	map_word r;
+	map_word r = {{0}};
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = *(unsigned char *)ptr;
@@ -391,7 +391,7 @@
 
 static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 {
-	map_word r;
+	map_word uninitialized_var(r);
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = __raw_readb(map->virt + ofs);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 81d61e7..f9ac289 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -98,7 +98,7 @@
 };
 
 #define MTD_MAX_OOBFREE_ENTRIES_LARGE	32
-#define MTD_MAX_ECCPOS_ENTRIES_LARGE	448
+#define MTD_MAX_ECCPOS_ENTRIES_LARGE	640
 /*
  * Internal ECC layout control structure. For historical reasons, there is a
  * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 24e9159..7ccb3c5 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -219,6 +219,13 @@
 #define NAND_OWN_BUFFERS	0x00020000
 /* Chip may not exist, so silence any errors in scan */
 #define NAND_SCAN_SILENT_NODEV	0x00040000
+/*
+ * Autodetect the NAND bus width with READID/ONFI.
+ * This assumes the driver configures the hardware in 8-bit mode when
+ * calling nand_scan_ident, and updates its configuration before
+ * calling nand_scan_tail.
+ */
+#define NAND_BUSWIDTH_AUTO      0x00080000
 
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
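
A sketch of the probe flow the NAND_BUSWIDTH_AUTO comment describes; the controller reconfiguration helper is hypothetical:

	chip->options |= NAND_BUSWIDTH_AUTO;
	ret = nand_scan_ident(mtd, 1, NULL);	/* detects the real width */
	if (ret)
		return ret;
	if (chip->options & NAND_BUSWIDTH_16)
		my_set_bus_width(host, 16);	/* hypothetical helper */
	ret = nand_scan_tail(mtd);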
@@ -471,8 +478,8 @@
  *			non 0 if ONFI supported.
  * @onfi_params:	[INTERN] holds the ONFI page parameter when ONFI is
  *			supported, 0 otherwise.
- * @onfi_set_features	[REPLACEABLE] set the features for ONFI nand
- * @onfi_get_features	[REPLACEABLE] get the features for ONFI nand
+ * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
+ * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
  * @ecclayout:		[REPLACEABLE] the default ECC placement scheme
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 01e4b15..1c28f88 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -20,6 +20,7 @@
 #ifndef __SH_FLCTL_H__
 #define __SH_FLCTL_H__
 
+#include <linux/completion.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
@@ -107,6 +108,7 @@
 #define ESTERINTE	(0x1 << 24)	/* ECC error interrupt enable */
 #define AC1CLR		(0x1 << 19)	/* ECC FIFO clear */
 #define AC0CLR		(0x1 << 18)	/* Data FIFO clear */
+#define DREQ0EN		(0x1 << 16)	/* FLDTFIFODMA Request Enable */
 #define ECERB		(0x1 << 9)	/* ECC error */
 #define STERB		(0x1 << 8)	/* Status error */
 #define STERINTE	(0x1 << 4)	/* Status error enable */
@@ -138,6 +140,8 @@
 	FL_TIMEOUT
 };
 
+struct dma_chan;
+
 struct sh_flctl {
 	struct mtd_info		mtd;
 	struct nand_chip	chip;
@@ -147,7 +151,7 @@
 
 	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
 	int	read_bytes;
-	int	index;
+	unsigned int index;
 	int	seqin_column;		/* column in SEQIN cmd */
 	int	seqin_page_addr;	/* page_addr in SEQIN cmd */
 	uint32_t seqin_read_cmd;		/* read cmd in SEQIN cmd */
@@ -161,6 +165,11 @@
 	unsigned hwecc:1;	/* Hardware ECC (0 = disabled, 1 = enabled) */
 	unsigned holden:1;	/* Hardware has FLHOLDCR and HOLDEN is set */
 	unsigned qos_request:1;	/* QoS request to prevent deep power shutdown */
+
+	/* DMA related objects */
+	struct dma_chan		*chan_fifo0_rx;
+	struct dma_chan		*chan_fifo0_tx;
+	struct completion	dma_complete;
 };
 
 struct sh_flctl_platform_data {
@@ -170,6 +179,9 @@
 
 	unsigned has_hwecc:1;
 	unsigned use_holden:1;
+
+	unsigned int            slave_id_fifo0_tx;
+	unsigned int            slave_id_fifo0_rx;
 };
 
 static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4bf19d8..e998c03 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -65,8 +65,8 @@
 
 extern int kern_path(const char *, unsigned, struct path *);
 
-extern struct dentry *kern_path_create(int, const char *, struct path *, int);
-extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
+extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
+extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
@@ -98,4 +98,20 @@
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/**
+ * retry_estale - determine whether the caller should retry an operation
+ * @error: the error that would currently be returned
+ * @flags: flags being used for next lookup attempt
+ *
+ * Check to see if the error code was -ESTALE, and then determine whether
+ * to retry the call based on whether "flags" already has LOOKUP_REVAL set.
+ *
+ * Returns true if the caller should try the operation again.
+ */
+static inline bool
+retry_estale(const long error, const unsigned int flags)
+{
+	return error == -ESTALE && !(flags & LOOKUP_REVAL);
+}
+
 #endif /* _LINUX_NAMEI_H */
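
retry_estale() is meant to sit in a retry loop like the following sketch; the operation being retried is a placeholder:

static long do_path_op_sketch(const char *name)
{
	unsigned int lookup_flags = 0;
	long error;

retry:
	error = some_path_op(name, lookup_flags);	/* placeholder op */
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}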
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index a9e76ee..6c6ed15 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -198,51 +198,4 @@
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
 #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
 
-
-/* maximum number of slots to use */
-#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
-#define NFS4_MAX_SLOT_TABLE (256U)
-#define NFS4_NO_SLOT ((u32)-1)
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-
-/* Sessions */
-#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
-struct nfs4_slot_table {
-	struct nfs4_slot *slots;		/* seqid per slot */
-	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
-	spinlock_t	slot_tbl_lock;
-	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
-	u32		max_slots;		/* # slots in table */
-	u32		highest_used_slotid;	/* sent to server on each SEQ.
-						 * op for dynamic resizing */
-	u32		target_max_slots;	/* Set by CB_RECALL_SLOT as
-						 * the new max_slots */
-	struct completion complete;
-};
-
-static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
-{
-	return sp - tbl->slots;
-}
-
-/*
- * Session related parameters
- */
-struct nfs4_session {
-	struct nfs4_sessionid		sess_id;
-	u32				flags;
-	unsigned long			session_state;
-	u32				hash_alg;
-	u32				ssv_len;
-
-	/* The fore and back channel */
-	struct nfs4_channel_attrs	fc_attrs;
-	struct nfs4_slot_table		fc_slot_table;
-	struct nfs4_channel_attrs	bc_attrs;
-	struct nfs4_slot_table		bc_slot_table;
-	struct nfs_client		*clp;
-};
-
-#endif /* CONFIG_NFS_V4 */
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a73ea89..29adb12 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -185,23 +185,20 @@
 	u32			max_reqs;
 };
 
-/* nfs41 sessions slot seqid */
-struct nfs4_slot {
-	u32		 	seq_nr;
-};
-
+struct nfs4_slot;
 struct nfs4_sequence_args {
-	struct nfs4_session	*sa_session;
-	u32			sa_slotid;
-	u8			sa_cache_this;
+	struct nfs4_slot	*sa_slot;
+	u8			sa_cache_this : 1,
+				sa_privileged : 1;
 };
 
 struct nfs4_sequence_res {
-	struct nfs4_session	*sr_session;
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
+	unsigned long		sr_timestamp;
 	int			sr_status;	/* sequence operation status */
-	unsigned long		sr_renewal_time;
 	u32			sr_status_flags;
+	u32			sr_highest_slotid;
+	u32			sr_target_highest_slotid;
 };
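
The repeated member reordering below is not cosmetic: with seq_args/seq_res as the first member of every per-operation struct, generic session code can likely find them at a fixed offset without knowing which operation it is handling, e.g. (sketch; 'calldata' is assumed to point at any of these argument structs):

	struct nfs4_sequence_args *seq_args = calldata;	/* first member */
	struct nfs4_slot *slot = seq_args->sa_slot;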
 
 struct nfs4_get_lease_time_args {
@@ -209,8 +206,8 @@
 };
 
 struct nfs4_get_lease_time_res {
-	struct nfs_fsinfo	       *lr_fsinfo;
 	struct nfs4_sequence_res	lr_seq_res;
+	struct nfs_fsinfo	       *lr_fsinfo;
 };
 
 #define PNFS_LAYOUT_MAXSIZE 4096
@@ -228,23 +225,23 @@
 };
 
 struct nfs4_layoutget_args {
+	struct nfs4_sequence_args seq_args;
 	__u32 type;
 	struct pnfs_layout_range range;
 	__u64 minlength;
 	__u32 maxcount;
 	struct inode *inode;
 	struct nfs_open_context *ctx;
-	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	struct nfs4_layoutdriver_data layout;
 };
 
 struct nfs4_layoutget_res {
+	struct nfs4_sequence_res seq_res;
 	__u32 return_on_close;
 	struct pnfs_layout_range range;
 	__u32 type;
 	nfs4_stateid stateid;
-	struct nfs4_sequence_res seq_res;
 	struct nfs4_layoutdriver_data *layoutp;
 };
 
@@ -255,38 +252,38 @@
 };
 
 struct nfs4_getdevicelist_args {
+	struct nfs4_sequence_args seq_args;
 	const struct nfs_fh *fh;
 	u32 layoutclass;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_getdevicelist_res {
-	struct pnfs_devicelist *devlist;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_devicelist *devlist;
 };
 
 struct nfs4_getdeviceinfo_args {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_args seq_args;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_getdeviceinfo_res {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_layoutcommit_args {
+	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	__u64 lastbytewritten;
 	struct inode *inode;
 	const u32 *bitmask;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutcommit_res {
+	struct nfs4_sequence_res seq_res;
 	struct nfs_fattr *fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res seq_res;
 	int status;
 };
 
@@ -300,11 +297,11 @@
 };
 
 struct nfs4_layoutreturn_args {
+	struct nfs4_sequence_args seq_args;
 	struct pnfs_layout_hdr *layout;
 	struct inode *inode;
 	nfs4_stateid stateid;
 	__u32   layout_type;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutreturn_res {
@@ -330,6 +327,7 @@
  * Arguments to the open call.
  */
 struct nfs_openargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *	fh;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
@@ -350,10 +348,10 @@
 	const u32 *		bitmask;
 	const u32 *		open_bitmap;
 	__u32			claim;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_openres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fh           fh;
 	struct nfs4_change_info	cinfo;
@@ -368,7 +366,6 @@
 	__u32			attrset[NFS4_BITMAP_SIZE];
 	struct nfs4_string	*owner;
 	struct nfs4_string	*group_owner;
-	struct nfs4_sequence_res	seq_res;
 	__u32			access_request;
 	__u32			access_supported;
 	__u32			access_result;
@@ -392,20 +389,20 @@
  * Arguments to the close call.
  */
 struct nfs_closeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *         fh;
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 	fmode_t			fmode;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_closeres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fattr *	fattr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 /*
  *  * Arguments to the lock,lockt, and locku call.
@@ -417,6 +414,7 @@
 };
 
 struct nfs_lock_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	lock_seqid;
@@ -427,40 +425,39 @@
 	unsigned char		block : 1;
 	unsigned char		reclaim : 1;
 	unsigned char		new_lock_owner : 1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lock_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	lock_seqid;
 	struct nfs_seqid *	open_seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_locku_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	seqid;
 	nfs4_stateid *		stateid;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_locku_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_lockt_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_lowner	lock_owner;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lockt_res {
-	struct file_lock *	denied; /* LOCK, LOCKT failed */
 	struct nfs4_sequence_res	seq_res;
+	struct file_lock *	denied; /* LOCK, LOCKT failed */
 };
 
 struct nfs_release_lockowner_args {
@@ -468,22 +465,23 @@
 };
 
 struct nfs4_delegreturnargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
 	const u32 * bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_delegreturnres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr * fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the read call.
  */
 struct nfs_readargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -491,20 +489,20 @@
 	__u32			count;
 	unsigned int		pgbase;
 	struct page **		pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_readres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	__u32			count;
 	int                     eof;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the write call.
  */
 struct nfs_writeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -514,7 +512,6 @@
 	unsigned int		pgbase;
 	struct page **		pages;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_write_verifier {
@@ -527,65 +524,65 @@
 };
 
 struct nfs_writeres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	struct nfs_writeverf *	verf;
 	__u32			count;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the commit call.
  */
 struct nfs_commitargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh		*fh;
 	__u64			offset;
 	__u32			count;
 	const u32		*bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_commitres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr	*fattr;
 	struct nfs_writeverf	*verf;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Common arguments to the unlink call
  */
 struct nfs_removeargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh	*fh;
 	struct qstr		name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_removeres {
+	struct nfs4_sequence_res 	seq_res;
 	const struct nfs_server *server;
 	struct nfs_fattr	*dir_attr;
 	struct nfs4_change_info	cinfo;
-	struct nfs4_sequence_res 	seq_res;
 };
 
 /*
  * Common arguments to the rename call
  */
 struct nfs_renameargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*old_dir;
 	const struct nfs_fh		*new_dir;
 	const struct qstr		*old_name;
 	const struct qstr		*new_name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_renameres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server		*server;
 	struct nfs4_change_info		old_cinfo;
 	struct nfs_fattr		*old_fattr;
 	struct nfs4_change_info		new_cinfo;
 	struct nfs_fattr		*new_fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -626,20 +623,20 @@
 };
 
 struct nfs_setattrargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *                 fh;
 	nfs4_stateid                    stateid;
 	struct iattr *                  iap;
 	const struct nfs_server *	server; /* Needed for name mapping */
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs_setaclargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_setaclres {
@@ -647,27 +644,27 @@
 };
 
 struct nfs_getaclargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 /* getxattr ACL interface flags */
 #define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
 struct nfs_getaclres {
+	struct nfs4_sequence_res	seq_res;
 	size_t				acl_len;
 	size_t				acl_data_offset;
 	int				acl_flags;
 	struct page *			acl_scratch;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_setattrres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_linkargs {
@@ -832,21 +829,22 @@
 typedef u64 clientid4;
 
 struct nfs4_accessargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
 	u32				access;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_accessres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	u32				supported;
 	u32				access;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_create_arg {
+	struct nfs4_sequence_args 	seq_args;
 	u32				ftype;
 	union {
 		struct {
@@ -863,88 +861,88 @@
 	const struct iattr *		attrs;
 	const struct nfs_fh *		dir_fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_create_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_fsinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fsinfo_res {
-	struct nfs_fsinfo	       *fsinfo;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsinfo	       *fsinfo;
 };
 
 struct nfs4_getattr_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_getattr_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_link_arg {
+	struct nfs4_sequence_args 	seq_args;
 	const struct nfs_fh *		fh;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_link_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		cinfo;
 	struct nfs_fattr *		dir_attr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 
 struct nfs4_lookup_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_lookup_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs_fh *			fh;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_lookup_root_arg {
-	const u32 *			bitmask;
 	struct nfs4_sequence_args	seq_args;
+	const u32 *			bitmask;
 };
 
 struct nfs4_pathconf_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_pathconf_res {
-	struct nfs_pathconf	       *pathconf;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_pathconf	       *pathconf;
 };
 
 struct nfs4_readdir_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	u64				cookie;
 	nfs4_verifier			verifier;
@@ -953,21 +951,20 @@
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
 	int				plus;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readdir_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_verifier			verifier;
 	unsigned int			pgbase;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_readlink {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	unsigned int			pgbase;
 	unsigned int			pglen;   /* zero-copy data */
 	struct page **			pages;   /* zero-copy data */
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readlink_res {
@@ -993,28 +990,28 @@
 };
 
 struct nfs4_statfs_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_statfs_res {
-	struct nfs_fsstat	       *fsstat;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsstat	       *fsstat;
 };
 
 struct nfs4_server_caps_arg {
-	struct nfs_fh		       *fhandle;
 	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh		       *fhandle;
 };
 
 struct nfs4_server_caps_res {
+	struct nfs4_sequence_res	seq_res;
 	u32				attr_bitmask[3];
 	u32				acl_bitmask;
 	u32				has_links;
 	u32				has_symlinks;
 	u32				fh_expire_type;
-	struct nfs4_sequence_res	seq_res;
 };
 
 #define NFS4_PATHNAME_MAXCOMPONENTS 512
@@ -1040,16 +1037,16 @@
 };
 
 struct nfs4_fs_locations_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *dir_fh;
 	const struct qstr *name;
 	struct page *page;
 	const u32 *bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fs_locations_res {
-	struct nfs4_fs_locations       *fs_locations;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_fs_locations       *fs_locations;
 };
 
 struct nfs4_secinfo_oid {
@@ -1074,14 +1071,14 @@
 };
 
 struct nfs4_secinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*dir_fh;
 	const struct qstr		*name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_secinfo_res {
-	struct nfs4_secinfo_flavors	*flavors;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_secinfo_flavors	*flavors;
 };
 
 #endif /* CONFIG_NFS_V4 */
@@ -1161,9 +1158,9 @@
 };
 
 struct nfs41_reclaim_complete_args {
+	struct nfs4_sequence_args	seq_args;
 	/* In the future extend to include curr_fh for use with migration */
 	unsigned char			one_fs:1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs41_reclaim_complete_res {
@@ -1173,28 +1170,28 @@
 #define SECINFO_STYLE_CURRENT_FH 0
 #define SECINFO_STYLE_PARENT 1
 struct nfs41_secinfo_no_name_args {
-	int				style;
 	struct nfs4_sequence_args	seq_args;
+	int				style;
 };
 
 struct nfs41_test_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_test_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 struct nfs41_free_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_free_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 #else
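/*
 * A minimal sketch (not part of the patch; all names below are
 * hypothetical) of why the hunks above hoist nfs4_sequence_args/seq_res
 * to the first member of every per-operation struct: with the common
 * header first, generic session-sequencing code can treat a pointer to
 * any operation's args as a pointer to the header via a plain cast.
 */
struct example_seq_args {
	int	slot;				/* stand-in for nfs4_sequence_args */
};

struct example_op_args {
	struct example_seq_args	seq_args;	/* must remain the first member */
	int			op_specific;
};

static void example_setup_sequence(void *op_args)
{
	/* Valid only because seq_args is placed first in every struct. */
	struct example_seq_args *seq = op_args;

	seq->slot = 0;
}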
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cc37a55..10e5947 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -67,7 +67,7 @@
 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
-	struct fs_struct *);
+	struct cred *, struct fs_struct *);
 int __init nsproxy_cache_init(void);
 
 static inline void put_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/of.h b/include/linux/of.h
index 6cfea9a..5ebcc5c 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -22,6 +22,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/spinlock.h>
 #include <linux/topology.h>
+#include <linux/notifier.h>
 
 #include <asm/byteorder.h>
 #include <asm/errno.h>
@@ -282,16 +283,28 @@
 
 extern int of_machine_is_compatible(const char *compat);
 
-extern int prom_add_property(struct device_node* np, struct property* prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
-				struct property *newprop);
+extern int of_add_property(struct device_node *np, struct property *prop);
+extern int of_remove_property(struct device_node *np, struct property *prop);
+extern int of_update_property(struct device_node *np, struct property *newprop);
 
-#if defined(CONFIG_OF_DYNAMIC)
 /* For updating the device tree at runtime */
-extern void of_attach_node(struct device_node *);
-extern void of_detach_node(struct device_node *);
-#endif
+#define OF_RECONFIG_ATTACH_NODE		0x0001
+#define OF_RECONFIG_DETACH_NODE		0x0002
+#define OF_RECONFIG_ADD_PROPERTY	0x0003
+#define OF_RECONFIG_REMOVE_PROPERTY	0x0004
+#define OF_RECONFIG_UPDATE_PROPERTY	0x0005
+
+struct of_prop_reconfig {
+	struct device_node	*dn;
+	struct property		*prop;
+};
+
+extern int of_reconfig_notifier_register(struct notifier_block *);
+extern int of_reconfig_notifier_unregister(struct notifier_block *);
+extern int of_reconfig_notify(unsigned long, void *);
+
+extern int of_attach_node(struct device_node *);
+extern int of_detach_node(struct device_node *);
 
 #define of_match_ptr(_ptr)	(_ptr)
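/*
 * A minimal usage sketch for the reconfiguration notifier API declared
 * above, assuming standard notifier_block semantics; the callback name,
 * the NOTIFY_OK return, and the assumption that attach/detach events
 * pass the device_node as @arg are illustrative, not taken from this
 * patch.
 */
#include <linux/notifier.h>
#include <linux/of.h>

static int example_of_reconfig(struct notifier_block *nb,
			       unsigned long action, void *arg)
{
	struct of_prop_reconfig *pr;

	switch (action) {
	case OF_RECONFIG_ADD_PROPERTY:
	case OF_RECONFIG_REMOVE_PROPERTY:
	case OF_RECONFIG_UPDATE_PROPERTY:
		pr = arg;		/* affected node plus property */
		(void)pr;
		break;
	case OF_RECONFIG_ATTACH_NODE:
	case OF_RECONFIG_DETACH_NODE:
		break;			/* @arg assumed to be the device_node */
	}
	return NOTIFY_OK;
}

static struct notifier_block example_of_nb = {
	.notifier_call = example_of_reconfig,
};

/* Register with of_reconfig_notifier_register(&example_of_nb) at init
 * and pair it with of_reconfig_notifier_unregister() at teardown. */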
 
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index b47d204..3863a4d 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -100,6 +100,7 @@
 
 #if !defined(CONFIG_OF_ADDRESS)
 struct of_dev_auxdata;
+struct device;
 static inline int of_platform_populate(struct device_node *root,
 					const struct of_device_id *matches,
 					const struct of_dev_auxdata *lookup,
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index bd1e860..3e88c9a 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -1,83 +1,34 @@
 #ifndef _LINUX_PERCPU_RWSEM_H
 #define _LINUX_PERCPU_RWSEM_H
 
-#include <linux/mutex.h>
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/rcupdate.h>
-#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
-	unsigned __percpu *counters;
-	bool locked;
-	struct mutex mtx;
+	unsigned int __percpu	*fast_read_ctr;
+	atomic_t		write_ctr;
+	struct rw_semaphore	rw_sem;
+	atomic_t		slow_read_ctr;
+	wait_queue_head_t	write_waitq;
 };
 
-#define light_mb()	barrier()
-#define heavy_mb()	synchronize_sched_expedited()
+extern void percpu_down_read(struct percpu_rw_semaphore *);
+extern void percpu_up_read(struct percpu_rw_semaphore *);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *p)
-{
-	rcu_read_lock_sched();
-	if (unlikely(p->locked)) {
-		rcu_read_unlock_sched();
-		mutex_lock(&p->mtx);
-		this_cpu_inc(*p->counters);
-		mutex_unlock(&p->mtx);
-		return;
-	}
-	this_cpu_inc(*p->counters);
-	rcu_read_unlock_sched();
-	light_mb(); /* A, between read of p->locked and read of data, paired with D */
-}
+extern void percpu_down_write(struct percpu_rw_semaphore *);
+extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-static inline void percpu_up_read(struct percpu_rw_semaphore *p)
-{
-	light_mb(); /* B, between read of the data and write to p->counter, paired with C */
-	this_cpu_dec(*p->counters);
-}
+extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
+				const char *, struct lock_class_key *);
+extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
-static inline unsigned __percpu_count(unsigned __percpu *counters)
-{
-	unsigned total = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
-
-	return total;
-}
-
-static inline void percpu_down_write(struct percpu_rw_semaphore *p)
-{
-	mutex_lock(&p->mtx);
-	p->locked = true;
-	synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */
-	while (__percpu_count(p->counters))
-		msleep(1);
-	heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
-}
-
-static inline void percpu_up_write(struct percpu_rw_semaphore *p)
-{
-	heavy_mb(); /* D, between write to data and write to p->locked, paired with A */
-	p->locked = false;
-	mutex_unlock(&p->mtx);
-}
-
-static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
-{
-	p->counters = alloc_percpu(unsigned);
-	if (unlikely(!p->counters))
-		return -ENOMEM;
-	p->locked = false;
-	mutex_init(&p->mtx);
-	return 0;
-}
-
-static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
-{
-	free_percpu(p->counters);
-	p->counters = NULL; /* catch use after free bugs */
-}
+#define percpu_init_rwsem(brw)	\
+({								\
+	static struct lock_class_key rwsem_key;			\
+	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
+})
 
 #endif
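/*
 * A minimal usage sketch for the reworked percpu_rw_semaphore API
 * declared above (the API calls are real, everything else is
 * illustrative): readers stay on a cheap per-cpu fast path while no
 * writer is pending, and the rare writer in percpu_down_write() drains
 * all readers first.
 */
static struct percpu_rw_semaphore example_sem;

static int example_init(void)
{
	return percpu_init_rwsem(&example_sem);	/* may fail with -ENOMEM */
}

static void example_hot_path(void)
{
	percpu_down_read(&example_sem);		/* scalable read side */
	/* ... read-side critical section ... */
	percpu_up_read(&example_sem);
}

static void example_rare_global_change(void)
{
	percpu_down_write(&example_sem);	/* waits for all readers */
	/* ... exclusive critical section ... */
	percpu_up_write(&example_sem);
}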
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 65e3e87..bf28599 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,7 @@
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
+	int nr_hashed;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
@@ -31,9 +32,12 @@
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct bsd_acct_struct *bacct;
 #endif
+	struct user_namespace *user_ns;
+	struct work_struct proc_work;
 	kgid_t pid_gid;
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
+	unsigned int proc_inum;
 };
 
 extern struct pid_namespace init_pid_ns;
@@ -46,7 +50,8 @@
 	return ns;
 }
 
-extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
+extern struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *ns);
 extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
 extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
 extern void put_pid_ns(struct pid_namespace *ns);
@@ -59,8 +64,8 @@
 	return ns;
 }
 
-static inline struct pid_namespace *
-copy_pid_ns(unsigned long flags, struct pid_namespace *ns)
+static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *ns)
 {
 	if (flags & CLONE_NEWPID)
 		ns = ERR_PTR(-EINVAL);
diff --git a/include/linux/platform_data/i2c-cbus-gpio.h b/include/linux/platform_data/i2c-cbus-gpio.h
new file mode 100644
index 0000000..6faa992
--- /dev/null
+++ b/include/linux/platform_data/i2c-cbus-gpio.h
@@ -0,0 +1,27 @@
+/*
+ * i2c-cbus-gpio.h - CBUS I2C platform_data definition
+ *
+ * Copyright (C) 2004-2009 Nokia Corporation
+ *
+ * Written by Felipe Balbi and Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+#define __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+
+struct i2c_cbus_platform_data {
+	int dat_gpio;
+	int clk_gpio;
+	int sel_gpio;
+};
+
+#endif /* __INCLUDE_LINUX_I2C_CBUS_GPIO_H */
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index c677b9f..5b429c4 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/platform_device.h>
+
 #define MMU_REG_SIZE		256
 
 /**
@@ -42,8 +44,11 @@
 
 struct iommu_platform_data {
 	const char *name;
-	const char *clk_name;
-	const int nr_tlb_entries;
+	const char *reset_name;
+	int nr_tlb_entries;
 	u32 da_start;
 	u32 da_end;
+
+	int (*assert_reset)(struct platform_device *pdev, const char *name);
+	int (*deassert_reset)(struct platform_device *pdev, const char *name);
 };
diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h
index 761f317..e81f62d 100644
--- a/include/linux/platform_data/lp855x.h
+++ b/include/linux/platform_data/lp855x.h
@@ -89,11 +89,6 @@
 	LP8556_COMBINED2,	/* pwm + i2c after the shaper block */
 };
 
-struct lp855x_pwm_data {
-	void (*pwm_set_intensity) (int brightness, int max_brightness);
-	int (*pwm_get_intensity) (int max_brightness);
-};
-
 struct lp855x_rom_data {
 	u8 addr;
 	u8 val;
@@ -105,7 +100,7 @@
  * @mode : brightness control by pwm or lp855x register
  * @device_control : value of DEVICE CONTROL register
  * @initial_brightness : initial value of backlight brightness
- * @pwm_data : platform specific pwm generation functions.
+ * @period_ns : platform specific pwm period value, in nanoseconds.
 		Only valid when mode is PWM_BASED.
  * @load_new_rom_data :
 	0 : use default configuration data
@@ -118,7 +113,7 @@
 	enum lp855x_brightness_ctrl_mode mode;
 	u8 device_control;
 	int initial_brightness;
-	struct lp855x_pwm_data pwm_data;
+	unsigned int period_ns;
 	u8 load_new_rom_data;
 	int size_program;
 	struct lp855x_rom_data *rom_data;
diff --git a/include/linux/platform_data/mtd-nomadik-nand.h b/include/linux/platform_data/mtd-nomadik-nand.h
deleted file mode 100644
index c3c8254..0000000
--- a/include/linux/platform_data/mtd-nomadik-nand.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ASM_ARCH_NAND_H
-#define __ASM_ARCH_NAND_H
-
-struct nomadik_nand_platform_data {
-	struct mtd_partition *parts;
-	int nparts;
-	int options;
-	int (*init) (void);
-	int (*exit) (void);
-};
-
-#define NAND_IO_DATA	0x40000000
-#define NAND_IO_CMD	0x40800000
-#define NAND_IO_ADDR	0x41000000
-
-#endif				/* __ASM_ARCH_NAND_H */
diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/include/linux/platform_data/serial-omap.h
similarity index 100%
rename from arch/arm/plat-omap/include/plat/omap-serial.h
rename to include/linux/platform_data/serial-omap.h
diff --git a/include/linux/platform_data/ti_am335x_adc.h b/include/linux/platform_data/ti_am335x_adc.h
new file mode 100644
index 0000000..e41d583
--- /dev/null
+++ b/include/linux/platform_data/ti_am335x_adc.h
@@ -0,0 +1,14 @@
+#ifndef __LINUX_TI_AM335X_ADC_H
+#define __LINUX_TI_AM335X_ADC_H
+
+/**
+ * struct adc_data - ADC input information
+ * @adc_channels:	Number of analog inputs
+ *			available for ADC.
+ */
+
+struct adc_data {
+	unsigned int adc_channels;
+};
+
+#endif
diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h
index 8570bcf..ef65b67 100644
--- a/include/linux/platform_data/usb-omap.h
+++ b/include/linux/platform_data/usb-omap.h
@@ -59,6 +59,9 @@
 
 	struct ehci_hcd_omap_platform_data	*ehci_data;
 	struct ohci_hcd_omap_platform_data	*ohci_data;
+
+	/* OMAP3 <= ES2.1 have a single ulpi bypass control bit */
+	unsigned				single_ulpi_bypass:1;
 };
 
 /*-------------------------------------------------------------------------*/
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e87..32676b3 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -28,7 +28,11 @@
  */
 
 enum {
-	PROC_ROOT_INO = 1,
+	PROC_ROOT_INO		= 1,
+	PROC_IPC_INIT_INO	= 0xEFFFFFFFU,
+	PROC_UTS_INIT_INO	= 0xEFFFFFFEU,
+	PROC_USER_INIT_INO	= 0xEFFFFFFDU,
+	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
 };
 
 /*
@@ -174,7 +178,10 @@
 	struct proc_dir_entry *parent);
 
 extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
 
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
 #else
 
 #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
@@ -229,6 +236,19 @@
 	return ERR_PTR(-EINVAL);
 }
 
+static inline bool proc_ns_inode(struct inode *inode)
+{
+	return false;
+}
+
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+	*inum = 1;
+	return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
@@ -247,10 +267,14 @@
 	void *(*get)(struct task_struct *task);
 	void (*put)(void *ns);
 	int (*install)(struct nsproxy *nsproxy, void *ns);
+	unsigned int (*inum)(void *ns);
 };
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations mntns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
@@ -290,4 +314,7 @@
 	return pde->parent->data;
 }
 
+#include <linux/signal.h>
+
+void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set);
 #endif /* _LINUX_PROC_FS_H */
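/*
 * A sketch (struct and function names hypothetical) of how a namespace
 * is expected to use the proc_alloc_inum()/proc_free_inum() pair
 * declared above: reserve a unique /proc inode number when the
 * namespace is created and release it when the namespace dies.
 */
struct example_ns {
	unsigned int proc_inum;
};

static int example_ns_create(struct example_ns *ns)
{
	int err = proc_alloc_inum(&ns->proc_inum);

	if (err)
		return err;
	/* ... remaining namespace initialization ... */
	return 0;
}

static void example_ns_destroy(struct example_ns *ns)
{
	proc_free_inum(ns->proc_inum);
}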
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a89ff04..1693775 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -32,6 +32,8 @@
 #define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
 #define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)
 
+#define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
+
 /* single stepping state bits (used on ARM and PA-RISC) */
 #define PT_SINGLESTEP_BIT	31
 #define PT_SINGLESTEP		(1<<PT_SINGLESTEP_BIT)
@@ -342,6 +344,10 @@
 #define signal_pt_regs() task_pt_regs(current)
 #endif
 
+#ifndef current_user_stack_pointer
+#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 112b314..6d661f3 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -171,6 +171,9 @@
 					 unsigned int index,
 					 const char *label);
 
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+		const struct of_phandle_args *args);
+
 struct pwm_device *pwm_get(struct device *dev, const char *consumer);
 void pwm_put(struct pwm_device *pwm);
 
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 640c69c..8dfaa2c 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -98,6 +98,9 @@
 extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;
+extern const struct raid6_calls raid6_avx2x1;
+extern const struct raid6_calls raid6_avx2x2;
+extern const struct raid6_calls raid6_avx2x4;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
@@ -109,6 +112,7 @@
 
 extern const struct raid6_recov_calls raid6_recov_intx1;
 extern const struct raid6_recov_calls raid6_recov_ssse3;
+extern const struct raid6_recov_calls raid6_recov_avx2;
 
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
diff --git a/include/linux/random.h b/include/linux/random.h
index 6330ed4..d984608 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -25,10 +25,19 @@
 unsigned int get_random_int(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
-u32 random32(void);
-void srandom32(u32 seed);
+u32 prandom_u32(void);
+void prandom_bytes(void *buf, int nbytes);
+void prandom_seed(u32 seed);
 
-u32 prandom32(struct rnd_state *);
+/*
+ * These macros are preserved for backward compatibility and should be
+ * removed as soon as a transition is finished.
+ */
+#define random32() prandom_u32()
+#define srandom32(seed) prandom_seed(seed)
+
+u32 prandom_u32_state(struct rnd_state *);
+void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
 
 /*
  * Handle minimum values for seeds
@@ -39,11 +48,11 @@
 }
 
 /**
- * prandom32_seed - set seed for prandom32().
+ * prandom_seed_state - set seed for prandom_u32_state().
  * @state: pointer to state structure to receive the seed.
  * @seed: arbitrary 64-bit value to use as a seed.
  */
-static inline void prandom32_seed(struct rnd_state *state, u64 seed)
+static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
 	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
 
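/*
 * A minimal usage sketch of the renamed pseudo-random API above,
 * showing both the shared-global-state and the caller-owned-state
 * variants; the seed value and function name are illustrative.
 */
#include <linux/random.h>

static void example_prandom(void)
{
	struct rnd_state state;
	u8 buf[16];
	u32 global, local;

	global = prandom_u32();			/* shared global state */
	prandom_bytes(buf, sizeof(buf));

	prandom_seed_state(&state, 0x123456789abcdefULL);
	local = prandom_u32_state(&state);	/* reproducible stream */

	(void)global;
	(void)local;
}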
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6f54e40..5ae8456 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -125,14 +125,16 @@
  *
  * these calls check for usage underflow and show a warning on the console
  * _locked call expects the counter->lock to be taken
+ *
+ * returns the total charges still present in @counter.
  */
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val);
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val);
 /**
  * res_counter_margin - calculate chargeable space of a counter
  * @cnt: the counter
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bfe1f47..c20635c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -25,8 +25,8 @@
  * pointing to this anon_vma once its vma list is empty.
  */
 struct anon_vma {
-	struct anon_vma *root;	/* Root of this anon_vma tree */
-	struct mutex mutex;	/* Serialize access to vma list */
+	struct anon_vma *root;		/* Root of this anon_vma tree */
+	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;   /* locked by mmap_sem & page_table_lock */
-	struct rb_node rb;			/* locked by anon_vma->mutex */
+	struct rb_node rb;			/* locked by anon_vma->rwsem */
 	unsigned long rb_subtree_last;
 #ifdef CONFIG_DEBUG_VM_RB
 	unsigned long cached_vma_start, cached_vma_last;
@@ -108,26 +108,37 @@
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_lock(&anon_vma->root->mutex);
+		down_write(&anon_vma->root->rwsem);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_unlock(&anon_vma->root->mutex);
+		up_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
-	mutex_lock(&anon_vma->root->mutex);
+	down_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	mutex_unlock(&anon_vma->root->mutex);
+	up_write(&anon_vma->root->rwsem);
 }
 
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+	down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+	up_read(&anon_vma->root->rwsem);
+}
+
+
 /*
  * anon_vma helper functions.
  */
@@ -220,8 +231,8 @@
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c2f307..206bb08 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,14 @@
 	short il_next;
 	short pref_node_fork;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	int numa_scan_seq;
+	int numa_migrate_seq;
+	unsigned int numa_scan_period;
+	u64 node_stamp;			/* migration stamp  */
+	struct callback_head numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+
 	struct rcu_head rcu;
 
 	/*
@@ -1589,6 +1597,7 @@
 		unsigned long nr_pages;	/* uncharged usage */
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
+	unsigned int memcg_kmem_skip_account;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
@@ -1601,6 +1610,18 @@
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int node, int pages, bool migrated);
+extern void set_numabalancing_state(bool enabled);
+#else
+static inline void task_numa_fault(int node, int pages, bool migrated)
+{
+}
+static inline void set_numabalancing_state(bool enabled)
+{
+}
+#endif
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -1758,12 +1779,6 @@
 	return tsk->pid == 1;
 }
 
-/*
- * is_container_init:
- * check whether in the task is init in its own pid namespace.
- */
-extern int is_container_init(struct task_struct *tsk);
-
 extern struct pid *cad_pid;
 
 extern void free_task(struct task_struct *tsk);
@@ -2030,6 +2045,13 @@
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
@@ -2331,9 +2353,7 @@
 		     const char __user * const __user *);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-#endif
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
diff --git a/include/linux/security.h b/include/linux/security.h
index 05e88bd..0f6afc6 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -694,6 +694,12 @@
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
+ * @kernel_module_from_file:
+ *	Load a kernel module from userspace.
+ *	@file contains the file structure pointing to the file containing
+ *	the kernel module to load. If the module is being loaded from a blob,
+ *	this argument will be NULL.
+ *	Return 0 if permission is granted.
  * @task_fix_setuid:
  *	Update the module's state after setting one or more of the user
  *	identity attributes of the current process.  The @flags parameter
@@ -1508,6 +1514,7 @@
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_module_from_file)(struct file *file);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
 	int (*task_setpgid) (struct task_struct *p, pid_t pgid);
@@ -1765,6 +1772,7 @@
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
+int security_kernel_module_from_file(struct file *file);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
@@ -2278,6 +2286,11 @@
 	return 0;
 }
 
+static inline int security_kernel_module_from_file(struct file *file)
+{
+	return 0;
+}
+
 static inline int security_task_fix_setuid(struct cred *new,
 					   const struct cred *old,
 					   int flags)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index e19a011..0a89ffc 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -385,4 +385,7 @@
 
 void signals_init(void);
 
+int restore_altstack(const stack_t __user *);
+int __save_altstack(stack_t __user *, unsigned long);
+
 #endif /* _LINUX_SIGNAL_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 83d1a14..5d168d7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -11,6 +11,8 @@
 
 #include <linux/gfp.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
+
 
 /*
  * Flags to pass to kmem_cache_create().
@@ -116,6 +118,7 @@
 };
 #endif
 
+struct mem_cgroup;
 /*
  * struct kmem_cache related prototypes
  */
@@ -125,10 +128,12 @@
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *));
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
+			unsigned long, void (*)(void *), struct kmem_cache *);
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
-unsigned int kmem_cache_size(struct kmem_cache *);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -176,6 +181,48 @@
 #ifndef ARCH_SLAB_MINALIGN
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * struct kmem_cache will hold a pointer to it, so the memory cost while
+ * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
+ * would otherwise be if that would be bundled in kmem_cache: we'll need an
+ * extra pointer chase. But the trade off clearly lays in favor of not
+ * penalizing non-users.
+ *
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system.
+ *
+ * Child caches will hold extra metadata needed for their operation. Fields are:
+ *
+ * @memcg: pointer to the memcg this cache belongs to
+ * @list: list_head for the list of all caches in this memcg
+ * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @dead: set to true after the memcg dies; the cache may still be around.
+ * @nr_pages: number of pages that belong to this cache.
+ * @destroy: worker to be called whenever we are ready, or believe we may be
+ *           ready, to destroy this cache.
+ */
+struct memcg_cache_params {
+	bool is_root_cache;
+	union {
+		struct kmem_cache *memcg_caches[0];
+		struct {
+			struct mem_cgroup *memcg;
+			struct list_head list;
+			struct kmem_cache *root_cache;
+			bool dead;
+			atomic_t nr_pages;
+			struct work_struct destroy;
+		};
+	};
+};
+
+int memcg_update_all_caches(int num_memcgs);
+
+struct seq_file;
+int cache_show(struct kmem_cache *s, struct seq_file *m);
+void print_slabinfo_header(struct seq_file *m);
 
 /*
  * Common kmalloc functions provided by all allocators
@@ -388,6 +435,14 @@
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+/*
+ * Determine the size of a slab object
+ */
+static inline unsigned int kmem_cache_size(struct kmem_cache *s)
+{
+	return s->object_size;
+}
+
 void __init kmem_cache_init_late(void);
 
 #endif	/* _LINUX_SLAB_H */
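/*
 * A sketch (accessor name hypothetical, per-memcg indexing assumed) of
 * how the memcg_cache_params union above is meant to be read: a root
 * cache carries the per-memcg cache array, while a child cache carries
 * the metadata fields of the anonymous struct.
 */
static struct kmem_cache *
example_cache_view(struct memcg_cache_params *p, int memcg_id)
{
	if (p->is_root_cache)
		return p->memcg_caches[memcg_id];	/* root: array view */
	return p->root_cache;				/* child: metadata view */
}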
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index cc290f0..8bb6e0e 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -81,6 +81,9 @@
 	 */
 	int obj_offset;
 #endif /* CONFIG_DEBUG_SLAB */
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 /* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
@@ -89,9 +92,13 @@
 	 * (see kmem_cache_init())
 	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
 	 * is statically defined, so we reserve the max number of cpus.
+	 *
+	 * We also need to guarantee that the list is able to accommodate a
+	 * pointer for each node since "nodelists" uses the remainder of
+	 * available pointers.
 	 */
 	struct kmem_list3 **nodelists;
-	struct array_cache *array[NR_CPUS];
+	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
 	/*
 	 * Do not add fields after array[]
 	 */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index df448ad..9db4825 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,6 +101,10 @@
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+	int max_attr_size; /* for propagation, maximum size of a stored attr */
+#endif
 
 #ifdef CONFIG_NUMA
 	/*
@@ -222,7 +226,10 @@
 static __always_inline void *
 kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
-	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	void *ret;
+
+	flags |= (__GFP_COMP | __GFP_KMEMCG);
+	ret = (void *) __get_free_pages(flags, order);
 	kmemleak_alloc(ret, size, 1, flags);
 	return ret;
 }
diff --git a/include/linux/string.h b/include/linux/string.h
index 63012581..ac889c5 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -143,4 +143,15 @@
 
 extern size_t memweight(const void *ptr, size_t bytes);
 
+/**
+ * kbasename - return the last part of a pathname.
+ *
+ * @path: path to extract the filename from.
+ */
+static inline const char *kbasename(const char *path)
+{
+	const char *tail = strrchr(path, '/');
+	return tail ? tail + 1 : path;
+}
+
 #endif /* _LINUX_STRING_H_ */
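/*
 * A short illustration of kbasename() above; the literal paths and the
 * function name are illustrative only.
 */
static void example_kbasename(void)
{
	const char *a = kbasename("/usr/lib/libfoo.so");	/* "libfoo.so" */
	const char *b = kbasename("Makefile");			/* "Makefile": no '/' */
	const char *c = kbasename("dir/");			/* "": trailing '/' */

	(void)a;
	(void)b;
	(void)c;
}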
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index f792794..5dc9ee4 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -217,6 +217,8 @@
 static inline int get_int(char **bpp, int *anint)
 {
 	char buf[50];
+	char *ep;
+	int rv;
 	int len = qword_get(bpp, buf, sizeof(buf));
 
 	if (len < 0)
@@ -224,9 +226,11 @@
 	if (len == 0)
 		return -ENOENT;
 
-	if (kstrtoint(buf, 0, anint))
+	rv = simple_strtol(buf, &ep, 0);
+	if (*ep)
 		return -EINVAL;
 
+	*anint = rv;
 	return 0;
 }
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index dc0c3cc..b64f8eb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -192,7 +192,6 @@
 	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
-	unsigned char		count;			/* # task groups remaining serviced so far */
 	unsigned char		nr;			/* # tasks remaining for cookie */
 	unsigned short		qlen;			/* total # tasks waiting in queue */
 	struct rpc_timer	timer_list;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d83db80..676ddf5 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -243,6 +243,7 @@
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	int			rq_resused;	/* number of pages used for result */
+	struct page *		*rq_next_page; /* next reply page to use */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
@@ -338,9 +339,8 @@
 
 static inline void svc_free_res_pages(struct svc_rqst *rqstp)
 {
-	while (rqstp->rq_resused) {
-		struct page **pp = (rqstp->rq_respages +
-				    --rqstp->rq_resused);
+	while (rqstp->rq_next_page != rqstp->rq_respages) {
+		struct page **pp = --rqstp->rq_next_page;
 		if (*pp) {
 			put_page(*pp);
 			*pp = NULL;
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 92ad02f..62fd1b7 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,11 +26,28 @@
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	u32			sk_reclen;	/* length of record */
-	u32			sk_tcplen;	/* current read length */
+	/* On-the-wire fragment header: */
+	__be32			sk_reclen;
+	/* As we receive a record, this includes the length received so
+	 * far (including the fragment header): */
+	u32			sk_tcplen;
+	/* Total length of the data (not including fragment headers)
+	 * received so far in the fragments making up this rpc: */
+	u32			sk_datalen;
+
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
+static inline u32 svc_sock_reclen(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK;
+}
+
+static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT;
+}
+
 /*
  * Function prototypes.
  */
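/*
 * A sketch of the TCP record-marking format the helpers above decode:
 * the on-the-wire fragment header is a big-endian u32 whose top bit
 * flags the final fragment and whose low 31 bits give the fragment
 * length. The literal masks below are assumed to match the
 * RPC_LAST_STREAM_FRAGMENT and RPC_FRAGMENT_SIZE_MASK constants the
 * inlines reference; the function name is illustrative.
 */
static void example_decode_marker(__be32 wire_header)
{
	u32 host = ntohl(wire_header);
	u32 len = host & 0x7fffffff;		/* fragment length */
	bool last = host & 0x80000000;		/* final fragment? */

	(void)len;
	(void)last;
}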
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 8d08b3e..071d62c 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -34,21 +34,25 @@
 	SYNC_FOR_CPU = 0,
 	SYNC_FOR_DEVICE = 1,
 };
-extern void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
-				    phys_addr_t phys, size_t size,
-				    enum dma_data_direction dir);
 
-extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
+/* define the last possible byte of physical address space as a mapping error */
+#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0)
+
+extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+					  dma_addr_t tbl_dma_addr,
+					  phys_addr_t phys, size_t size,
+					  enum dma_data_direction dir);
+
+extern void swiotlb_tbl_unmap_single(struct device *hwdev,
+				     phys_addr_t tlb_addr,
 				     size_t size, enum dma_data_direction dir);
 
-extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
+extern void swiotlb_tbl_sync_single(struct device *hwdev,
+				    phys_addr_t tlb_addr,
 				    size_t size, enum dma_data_direction dir,
 				    enum dma_sync_target target);
 
 /* Accessory functions. */
-extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-			   enum dma_data_direction dir);
-
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91835e7..45e2db2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -63,6 +63,7 @@
 struct old_linux_dirent;
 struct perf_event_attr;
 struct file_handle;
+struct sigaltstack;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -299,6 +300,11 @@
 asmlinkage long sys_sigpending(old_sigset_t __user *set);
 asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
 				old_sigset_t __user *oset);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss,
+				struct sigaltstack __user *uoss);
+#endif
+
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
 asmlinkage long sys_setitimer(int which,
 				struct itimerval __user *value,
@@ -560,10 +566,10 @@
 asmlinkage long sys_utimes(char __user *filename,
 				struct timeval __user *utimes);
 asmlinkage long sys_lseek(unsigned int fd, off_t offset,
-			  unsigned int origin);
+			  unsigned int whence);
 asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
 			unsigned long offset_low, loff_t __user *result,
-			unsigned int origin);
+			unsigned int whence);
 asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
 asmlinkage long sys_readahead(int fd, loff_t offset, size_t count);
 asmlinkage long sys_readv(unsigned long fd,
@@ -827,15 +833,6 @@
 				  const char  __user *pathname);
 asmlinkage long sys_syncfs(int fd);
 
-#ifndef CONFIG_GENERIC_KERNEL_EXECVE
-int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]);
-#else
-#define kernel_execve(filename, argv, envp) \
-	do_execve(filename, \
-		(const char __user *const __user *)argv, \
-		(const char __user *const __user *)envp)
-#endif
-
 asmlinkage long sys_fork(void);
 asmlinkage long sys_vfork(void);
 #ifdef CONFIG_CLONE_BACKWARDS
@@ -880,4 +877,5 @@
 
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
+asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
 #endif
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ccc1899..e7e0473 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,6 +61,8 @@
 # define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
+#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
+
 /*
  * flag set/clear/test wrappers
  * - pass TIF_xxxx constants to these functions
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 9bbeabf..bd45eb7 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -69,6 +69,7 @@
 #		define EVENT_DEV_ASLEEP 6
 #		define EVENT_DEV_OPEN	7
 #		define EVENT_DEVICE_REPORT_IDLE	8
+#		define EVENT_NO_RUNTIME_PM	9
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
@@ -240,4 +241,6 @@
 extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 extern int usbnet_nway_reset(struct net_device *net);
 
+extern int usbnet_manage_power(struct usbnet *, int);
+
 #endif /* __LINUX_USB_USBNET_H */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 95142ca..b9bd2e6 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@
 	struct user_namespace	*parent;
 	kuid_t			owner;
 	kgid_t			group;
+	unsigned int		proc_inum;
 };
 
 extern struct user_namespace init_user_ns;
@@ -39,6 +40,7 @@
 }
 
 extern int create_user_ns(struct cred *new);
+extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
 extern void free_user_ns(struct kref *kref);
 
 static inline void put_user_ns(struct user_namespace *ns)
@@ -66,6 +68,14 @@
 	return -EINVAL;
 }
 
+static inline int unshare_userns(unsigned long unshare_flags,
+				 struct cred **new_cred)
+{
+	if (unshare_flags & CLONE_NEWUSER)
+		return -EINVAL;
+	return 0;
+}
+
 static inline void put_user_ns(struct user_namespace *ns)
 {
 }
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b34520..239e277 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -23,6 +23,7 @@
 	struct kref kref;
 	struct new_utsname name;
 	struct user_namespace *user_ns;
+	unsigned int proc_inum;
 };
 extern struct uts_namespace init_uts_ns;
 
@@ -33,7 +34,7 @@
 }
 
 extern struct uts_namespace *copy_utsname(unsigned long flags,
-					  struct task_struct *tsk);
+	struct user_namespace *user_ns, struct uts_namespace *old_ns);
 extern void free_uts_ns(struct kref *kref);
 
 static inline void put_uts_ns(struct uts_namespace *ns)
@@ -50,12 +51,12 @@
 }
 
 static inline struct uts_namespace *copy_utsname(unsigned long flags,
-						 struct task_struct *tsk)
+	struct user_namespace *user_ns, struct uts_namespace *old_ns)
 {
 	if (flags & CLONE_NEWUTS)
 		return ERR_PTR(-EINVAL);
 
-	return tsk->nsproxy->uts_ns;
+	return old_ns;
 }
 #endif
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 533b115..cf8adb1 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -16,12 +16,20 @@
  * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @priv: a pointer for the virtqueue implementation to use.
+ * @index: the zero-based ordinal number for this queue.
+ * @num_free: number of elements we expect to be able to fit.
+ *
+ * A note on @num_free: with indirect buffers, each buffer needs one
+ * element in the queue, otherwise a buffer will need one element per
+ * sg element.
  */
 struct virtqueue {
 	struct list_head list;
 	void (*callback)(struct virtqueue *vq);
 	const char *name;
 	struct virtio_device *vdev;
+	unsigned int index;
+	unsigned int num_free;
 	void *priv;
 };
 
@@ -50,7 +58,11 @@
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
-int virtqueue_get_queue_index(struct virtqueue *vq);
+/* FIXME: Obsolete accessor, but required for virtio_net merge. */
+static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq)
+{
+	return vq->index;
+}
 
 /**
  * virtio_device - representation of a device using virtio
@@ -73,7 +85,11 @@
 	void *priv;
 };
 
-#define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
+static inline struct virtio_device *dev_to_virtio(struct device *_dev)
+{
+	return container_of(_dev, struct virtio_device, dev);
+}
+
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
@@ -103,6 +119,11 @@
 #endif
 };
 
+static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
+{
+	return container_of(drv, struct virtio_driver, driver);
+}
+
 int register_virtio_driver(struct virtio_driver *drv);
 void unregister_virtio_driver(struct virtio_driver *drv);
 #endif /* _LINUX_VIRTIO_H */
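/*
 * A short sketch of what converting dev_to_virtio() from a macro to the
 * inline function above buys: the argument is now type-checked, so
 * passing anything other than a struct device pointer fails to compile.
 * The probe function below is illustrative.
 */
static int example_virtio_probe(struct device *dev)
{
	struct virtio_device *vdev = dev_to_virtio(dev);
	struct virtio_driver *vdrv = drv_to_virtio(dev->driver);

	(void)vdev;
	(void)vdrv;
	return 0;
}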
diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index d6b4440..4195b97 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h
@@ -1,7 +1,31 @@
+/*
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
 #ifndef _LINUX_VIRTIO_SCSI_H
 #define _LINUX_VIRTIO_SCSI_H
-/* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
 
 #define VIRTIO_SCSI_CDB_SIZE   32
 #define VIRTIO_SCSI_SENSE_SIZE 96
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index fe786f0..fce0a27 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,8 +38,18 @@
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_NUMA_BALANCING
+		NUMA_PTE_UPDATES,
+		NUMA_HINT_FAULTS,
+		NUMA_HINT_FAULTS_LOCAL,
+		NUMA_PAGE_MIGRATE,
+#endif
+#ifdef CONFIG_MIGRATION
+		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
+#endif
 #ifdef CONFIG_COMPACTION
-		COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
+		COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
+		COMPACTISOLATED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 92a86b2..a13291f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -80,6 +80,14 @@
 
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define count_vm_numa_event(x)     count_vm_event(x)
+#define count_vm_numa_events(x, y) count_vm_events(x, y)
+#else
+#define count_vm_numa_event(x) do {} while (0)
+#define count_vm_numa_events(x, y) do {} while (0)
+#endif /* CONFIG_NUMA_BALANCING */
+
 #define __count_zone_vm_events(item, zone, delta) \
 		__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
 		zone_idx(zone), delta)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 168dfe1..7cb64d4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -550,6 +550,170 @@
 	__ret;								\
 })
 
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_lock_irq_cmd - sleep until a condition gets true. The
+ *			     condition is checked under the lock. This
+ *			     is expected to be called with the lock
+ *			     taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd
+ *	  and schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ */
+#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+} while (0)
+
+/**
+ * wait_event_lock_irq - sleep until a condition gets true. The
+ *			 condition is checked under the lock. This
+ *			 is expected to be called with the lock
+ *			 taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ */
+#define wait_event_lock_irq(wq, condition, lock)			\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, );			\
+} while (0)
+
+
+#define __wait_event_interruptible_lock_irq(wq, condition,		\
+					    lock, ret, cmd)		\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected to
+ *		be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ *	  schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, cmd);	\
+	__ret;								\
+})
+
+/**
+ * wait_event_interruptible_lock_irq - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected
+ *		to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, );	\
+	__ret;								\
+})
+
+
 /*
  * These are the old interfaces to sleep waiting for an event.
  * They are racy.  DO NOT use them, use the wait_event* interfaces above.
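A minimal usage sketch of the interruptible variant added above (dev, dev->wq,
dev->lock and dev->ready are hypothetical driver fields; note the spinlock is
passed by name rather than by address, since the macro takes &lock internally):

	spin_lock_irq(&dev->lock);
	ret = wait_event_interruptible_lock_irq(dev->wq, dev->ready, dev->lock);
	spin_unlock_irq(&dev->lock);
	if (ret)	/* -ERESTARTSYS: interrupted by a signal */
		return ret;

The lock is held again on return, whether the wait completed or was
interrupted, so the caller always unlocks.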
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ba1d361..1832927 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -318,6 +318,7 @@
 				       const unsigned long max_rto);
 
 extern void inet_csk_destroy_sock(struct sock *sk);
+extern void inet_csk_prepare_forced_close(struct sock *sk);
 
 /*
  * LISTEN is a special case for poll..
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 7af1ea8..23b3a7c 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -78,6 +78,13 @@
 	__be32			retrans_timer;
 };
 
+struct rd_msg {
+	struct icmp6hdr icmph;
+	struct in6_addr	target;
+	struct in6_addr	dest;
+	__u8		opt[0];
+};
+
 struct nd_opt_hdr {
 	__u8		nd_opt_type;
 	__u8		nd_opt_len;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c5a43f5..de644bc 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@
 
 	struct user_namespace   *user_ns;	/* Owning user namespace */
 
+	unsigned int		proc_inum;
+
 	struct proc_dir_entry 	*proc_net;
 	struct proc_dir_entry 	*proc_net_stat;
 
diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 628db7b..3953cea 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -242,7 +242,6 @@
 	unsigned int symmetric_rates:1;
 	struct snd_pcm_runtime *runtime;
 	unsigned int active;
-	unsigned char pop_wait:1;
 	unsigned char probed:1;
 
 	struct snd_soc_dapm_widget *playback_widget;
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 91244a0..769e27c 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1039,6 +1039,7 @@
 	struct snd_soc_dpcm_runtime dpcm[2];
 
 	long pmdown_time;
+	unsigned char pop_wait:1;
 
 	/* runtime devices */
 	struct snd_pcm *pcm;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 54fab04..ea546a4 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -45,7 +45,8 @@
 
 #define show_root_type(obj)						\
 	obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) ||		\
-	      (obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-"
+	      (obj >= BTRFS_ROOT_TREE_OBJECTID &&			\
+	       obj <= BTRFS_CSUM_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
 
 #define BTRFS_GROUP_FLAGS	\
 	{ BTRFS_BLOCK_GROUP_DATA,	"DATA"}, \
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d49b285..f6372b0 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -15,6 +15,7 @@
 struct mpage_da_data;
 struct ext4_map_blocks;
 struct ext4_extent;
+struct extent_status;
 
 #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
 
@@ -1519,10 +1520,9 @@
 );
 
 DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
-	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
-		 ext4_fsblk_t pblk, unsigned int len, int ret),
+	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int ret),
 
-	TP_ARGS(inode, lblk, pblk, len, ret),
+	TP_ARGS(inode, map, ret),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,		dev		)
@@ -1530,37 +1530,37 @@
 		__field(	ext4_fsblk_t,	pblk		)
 		__field(	ext4_lblk_t,	lblk		)
 		__field(	unsigned int,	len		)
+		__field(	unsigned int,	flags		)
 		__field(	int,		ret		)
 	),
 
 	TP_fast_assign(
 		__entry->dev    = inode->i_sb->s_dev;
 		__entry->ino    = inode->i_ino;
-		__entry->pblk	= pblk;
-		__entry->lblk	= lblk;
-		__entry->len	= len;
+		__entry->pblk	= map->m_pblk;
+		__entry->lblk	= map->m_lblk;
+		__entry->len	= map->m_len;
+		__entry->flags	= map->m_flags;
 		__entry->ret	= ret;
 	),
 
-	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d",
+	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u flags %x ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->lblk, __entry->pblk,
-		  __entry->len, __entry->ret)
+		  __entry->len, __entry->flags, __entry->ret)
 );
 
 DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit,
-	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
-		 ext4_fsblk_t pblk, unsigned len, int ret),
+	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int ret),
 
-	TP_ARGS(inode, lblk, pblk, len, ret)
+	TP_ARGS(inode, map, ret)
 );
 
 DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit,
-	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
-		 ext4_fsblk_t pblk, unsigned len, int ret),
+	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int ret),
 
-	TP_ARGS(inode, lblk, pblk, len, ret)
+	TP_ARGS(inode, map, ret)
 );
 
 TRACE_EVENT(ext4_ext_load_extent,
@@ -1680,10 +1680,10 @@
 );
 
 TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
-	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int flags,
 		 unsigned int allocated, ext4_fsblk_t newblock),
 
-	TP_ARGS(inode, map, allocated, newblock),
+	TP_ARGS(inode, map, flags, allocated, newblock),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,		dev		)
@@ -1699,7 +1699,7 @@
 	TP_fast_assign(
 		__entry->dev		= inode->i_sb->s_dev;
 		__entry->ino		= inode->i_ino;
-		__entry->flags		= map->m_flags;
+		__entry->flags		= flags;
 		__entry->lblk		= map->m_lblk;
 		__entry->pblk		= map->m_pblk;
 		__entry->len		= map->m_len;
@@ -1707,7 +1707,7 @@
 		__entry->newblk		= newblock;
 	),
 
-	TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %d"
+	TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %x "
 		  "allocated %d newblock %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
@@ -2055,6 +2055,106 @@
 		  (unsigned short) __entry->eh_entries)
 );
 
+TRACE_EVENT(ext4_es_insert_extent,
+	TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t len),
+
+	TP_ARGS(inode, start, len),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	start			)
+		__field(	loff_t, len			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->start	= start;
+		__entry->len	= len;
+	),
+
+	TP_printk("dev %d,%d ino %lu es [%lld/%lld)",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->start, __entry->len)
+);
+
+TRACE_EVENT(ext4_es_remove_extent,
+	TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t len),
+
+	TP_ARGS(inode, start, len),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	start			)
+		__field(	loff_t,	len			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->start	= start;
+		__entry->len	= len;
+	),
+
+	TP_printk("dev %d,%d ino %lu es [%lld/%lld)",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->start, __entry->len)
+);
+
+TRACE_EVENT(ext4_es_find_extent_enter,
+	TP_PROTO(struct inode *inode, ext4_lblk_t start),
+
+	TP_ARGS(inode, start),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	ino_t,		ino		)
+		__field(	ext4_lblk_t,	start		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->start	= start;
+	),
+
+	TP_printk("dev %d,%d ino %lu start %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino, __entry->start)
+);
+
+TRACE_EVENT(ext4_es_find_extent_exit,
+	TP_PROTO(struct inode *inode, struct extent_status *es,
+		 ext4_lblk_t ret),
+
+	TP_ARGS(inode, es, ret),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	ino_t,		ino		)
+		__field(	ext4_lblk_t,	start		)
+		__field(	ext4_lblk_t,	len		)
+		__field(	ext4_lblk_t,	ret		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->start	= es->start;
+		__entry->len	= es->len;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->start, __entry->len, __entry->ret)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5..1eddbf1 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,6 +34,7 @@
 	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
 	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
 	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
new file mode 100644
index 0000000..ec2a6cc
--- /dev/null
+++ b/include/trace/events/migrate.h
@@ -0,0 +1,51 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM migrate
+
+#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MIGRATE_H
+
+#define MIGRATE_MODE						\
+	{MIGRATE_ASYNC,		"MIGRATE_ASYNC"},		\
+	{MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT"},		\
+	{MIGRATE_SYNC,		"MIGRATE_SYNC"}
+
+#define MIGRATE_REASON						\
+	{MR_COMPACTION,		"compaction"},			\
+	{MR_MEMORY_FAILURE,	"memory_failure"},		\
+	{MR_MEMORY_HOTPLUG,	"memory_hotplug"},		\
+	{MR_SYSCALL,		"syscall_or_cpuset"},		\
+	{MR_MEMPOLICY_MBIND,	"mempolicy_mbind"},		\
+	{MR_CMA,		"cma"}
+
+TRACE_EVENT(mm_migrate_pages,
+
+	TP_PROTO(unsigned long succeeded, unsigned long failed,
+		 enum migrate_mode mode, int reason),
+
+	TP_ARGS(succeeded, failed, mode, reason),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,		succeeded)
+		__field(	unsigned long,		failed)
+		__field(	enum migrate_mode,	mode)
+		__field(	int,			reason)
+	),
+
+	TP_fast_assign(
+		__entry->succeeded	= succeeded;
+		__entry->failed		= failed;
+		__entry->mode		= mode;
+		__entry->reason		= reason;
+	),
+
+	TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s",
+		__entry->succeeded,
+		__entry->failed,
+		__print_symbolic(__entry->mode, MIGRATE_MODE),
+		__print_symbolic(__entry->reason, MIGRATE_REASON))
+);
+
+#endif /* _TRACE_MIGRATE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
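This TRACE_EVENT definition generates a trace_mm_migrate_pages() emitter; a
hedged sketch of the call a migration path would make (the counter names here
are placeholders):

	trace_mm_migrate_pages(nr_succeeded, nr_failed,
			       MIGRATE_SYNC, MR_COMPACTION);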
diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h
index 0a78028..6fae30f 100644
--- a/include/uapi/asm-generic/signal.h
+++ b/include/uapi/asm-generic/signal.h
@@ -80,12 +80,6 @@
  *	SA_RESTORER	0x04000000
  */
 
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 6e595ba..2c531f4 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -690,9 +690,11 @@
           compat_sys_process_vm_writev)
 #define __NR_kcmp 272
 __SYSCALL(__NR_kcmp, sys_kcmp)
+#define __NR_finit_module 273
+__SYSCALL(__NR_finit_module, sys_finit_module)
 
 #undef __NR_syscalls
-#define __NR_syscalls 273
+#define __NR_syscalls 274
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 1e3481e..8d1e2bb 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -778,6 +778,7 @@
 #define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3
 #define DRM_CAP_DUMB_PREFER_SHADOW 0x4
 #define DRM_CAP_PRIME 0x5
+#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6
 
 #define DRM_PRIME_CAP_IMPORT 0x1
 #define DRM_PRIME_CAP_EXPORT 0x2
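Userspace can probe the new capability through the existing DRM_IOCTL_GET_CAP
interface; a sketch, assuming an open DRM file descriptor fd and a placeholder
flag monotonic_timestamps:

	struct drm_get_cap cap = { .capability = DRM_CAP_TIMESTAMP_MONOTONIC };

	if (ioctl(fd, DRM_IOCTL_GET_CAP, &cap) == 0 && cap.value)
		monotonic_timestamps = 1; /* vblank times use CLOCK_MONOTONIC */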
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index c0494d5..e7f52c3 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -133,17 +133,26 @@
 	__u32	data;
 };
 
+enum drm_exynos_g2d_buf_type {
+	G2D_BUF_USERPTR = 1 << 31,
+};
+
 enum drm_exynos_g2d_event_type {
 	G2D_EVENT_NOT,
 	G2D_EVENT_NONSTOP,
 	G2D_EVENT_STOP,		/* not yet */
 };
 
+struct drm_exynos_g2d_userptr {
+	unsigned long userptr;
+	unsigned long size;
+};
+
 struct drm_exynos_g2d_set_cmdlist {
 	__u64					cmd;
-	__u64					cmd_gem;
+	__u64					cmd_buf;
 	__u32					cmd_nr;
-	__u32					cmd_gem_nr;
+	__u32					cmd_buf_nr;
 
 	/* for g2d event */
 	__u64					event_type;
@@ -154,6 +163,170 @@
 	__u64					async;
 };
 
+enum drm_exynos_ops_id {
+	EXYNOS_DRM_OPS_SRC,
+	EXYNOS_DRM_OPS_DST,
+	EXYNOS_DRM_OPS_MAX,
+};
+
+struct drm_exynos_sz {
+	__u32	hsize;
+	__u32	vsize;
+};
+
+struct drm_exynos_pos {
+	__u32	x;
+	__u32	y;
+	__u32	w;
+	__u32	h;
+};
+
+enum drm_exynos_flip {
+	EXYNOS_DRM_FLIP_NONE = (0 << 0),
+	EXYNOS_DRM_FLIP_VERTICAL = (1 << 0),
+	EXYNOS_DRM_FLIP_HORIZONTAL = (1 << 1),
+};
+
+enum drm_exynos_degree {
+	EXYNOS_DRM_DEGREE_0,
+	EXYNOS_DRM_DEGREE_90,
+	EXYNOS_DRM_DEGREE_180,
+	EXYNOS_DRM_DEGREE_270,
+};
+
+enum drm_exynos_planer {
+	EXYNOS_DRM_PLANAR_Y,
+	EXYNOS_DRM_PLANAR_CB,
+	EXYNOS_DRM_PLANAR_CR,
+	EXYNOS_DRM_PLANAR_MAX,
+};
+
+/**
+ * A structure for the list of properties supported by an ipp driver.
+ *
+ * @version: version of this structure.
+ * @ipp_id: id of ipp driver.
+ * @count: count of ipp drivers.
+ * @writeback: flag of writeback supporting.
+ * @flip: flag of flip supporting.
+ * @degree: flag of degree information.
+ * @csc: flag of csc supporting.
+ * @crop: flag of crop supporting.
+ * @scale: flag of scale supporting.
+ * @refresh_min: min hz of refresh.
+ * @refresh_max: max hz of refresh.
+ * @crop_min: crop min resolution.
+ * @crop_max: crop max resolution.
+ * @scale_min: scale min resolution.
+ * @scale_max: scale max resolution.
+ */
+struct drm_exynos_ipp_prop_list {
+	__u32	version;
+	__u32	ipp_id;
+	__u32	count;
+	__u32	writeback;
+	__u32	flip;
+	__u32	degree;
+	__u32	csc;
+	__u32	crop;
+	__u32	scale;
+	__u32	refresh_min;
+	__u32	refresh_max;
+	__u32	reserved;
+	struct drm_exynos_sz	crop_min;
+	struct drm_exynos_sz	crop_max;
+	struct drm_exynos_sz	scale_min;
+	struct drm_exynos_sz	scale_max;
+};
+
+/**
+ * A structure for ipp config.
+ *
+ * @ops_id: property of operation directions.
+ * @flip: property of mirror, flip.
+ * @degree: property of rotation degree.
+ * @fmt: property of image format.
+ * @sz: property of image size.
+ * @pos: property of image position (src-cropped, dst-scaler).
+ */
+struct drm_exynos_ipp_config {
+	enum drm_exynos_ops_id ops_id;
+	enum drm_exynos_flip	flip;
+	enum drm_exynos_degree	degree;
+	__u32	fmt;
+	struct drm_exynos_sz	sz;
+	struct drm_exynos_pos	pos;
+};
+
+enum drm_exynos_ipp_cmd {
+	IPP_CMD_NONE,
+	IPP_CMD_M2M,
+	IPP_CMD_WB,
+	IPP_CMD_OUTPUT,
+	IPP_CMD_MAX,
+};
+
+/**
+ * A structure for ipp property.
+ *
+ * @config: source, destination config.
+ * @cmd: definition of command.
+ * @ipp_id: id of ipp driver.
+ * @prop_id: id of property.
+ * @refresh_rate: refresh rate.
+ */
+struct drm_exynos_ipp_property {
+	struct drm_exynos_ipp_config config[EXYNOS_DRM_OPS_MAX];
+	enum drm_exynos_ipp_cmd	cmd;
+	__u32	ipp_id;
+	__u32	prop_id;
+	__u32	refresh_rate;
+};
+
+enum drm_exynos_ipp_buf_type {
+	IPP_BUF_ENQUEUE,
+	IPP_BUF_DEQUEUE,
+};
+
+/**
+ * A structure for ipp buffer operations.
+ *
+ * @ops_id: operation directions.
+ * @buf_type: definition of buffer.
+ * @prop_id: id of property.
+ * @buf_id: id of buffer.
+ * @handle: Y, Cb, Cr each planar handle.
+ * @user_data: user data.
+ */
+struct drm_exynos_ipp_queue_buf {
+	enum drm_exynos_ops_id	ops_id;
+	enum drm_exynos_ipp_buf_type	buf_type;
+	__u32	prop_id;
+	__u32	buf_id;
+	__u32	handle[EXYNOS_DRM_PLANAR_MAX];
+	__u32	reserved;
+	__u64	user_data;
+};
+
+enum drm_exynos_ipp_ctrl {
+	IPP_CTRL_PLAY,
+	IPP_CTRL_STOP,
+	IPP_CTRL_PAUSE,
+	IPP_CTRL_RESUME,
+	IPP_CTRL_MAX,
+};
+
+/**
+ * A structure for ipp start/stop operations.
+ *
+ * @prop_id: id of property.
+ * @ctrl: definition of control.
+ */
+struct drm_exynos_ipp_cmd_ctrl {
+	__u32	prop_id;
+	enum drm_exynos_ipp_ctrl	ctrl;
+};
+
 #define DRM_EXYNOS_GEM_CREATE		0x00
 #define DRM_EXYNOS_GEM_MAP_OFFSET	0x01
 #define DRM_EXYNOS_GEM_MMAP		0x02
@@ -166,6 +339,12 @@
 #define DRM_EXYNOS_G2D_SET_CMDLIST	0x21
 #define DRM_EXYNOS_G2D_EXEC		0x22
 
+/* IPP - Image Post Processing */
+#define DRM_EXYNOS_IPP_GET_PROPERTY	0x30
+#define DRM_EXYNOS_IPP_SET_PROPERTY	0x31
+#define DRM_EXYNOS_IPP_QUEUE_BUF	0x32
+#define DRM_EXYNOS_IPP_CMD_CTRL	0x33
+
 #define DRM_IOCTL_EXYNOS_GEM_CREATE		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create)
 
@@ -188,8 +367,18 @@
 #define DRM_IOCTL_EXYNOS_G2D_EXEC		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec)
 
+#define DRM_IOCTL_EXYNOS_IPP_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_GET_PROPERTY, struct drm_exynos_ipp_prop_list)
+#define DRM_IOCTL_EXYNOS_IPP_SET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_SET_PROPERTY, struct drm_exynos_ipp_property)
+#define DRM_IOCTL_EXYNOS_IPP_QUEUE_BUF	DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_QUEUE_BUF, struct drm_exynos_ipp_queue_buf)
+#define DRM_IOCTL_EXYNOS_IPP_CMD_CTRL		DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_CMD_CTRL, struct drm_exynos_ipp_cmd_ctrl)
+
 /* EXYNOS specific events */
 #define DRM_EXYNOS_G2D_EVENT		0x80000000
+#define DRM_EXYNOS_IPP_EVENT		0x80000001
 
 struct drm_exynos_g2d_event {
 	struct drm_event	base;
@@ -200,4 +389,14 @@
 	__u32			reserved;
 };
 
+struct drm_exynos_ipp_event {
+	struct drm_event	base;
+	__u64			user_data;
+	__u32			tv_sec;
+	__u32			tv_usec;
+	__u32			prop_id;
+	__u32			reserved;
+	__u32			buf_id[EXYNOS_DRM_OPS_MAX];
+};
+
 #endif /* _UAPI_EXYNOS_DRM_H_ */
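A hedged sketch of kicking off a configured IPP operation from userspace (fd
is an open Exynos DRM device; prop_id would come from a preceding
DRM_IOCTL_EXYNOS_IPP_SET_PROPERTY call):

	struct drm_exynos_ipp_cmd_ctrl ctrl = {
		.prop_id = prop_id,
		.ctrl	 = IPP_CTRL_PLAY,
	};

	if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_CMD_CTRL, &ctrl) < 0)
		perror("DRM_IOCTL_EXYNOS_IPP_CMD_CTRL");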
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4322b1e..b746a3c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -306,6 +306,7 @@
 #define I915_PARAM_HAS_SEMAPHORES	 20
 #define I915_PARAM_HAS_PRIME_VMAP_FLUSH	 21
 #define I915_PARAM_RSVD_FOR_FUTURE_USE	 22
+#define I915_PARAM_HAS_SECURE_BATCHES	 23
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -671,6 +672,11 @@
 /** Resets the SO write offset registers for transform feedback on gen7. */
 #define I915_EXEC_GEN7_SOL_RESET	(1<<8)
 
+/** Request a privileged ("secure") batch buffer. Note: only available for
+ * DRM_ROOT_ONLY | DRM_MASTER processes.
+ */
+#define I915_EXEC_SECURE		(1<<9)
+
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
 	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 4766c0f..eeda917 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -913,9 +913,11 @@
 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
 #define RADEON_CS_KEEP_TILING_FLAGS 0x01
 #define RADEON_CS_USE_VM            0x02
+#define RADEON_CS_END_OF_FRAME      0x04 /* a hint from userspace which CS is the last one */
 /* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
 #define RADEON_CS_RING_GFX          0
 #define RADEON_CS_RING_COMPUTE      1
+#define RADEON_CS_RING_DMA          2
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
 
@@ -966,6 +968,10 @@
 #define RADEON_INFO_MAX_PIPES		0x10
 /* timestamp for GL_ARB_timer_query (OpenGL), returns the current GPU clock */
 #define RADEON_INFO_TIMESTAMP		0x11
+/* max shader engines (SE) - needed for geometry shaders, etc. */
+#define RADEON_INFO_MAX_SE		0x12
+/* max SH per SE */
+#define RADEON_INFO_MAX_SH_PER_SE	0x13
 
 struct drm_radeon_info {
 	uint32_t		request;
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index afbb18a..5db2975 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -163,6 +163,9 @@
 
 struct br_mdb_entry {
 	__u32 ifindex;
+#define MDB_TEMPORARY 0
+#define MDB_PERMANENT 1
+	__u8 state;
 	struct {
 		union {
 			__be32	ip4;
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 12f68c7..873e086 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -23,6 +23,7 @@
 #define EXT4_SUPER_MAGIC	0xEF53
 #define BTRFS_SUPER_MAGIC	0x9123683E
 #define NILFS_SUPER_MAGIC	0x3434
+#define F2FS_SUPER_MAGIC	0xF2F52010
 #define HPFS_SUPER_MAGIC	0xf995e849
 #define ISOFS_SUPER_MAGIC	0x9660
 #define JFFS2_SUPER_MAGIC	0x72b6
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 23e62e0..0d11c3d 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -20,6 +20,7 @@
 	MPOL_PREFERRED,
 	MPOL_BIND,
 	MPOL_INTERLEAVE,
+	MPOL_LOCAL,
 	MPOL_MAX,	/* always last member of enum */
 };
 
@@ -47,9 +48,15 @@
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
+#define MPOL_MF_MOVE	 (1<<1)	/* Move pages owned by this process to conform
+				   to policy */
+#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to policy */
+#define MPOL_MF_LAZY	 (1<<3)	/* Modifies MPOL_MF_MOVE*: lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4)	/* Internal flags start here */
+
+#define MPOL_MF_VALID	(MPOL_MF_STRICT   | 	\
+			 MPOL_MF_MOVE     | 	\
+			 MPOL_MF_MOVE_ALL)
 
 /*
  * Internal flags that share the struct mempolicy flags word with
@@ -59,6 +66,8 @@
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
+#define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
+#define MPOL_F_MORON	(1 << 4) /* Migrate On pte_numa Reference On Node */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
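For illustration, a minimal mbind(2) call combining a policy with the
pre-existing move flag (addr and len are placeholders; note that MPOL_MF_LAZY
is excluded from MPOL_MF_VALID above, so in this revision it acts as a
kernel-internal modifier rather than part of the userspace ABI):

	#include <numaif.h>

	unsigned long nodemask = 1UL << 0;	/* node 0 only */

	if (mbind(addr, len, MPOL_BIND, &nodemask,
		  8 * sizeof(nodemask), MPOL_MF_MOVE) < 0)
		perror("mbind");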
diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h
new file mode 100644
index 0000000..38da425
--- /dev/null
+++ b/include/uapi/linux/module.h
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MODULE_H
+#define _UAPI_LINUX_MODULE_H
+
+/* Flags for sys_finit_module: */
+#define MODULE_INIT_IGNORE_MODVERSIONS	1
+#define MODULE_INIT_IGNORE_VERMAGIC	2
+
+#endif /* _UAPI_LINUX_MODULE_H */
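A hedged sketch of invoking the new syscall directly, since no libc wrapper
existed at this point ("./foo.ko" is a hypothetical module path):

	#include <fcntl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int fd = open("./foo.ko", O_RDONLY);

	if (fd < 0 || syscall(__NR_finit_module, fd, "", 0) < 0)
		perror("finit_module");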
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 1ef6c05..022ab18 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -73,7 +73,10 @@
 #define PTRACE_O_TRACEEXIT	(1 << PTRACE_EVENT_EXIT)
 #define PTRACE_O_TRACESECCOMP	(1 << PTRACE_EVENT_SECCOMP)
 
-#define PTRACE_O_MASK		0x000000ff
+/* eventless options */
+#define PTRACE_O_EXITKILL	(1 << 20)
+
+#define PTRACE_O_MASK		(0x000000ff | PTRACE_O_EXITKILL)
 
 #include <asm/ptrace.h>
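A tracer opts in through PTRACE_SETOPTIONS; with PTRACE_O_EXITKILL set, the
tracee is killed with SIGKILL when the tracer exits. A sketch, assuming pid is
an already-attached tracee:

	if (ptrace(PTRACE_SETOPTIONS, pid, 0,
		   (void *)PTRACE_O_EXITKILL) < 0)
		perror("PTRACE_SETOPTIONS");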
 
diff --git a/include/uapi/linux/signal.h b/include/uapi/linux/signal.h
index dff452e..e1bd50c2 100644
--- a/include/uapi/linux/signal.h
+++ b/include/uapi/linux/signal.h
@@ -4,5 +4,7 @@
 #include <asm/signal.h>
 #include <asm/siginfo.h>
 
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
 
 #endif /* _UAPI_LINUX_SIGNAL_H */
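These constants are consumed by sigaltstack(2); a minimal sketch that disables
the alternate signal stack:

	#include <signal.h>

	stack_t ss = { .ss_flags = SS_DISABLE };

	if (sigaltstack(&ss, NULL) < 0)
		perror("sigaltstack");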
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index e811474..0e011eb 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -45,7 +45,9 @@
 
 static inline __attribute_const__ __u16 __fswab16(__u16 val)
 {
-#ifdef __arch_swab16
+#ifdef __HAVE_BUILTIN_BSWAP16__
+	return __builtin_bswap16(val);
+#elif defined (__arch_swab16)
 	return __arch_swab16(val);
 #else
 	return ___constant_swab16(val);
@@ -54,7 +56,9 @@
 
 static inline __attribute_const__ __u32 __fswab32(__u32 val)
 {
-#ifdef __arch_swab32
+#ifdef __HAVE_BUILTIN_BSWAP32__
+	return __builtin_bswap32(val);
+#elif defined(__arch_swab32)
 	return __arch_swab32(val);
 #else
 	return ___constant_swab32(val);
@@ -63,7 +67,9 @@
 
 static inline __attribute_const__ __u64 __fswab64(__u64 val)
 {
-#ifdef __arch_swab64
+#ifdef __HAVE_BUILTIN_BSWAP64__
+	return __builtin_bswap64(val);
+#elif defined (__arch_swab64)
 	return __arch_swab64(val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
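Where the compiler provides them, these builtins lower to single byte-swap
instructions; a quick userspace illustration of the expected result:

	#include <stdint.h>

	uint32_t v = 0x12345678u;
	uint32_t s = __builtin_bswap32(v);	/* s == 0x78563412 */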
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 270fb22..a7630d0 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -37,5 +37,6 @@
 #define VIRTIO_ID_RPMSG		7 /* virtio remote processor messaging */
 #define VIRTIO_ID_SCSI		8 /* virtio scsi */
 #define VIRTIO_ID_9P		9 /* 9p virtio console */
+#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/video/omap-panel-tfp410.h b/include/video/omap-panel-tfp410.h
index 68c31d7..aef35e4 100644
--- a/include/video/omap-panel-tfp410.h
+++ b/include/video/omap-panel-tfp410.h
@@ -28,7 +28,7 @@
  * @power_down_gpio: gpio number for PD pin (or -1 if not available)
  */
 struct tfp410_platform_data {
-	u16 i2c_bus_num;
+	int i2c_bus_num;
 	int power_down_gpio;
 };
 
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index 2090881..f494292 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -177,6 +177,19 @@
 	evtchn_port_t port;
 };
 
+/*
+ * EVTCHNOP_reset: Close all event channels associated with the specified
+ * domain.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify a domain other
+ *     than DOMID_SELF.
+ */
+#define EVTCHNOP_reset		 10
+struct evtchn_reset {
+	/* IN parameters. */
+	domid_t dom;
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
 struct evtchn_op {
 	uint32_t cmd; /* EVTCHNOP_* */
 	union {
diff --git a/init/Kconfig b/init/Kconfig
index 2054e04..7d30240 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -717,6 +717,50 @@
 config HAVE_UNSTABLE_SCHED_CLOCK
 	bool
 
+#
+# For architectures that want to enable the support for NUMA-affine scheduler
+# balancing logic:
+#
+config ARCH_SUPPORTS_NUMA_BALANCING
+	bool
+
+# For architectures that (ab)use NUMA to represent different memory regions
+# all cpu-local but of different latencies, such as SuperH.
+#
+config ARCH_WANT_NUMA_VARIABLE_LOCALITY
+	bool
+
+#
+# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
+config ARCH_WANTS_PROT_NUMA_PROT_NONE
+	bool
+
+config ARCH_USES_NUMA_PROT_NONE
+	bool
+	default y
+	depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
+	depends on NUMA_BALANCING
+
+config NUMA_BALANCING_DEFAULT_ENABLED
+	bool "Automatically enable NUMA aware memory/task placement"
+	default y
+	depends on NUMA_BALANCING
+	help
+	  If set, automatic NUMA balancing will be enabled if running on a NUMA
+	  machine.
+
+config NUMA_BALANCING
+	bool "Memory placement aware NUMA scheduler"
+	depends on ARCH_SUPPORTS_NUMA_BALANCING
+	depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
+	depends on SMP && NUMA && MIGRATION
+	help
+	  This option adds support for automatic NUMA aware memory/task placement.
+	  The mechanism is quite primitive and is based on migrating memory to
+	  the node of the task that is referencing it.
+
+	  This system will be inactive on UMA systems.
+
 menuconfig CGROUPS
 	boolean "Control Group support"
 	depends on EVENTFD
@@ -838,7 +882,7 @@
 config MEMCG_KMEM
 	bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
 	depends on MEMCG && EXPERIMENTAL
-	default n
+	depends on SLUB || SLAB
 	help
 	  The Kernel Memory extension for Memory Resource Controller can limit
 	  the amount of memory used by kernel objects in the system. Those are
@@ -1025,11 +1069,9 @@
 	# Filesystems
 	depends on 9P_FS = n
 	depends on AFS_FS = n
-	depends on AUTOFS4_FS = n
 	depends on CEPH_FS = n
 	depends on CIFS = n
 	depends on CODA_FS = n
-	depends on FUSE_FS = n
 	depends on GFS2_FS = n
 	depends on NCP_FS = n
 	depends on NFSD = n
diff --git a/init/do_mounts.c b/init/do_mounts.c
index f8a6642..1d1b634 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -69,23 +69,28 @@
 __setup("rw", readwrite);
 
 #ifdef CONFIG_BLOCK
+struct uuidcmp {
+	const char *uuid;
+	int len;
+};
+
 /**
  * match_dev_by_uuid - callback for finding a partition using its uuid
  * @dev:	device passed in by the caller
- * @data:	opaque pointer to a 36 byte char array with a UUID
+ * @data:	opaque pointer to the desired struct uuidcmp to match
  *
  * Returns 1 if the device matches, and 0 otherwise.
  */
 static int match_dev_by_uuid(struct device *dev, void *data)
 {
-	u8 *uuid = data;
+	struct uuidcmp *cmp = data;
 	struct hd_struct *part = dev_to_part(dev);
 
 	if (!part->info)
 		goto no_match;
 
-	if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
-			goto no_match;
+	if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len))
+		goto no_match;
 
 	return 1;
 no_match:
@@ -95,7 +100,7 @@
 
 /**
  * devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid:	min 36 byte char array containing a hex ascii UUID
+ * @uuid:	char array containing an ASCII UUID
  *
  * The function will return the first partition which contains a matching
  * UUID value in its partition_meta_info struct.  This does not search
@@ -106,38 +111,41 @@
  *
  * Returns the matching dev_t on success or 0 on failure.
  */
-static dev_t devt_from_partuuid(char *uuid_str)
+static dev_t devt_from_partuuid(const char *uuid_str)
 {
 	dev_t res = 0;
+	struct uuidcmp cmp;
 	struct device *dev = NULL;
-	u8 uuid[16];
 	struct gendisk *disk;
 	struct hd_struct *part;
 	int offset = 0;
+	bool clear_root_wait = false;
+	char *slash;
 
-	if (strlen(uuid_str) < 36)
-		goto done;
+	cmp.uuid = uuid_str;
 
+	slash = strchr(uuid_str, '/');
 	/* Check for optional partition number offset attributes. */
-	if (uuid_str[36]) {
+	if (slash) {
 		char c = 0;
 		/* Explicitly fail on poor PARTUUID syntax. */
-		if (sscanf(&uuid_str[36],
-			   "/PARTNROFF=%d%c", &offset, &c) != 1) {
-			printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
-			 "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
-			if (root_wait)
-				printk(KERN_ERR
-				     "Disabling rootwait; root= is invalid.\n");
-			root_wait = 0;
+		if (sscanf(slash + 1,
+			   "PARTNROFF=%d%c", &offset, &c) != 1) {
+			clear_root_wait = true;
 			goto done;
 		}
+		cmp.len = slash - uuid_str;
+	} else {
+		cmp.len = strlen(uuid_str);
 	}
 
-	/* Pack the requested UUID in the expected format. */
-	part_pack_uuid(uuid_str, uuid);
+	if (!cmp.len) {
+		clear_root_wait = true;
+		goto done;
+	}
 
-	dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+	dev = class_find_device(&block_class, NULL, &cmp,
+				&match_dev_by_uuid);
 	if (!dev)
 		goto done;
 
@@ -158,6 +166,13 @@
 no_offset:
 	put_device(dev);
 done:
+	if (clear_root_wait) {
+		pr_err("VFS: PARTUUID= is invalid.\n"
+		       "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+		if (root_wait)
+			pr_err("Disabling rootwait; root= is invalid.\n");
+		root_wait = 0;
+	}
 	return res;
 }
 #endif
@@ -174,6 +189,10 @@
  *	   used when disk name of partitioned disk ends on a digit.
  *	6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
  *	   unique id of a partition if the partition table provides it.
+ *	   The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ *	   partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ *	   filled hex representation of the 32-bit "NT disk signature", and PP
+ *	   is a zero-filled hex representation of the 1-based partition number.
  *	7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
  *	   a partition with a known unique id.
  *
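For illustration, using the sample UUID from point 6, the PARTNROFF form
selects the partition one past the matched one:

	root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1

and a hypothetical MSDOS-style reference would be root=PARTUUID=0002fd74-01.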
diff --git a/init/main.c b/init/main.c
index 6af5470..85d69df 100644
--- a/init/main.c
+++ b/init/main.c
@@ -463,10 +463,6 @@
 	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
-#ifdef CONFIG_X86
-	if (efi_enabled)
-		efi_enter_virtual_mode();
-#endif
 }
 
 asmlinkage void __init start_kernel(void)
@@ -607,6 +603,10 @@
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
+#ifdef CONFIG_X86
+	if (efi_enabled)
+		efi_enter_virtual_mode();
+#endif
 	thread_info_cache_init();
 	cred_init();
 	fork_init(totalram_pages);
@@ -797,7 +797,9 @@
 static int run_init_process(const char *init_filename)
 {
 	argv_init[0] = init_filename;
-	return kernel_execve(init_filename, argv_init, envp_init);
+	return do_execve(init_filename,
+		(const char __user *const __user *)argv_init,
+		(const char __user *const __user *)envp_init);
 }
 
 static void __init kernel_init_freeable(void);
@@ -812,7 +814,6 @@
 	system_state = SYSTEM_RUNNING;
 	numa_default_policy();
 
-	current->signal->flags |= SIGNAL_UNKILLABLE;
 	flush_delayed_fput();
 
 	if (ramdisk_execute_command) {
diff --git a/init/version.c b/init/version.c
index 86fe0cc..58170f1 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
 #include <linux/utsname.h>
 #include <generated/utsrelease.h>
 #include <linux/version.h>
+#include <linux/proc_fs.h>
 
 #ifndef CONFIG_KALLSYMS
 #define version(a) Version_ ## a
@@ -34,6 +35,7 @@
 		.domainname	= UTS_DOMAINNAME,
 	},
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_UTS_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_uts_ns);
 
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 26143d3..6471f1b 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -16,6 +16,7 @@
 #include <linux/msg.h>
 #include <linux/ipc_namespace.h>
 #include <linux/utsname.h>
+#include <linux/proc_fs.h>
 #include <asm/uaccess.h>
 
 #include "util.h"
@@ -30,6 +31,7 @@
 struct ipc_namespace init_ipc_ns = {
 	.count		= ATOMIC_INIT(1),
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_IPC_INIT_INO,
 };
 
 atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index f362298c..7c1fa45 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,7 +16,7 @@
 
 #include "util.h"
 
-static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
+static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 					   struct ipc_namespace *old_ns)
 {
 	struct ipc_namespace *ns;
@@ -26,9 +26,16 @@
 	if (ns == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	atomic_set(&ns->count, 1);
 	err = mq_init_ns(ns);
 	if (err) {
+		proc_free_inum(ns->proc_inum);
 		kfree(ns);
 		return ERR_PTR(err);
 	}
@@ -46,19 +53,17 @@
 	ipcns_notify(IPCNS_CREATED);
 	register_ipcns_notifier(ns);
 
-	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+	ns->user_ns = get_user_ns(user_ns);
 
 	return ns;
 }
 
 struct ipc_namespace *copy_ipcs(unsigned long flags,
-				struct task_struct *tsk)
+	struct user_namespace *user_ns, struct ipc_namespace *ns)
 {
-	struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
-
 	if (!(flags & CLONE_NEWIPC))
 		return get_ipc_ns(ns);
-	return create_ipc_ns(tsk, ns);
+	return create_ipc_ns(user_ns, ns);
 }
 
 /*
@@ -113,6 +118,7 @@
 	 */
 	ipcns_notify(IPCNS_REMOVED);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -161,8 +167,13 @@
 	return put_ipc_ns(ns);
 }
 
-static int ipcns_install(struct nsproxy *nsproxy, void *ns)
+static int ipcns_install(struct nsproxy *nsproxy, void *new)
 {
+	struct ipc_namespace *ns = new;
+	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	/* Ditch state from the old ipc namespace */
 	exit_sem(current);
 	put_ipc_ns(nsproxy->ipc_ns);
@@ -170,10 +181,18 @@
 	return 0;
 }
 
+static unsigned int ipcns_inum(void *vp)
+{
+	struct ipc_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations ipcns_operations = {
 	.name		= "ipc",
 	.type		= CLONE_NEWIPC,
 	.get		= ipcns_get,
 	.put		= ipcns_put,
 	.install	= ipcns_install,
+	.inum		= ipcns_inum,
 };
diff --git a/kernel/Makefile b/kernel/Makefile
index ac0d533..6c072b6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -54,7 +54,7 @@
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
@@ -137,10 +137,14 @@
 #
 # Pull the signing certificate and any extra certificates into the kernel
 #
-extra_certificates:
-	touch $@
 
-kernel/modsign_pubkey.o: signing_key.x509 extra_certificates
+quiet_cmd_touch = TOUCH   $@
+      cmd_touch = touch   $@
+
+extra_certificates:
+	$(call cmd,touch)
+
+kernel/modsign_certificate.o: signing_key.x509 extra_certificates
 
 ###############################################################################
 #
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ed206fd..e81175e 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -249,7 +249,7 @@
 		list_del_rcu(&chunk->hash);
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		goto out;
 	}
 
@@ -291,7 +291,7 @@
 		owner->root = new;
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
-	fsnotify_destroy_mark(entry);
+	fsnotify_destroy_mark(entry, audit_tree_group);
 	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
@@ -331,7 +331,7 @@
 		spin_unlock(&hash_lock);
 		chunk->dead = 1;
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		fsnotify_put_mark(entry);
 		return 0;
 	}
@@ -412,7 +412,7 @@
 		spin_unlock(&chunk_entry->lock);
 		spin_unlock(&old_entry->lock);
 
-		fsnotify_destroy_mark(chunk_entry);
+		fsnotify_destroy_mark(chunk_entry, audit_tree_group);
 
 		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
@@ -443,7 +443,7 @@
 	spin_unlock(&hash_lock);
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
-	fsnotify_destroy_mark(old_entry);
+	fsnotify_destroy_mark(old_entry, audit_tree_group);
 	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
 	return 0;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9a9ae6e..4a599f6 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -350,7 +350,7 @@
 	}
 	mutex_unlock(&audit_filter_mutex);
 
-	fsnotify_destroy_mark(&parent->mark);
+	fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 }
 
 /* Get path information necessary for adding watches. */
@@ -457,7 +457,7 @@
 
 		if (list_empty(&parent->watches)) {
 			audit_get_parent(parent);
-			fsnotify_destroy_mark(&parent->mark);
+			fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 			audit_put_parent(parent);
 		}
 	}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f34c41b..4855892 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1333,7 +1333,6 @@
 	if (ret)
 		goto out_unlock;
 
-	/* See feature-removal-schedule.txt */
 	if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent)
 		pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
 			   task_tgid_nr(current), current->comm);
@@ -3409,7 +3408,7 @@
 {
 	struct cgroup_pidlist *l;
 	/* don't need task_nsproxy() if we're looking at ourself */
-	struct pid_namespace *ns = current->nsproxy->pid_ns;
+	struct pid_namespace *ns = task_active_pid_ns(current);
 
 	/*
 	 * We can't drop the pidlist_mutex before taking the l->mutex in case
diff --git a/kernel/compat.c b/kernel/compat.c
index c28a306..f6150e9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1215,6 +1215,23 @@
 	return 0;
 }
 
+#ifdef __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
+asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+						 struct compat_timespec __user *interval)
+{
+	struct timespec t;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
+	set_fs(old_fs);
+	if (put_compat_timespec(&t, interval))
+		return -EFAULT;
+	return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL */
+
 /*
  * Allocate user-space memory for the duration of a single system call,
  * in order to marshall parameters inside a compat thunk.
diff --git a/kernel/cred.c b/kernel/cred.c
index 48cea3d..e0573a4 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -30,17 +30,6 @@
 static struct kmem_cache *cred_jar;
 
 /*
- * The common credentials for the initial task's thread group
- */
-#ifdef CONFIG_KEYS
-static struct thread_group_cred init_tgcred = {
-	.usage	= ATOMIC_INIT(2),
-	.tgid	= 0,
-	.lock	= __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
-};
-#endif
-
-/*
  * The initial credentials for the initial task
  */
 struct cred init_cred = {
@@ -65,9 +54,6 @@
 	.user			= INIT_USER,
 	.user_ns		= &init_user_ns,
 	.group_info		= &init_groups,
-#ifdef CONFIG_KEYS
-	.tgcred			= &init_tgcred,
-#endif
 };
 
 static inline void set_cred_subscribers(struct cred *cred, int n)
@@ -96,36 +82,6 @@
 }
 
 /*
- * Dispose of the shared task group credentials
- */
-#ifdef CONFIG_KEYS
-static void release_tgcred_rcu(struct rcu_head *rcu)
-{
-	struct thread_group_cred *tgcred =
-		container_of(rcu, struct thread_group_cred, rcu);
-
-	BUG_ON(atomic_read(&tgcred->usage) != 0);
-
-	key_put(tgcred->session_keyring);
-	key_put(tgcred->process_keyring);
-	kfree(tgcred);
-}
-#endif
-
-/*
- * Release a set of thread group credentials.
- */
-static void release_tgcred(struct cred *cred)
-{
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred = cred->tgcred;
-
-	if (atomic_dec_and_test(&tgcred->usage))
-		call_rcu(&tgcred->rcu, release_tgcred_rcu);
-#endif
-}
-
-/*
  * The RCU callback to actually dispose of a set of credentials
  */
 static void put_cred_rcu(struct rcu_head *rcu)
@@ -150,9 +106,10 @@
 #endif
 
 	security_cred_free(cred);
+	key_put(cred->session_keyring);
+	key_put(cred->process_keyring);
 	key_put(cred->thread_keyring);
 	key_put(cred->request_key_auth);
-	release_tgcred(cred);
 	if (cred->group_info)
 		put_group_info(cred->group_info);
 	free_uid(cred->user);
@@ -246,15 +203,6 @@
 	if (!new)
 		return NULL;
 
-#ifdef CONFIG_KEYS
-	new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
-	if (!new->tgcred) {
-		kmem_cache_free(cred_jar, new);
-		return NULL;
-	}
-	atomic_set(&new->tgcred->usage, 1);
-#endif
-
 	atomic_set(&new->usage, 1);
 #ifdef CONFIG_DEBUG_CREDENTIALS
 	new->magic = CRED_MAGIC;
@@ -308,9 +256,10 @@
 	get_user_ns(new->user_ns);
 
 #ifdef CONFIG_KEYS
+	key_get(new->session_keyring);
+	key_get(new->process_keyring);
 	key_get(new->thread_keyring);
 	key_get(new->request_key_auth);
-	atomic_inc(&new->tgcred->usage);
 #endif
 
 #ifdef CONFIG_SECURITY
@@ -334,39 +283,20 @@
  */
 struct cred *prepare_exec_creds(void)
 {
-	struct thread_group_cred *tgcred = NULL;
 	struct cred *new;
 
-#ifdef CONFIG_KEYS
-	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-	if (!tgcred)
-		return NULL;
-#endif
-
 	new = prepare_creds();
-	if (!new) {
-		kfree(tgcred);
+	if (!new)
 		return new;
-	}
 
 #ifdef CONFIG_KEYS
 	/* newly exec'd tasks don't get a thread keyring */
 	key_put(new->thread_keyring);
 	new->thread_keyring = NULL;
 
-	/* create a new per-thread-group creds for all this set of threads to
-	 * share */
-	memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred));
-
-	atomic_set(&tgcred->usage, 1);
-	spin_lock_init(&tgcred->lock);
-
 	/* inherit the session keyring; new process keyring */
-	key_get(tgcred->session_keyring);
-	tgcred->process_keyring = NULL;
-
-	release_tgcred(new);
-	new->tgcred = tgcred;
+	key_put(new->process_keyring);
+	new->process_keyring = NULL;
 #endif
 
 	return new;
@@ -383,9 +313,6 @@
  */
 int copy_creds(struct task_struct *p, unsigned long clone_flags)
 {
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred;
-#endif
 	struct cred *new;
 	int ret;
 
@@ -425,22 +352,12 @@
 			install_thread_keyring_to_cred(new);
 	}
 
-	/* we share the process and session keyrings between all the threads in
-	 * a process - this is slightly icky as we violate COW credentials a
-	 * bit */
+	/* The process keyring is only shared between the threads in a process;
+	 * anything outside of those threads doesn't inherit.
+	 */
 	if (!(clone_flags & CLONE_THREAD)) {
-		tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-		if (!tgcred) {
-			ret = -ENOMEM;
-			goto error_put;
-		}
-		atomic_set(&tgcred->usage, 1);
-		spin_lock_init(&tgcred->lock);
-		tgcred->process_keyring = NULL;
-		tgcred->session_keyring = key_get(new->tgcred->session_keyring);
-
-		release_tgcred(new);
-		new->tgcred = tgcred;
+		key_put(new->process_keyring);
+		new->process_keyring = NULL;
 	}
 #endif
 
@@ -455,6 +372,31 @@
 	return ret;
 }
 
+static bool cred_cap_issubset(const struct cred *set, const struct cred *subset)
+{
+	const struct user_namespace *set_ns = set->user_ns;
+	const struct user_namespace *subset_ns = subset->user_ns;
+
+	/* If the two credentials are in the same user namespace see if
+	 * the capabilities of subset are a subset of set.
+	 */
+	if (set_ns == subset_ns)
+		return cap_issubset(subset->cap_permitted, set->cap_permitted);
+
+	/* The credentials are in different user namespaces,
+	 * therefore one is a subset of the other only if set is an
+	 * ancestor of subset and set->euid is the owner of subset or one
+	 * of subset's ancestors.
+	 */
+	for (; subset_ns != &init_user_ns; subset_ns = subset_ns->parent) {
+		if ((set_ns == subset_ns->parent) &&
+		    uid_eq(subset_ns->owner, set->euid))
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * commit_creds - Install new credentials upon the current task
  * @new: The credentials to be assigned
@@ -493,7 +435,7 @@
 	    !gid_eq(old->egid, new->egid) ||
 	    !uid_eq(old->fsuid, new->fsuid) ||
 	    !gid_eq(old->fsgid, new->fsgid) ||
-	    !cap_issubset(new->cap_permitted, old->cap_permitted)) {
+	    !cred_cap_issubset(old, new)) {
 		if (task->mm)
 			set_dumpable(task->mm, suid_dumpable);
 		task->pdeath_signal = 0;
@@ -643,9 +585,6 @@
  */
 struct cred *prepare_kernel_cred(struct task_struct *daemon)
 {
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred;
-#endif
 	const struct cred *old;
 	struct cred *new;
 
@@ -653,14 +592,6 @@
 	if (!new)
 		return NULL;
 
-#ifdef CONFIG_KEYS
-	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-	if (!tgcred) {
-		kmem_cache_free(cred_jar, new);
-		return NULL;
-	}
-#endif
-
 	kdebug("prepare_kernel_cred() alloc %p", new);
 
 	if (daemon)
@@ -678,13 +609,10 @@
 	get_group_info(new->group_info);
 
 #ifdef CONFIG_KEYS
-	atomic_set(&tgcred->usage, 1);
-	spin_lock_init(&tgcred->lock);
-	tgcred->process_keyring = NULL;
-	tgcred->session_keyring = NULL;
-	new->tgcred = tgcred;
-	new->request_key_auth = NULL;
+	new->session_keyring = NULL;
+	new->process_keyring = NULL;
 	new->thread_keyring = NULL;
+	new->request_key_auth = NULL;
 	new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
 #endif
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f9ff549..301079d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6155,7 +6155,7 @@
 
 	event->parent		= parent_event;
 
-	event->ns		= get_pid_ns(current->nsproxy->pid_ns);
+	event->ns		= get_pid_ns(task_active_pid_ns(current));
 	event->id		= atomic64_inc_return(&perf_event_id);
 
 	event->state		= PERF_EVENT_STATE_INACTIVE;
diff --git a/kernel/exit.c b/kernel/exit.c
index 50d2e93..b4df219 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,18 +72,6 @@
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
 		__this_cpu_dec(process_counts);
-		/*
-		 * If we are the last child process in a pid namespace to be
-		 * reaped, notify the reaper sleeping zap_pid_ns_processes().
-		 */
-		if (IS_ENABLED(CONFIG_PID_NS)) {
-			struct task_struct *parent = p->real_parent;
-
-			if ((task_active_pid_ns(parent)->child_reaper == parent) &&
-			    list_empty(&parent->children) &&
-			    (parent->flags & PF_EXITING))
-				wake_up_process(parent);
-		}
 	}
 	list_del_rcu(&p->thread_group);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 3c31e87..a31b823 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,7 +146,7 @@
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
 					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
@@ -823,6 +823,9 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	mm->first_nid = NUMA_PTE_SCAN_INIT;
+#endif
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
@@ -1041,8 +1044,6 @@
 	atomic_set(&sig->live, 1);
 	atomic_set(&sig->sigcnt, 1);
 	init_waitqueue_head(&sig->wait_chldexit);
-	if (clone_flags & CLONE_NEWPID)
-		sig->flags |= SIGNAL_UNKILLABLE;
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
@@ -1435,8 +1436,10 @@
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
 		if (thread_group_leader(p)) {
-			if (is_child_reaper(pid))
-				p->nsproxy->pid_ns->child_reaper = p;
+			if (is_child_reaper(pid)) {
+				ns_of_pid(pid)->child_reaper = p;
+				p->signal->flags |= SIGNAL_UNKILLABLE;
+			}
 
 			p->signal->leader_pid = pid;
 			p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1470,8 +1473,6 @@
 	if (p->io_context)
 		exit_io_context(p);
 bad_fork_cleanup_namespaces:
-	if (unlikely(clone_flags & CLONE_NEWPID))
-		pid_ns_release_proc(p->nsproxy->pid_ns);
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm)
@@ -1551,15 +1552,9 @@
 	 * Do some preliminary argument and permissions checking before we
 	 * actually start allocating stuff
 	 */
-	if (clone_flags & CLONE_NEWUSER) {
-		if (clone_flags & CLONE_THREAD)
+	if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
+		if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
 			return -EINVAL;
-		/* hopefully this check will go away when userns support is
-		 * complete
-		 */
-		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
-				!capable(CAP_SETGID))
-			return -EPERM;
 	}
 
 	/*
@@ -1618,7 +1613,6 @@
 	return nr;
 }
 
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 /*
  * Create a kernel thread.
  */
@@ -1627,7 +1621,6 @@
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
-#endif
 
 #ifdef __ARCH_WANT_SYS_FORK
 SYSCALL_DEFINE0(fork)
@@ -1721,7 +1714,8 @@
 {
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
+				CLONE_NEWUSER|CLONE_NEWPID))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing to
@@ -1788,19 +1782,40 @@
 {
 	struct fs_struct *fs, *new_fs = NULL;
 	struct files_struct *fd, *new_fd = NULL;
+	struct cred *new_cred = NULL;
 	struct nsproxy *new_nsproxy = NULL;
 	int do_sysvsem = 0;
 	int err;
 
-	err = check_unshare_flags(unshare_flags);
-	if (err)
-		goto bad_unshare_out;
-
+	/*
+	 * If unsharing a user namespace, must also unshare the thread.
+	 */
+	if (unshare_flags & CLONE_NEWUSER)
+		unshare_flags |= CLONE_THREAD;
+	/*
+	 * If unsharing a pid namespace, must also unshare the thread.
+	 */
+	if (unshare_flags & CLONE_NEWPID)
+		unshare_flags |= CLONE_THREAD;
+	/*
+	 * If unsharing a thread from a thread group, must also unshare vm.
+	 */
+	if (unshare_flags & CLONE_THREAD)
+		unshare_flags |= CLONE_VM;
+	/*
+	 * If unsharing vm, must also unshare signal handlers.
+	 */
+	if (unshare_flags & CLONE_VM)
+		unshare_flags |= CLONE_SIGHAND;
 	/*
 	 * If unsharing namespace, must also unshare filesystem information.
 	 */
 	if (unshare_flags & CLONE_NEWNS)
 		unshare_flags |= CLONE_FS;
+
+	err = check_unshare_flags(unshare_flags);
+	if (err)
+		goto bad_unshare_out;
 	/*
 	 * CLONE_NEWIPC must also detach from the undolist: after switching
 	 * to a new ipc namespace, the semaphore arrays from the old
@@ -1814,11 +1829,15 @@
 	err = unshare_fd(unshare_flags, &new_fd);
 	if (err)
 		goto bad_unshare_cleanup_fs;
-	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+	err = unshare_userns(unshare_flags, &new_cred);
 	if (err)
 		goto bad_unshare_cleanup_fd;
+	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
+					 new_cred, new_fs);
+	if (err)
+		goto bad_unshare_cleanup_cred;
 
-	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
+	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
 			 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1851,11 +1870,20 @@
 		}
 
 		task_unlock(current);
+
+		if (new_cred) {
+			/* Install the new user namespace */
+			commit_creds(new_cred);
+			new_cred = NULL;
+		}
 	}
 
 	if (new_nsproxy)
 		put_nsproxy(new_nsproxy);
 
+bad_unshare_cleanup_cred:
+	if (new_cred)
+		put_cred(new_cred);
 bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
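From userspace, the net effect is that a single-threaded, unprivileged process
can now request a user namespace with a plain unshare(2); the implied
CLONE_THREAD/CLONE_VM/CLONE_SIGHAND requirements are satisfied trivially when
there is only one thread. A hedged sketch:

	#define _GNU_SOURCE
	#include <sched.h>

	if (unshare(CLONE_NEWUSER) < 0)
		perror("unshare");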
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 35c70c9..e49a288 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -818,7 +818,7 @@
 	action = kthread_data(tsk);
 
 	pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
+	       tsk->comm, tsk->pid, action->irq);
 
 
 	desc = irq_to_desc(action->irq);
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 30b7b22..e30ac0f 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -4,6 +4,7 @@
 #include <linux/string.h>
 #include <linux/random.h>
 #include <linux/module.h>
+#include <linux/ptrace.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/cache.h>
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1c317e3..0023a87 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -219,9 +219,9 @@
 
 	commit_creds(new);
 
-	retval = kernel_execve(sub_info->path,
-			       (const char *const *)sub_info->argv,
-			       (const char *const *)sub_info->envp);
+	retval = do_execve(sub_info->path,
+			   (const char __user *const __user *)sub_info->argv,
+			   (const char __user *const __user *)sub_info->envp);
 	if (!retval)
 		return 0;
 
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
new file mode 100644
index 0000000..246b4c6
--- /dev/null
+++ b/kernel/modsign_certificate.S
@@ -0,0 +1,19 @@
+/* SYMBOL_PREFIX defined on the command line from CONFIG_SYMBOL_PREFIX */
+#ifndef SYMBOL_PREFIX
+#define ASM_SYMBOL(sym) sym
+#else
+#define PASTE2(x,y) x##y
+#define PASTE(x,y) PASTE2(x,y)
+#define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
+#endif
+
+#define GLOBAL(name)	\
+	.globl ASM_SYMBOL(name);	\
+	ASM_SYMBOL(name):
+
+	.section ".init.data","aw"
+
+GLOBAL(modsign_certificate_list)
+	.incbin "signing_key.x509"
+	.incbin "extra_certificates"
+GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
index 767e559..2b6e699 100644
--- a/kernel/modsign_pubkey.c
+++ b/kernel/modsign_pubkey.c
@@ -20,12 +20,6 @@
 
 extern __initdata const u8 modsign_certificate_list[];
 extern __initdata const u8 modsign_certificate_list_end[];
-asm(".section .init.data,\"aw\"\n"
-    SYMBOL_PREFIX "modsign_certificate_list:\n"
-    ".incbin \"signing_key.x509\"\n"
-    ".incbin \"extra_certificates\"\n"
-    SYMBOL_PREFIX "modsign_certificate_list_end:"
-    );
 
 /*
  * We need to make sure ccache doesn't cache the .o file as it doesn't notice
@@ -40,18 +34,15 @@
 {
 	pr_notice("Initialise module verification\n");
 
-	modsign_keyring = key_alloc(&key_type_keyring, ".module_sign",
-				    KUIDT_INIT(0), KGIDT_INIT(0),
-				    current_cred(),
-				    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-				    KEY_USR_VIEW | KEY_USR_READ,
-				    KEY_ALLOC_NOT_IN_QUOTA);
+	modsign_keyring = keyring_alloc(".module_sign",
+					KUIDT_INIT(0), KGIDT_INIT(0),
+					current_cred(),
+					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+					 KEY_USR_VIEW | KEY_USR_READ),
+					KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(modsign_keyring))
 		panic("Can't allocate module signing keyring\n");
 
-	if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0)
-		panic("Can't instantiate module signing keyring\n");
-
 	return 0;
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 6e48c3a..250092c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -21,6 +21,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
@@ -28,6 +29,7 @@
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
 #include <linux/proc_fs.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -59,6 +61,7 @@
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
 #include <linux/fips.h>
+#include <uapi/linux/module.h>
 #include "module-internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -372,9 +375,6 @@
 			printk(KERN_WARNING "Symbol %s is being used "
 			       "by a non-GPL module, which will not "
 			       "be allowed in the future\n", fsa->name);
-			printk(KERN_WARNING "Please see the file "
-			       "Documentation/feature-removal-schedule.txt "
-			       "in the kernel source tree for more details.\n");
 		}
 	}
 
@@ -2282,7 +2282,7 @@
 	Elf_Shdr *symsect = info->sechdrs + info->index.sym;
 	Elf_Shdr *strsect = info->sechdrs + info->index.str;
 	const Elf_Sym *src;
-	unsigned int i, nsrc, ndst, strtab_size;
+	unsigned int i, nsrc, ndst, strtab_size = 0;
 
 	/* Put symbol section at end of init part of module. */
 	symsect->sh_flags |= SHF_ALLOC;
@@ -2293,9 +2293,6 @@
 	src = (void *)info->hdr + symsect->sh_offset;
 	nsrc = symsect->sh_size / sizeof(*src);
 
-	/* strtab always starts with a nul, so offset 0 is the empty string. */
-	strtab_size = 1;
-
 	/* Compute total space required for the core symbols' strtab. */
 	for (ndst = i = 0; i < nsrc; i++) {
 		if (i == 0 ||
@@ -2337,7 +2334,6 @@
 	mod->core_symtab = dst = mod->module_core + info->symoffs;
 	mod->core_strtab = s = mod->module_core + info->stroffs;
 	src = mod->symtab;
-	*s++ = 0;
 	for (ndst = i = 0; i < mod->num_symtab; i++) {
 		if (i == 0 ||
 		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
@@ -2378,7 +2374,7 @@
 
 void * __weak module_alloc(unsigned long size)
 {
-	return size == 0 ? NULL : vmalloc_exec(size);
+	return vmalloc_exec(size);
 }
 
 static void *module_alloc_update_bounds(unsigned long size)
@@ -2425,18 +2421,17 @@
 #endif
 
 #ifdef CONFIG_MODULE_SIG
-static int module_sig_check(struct load_info *info,
-			    const void *mod, unsigned long *_len)
+static int module_sig_check(struct load_info *info)
 {
 	int err = -ENOKEY;
-	unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
-	unsigned long len = *_len;
+	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	const void *mod = info->hdr;
 
-	if (len > markerlen &&
-	    memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+	if (info->len > markerlen &&
+	    memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
 		/* We truncate the module to discard the signature */
-		*_len -= markerlen;
-		err = mod_verify_sig(mod, _len);
+		info->len -= markerlen;
+		err = mod_verify_sig(mod, &info->len);
 	}
 
 	if (!err) {
@@ -2454,59 +2449,107 @@
 	return err;
 }
 #else /* !CONFIG_MODULE_SIG */
-static int module_sig_check(struct load_info *info,
-			    void *mod, unsigned long *len)
+static int module_sig_check(struct load_info *info)
 {
 	return 0;
 }
 #endif /* !CONFIG_MODULE_SIG */
 
-/* Sets info->hdr, info->len and info->sig_ok. */
-static int copy_and_check(struct load_info *info,
-			  const void __user *umod, unsigned long len,
-			  const char __user *uargs)
+/* Sanity checks against invalid binaries, wrong arch, weird elf version. */
+static int elf_header_check(struct load_info *info)
 {
-	int err;
-	Elf_Ehdr *hdr;
-
-	if (len < sizeof(*hdr))
+	if (info->len < sizeof(*(info->hdr)))
 		return -ENOEXEC;
 
+	if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
+	    || info->hdr->e_type != ET_REL
+	    || !elf_check_arch(info->hdr)
+	    || info->hdr->e_shentsize != sizeof(Elf_Shdr))
+		return -ENOEXEC;
+
+	if (info->hdr->e_shoff >= info->len
+	    || (info->hdr->e_shnum * sizeof(Elf_Shdr) >
+		info->len - info->hdr->e_shoff))
+		return -ENOEXEC;
+
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_user(const void __user *umod, unsigned long len,
+				  struct load_info *info)
+{
+	int err;
+
+	info->len = len;
+	if (info->len < sizeof(*(info->hdr)))
+		return -ENOEXEC;
+
+	err = security_kernel_module_from_file(NULL);
+	if (err)
+		return err;
+
 	/* Suck in entire file: we'll want most of it. */
-	if ((hdr = vmalloc(len)) == NULL)
+	info->hdr = vmalloc(info->len);
+	if (!info->hdr)
 		return -ENOMEM;
 
-	if (copy_from_user(hdr, umod, len) != 0) {
-		err = -EFAULT;
-		goto free_hdr;
+	if (copy_from_user(info->hdr, umod, info->len) != 0) {
+		vfree(info->hdr);
+		return -EFAULT;
 	}
 
-	err = module_sig_check(info, hdr, &len);
-	if (err)
-		goto free_hdr;
-
-	/* Sanity checks against insmoding binaries or wrong arch,
-	   weird elf version */
-	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
-	    || hdr->e_type != ET_REL
-	    || !elf_check_arch(hdr)
-	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
-		err = -ENOEXEC;
-		goto free_hdr;
-	}
-
-	if (hdr->e_shoff >= len ||
-	    hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
-		err = -ENOEXEC;
-		goto free_hdr;
-	}
-
-	info->hdr = hdr;
-	info->len = len;
 	return 0;
+}
 
-free_hdr:
-	vfree(hdr);
+/* Sets info->hdr and info->len. */
+static int copy_module_from_fd(int fd, struct load_info *info)
+{
+	struct file *file;
+	int err;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	file = fget(fd);
+	if (!file)
+		return -ENOEXEC;
+
+	err = security_kernel_module_from_file(file);
+	if (err)
+		goto out;
+
+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
+	if (err)
+		goto out;
+
+	if (stat.size > INT_MAX) {
+		err = -EFBIG;
+		goto out;
+	}
+	info->hdr = vmalloc(stat.size);
+	if (!info->hdr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(info->hdr);
+			err = bytes;
+			goto out;
+		}
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+	info->len = pos;
+
+out:
+	fput(file);
 	return err;
 }
 
@@ -2515,7 +2558,7 @@
 	vfree(info->hdr);
 }
 
-static int rewrite_section_headers(struct load_info *info)
+static int rewrite_section_headers(struct load_info *info, int flags)
 {
 	unsigned int i;
 
@@ -2543,7 +2586,10 @@
 	}
 
 	/* Track but don't keep modinfo and version sections. */
-	info->index.vers = find_sec(info, "__versions");
+	if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
+		info->index.vers = 0; /* Pretend no __versions section! */
+	else
+		info->index.vers = find_sec(info, "__versions");
 	info->index.info = find_sec(info, ".modinfo");
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -2558,7 +2604,7 @@
  * Return the temporary module pointer (we'll replace it with the final
  * one when we move the module sections around).
  */
-static struct module *setup_load_info(struct load_info *info)
+static struct module *setup_load_info(struct load_info *info, int flags)
 {
 	unsigned int i;
 	int err;
@@ -2569,7 +2615,7 @@
 	info->secstrings = (void *)info->hdr
 		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;
 
-	err = rewrite_section_headers(info);
+	err = rewrite_section_headers(info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2607,11 +2653,14 @@
 	return mod;
 }
 
-static int check_modinfo(struct module *mod, struct load_info *info)
+static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 {
 	const char *modmagic = get_modinfo(info, "vermagic");
 	int err;
 
+	if (flags & MODULE_INIT_IGNORE_VERMAGIC)
+		modmagic = NULL;
+
 	/* This is allowed: modprobe --force will invalidate it. */
 	if (!modmagic) {
 		err = try_to_force_load(mod, "bad vermagic");
@@ -2741,20 +2790,23 @@
 	memset(ptr, 0, mod->core_size);
 	mod->module_core = ptr;
 
-	ptr = module_alloc_update_bounds(mod->init_size);
-	/*
-	 * The pointer to this block is stored in the module structure
-	 * which is inside the block. This block doesn't need to be
-	 * scanned as it contains data and code that will be freed
-	 * after the module is initialized.
-	 */
-	kmemleak_ignore(ptr);
-	if (!ptr && mod->init_size) {
-		module_free(mod, mod->module_core);
-		return -ENOMEM;
-	}
-	memset(ptr, 0, mod->init_size);
-	mod->module_init = ptr;
+	if (mod->init_size) {
+		ptr = module_alloc_update_bounds(mod->init_size);
+		/*
+		 * The pointer to this block is stored in the module structure
+		 * which is inside the block. This block doesn't need to be
+		 * scanned as it contains data and code that will be freed
+		 * after the module is initialized.
+		 */
+		kmemleak_ignore(ptr);
+		if (!ptr) {
+			module_free(mod, mod->module_core);
+			return -ENOMEM;
+		}
+		memset(ptr, 0, mod->init_size);
+		mod->module_init = ptr;
+	} else
+		mod->module_init = NULL;
 
 	/* Transfer each section which specifies SHF_ALLOC */
 	pr_debug("final section addresses:\n");
@@ -2847,18 +2899,18 @@
 	return 0;
 }
 
-static struct module *layout_and_allocate(struct load_info *info)
+static struct module *layout_and_allocate(struct load_info *info, int flags)
 {
 	/* Module within temporary copy. */
 	struct module *mod;
 	Elf_Shdr *pcpusec;
 	int err;
 
-	mod = setup_load_info(info);
+	mod = setup_load_info(info, flags);
 	if (IS_ERR(mod))
 		return mod;
 
-	err = check_modinfo(mod, info);
+	err = check_modinfo(mod, info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2945,156 +2997,6 @@
 	return ret;
 }
 
-/* Allocate and load the module: note that size of section 0 is always
-   zero, and we rely on this for optional sections. */
-static struct module *load_module(void __user *umod,
-				  unsigned long len,
-				  const char __user *uargs)
-{
-	struct load_info info = { NULL, };
-	struct module *mod, *old;
-	long err;
-
-	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
-	       umod, len, uargs);
-
-	/* Copy in the blobs from userspace, check they are vaguely sane. */
-	err = copy_and_check(&info, umod, len, uargs);
-	if (err)
-		return ERR_PTR(err);
-
-	/* Figure out module layout, and allocate all the memory. */
-	mod = layout_and_allocate(&info);
-	if (IS_ERR(mod)) {
-		err = PTR_ERR(mod);
-		goto free_copy;
-	}
-
-#ifdef CONFIG_MODULE_SIG
-	mod->sig_ok = info.sig_ok;
-	if (!mod->sig_ok)
-		add_taint_module(mod, TAINT_FORCED_MODULE);
-#endif
-
-	/* Now module is in final location, initialize linked lists, etc. */
-	err = module_unload_init(mod);
-	if (err)
-		goto free_module;
-
-	/* Now we've got everything in the final locations, we can
-	 * find optional sections. */
-	find_module_sections(mod, &info);
-
-	err = check_module_license_and_versions(mod);
-	if (err)
-		goto free_unload;
-
-	/* Set up MODINFO_ATTR fields */
-	setup_modinfo(mod, &info);
-
-	/* Fix up syms, so that st_value is a pointer to location. */
-	err = simplify_symbols(mod, &info);
-	if (err < 0)
-		goto free_modinfo;
-
-	err = apply_relocations(mod, &info);
-	if (err < 0)
-		goto free_modinfo;
-
-	err = post_relocation(mod, &info);
-	if (err < 0)
-		goto free_modinfo;
-
-	flush_module_icache(mod);
-
-	/* Now copy in args */
-	mod->args = strndup_user(uargs, ~0UL >> 1);
-	if (IS_ERR(mod->args)) {
-		err = PTR_ERR(mod->args);
-		goto free_arch_cleanup;
-	}
-
-	/* Mark state as coming so strong_try_module_get() ignores us. */
-	mod->state = MODULE_STATE_COMING;
-
-	/* Now sew it into the lists so we can get lockdep and oops
-	 * info during argument parsing.  No one should access us, since
-	 * strong_try_module_get() will fail.
-	 * lockdep/oops can run asynchronous, so use the RCU list insertion
-	 * function to insert in a way safe to concurrent readers.
-	 * The mutex protects against concurrent writers.
-	 */
-again:
-	mutex_lock(&module_mutex);
-	if ((old = find_module(mod->name)) != NULL) {
-		if (old->state == MODULE_STATE_COMING) {
-			/* Wait in case it fails to load. */
-			mutex_unlock(&module_mutex);
-			err = wait_event_interruptible(module_wq,
-					       finished_loading(mod->name));
-			if (err)
-				goto free_arch_cleanup;
-			goto again;
-		}
-		err = -EEXIST;
-		goto unlock;
-	}
-
-	/* This has to be done once we're sure module name is unique. */
-	dynamic_debug_setup(info.debug, info.num_debug);
-
-	/* Find duplicate symbols */
-	err = verify_export_symbols(mod);
-	if (err < 0)
-		goto ddebug;
-
-	module_bug_finalize(info.hdr, info.sechdrs, mod);
-	list_add_rcu(&mod->list, &modules);
-	mutex_unlock(&module_mutex);
-
-	/* Module is ready to execute: parsing args may do that. */
-	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-			 -32768, 32767, &ddebug_dyndbg_module_param_cb);
-	if (err < 0)
-		goto unlink;
-
-	/* Link in to syfs. */
-	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
-	if (err < 0)
-		goto unlink;
-
-	/* Get rid of temporary copy. */
-	free_copy(&info);
-
-	/* Done! */
-	trace_module_load(mod);
-	return mod;
-
- unlink:
-	mutex_lock(&module_mutex);
-	/* Unlink carefully: kallsyms could be walking list. */
-	list_del_rcu(&mod->list);
-	module_bug_cleanup(mod);
-	wake_up_all(&module_wq);
- ddebug:
-	dynamic_debug_remove(info.debug);
- unlock:
-	mutex_unlock(&module_mutex);
-	synchronize_sched();
-	kfree(mod->args);
- free_arch_cleanup:
-	module_arch_cleanup(mod);
- free_modinfo:
-	free_modinfo(mod);
- free_unload:
-	module_unload_free(mod);
- free_module:
-	module_deallocate(mod, &info);
- free_copy:
-	free_copy(&info);
-	return ERR_PTR(err);
-}
-
 /* Call module constructors. */
 static void do_mod_ctors(struct module *mod)
 {
@@ -3107,21 +3009,10 @@
 }
 
 /* This is where the real work happens */
-SYSCALL_DEFINE3(init_module, void __user *, umod,
-		unsigned long, len, const char __user *, uargs)
+static int do_init_module(struct module *mod)
 {
-	struct module *mod;
 	int ret = 0;
 
-	/* Must have permission */
-	if (!capable(CAP_SYS_MODULE) || modules_disabled)
-		return -EPERM;
-
-	/* Do all the hard work */
-	mod = load_module(umod, len, uargs);
-	if (IS_ERR(mod))
-		return PTR_ERR(mod);
-
 	blocking_notifier_call_chain(&module_notify_list,
 			MODULE_STATE_COMING, mod);
 
@@ -3191,6 +3082,205 @@
 	return 0;
 }
 
+static int may_init_module(void)
+{
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
+		return -EPERM;
+
+	return 0;
+}
+
+/* Allocate and load the module: note that size of section 0 is always
+   zero, and we rely on this for optional sections. */
+static int load_module(struct load_info *info, const char __user *uargs,
+		       int flags)
+{
+	struct module *mod, *old;
+	long err;
+
+	err = module_sig_check(info);
+	if (err)
+		goto free_copy;
+
+	err = elf_header_check(info);
+	if (err)
+		goto free_copy;
+
+	/* Figure out module layout, and allocate all the memory. */
+	mod = layout_and_allocate(info, flags);
+	if (IS_ERR(mod)) {
+		err = PTR_ERR(mod);
+		goto free_copy;
+	}
+
+#ifdef CONFIG_MODULE_SIG
+	mod->sig_ok = info->sig_ok;
+	if (!mod->sig_ok)
+		add_taint_module(mod, TAINT_FORCED_MODULE);
+#endif
+
+	/* Now module is in final location, initialize linked lists, etc. */
+	err = module_unload_init(mod);
+	if (err)
+		goto free_module;
+
+	/* Now we've got everything in the final locations, we can
+	 * find optional sections. */
+	find_module_sections(mod, info);
+
+	err = check_module_license_and_versions(mod);
+	if (err)
+		goto free_unload;
+
+	/* Set up MODINFO_ATTR fields */
+	setup_modinfo(mod, info);
+
+	/* Fix up syms, so that st_value is a pointer to location. */
+	err = simplify_symbols(mod, info);
+	if (err < 0)
+		goto free_modinfo;
+
+	err = apply_relocations(mod, info);
+	if (err < 0)
+		goto free_modinfo;
+
+	err = post_relocation(mod, info);
+	if (err < 0)
+		goto free_modinfo;
+
+	flush_module_icache(mod);
+
+	/* Now copy in args */
+	mod->args = strndup_user(uargs, ~0UL >> 1);
+	if (IS_ERR(mod->args)) {
+		err = PTR_ERR(mod->args);
+		goto free_arch_cleanup;
+	}
+
+	/* Mark state as coming so strong_try_module_get() ignores us. */
+	mod->state = MODULE_STATE_COMING;
+
+	/* Now sew it into the lists so we can get lockdep and oops
+	 * info during argument parsing.  No one should access us, since
+	 * strong_try_module_get() will fail.
+	 * lockdep/oops can run asynchronously, so use the RCU list insertion
+	 * function to insert in a way safe to concurrent readers.
+	 * The mutex protects against concurrent writers.
+	 */
+again:
+	mutex_lock(&module_mutex);
+	if ((old = find_module(mod->name)) != NULL) {
+		if (old->state == MODULE_STATE_COMING) {
+			/* Wait in case it fails to load. */
+			mutex_unlock(&module_mutex);
+			err = wait_event_interruptible(module_wq,
+					       finished_loading(mod->name));
+			if (err)
+				goto free_arch_cleanup;
+			goto again;
+		}
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	/* This has to be done once we're sure module name is unique. */
+	dynamic_debug_setup(info->debug, info->num_debug);
+
+	/* Find duplicate symbols */
+	err = verify_export_symbols(mod);
+	if (err < 0)
+		goto ddebug;
+
+	module_bug_finalize(info->hdr, info->sechdrs, mod);
+	list_add_rcu(&mod->list, &modules);
+	mutex_unlock(&module_mutex);
+
+	/* Module is ready to execute: parsing args may do that. */
+	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
+			 -32768, 32767, &ddebug_dyndbg_module_param_cb);
+	if (err < 0)
+		goto unlink;
+
+	/* Link in to sysfs. */
+	err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
+	if (err < 0)
+		goto unlink;
+
+	/* Get rid of temporary copy. */
+	free_copy(info);
+
+	/* Done! */
+	trace_module_load(mod);
+
+	return do_init_module(mod);
+
+ unlink:
+	mutex_lock(&module_mutex);
+	/* Unlink carefully: kallsyms could be walking list. */
+	list_del_rcu(&mod->list);
+	module_bug_cleanup(mod);
+	wake_up_all(&module_wq);
+ ddebug:
+	dynamic_debug_remove(info->debug);
+ unlock:
+	mutex_unlock(&module_mutex);
+	synchronize_sched();
+	kfree(mod->args);
+ free_arch_cleanup:
+	module_arch_cleanup(mod);
+ free_modinfo:
+	free_modinfo(mod);
+ free_unload:
+	module_unload_free(mod);
+ free_module:
+	module_deallocate(mod, info);
+ free_copy:
+	free_copy(info);
+	return err;
+}
+
+SYSCALL_DEFINE3(init_module, void __user *, umod,
+		unsigned long, len, const char __user *, uargs)
+{
+	int err;
+	struct load_info info = { };
+
+	err = may_init_module();
+	if (err)
+		return err;
+
+	pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
+	       umod, len, uargs);
+
+	err = copy_module_from_user(umod, len, &info);
+	if (err)
+		return err;
+
+	return load_module(&info, uargs, 0);
+}
+
+SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
+{
+	int err;
+	struct load_info info = { };
+
+	err = may_init_module();
+	if (err)
+		return err;
+
+	pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
+
+	if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
+		      |MODULE_INIT_IGNORE_VERMAGIC))
+		return -EINVAL;
+
+	err = copy_module_from_fd(fd, &info);
+	if (err)
+		return err;
+
+	return load_module(&info, uargs, flags);
+}
+
 static inline int within(unsigned long addr, void *start, unsigned long size)
 {
 	return ((void *)addr >= start && (void *)addr < start + size);
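The new finit_module(2) above takes an already-open file descriptor plus flags. A minimal user-space sketch, assuming __NR_finit_module is exposed by <sys/syscall.h> and that the flag values below mirror the new <uapi/linux/module.h> (both are assumptions about this era's headers):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Assumed to mirror <uapi/linux/module.h>. */
#define MODULE_INIT_IGNORE_MODVERSIONS	1
#define MODULE_INIT_IGNORE_VERMAGIC	2

int main(int argc, char **argv)
{
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The kernel pulls in the whole file via copy_module_from_fd(). */
	if (syscall(__NR_finit_module, fd, "", 0) != 0) {
		perror("finit_module");
		return 1;
	}
	return 0;
}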
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 7e1c3de..78e2ecb 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -57,7 +57,8 @@
  * leave it to the caller to do proper locking and attach it to task.
  */
 static struct nsproxy *create_new_namespaces(unsigned long flags,
-			struct task_struct *tsk, struct fs_struct *new_fs)
+	struct task_struct *tsk, struct user_namespace *user_ns,
+	struct fs_struct *new_fs)
 {
 	struct nsproxy *new_nsp;
 	int err;
@@ -66,31 +67,31 @@
 	if (!new_nsp)
 		return ERR_PTR(-ENOMEM);
 
-	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
+	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
 	if (IS_ERR(new_nsp->mnt_ns)) {
 		err = PTR_ERR(new_nsp->mnt_ns);
 		goto out_ns;
 	}
 
-	new_nsp->uts_ns = copy_utsname(flags, tsk);
+	new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
 	if (IS_ERR(new_nsp->uts_ns)) {
 		err = PTR_ERR(new_nsp->uts_ns);
 		goto out_uts;
 	}
 
-	new_nsp->ipc_ns = copy_ipcs(flags, tsk);
+	new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
 	if (IS_ERR(new_nsp->ipc_ns)) {
 		err = PTR_ERR(new_nsp->ipc_ns);
 		goto out_ipc;
 	}
 
-	new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
+	new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns);
 	if (IS_ERR(new_nsp->pid_ns)) {
 		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
 	}
 
-	new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns);
+	new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
 	if (IS_ERR(new_nsp->net_ns)) {
 		err = PTR_ERR(new_nsp->net_ns);
 		goto out_net;
@@ -122,6 +123,7 @@
 int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 {
 	struct nsproxy *old_ns = tsk->nsproxy;
+	struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
 	struct nsproxy *new_ns;
 	int err = 0;
 
@@ -134,7 +136,7 @@
 				CLONE_NEWPID | CLONE_NEWNET)))
 		return 0;
 
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
 		err = -EPERM;
 		goto out;
 	}
@@ -151,7 +153,8 @@
 		goto out;
 	}
 
-	new_ns = create_new_namespaces(flags, tsk, tsk->fs);
+	new_ns = create_new_namespaces(flags, tsk,
+				       task_cred_xxx(tsk, user_ns), tsk->fs);
 	if (IS_ERR(new_ns)) {
 		err = PTR_ERR(new_ns);
 		goto out;
@@ -183,19 +186,21 @@
  * On success, returns the new nsproxy.
  */
 int unshare_nsproxy_namespaces(unsigned long unshare_flags,
-		struct nsproxy **new_nsp, struct fs_struct *new_fs)
+	struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
 {
+	struct user_namespace *user_ns;
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWNET)))
+			       CLONE_NEWNET | CLONE_NEWPID)))
 		return 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	*new_nsp = create_new_namespaces(unshare_flags, current,
-				new_fs ? new_fs : current->fs);
+	*new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
+					 new_fs ? new_fs : current->fs);
 	if (IS_ERR(*new_nsp)) {
 		err = PTR_ERR(*new_nsp);
 		goto out;
@@ -241,9 +246,6 @@
 	struct file *file;
 	int err;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	file = proc_ns_fget(fd);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
@@ -254,7 +256,7 @@
 	if (nstype && (ops->type != nstype))
 		goto out;
 
-	new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+	new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
 	if (IS_ERR(new_nsproxy)) {
 		err = PTR_ERR(new_nsproxy);
 		goto out;
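create_new_namespaces() is now also reached from setns(2) without the old blanket capable() check; each ->install hook does its own ns_capable() test instead (see pidns_install in the pid_namespace.c diff below). A hedged user-space sketch of that path, assuming a glibc with the setns() wrapper:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64];
	int fd;

	if (argc < 2)
		return 1;
	/* /proc/<pid>/ns/pid is the handle the new pidns_operations expose. */
	snprintf(path, sizeof(path), "/proc/%s/ns/pid", argv[1]);
	fd = open(path, O_RDONLY);
	if (fd < 0 || setns(fd, CLONE_NEWPID) != 0) {
		perror("setns");
		return 1;
	}
	/* Children forked from here receive pids in the target namespace. */
	return 0;
}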
diff --git a/kernel/pid.c b/kernel/pid.c
index fd996c1..36aa02f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
 #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
 
 #define pid_hashfn(nr, ns)	\
 	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -78,24 +79,11 @@
 	.last_pid = 0,
 	.level = 0,
 	.child_reaper = &init_task,
+	.user_ns = &init_user_ns,
+	.proc_inum = PROC_PID_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
-int is_container_init(struct task_struct *tsk)
-{
-	int ret = 0;
-	struct pid *pid;
-
-	rcu_read_lock();
-	pid = task_pid(tsk);
-	if (pid != NULL && pid->numbers[pid->level].nr == 1)
-		ret = 1;
-	rcu_read_unlock();
-
-	return ret;
-}
-EXPORT_SYMBOL(is_container_init);
-
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
  * interrupt might come in and do read_lock(&tasklist_lock).
@@ -269,8 +257,24 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&pidmap_lock, flags);
-	for (i = 0; i <= pid->level; i++)
-		hlist_del_rcu(&pid->numbers[i].pid_chain);
+	for (i = 0; i <= pid->level; i++) {
+		struct upid *upid = pid->numbers + i;
+		struct pid_namespace *ns = upid->ns;
+		hlist_del_rcu(&upid->pid_chain);
+		switch (--ns->nr_hashed) {
+		case 1:
+			/* When all that is left in the pid namespace
+			 * is the reaper, wake up the reaper.  The reaper
+			 * may be sleeping in zap_pid_ns_processes().
+			 */
+			wake_up_process(ns->child_reaper);
+			break;
+		case 0:
+			ns->nr_hashed = -1;
+			schedule_work(&ns->proc_work);
+			break;
+		}
+	}
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
 	for (i = 0; i <= pid->level; i++)
@@ -292,6 +296,7 @@
 		goto out;
 
 	tmp = ns;
+	pid->level = ns->level;
 	for (i = ns->level; i >= 0; i--) {
 		nr = alloc_pidmap(tmp);
 		if (nr < 0)
@@ -302,22 +307,32 @@
 		tmp = tmp->parent;
 	}
 
+	if (unlikely(is_child_reaper(pid))) {
+		if (pid_ns_prepare_proc(ns))
+			goto out_free;
+	}
+
 	get_pid_ns(ns);
-	pid->level = ns->level;
 	atomic_set(&pid->count, 1);
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);
 
 	upid = pid->numbers + ns->level;
 	spin_lock_irq(&pidmap_lock);
-	for ( ; upid >= pid->numbers; --upid)
+	if (ns->nr_hashed < 0)
+		goto out_unlock;
+	for ( ; upid >= pid->numbers; --upid) {
 		hlist_add_head_rcu(&upid->pid_chain,
 				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+		upid->ns->nr_hashed++;
+	}
 	spin_unlock_irq(&pidmap_lock);
 
 out:
 	return pid;
 
+out_unlock:
+	spin_unlock_irq(&pidmap_lock);
 out_free:
 	while (++i <= ns->level)
 		free_pidmap(pid->numbers + i);
@@ -344,7 +359,7 @@
 
 struct pid *find_vpid(int nr)
 {
-	return find_pid_ns(nr, current->nsproxy->pid_ns);
+	return find_pid_ns(nr, task_active_pid_ns(current));
 }
 EXPORT_SYMBOL_GPL(find_vpid);
 
@@ -428,7 +443,7 @@
 
 struct task_struct *find_task_by_vpid(pid_t vnr)
 {
-	return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
+	return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
 }
 
 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
@@ -483,7 +498,7 @@
 
 pid_t pid_vnr(struct pid *pid)
 {
-	return pid_nr_ns(pid, current->nsproxy->pid_ns);
+	return pid_nr_ns(pid, task_active_pid_ns(current));
 }
 EXPORT_SYMBOL_GPL(pid_vnr);
 
@@ -494,7 +509,7 @@
 
 	rcu_read_lock();
 	if (!ns)
-		ns = current->nsproxy->pid_ns;
+		ns = task_active_pid_ns(current);
 	if (likely(pid_alive(task))) {
 		if (type != PIDTYPE_PID)
 			task = task->group_leader;
@@ -569,6 +584,7 @@
 	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+	init_pid_ns.nr_hashed = 1;
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 7b07cc0..fdbd0cd 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -10,6 +10,7 @@
 
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/syscalls.h>
 #include <linux/err.h>
 #include <linux/acct.h>
@@ -71,10 +72,17 @@
 	return NULL;
 }
 
+static void proc_cleanup_work(struct work_struct *work)
+{
+	struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
+	pid_ns_release_proc(ns);
+}
+
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
-static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
+static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
+	struct pid_namespace *parent_pid_ns)
 {
 	struct pid_namespace *ns;
 	unsigned int level = parent_pid_ns->level + 1;
@@ -99,9 +107,15 @@
 	if (ns->pid_cachep == NULL)
 		goto out_free_map;
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err)
+		goto out_free_map;
+
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
+	ns->user_ns = get_user_ns(user_ns);
+	INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -109,14 +123,8 @@
 	for (i = 1; i < PIDMAP_ENTRIES; i++)
 		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
 
-	err = pid_ns_prepare_proc(ns);
-	if (err)
-		goto out_put_parent_pid_ns;
-
 	return ns;
 
-out_put_parent_pid_ns:
-	put_pid_ns(parent_pid_ns);
 out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
@@ -129,18 +137,21 @@
 {
 	int i;
 
+	proc_free_inum(ns->proc_inum);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
+	put_user_ns(ns->user_ns);
 	kmem_cache_free(pid_ns_cachep, ns);
 }
 
-struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
+struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *old_ns)
 {
 	if (!(flags & CLONE_NEWPID))
 		return get_pid_ns(old_ns);
-	if (flags & (CLONE_THREAD|CLONE_PARENT))
+	if (task_active_pid_ns(current) != old_ns)
 		return ERR_PTR(-EINVAL);
-	return create_pid_namespace(old_ns);
+	return create_pid_namespace(user_ns, old_ns);
 }
 
 static void free_pid_ns(struct kref *kref)
@@ -211,22 +222,15 @@
 
 	/*
 	 * sys_wait4() above can't reap the TASK_DEAD children.
-	 * Make sure they all go away, see __unhash_process().
+	 * Make sure they all go away, see free_pid().
 	 */
 	for (;;) {
-		bool need_wait = false;
-
-		read_lock(&tasklist_lock);
-		if (!list_empty(&current->children)) {
-			__set_current_state(TASK_UNINTERRUPTIBLE);
-			need_wait = true;
-		}
-		read_unlock(&tasklist_lock);
-
-		if (!need_wait)
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (pid_ns->nr_hashed == 1)
 			break;
 		schedule();
 	}
+	__set_current_state(TASK_RUNNING);
 
 	if (pid_ns->reboot)
 		current->signal->group_exit_code = pid_ns->reboot;
@@ -239,9 +243,10 @@
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct pid_namespace *pid_ns = task_active_pid_ns(current);
 	struct ctl_table tmp = *table;
 
-	if (write && !capable(CAP_SYS_ADMIN))
+	if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/*
@@ -250,7 +255,7 @@
 	 * it should synchronize its usage with external means.
 	 */
 
-	tmp.data = &current->nsproxy->pid_ns->last_pid;
+	tmp.data = &pid_ns->last_pid;
 	return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 }
 
@@ -299,6 +304,68 @@
 	return 0;
 }
 
+static void *pidns_get(struct task_struct *task)
+{
+	struct pid_namespace *ns;
+
+	rcu_read_lock();
+	ns = get_pid_ns(task_active_pid_ns(task));
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void pidns_put(void *ns)
+{
+	put_pid_ns(ns);
+}
+
+static int pidns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct pid_namespace *active = task_active_pid_ns(current);
+	struct pid_namespace *ancestor, *new = ns;
+
+	if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/*
+	 * Only allow entering the current active pid namespace
+	 * or a child of the current active pid namespace.
+	 *
+	 * This is required for fork to return a usable pid value and
+	 * this maintains the property that processes and their
+	 * children can not escape their current pid namespace.
+	 */
+	if (new->level < active->level)
+		return -EINVAL;
+
+	ancestor = new;
+	while (ancestor->level > active->level)
+		ancestor = ancestor->parent;
+	if (ancestor != active)
+		return -EINVAL;
+
+	put_pid_ns(nsproxy->pid_ns);
+	nsproxy->pid_ns = get_pid_ns(new);
+	return 0;
+}
+
+static unsigned int pidns_inum(void *ns)
+{
+	struct pid_namespace *pid_ns = ns;
+	return pid_ns->proc_inum;
+}
+
+const struct proc_ns_operations pidns_operations = {
+	.name		= "pid",
+	.type		= CLONE_NEWPID,
+	.get		= pidns_get,
+	.put		= pidns_put,
+	.install	= pidns_install,
+	.inum		= pidns_inum,
+};
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index d738402..a278cad 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -9,6 +9,7 @@
 #include <asm/uaccess.h>
 #include <linux/kernel_stat.h>
 #include <trace/events/timer.h>
+#include <linux/random.h>
 
 /*
  * Called after updating RLIMIT_CPU to run cpu timer and update
@@ -470,6 +471,8 @@
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
+	add_device_randomness((const void *)&tsk->se.sum_exec_runtime,
+			      sizeof(unsigned long long));
 	cleanup_timers(tsk->cpu_timers,
 		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 22e070f..19c0d7b 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -747,6 +747,21 @@
 		free, (free * 100) / __LOG_BUF_LEN);
 }
 
+static bool __read_mostly ignore_loglevel;
+
+static int __init ignore_loglevel_setup(char *str)
+{
+	ignore_loglevel = 1;
+	printk(KERN_INFO "debug: ignoring loglevel setting.\n");
+
+	return 0;
+}
+
+early_param("ignore_loglevel", ignore_loglevel_setup);
+module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to "
+	"print all kernel messages to the console.");
+
 #ifdef CONFIG_BOOT_PRINTK_DELAY
 
 static int boot_delay; /* msecs delay after each printk during bootup */
@@ -770,13 +785,15 @@
 }
 __setup("boot_delay=", boot_delay_setup);
 
-static void boot_delay_msec(void)
+static void boot_delay_msec(int level)
 {
 	unsigned long long k;
 	unsigned long timeout;
 
-	if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
+	if ((boot_delay == 0 || system_state != SYSTEM_BOOTING)
+		|| (level >= console_loglevel && !ignore_loglevel)) {
 		return;
+	}
 
 	k = (unsigned long long)loops_per_msec * boot_delay;
 
@@ -795,7 +812,7 @@
 	}
 }
 #else
-static inline void boot_delay_msec(void)
+static inline void boot_delay_msec(int level)
 {
 }
 #endif
@@ -1238,21 +1255,6 @@
 	return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
 }
 
-static bool __read_mostly ignore_loglevel;
-
-static int __init ignore_loglevel_setup(char *str)
-{
-	ignore_loglevel = 1;
-	printk(KERN_INFO "debug: ignoring loglevel setting.\n");
-
-	return 0;
-}
-
-early_param("ignore_loglevel", ignore_loglevel_setup);
-module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
-	"print all kernel messages to the console.");
-
 /*
  * Call the console drivers, asking them to write out
  * log_buf[start] to log_buf[end - 1].
@@ -1498,7 +1500,7 @@
 	int this_cpu;
 	int printed_len = 0;
 
-	boot_delay_msec();
+	boot_delay_msec(level);
 	printk_delay();
 
 	/* This stops the holder of console_sem just where we want him */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1f5e55d..1599157 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -215,8 +215,12 @@
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
-	if (!dumpable  && !ptrace_has_cap(task_user_ns(task), mode))
+	rcu_read_lock();
+	if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+		rcu_read_unlock();
 		return -EPERM;
+	}
+	rcu_read_unlock();
 
 	return security_ptrace_access_check(task, mode);
 }
@@ -280,8 +284,10 @@
 
 	if (seize)
 		flags |= PT_SEIZED;
-	if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+	rcu_read_lock();
+	if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
 		flags |= PT_PTRACE_CAP;
+	rcu_read_unlock();
 	task->ptrace = flags;
 
 	__ptrace_link(task, current);
@@ -457,6 +463,9 @@
 		return;
 
 	list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) {
+		if (unlikely(p->ptrace & PT_EXITKILL))
+			send_sig_info(SIGKILL, SEND_SIG_FORCED, p);
+
 		if (__ptrace_detach(tracer, p))
 			list_add(&p->ptrace_entry, &ptrace_dead);
 	}
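The exit_ptrace() hunk above kills tracees that carry PT_EXITKILL when their tracer exits. The user-visible switch is a ptrace option; a hedged sketch, assuming the uapi bit is named PTRACE_O_EXITKILL with the value below:

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef PTRACE_O_EXITKILL
#define PTRACE_O_EXITKILL 0x00100000	/* assumed new uapi bit */
#endif

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, 0, 0);
		raise(SIGSTOP);		/* let the parent set options */
		pause();		/* would linger forever without EXITKILL */
		_exit(0);
	}
	waitpid(child, NULL, 0);	/* observe the SIGSTOP */
	ptrace(PTRACE_SETOPTIONS, child, 0, (void *)(long)PTRACE_O_EXITKILL);
	ptrace(PTRACE_CONT, child, 0, 0);
	return 0;			/* tracer exit -> tracee gets SIGKILL */
}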
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 3920d59..ff55247 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -86,33 +86,39 @@
 	return __res_counter_charge(counter, val, limit_fail_at, true);
 }
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
 		val = counter->usage;
 
 	counter->usage -= val;
+	return counter->usage;
 }
 
-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val)
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
+	u64 ret = 0;
 
 	local_irq_save(flags);
 	for (c = counter; c != top; c = c->parent) {
+		u64 r;
 		spin_lock(&c->lock);
-		res_counter_uncharge_locked(c, val);
+		r = res_counter_uncharge_locked(c, val);
+		if (c == counter)
+			ret = r;
 		spin_unlock(&c->lock);
 	}
 	local_irq_restore(flags);
+	return ret;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
-	res_counter_uncharge_until(counter, NULL, val);
+	return res_counter_uncharge_until(counter, NULL, val);
 }
 
 static inline unsigned long long *
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0533496..257002c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -193,23 +193,10 @@
 static void sched_feat_enable(int i) { };
 #endif /* HAVE_JUMP_LABEL */
 
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-		size_t cnt, loff_t *ppos)
+static int sched_feat_set(char *cmp)
 {
-	char buf[64];
-	char *cmp;
-	int neg = 0;
 	int i;
-
-	if (cnt > 63)
-		cnt = 63;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
+	int neg = 0;
 
 	if (strncmp(cmp, "NO_", 3) == 0) {
 		neg = 1;
@@ -229,6 +216,27 @@
 		}
 	}
 
+	return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	char *cmp;
+	int i;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+	cmp = strstrip(buf);
+
+	i = sched_feat_set(cmp);
 	if (i == __SCHED_FEAT_NR)
 		return -EINVAL;
 
@@ -1560,8 +1568,41 @@
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
+		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_next_reset = jiffies;
+		p->mm->numa_scan_seq = 0;
+	}
+
+	p->node_stamp = 0ULL;
+	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
+	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
+	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+	p->numa_work.next = &p->numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_SCHED_DEBUG
+void set_numabalancing_state(bool enabled)
+{
+	if (enabled)
+		sched_feat_set("NUMA");
+	else
+		sched_feat_set("NO_NUMA");
+}
+#else
+__read_mostly bool numabalancing_enabled;
+
+void set_numabalancing_state(bool enabled)
+{
+	numabalancing_enabled = enabled;
+}
+#endif /* CONFIG_SCHED_DEBUG */
+#endif /* CONFIG_NUMA_BALANCING */
+
 /*
  * fork()/clone()-time setup:
  */
@@ -4056,8 +4097,14 @@
 		goto out_free_cpus_allowed;
 	}
 	retval = -EPERM;
-	if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
-		goto out_unlock;
+	if (!check_same_owner(p)) {
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+			rcu_read_unlock();
+			goto out_unlock;
+		}
+		rcu_read_unlock();
+	}
 
 	retval = security_task_setscheduler(p);
 	if (retval)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 756f9f9..5eea870 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -26,6 +26,9 @@
 #include <linux/slab.h>
 #include <linux/profile.h>
 #include <linux/interrupt.h>
+#include <linux/mempolicy.h>
+#include <linux/migrate.h>
+#include <linux/task_work.h>
 
 #include <trace/events/sched.h>
 
@@ -774,6 +777,230 @@
  * Scheduling class queueing methods:
  */
 
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * numa task sample period in ms
+ */
+unsigned int sysctl_numa_balancing_scan_period_min = 100;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*50;
+unsigned int sysctl_numa_balancing_scan_period_reset = 100*600;
+
+/* Portion of address space to scan in MB */
+unsigned int sysctl_numa_balancing_scan_size = 256;
+
+/* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
+unsigned int sysctl_numa_balancing_scan_delay = 1000;
+
+static void task_numa_placement(struct task_struct *p)
+{
+	int seq;
+
+	if (!p->mm)	/* for example, ksmd faulting in a user's mm */
+		return;
+	seq = ACCESS_ONCE(p->mm->numa_scan_seq);
+	if (p->numa_scan_seq == seq)
+		return;
+	p->numa_scan_seq = seq;
+
+	/* FIXME: Scheduling placement policy hints go here */
+}
+
+/*
+ * Got a PROT_NONE fault for a page on @node.
+ */
+void task_numa_fault(int node, int pages, bool migrated)
+{
+	struct task_struct *p = current;
+
+	if (!sched_feat_numa(NUMA))
+		return;
+
+	/* FIXME: Allocate task-specific structure for placement policy here */
+
+	/*
+	 * If pages are properly placed (did not migrate) then scan slower.
+	 * This is reset periodically in case of phase changes
+	 */
+	if (!migrated)
+		p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
+			p->numa_scan_period + jiffies_to_msecs(10));
+
+	task_numa_placement(p);
+}
+
+static void reset_ptenuma_scan(struct task_struct *p)
+{
+	ACCESS_ONCE(p->mm->numa_scan_seq)++;
+	p->mm->numa_scan_offset = 0;
+}
+
+/*
+ * The expensive part of numa migration is done from task_work context.
+ * Triggered from task_tick_numa().
+ */
+void task_numa_work(struct callback_head *work)
+{
+	unsigned long migrate, next_scan, now = jiffies;
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+	struct vm_area_struct *vma;
+	unsigned long start, end;
+	long pages;
+
+	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
+
+	work->next = work; /* protect against double add */
+	/*
+	 * Who cares about NUMA placement when they're dying.
+	 *
+	 * NOTE: make sure not to dereference p->mm before this check,
+	 * exit_task_work() happens _after_ exit_mm() so we could be called
+	 * without p->mm even though we still had it when we enqueued this
+	 * work.
+	 */
+	if (p->flags & PF_EXITING)
+		return;
+
+	/*
+	 * We do not care about task placement until a task runs on a node
+	 * other than the first one used by the address space. This is
+	 * largely because migrations are driven by what CPU the task
+	 * is running on. If it's never scheduled on another node, it'll
+	 * not migrate so why bother trapping the fault.
+	 */
+	if (mm->first_nid == NUMA_PTE_SCAN_INIT)
+		mm->first_nid = numa_node_id();
+	if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) {
+		/* Are we running on a new node yet? */
+		if (numa_node_id() == mm->first_nid &&
+		    !sched_feat_numa(NUMA_FORCE))
+			return;
+
+		mm->first_nid = NUMA_PTE_SCAN_ACTIVE;
+	}
+
+	/*
+	 * Reset the scan period if enough time has gone by. Objective is that
+	 * scanning will be reduced if pages are properly placed. As tasks
+	 * can enter different phases this needs to be re-examined. Lacking
+	 * proper tracking of reference behaviour, this blunt hammer is used.
+	 */
+	migrate = mm->numa_next_reset;
+	if (time_after(now, migrate)) {
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+		next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
+		xchg(&mm->numa_next_reset, next_scan);
+	}
+
+	/*
+	 * Enforce maximal scan/migration frequency.
+	 */
+	migrate = mm->numa_next_scan;
+	if (time_before(now, migrate))
+		return;
+
+	if (p->numa_scan_period == 0)
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+
+	next_scan = now + msecs_to_jiffies(p->numa_scan_period);
+	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
+		return;
+
+	/*
+	 * Do not set pte_numa if the current running node is rate-limited.
+	 * This loses statistics on the fault but if we are unwilling to
+	 * migrate to this node, it is less likely we can do useful work
+	 */
+	if (migrate_ratelimited(numa_node_id()))
+		return;
+
+	start = mm->numa_scan_offset;
+	pages = sysctl_numa_balancing_scan_size;
+	pages <<= 20 - PAGE_SHIFT; /* MB in pages */
+	if (!pages)
+		return;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, start);
+	if (!vma) {
+		reset_ptenuma_scan(p);
+		start = 0;
+		vma = mm->mmap;
+	}
+	for (; vma; vma = vma->vm_next) {
+		if (!vma_migratable(vma))
+			continue;
+
+		/* Skip small VMAs. They are not likely to be of relevance */
+		if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
+			continue;
+
+		do {
+			start = max(start, vma->vm_start);
+			end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
+			end = min(end, vma->vm_end);
+			pages -= change_prot_numa(vma, start, end);
+
+			start = end;
+			if (pages <= 0)
+				goto out;
+		} while (end != vma->vm_end);
+	}
+
+out:
+	/*
+	 * It is possible to reach the end of the VMA list but the last few VMAs are
+	 * not guaranteed to be vma_migratable. If they are not, we would find the
+	 * !migratable VMA on the next scan but not reset the scanner to the start
+	 * so check it now.
+	 */
+	if (vma)
+		mm->numa_scan_offset = start;
+	else
+		reset_ptenuma_scan(p);
+	up_read(&mm->mmap_sem);
+}
+
+/*
+ * Drive the periodic memory faults.
+ */
+void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+	struct callback_head *work = &curr->numa_work;
+	u64 period, now;
+
+	/*
+	 * We don't care about NUMA placement if we don't have memory.
+	 */
+	if (!curr->mm || (curr->flags & PF_EXITING) || work->next != work)
+		return;
+
+	/*
+	 * Using runtime rather than walltime has the dual advantage that
+	 * we (mostly) drive the selection from busy threads and that the
+	 * task needs to have done some actual work before we bother with
+	 * NUMA placement.
+	 */
+	now = curr->se.sum_exec_runtime;
+	period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
+
+	if (now - curr->node_stamp > period) {
+		if (!curr->node_stamp)
+			curr->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+		curr->node_stamp = now;
+
+		if (!time_before(jiffies, curr->mm->numa_next_scan)) {
+			init_task_work(work, task_numa_work); /* TODO: move this into sched_fork() */
+			task_work_add(curr, work, true);
+		}
+	}
+}
+#else
+static void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -5501,6 +5728,9 @@
 		entity_tick(cfs_rq, se, queued);
 	}
 
+	if (sched_feat_numa(NUMA))
+		task_tick_numa(rq, curr);
+
 	update_rq_runnable_avg(rq, 1);
 }
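task_numa_work() above converts the scan budget from megabytes to pages with pages <<= 20 - PAGE_SHIFT. A stand-alone illustration of that arithmetic, assuming 4 KiB pages (PAGE_SHIFT is arch-defined in the kernel):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages for illustration */

int main(void)
{
	long pages = 256;		/* sysctl_numa_balancing_scan_size, MB */

	pages <<= 20 - PAGE_SHIFT;	/* MB -> pages: 2^20 bytes per MB */
	printf("256 MB = %ld pages\n", pages);	/* 65536 with 4 KiB pages */
	return 0;
}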
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index e68e69a..1ad1d2b 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -66,3 +66,14 @@
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
+
+/*
+ * Apply the automatic NUMA scheduling policy. Enabled automatically
+ * at runtime if running on a NUMA machine. Can be controlled via
+ * numa_balancing=. Allow PTE scanning to be forced on UMA machines
+ * for debugging the core machinery.
+ */
+#ifdef CONFIG_NUMA_BALANCING
+SCHED_FEAT(NUMA,	false)
+SCHED_FEAT(NUMA_FORCE,	false)
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5eca173..fc88644 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -663,6 +663,18 @@
 #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define sched_feat_numa(x) sched_feat(x)
+#ifdef CONFIG_SCHED_DEBUG
+#define numabalancing_enabled sched_feat_numa(NUMA)
+#else
+extern bool numabalancing_enabled;
+#endif /* CONFIG_SCHED_DEBUG */
+#else
+#define sched_feat_numa(x) (0)
+#define numabalancing_enabled (0)
+#endif /* CONFIG_NUMA_BALANCING */
+
 static inline u64 global_rt_period(void)
 {
 	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ee376be..5af44b5 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -396,25 +396,29 @@
 #ifdef CONFIG_SECCOMP_FILTER
 	case SECCOMP_MODE_FILTER: {
 		int data;
+		struct pt_regs *regs = task_pt_regs(current);
 		ret = seccomp_run_filters(this_syscall);
 		data = ret & SECCOMP_RET_DATA;
 		ret &= SECCOMP_RET_ACTION;
 		switch (ret) {
 		case SECCOMP_RET_ERRNO:
 			/* Set the low-order 16 bits as an errno. */
-			syscall_set_return_value(current, task_pt_regs(current),
+			syscall_set_return_value(current, regs,
 						 -data, 0);
 			goto skip;
 		case SECCOMP_RET_TRAP:
 			/* Show the handler the original registers. */
-			syscall_rollback(current, task_pt_regs(current));
+			syscall_rollback(current, regs);
 			/* Let the filter pass back 16 bits of data. */
 			seccomp_send_sigsys(this_syscall, data);
 			goto skip;
 		case SECCOMP_RET_TRACE:
 			/* Skip these calls if there is no tracer. */
-			if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP))
+			if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
+				syscall_set_return_value(current, regs,
+							 -ENOSYS, 0);
 				goto skip;
+			}
 			/* Allow the BPF to provide the event message */
 			ptrace_event(PTRACE_EVENT_SECCOMP, data);
 			/*
@@ -425,6 +429,9 @@
 			 */
 			if (fatal_signal_pending(current))
 				break;
+			if (syscall_get_nr(current, regs) < 0)
+				goto skip;  /* Explicit request to skip. */
+
 			return 0;
 		case SECCOMP_RET_ALLOW:
 			return 0;
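With the SECCOMP_RET_TRACE change above, an untraced filtered call now fails with -ENOSYS, and a tracer can skip the pending call by forcing its syscall number negative so syscall_get_nr() < 0. A hedged tracer-side sketch; the helper name is invented and the x86-64 register layout is assumed purely for illustration:

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

/* At a PTRACE_EVENT_SECCOMP stop, clobber the syscall number so the
 * kernel takes the new "explicit request to skip" branch. */
static long skip_pending_syscall(pid_t tracee)
{
	struct user_regs_struct regs;

	if (ptrace(PTRACE_GETREGS, tracee, 0, &regs) != 0)
		return -1;
	regs.orig_rax = -1;	/* syscall_get_nr() < 0 -> call is skipped */
	return ptrace(PTRACE_SETREGS, tracee, 0, &regs);
}

int main(void)
{
	(void)skip_pending_syscall;	/* compile-only demo; needs a real tracee */
	return 0;
}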
diff --git a/kernel/signal.c b/kernel/signal.c
index a49c7f3..7aaa51d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -31,6 +31,7 @@
 #include <linux/nsproxy.h>
 #include <linux/user_namespace.h>
 #include <linux/uprobes.h>
+#include <linux/compat.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
 
@@ -1753,7 +1754,7 @@
 	 * see comment in do_notify_parent() about the following 4 lines
 	 */
 	rcu_read_lock();
-	info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
+	info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent));
 	info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
 	rcu_read_unlock();
 
@@ -3094,6 +3095,79 @@
 out:
 	return error;
 }
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+SYSCALL_DEFINE2(sigaltstack, const stack_t __user *, uss,
+		stack_t __user *, uoss)
+{
+	return do_sigaltstack(uss, uoss, current_user_stack_pointer());
+}
+#endif
+
+int restore_altstack(const stack_t __user *uss)
+{
+	int err = do_sigaltstack(uss, NULL, current_user_stack_pointer());
+	/* squash all but EFAULT for now */
+	return err == -EFAULT ? err : 0;
+}
+
+int __save_altstack(stack_t __user *uss, unsigned long sp)
+{
+	struct task_struct *t = current;
+	return  __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
+		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+		__put_user(t->sas_ss_size, &uss->ss_size);
+}
+
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
+				       compat_stack_t __user *uoss_ptr)
+{
+	stack_t uss, uoss;
+	int ret;
+	mm_segment_t seg;
+
+	if (uss_ptr) {
+		compat_stack_t uss32;
+
+		memset(&uss, 0, sizeof(stack_t));
+		if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t)))
+			return -EFAULT;
+		uss.ss_sp = compat_ptr(uss32.ss_sp);
+		uss.ss_flags = uss32.ss_flags;
+		uss.ss_size = uss32.ss_size;
+	}
+	seg = get_fs();
+	set_fs(KERNEL_DS);
+	ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
+			     (stack_t __force __user *) &uoss,
+			     compat_user_stack_pointer());
+	set_fs(seg);
+	if (ret >= 0 && uoss_ptr)  {
+		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(compat_stack_t)) ||
+		    __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
+		    __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
+		    __put_user(uoss.ss_size, &uoss_ptr->ss_size))
+			ret = -EFAULT;
+	}
+	return ret;
+}
+
+int compat_restore_altstack(const compat_stack_t __user *uss)
+{
+	int err = compat_sys_sigaltstack(uss, NULL);
+	/* squash all but -EFAULT for now */
+	return err == -EFAULT ? err : 0;
+}
+
+int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
+{
+	struct task_struct *t = current;
+	return  __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) |
+		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+		__put_user(t->sas_ss_size, &uss->ss_size);
+}
+#endif
+#endif
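The generic sigaltstack plumbing above serves the long-standing user API. A minimal sketch of the user-space side, for reference:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	stack_t ss = {
		.ss_sp = malloc(SIGSTKSZ),
		.ss_size = SIGSTKSZ,
		.ss_flags = 0,
	};

	if (ss.ss_sp == NULL || sigaltstack(&ss, NULL) != 0) {
		perror("sigaltstack");
		return 1;
	}
	/* Handlers installed with SA_ONSTACK now run on ss.ss_sp. */
	return 0;
}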
 
 #ifdef __ARCH_WANT_SYS_SIGPENDING
 
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index dbff751..395084d 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@
 cond_syscall(sys_kexec_load);
 cond_syscall(compat_sys_kexec_load);
 cond_syscall(sys_init_module);
+cond_syscall(sys_finit_module);
 cond_syscall(sys_delete_module);
 cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 33f71f3..c88878d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -256,9 +256,11 @@
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 static int min_wakeup_granularity_ns;			/* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
+#ifdef CONFIG_SMP
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-#endif
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_SCHED_DEBUG */
 
 #ifdef CONFIG_COMPACTION
 static int min_extfrag_threshold;
@@ -301,6 +303,7 @@
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "sched_tunable_scaling",
 		.data		= &sysctl_sched_tunable_scaling,
@@ -347,7 +350,45 @@
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#endif
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_delay_ms",
+		.data		= &sysctl_numa_balancing_scan_delay,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_min_ms",
+		.data		= &sysctl_numa_balancing_scan_period_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_reset",
+		.data		= &sysctl_numa_balancing_scan_period_reset,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_max_ms",
+		.data		= &sysctl_numa_balancing_scan_period_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_size_mb",
+		.data		= &sysctl_numa_balancing_scan_size,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 	{
 		.procname	= "sched_rt_period_us",
 		.data		= &sysctl_sched_rt_period,
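The new table entries surface as files under /proc/sys/kernel/ with the 0644 mode given above. A small sketch reading one of them (path assumed from the .procname fields):

#include <stdio.h>

int main(void)
{
	unsigned int ms = 0;
	FILE *f = fopen("/proc/sys/kernel/numa_balancing_scan_delay_ms", "r");

	if (f == NULL || fscanf(f, "%u", &ms) != 1) {
		perror("numa_balancing_scan_delay_ms");
		return 1;
	}
	fclose(f);
	printf("initial scan delay: %u ms\n", ms);
	return 0;
}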
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 65bdcf1..5a63844 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1344,7 +1344,7 @@
 		goto out_putname;
 	}
 
-	mnt = current->nsproxy->pid_ns->proc_mnt;
+	mnt = task_active_pid_ns(current)->proc_mnt;
 	file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
 	result = PTR_ERR(file);
 	if (IS_ERR(file))
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index afd092d..3ffe4c5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2675,12 +2675,12 @@
 }
 
 loff_t
-ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t ret;
 
 	if (file->f_mode & FMODE_READ)
-		ret = seq_lseek(file, offset, origin);
+		ret = seq_lseek(file, offset, whence);
 	else
 		file->f_pos = ret = 1;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 61e081b..e512567 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3034,6 +3034,31 @@
 		tr->data[cpu]->entries = val;
 }
 
+/* resize @tr's buffer to the size of @size_tr's entries */
+static int resize_buffer_duplicate_size(struct trace_array *tr,
+					struct trace_array *size_tr, int cpu_id)
+{
+	int cpu, ret = 0;
+
+	if (cpu_id == RING_BUFFER_ALL_CPUS) {
+		for_each_tracing_cpu(cpu) {
+			ret = ring_buffer_resize(tr->buffer,
+					size_tr->data[cpu]->entries, cpu);
+			if (ret < 0)
+				break;
+			tr->data[cpu]->entries = size_tr->data[cpu]->entries;
+		}
+	} else {
+		ret = ring_buffer_resize(tr->buffer,
+					size_tr->data[cpu_id]->entries, cpu_id);
+		if (ret == 0)
+			tr->data[cpu_id]->entries =
+				size_tr->data[cpu_id]->entries;
+	}
+
+	return ret;
+}
+
 static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 {
 	int ret;
@@ -3058,23 +3083,8 @@
 
 	ret = ring_buffer_resize(max_tr.buffer, size, cpu);
 	if (ret < 0) {
-		int r = 0;
-
-		if (cpu == RING_BUFFER_ALL_CPUS) {
-			int i;
-			for_each_tracing_cpu(i) {
-				r = ring_buffer_resize(global_trace.buffer,
-						global_trace.data[i]->entries,
-						i);
-				if (r < 0)
-					break;
-			}
-		} else {
-			r = ring_buffer_resize(global_trace.buffer,
-						global_trace.data[cpu]->entries,
-						cpu);
-		}
-
+		int r = resize_buffer_duplicate_size(&global_trace,
+						     &global_trace, cpu);
 		if (r < 0) {
 			/*
 			 * AARGH! We are left with different
@@ -3212,17 +3222,11 @@
 
 	topts = create_trace_option_files(t);
 	if (t->use_max_tr) {
-		int cpu;
 		/* we need to make per cpu buffer sizes equivalent */
-		for_each_tracing_cpu(cpu) {
-			ret = ring_buffer_resize(max_tr.buffer,
-						global_trace.data[cpu]->entries,
-						cpu);
-			if (ret < 0)
-				goto out;
-			max_tr.data[cpu]->entries =
-					global_trace.data[cpu]->entries;
-		}
+		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
+						   RING_BUFFER_ALL_CPUS);
+		if (ret < 0)
+			goto out;
 	}
 
 	if (t->init) {
@@ -4271,13 +4275,11 @@
 		return -ENOMEM;
 
 	if (*ppos & (PAGE_SIZE - 1)) {
-		WARN_ONCE(1, "Ftrace: previous read must page-align\n");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (len & (PAGE_SIZE - 1)) {
-		WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
 		if (len < PAGE_SIZE) {
 			ret = -EINVAL;
 			goto out;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 0c1b1657..42ca822 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -33,7 +33,6 @@
 static arch_spinlock_t max_stack_lock =
 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
-static int stack_trace_disabled __read_mostly;
 static DEFINE_PER_CPU(int, trace_active);
 static DEFINE_MUTEX(stack_sysctl_mutex);
 
@@ -116,9 +115,6 @@
 {
 	int cpu;
 
-	if (unlikely(!ftrace_enabled || stack_trace_disabled))
-		return;
-
 	preempt_disable_notrace();
 
 	cpu = raw_smp_processor_id();
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 9614db8..c86e6d4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -22,6 +22,7 @@
 #include <linux/uaccess.h>
 #include <linux/uprobes.h>
 #include <linux/namei.h>
+#include <linux/string.h>
 
 #include "trace_probe.h"
 
@@ -263,16 +264,15 @@
 
 	/* setup a probe */
 	if (!event) {
-		char *tail = strrchr(filename, '/');
+		char *tail;
 		char *ptr;
 
-		ptr = kstrdup((tail ? tail + 1 : filename), GFP_KERNEL);
-		if (!ptr) {
+		tail = kstrdup(kbasename(filename), GFP_KERNEL);
+		if (!tail) {
 			ret = -ENOMEM;
 			goto fail_address_parse;
 		}
 
-		tail = ptr;
 		ptr = strpbrk(tail, ".-_");
 		if (ptr)
 			*ptr = '\0';
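
kbasename() replaces the strrchr()-plus-conditional idiom the old code open-coded (dynamic_debug drops its duplicate of the same helper further down). It returns a pointer into the same string, just past the last '/'. An illustrative reimplementation of its semantics:

#include <string.h>

/* Illustrative equivalent of the kernel's kbasename(): return the
 * component after the last '/', or the whole path if there is none. */
static const char *kbasename_demo(const char *path)
{
	const char *tail = strrchr(path, '/');

	return tail ? tail + 1 : path;
}

/* kbasename_demo("/lib/modules/foo.ko") == "foo.ko";
 * kbasename_demo("foo.ko") == "foo.ko" */
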
diff --git a/kernel/user.c b/kernel/user.c
index 750acff..33acb5e 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 
 /*
  * userns count is 1 for root user, 1 for init_uts_ns,
@@ -51,6 +52,7 @@
 	},
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
+	.proc_inum = PROC_USER_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 456a6b9..2b042c4 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 #include <linux/highuid.h>
 #include <linux/cred.h>
 #include <linux/securebits.h>
@@ -26,6 +27,24 @@
 static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
+static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
+{
+	/* Start with the same capabilities as init but useless for doing
+	 * anything as the capabilities are bound to the new user namespace.
+	 */
+	cred->securebits = SECUREBITS_DEFAULT;
+	cred->cap_inheritable = CAP_EMPTY_SET;
+	cred->cap_permitted = CAP_FULL_SET;
+	cred->cap_effective = CAP_FULL_SET;
+	cred->cap_bset = CAP_FULL_SET;
+#ifdef CONFIG_KEYS
+	key_put(cred->request_key_auth);
+	cred->request_key_auth = NULL;
+#endif
+	/* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
+	cred->user_ns = user_ns;
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -39,6 +58,7 @@
 	struct user_namespace *ns, *parent_ns = new->user_ns;
 	kuid_t owner = new->euid;
 	kgid_t group = new->egid;
+	int ret;
 
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
@@ -52,38 +72,45 @@
 	if (!ns)
 		return -ENOMEM;
 
+	ret = proc_alloc_inum(&ns->proc_inum);
+	if (ret) {
+		kmem_cache_free(user_ns_cachep, ns);
+		return ret;
+	}
+
 	kref_init(&ns->kref);
+	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
 	ns->owner = owner;
 	ns->group = group;
 
-	/* Start with the same capabilities as init but useless for doing
-	 * anything as the capabilities are bound to the new user namespace.
-	 */
-	new->securebits = SECUREBITS_DEFAULT;
-	new->cap_inheritable = CAP_EMPTY_SET;
-	new->cap_permitted = CAP_FULL_SET;
-	new->cap_effective = CAP_FULL_SET;
-	new->cap_bset = CAP_FULL_SET;
-#ifdef CONFIG_KEYS
-	key_put(new->request_key_auth);
-	new->request_key_auth = NULL;
-#endif
-	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
-
-	/* Leave the new->user_ns reference with the new user namespace. */
-	/* Leave the reference to our user_ns with the new cred. */
-	new->user_ns = ns;
+	set_cred_user_ns(new, ns);
 
 	return 0;
 }
 
+int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
+{
+	struct cred *cred;
+
+	if (!(unshare_flags & CLONE_NEWUSER))
+		return 0;
+
+	cred = prepare_creds();
+	if (!cred)
+		return -ENOMEM;
+
+	*new_cred = cred;
+	return create_user_ns(cred);
+}
+
 void free_user_ns(struct kref *kref)
 {
 	struct user_namespace *parent, *ns =
 		container_of(kref, struct user_namespace, kref);
 
 	parent = ns->parent;
+	proc_free_inum(ns->proc_inum);
 	kmem_cache_free(user_ns_cachep, ns);
 	put_user_ns(parent);
 }
@@ -372,7 +399,7 @@
 	struct user_namespace *lower_ns;
 	uid_t lower;
 
-	lower_ns = current_user_ns();
+	lower_ns = seq_user_ns(seq);
 	if ((lower_ns == ns) && lower_ns->parent)
 		lower_ns = lower_ns->parent;
 
@@ -393,7 +420,7 @@
 	struct user_namespace *lower_ns;
 	gid_t lower;
 
-	lower_ns = current_user_ns();
+	lower_ns = seq_user_ns(seq);
 	if ((lower_ns == ns) && lower_ns->parent)
 		lower_ns = lower_ns->parent;
 
@@ -669,10 +696,14 @@
 {
 	struct seq_file *seq = file->private_data;
 	struct user_namespace *ns = seq->private;
+	struct user_namespace *seq_ns = seq_user_ns(seq);
 
 	if (!ns->parent)
 		return -EPERM;
 
+	if ((seq_ns != ns) && (seq_ns != ns->parent))
+		return -EPERM;
+
 	return map_write(file, buf, size, ppos, CAP_SETUID,
 			 &ns->uid_map, &ns->parent->uid_map);
 }
@@ -681,10 +712,14 @@
 {
 	struct seq_file *seq = file->private_data;
 	struct user_namespace *ns = seq->private;
+	struct user_namespace *seq_ns = seq_user_ns(seq);
 
 	if (!ns->parent)
 		return -EPERM;
 
+	if ((seq_ns != ns) && (seq_ns != ns->parent))
+		return -EPERM;
+
 	return map_write(file, buf, size, ppos, CAP_SETGID,
 			 &ns->gid_map, &ns->parent->gid_map);
 }
@@ -709,6 +744,21 @@
 static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
 {
+	/* Allow mapping to your own filesystem ids */
+	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+		u32 id = new_map->extent[0].lower_first;
+		if (cap_setid == CAP_SETUID) {
+			kuid_t uid = make_kuid(ns->parent, id);
+			if (uid_eq(uid, current_fsuid()))
+				return true;
+		}
+		else if (cap_setid == CAP_SETGID) {
+			kgid_t gid = make_kgid(ns->parent, id);
+			if (gid_eq(gid, current_fsgid()))
+				return true;
+		}
+	}
+
 	/* Allow anyone to set a mapping that doesn't require privilege */
 	if (!cap_valid(cap_setid))
 		return true;
@@ -722,6 +772,65 @@
 	return false;
 }
 
+static void *userns_get(struct task_struct *task)
+{
+	struct user_namespace *user_ns;
+
+	rcu_read_lock();
+	user_ns = get_user_ns(__task_cred(task)->user_ns);
+	rcu_read_unlock();
+
+	return user_ns;
+}
+
+static void userns_put(void *ns)
+{
+	put_user_ns(ns);
+}
+
+static int userns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct user_namespace *user_ns = ns;
+	struct cred *cred;
+
+	/* Don't allow gaining capabilities by reentering
+	 * the same user namespace.
+	 */
+	if (user_ns == current_user_ns())
+		return -EINVAL;
+
+	/* Threaded processes may not enter a different user namespace */
+	if (atomic_read(&current->mm->mm_users) > 1)
+		return -EINVAL;
+
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	cred = prepare_creds();
+	if (!cred)
+		return -ENOMEM;
+
+	put_user_ns(cred->user_ns);
+	set_cred_user_ns(cred, get_user_ns(user_ns));
+
+	return commit_creds(cred);
+}
+
+static unsigned int userns_inum(void *ns)
+{
+	struct user_namespace *user_ns = ns;
+	return user_ns->proc_inum;
+}
+
+const struct proc_ns_operations userns_operations = {
+	.name		= "user",
+	.type		= CLONE_NEWUSER,
+	.get		= userns_get,
+	.put		= userns_put,
+	.install	= userns_install,
+	.inum		= userns_inum,
+};
+
 static __init int user_namespaces_init(void)
 {
 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
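
The new fsuid/fsgid clause in new_idmap_permitted() is what makes unprivileged user namespaces usable: a process may install a single one-entry map pointing at its own filesystem ids. A hedged userspace sketch of that flow (error handling trimmed, values illustrative):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char map[64];
	int fd, n;

	if (unshare(CLONE_NEWUSER) < 0) {
		perror("unshare");
		return 1;
	}
	/* "0 <outer-uid> 1": one extent, length one, onto our own uid,
	 * which the fsuid check above permits without CAP_SETUID */
	n = snprintf(map, sizeof(map), "0 %d 1", (int)getuid());
	fd = open("/proc/self/uid_map", O_WRONLY);
	if (fd < 0 || write(fd, map, n) != n) {
		perror("uid_map");
		return 1;
	}
	close(fd);
	return 0;	/* now uid 0 inside the new namespace */
}
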
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 679d97a..08b197e 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -32,18 +32,25 @@
  * @old_ns: namespace to clone
  * Return NULL on error (failure to kmalloc), new ns otherwise
  */
-static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
+static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 					  struct uts_namespace *old_ns)
 {
 	struct uts_namespace *ns;
+	int err;
 
 	ns = create_uts_ns();
 	if (!ns)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	down_read(&uts_sem);
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
-	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+	ns->user_ns = get_user_ns(user_ns);
 	up_read(&uts_sem);
 	return ns;
 }
@@ -55,9 +62,8 @@
  * versa.
  */
 struct uts_namespace *copy_utsname(unsigned long flags,
-				   struct task_struct *tsk)
+	struct user_namespace *user_ns, struct uts_namespace *old_ns)
 {
-	struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
 	struct uts_namespace *new_ns;
 
 	BUG_ON(!old_ns);
@@ -66,7 +72,7 @@
 	if (!(flags & CLONE_NEWUTS))
 		return old_ns;
 
-	new_ns = clone_uts_ns(tsk, old_ns);
+	new_ns = clone_uts_ns(user_ns, old_ns);
 
 	put_uts_ns(old_ns);
 	return new_ns;
@@ -78,6 +84,7 @@
 
 	ns = container_of(kref, struct uts_namespace, kref);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -102,19 +109,32 @@
 	put_uts_ns(ns);
 }
 
-static int utsns_install(struct nsproxy *nsproxy, void *ns)
+static int utsns_install(struct nsproxy *nsproxy, void *new)
 {
+	struct uts_namespace *ns = new;
+
+	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	get_uts_ns(ns);
 	put_uts_ns(nsproxy->uts_ns);
 	nsproxy->uts_ns = ns;
 	return 0;
 }
 
+static unsigned int utsns_inum(void *vp)
+{
+	struct uts_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations utsns_operations = {
 	.name		= "uts",
 	.type		= CLONE_NEWUTS,
 	.get		= utsns_get,
 	.put		= utsns_put,
 	.install	= utsns_install,
+	.inum		= utsns_inum,
 };
-
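
With proc_inum allocated per namespace and utsns_inum exported through proc_ns_operations, each UTS namespace now has a stable /proc/<pid>/ns/uts inode, and utsns_install() gates setns(2) on CAP_SYS_ADMIN in both the target namespace's owner and the caller's own user namespace. A userspace sketch of joining another task's UTS namespace:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

/* Sketch: enter the UTS namespace of @target; the kernel side ends up
 * in utsns_install() above, so the capability checks apply. */
static int join_uts_ns(pid_t target)
{
	char path[64];
	int fd, ret;

	snprintf(path, sizeof(path), "/proc/%d/ns/uts", (int)target);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	ret = setns(fd, CLONE_NEWUTS);
	close(fd);
	return ret;
}
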
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c8c21be..75a2ab3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,7 @@
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
 static int __read_mostly watchdog_disabled;
+static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
@@ -116,7 +117,7 @@
 	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
-static u64 get_sample_period(void)
+static void set_sample_period(void)
 {
 	/*
 	 * convert watchdog_thresh from seconds to ns
@@ -125,7 +126,7 @@
 	 * and hard thresholds) to increment before the
 	 * hardlockup detector generates a warning
 	 */
-	return get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
 }
 
 /* Commands for resetting the watchdog */
@@ -275,7 +276,7 @@
 	wake_up_process(__this_cpu_read(softlockup_watchdog));
 
 	/* .. and repeat */
-	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
+	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 
 	if (touch_ts == 0) {
 		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
@@ -343,6 +344,10 @@
 {
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
+	/* kick off the timer for the hardlockup detector */
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer->function = watchdog_timer_fn;
+
 	if (!watchdog_enabled) {
 		kthread_park(current);
 		return;
@@ -351,12 +356,8 @@
 	/* Enable the perf event */
 	watchdog_nmi_enable(cpu);
 
-	/* kick off the timer for the hardlockup detector */
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hrtimer->function = watchdog_timer_fn;
-
 	/* done here because hrtimer_start can only pin to smp_processor_id() */
-	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
+	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
 		      HRTIMER_MODE_REL_PINNED);
 
 	/* initialize timestamp */
@@ -368,9 +369,6 @@
 {
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-	if (!watchdog_enabled)
-		return;
-
 	watchdog_set_prio(SCHED_NORMAL, 0);
 	hrtimer_cancel(hrtimer);
 	/* disable the perf event */
@@ -386,7 +384,7 @@
 /*
  * The watchdog thread function - touches the timestamp.
  *
- * It only runs once every get_sample_period() seconds (4 seconds by
+ * It only runs once every sample_period (4 seconds by
  * default) to reset the softlockup timestamp. If this gets delayed
  * for more than 2*watchdog_thresh seconds then the debug-printout
  * triggers in watchdog_timer_fn().
@@ -519,6 +517,7 @@
 	if (ret || !write)
 		return ret;
 
+	set_sample_period();
 	if (watchdog_enabled && watchdog_thresh)
 		watchdog_enable_all_cpus();
 	else
@@ -540,6 +539,7 @@
 
 void __init lockup_detector_init(void)
 {
+	set_sample_period();
 	if (smpboot_register_percpu_thread(&watchdog_threads)) {
 		pr_err("Failed to create watchdog threads, disabled\n");
 		watchdog_disabled = -ENODEV;
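
The watchdog change is a compute-once refactor: the period is derived from watchdog_thresh only when the threshold can actually change (init and the sysctl write path) rather than on every hrtimer tick. The pattern in isolation, with illustrative names:

#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static unsigned int thresh = 10;	/* like watchdog_thresh, in seconds */
static uint64_t period_ns;		/* like sample_period: read often, written rarely */

static void set_period(void)
{
	/* 2*thresh seconds split into 5 sampling intervals */
	period_ns = 2ULL * thresh * NSEC_PER_SEC / 5;
}

/* timer callback: only consumes the cached value */
static uint64_t next_expiry(uint64_t now_ns)
{
	return now_ns + period_ns;	/* 4 seconds with the default thresh */
}
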
diff --git a/lib/Kconfig b/lib/Kconfig
index 4b31a46..75cdb77 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -42,6 +42,9 @@
 config STMP_DEVICE
 	bool
 
+config PERCPU_RWSEM
+	boolean
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e458782..3a35309 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1192,14 +1192,14 @@
 
 	  If unsure, say N.
 
-config PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT
-	tristate "pSeries reconfig notifier error injection module"
-	depends on PPC_PSERIES && NOTIFIER_ERROR_INJECTION
+config OF_RECONFIG_NOTIFIER_ERROR_INJECT
+	tristate "OF reconfig notifier error injection module"
+	depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION
 	help
 	  This option provides the ability to inject artificial errors to
-	  pSeries reconfig notifier chain callbacks.  It is controlled
+	  OF reconfig notifier chain callbacks.  It is controlled
 	  through debugfs interface under
-	  /sys/kernel/debug/notifier-error-inject/pSeries-reconfig/
+	  /sys/kernel/debug/notifier-error-inject/OF-reconfig/
 
 	  If the notifier call chain should be failed with some events
 	  notified, write the error code to "actions/<notifier event>/error".
diff --git a/lib/Makefile b/lib/Makefile
index e2152fa..02ed6c0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -40,6 +40,7 @@
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
 
 CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
@@ -94,8 +95,8 @@
 obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
-obj-$(CONFIG_PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT) += \
-	pSeries-reconfig-notifier-error-inject.o
+obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
+	of-reconfig-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 5293d24..11b9b01 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -81,7 +81,7 @@
 		goto next_tag;
 	}
 
-	if (unlikely((tag & 0x1f) == 0x1f)) {
+	if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) {
 		do {
 			if (unlikely(datalen - dp < 2))
 				goto data_overrun_error;
@@ -96,7 +96,7 @@
 		goto next_tag;
 	}
 
-	if (unlikely(len == 0x80)) {
+	if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
 		/* Indefinite length */
 		if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5))
 			goto indefinite_len_primitive;
@@ -222,7 +222,7 @@
 		if (unlikely(dp >= datalen - 1))
 			goto data_overrun_error;
 		tag = data[dp++];
-		if (unlikely((tag & 0x1f) == 0x1f))
+		if (unlikely((tag & 0x1f) == ASN1_LONG_TAG))
 			goto long_tag_not_supported;
 
 		if (op & ASN1_OP_MATCH__ANY) {
@@ -254,7 +254,7 @@
 
 		len = data[dp++];
 		if (len > 0x7f) {
-			if (unlikely(len == 0x80)) {
+			if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
 				/* Indefinite length */
 				if (unlikely(!(tag & ASN1_CONS_BIT)))
 					goto indefinite_len_primitive;
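
ASN1_LONG_TAG (0x1f) and ASN1_INDEFINITE_LENGTH (0x80) name the two BER magic values the decoder used to spell out by hand. In the high-tag-number form the real tag follows the identifier octet in base-128 bytes; a minimal sketch of that decoding rule:

#include <stddef.h>

/* Sketch of BER high-tag-number parsing: the tag number follows in
 * base-128 octets, continuation bit (0x80) set on all but the last. */
static long parse_long_tag(const unsigned char *p, size_t len, size_t *used)
{
	unsigned long tag = 0;
	size_t i = 0;

	do {
		if (i >= len)
			return -1;	/* data overrun */
		tag = (tag << 7) | (p[i] & 0x7f);
	} while (p[i++] & 0x80);

	*used = i;
	return (long)tag;
}
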
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 9785378..08a4f06 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -31,7 +31,11 @@
 static union {
 	raw_spinlock_t lock;
 	char pad[L1_CACHE_BYTES];
-} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = {
+	[0 ... (NR_LOCKS - 1)] = {
+		.lock =  __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+	},
+};
 
 static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
 {
@@ -173,14 +177,3 @@
 	return ret;
 }
 EXPORT_SYMBOL(atomic64_add_unless);
-
-static int init_atomic64_lock(void)
-{
-	int i;
-
-	for (i = 0; i < NR_LOCKS; ++i)
-		raw_spin_lock_init(&atomic64_lock[i].lock);
-	return 0;
-}
-
-pure_initcall(init_atomic64_lock);
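
The initcall can go because the lock array is now fully initialized at build time with GCC's designated range initializer, [first ... last] =, which stamps the same initializer into every element. A standalone sketch of the construct (illustrative types):

#define NR_LOCKS 16

struct padded_flag {
	int flag;
	char pad[60];
};

/* every element initialized at compile time; no init function needed */
static struct padded_flag flags[NR_LOCKS] = {
	[0 ... (NR_LOCKS - 1)] = { .flag = 1 },
};
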
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d84beb9..5e396ac 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -45,6 +45,12 @@
 	dma_debug_coherent,
 };
 
+enum map_err_types {
+	MAP_ERR_CHECK_NOT_APPLICABLE,
+	MAP_ERR_NOT_CHECKED,
+	MAP_ERR_CHECKED,
+};
+
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
 struct dma_debug_entry {
@@ -57,6 +63,7 @@
 	int              direction;
 	int		 sg_call_ents;
 	int		 sg_mapped_ents;
+	enum map_err_types  map_err_type;
 #ifdef CONFIG_STACKTRACE
 	struct		 stack_trace stacktrace;
 	unsigned long	 st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
@@ -114,6 +121,12 @@
 
 static DEFINE_RWLOCK(driver_name_lock);
 
+static const char *const maperr2str[] = {
+	[MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
+	[MAP_ERR_NOT_CHECKED] = "dma map error not checked",
+	[MAP_ERR_CHECKED] = "dma map error checked",
+};
+
 static const char *type2name[4] = { "single", "page",
 				    "scather-gather", "coherent" };
 
@@ -376,11 +389,12 @@
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s\n",
+					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
 					 (unsigned long long)entry->paddr,
 					 entry->dev_addr, entry->size,
-					 dir2name[entry->direction]);
+					 dir2name[entry->direction],
+					 maperr2str[entry->map_err_type]);
 			}
 		}
 
@@ -844,16 +858,16 @@
 	struct hash_bucket *bucket;
 	unsigned long flags;
 
-	if (dma_mapping_error(ref->dev, ref->dev_addr)) {
-		err_printk(ref->dev, NULL, "DMA-API: device driver tries "
-			   "to free an invalid DMA memory address\n");
-		return;
-	}
-
 	bucket = get_hash_bucket(ref, &flags);
 	entry = bucket_find_exact(bucket, ref);
 
 	if (!entry) {
+		if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+			err_printk(ref->dev, NULL,
+				   "DMA-API: device driver tries "
+				   "to free an invalid DMA memory address\n");
+			return;
+		}
 		err_printk(ref->dev, NULL, "DMA-API: device driver tries "
 			   "to free DMA memory it has not allocated "
 			   "[device address=0x%016llx] [size=%llu bytes]\n",
@@ -910,6 +924,15 @@
 			   dir2name[ref->direction]);
 	}
 
+	if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+		err_printk(ref->dev, entry,
+			   "DMA-API: device driver failed to check map error"
+			   "[device address=0x%016llx] [size=%llu bytes] "
+			   "[mapped as %s]",
+			   ref->dev_addr, ref->size,
+			   type2name[entry->type]);
+	}
+
 	hash_bucket_del(entry);
 	dma_entry_free(entry);
 
@@ -1017,7 +1040,7 @@
 	if (unlikely(global_disable))
 		return;
 
-	if (unlikely(dma_mapping_error(dev, dma_addr)))
+	if (dma_mapping_error(dev, dma_addr))
 		return;
 
 	entry = dma_entry_alloc();
@@ -1030,6 +1053,7 @@
 	entry->dev_addr  = dma_addr;
 	entry->size      = size;
 	entry->direction = direction;
+	entry->map_err_type = MAP_ERR_NOT_CHECKED;
 
 	if (map_single)
 		entry->type = dma_debug_single;
@@ -1045,6 +1069,30 @@
 }
 EXPORT_SYMBOL(debug_dma_map_page);
 
+void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	struct dma_debug_entry ref;
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+
+	if (unlikely(global_disable))
+		return;
+
+	ref.dev = dev;
+	ref.dev_addr = dma_addr;
+	bucket = get_hash_bucket(&ref, &flags);
+	entry = bucket_find_exact(bucket, &ref);
+
+	if (!entry)
+		goto out;
+
+	entry->map_err_type = MAP_ERR_CHECKED;
+out:
+	put_hash_bucket(bucket, &flags);
+}
+EXPORT_SYMBOL(debug_dma_mapping_error);
+
 void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 			  size_t size, int direction, bool map_single)
 {
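
With map_err_type tracked per entry, a driver that maps a buffer and never asks dma_mapping_error() about the result is now flagged at unmap time. The expected driver pattern, as an illustrative fragment (send path and device specifics omitted):

#include <linux/dma-mapping.h>

static int send_buf(struct device *dev, void *buf, size_t size)
{
	dma_addr_t addr = dma_map_single(dev, buf, size, DMA_TO_DEVICE);

	/* this call is what flips the debug entry to MAP_ERR_CHECKED */
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;

	/* ... hand addr to the hardware and wait for completion ... */

	dma_unmap_single(dev, addr, size, DMA_TO_DEVICE);
	return 0;
}
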
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e7f7d99..1db1fc6 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -62,13 +62,6 @@
 static int verbose = 0;
 module_param(verbose, int, 0644);
 
-/* Return the last part of a pathname */
-static inline const char *basename(const char *path)
-{
-	const char *tail = strrchr(path, '/');
-	return tail ? tail+1 : path;
-}
-
 /* Return the path relative to source root */
 static inline const char *trim_prefix(const char *path)
 {
@@ -154,7 +147,7 @@
 			/* match against the source filename */
 			if (query->filename &&
 			    strcmp(query->filename, dp->filename) &&
-			    strcmp(query->filename, basename(dp->filename)) &&
+			    strcmp(query->filename, kbasename(dp->filename)) &&
 			    strcmp(query->filename, trim_prefix(dp->filename)))
 				continue;
 
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c
index b259039..245900b 100644
--- a/lib/interval_tree_test_main.c
+++ b/lib/interval_tree_test_main.c
@@ -30,7 +30,8 @@
 {
 	int i;
 	for (i = 0; i < NODES; i++) {
-		u32 a = prandom32(&rnd), b = prandom32(&rnd);
+		u32 a = prandom_u32_state(&rnd);
+		u32 b = prandom_u32_state(&rnd);
 		if (a <= b) {
 			nodes[i].start = a;
 			nodes[i].last = b;
@@ -40,7 +41,7 @@
 		}
 	}
 	for (i = 0; i < SEARCHES; i++)
-		queries[i] = prandom32(&rnd);
+		queries[i] = prandom_u32_state(&rnd);
 }
 
 static int interval_tree_test_init(void)
@@ -51,7 +52,7 @@
 
 	printk(KERN_ALERT "interval tree insert/remove");
 
-	prandom32_seed(&rnd, 3141592653589793238ULL);
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
 	init();
 
 	time1 = get_cycles();
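
prandom32()/prandom32_seed() become prandom_u32_state()/prandom_seed_state(); both draw from a caller-owned struct rnd_state, so a fixed seed yields a reproducible sequence, which is exactly what a benchmark like this wants. Minimal usage sketch:

#include <linux/random.h>

/* Sketch: a private, reproducible PRNG stream as used by the test above */
static u32 first_draw(u64 seed)
{
	struct rnd_state rnd;

	prandom_seed_state(&rnd, seed);	/* same seed => same sequence */
	return prandom_u32_state(&rnd);
}
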
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index c3615ea..f78ae0c 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -104,6 +104,22 @@
 	return 0;
 }
 
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 {
 	if (s[0] == '+')
@@ -112,6 +128,22 @@
 }
 EXPORT_SYMBOL(kstrtoull);
 
+/**
+ * kstrtoll - convert a string to a long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoll. Return code must
+ * be checked.
+ */
 int kstrtoll(const char *s, unsigned int base, long long *res)
 {
 	unsigned long long tmp;
@@ -168,6 +200,22 @@
 }
 EXPORT_SYMBOL(_kstrtol);
 
+/**
+ * kstrtouint - convert a string to an unsigned int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoul. Return code must
+ * be checked.
+ */
 int kstrtouint(const char *s, unsigned int base, unsigned int *res)
 {
 	unsigned long long tmp;
@@ -183,6 +231,22 @@
 }
 EXPORT_SYMBOL(kstrtouint);
 
+/**
+ * kstrtoint - convert a string to an int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtol. Return code must
+ * be checked.
+ */
 int kstrtoint(const char *s, unsigned int base, int *res)
 {
 	long long tmp;
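
In caller code the documented contract looks like this; the point of the kstrto* family over the obsolete simple_strto* helpers is that overflow and trailing junk are reported instead of silently ignored. An illustrative caller:

#include <linux/kernel.h>

/* Sketch: parse a count from a sysfs-style buffer */
static int parse_count(const char *buf, unsigned int *count)
{
	/* base 0 auto-detects a 0x (hex) or 0 (octal) prefix */
	int ret = kstrtouint(buf, 0, count);

	if (ret)
		return ret;	/* -ERANGE on overflow, -EINVAL on bad input */
	return 0;
}
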
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index a07e726..d71d894 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -44,8 +44,8 @@
 } while (0)
 
 #define RETURN(x...)     do { \
-	clear_bit(__LC_PARANOIA, &lc->flags); \
-	smp_mb__after_clear_bit(); return x ; } while (0)
+	clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
+	return x ; } while (0)
 
 /* BUG() if e is not one of the elements tracked by lc */
 #define PARANOIA_LC_ELEMENT(lc, e) do {	\
@@ -55,9 +55,40 @@
 	BUG_ON(i >= lc_->nr_elements);	\
 	BUG_ON(lc_->lc_element[i] != e_); } while (0)
 
+
+/* We need to atomically
+ *  - try to grab the lock (set LC_LOCKED)
+ *  - only if there is no pending transaction
+ *    (neither LC_DIRTY nor LC_STARVING is set)
+ * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
+ * it is not sufficient to just say
+ *	return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
+ */
+int lc_try_lock(struct lru_cache *lc)
+{
+	unsigned long val;
+	do {
+		val = cmpxchg(&lc->flags, 0, LC_LOCKED);
+	} while (unlikely (val == LC_PARANOIA));
+	/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
+	return 0 == val;
+#if 0
+	/* Alternative approach, spin in case someone enters or leaves a
+	 * PARANOIA_ENTRY()/RETURN() section. */
+	unsigned long old, new, val;
+	do {
+		old = lc->flags & LC_PARANOIA;
+		new = old | LC_LOCKED;
+		val = cmpxchg(&lc->flags, old, new);
+	} while (unlikely (val == (old ^ LC_PARANOIA)));
+	return old == val;
+#endif
+}
+
 /**
  * lc_create - prepares to track objects in an active set
  * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @max_pending_changes: maximum changes to accumulate until a transaction is required
  * @e_count: number of elements allowed to be active simultaneously
  * @e_size: size of the tracked objects
  * @e_off: offset to the &struct lc_element member in a tracked object
@@ -66,6 +97,7 @@
  * or NULL on (allocation) failure.
  */
 struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off)
 {
 	struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 
 	lc->name = name;
 	lc->element_size = e_size;
 	lc->element_off = e_off;
 	lc->nr_elements = e_count;
-	lc->new_number = LC_FREE;
+	lc->max_pending_changes = max_pending_changes;
 	lc->lc_cache = cache;
 	lc->lc_element = element;
 	lc->lc_slot = slot;
@@ -117,6 +150,7 @@
 		e = p + e_off;
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 		element[i] = e;
 	}
@@ -175,15 +209,15 @@
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 	lc->used = 0;
 	lc->hits = 0;
 	lc->misses = 0;
 	lc->starving = 0;
-	lc->dirty = 0;
+	lc->locked = 0;
 	lc->changed = 0;
+	lc->pending_changes = 0;
 	lc->flags = 0;
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
 	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
 
 	for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@
 		/* re-init it */
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 	}
 }
@@ -208,14 +243,14 @@
 	/* NOTE:
 	 * total calls to lc_get are
 	 * (starving + hits + misses)
-	 * misses include "dirty" count (update from an other thread in
+	 * misses include "locked" count (update from another thread in
 	 * progress) and "changed", when this in fact led to a successful
 	 * update of the cache.
 	 */
 	return seq_printf(seq, "\t%s: used:%u/%u "
-		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
 		lc->name, lc->used, lc->nr_elements,
-		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
 }
 
 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,6 +259,27 @@
 }
 
 
+static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
+		bool include_changing)
+{
+	struct hlist_node *n;
+	struct lc_element *e;
+
+	BUG_ON(!lc);
+	BUG_ON(!lc->nr_elements);
+	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
+		/* "about to be changed" elements, pending transaction commit,
+		 * are hashed by their "new number". "Normal" elements have
+		 * lc_number == lc_new_number. */
+		if (e->lc_new_number != enr)
+			continue;
+		if (e->lc_new_number == e->lc_number || include_changing)
+			return e;
+		break;
+	}
+	return NULL;
+}
+
 /**
  * lc_find - find element by label, if present in the hash table
  * @lc: The lru_cache object
@@ -232,38 +288,28 @@
  * Returns the pointer to an element, if the element with the requested
  * "label" or element number is present in the hash table,
  * or NULL if not found. Does not change the refcnt.
+ * Ignores elements that are "about to be used", i.e. not yet in the active
+ * set, but still pending transaction commit.
  */
 struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 {
-	struct hlist_node *n;
-	struct lc_element *e;
-
-	BUG_ON(!lc);
-	BUG_ON(!lc->nr_elements);
-	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
-		if (e->lc_number == enr)
-			return e;
-	}
-	return NULL;
+	return __lc_find(lc, enr, 0);
 }
 
-/* returned element will be "recycled" immediately */
-static struct lc_element *lc_evict(struct lru_cache *lc)
+/**
+ * lc_is_used - find element by label
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns true, if the element with the requested "label" or element number is
+ * present in the hash table, and is used (refcnt > 0).
+ * Also finds elements that are not _currently_ used but only "about to be
+ * used", i.e. on the "to_be_changed" list, pending transaction commit.
+ */
+bool lc_is_used(struct lru_cache *lc, unsigned int enr)
 {
-	struct list_head  *n;
-	struct lc_element *e;
-
-	if (list_empty(&lc->lru))
-		return NULL;
-
-	n = lc->lru.prev;
-	e = list_entry(n, struct lc_element, list);
-
-	PARANOIA_LC_ELEMENT(lc, e);
-
-	list_del(&e->list);
-	hlist_del(&e->colision);
-	return e;
+	struct lc_element *e = __lc_find(lc, enr, 1);
+	return e && e->refcnt;
 }
 
 /**
@@ -280,22 +326,34 @@
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt);
 
-	e->lc_number = LC_FREE;
+	e->lc_number = e->lc_new_number = LC_FREE;
 	hlist_del_init(&e->colision);
 	list_move(&e->list, &lc->free);
 	RETURN();
 }
 
-static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
 {
 	struct list_head *n;
+	struct lc_element *e;
 
-	if (list_empty(&lc->free))
-		return lc_evict(lc);
+	if (!list_empty(&lc->free))
+		n = lc->free.next;
+	else if (!list_empty(&lc->lru))
+		n = lc->lru.prev;
+	else
+		return NULL;
 
-	n = lc->free.next;
-	list_del(n);
-	return list_entry(n, struct lc_element, list);
+	e = list_entry(n, struct lc_element, list);
+	PARANOIA_LC_ELEMENT(lc, e);
+
+	e->lc_new_number = new_number;
+	if (!hlist_unhashed(&e->colision))
+		__hlist_del(&e->colision);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
+	list_move(&e->list, &lc->to_be_changed);
+
+	return e;
 }
 
 static int lc_unused_element_available(struct lru_cache *lc)
@@ -308,6 +366,75 @@
 	return 0;
 }
 
+static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change)
+{
+	struct lc_element *e;
+
+	PARANOIA_ENTRY();
+	if (lc->flags & LC_STARVING) {
+		++lc->starving;
+		RETURN(NULL);
+	}
+
+	e = __lc_find(lc, enr, 1);
+	/* if lc_new_number != lc_number,
+	 * this enr is currently being pulled in already,
+	 * and will be available once the pending transaction
+	 * has been committed. */
+	if (e && e->lc_new_number == e->lc_number) {
+		++lc->hits;
+		if (e->refcnt++ == 0)
+			lc->used++;
+		list_move(&e->list, &lc->in_use); /* Not evictable... */
+		RETURN(e);
+	}
+
+	++lc->misses;
+	if (!may_change)
+		RETURN(NULL);
+
+	/* It has been found above, but on the "to_be_changed" list, not yet
+	 * committed.  Don't pull it in twice, wait for the transaction, then
+	 * try again */
+	if (e)
+		RETURN(NULL);
+
+	/* To avoid races with lc_try_lock(), first, mark us dirty
+	 * (using test_and_set_bit, as it implies memory barriers), ... */
+	test_and_set_bit(__LC_DIRTY, &lc->flags);
+
+	/* ... only then check if it is locked anyway. If lc_unlock clears
+	 * the dirty bit again, that's not a problem, we will come here again.
+	 */
+	if (test_bit(__LC_LOCKED, &lc->flags)) {
+		++lc->locked;
+		RETURN(NULL);
+	}
+
+	/* In case there is nothing available and we can not kick out
+	 * the LRU element, we have to wait ...
+	 */
+	if (!lc_unused_element_available(lc)) {
+		__set_bit(__LC_STARVING, &lc->flags);
+		RETURN(NULL);
+	}
+
+	/* It was not present in the active set.  We are going to recycle an
+	 * unused (or even "free") element, but we won't accumulate more than
+	 * max_pending_changes changes.  */
+	if (lc->pending_changes >= lc->max_pending_changes)
+		RETURN(NULL);
+
+	e = lc_prepare_for_change(lc, enr);
+	BUG_ON(!e);
+
+	clear_bit(__LC_STARVING, &lc->flags);
+	BUG_ON(++e->refcnt != 1);
+	lc->used++;
+	lc->pending_changes++;
+
+	RETURN(e);
+}
 
 /**
  * lc_get - get element by label, maybe change the active set
@@ -336,110 +463,65 @@
  *  pointer to an UNUSED element with some different element number,
  *          where that different number may also be %LC_FREE.
  *
- *          In this case, the cache is marked %LC_DIRTY (blocking further changes),
- *          and the returned element pointer is removed from the lru list and
- *          hash collision chains.  The user now should do whatever housekeeping
- *          is necessary.
- *          Then he must call lc_changed(lc,element_pointer), to finish
- *          the change.
+ *          In this case, the cache is marked %LC_DIRTY,
+ *          so lc_try_lock() will no longer succeed.
+ *          The returned element pointer is moved to the "to_be_changed" list,
+ *          and registered with the new element number on the hash collision chains,
+ *          so it is possible to pick it up from lc_is_used().
+ *          Up to "max_pending_changes" (see lc_create()) can be accumulated.
+ *          The user now should do whatever housekeeping is necessary,
+ *          typically serialize on lc_try_lock_for_transaction(), then call
+ *          lc_committed(lc) and lc_unlock(), to finish the change.
  *
  * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
  *       any cache set change.
  */
 struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
 {
-	struct lc_element *e;
-
-	PARANOIA_ENTRY();
-	if (lc->flags & LC_STARVING) {
-		++lc->starving;
-		RETURN(NULL);
-	}
-
-	e = lc_find(lc, enr);
-	if (e) {
-		++lc->hits;
-		if (e->refcnt++ == 0)
-			lc->used++;
-		list_move(&e->list, &lc->in_use); /* Not evictable... */
-		RETURN(e);
-	}
-
-	++lc->misses;
-
-	/* In case there is nothing available and we can not kick out
-	 * the LRU element, we have to wait ...
-	 */
-	if (!lc_unused_element_available(lc)) {
-		__set_bit(__LC_STARVING, &lc->flags);
-		RETURN(NULL);
-	}
-
-	/* it was not present in the active set.
-	 * we are going to recycle an unused (or even "free") element.
-	 * user may need to commit a transaction to record that change.
-	 * we serialize on flags & TF_DIRTY */
-	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
-		++lc->dirty;
-		RETURN(NULL);
-	}
-
-	e = lc_get_unused_element(lc);
-	BUG_ON(!e);
-
-	clear_bit(__LC_STARVING, &lc->flags);
-	BUG_ON(++e->refcnt != 1);
-	lc->used++;
-
-	lc->changing_element = e;
-	lc->new_number = enr;
-
-	RETURN(e);
-}
-
-/* similar to lc_get,
- * but only gets a new reference on an existing element.
- * you either get the requested element, or NULL.
- * will be consolidated into one function.
- */
-struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
-{
-	struct lc_element *e;
-
-	PARANOIA_ENTRY();
-	if (lc->flags & LC_STARVING) {
-		++lc->starving;
-		RETURN(NULL);
-	}
-
-	e = lc_find(lc, enr);
-	if (e) {
-		++lc->hits;
-		if (e->refcnt++ == 0)
-			lc->used++;
-		list_move(&e->list, &lc->in_use); /* Not evictable... */
-	}
-	RETURN(e);
+	return __lc_get(lc, enr, 1);
 }
 
 /**
- * lc_changed - tell @lc that the change has been recorded
+ * lc_try_get - get element by label, if present; do not change the active set
  * @lc: the lru cache to operate on
- * @e: the element pending label change
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * Return values:
+ *  NULL
+ *     The cache was marked %LC_STARVING,
+ *     or the requested label was not in the active set
+ *
+ *  pointer to the element with the REQUESTED element number.
+ *     In this case, it can be used right away
  */
-void lc_changed(struct lru_cache *lc, struct lc_element *e)
+struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
 {
+	return __lc_get(lc, enr, 0);
+}
+
+/**
+ * lc_committed - tell @lc that pending changes have been recorded
+ * @lc: the lru cache to operate on
+ *
+ * User is expected to serialize on explicit lc_try_lock_for_transaction()
+ * before the transaction is started, and later needs to lc_unlock() explicitly
+ * as well.
+ */
+void lc_committed(struct lru_cache *lc)
+{
+	struct lc_element *e, *tmp;
+
 	PARANOIA_ENTRY();
-	BUG_ON(e != lc->changing_element);
-	PARANOIA_LC_ELEMENT(lc, e);
-	++lc->changed;
-	e->lc_number = lc->new_number;
-	list_add(&e->list, &lc->in_use);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
-	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
+		/* count number of changes, not number of transactions */
+		++lc->changed;
+		e->lc_number = e->lc_new_number;
+		list_move(&e->list, &lc->in_use);
+	}
+	lc->pending_changes = 0;
 	RETURN();
 }
 
@@ -458,13 +540,12 @@
 	PARANOIA_ENTRY();
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt == 0);
-	BUG_ON(e == lc->changing_element);
+	BUG_ON(e->lc_number != e->lc_new_number);
 	if (--e->refcnt == 0) {
 		/* move it to the front of LRU. */
 		list_move(&e->list, &lc->lru);
 		lc->used--;
-		clear_bit(__LC_STARVING, &lc->flags);
-		smp_mb__after_clear_bit();
+		clear_bit_unlock(__LC_STARVING, &lc->flags);
 	}
 	RETURN(e->refcnt);
 }
@@ -504,16 +585,24 @@
 void lc_set(struct lru_cache *lc, unsigned int enr, int index)
 {
 	struct lc_element *e;
+	struct list_head *lh;
 
 	if (index < 0 || index >= lc->nr_elements)
 		return;
 
 	e = lc_element_by_index(lc, index);
-	e->lc_number = enr;
+	BUG_ON(e->lc_number != e->lc_new_number);
+	BUG_ON(e->refcnt != 0);
 
+	e->lc_number = e->lc_new_number = enr;
 	hlist_del_init(&e->colision);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
-	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+	if (enr == LC_FREE)
+		lh = &lc->free;
+	else {
+		hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+		lh = &lc->lru;
+	}
+	list_move(&e->list, lh);
 }
 
 /**
@@ -553,8 +642,10 @@
 EXPORT_SYMBOL(lc_find);
 EXPORT_SYMBOL(lc_get);
 EXPORT_SYMBOL(lc_put);
-EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_committed);
 EXPORT_SYMBOL(lc_element_by_index);
 EXPORT_SYMBOL(lc_index_of);
 EXPORT_SYMBOL(lc_seq_printf_stats);
 EXPORT_SYMBOL(lc_seq_dump_details);
+EXPORT_SYMBOL(lc_try_lock);
+EXPORT_SYMBOL(lc_is_used);
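
The single-slot lc_changed() protocol becomes a batched transaction: lc_get() parks misses on to_be_changed (up to max_pending_changes of them) and the user commits them all at once. A rough caller-side sketch; lc_try_lock_for_transaction() and lc_unlock() are the header-side companions named in the lc_get() comment, and write_transaction() is hypothetical housekeeping:

#include <linux/lru_cache.h>

static void write_transaction(struct lru_cache *lc);	/* hypothetical */

static void commit_pending(struct lru_cache *lc)
{
	if (!lc_try_lock_for_transaction(lc))
		return;			/* another committer holds the lock */

	write_transaction(lc);		/* persist the to_be_changed entries */
	lc_committed(lc);		/* lc_number = lc_new_number, move to in_use */
	lc_unlock(lc);			/* drops LC_LOCKED and LC_DIRTY */
}
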
diff --git a/lib/of-reconfig-notifier-error-inject.c b/lib/of-reconfig-notifier-error-inject.c
new file mode 100644
index 0000000..8dc7986
--- /dev/null
+++ b/lib/of-reconfig-notifier-error-inject.c
@@ -0,0 +1,51 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include "notifier-error-inject.h"
+
+static int priority;
+module_param(priority, int, 0);
+MODULE_PARM_DESC(priority, "specify OF reconfig notifier priority");
+
+static struct notifier_err_inject reconfig_err_inject = {
+	.actions = {
+		{ NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ATTACH_NODE) },
+		{ NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_DETACH_NODE) },
+		{ NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ADD_PROPERTY) },
+		{ NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_REMOVE_PROPERTY) },
+		{ NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_UPDATE_PROPERTY) },
+		{}
+	}
+};
+
+static struct dentry *dir;
+
+static int err_inject_init(void)
+{
+	int err;
+
+	dir = notifier_err_inject_init("OF-reconfig",
+		notifier_err_inject_dir, &reconfig_err_inject, priority);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	err = of_reconfig_notifier_register(&reconfig_err_inject.nb);
+	if (err)
+		debugfs_remove_recursive(dir);
+
+	return err;
+}
+
+static void err_inject_exit(void)
+{
+	of_reconfig_notifier_unregister(&reconfig_err_inject.nb);
+	debugfs_remove_recursive(dir);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_DESCRIPTION("OF reconfig notifier error injection module");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/pSeries-reconfig-notifier-error-inject.c b/lib/pSeries-reconfig-notifier-error-inject.c
deleted file mode 100644
index 7f7c98d..0000000
--- a/lib/pSeries-reconfig-notifier-error-inject.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/pSeries_reconfig.h>
-
-#include "notifier-error-inject.h"
-
-static int priority;
-module_param(priority, int, 0);
-MODULE_PARM_DESC(priority, "specify pSeries reconfig notifier priority");
-
-static struct notifier_err_inject reconfig_err_inject = {
-	.actions = {
-		{ NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_ADD) },
-		{ NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_REMOVE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_ADD) },
-		{ NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_REMOVE) },
-		{}
-	}
-};
-
-static struct dentry *dir;
-
-static int err_inject_init(void)
-{
-	int err;
-
-	dir = notifier_err_inject_init("pSeries-reconfig",
-		notifier_err_inject_dir, &reconfig_err_inject, priority);
-	if (IS_ERR(dir))
-		return PTR_ERR(dir);
-
-	err = pSeries_reconfig_notifier_register(&reconfig_err_inject.nb);
-	if (err)
-		debugfs_remove_recursive(dir);
-
-	return err;
-}
-
-static void err_inject_exit(void)
-{
-	pSeries_reconfig_notifier_unregister(&reconfig_err_inject.nb);
-	debugfs_remove_recursive(dir);
-}
-
-module_init(err_inject_init);
-module_exit(err_inject_exit);
-
-MODULE_DESCRIPTION("pSeries reconfig notifier error injection module");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
new file mode 100644
index 0000000..652a8ee
--- /dev/null
+++ b/lib/percpu-rwsem.c
@@ -0,0 +1,165 @@
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/percpu.h>
+#include <linux/wait.h>
+#include <linux/lockdep.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
+			const char *name, struct lock_class_key *rwsem_key)
+{
+	brw->fast_read_ctr = alloc_percpu(int);
+	if (unlikely(!brw->fast_read_ctr))
+		return -ENOMEM;
+
+	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
+	__init_rwsem(&brw->rw_sem, name, rwsem_key);
+	atomic_set(&brw->write_ctr, 0);
+	atomic_set(&brw->slow_read_ctr, 0);
+	init_waitqueue_head(&brw->write_waitq);
+	return 0;
+}
+
+void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+{
+	free_percpu(brw->fast_read_ctr);
+	brw->fast_read_ctr = NULL; /* catch use after free bugs */
+}
+
+/*
+ * This is the fast-path for down_read/up_read, it only needs to ensure
+ * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
+ * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
+ * serialize with the preempt-disabled section below.
+ *
+ * The nontrivial part is that we should guarantee acquire/release semantics
+ * in case when
+ *
+ *	R_W: down_write() comes after up_read(), the writer should see all
+ *	     changes done by the reader
+ * or
+ *	W_R: down_read() comes after up_write(), the reader should see all
+ *	     changes done by the writer
+ *
+ * If this helper fails the callers rely on the normal rw_semaphore and
+ * atomic_dec_and_test(), so in this case we have the necessary barriers.
+ *
+ * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
+ * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
+ * reader inside the critical section. See the comments in down_write and
+ * up_write below.
+ */
+static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+{
+	bool success = false;
+
+	preempt_disable();
+	if (likely(!atomic_read(&brw->write_ctr))) {
+		__this_cpu_add(*brw->fast_read_ctr, val);
+		success = true;
+	}
+	preempt_enable();
+
+	return success;
+}
+
+/*
+ * Like the normal down_read() this is not recursive, the writer can
+ * come after the first percpu_down_read() and create the deadlock.
+ *
+ * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
+ * percpu_up_read() does rwsem_release(). This pairs with the usage
+ * of ->rw_sem in percpu_down/up_write().
+ */
+void percpu_down_read(struct percpu_rw_semaphore *brw)
+{
+	might_sleep();
+	if (likely(update_fast_ctr(brw, +1))) {
+		rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+		return;
+	}
+
+	down_read(&brw->rw_sem);
+	atomic_inc(&brw->slow_read_ctr);
+	/* avoid up_read()->rwsem_release() */
+	__up_read(&brw->rw_sem);
+}
+
+void percpu_up_read(struct percpu_rw_semaphore *brw)
+{
+	rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
+
+	if (likely(update_fast_ctr(brw, -1)))
+		return;
+
+	/* false-positive is possible but harmless */
+	if (atomic_dec_and_test(&brw->slow_read_ctr))
+		wake_up_all(&brw->write_waitq);
+}
+
+static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		sum += per_cpu(*brw->fast_read_ctr, cpu);
+		per_cpu(*brw->fast_read_ctr, cpu) = 0;
+	}
+
+	return sum;
+}
+
+/*
+ * A writer increments ->write_ctr to force the readers to switch to the
+ * slow mode, note the atomic_read() check in update_fast_ctr().
+ *
+ * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
+ * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
+ * counter it represents the number of active readers.
+ *
+ * Finally the writer takes ->rw_sem for writing and blocks the new readers,
+ * then waits until the slow counter becomes zero.
+ */
+void percpu_down_write(struct percpu_rw_semaphore *brw)
+{
+	/* tell update_fast_ctr() there is a pending writer */
+	atomic_inc(&brw->write_ctr);
+	/*
+	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
+	 *    so that update_fast_ctr() can't succeed.
+	 *
+	 * 2. Ensures we see the result of every previous this_cpu_add() in
+	 *    update_fast_ctr().
+	 *
+	 * 3. Ensures that if any reader has exited its critical section via
+	 *    fast-path, it executes a full memory barrier before we return.
+	 *    See R_W case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+
+	/* exclude other writers, and block the new readers completely */
+	down_write(&brw->rw_sem);
+
+	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
+	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+
+	/* wait for all readers to complete their percpu_up_read() */
+	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+}
+
+void percpu_up_write(struct percpu_rw_semaphore *brw)
+{
+	/* release the lock, but the readers can't use the fast-path */
+	up_write(&brw->rw_sem);
+	/*
+	 * Insert the barrier before the next fast-path in down_read,
+	 * see W_R case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+	/* the last writer unblocks update_fast_ctr() */
+	atomic_dec(&brw->write_ctr);
+}
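
The result is a heavily reader-biased lock: an uncontended reader touches only a per-cpu counter, while a writer pays a full rwsem plus two synchronize_sched_expedited() calls. A usage sketch assuming the API above (percpu_init_rwsem() is the header-side initializer):

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore state_sem;	/* percpu_init_rwsem() at boot */

static void reader(void)
{
	percpu_down_read(&state_sem);	/* usually just a per-cpu increment */
	/* ... read shared state ... */
	percpu_up_read(&state_sem);
}

static void writer(void)
{
	percpu_down_write(&state_sem);	/* forces readers onto the slow path */
	/* ... mutate shared state exclusively ... */
	percpu_up_write(&state_sem);
}
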
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index de06dfe..9f7c184 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -1,8 +1,11 @@
 obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o
 
-raid6_pq-y	+= algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \
-		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
-		   altivec8.o mmx.o sse1.o sse2.o
+raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
+		   int8.o int16.o int32.o
+
+raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+
 hostprogs-y	+= mktables
 
 quiet_cmd_unroll = UNROLL  $@
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 589f5f5..6d7316f 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -45,11 +45,20 @@
 	&raid6_sse1x2,
 	&raid6_sse2x1,
 	&raid6_sse2x2,
+#ifdef CONFIG_AS_AVX2
+	&raid6_avx2x1,
+	&raid6_avx2x2,
+#endif
 #endif
 #if defined(__x86_64__) && !defined(__arch_um__)
 	&raid6_sse2x1,
 	&raid6_sse2x2,
 	&raid6_sse2x4,
+#ifdef CONFIG_AS_AVX2
+	&raid6_avx2x1,
+	&raid6_avx2x2,
+	&raid6_avx2x4,
+#endif
 #endif
 #ifdef CONFIG_ALTIVEC
 	&raid6_altivec1,
@@ -72,6 +81,9 @@
 
 const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+#ifdef CONFIG_AS_AVX2
+	&raid6_recov_avx2,
+#endif
 	&raid6_recov_ssse3,
 #endif
 	&raid6_recov_intx1,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index b71012b..7cc12b5 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,13 +24,10 @@
 
 #include <linux/raid/pq.h>
 
-#ifdef CONFIG_ALTIVEC
-
 #include <altivec.h>
 #ifdef __KERNEL__
 # include <asm/cputable.h>
 # include <asm/switch_to.h>
-#endif
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
new file mode 100644
index 0000000..bc3b1dd
--- /dev/null
+++ b/lib/raid6/avx2.c
@@ -0,0 +1,251 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 2012 Intel Corporation
+ *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
+ *
+ *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 59 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * AVX2 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#ifdef CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_avx2_constants {
+	u64 x1d[4];
+} raid6_avx2_constants __aligned(32) = {
+	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
+};
+
+static int raid6_have_avx2(void)
+{
+	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
+}
+
+/*
+ * Plain AVX2 implementation
+ */
+static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */
+
+	for (d = 0; d < bytes; d += 32) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+		asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
+		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
+		for (z = z0-2; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
+		}
+		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+		asm volatile("vpand %ymm0,%ymm5,%ymm5");
+		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+		asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x1 = {
+	raid6_avx21_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x1",
+	1			/* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 AVX2 implementation
+ */
+static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
+
+	/* We uniformly assume a single prefetch covers at least 32 bytes */
+	for (d = 0; d < bytes; d += 64) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
+		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
+		asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
+		asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
+		for (z = z0-1; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpand %ymm0,%ymm7,%ymm7");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+		}
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x2 = {
+	raid6_avx22_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x2",
+	1			/* Has cache hints */
+};
+
+#ifdef CONFIG_X86_64
+
+/*
+ * Unrolled-by-4 AVX2 implementation
+ */
+static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
+	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
+	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
+	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
+	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
+	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
+	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
+	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
+	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */
+
+	for (d = 0; d < bytes; d += 128) {
+		for (z = z0; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
+			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
+			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
+			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpand %ymm0,%ymm7,%ymm7");
+			asm volatile("vpand %ymm0,%ymm13,%ymm13");
+			asm volatile("vpand %ymm0,%ymm15,%ymm15");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
+			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
+			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
+			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+		}
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
+		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
+		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
+		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
+		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
+		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
+		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
+		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
+		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x4 = {
+	raid6_avx24_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x4",
+	1			/* Has cache hints */
+};
+#endif
+
+#endif /* CONFIG_AS_AVX2 */
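
Each vpcmpgtb/vpaddb/vpand/vpxor group in the loops above is a 32-lane GF(2^8) multiply-by-two with the 0x1d reduction polynomial, followed by folding in the next data disk. A one-byte scalar rendering of the same step (illustrative only; the names are mine, not upstream's):

#include <stdint.h>

/* One byte lane of the SIMD inner loop: Q = 2*Q ^ data, P = P ^ data
 * over GF(2^8).  vpcmpgtb against zero extracts the top bit as a
 * 0x00/0xff mask, vpaddb doubles, vpand selects 0x1d, vpxor reduces.
 */
static inline void raid6_scalar_step(uint8_t *wp, uint8_t *wq, uint8_t data)
{
	uint8_t fold = (*wq & 0x80) ? 0x1d : 0x00;

	*wq = (uint8_t)(*wq << 1) ^ fold;	/* multiply Q by g = 2 */
	*wq ^= data;				/* accumulate into Q   */
	*wp ^= data;				/* P is a plain XOR    */
}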
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 279347f..590c71c 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -16,7 +16,7 @@
  * MMX implementation of RAID-6 syndrome functions
  */
 
-#if defined(__i386__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_32
 
 #include <linux/raid/pq.h>
 #include "x86.h"
diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c
new file mode 100644
index 0000000..e1eea43
--- /dev/null
+++ b/lib/raid6/recov_avx2.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifdef CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_avx2(void)
+{
+	return boot_cpu_has(X86_FEATURE_AVX2) &&
+		boot_cpu_has(X86_FEATURE_AVX);
+}
+
+static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
+		int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	const u8 x0f = 0x0f;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+		raid6_gfexp[failb]]];
+
+	kernel_fpu_begin();
+
+	/* ymm0 = x0f[16] */
+	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
+		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
+		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
+		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
+		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
+		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
+		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
+		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
+
+		/*
+		 * 1 = dq[0]  ^ q[0]
+		 * 9 = dq[32] ^ q[32]
+		 * 0 = dp[0]  ^ p[0]
+		 * 8 = dp[32] ^ p[32]
+		 */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+		asm volatile("vpsraw $4, %ymm1, %ymm3");
+		asm volatile("vpsraw $4, %ymm9, %ymm12");
+		asm volatile("vpand %ymm7, %ymm1, %ymm1");
+		asm volatile("vpand %ymm7, %ymm9, %ymm9");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm12, %ymm12");
+		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
+		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
+		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
+		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+		/*
+		 * 5 = qx[0]
+		 * 15 = qx[32]
+		 */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+		asm volatile("vpsraw $4, %ymm0, %ymm2");
+		asm volatile("vpsraw $4, %ymm8, %ymm6");
+		asm volatile("vpand %ymm7, %ymm0, %ymm3");
+		asm volatile("vpand %ymm7, %ymm8, %ymm14");
+		asm volatile("vpand %ymm7, %ymm2, %ymm2");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
+		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
+		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm12, %ymm13, %ymm13");
+
+		/*
+		 * 1  = pbmul[px[0]]
+		 * 13 = pbmul[px[32]]
+		 */
+		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm15, %ymm13, %ymm13");
+
+		/*
+		 * 1 = db = DQ
+		 * 13 = db[32] = DQ[32]
+		 */
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
+		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+		asm volatile("vpxor %ymm13, %ymm8, %ymm8");
+
+		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dp += 64;
+		dq += 64;
+#else
+		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
+		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
+		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
+		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
+
+		/* 1 = dq ^ q;  0 = dp ^ p */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+		/*
+		 * 1 = dq ^ q
+		 * 3 = dq ^ p >> 4
+		 */
+		asm volatile("vpsraw $4, %ymm1, %ymm3");
+		asm volatile("vpand %ymm7, %ymm1, %ymm1");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+		/* 5 = qx */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+
+		asm volatile("vpsraw $4, %ymm0, %ymm2");
+		asm volatile("vpand %ymm7, %ymm0, %ymm3");
+		asm volatile("vpand %ymm7, %ymm2, %ymm2");
+		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+
+		/* 1 = pbmul[px] */
+		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+		/* 1 = db = DQ */
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+
+		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dp += 32;
+		dq += 32;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
+		void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	const u8 x0f = 0x0f;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	kernel_fpu_begin();
+
+	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
+		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
+
+		/*
+		 * 3 = q[0] ^ dq[0]
+		 * 8 = q[32] ^ dq[32]
+		 */
+		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+		asm volatile("vmovapd %ymm0, %ymm13");
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+		asm volatile("vmovapd %ymm1, %ymm14");
+
+		asm volatile("vpsraw $4, %ymm3, %ymm6");
+		asm volatile("vpsraw $4, %ymm8, %ymm12");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm8, %ymm8");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpand %ymm7, %ymm12, %ymm12");
+		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
+		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm13, %ymm14, %ymm14");
+
+		/*
+		 * 1  = qmul[q[0]  ^ dq[0]]
+		 * 14 = qmul[q[32] ^ dq[32]]
+		 */
+		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
+		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+		asm volatile("vpxor %ymm14, %ymm12, %ymm12");
+
+		/*
+		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
+		 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
+		 */
+
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
+		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dq += 64;
+#else
+		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+
+		/* 3 = q ^ dq */
+
+		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+
+		asm volatile("vpsraw $4, %ymm3, %ymm6");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+
+		/* 1 = qmul[q ^ dq] */
+
+		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+
+		/* 2 = p ^ qmul[q ^ dq] */
+
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dq += 32;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_avx2 = {
+	.data2 = raid6_2data_recov_avx2,
+	.datap = raid6_datap_recov_avx2,
+	.valid = raid6_has_avx2,
+#ifdef CONFIG_X86_64
+	.name = "avx2x2",
+#else
+	.name = "avx2x1",
+#endif
+	.priority = 2,
+};
+
+#else
+#warning "your version of binutils lacks AVX2 support"
+#endif
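
Both recovery kernels implement the textbook RAID-6 reconstruction: regenerate P' and Q' with zeroed stand-ins for the failed disks, then Db = pbmul[P ^ P'] ^ qmul[Q ^ Q'] and Da = Db ^ (P ^ P'). The byte-at-a-time reference looks roughly like the following sketch (essentially the generic loop in lib/raid6/recov.c); the vpshufb pairs above compute the same pbmul/qmul lookups a nibble at a time from the 16-entry raid6_vgfmul tables.

/* Scalar reference for the two-data-disk recovery loop.  pbmul and
 * qmul are the GF(2^8) multiplication tables picked from the
 * failed-disk indices, exactly as in the code above.
 */
static void recov_2data_ref(size_t bytes, uint8_t *p, uint8_t *q,
			    uint8_t *dp, uint8_t *dq,
			    const uint8_t *pbmul, const uint8_t *qmul)
{
	while (bytes--) {
		uint8_t px = *p ^ *dp;		/* P xor P'          */
		uint8_t qx = qmul[*q ^ *dq];	/* scaled (Q xor Q') */

		*dq = pbmul[px] ^ qx;		/* reconstructed Db  */
		*dp = *dq ^ px;			/* reconstructed Da  */
		p++; q++; dp++; dq++;
	}
}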
diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c
index ecb710c..a916832 100644
--- a/lib/raid6/recov_ssse3.c
+++ b/lib/raid6/recov_ssse3.c
@@ -7,8 +7,6 @@
  * of the License.
  */
 
-#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
-
 #include <linux/raid/pq.h>
 #include "x86.h"
 
@@ -332,5 +330,3 @@
 #endif
 	.priority = 1,
 };
-
-#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index 10dd9194..f762971 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -21,7 +21,7 @@
  * worthwhile as a separate implementation.
  */
 
-#if defined(__i386__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_32
 
 #include <linux/raid/pq.h>
 #include "x86.h"
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index bc2d57d..85b82c8 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -17,8 +17,6 @@
  *
  */
 
-#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
-
 #include <linux/raid/pq.h>
 #include "x86.h"
 
@@ -159,9 +157,7 @@
 	1			/* Has cache hints */
 };
 
-#endif
-
-#if defined(__x86_64__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_64
 
 /*
  * Unrolled-by-4 SSE2 implementation
@@ -259,4 +255,4 @@
 	1			/* Has cache hints */
 };
 
-#endif
+#endif /* CONFIG_X86_64 */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index c76151d..087332d 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -10,6 +10,31 @@
 AWK	 = awk -f
 AR	 = ar
 RANLIB	 = ranlib
+OBJS	 = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o
+
+ARCH := $(shell uname -m 2>/dev/null | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        CFLAGS += -DCONFIG_X86_32
+        IS_X86 = yes
+endif
+ifeq ($(ARCH),x86_64)
+        CFLAGS += -DCONFIG_X86_64
+        IS_X86 = yes
+endif
+
+ifeq ($(IS_X86),yes)
+        OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
+        CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" |	\
+                    gcc -c -x assembler - >&/dev/null &&	\
+                    rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+else
+        HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
+                         gcc -c -x c - >&/dev/null && \
+                         rm ./-.o && echo yes)
+        ifeq ($(HAS_ALTIVEC),yes)
+                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+        endif
+endif
 
 .c.o:
 	$(CC) $(CFLAGS) -c -o $@ $<
@@ -22,9 +47,7 @@
 
 all:	raid6.a raid6test
 
-raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
-	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
-	 tables.o
+raid6.a: $(OBJS)
 	 rm -f $@
 	 $(AR) cq $@ $^
 	 $(RANLIB) $@
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d632..b759548 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
 #define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
 #define X86_FEATURE_AVX	(4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_AVX2        (9*32+ 5) /* AVX2 instructions */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
 
 /* Should work well enough on modern CPUs for testing */
 static inline int boot_cpu_has(int flag)
 {
-	u32 eax = (flag & 0x20) ? 0x80000001 : 1;
-	u32 ecx, edx;
+	u32 eax, ebx, ecx, edx;
+
+	eax = (flag & 0x100) ? 7 :
+		(flag & 0x20) ? 0x80000001 : 1;
+	ecx = 0;
 
 	asm volatile("cpuid"
-		     : "+a" (eax), "=d" (edx), "=c" (ecx)
-		     : : "ebx");
+		     : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
 
-	return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
+	return ((flag & 0x100 ? ebx :
+		(flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1;
 }
 
 #endif /* ndef __KERNEL__ */
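
The flag encoding is word*32 + bit, mirroring the kernel's cpufeature tables: bit 0x100 of the flag routes the query to CPUID leaf 7 (result in EBX), 0x20 routes to extended leaf 0x80000001, and 0x80 selects ECX rather than EDX within leaf 1. X86_FEATURE_AVX2 = 9*32+5 therefore ends up as leaf 7, EBX bit 5. A standalone user-space check along the same lines (a sketch for x86-64, not upstream code):

#include <stdio.h>

#define X86_FEATURE_AVX2 (9*32 + 5)	/* leaf 7, EBX, bit 5 */

int main(void)
{
	unsigned int eax = 7, ebx, ecx = 0, edx;

	__asm__ volatile("cpuid"
			 : "+a" (eax), "=b" (ebx), "+c" (ecx), "=d" (edx));
	printf("AVX2: %s\n",
	       (ebx >> (X86_FEATURE_AVX2 & 31)) & 1 ? "yes" : "no");
	return 0;
}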
diff --git a/lib/random32.c b/lib/random32.c
index 938bde5..52280d5 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -42,13 +42,13 @@
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
 
 /**
- *	prandom32 - seeded pseudo-random number generator.
+ *	prandom_u32_state - seeded pseudo-random number generator.
  *	@state: pointer to state structure holding seeded state.
  *
  *	This is used for pseudo-randomness with no outside seeding.
- *	For more random results, use random32().
+ *	For more random results, use prandom_u32().
  */
-u32 prandom32(struct rnd_state *state)
+u32 prandom_u32_state(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
@@ -58,32 +58,81 @@
 
 	return (state->s1 ^ state->s2 ^ state->s3);
 }
-EXPORT_SYMBOL(prandom32);
+EXPORT_SYMBOL(prandom_u32_state);
 
 /**
- *	random32 - pseudo random number generator
+ *	prandom_u32 - pseudo random number generator
  *
  *	A 32 bit pseudo-random number is generated using a fast
  *	algorithm suitable for simulation. This algorithm is NOT
  *	considered safe for cryptographic use.
  */
-u32 random32(void)
+u32 prandom_u32(void)
 {
 	unsigned long r;
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
-	r = prandom32(state);
+	r = prandom_u32_state(state);
 	put_cpu_var(state);
 	return r;
 }
-EXPORT_SYMBOL(random32);
+EXPORT_SYMBOL(prandom_u32);
+
+/**
+ *	prandom_bytes_state - get the requested number of pseudo-random bytes
+ *
+ *	@state: pointer to state structure holding seeded state.
+ *	@buf: where to copy the pseudo-random bytes to
+ *	@bytes: the requested number of bytes
+ *
+ *	This is used for pseudo-randomness with no outside seeding.
+ *	For more random results, use prandom_bytes().
+ */
+void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes)
+{
+	unsigned char *p = buf;
+	int i;
+
+	for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) {
+		u32 random = prandom_u32_state(state);
+		int j;
+
+		for (j = 0; j < sizeof(u32); j++) {
+			p[i + j] = random;
+			random >>= BITS_PER_BYTE;
+		}
+	}
+	if (i < bytes) {
+		u32 random = prandom_u32_state(state);
+
+		for (; i < bytes; i++) {
+			p[i] = random;
+			random >>= BITS_PER_BYTE;
+		}
+	}
+}
+EXPORT_SYMBOL(prandom_bytes_state);
 
 /**
- *	srandom32 - add entropy to pseudo random number generator
+ *	prandom_bytes - get the requested number of pseudo-random bytes
+ *	@buf: where to copy the pseudo-random bytes to
+ *	@bytes: the requested number of bytes
+ */
+void prandom_bytes(void *buf, int bytes)
+{
+	struct rnd_state *state = &get_cpu_var(net_rand_state);
+
+	prandom_bytes_state(state, buf, bytes);
+	put_cpu_var(state);
+}
+EXPORT_SYMBOL(prandom_bytes);
+
+/**
+ *	prandom_seed - add entropy to pseudo random number generator
  *	@seed: seed value
  *
- *	Add some additional seeding to the random32() pool.
+ *	Add some additional seeding to the prandom pool.
  */
-void srandom32(u32 entropy)
+void prandom_seed(u32 entropy)
 {
 	int i;
 	/*
@@ -95,13 +144,13 @@
 		state->s1 = __seed(state->s1 ^ entropy, 1);
 	}
 }
-EXPORT_SYMBOL(srandom32);
+EXPORT_SYMBOL(prandom_seed);
 
 /*
  *	Generate some initially weak seeding values to allow
- *	to start the random32() engine.
+ *	to start the prandom_u32() engine.
  */
-static int __init random32_init(void)
+static int __init prandom_init(void)
 {
 	int i;
 
@@ -114,22 +163,22 @@
 		state->s3 = __seed(LCG(state->s2), 15);
 
 		/* "warm it up" */
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
 	}
 	return 0;
 }
-core_initcall(random32_init);
+core_initcall(prandom_init);
 
 /*
  *	Generate better values after random number generator
  *	is fully initialized.
  */
-static int __init random32_reseed(void)
+static int __init prandom_reseed(void)
 {
 	int i;
 
@@ -143,8 +192,8 @@
 		state->s3 = __seed(seeds[2], 15);
 
 		/* mix it in */
-		prandom32(state);
+		prandom_u32_state(state);
 	}
 	return 0;
 }
-late_initcall(random32_reseed);
+late_initcall(prandom_reseed);
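
After the rename, prandom_u32()/prandom_bytes() draw from the shared per-CPU net_rand_state, while the *_state variants operate on a caller-owned struct rnd_state and therefore give reproducible sequences. A hedged usage sketch (prandom_seed_state is the seeding helper this series also renames; see the rbtree_test hunk below):

#include <linux/random.h>

static void prandom_demo(void)
{
	struct rnd_state state;
	u8 buf[16];
	u32 r;

	/* Private, explicitly seeded state: same seed, same bytes. */
	prandom_seed_state(&state, 0x12345678ULL);
	prandom_bytes_state(&state, buf, sizeof(buf));

	/* Shared pool: fast and convenient, but NOT crypto-safe. */
	r = prandom_u32();
	(void)r;
}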
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index 268b239..af38aed 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -96,8 +96,8 @@
 {
 	int i;
 	for (i = 0; i < NODES; i++) {
-		nodes[i].key = prandom32(&rnd);
-		nodes[i].val = prandom32(&rnd);
+		nodes[i].key = prandom_u32_state(&rnd);
+		nodes[i].val = prandom_u32_state(&rnd);
 	}
 }
 
@@ -118,7 +118,7 @@
 {
 	struct rb_node *rb;
 	int count = 0;
-	int blacks;
+	int blacks = 0;
 	u32 prev_key = 0;
 
 	for (rb = rb_first(&root); rb; rb = rb_next(rb)) {
@@ -155,7 +155,7 @@
 
 	printk(KERN_ALERT "rbtree testing");
 
-	prandom32_seed(&rnd, 3141592653589793238ULL);
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
 	init();
 
 	time1 = get_cycles();
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 3675452b..7874b01 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -248,7 +248,8 @@
 	unsigned int left;
 
 #ifndef ARCH_HAS_SG_CHAIN
-	BUG_ON(nents > max_ents);
+	if (WARN_ON_ONCE(nents > max_ents))
+		return -EINVAL;
 #endif
 
 	memset(table, 0, sizeof(*table));
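
With BUG_ON demoted to WARN_ON_ONCE plus -EINVAL, an oversized nents on a configuration without sg chaining now surfaces as an error return rather than a panic. Callers already have to handle -ENOMEM from table allocation, so the extra failure mode costs nothing; a sketch:

#include <linux/scatterlist.h>

static int sg_table_demo(unsigned int nents)
{
	struct sg_table sgt;
	int ret;

	ret = sg_alloc_table(&sgt, nents, GFP_KERNEL);
	if (ret)	/* -ENOMEM, or now -EINVAL for oversized nents */
		return ret;

	sg_free_table(&sgt);
	return 0;
}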
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index f114bf6..196b069 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -57,7 +57,7 @@
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
-static char *io_tlb_start, *io_tlb_end;
+static phys_addr_t io_tlb_start, io_tlb_end;
 
 /*
  * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
@@ -70,7 +70,7 @@
  */
 static unsigned long io_tlb_overflow = 32*1024;
 
-static void *io_tlb_overflow_buffer;
+static phys_addr_t io_tlb_overflow_buffer;
 
 /*
  * This is a free list describing the number of free entries available from
@@ -125,27 +125,38 @@
 void swiotlb_print_info(void)
 {
 	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-	phys_addr_t pstart, pend;
+	unsigned char *vstart, *vend;
 
-	pstart = virt_to_phys(io_tlb_start);
-	pend = virt_to_phys(io_tlb_end);
+	vstart = phys_to_virt(io_tlb_start);
+	vend = phys_to_virt(io_tlb_end);
 
 	printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
-	       (unsigned long long)pstart, (unsigned long long)pend - 1,
-	       bytes >> 20, io_tlb_start, io_tlb_end - 1);
+	       (unsigned long long)io_tlb_start,
+	       (unsigned long long)io_tlb_end - 1,
+	       bytes >> 20, vstart, vend - 1);
 }
 
 void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
+	void *v_overflow_buffer;
 	unsigned long i, bytes;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
 	io_tlb_nslabs = nslabs;
-	io_tlb_start = tlb;
+	io_tlb_start = __pa(tlb);
 	io_tlb_end = io_tlb_start + bytes;
 
 	/*
+	 * Get the overflow emergency buffer
+	 */
+	v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
+	if (!v_overflow_buffer)
+		panic("Cannot allocate SWIOTLB overflow buffer!\n");
+
+	io_tlb_overflow_buffer = __pa(v_overflow_buffer);
+
+	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
@@ -156,12 +167,6 @@
 	io_tlb_index = 0;
 	io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
 
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
-	if (!io_tlb_overflow_buffer)
-		panic("Cannot allocate SWIOTLB overflow buffer!\n");
 	if (verbose)
 		swiotlb_print_info();
 }
@@ -173,6 +178,7 @@
 static void __init
 swiotlb_init_with_default_size(size_t default_size, int verbose)
 {
+	unsigned char *vstart;
 	unsigned long bytes;
 
 	if (!io_tlb_nslabs) {
@@ -185,11 +191,11 @@
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
-	if (!io_tlb_start)
+	vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
+	if (!vstart)
 		panic("Cannot allocate SWIOTLB buffer");
 
-	swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
+	swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose);
 }
 
 void __init
@@ -207,6 +213,7 @@
 swiotlb_late_init_with_default_size(size_t default_size)
 {
 	unsigned long bytes, req_nslabs = io_tlb_nslabs;
+	unsigned char *vstart = NULL;
 	unsigned int order;
 	int rc = 0;
 
@@ -223,14 +230,14 @@
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-							order);
-		if (io_tlb_start)
+		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+						  order);
+		if (vstart)
 			break;
 		order--;
 	}
 
-	if (!io_tlb_start) {
+	if (!vstart) {
 		io_tlb_nslabs = req_nslabs;
 		return -ENOMEM;
 	}
@@ -239,9 +246,9 @@
 		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
 		io_tlb_nslabs = SLABS_PER_PAGE << order;
 	}
-	rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs);
+	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
 	if (rc)
-		free_pages((unsigned long)io_tlb_start, order);
+		free_pages((unsigned long)vstart, order);
 	return rc;
 }
 
@@ -249,14 +256,25 @@
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
 	unsigned long i, bytes;
+	unsigned char *v_overflow_buffer;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
 	io_tlb_nslabs = nslabs;
-	io_tlb_start = tlb;
+	io_tlb_start = virt_to_phys(tlb);
 	io_tlb_end = io_tlb_start + bytes;
 
-	memset(io_tlb_start, 0, bytes);
+	memset(tlb, 0, bytes);
+
+	/*
+	 * Get the overflow emergency buffer
+	 */
+	v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+						     get_order(io_tlb_overflow));
+	if (!v_overflow_buffer)
+		goto cleanup2;
+
+	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
 
 	/*
 	 * Allocate and initialize the free list array.  This array is used
@@ -266,7 +284,7 @@
 	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
 	                              get_order(io_tlb_nslabs * sizeof(int)));
 	if (!io_tlb_list)
-		goto cleanup2;
+		goto cleanup3;
 
 	for (i = 0; i < io_tlb_nslabs; i++)
  		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
@@ -277,18 +295,10 @@
 				 get_order(io_tlb_nslabs *
 					   sizeof(phys_addr_t)));
 	if (!io_tlb_orig_addr)
-		goto cleanup3;
+		goto cleanup4;
 
 	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
 
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-	                                          get_order(io_tlb_overflow));
-	if (!io_tlb_overflow_buffer)
-		goto cleanup4;
-
 	swiotlb_print_info();
 
 	late_alloc = 1;
@@ -296,42 +306,42 @@
 	return 0;
 
 cleanup4:
-	free_pages((unsigned long)io_tlb_orig_addr,
-		   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-	io_tlb_orig_addr = NULL;
-cleanup3:
 	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
 	                                                 sizeof(int)));
 	io_tlb_list = NULL;
+cleanup3:
+	free_pages((unsigned long)v_overflow_buffer,
+		   get_order(io_tlb_overflow));
+	io_tlb_overflow_buffer = 0;
 cleanup2:
-	io_tlb_end = NULL;
-	io_tlb_start = NULL;
+	io_tlb_end = 0;
+	io_tlb_start = 0;
 	io_tlb_nslabs = 0;
 	return -ENOMEM;
 }
 
 void __init swiotlb_free(void)
 {
-	if (!io_tlb_overflow_buffer)
+	if (!io_tlb_orig_addr)
 		return;
 
 	if (late_alloc) {
-		free_pages((unsigned long)io_tlb_overflow_buffer,
+		free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
 			   get_order(io_tlb_overflow));
 		free_pages((unsigned long)io_tlb_orig_addr,
 			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
 		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
 								 sizeof(int)));
-		free_pages((unsigned long)io_tlb_start,
+		free_pages((unsigned long)phys_to_virt(io_tlb_start),
 			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
 	} else {
-		free_bootmem_late(__pa(io_tlb_overflow_buffer),
+		free_bootmem_late(io_tlb_overflow_buffer,
 				  PAGE_ALIGN(io_tlb_overflow));
 		free_bootmem_late(__pa(io_tlb_orig_addr),
 				  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
 		free_bootmem_late(__pa(io_tlb_list),
 				  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		free_bootmem_late(__pa(io_tlb_start),
+		free_bootmem_late(io_tlb_start,
 				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
 	io_tlb_nslabs = 0;
@@ -339,21 +349,21 @@
 
 static int is_swiotlb_buffer(phys_addr_t paddr)
 {
-	return paddr >= virt_to_phys(io_tlb_start) &&
-		paddr < virt_to_phys(io_tlb_end);
+	return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
 
 /*
  * Bounce: copy the swiotlb buffer back to the original dma location
  */
-void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-		    enum dma_data_direction dir)
+static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
+			   size_t size, enum dma_data_direction dir)
 {
-	unsigned long pfn = PFN_DOWN(phys);
+	unsigned long pfn = PFN_DOWN(orig_addr);
+	unsigned char *vaddr = phys_to_virt(tlb_addr);
 
 	if (PageHighMem(pfn_to_page(pfn))) {
 		/* The buffer does not have a mapping.  Map it in and copy */
-		unsigned int offset = phys & ~PAGE_MASK;
+		unsigned int offset = orig_addr & ~PAGE_MASK;
 		char *buffer;
 		unsigned int sz = 0;
 		unsigned long flags;
@@ -364,32 +374,31 @@
 			local_irq_save(flags);
 			buffer = kmap_atomic(pfn_to_page(pfn));
 			if (dir == DMA_TO_DEVICE)
-				memcpy(dma_addr, buffer + offset, sz);
+				memcpy(vaddr, buffer + offset, sz);
 			else
-				memcpy(buffer + offset, dma_addr, sz);
+				memcpy(buffer + offset, vaddr, sz);
 			kunmap_atomic(buffer);
 			local_irq_restore(flags);
 
 			size -= sz;
 			pfn++;
-			dma_addr += sz;
+			vaddr += sz;
 			offset = 0;
 		}
+	} else if (dir == DMA_TO_DEVICE) {
+		memcpy(vaddr, phys_to_virt(orig_addr), size);
 	} else {
-		if (dir == DMA_TO_DEVICE)
-			memcpy(dma_addr, phys_to_virt(phys), size);
-		else
-			memcpy(phys_to_virt(phys), dma_addr, size);
+		memcpy(phys_to_virt(orig_addr), vaddr, size);
 	}
 }
-EXPORT_SYMBOL_GPL(swiotlb_bounce);
 
-void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
-			     phys_addr_t phys, size_t size,
-			     enum dma_data_direction dir)
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+				   dma_addr_t tbl_dma_addr,
+				   phys_addr_t orig_addr, size_t size,
+				   enum dma_data_direction dir)
 {
 	unsigned long flags;
-	char *dma_addr;
+	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
 	int i;
 	unsigned long mask;
@@ -453,7 +462,7 @@
 				io_tlb_list[i] = 0;
 			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
 				io_tlb_list[i] = ++count;
-			dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
@@ -471,7 +480,7 @@
 
 not_found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	return NULL;
+	return SWIOTLB_MAP_ERROR;
 found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 
@@ -481,11 +490,11 @@
 	 * needed.
 	 */
 	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
+		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
 
-	return dma_addr;
+	return tlb_addr;
 }
 EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
 
@@ -493,11 +502,10 @@
  * Allocates bounce buffer and returns its kernel virtual address.
  */
 
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-	   enum dma_data_direction dir)
+phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+		       enum dma_data_direction dir)
 {
-	dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+	dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
 
 	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
 }
@@ -505,20 +513,19 @@
 /*
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
-void
-swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
-			enum dma_data_direction dir)
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+			      size_t size, enum dma_data_direction dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t phys = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
-	if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+	if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -547,26 +554,27 @@
 }
 EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
 
-void
-swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-			enum dma_data_direction dir,
-			enum dma_sync_target target)
+void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
+			     size_t size, enum dma_data_direction dir,
+			     enum dma_sync_target target)
 {
-	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t phys = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
-	phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
 
 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+			swiotlb_bounce(orig_addr, tlb_addr,
+				       size, DMA_FROM_DEVICE);
 		else
 			BUG_ON(dir != DMA_TO_DEVICE);
 		break;
 	case SYNC_FOR_DEVICE:
 		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+			swiotlb_bounce(orig_addr, tlb_addr,
+				       size, DMA_TO_DEVICE);
 		else
 			BUG_ON(dir != DMA_FROM_DEVICE);
 		break;
@@ -589,12 +597,15 @@
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
-		/*
-		 * The allocated memory isn't reachable by the device.
-		 */
-		free_pages((unsigned long) ret, order);
-		ret = NULL;
+	if (ret) {
+		dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+		if (dev_addr + size - 1 > dma_mask) {
+			/*
+			 * The allocated memory isn't reachable by the device.
+			 */
+			free_pages((unsigned long) ret, order);
+			ret = NULL;
+		}
 	}
 	if (!ret) {
 		/*
@@ -602,25 +613,29 @@
 		 * GFP_DMA memory; fall back on map_single(), which
 		 * will grab memory from the lowest available address range.
 		 */
-		ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
-		if (!ret)
+		phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
+		if (paddr == SWIOTLB_MAP_ERROR)
 			return NULL;
+
+		ret = phys_to_virt(paddr);
+		dev_addr = phys_to_dma(hwdev, paddr);
+
+		/* Confirm address can be DMA'd by device */
+		if (dev_addr + size - 1 > dma_mask) {
+			printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+			       (unsigned long long)dma_mask,
+			       (unsigned long long)dev_addr);
+
+			/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+			swiotlb_tbl_unmap_single(hwdev, paddr,
+						 size, DMA_TO_DEVICE);
+			return NULL;
+		}
 	}
 
-	memset(ret, 0, size);
-	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
-
-	/* Confirm address can be DMA'd by device */
-	if (dev_addr + size - 1 > dma_mask) {
-		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		       (unsigned long long)dma_mask,
-		       (unsigned long long)dev_addr);
-
-		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
-		return NULL;
-	}
 	*dma_handle = dev_addr;
+	memset(ret, 0, size);
+
 	return ret;
 }
 EXPORT_SYMBOL(swiotlb_alloc_coherent);
@@ -636,7 +651,7 @@
 		free_pages((unsigned long)vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
-		swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -677,9 +692,8 @@
 			    enum dma_data_direction dir,
 			    struct dma_attrs *attrs)
 {
-	phys_addr_t phys = page_to_phys(page) + offset;
+	phys_addr_t map, phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = phys_to_dma(dev, phys);
-	void *map;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
@@ -690,23 +704,19 @@
 	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
-	/*
-	 * Oh well, have to allocate and map a bounce buffer.
-	 */
+	/* Oh well, have to allocate and map a bounce buffer. */
 	map = map_single(dev, phys, size, dir);
-	if (!map) {
+	if (map == SWIOTLB_MAP_ERROR) {
 		swiotlb_full(dev, size, dir, 1);
-		map = io_tlb_overflow_buffer;
+		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
-	dev_addr = swiotlb_virt_to_bus(dev, map);
+	dev_addr = phys_to_dma(dev, map);
 
-	/*
-	 * Ensure that the address returned is DMA'ble
-	 */
+	/* Ensure that the address returned is DMA'ble */
 	if (!dma_capable(dev, dev_addr, size)) {
 		swiotlb_tbl_unmap_single(dev, map, size, dir);
-		dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
+		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
 	return dev_addr;
@@ -729,7 +739,7 @@
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
 		return;
 	}
 
@@ -773,8 +783,7 @@
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
-				       target);
+		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
 		return;
 	}
 
@@ -831,9 +840,9 @@
 
 		if (swiotlb_force ||
 		    !dma_capable(hwdev, dev_addr, sg->length)) {
-			void *map = map_single(hwdev, sg_phys(sg),
-					       sg->length, dir);
-			if (!map) {
+			phys_addr_t map = map_single(hwdev, sg_phys(sg),
+						     sg->length, dir);
+			if (map == SWIOTLB_MAP_ERROR) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
 				swiotlb_full(hwdev, sg->length, dir, 0);
@@ -842,7 +851,7 @@
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
+			sg->dma_address = phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
@@ -925,7 +934,7 @@
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
+	return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer));
 }
 EXPORT_SYMBOL(swiotlb_dma_mapping_error);
 
@@ -938,6 +947,6 @@
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
+	return phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 EXPORT_SYMBOL(swiotlb_dma_supported);
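
Taken together, these hunks make the bounce-buffer bookkeeping physical-address based end to end: lookups become plain range checks, highmem buffers no longer need a permanent virtual mapping (swiotlb_bounce kmaps on demand), and bus addresses are derived only at the edges with phys_to_dma(). The post-conversion mapping flow, condensed from swiotlb_map_page() above:

/* Condensed sketch of the phys_addr_t-based mapping path. */
static dma_addr_t bounce_map(struct device *dev, phys_addr_t phys,
			     size_t size, enum dma_data_direction dir)
{
	phys_addr_t map = map_single(dev, phys, size, dir);
	dma_addr_t dev_addr;

	if (map == SWIOTLB_MAP_ERROR)	/* table full */
		return phys_to_dma(dev, io_tlb_overflow_buffer);

	dev_addr = phys_to_dma(dev, map);
	if (!dma_capable(dev, dev_addr, size)) {
		swiotlb_tbl_unmap_single(dev, map, size, dir);
		return phys_to_dma(dev, io_tlb_overflow_buffer);
	}
	return dev_addr;
}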
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 39c99fe..fab33a9 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -23,12 +23,12 @@
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 #include <linux/kallsyms.h>
+#include <linux/math64.h>
 #include <linux/uaccess.h>
 #include <linux/ioport.h>
 #include <net/addrconf.h>
 
 #include <asm/page.h>		/* for PAGE_SIZE */
-#include <asm/div64.h>
 #include <asm/sections.h>	/* for dereference_function_descriptor() */
 
 #include "kstrtox.h"
@@ -38,6 +38,8 @@
  * @cp: The start of the string
  * @endp: A pointer to the end of the parsed string will be placed here
  * @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtoull instead.
  */
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
 {
@@ -61,6 +63,8 @@
  * @cp: The start of the string
  * @endp: A pointer to the end of the parsed string will be placed here
  * @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtoul instead.
  */
 unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
 {
@@ -73,6 +77,8 @@
  * @cp: The start of the string
  * @endp: A pointer to the end of the parsed string will be placed here
  * @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtol instead.
  */
 long simple_strtol(const char *cp, char **endp, unsigned int base)
 {
@@ -88,6 +94,8 @@
  * @cp: The start of the string
  * @endp: A pointer to the end of the parsed string will be placed here
  * @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtoll instead.
  */
 long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 {
@@ -1485,7 +1493,10 @@
 				num = va_arg(args, long);
 				break;
 			case FORMAT_TYPE_SIZE_T:
-				num = va_arg(args, size_t);
+				if (spec.flags & SIGN)
+					num = va_arg(args, ssize_t);
+				else
+					num = va_arg(args, size_t);
 				break;
 			case FORMAT_TYPE_PTRDIFF:
 				num = va_arg(args, ptrdiff_t);
@@ -2013,7 +2024,11 @@
 	char digit;
 	int num = 0;
 	u8 qualifier;
-	u8 base;
+	unsigned int base;
+	union {
+		long long s;
+		unsigned long long u;
+	} val;
 	s16 field_width;
 	bool is_sign;
 
@@ -2053,8 +2068,11 @@
 
 		/* get field width */
 		field_width = -1;
-		if (isdigit(*fmt))
+		if (isdigit(*fmt)) {
 			field_width = skip_atoi(&fmt);
+			if (field_width <= 0)
+				break;
+		}
 
 		/* get conversion qualifier */
 		qualifier = -1;
@@ -2154,58 +2172,61 @@
 		    || (base == 0 && !isdigit(digit)))
 			break;
 
+		if (is_sign)
+			val.s = qualifier != 'L' ?
+				simple_strtol(str, &next, base) :
+				simple_strtoll(str, &next, base);
+		else
+			val.u = qualifier != 'L' ?
+				simple_strtoul(str, &next, base) :
+				simple_strtoull(str, &next, base);
+
+		if (field_width > 0 && next - str > field_width) {
+			if (base == 0)
+				_parse_integer_fixup_radix(str, &base);
+			while (next - str > field_width) {
+				if (is_sign)
+					val.s = div_s64(val.s, base);
+				else
+					val.u = div_u64(val.u, base);
+				--next;
+			}
+		}
+
 		switch (qualifier) {
 		case 'H':	/* that's 'hh' in format */
-			if (is_sign) {
-				signed char *s = (signed char *)va_arg(args, signed char *);
-				*s = (signed char)simple_strtol(str, &next, base);
-			} else {
-				unsigned char *s = (unsigned char *)va_arg(args, unsigned char *);
-				*s = (unsigned char)simple_strtoul(str, &next, base);
-			}
+			if (is_sign)
+				*va_arg(args, signed char *) = val.s;
+			else
+				*va_arg(args, unsigned char *) = val.u;
 			break;
 		case 'h':
-			if (is_sign) {
-				short *s = (short *)va_arg(args, short *);
-				*s = (short)simple_strtol(str, &next, base);
-			} else {
-				unsigned short *s = (unsigned short *)va_arg(args, unsigned short *);
-				*s = (unsigned short)simple_strtoul(str, &next, base);
-			}
+			if (is_sign)
+				*va_arg(args, short *) = val.s;
+			else
+				*va_arg(args, unsigned short *) = val.u;
 			break;
 		case 'l':
-			if (is_sign) {
-				long *l = (long *)va_arg(args, long *);
-				*l = simple_strtol(str, &next, base);
-			} else {
-				unsigned long *l = (unsigned long *)va_arg(args, unsigned long *);
-				*l = simple_strtoul(str, &next, base);
-			}
+			if (is_sign)
+				*va_arg(args, long *) = val.s;
+			else
+				*va_arg(args, unsigned long *) = val.u;
 			break;
 		case 'L':
-			if (is_sign) {
-				long long *l = (long long *)va_arg(args, long long *);
-				*l = simple_strtoll(str, &next, base);
-			} else {
-				unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *);
-				*l = simple_strtoull(str, &next, base);
-			}
+			if (is_sign)
+				*va_arg(args, long long *) = val.s;
+			else
+				*va_arg(args, unsigned long long *) = val.u;
 			break;
 		case 'Z':
 		case 'z':
-		{
-			size_t *s = (size_t *)va_arg(args, size_t *);
-			*s = (size_t)simple_strtoul(str, &next, base);
-		}
-		break;
+			*va_arg(args, size_t *) = val.u;
+			break;
 		default:
-			if (is_sign) {
-				int *i = (int *)va_arg(args, int *);
-				*i = (int)simple_strtol(str, &next, base);
-			} else {
-				unsigned int *i = (unsigned int *)va_arg(args, unsigned int*);
-				*i = (unsigned int)simple_strtoul(str, &next, base);
-			}
+			if (is_sign)
+				*va_arg(args, int *) = val.s;
+			else
+				*va_arg(args, unsigned int *) = val.u;
 			break;
 		}
 		num++;
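
The rewrite means numeric conversions finally honor field widths: the value is parsed in full, then divided back down by the base until only field_width digits remain, and next is rewound so the trailing digits are re-parsed by the following conversion. For example:

static void sscanf_width_demo(void)
{
	int a, b, n;

	n = sscanf("12345", "%2d%3d", &a, &b);
	/* With widths honored: n == 2, a == 12, b == 345.
	 * Before this patch the width was silently ignored and the
	 * first conversion swallowed the whole number.
	 */
}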
diff --git a/mm/Kconfig b/mm/Kconfig
index 71259e0..278e3ab 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -149,7 +149,18 @@
 	depends on NO_BOOTMEM
 	depends on X86_64
 	depends on NUMA
-	depends on BROKEN
+	default n
+	help
+	  Allow a node to have only movable memory.  Pages used by the kernel,
+	  such as direct mapping pages, cannot be migrated, so the
+	  corresponding memory device cannot be hotplugged.  This option
+	  allows users to online all the memory of a node as movable memory
+	  so that the whole node can be hotplugged.  Users who don't use the
+	  memory hotplug feature are unaffected by turning this option on,
+	  since they don't online memory as movable.
+
+	  Say Y here if you want to hotplug a whole node.
+	  Say N here if you want the kernel to use memory on all nodes evenly.
 
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
diff --git a/mm/compaction.c b/mm/compaction.c
index 1297912..6b807e4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -17,6 +17,21 @@
 #include <linux/balloon_compaction.h>
 #include "internal.h"
 
+#ifdef CONFIG_COMPACTION
+static inline void count_compact_event(enum vm_event_item item)
+{
+	count_vm_event(item);
+}
+
+static inline void count_compact_events(enum vm_event_item item, long delta)
+{
+	count_vm_events(item, delta);
+}
+#else
+#define count_compact_event(item) do { } while (0)
+#define count_compact_events(item, delta) do { } while (0)
+#endif
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 #define CREATE_TRACE_POINTS
@@ -303,6 +318,9 @@
 	if (blockpfn == end_pfn)
 		update_pageblock_skip(cc, valid_page, total_isolated, false);
 
+	count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
+	if (total_isolated)
+		count_compact_events(COMPACTISOLATED, total_isolated);
 	return total_isolated;
 }
 
@@ -609,6 +627,10 @@
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
+	count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
+	if (nr_isolated)
+		count_compact_events(COMPACTISOLATED, nr_isolated);
+
 	return low_pfn;
 }
 
@@ -1015,14 +1037,11 @@
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
+				MR_COMPACTION);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
-		count_vm_event(COMPACTBLOCKS);
-		count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
-		if (nr_remaining)
-			count_vm_events(COMPACTPAGEFAILED, nr_remaining);
 		trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
 						nr_remaining);
 
@@ -1105,7 +1124,7 @@
 	if (!order || !may_enter_fs || !may_perform_io)
 		return rc;
 
-	count_vm_event(COMPACTSTALL);
+	count_compact_event(COMPACTSTALL);
 
 #ifdef CONFIG_CMA
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
diff --git a/mm/highmem.c b/mm/highmem.c
index d999077..b32b70c 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -105,6 +105,7 @@
 
 	return virt_to_page(addr);
 }
+EXPORT_SYMBOL(kmap_to_page);
 
 static void flush_all_zero_pkmaps(void)
 {
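
Exporting kmap_to_page() lets modules translate a pkmap-area virtual address back to its struct page (non-highmem addresses simply go through virt_to_page()). A round-trip sketch:

#include <linux/highmem.h>

/* Illustrative only: kmap a page, recover the page from the address. */
static struct page *kmap_round_trip(struct page *page)
{
	void *vaddr = kmap(page);
	struct page *same = kmap_to_page(vaddr);	/* == page */

	kunmap(page);
	return same;
}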
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 827d9c8..9e894ed 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -19,6 +19,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -573,19 +574,19 @@
 
 	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
 	if (unlikely(!*hugepage_kobj)) {
-		printk(KERN_ERR "hugepage: failed kobject create\n");
+		printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n");
 		return -ENOMEM;
 	}
 
 	err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
 	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
 		goto delete_obj;
 	}
 
 	err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
 	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
 		goto remove_hp_group;
 	}
 
@@ -690,7 +691,7 @@
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
 		pmd = pmd_mkwrite(pmd);
@@ -848,7 +849,8 @@
 	 * run pte_offset_map on the pmd, if an huge pmd could
 	 * materialize from under us from a different thread.
 	 */
-	if (unlikely(__pte_alloc(mm, vma, pmd, address)))
+	if (unlikely(pmd_none(*pmd)) &&
+	    unlikely(__pte_alloc(mm, vma, pmd, address)))
 		return VM_FAULT_OOM;
 	/* if an huge pmd materialized from under us just retry later */
 	if (unlikely(pmd_trans_huge(*pmd)))
@@ -1287,6 +1289,81 @@
 	return page;
 }
 
+/* NUMA hinting page fault entry point for trans huge pmds */
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+{
+	struct page *page;
+	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int target_nid;
+	int current_nid = -1;
+	bool migrated = false;
+	bool page_locked = false;
+
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
+
+	page = pmd_page(pmd);
+	get_page(page);
+	current_nid = page_to_nid(page);
+	count_vm_numa_event(NUMA_HINT_FAULTS);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+
+	target_nid = mpol_misplaced(page, vma, haddr);
+	if (target_nid == -1) {
+		put_page(page);
+		goto clear_pmdnuma;
+	}
+
+	/* Acquire the page lock to serialise THP migrations */
+	spin_unlock(&mm->page_table_lock);
+	lock_page(page);
+	page_locked = true;
+
+	/* Confirm the PMD did not change while the page was locked */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp))) {
+		unlock_page(page);
+		put_page(page);
+		goto out_unlock;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	/* Migrate the THP to the requested node */
+	migrated = migrate_misplaced_transhuge_page(mm, vma,
+				pmdp, pmd, addr,
+				page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+	else {
+		spin_lock(&mm->page_table_lock);
+		if (unlikely(!pmd_same(pmd, *pmdp))) {
+			unlock_page(page);
+			goto out_unlock;
+		}
+		goto clear_pmdnuma;
+	}
+
+	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
+
+clear_pmdnuma:
+	pmd = pmd_mknonnuma(pmd);
+	set_pmd_at(mm, haddr, pmdp, pmd);
+	VM_BUG_ON(pmd_numa(*pmdp));
+	update_mmu_cache_pmd(vma, addr, pmdp);
+	if (page_locked)
+		unlock_page(page);
+
+out_unlock:
+	spin_unlock(&mm->page_table_lock);
+	if (current_nid != -1)
+		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
@@ -1375,7 +1452,7 @@
 }
 
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, pgprot_t newprot)
+		unsigned long addr, pgprot_t newprot, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int ret = 0;
@@ -1383,8 +1460,18 @@
 	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
 		pmd_t entry;
 		entry = pmdp_get_and_clear(mm, addr, pmd);
-		entry = pmd_modify(entry, newprot);
-		BUG_ON(pmd_write(entry));
+		if (!prot_numa) {
+			entry = pmd_modify(entry, newprot);
+			BUG_ON(pmd_write(entry));
+		} else {
+			struct page *page = pmd_page(*pmd);
+
+			/* only check non-shared pages */
+			if (page_mapcount(page) == 1 &&
+			    !pmd_numa(*pmd)) {
+				entry = pmd_mknuma(entry);
+			}
+		}
 		set_pmd_at(mm, addr, pmd, entry);
 		spin_unlock(&vma->vm_mm->page_table_lock);
 		ret = 1;
@@ -1474,7 +1561,7 @@
 		 * We can't temporarily set the pmd to null in order
 		 * to split it, the pmd must remain marked huge at all
 		 * times or the VM won't take the pmd_trans_huge paths
-		 * and it won't wait on the anon_vma->root->mutex to
+		 * and it won't wait on the anon_vma->root->rwsem to
 		 * serialize against split_huge_page*.
 		 */
 		pmdp_splitting_flush(vma, address, pmd);
@@ -1565,6 +1652,7 @@
 		page_tail->mapping = page->mapping;
 
 		page_tail->index = page->index + i;
+		page_xchg_last_nid(page_tail, page_last_nid(page));
 
 		BUG_ON(!PageAnon(page_tail));
 		BUG_ON(!PageUptodate(page_tail));
@@ -1632,6 +1720,8 @@
 				BUG_ON(page_mapcount(page) != 1);
 			if (!pmd_young(*pmd))
 				entry = pte_mkold(entry);
+			if (pmd_numa(*pmd))
+				entry = pte_mknuma(entry);
 			pte = pte_offset_map(&_pmd, haddr);
 			BUG_ON(!pte_none(*pte));
 			set_pte_at(mm, haddr, pte, entry);
@@ -1674,7 +1764,7 @@
 	return ret;
 }
 
-/* must be called with anon_vma->root->mutex hold */
+/* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
 			      struct anon_vma *anon_vma)
 {
@@ -1729,7 +1819,7 @@
 
 	BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
 	BUG_ON(!PageAnon(page));
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		goto out;
 	ret = 0;
@@ -1742,7 +1832,7 @@
 
 	BUG_ON(PageCompound(page));
 out_unlock:
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 out:
 	return ret;
 }
@@ -2234,7 +2324,7 @@
 	if (pmd_trans_huge(*pmd))
 		goto out;
 
-	anon_vma_lock(vma->anon_vma);
+	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
 	ptl = pte_lockptr(mm, pmd);
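
The do_huge_pmd_numa_page() path added above must drop page_table_lock before lock_page() (a sleeping operation) and then revalidate the PMD with pmd_same() once the lock is retaken, since another thread may have changed the entry in that window. A minimal user-space sketch of the drop/retake/revalidate pattern, using pthreads; all names here are hypothetical stand-ins, not kernel APIs:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long pmd_val = 0x1000;	/* stand-in for *pmdp */

/* Called with table_lock held; returns with it released. */
static bool try_migrate(unsigned long snapshot)
{
	/* lock_page() and migration may sleep, so drop the table lock */
	pthread_mutex_unlock(&table_lock);
	/* ... sleeping work (page lock, migration) happens here ... */
	pthread_mutex_lock(&table_lock);

	/* pmd_same() analogue: did anyone change the entry meanwhile? */
	if (snapshot != pmd_val) {
		pthread_mutex_unlock(&table_lock);
		return false;	/* raced: give up, the fault is retried */
	}
	pthread_mutex_unlock(&table_lock);
	return true;
}

int main(void)
{
	pthread_mutex_lock(&table_lock);
	unsigned long snap = pmd_val;	/* like the pmd argument */
	printf("migrated: %d\n", try_migrate(snap));
	return 0;
}
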
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 88e7293..4f3ea0b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1906,14 +1906,12 @@
 		default_hstate.max_huge_pages = default_hstate_max_huge_pages;
 
 	hugetlb_init_hstates();
-
 	gather_bootmem_prealloc();
-
 	report_hugepages();
 
 	hugetlb_sysfs_init();
-
 	hugetlb_register_all_nodes();
+	hugetlb_cgroup_file_init();
 
 	return 0;
 }
@@ -1943,13 +1941,6 @@
 	h->next_nid_to_free = first_node(node_states[N_MEMORY]);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 					huge_page_size(h)/1024);
-	/*
-	 * Add cgroup control files only if the huge page consists
-	 * of more than two normal pages. This is because we use
-	 * page[2].lru.next for storing cgoup details.
-	 */
-	if (order >= HUGETLB_CGROUP_MIN_ORDER)
-		hugetlb_cgroup_file_init(hugetlb_max_hstate - 1);
 
 	parsed_hstate = h;
 }
@@ -3016,7 +3007,7 @@
 	return i ? i : -EFAULT;
 }
 
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -3024,6 +3015,7 @@
 	pte_t *ptep;
 	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
+	unsigned long pages = 0;
 
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
@@ -3034,12 +3026,15 @@
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
-		if (huge_pmd_unshare(mm, &address, ptep))
+		if (huge_pmd_unshare(mm, &address, ptep)) {
+			pages++;
 			continue;
+		}
 		if (!huge_pte_none(huge_ptep_get(ptep))) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
 			set_huge_pte_at(mm, address, ptep, pte);
+			pages++;
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -3051,6 +3046,8 @@
 	 */
 	flush_tlb_range(vma, start, end);
 	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+
+	return pages << h->order;
 }
 
 int hugetlb_reserve_pages(struct inode *inode,
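
hugetlb_change_protection() now reports how many pages it touched so callers can account them, and the return value is in base pages, hence the "pages << h->order" shift. A quick standalone sketch of that conversion; the order value is a hypothetical x86-64 2 MB hstate, not taken from this patch:

#include <stdio.h>

int main(void)
{
	unsigned long huge_pages_changed = 3;	/* huge PTEs rewritten */
	unsigned int order = 9;			/* hypothetical: 2 MB / 4 KB */

	/* same conversion as "return pages << h->order" above */
	printf("%lu base pages\n", huge_pages_changed << order);	/* 1536 */
	return 0;
}
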
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index b5bde7a..9cea7de 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -333,7 +333,7 @@
 	return buf;
 }
 
-int __init hugetlb_cgroup_file_init(int idx)
+static void __init __hugetlb_cgroup_file_init(int idx)
 {
 	char buf[32];
 	struct cftype *cft;
@@ -375,7 +375,22 @@
 
 	WARN_ON(cgroup_add_cftypes(&hugetlb_subsys, h->cgroup_files));
 
-	return 0;
+	return;
+}
+
+void __init hugetlb_cgroup_file_init(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		/*
+		 * Add cgroup control files only if the huge page consists
+		 * of more than two normal pages. This is because we use
+		 * page[2].lru.next for storing cgroup details.
+		 */
+		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
+			__hugetlb_cgroup_file_init(hstate_index(h));
+	}
 }
 
 /*
diff --git a/mm/internal.h b/mm/internal.h
index 52d1fa9..d597f94 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -217,15 +217,18 @@
 {
 	if (TestClearPageMlocked(page)) {
 		unsigned long flags;
+		int nr_pages = hpage_nr_pages(page);
 
 		local_irq_save(flags);
-		__dec_zone_page_state(page, NR_MLOCK);
+		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 		SetPageMlocked(newpage);
-		__inc_zone_page_state(newpage, NR_MLOCK);
+		__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
 		local_irq_restore(flags);
 	}
 }
 
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern unsigned long vma_address(struct page *page,
 				 struct vm_area_struct *vma);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index a217cc5..752a705 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1556,7 +1556,8 @@
 	struct kmemleak_object *object;
 	unsigned long addr;
 
-	addr= simple_strtoul(str, NULL, 0);
+	if (kstrtoul(str, 0, &addr))
+		return -EINVAL;
 	object = find_and_get_object(addr, 0);
 	if (!object) {
 		pr_info("Unknown object at 0x%08lx\n", addr);
diff --git a/mm/ksm.c b/mm/ksm.c
index 382d930..5157385 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1624,7 +1624,7 @@
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_read(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1648,7 +1648,7 @@
 			if (!search_new_forks || !mapcount)
 				break;
 		}
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_read(anon_vma);
 		if (!mapcount)
 			goto out;
 	}
@@ -1678,7 +1678,7 @@
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_read(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1697,11 +1697,11 @@
 			ret = try_to_unmap_one(page, vma,
 					rmap_item->address, flags);
 			if (ret != SWAP_AGAIN || !page_mapped(page)) {
-				anon_vma_unlock(anon_vma);
+				anon_vma_unlock_read(anon_vma);
 				goto out;
 			}
 		}
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_read(anon_vma);
 	}
 	if (!search_new_forks++)
 		goto again;
@@ -1731,7 +1731,7 @@
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_read(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1749,11 +1749,11 @@
 
 			ret = rmap_one(page, vma, rmap_item->address, arg);
 			if (ret != SWAP_AGAIN) {
-				anon_vma_unlock(anon_vma);
+				anon_vma_unlock_read(anon_vma);
 				goto out;
 			}
 		}
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_read(anon_vma);
 	}
 	if (!search_new_forks++)
 		goto again;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6c05592..09255ec 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -10,6 +10,10 @@
  * Copyright (C) 2009 Nokia Corporation
  * Author: Kirill A. Shutemov
  *
+ * Kernel Memory Controller
+ * Copyright (C) 2012 Parallels Inc. and Google Inc.
+ * Authors: Glauber Costa and Suleiman Souhlal
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -268,6 +272,10 @@
 	};
 
 	/*
+	 * the counter to account for kernel memory usage.
+	 */
+	struct res_counter kmem;
+	/*
 	 * Per cgroup active and inactive list, similar to the
 	 * per zone LRU lists.
 	 */
@@ -282,6 +290,7 @@
 	 * Should the accounting and control be hierarchical, per subtree?
 	 */
 	bool use_hierarchy;
+	unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */
 
 	bool		oom_lock;
 	atomic_t	under_oom;
@@ -332,8 +341,61 @@
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
 	struct tcp_memcontrol tcp_mem;
 #endif
+#if defined(CONFIG_MEMCG_KMEM)
+	/* analogous to slab_common's slab_caches list. per-memcg */
+	struct list_head memcg_slab_caches;
+	/* Not a spinlock, we can take a lot of time walking the list */
+	struct mutex slab_caches_mutex;
+	/* Index in the kmem_cache->memcg_params->memcg_caches array */
+	int kmemcg_id;
+#endif
 };
 
+/* internal-only representation of the status of kmem accounting. */
+enum {
+	KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
+	KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */
+	KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
+};
+
+/* We account when limit is on, but only after call sites are patched */
+#define KMEM_ACCOUNTED_MASK \
+		((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED))
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
+{
+	set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
+}
+
+static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+{
+	return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
+}
+
+static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
+{
+	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
+static void memcg_kmem_clear_activated(struct mem_cgroup *memcg)
+{
+	clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
+static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
+{
+	if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
+		set_bit(KMEM_ACCOUNTED_DEAD, &memcg->kmem_account_flags);
+}
+
+static bool memcg_kmem_test_and_clear_dead(struct mem_cgroup *memcg)
+{
+	return test_and_clear_bit(KMEM_ACCOUNTED_DEAD,
+				  &memcg->kmem_account_flags);
+}
+#endif
+
 /* Stuffs for move charges at task migration. */
 /*
  * Types of charges to be moved. "move_charge_at_immitgrate" is treated as a
@@ -388,9 +450,13 @@
 };
 
 /* for encoding cft->private value on file */
-#define _MEM			(0)
-#define _MEMSWAP		(1)
-#define _OOM_TYPE		(2)
+enum res_type {
+	_MEM,
+	_MEMSWAP,
+	_OOM_TYPE,
+	_KMEM,
+};
+
 #define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
 #define MEMFILE_TYPE(val)	((val) >> 16 & 0xffff)
 #define MEMFILE_ATTR(val)	((val) & 0xffff)
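
The MEMFILE_* helpers pack the res_type into the high 16 bits of cft->private and the attribute into the low 16 bits, which is what lets the new _KMEM files reuse mem_cgroup_read() and mem_cgroup_reset(). A standalone round-trip of the encoding; the attribute value 2 is illustrative (e.g. a RES_LIMIT-style constant), not quoted from this patch:

#include <assert.h>
#include <stdio.h>

#define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
#define MEMFILE_TYPE(val)	((val) >> 16 & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

enum res_type { _MEM, _MEMSWAP, _OOM_TYPE, _KMEM };

int main(void)
{
	int priv = MEMFILE_PRIVATE(_KMEM, 2);	/* 2: e.g. a limit attribute */

	assert(MEMFILE_TYPE(priv) == _KMEM);	/* type recovered intact */
	assert(MEMFILE_ATTR(priv) == 2);	/* attribute recovered intact */
	printf("type=%d attr=%d\n", MEMFILE_TYPE(priv), MEMFILE_ATTR(priv));
	return 0;
}
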
@@ -487,6 +553,75 @@
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+/*
+ * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
+ * There are two main reasons for not using the css_id for this:
+ *  1) this works better in sparse environments, where we have a lot of memcgs,
+ *     but only a few kmem-limited ones. If, for instance, we have 200 memcgs
+ *     and none but the 200th is kmem-limited, we'd otherwise need a 200-entry
+ *     array for that single group.
+ *
+ *  2) In order not to violate the cgroup API, we would like to do all memory
+ *     allocation in ->create(). At that point, we haven't yet allocated the
+ *     css_id. Having a separate index prevents us from messing with the cgroup
+ *     core for this
+ *
+ * The current size of the caches array is stored in
+ * memcg_limited_groups_array_size.  It will double each time we have to
+ * increase it.
+ */
+static DEFINE_IDA(kmem_limited_groups);
+int memcg_limited_groups_array_size;
+
+/*
+ * MIN_SIZE is different from 1, because we would like to avoid going through
+ * the alloc/free process all the time. In a small machine, 4 kmem-limited
+ * cgroups is a reasonable guess. In the future, it could be a parameter or
+ * tunable, but that is not strictly necessary.
+ *
+ * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * this constant directly from cgroup, but it is understandable that this is
+ * better kept as an internal representation in cgroup.c. In any case, the
+ * css_id space is not getting any smaller, and we don't necessarily have to
+ * increase ours as well if it increases.
+ */
+#define MEMCG_CACHES_MIN_SIZE 4
+#define MEMCG_CACHES_MAX_SIZE 65535
+
+/*
+ * A lot of the calls to the cache allocation functions are expected to be
+ * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
+ * conditional to this static branch, we'll have to allow modules that do
+ * kmem_cache_alloc and such to see this symbol as well
+ */
+struct static_key memcg_kmem_enabled_key;
+EXPORT_SYMBOL(memcg_kmem_enabled_key);
+
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+	if (memcg_kmem_is_active(memcg)) {
+		static_key_slow_dec(&memcg_kmem_enabled_key);
+		ida_simple_remove(&kmem_limited_groups, memcg->kmemcg_id);
+	}
+	/*
+	 * This check can't live in kmem destruction function,
+	 * since the charges will outlive the cgroup
+	 */
+	WARN_ON(res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0);
+}
+#else
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
+static void disarm_static_keys(struct mem_cgroup *memcg)
+{
+	disarm_sock_keys(memcg);
+	disarm_kmem_keys(memcg);
+}
+
 static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
@@ -1453,6 +1588,10 @@
 		res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10,
 		res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10,
 		res_counter_read_u64(&memcg->memsw, RES_FAILCNT));
+	printk(KERN_INFO "kmem: usage %llukB, limit %llukB, failcnt %llu\n",
+		res_counter_read_u64(&memcg->kmem, RES_USAGE) >> 10,
+		res_counter_read_u64(&memcg->kmem, RES_LIMIT) >> 10,
+		res_counter_read_u64(&memcg->kmem, RES_FAILCNT));
 }
 
 /*
@@ -2060,20 +2199,28 @@
 static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
 static DEFINE_MUTEX(percpu_charge_mutex);
 
-/*
- * Try to consume stocked charge on this cpu. If success, one page is consumed
- * from local stock and true is returned. If the stock is 0 or charges from a
- * cgroup which is not current target, returns false. This stock will be
- * refilled.
+/**
+ * consume_stock: Try to consume stocked charge on this cpu.
+ * @memcg: memcg to consume from.
+ * @nr_pages: how many pages to charge.
+ *
+ * The charges will only happen if @memcg matches the current cpu's memcg
+ * stock, and at least @nr_pages are available in that stock.  Failure to
+ * service an allocation will refill the stock.
+ *
+ * returns true if successful, false otherwise.
  */
-static bool consume_stock(struct mem_cgroup *memcg)
+static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct memcg_stock_pcp *stock;
 	bool ret = true;
 
+	if (nr_pages > CHARGE_BATCH)
+		return false;
+
 	stock = &get_cpu_var(memcg_stock);
-	if (memcg == stock->cached && stock->nr_pages)
-		stock->nr_pages--;
+	if (memcg == stock->cached && stock->nr_pages >= nr_pages)
+		stock->nr_pages -= nr_pages;
 	else /* need to call res_counter_charge */
 		ret = false;
 	put_cpu_var(memcg_stock);
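
consume_stock() now implements the batch-aware fast path described in the kernel-doc above: a charge is served from the per-CPU stock only when the cached memcg matches and enough pages remain, and anything larger than CHARGE_BATCH goes straight to the slow path. A self-contained sketch of that decision, with hypothetical names in place of the per-CPU machinery:

#include <stdbool.h>
#include <stdio.h>

#define CHARGE_BATCH 32U

struct stock { const void *cached; unsigned int nr_pages; };

static bool consume_stock(struct stock *s, const void *memcg,
			  unsigned int nr_pages)
{
	if (nr_pages > CHARGE_BATCH)
		return false;		/* too big for any stock */
	if (s->cached == memcg && s->nr_pages >= nr_pages) {
		s->nr_pages -= nr_pages;
		return true;		/* served locally, no res_counter */
	}
	return false;			/* slow path must charge and refill */
}

int main(void)
{
	int group;			/* stand-in for a mem_cgroup */
	struct stock s = { .cached = &group, .nr_pages = 16 };

	printf("%d\n", consume_stock(&s, &group, 8));	/* 1: served */
	printf("%d\n", consume_stock(&s, &group, 64));	/* 0: above batch */
	return 0;
}
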
@@ -2250,7 +2397,8 @@
 };
 
 static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
-				unsigned int nr_pages, bool oom_check)
+				unsigned int nr_pages, unsigned int min_pages,
+				bool oom_check)
 {
 	unsigned long csize = nr_pages * PAGE_SIZE;
 	struct mem_cgroup *mem_over_limit;
@@ -2273,18 +2421,18 @@
 	} else
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
 	/*
-	 * nr_pages can be either a huge page (HPAGE_PMD_NR), a batch
-	 * of regular pages (CHARGE_BATCH), or a single regular page (1).
-	 *
 	 * Never reclaim on behalf of optional batching, retry with a
 	 * single page instead.
 	 */
-	if (nr_pages == CHARGE_BATCH)
+	if (nr_pages > min_pages)
 		return CHARGE_RETRY;
 
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
+	if (gfp_mask & __GFP_NORETRY)
+		return CHARGE_NOMEM;
+
 	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
@@ -2297,7 +2445,7 @@
 	 * unlikely to succeed so close to the limit, and we fall back
 	 * to regular pages anyway in case of failure.
 	 */
-	if (nr_pages == 1 && ret)
+	if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
 		return CHARGE_RETRY;
 
 	/*
@@ -2371,7 +2519,7 @@
 		memcg = *ptr;
 		if (mem_cgroup_is_root(memcg))
 			goto done;
-		if (nr_pages == 1 && consume_stock(memcg))
+		if (consume_stock(memcg, nr_pages))
 			goto done;
 		css_get(&memcg->css);
 	} else {
@@ -2396,7 +2544,7 @@
 			rcu_read_unlock();
 			goto done;
 		}
-		if (nr_pages == 1 && consume_stock(memcg)) {
+		if (consume_stock(memcg, nr_pages)) {
 			/*
 			 * It seems dangerous to access memcg without css_get().
 			 * But considering how consume_stock works, it's not
@@ -2431,7 +2579,8 @@
 			nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
 		}
 
-		ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check);
+		ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, nr_pages,
+		    oom_check);
 		switch (ret) {
 		case CHARGE_OK:
 			break;
@@ -2624,6 +2773,766 @@
 	memcg_check_events(memcg, page);
 }
 
+static DEFINE_MUTEX(set_limit_mutex);
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
+{
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
+		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+}
+
+/*
+ * This is a bit cumbersome, but it is rarely used and avoids a backpointer
+ * in the memcg_cache_params struct.
+ */
+static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
+{
+	struct kmem_cache *cachep;
+
+	VM_BUG_ON(p->is_root_cache);
+	cachep = p->root_cache;
+	return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
+}
+
+#ifdef CONFIG_SLABINFO
+static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft,
+					struct seq_file *m)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct memcg_cache_params *params;
+
+	if (!memcg_can_account_kmem(memcg))
+		return -EIO;
+
+	print_slabinfo_header(m);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_for_each_entry(params, &memcg->memcg_slab_caches, list)
+		cache_show(memcg_params_to_cache(params), m);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+	return 0;
+}
+#endif
+
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+{
+	struct res_counter *fail_res;
+	struct mem_cgroup *_memcg;
+	int ret = 0;
+	bool may_oom;
+
+	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Conditions under which we can wait for the oom_killer. Those are
+	 * the same conditions tested by the core page allocator
+	 */
+	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
+
+	_memcg = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
+				      &_memcg, may_oom);
+
+	if (ret == -EINTR)  {
+		/*
+		 * __mem_cgroup_try_charge() chose to bypass to root due to
+		 * OOM kill or fatal signal.  Since our only options are to
+		 * either fail the allocation or charge it to this cgroup, do
+		 * it as a temporary condition. But we can't fail. From a
+		 * kmem/slab perspective, the cache has already been selected
+		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * our minds.
+		 *
+		 * This condition will only trigger if the task entered
+		 * memcg_charge_kmem in a sane state, but was OOM-killed during
+		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * dying when the allocation triggers should have been already
+		 * directed to the root cgroup in memcontrol.h
+		 */
+		res_counter_charge_nofail(&memcg->res, size, &fail_res);
+		if (do_swap_account)
+			res_counter_charge_nofail(&memcg->memsw, size,
+						  &fail_res);
+		ret = 0;
+	} else if (ret)
+		res_counter_uncharge(&memcg->kmem, size);
+
+	return ret;
+}
+
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	res_counter_uncharge(&memcg->res, size);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+
+	/* Not down to 0 */
+	if (res_counter_uncharge(&memcg->kmem, size))
+		return;
+
+	if (memcg_kmem_test_and_clear_dead(memcg))
+		mem_cgroup_put(memcg);
+}
+
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep)
+{
+	if (!memcg)
+		return;
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
+	mutex_unlock(&memcg->slab_caches_mutex);
+}
+
+/*
+ * helper for accessing a memcg's index. It will be used as an index in the
+ * child cache array in kmem_cache, and also to derive its name. This function
+ * will return -1 when this is not a kmem-limited memcg.
+ */
+int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return memcg ? memcg->kmemcg_id : -1;
+}
+
+/*
+ * This ends up being protected by the set_limit mutex, during normal
+ * operation, because that is its main call site.
+ *
+ * But when we create a new cache, we can call this as well if its parent
+ * is kmem-limited. That will have to hold set_limit_mutex as well.
+ */
+int memcg_update_cache_sizes(struct mem_cgroup *memcg)
+{
+	int num, ret;
+
+	num = ida_simple_get(&kmem_limited_groups,
+				0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (num < 0)
+		return num;
+	/*
+	 * After this point, kmem_accounted (which we test atomically at
+	 * the beginning of this conditional) is no longer 0. This
+	 * guarantees only one process will set the following boolean
+	 * to true. We don't need test_and_set because we're protected
+	 * by the set_limit_mutex anyway.
+	 */
+	memcg_kmem_set_activated(memcg);
+
+	ret = memcg_update_all_caches(num+1);
+	if (ret) {
+		ida_simple_remove(&kmem_limited_groups, num);
+		memcg_kmem_clear_activated(memcg);
+		return ret;
+	}
+
+	memcg->kmemcg_id = num;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+	return 0;
+}
+
+static size_t memcg_caches_array_size(int num_groups)
+{
+	ssize_t size;
+	if (num_groups <= 0)
+		return 0;
+
+	size = 2 * num_groups;
+	if (size < MEMCG_CACHES_MIN_SIZE)
+		size = MEMCG_CACHES_MIN_SIZE;
+	else if (size > MEMCG_CACHES_MAX_SIZE)
+		size = MEMCG_CACHES_MAX_SIZE;
+
+	return size;
+}
+
+/*
+ * We should update the current array size iff all cache updates succeed. This
+ * can only be done from the slab side. The slab mutex needs to be held when
+ * calling this.
+ */
+void memcg_update_array_size(int num)
+{
+	if (num > memcg_limited_groups_array_size)
+		memcg_limited_groups_array_size = memcg_caches_array_size(num);
+}
+
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
+{
+	struct memcg_cache_params *cur_params = s->memcg_params;
+
+	VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
+
+	if (num_groups > memcg_limited_groups_array_size) {
+		int i;
+		ssize_t size = memcg_caches_array_size(num_groups);
+
+		size *= sizeof(void *);
+		size += sizeof(struct memcg_cache_params);
+
+		s->memcg_params = kzalloc(size, GFP_KERNEL);
+		if (!s->memcg_params) {
+			s->memcg_params = cur_params;
+			return -ENOMEM;
+		}
+
+		s->memcg_params->is_root_cache = true;
+
+		/*
+		 * There is a chance it will be bigger than
+		 * memcg_limited_groups_array_size if we failed an allocation
+		 * in a cache, in which case all caches updated before it
+		 * will have a bigger array.
+		 *
+		 * But if that is the case, the data after
+		 * memcg_limited_groups_array_size is certainly unused
+		 */
+		for (i = 0; i < memcg_limited_groups_array_size; i++) {
+			if (!cur_params->memcg_caches[i])
+				continue;
+			s->memcg_params->memcg_caches[i] =
+						cur_params->memcg_caches[i];
+		}
+
+		/*
+		 * Ideally, we would wait until all caches succeed, and only
+		 * then free the old one. But this is not worth the extra
+		 * pointer per-cache we'd have to have for this.
+		 *
+		 * It is not a big deal if some caches are left with a size
+		 * bigger than the others. And all updates will reset this
+		 * anyway.
+		 */
+		kfree(cur_params);
+	}
+	return 0;
+}
+
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache)
+{
+	size_t size = sizeof(struct memcg_cache_params);
+
+	if (!memcg_kmem_enabled())
+		return 0;
+
+	if (!memcg)
+		size += memcg_limited_groups_array_size * sizeof(void *);
+
+	s->memcg_params = kzalloc(size, GFP_KERNEL);
+	if (!s->memcg_params)
+		return -ENOMEM;
+
+	if (memcg) {
+		s->memcg_params->memcg = memcg;
+		s->memcg_params->root_cache = root_cache;
+	}
+	return 0;
+}
+
+void memcg_release_cache(struct kmem_cache *s)
+{
+	struct kmem_cache *root;
+	struct mem_cgroup *memcg;
+	int id;
+
+	/*
+	 * This happens, for instance, when a root cache goes away before we
+	 * add any memcg.
+	 */
+	if (!s->memcg_params)
+		return;
+
+	if (s->memcg_params->is_root_cache)
+		goto out;
+
+	memcg = s->memcg_params->memcg;
+	id  = memcg_cache_id(memcg);
+
+	root = s->memcg_params->root_cache;
+	root->memcg_params->memcg_caches[id] = NULL;
+	mem_cgroup_put(memcg);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_del(&s->memcg_params->list);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+out:
+	kfree(s->memcg_params);
+}
+
+/*
+ * During the creation of a new cache, we need to disable our accounting
+ * mechanism altogether. This is true even if we are not creating, but rather
+ * just enqueueing new caches to be created.
+ *
+ * This is because that process will trigger allocations; some visible, like
+ * explicit kmallocs to auxiliary data structures, name strings and internal
+ * cache structures; some well concealed, like INIT_WORK() that can allocate
+ * objects during debug.
+ *
+ * If any allocation happens during memcg_kmem_get_cache, we will recurse back
+ * to it. This may not be a bounded recursion: since the first cache creation
+ * failed to complete (waiting on the allocation), we'll just try to create the
+ * cache again, failing at the same point.
+ *
+ * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
+ * memcg_kmem_skip_account. So we enclose anything that might allocate memory
+ * inside the following two functions.
+ */
+static inline void memcg_stop_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account++;
+}
+
+static inline void memcg_resume_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account--;
+}
+
+static void kmem_cache_destroy_work_func(struct work_struct *w)
+{
+	struct kmem_cache *cachep;
+	struct memcg_cache_params *p;
+
+	p = container_of(w, struct memcg_cache_params, destroy);
+
+	cachep = memcg_params_to_cache(p);
+
+	/*
+	 * If we get down to 0 after shrink, we could delete right away.
+	 * However, memcg_release_pages() already puts us back in the workqueue
+	 * in that case. If we proceed deleting, we'll get a dangling
+	 * reference, and removing the object from the workqueue in that case
+	 * is unnecessary complication. We are not a fast path.
+	 *
+	 * Note that this case is fundamentally different from racing with
+	 * shrink_slab(): if mem_cgroup_destroy_cache() is called in
+	 * kmem_cache_shrink, not only would we be reinserting a dead cache
+	 * into the queue, but doing so from inside the worker racing to
+	 * destroy it.
+	 *
+	 * So if we aren't down to zero, we'll just schedule a worker and try
+	 * again
+	 */
+	if (atomic_read(&cachep->memcg_params->nr_pages) != 0) {
+		kmem_cache_shrink(cachep);
+		if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+			return;
+	} else
+		kmem_cache_destroy(cachep);
+}
+
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
+{
+	if (!cachep->memcg_params->dead)
+		return;
+
+	/*
+	 * There are many ways in which we can get here.
+	 *
+	 * We can get to a memory-pressure situation while the delayed work is
+	 * still pending to run. The vmscan shrinkers can then release all
+	 * cache memory and get us to destruction. If this is the case, we'll
+	 * be executed twice, which is a bug (the second time will execute over
+	 * bogus data). In this case, cancelling the work should be fine.
+	 *
+	 * But we can also get here from the worker itself, if
+	 * kmem_cache_shrink is enough to shake all the remaining objects and
+	 * get the page count to 0. In this case, we'll deadlock if we try to
+	 * cancel the work (the worker runs with an internal lock held, which
+	 * is the same lock we would hold for cancel_work_sync().)
+	 *
+	 * Since we can't possibly know who got us here, just refrain from
+	 * running if there is already work pending
+	 */
+	if (work_pending(&cachep->memcg_params->destroy))
+		return;
+	/*
+	 * We have to defer the actual destroying to a workqueue, because
+	 * we might currently be in a context that cannot sleep.
+	 */
+	schedule_work(&cachep->memcg_params->destroy);
+}
+
+static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	char *name;
+	struct dentry *dentry;
+
+	rcu_read_lock();
+	dentry = rcu_dereference(memcg->css.cgroup->dentry);
+	rcu_read_unlock();
+
+	BUG_ON(dentry == NULL);
+
+	name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
+			 memcg_cache_id(memcg), dentry->d_name.name);
+
+	return name;
+}
+
+static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
+					 struct kmem_cache *s)
+{
+	char *name;
+	struct kmem_cache *new;
+
+	name = memcg_cache_name(memcg, s);
+	if (!name)
+		return NULL;
+
+	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+				      (s->flags & ~SLAB_PANIC), s->ctor, s);
+
+	if (new)
+		new->allocflags |= __GFP_KMEMCG;
+
+	kfree(name);
+	return new;
+}
+
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allow them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+						  struct kmem_cache *cachep)
+{
+	struct kmem_cache *new_cachep;
+	int idx;
+
+	BUG_ON(!memcg_can_account_kmem(memcg));
+
+	idx = memcg_cache_id(memcg);
+
+	mutex_lock(&memcg_cache_mutex);
+	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	if (new_cachep)
+		goto out;
+
+	new_cachep = kmem_cache_dup(memcg, cachep);
+	if (new_cachep == NULL) {
+		new_cachep = cachep;
+		goto out;
+	}
+
+	mem_cgroup_get(memcg);
+	atomic_set(&new_cachep->memcg_params->nr_pages, 0);
+
+	cachep->memcg_params->memcg_caches[idx] = new_cachep;
+	/*
+	 * the readers won't lock, make sure everybody sees the updated value,
+	 * so they won't put stuff in the queue again for no reason
+	 */
+	wmb();
+out:
+	mutex_unlock(&memcg_cache_mutex);
+	return new_cachep;
+}
+
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+	struct kmem_cache *c;
+	int i;
+
+	if (!s->memcg_params)
+		return;
+	if (!s->memcg_params->is_root_cache)
+		return;
+
+	/*
+	 * If the cache is being destroyed, we trust that there is no one else
+	 * requesting objects from it. Even if there are, the sanity checks in
+	 * kmem_cache_destroy should catch this ill case.
+	 *
+	 * Still, we don't want anyone else freeing memcg_caches under our
+	 * noses, which can happen if a new memcg comes to life. As usual,
+	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 */
+	mutex_lock(&set_limit_mutex);
+	for (i = 0; i < memcg_limited_groups_array_size; i++) {
+		c = s->memcg_params->memcg_caches[i];
+		if (!c)
+			continue;
+
+		/*
+		 * We will now manually delete the caches, so to avoid races
+		 * we need to cancel all pending destruction workers and
+		 * proceed with destruction ourselves.
+		 *
+		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
+		 * and that could spawn the workers again: it is likely that
+		 * the cache still has active pages until this very moment.
+		 * This would lead us back to mem_cgroup_destroy_cache.
+		 *
+		 * But that will not execute at all if the "dead" flag is not
+		 * set, so flip it down to guarantee we are in control.
+		 */
+		c->memcg_params->dead = false;
+		cancel_work_sync(&c->memcg_params->destroy);
+		kmem_cache_destroy(c);
+	}
+	mutex_unlock(&set_limit_mutex);
+}
+
+struct create_work {
+	struct mem_cgroup *memcg;
+	struct kmem_cache *cachep;
+	struct work_struct work;
+};
+
+static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+	struct kmem_cache *cachep;
+	struct memcg_cache_params *params;
+
+	if (!memcg_kmem_is_active(memcg))
+		return;
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+		cachep = memcg_params_to_cache(params);
+		cachep->memcg_params->dead = true;
+		INIT_WORK(&cachep->memcg_params->destroy,
+				  kmem_cache_destroy_work_func);
+		schedule_work(&cachep->memcg_params->destroy);
+	}
+	mutex_unlock(&memcg->slab_caches_mutex);
+}
+
+static void memcg_create_cache_work_func(struct work_struct *w)
+{
+	struct create_work *cw;
+
+	cw = container_of(w, struct create_work, work);
+	memcg_create_kmem_cache(cw->memcg, cw->cachep);
+	/* Drop the reference gotten when we enqueued. */
+	css_put(&cw->memcg->css);
+	kfree(cw);
+}
+
+/*
+ * Enqueue the creation of a per-memcg kmem_cache.
+ * Called with rcu_read_lock.
+ */
+static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+					 struct kmem_cache *cachep)
+{
+	struct create_work *cw;
+
+	cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+	if (cw == NULL)
+		return;
+
+	/* The corresponding put will be done in the workqueue. */
+	if (!css_tryget(&memcg->css)) {
+		kfree(cw);
+		return;
+	}
+
+	cw->memcg = memcg;
+	cw->cachep = cachep;
+
+	INIT_WORK(&cw->work, memcg_create_cache_work_func);
+	schedule_work(&cw->work);
+}
+
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	/*
+	 * We need to stop accounting when we kmalloc, because if the
+	 * corresponding kmalloc cache is not yet created, the first allocation
+	 * in __memcg_create_cache_enqueue will recurse.
+	 *
+	 * However, it is better to enclose the whole function. Depending on
+	 * the debugging options enabled, INIT_WORK(), for instance, can
+	 * trigger an allocation. This too, will make us recurse. Because at
+	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
+	 * the safest choice is to do it like this, wrapping the whole function.
+	 */
+	memcg_stop_kmem_account();
+	__memcg_create_cache_enqueue(memcg, cachep);
+	memcg_resume_kmem_account();
+}
+/*
+ * Return the kmem_cache we're supposed to use for a slab allocation.
+ * We try to use the current memcg's version of the cache.
+ *
+ * If the cache does not exist yet, if we are the first user of it,
+ * we either create it immediately, if possible, or create it asynchronously
+ * in a workqueue.
+ * In the latter case, we will let the current allocation go through with
+ * the original cache.
+ *
+ * Can't be called in interrupt context or from kernel threads.
+ * This function needs to be called with rcu_read_lock() held.
+ */
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
+					  gfp_t gfp)
+{
+	struct mem_cgroup *memcg;
+	int idx;
+
+	VM_BUG_ON(!cachep->memcg_params);
+	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
+
+	if (!current->mm || current->memcg_kmem_skip_account)
+		return cachep;
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
+	rcu_read_unlock();
+
+	if (!memcg_can_account_kmem(memcg))
+		return cachep;
+
+	idx = memcg_cache_id(memcg);
+
+	/*
+	 * barrier to make sure we're always seeing the up-to-date value.  The
+	 * code updating memcg_caches will issue a write barrier to match this.
+	 */
+	read_barrier_depends();
+	if (unlikely(cachep->memcg_params->memcg_caches[idx] == NULL)) {
+		/*
+		 * If we are in a safe context (can wait, and not in interrupt
+		 * context), we could be predictable and return right away.
+		 * This would guarantee that the allocation being performed
+		 * already belongs in the new cache.
+		 *
+		 * However, there are some clashes that can arrive from locking.
+		 * For instance, because we acquire the slab_mutex while doing
+		 * kmem_cache_dup, this means no further allocation could happen
+		 * with the slab_mutex held.
+		 *
+		 * Also, because cache creation issues get_online_cpus(), this
+		 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
+		 * that ends up reversed during cpu hotplug. (cpuset allocates
+		 * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
+		 * better to defer everything.
+		 */
+		memcg_create_cache_enqueue(memcg, cachep);
+		return cachep;
+	}
+
+	return cachep->memcg_params->memcg_caches[idx];
+}
+EXPORT_SYMBOL(__memcg_kmem_get_cache);
+
+/*
+ * We need to verify if the allocation against current->mm->owner's memcg is
+ * possible for the given order. But the page is not allocated yet, so we'll
+ * need a further commit step to do the final arrangements.
+ *
+ * It is possible for the task to switch cgroups in the meantime, so at
+ * commit time, we can't rely on task conversion any longer.  We'll then use
+ * the handle argument to return to the caller which cgroup we should commit
+ * against. We could also return the memcg directly and avoid the pointer
+ * passing, but a boolean return value gives better semantics considering
+ * the compiled-out case as well.
+ *
+ * Returning true means the allocation is possible.
+ */
+bool
+__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	*_memcg = NULL;
+	memcg = try_get_mem_cgroup_from_mm(current->mm);
+
+	/*
+	 * This covers the very rare case described in mem_cgroup_from_task.
+	 * Unfortunately there isn't much we can do without complicating this
+	 * too much, and it would be gfp-dependent anyway. Just let it go.
+	 */
+	if (unlikely(!memcg))
+		return true;
+
+	if (!memcg_can_account_kmem(memcg)) {
+		css_put(&memcg->css);
+		return true;
+	}
+
+	ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
+	if (!ret)
+		*_memcg = memcg;
+
+	css_put(&memcg->css);
+	return (ret == 0);
+}
+
+void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      int order)
+{
+	struct page_cgroup *pc;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+
+	/* The page allocation failed. Revert */
+	if (!page) {
+		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+		return;
+	}
+
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	pc->mem_cgroup = memcg;
+	SetPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+}
+
+void __memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct page_cgroup *pc;
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Fast unlocked return. The state might theoretically have changed;
+	 * we have to check again after locking.
+	 */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		ClearPageCgroupUsed(pc);
+	}
+	unlock_page_cgroup(pc);
+
+	/*
+	 * We trust that the allocation is a valid kmem charge only if there
+	 * is a memcg associated with the page.
+	 */
+	if (!memcg)
+		return;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+}
+#else
+static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
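
The page-allocation hooks above split kmem charging into two phases: __memcg_kmem_newpage_charge() reserves against the counters before the page exists, and __memcg_kmem_commit_charge() either binds the charge to the page or reverts the reservation when the allocation failed. A toy model of that reserve/commit/revert shape; all names are hypothetical and a plain counter stands in for the res_counter:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static long usage, limit = 4;	/* pages; toy res_counter */

static bool newpage_charge(long pages)	/* phase 1: reserve up front */
{
	if (usage + pages > limit)
		return false;		/* over limit: allocation denied */
	usage += pages;
	return true;
}

static void commit_charge(void *page, long pages)	/* phase 2 */
{
	if (!page)
		usage -= pages;	/* allocation failed: revert the reserve */
	/* else: bind the charge to the page (page_cgroup in the kernel) */
}

int main(void)
{
	if (!newpage_charge(1))
		return 1;
	void *page = malloc(4096);	/* the actual allocation */
	commit_charge(page, 1);
	printf("usage=%ld\n", usage);
	free(page);
	return 0;
}
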
@@ -3289,15 +4198,18 @@
 				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 
 	*memcgp = NULL;
 
-	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return;
 
+	if (PageTransHuge(page))
+		nr_pages <<= compound_order(page);
+
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
@@ -3359,7 +4271,7 @@
 	 * charged to the res_counter since we plan on replacing the
 	 * old one and only one page is going to be left afterwards.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
+	__mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
 }
 
 /* remove redundant charge if migration failed*/
@@ -3483,8 +4395,6 @@
 }
 #endif
 
-static DEFINE_MUTEX(set_limit_mutex);
-
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
@@ -3769,6 +4679,7 @@
 static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
 {
 	int node, zid;
+	u64 usage;
 
 	do {
 		/* This is for making all *used* pages to be on LRU. */
@@ -3789,13 +4700,20 @@
 		cond_resched();
 
 		/*
+		 * Kernel memory may not necessarily be trackable to a specific
+		 * process, so such pages are not migrated, and therefore we
+		 * can't expect their value to drop to 0 here.
+		 * Having res filled up with kmem only is enough.
+		 *
 		 * This is a safety check because mem_cgroup_force_empty_list
 		 * could have raced with mem_cgroup_replace_page_cache callers
 		 * so the lru seemed empty but the page could have been added
 		 * right after the check. RES_USAGE should be safe as we always
 		 * charge before adding to the LRU.
 		 */
-	} while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0);
+		usage = res_counter_read_u64(&memcg->res, RES_USAGE) -
+			res_counter_read_u64(&memcg->kmem, RES_USAGE);
+	} while (usage > 0);
 }
 
 /*
@@ -3939,7 +4857,8 @@
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 	char str[64];
 	u64 val;
-	int type, name, len;
+	int name, len;
+	enum res_type type;
 
 	type = MEMFILE_TYPE(cft->private);
 	name = MEMFILE_ATTR(cft->private);
@@ -3960,6 +4879,9 @@
 		else
 			val = res_counter_read_u64(&memcg->memsw, name);
 		break;
+	case _KMEM:
+		val = res_counter_read_u64(&memcg->kmem, name);
+		break;
 	default:
 		BUG();
 	}
@@ -3967,6 +4889,125 @@
 	len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
 	return simple_read_from_buffer(buf, nbytes, ppos, str, len);
 }
+
+static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
+{
+	int ret = -EINVAL;
+#ifdef CONFIG_MEMCG_KMEM
+	bool must_inc_static_branch = false;
+
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	/*
+	 * For simplicity, we won't allow this to be disabled.  It also can't
+	 * be changed if the cgroup has children already, or if tasks had
+	 * already joined.
+	 *
+	 * If tasks join before we set the limit, a person looking at
+	 * kmem.usage_in_bytes will have no way to determine when it took
+	 * place, which makes the value quite meaningless.
+	 *
+	 * After it first became limited, changes in the value of the limit are
+	 * of course permitted.
+	 *
+	 * Taking the cgroup_lock is really offensive, but it is so far the only
+	 * way to guarantee that no children will appear. There are plenty of
+	 * other offenders, and they should all go away. Fine grained locking
+	 * is probably the way to go here. When we are fully hierarchical, we
+	 * can also get rid of the use_hierarchy check.
+	 */
+	cgroup_lock();
+	mutex_lock(&set_limit_mutex);
+	if (!memcg->kmem_account_flags && val != RESOURCE_MAX) {
+		if (cgroup_task_count(cont) || (memcg->use_hierarchy &&
+						!list_empty(&cont->children))) {
+			ret = -EBUSY;
+			goto out;
+		}
+		ret = res_counter_set_limit(&memcg->kmem, val);
+		VM_BUG_ON(ret);
+
+		ret = memcg_update_cache_sizes(memcg);
+		if (ret) {
+			res_counter_set_limit(&memcg->kmem, RESOURCE_MAX);
+			goto out;
+		}
+		must_inc_static_branch = true;
+		/*
+		 * kmem charges can outlive the cgroup. In the case of slab
+		 * pages, for instance, a page contain objects from various
+		 * processes, so it is unfeasible to migrate them away. We
+		 * need to reference count the memcg because of that.
+		 */
+		mem_cgroup_get(memcg);
+	} else
+		ret = res_counter_set_limit(&memcg->kmem, val);
+out:
+	mutex_unlock(&set_limit_mutex);
+	cgroup_unlock();
+
+	/*
+	 * We are by now familiar with the fact that we can't inc the static
+	 * branch inside cgroup_lock. See disarm functions for details. A
+	 * worker here is overkill, but also wrong: After the limit is set, we
+	 * must start accounting right away. Since this operation can't fail,
+	 * we can safely defer it to here - no rollback will be needed.
+	 *
+	 * The boolean used to control this is also safe, because
+	 * KMEM_ACCOUNTED_ACTIVATED guarantees that only one process will be
+	 * able to set it to true.
+	 */
+	if (must_inc_static_branch) {
+		static_key_slow_inc(&memcg_kmem_enabled_key);
+		/*
+		 * setting the active bit after the inc will guarantee no one
+		 * starts accounting before all call sites are patched
+		 */
+		memcg_kmem_set_active(memcg);
+	}
+
+#endif
+	return ret;
+}
+
+static int memcg_propagate_kmem(struct mem_cgroup *memcg)
+{
+	int ret = 0;
+	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+	if (!parent)
+		goto out;
+
+	memcg->kmem_account_flags = parent->kmem_account_flags;
+#ifdef CONFIG_MEMCG_KMEM
+	/*
+	 * When that happens, we need to disable the static branch only on
+	 * those memcgs that enabled it. To achieve this, we would be forced
+	 * to complicate the code by keeping track of which memcgs were the
+	 * ones that actually enabled limits, and which ones got it from
+	 * their parents.
+	 *
+	 * It is a lot simpler just to do static_key_slow_inc() on every child
+	 * that is accounted.
+	 */
+	if (!memcg_kmem_is_active(memcg))
+		goto out;
+
+	/*
+	 * destroy(), called if we fail, will issue static_key_slow_dec() and
+	 * mem_cgroup_put() if kmem is enabled. We have to either take the
+	 * matching inc/get here unconditionally, or clear the KMEM_ACTIVE
+	 * flag. The former is more consistent, since it always leads to the
+	 * same destroy path.
+	 */
+	mem_cgroup_get(memcg);
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+
+	mutex_lock(&set_limit_mutex);
+	ret = memcg_update_cache_sizes(memcg);
+	mutex_unlock(&set_limit_mutex);
+#endif
+out:
+	return ret;
+}
+
 /*
  * The user of this function is...
  * RES_LIMIT.
@@ -3975,7 +5016,8 @@
 			    const char *buffer)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
-	int type, name;
+	enum res_type type;
+	int name;
 	unsigned long long val;
 	int ret;
 
@@ -3997,8 +5039,12 @@
 			break;
 		if (type == _MEM)
 			ret = mem_cgroup_resize_limit(memcg, val);
-		else
+		else if (type == _MEMSWAP)
 			ret = mem_cgroup_resize_memsw_limit(memcg, val);
+		else if (type == _KMEM)
+			ret = memcg_update_kmem_limit(cont, val);
+		else
+			return -EINVAL;
 		break;
 	case RES_SOFT_LIMIT:
 		ret = res_counter_memparse_write_strategy(buffer, &val);
@@ -4051,7 +5097,8 @@
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
-	int type, name;
+	int name;
+	enum res_type type;
 
 	type = MEMFILE_TYPE(event);
 	name = MEMFILE_ATTR(event);
@@ -4063,14 +5110,22 @@
 	case RES_MAX_USAGE:
 		if (type == _MEM)
 			res_counter_reset_max(&memcg->res);
-		else
+		else if (type == _MEMSWAP)
 			res_counter_reset_max(&memcg->memsw);
+		else if (type == _KMEM)
+			res_counter_reset_max(&memcg->kmem);
+		else
+			return -EINVAL;
 		break;
 	case RES_FAILCNT:
 		if (type == _MEM)
 			res_counter_reset_failcnt(&memcg->res);
-		else
+		else if (type == _MEMSWAP)
 			res_counter_reset_failcnt(&memcg->memsw);
+		else if (type == _KMEM)
+			res_counter_reset_failcnt(&memcg->kmem);
+		else
+			return -EINVAL;
 		break;
 	}
 
@@ -4387,7 +5442,7 @@
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
 	struct mem_cgroup_thresholds *thresholds;
 	struct mem_cgroup_threshold_ary *new;
-	int type = MEMFILE_TYPE(cft->private);
+	enum res_type type = MEMFILE_TYPE(cft->private);
 	u64 threshold, usage;
 	int i, size, ret;
 
@@ -4470,7 +5525,7 @@
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
 	struct mem_cgroup_thresholds *thresholds;
 	struct mem_cgroup_threshold_ary *new;
-	int type = MEMFILE_TYPE(cft->private);
+	enum res_type type = MEMFILE_TYPE(cft->private);
 	u64 usage;
 	int i, j, size;
 
@@ -4548,7 +5603,7 @@
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
 	struct mem_cgroup_eventfd_list *event;
-	int type = MEMFILE_TYPE(cft->private);
+	enum res_type type = MEMFILE_TYPE(cft->private);
 
 	BUG_ON(type != _OOM_TYPE);
 	event = kmalloc(sizeof(*event),	GFP_KERNEL);
@@ -4573,7 +5628,7 @@
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
 	struct mem_cgroup_eventfd_list *ev, *tmp;
-	int type = MEMFILE_TYPE(cft->private);
+	enum res_type type = MEMFILE_TYPE(cft->private);
 
 	BUG_ON(type != _OOM_TYPE);
 
@@ -4632,12 +5687,33 @@
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	int ret;
+
+	memcg->kmemcg_id = -1;
+	ret = memcg_propagate_kmem(memcg);
+	if (ret)
+		return ret;
+
 	return mem_cgroup_sockets_init(memcg, ss);
 };
 
 static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
 {
 	mem_cgroup_sockets_destroy(memcg);
+
+	memcg_kmem_mark_dead(memcg);
+
+	if (res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0)
+		return;
+
+	/*
+	 * Charges already down to 0, undo mem_cgroup_get() done in the charge
+	 * path here, being careful not to race with memcg_uncharge_kmem: it is
+	 * possible that the charges went down to 0 between mark_dead and the
+	 * res_counter read, so in that case, we don't need the put
+	 */
+	if (memcg_kmem_test_and_clear_dead(memcg))
+		mem_cgroup_put(memcg);
 }
 #else
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
@@ -4746,6 +5822,37 @@
 		.read = mem_cgroup_read,
 	},
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	{
+		.name = "kmem.limit_in_bytes",
+		.private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
+		.write_string = mem_cgroup_write,
+		.read = mem_cgroup_read,
+	},
+	{
+		.name = "kmem.usage_in_bytes",
+		.private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
+		.read = mem_cgroup_read,
+	},
+	{
+		.name = "kmem.failcnt",
+		.private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
+		.trigger = mem_cgroup_reset,
+		.read = mem_cgroup_read,
+	},
+	{
+		.name = "kmem.max_usage_in_bytes",
+		.private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
+		.trigger = mem_cgroup_reset,
+		.read = mem_cgroup_read,
+	},
+#ifdef CONFIG_SLABINFO
+	{
+		.name = "kmem.slabinfo",
+		.read_seq_string = mem_cgroup_slabinfo_read,
+	},
+#endif
+#endif
 	{ },	/* terminate */
 };
 
@@ -4813,44 +5920,6 @@
 }
 
 /*
- * Helpers for freeing a kmalloc()ed/vzalloc()ed mem_cgroup by RCU,
- * but in process context.  The work_freeing structure is overlaid
- * on the rcu_freeing structure, which itself is overlaid on memsw.
- */
-static void free_work(struct work_struct *work)
-{
-	struct mem_cgroup *memcg;
-	int size = sizeof(struct mem_cgroup);
-
-	memcg = container_of(work, struct mem_cgroup, work_freeing);
-	/*
-	 * We need to make sure that (at least for now), the jump label
-	 * destruction code runs outside of the cgroup lock. This is because
-	 * get_online_cpus(), which is called from the static_branch update,
-	 * can't be called inside the cgroup_lock. cpusets are the ones
-	 * enforcing this dependency, so if they ever change, we might as well.
-	 *
-	 * schedule_work() will guarantee this happens. Be careful if you need
-	 * to move this code around, and make sure it is outside
-	 * the cgroup_lock.
-	 */
-	disarm_sock_keys(memcg);
-	if (size < PAGE_SIZE)
-		kfree(memcg);
-	else
-		vfree(memcg);
-}
-
-static void free_rcu(struct rcu_head *rcu_head)
-{
-	struct mem_cgroup *memcg;
-
-	memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
-	INIT_WORK(&memcg->work_freeing, free_work);
-	schedule_work(&memcg->work_freeing);
-}
-
-/*
  * At destroying mem_cgroup, references from swap_cgroup can remain.
  * (scanning all at force_empty is too costly...)
  *
@@ -4864,6 +5933,7 @@
 static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
+	int size = sizeof(struct mem_cgroup);
 
 	mem_cgroup_remove_from_trees(memcg);
 	free_css_id(&mem_cgroup_subsys, &memcg->css);
@@ -4872,7 +5942,46 @@
 		free_mem_cgroup_per_zone_info(memcg, node);
 
 	free_percpu(memcg->stat);
-	call_rcu(&memcg->rcu_freeing, free_rcu);
+
+	/*
+	 * We need to make sure that (at least for now), the jump label
+	 * destruction code runs outside of the cgroup lock. This is because
+	 * get_online_cpus(), which is called from the static_branch update,
+	 * can't be called inside the cgroup_lock. cpusets are the ones
+	 * enforcing this dependency, so if they ever change, we might as well.
+	 *
+	 * schedule_work() will guarantee this happens. Be careful if you need
+	 * to move this code around, and make sure it is outside
+	 * the cgroup_lock.
+	 */
+	disarm_static_keys(memcg);
+	if (size < PAGE_SIZE)
+		kfree(memcg);
+	else
+		vfree(memcg);
+}
+
+/*
+ * Helpers for freeing a kmalloc()ed/vzalloc()ed mem_cgroup by RCU,
+ * but in process context.  The work_freeing structure is overlaid
+ * on the rcu_freeing structure, which itself is overlaid on memsw.
+ */
+static void free_work(struct work_struct *work)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(work, struct mem_cgroup, work_freeing);
+	__mem_cgroup_free(memcg);
+}
+
+static void free_rcu(struct rcu_head *rcu_head)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
+	INIT_WORK(&memcg->work_freeing, free_work);
+	schedule_work(&memcg->work_freeing);
 }
 
 static void mem_cgroup_get(struct mem_cgroup *memcg)
@@ -4884,7 +5993,7 @@
 {
 	if (atomic_sub_and_test(count, &memcg->refcnt)) {
 		struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-		__mem_cgroup_free(memcg);
+		call_rcu(&memcg->rcu_freeing, free_rcu);
 		if (parent)
 			mem_cgroup_put(parent);
 	}
@@ -4981,7 +6090,6 @@
 						&per_cpu(memcg_stock, cpu);
 			INIT_WORK(&stock->work, drain_local_stock);
 		}
-		hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
 	} else {
 		parent = mem_cgroup_from_cont(cont->parent);
 		memcg->use_hierarchy = parent->use_hierarchy;
@@ -4991,6 +6099,8 @@
 	if (parent && parent->use_hierarchy) {
 		res_counter_init(&memcg->res, &parent->res);
 		res_counter_init(&memcg->memsw, &parent->memsw);
+		res_counter_init(&memcg->kmem, &parent->kmem);
+
 		/*
 		 * We increment refcnt of the parent to ensure that we can
 		 * safely access it on res_counter_charge/uncharge.
@@ -5001,6 +6111,7 @@
 	} else {
 		res_counter_init(&memcg->res, NULL);
 		res_counter_init(&memcg->memsw, NULL);
+		res_counter_init(&memcg->kmem, NULL);
 		/*
 		 * Deeper hierarchy with use_hierarchy == false doesn't make
 		 * much sense so let cgroup subsystem know about this
@@ -5040,6 +6151,7 @@
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
 	mem_cgroup_reparent_charges(memcg);
+	mem_cgroup_destroy_all_caches(memcg);
 }
 
 static void mem_cgroup_css_free(struct cgroup *cont)
@@ -5643,6 +6755,19 @@
 	.use_id = 1,
 };
 
+/*
+ * The rest of init is performed during ->css_alloc() for root css which
+ * happens before initcalls.  hotcpu_notifier() can't be done together as
+ * it would introduce circular locking by adding cgroup_lock -> cpu hotplug
+ * dependency.  Do it from a subsys_initcall().
+ */
+static int __init mem_cgroup_init(void)
+{
+	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
+	return 0;
+}
+subsys_initcall(mem_cgroup_init);
+
 #ifdef CONFIG_MEMCG_SWAP
 static int __init enable_swap_account(char *s)
 {
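
Because kmem charges can outlive their cgroup, kmem_cgroup_destroy() above only drops the pinning reference if usage already hit zero; otherwise the KMEM_ACCOUNTED_DEAD bit hands that final put to memcg_uncharge_kmem(). A single-threaded toy model of the handoff; the real code uses test_and_clear_bit() so the two sides cannot both drop the reference under concurrency:

#include <stdbool.h>
#include <stdio.h>

struct group { int refs; long kmem_usage; bool dead; };

static void put(struct group *g) { g->refs--; }

static void destroy(struct group *g)	/* cgroup removal path */
{
	g->dead = true;			/* mark_dead analogue */
	if (g->kmem_usage == 0 && g->dead) {	/* charges already gone */
		g->dead = false;	/* test_and_clear analogue */
		put(g);
	}
}

static void uncharge_kmem(struct group *g, long pages)
{
	g->kmem_usage -= pages;
	if (g->kmem_usage == 0 && g->dead) {	/* last charge: drop pin */
		g->dead = false;
		put(g);
	}
}

int main(void)
{
	struct group g = { .refs = 1, .kmem_usage = 2 };

	destroy(&g);		/* usage still 2: the pin survives */
	uncharge_kmem(&g, 2);	/* last uncharge releases the pin */
	printf("refs=%d\n", g.refs);	/* 0 */
	return 0;
}
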
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 108c52f..c6e4dd3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -402,7 +402,7 @@
 	struct anon_vma *av;
 	pgoff_t pgoff;
 
-	av = page_lock_anon_vma(page);
+	av = page_lock_anon_vma_read(page);
 	if (av == NULL)	/* Not actually mapped anymore */
 		return;
 
@@ -423,7 +423,7 @@
 		}
 	}
 	read_unlock(&tasklist_lock);
-	page_unlock_anon_vma(av);
+	page_unlock_anon_vma_read(av);
 }
 
 /*
@@ -1566,7 +1566,8 @@
 					    page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-							false, MIGRATE_SYNC);
+							false, MIGRATE_SYNC,
+							MR_MEMORY_FAILURE);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory.c b/mm/memory.c
index db2e9e7..e0a9b0c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,8 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
+#include <linux/string.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -1503,6 +1505,8 @@
 		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 		goto out;
 	}
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto no_page_table;
 	if (pmd_trans_huge(*pmd)) {
 		if (flags & FOLL_SPLIT) {
 			split_huge_page_pmd(vma, address, pmd);
@@ -1532,6 +1536,8 @@
 	pte = *ptep;
 	if (!pte_present(pte))
 		goto no_page;
+	if ((flags & FOLL_NUMA) && pte_numa(pte))
+		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
 
@@ -1683,6 +1689,19 @@
 			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
 	vm_flags &= (gup_flags & FOLL_FORCE) ?
 			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+
+	/*
+	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
+	 * would be called on PROT_NONE ranges. We must never invoke
+	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
+	 * page faults would unprotect the PROT_NONE ranges if
+	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
+	 * bitflag. So to avoid that, don't set FOLL_NUMA if
+	 * FOLL_FORCE is set.
+	 */
+	if (!(gup_flags & FOLL_FORCE))
+		gup_flags |= FOLL_NUMA;
+
 	i = 0;
 
 	do {
@@ -3412,6 +3431,170 @@
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
+int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+				unsigned long addr, int current_nid)
+{
+	get_page(page);
+
+	count_vm_numa_event(NUMA_HINT_FAULTS);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+
+	return mpol_misplaced(page, vma, addr);
+}
+
+int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
+{
+	struct page *page = NULL;
+	spinlock_t *ptl;
+	int current_nid = -1;
+	int target_nid;
+	bool migrated = false;
+
+	/*
+	 * The "pte" at this point cannot be used safely without
+	 * validation through pte_unmap_same(). It's of NUMA type but
+	 * the pfn may be garbage if the read is non-atomic.
+	 *
+	 * ptep_modify_prot_start is not called as this is clearing
+	 * the _PAGE_NUMA bit and it is not really expected that there
+	 * would be concurrent hardware modifications to the PTE.
+	 */
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	if (unlikely(!pte_same(*ptep, pte))) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
+	pte = pte_mknonnuma(pte);
+	set_pte_at(mm, addr, ptep, pte);
+	update_mmu_cache(vma, addr, ptep);
+
+	page = vm_normal_page(vma, addr, pte);
+	if (!page) {
+		pte_unmap_unlock(ptep, ptl);
+		return 0;
+	}
+
+	current_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	pte_unmap_unlock(ptep, ptl);
+	if (target_nid == -1) {
+		/*
+		 * Account the fault against the current node if it is not
+		 * being replaced, regardless of where the page is located.
+		 */
+		current_nid = numa_node_id();
+		put_page(page);
+		goto out;
+	}
+
+	/* Migrate to the requested node */
+	migrated = migrate_misplaced_page(page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+
+out:
+	if (current_nid != -1)
+		task_numa_fault(current_nid, 1, migrated);
+	return 0;
+}
+
+/* NUMA hinting page fault entry point for regular pmds */
+#ifdef CONFIG_NUMA_BALANCING
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd;
+	pte_t *pte, *orig_pte;
+	unsigned long _addr = addr & PMD_MASK;
+	unsigned long offset;
+	spinlock_t *ptl;
+	bool numa = false;
+	int local_nid = numa_node_id();
+
+	spin_lock(&mm->page_table_lock);
+	pmd = *pmdp;
+	if (pmd_numa(pmd)) {
+		set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
+		numa = true;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	if (!numa)
+		return 0;
+
+	/* we're in a page fault so some vma must be in the range */
+	BUG_ON(!vma);
+	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
+	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
+	VM_BUG_ON(offset >= PMD_SIZE);
+	orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
+	pte += offset >> PAGE_SHIFT;
+	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
+		pte_t pteval = *pte;
+		struct page *page;
+		int curr_nid = local_nid;
+		int target_nid;
+		bool migrated;
+		if (!pte_present(pteval))
+			continue;
+		if (!pte_numa(pteval))
+			continue;
+		if (addr >= vma->vm_end) {
+			vma = find_vma(mm, addr);
+			/* there's a pte present so there must be a vma */
+			BUG_ON(!vma);
+			BUG_ON(addr < vma->vm_start);
+		}
+		if (pte_numa(pteval)) {
+			pteval = pte_mknonnuma(pteval);
+			set_pte_at(mm, addr, pte, pteval);
+		}
+		page = vm_normal_page(vma, addr, pteval);
+		if (unlikely(!page))
+			continue;
+		/* only check non-shared pages */
+		if (unlikely(page_mapcount(page) != 1))
+			continue;
+
+		/*
+		 * Note that the NUMA fault is later accounted to either
+		 * the node that is currently running or where the page is
+		 * migrated to.
+		 */
+		curr_nid = local_nid;
+		target_nid = numa_migrate_prep(page, vma, addr,
+					       page_to_nid(page));
+		if (target_nid == -1) {
+			put_page(page);
+			continue;
+		}
+
+		/* Migrate to the requested node */
+		pte_unmap_unlock(pte, ptl);
+		migrated = migrate_misplaced_page(page, target_nid);
+		if (migrated)
+			curr_nid = target_nid;
+		task_numa_fault(curr_nid, 1, migrated);
+
+		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	}
+	pte_unmap_unlock(orig_pte, ptl);
+
+	return 0;
+}
+#else
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long addr, pmd_t *pmdp)
+{
+	BUG();
+	return 0;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3450,6 +3633,9 @@
 					pte, pmd, flags, entry);
 	}
 
+	if (pte_numa(entry))
+		return do_numa_page(mm, vma, address, entry, pte, pmd);
+
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	if (unlikely(!pte_same(*pte, entry)))
@@ -3520,8 +3706,11 @@
 		if (pmd_trans_huge(orig_pmd)) {
 			unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
-			if (dirty && !pmd_write(orig_pmd) &&
-			    !pmd_trans_splitting(orig_pmd)) {
+			if (pmd_numa(orig_pmd))
+				return do_huge_pmd_numa_page(mm, vma, address,
+							     orig_pmd, pmd);
+
+			if (dirty && !pmd_write(orig_pmd)) {
 				ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
 							  orig_pmd);
 				/*
@@ -3536,16 +3725,21 @@
 				huge_pmd_set_accessed(mm, vma, address, pmd,
 						      orig_pmd, dirty);
 			}
+
 			return 0;
 		}
 	}
 
+	if (pmd_numa(*pmd))
+		return do_pmd_numa_page(mm, vma, address, pmd);
+
 	/*
 	 * Use __pte_alloc instead of pte_alloc_map, because we can't
 	 * run pte_offset_map on the pmd, if an huge pmd could
 	 * materialize from under us from a different thread.
 	 */
-	if (unlikely(pmd_none(*pmd)) && __pte_alloc(mm, vma, pmd, address))
+	if (unlikely(pmd_none(*pmd)) &&
+	    unlikely(__pte_alloc(mm, vma, pmd, address)))
 		return VM_FAULT_OOM;
 	/* if an huge pmd materialized from under us just retry later */
 	if (unlikely(pmd_trans_huge(*pmd)))
@@ -3925,15 +4119,12 @@
 		struct file *f = vma->vm_file;
 		char *buf = (char *)__get_free_page(GFP_KERNEL);
 		if (buf) {
-			char *p, *s;
+			char *p;
 
 			p = d_path(&f->f_path, buf, PAGE_SIZE);
 			if (IS_ERR(p))
 				p = "?";
-			s = strrchr(p, '/');
-			if (s)
-				p = s+1;
-			printk("%s%s[%lx+%lx]", prefix, p,
+			printk("%s%s[%lx+%lx]", prefix, kbasename(p),
 					vma->vm_start,
 					vma->vm_end - vma->vm_start);
 			free_page((unsigned long)buf);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 518baa8..d04ed87 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -590,18 +590,21 @@
 }
 
 #ifdef CONFIG_MOVABLE_NODE
-/* when CONFIG_MOVABLE_NODE, we allow online node don't have normal memory */
+/*
+ * When CONFIG_MOVABLE_NODE, we permit onlining of a node which doesn't have
+ * normal memory.
+ */
 static bool can_online_high_movable(struct zone *zone)
 {
 	return true;
 }
-#else /* #ifdef CONFIG_MOVABLE_NODE */
+#else /* CONFIG_MOVABLE_NODE */
 /* ensure every online node has NORMAL memory */
 static bool can_online_high_movable(struct zone *zone)
 {
 	return node_state(zone_to_nid(zone), N_NORMAL_MEMORY);
 }
-#endif /* #ifdef CONFIG_MOVABLE_NODE */
+#endif /* CONFIG_MOVABLE_NODE */
 
 /* check which state of node_states will be changed when online memory */
 static void node_states_check_changes_online(unsigned long nr_pages,
@@ -1055,7 +1058,8 @@
 		 * migrate_pages returns # of failed pages.
 		 */
 		ret = migrate_pages(&source, alloc_migrate_target, 0,
-							true, MIGRATE_SYNC);
+							true, MIGRATE_SYNC,
+							MR_MEMORY_HOTPLUG);
 		if (ret)
 			putback_lru_pages(&source);
 	}
@@ -1111,12 +1115,15 @@
 }
 
 #ifdef CONFIG_MOVABLE_NODE
-/* when CONFIG_MOVABLE_NODE, we allow online node don't have normal memory */
+/*
+ * When CONFIG_MOVABLE_NODE, we permit offlining of a node which doesn't have
+ * normal memory.
+ */
 static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
 {
 	return true;
 }
-#else /* #ifdef CONFIG_MOVABLE_NODE */
+#else /* CONFIG_MOVABLE_NODE */
 /* ensure the node has NORMAL memory if it is still online */
 static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
 {
@@ -1140,7 +1147,7 @@
 	 */
 	return present_pages == 0;
 }
-#endif /* #ifdef CONFIG_MOVABLE_NODE */
+#endif /* CONFIG_MOVABLE_NODE */
 
 /* check which state of node_states will be changed when offline memory */
 static void node_states_check_changes_offline(unsigned long nr_pages,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index aaf5456..d1b315e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -90,6 +90,7 @@
 #include <linux/syscalls.h>
 #include <linux/ctype.h>
 #include <linux/mm_inline.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -117,6 +118,26 @@
 	.flags = MPOL_F_LOCAL,
 };
 
+static struct mempolicy preferred_node_policy[MAX_NUMNODES];
+
+static struct mempolicy *get_task_policy(struct task_struct *p)
+{
+	struct mempolicy *pol = p->mempolicy;
+	int node;
+
+	if (!pol) {
+		node = numa_node_id();
+		if (node != -1)
+			pol = &preferred_node_policy[node];
+
+		/* preferred_node_policy is not initialised early in boot */
+		if (!pol->mode)
+			pol = NULL;
+	}
+
+	return pol;
+}
+
 static const struct mempolicy_operations {
 	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
 	/*
@@ -254,7 +275,7 @@
 	if (mode == MPOL_DEFAULT) {
 		if (nodes && !nodes_empty(*nodes))
 			return ERR_PTR(-EINVAL);
-		return NULL;	/* simply delete any existing policy */
+		return NULL;
 	}
 	VM_BUG_ON(!nodes);
 
@@ -269,6 +290,10 @@
 			     (flags & MPOL_F_RELATIVE_NODES)))
 				return ERR_PTR(-EINVAL);
 		}
+	} else if (mode == MPOL_LOCAL) {
+		if (!nodes_empty(*nodes))
+			return ERR_PTR(-EINVAL);
+		mode = MPOL_PREFERRED;
 	} else if (nodes_empty(*nodes))
 		return ERR_PTR(-EINVAL);
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
@@ -561,6 +586,36 @@
 	return 0;
 }
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+/*
+ * This is used to mark a range of virtual addresses as inaccessible.
+ * These are later cleared by a NUMA hinting fault. Depending on these
+ * faults, pages may be migrated for better NUMA placement.
+ *
+ * This is assuming that NUMA faults are handled using PROT_NONE. If
+ * an architecture makes a different choice, it will need further
+ * changes to the core.
+ */
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
+{
+	int nr_updated;
+	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
+
+	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
+	if (nr_updated)
+		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
+
+	return nr_updated;
+}
+#else
+static unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
+{
+	return 0;
+}
+#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+
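
change_prot_numa() is the kernel half of the new MPOL_MF_LAZY behaviour wired up in check_range() below: the PTEs in the range are flipped to _PAGE_NUMA (PROT_NONE-equivalent here) and pages then migrate lazily on the resulting hinting faults. A hedged userspace sketch of requesting lazy migration via mbind(); the MPOL_MF_LAZY definition is taken from this series' uapi header and may be absent from older libnuma headers:

#include <numaif.h>

#ifndef MPOL_MF_LAZY
#define MPOL_MF_LAZY (1 << 3)	/* uapi/linux/mempolicy.h in this series */
#endif

/* hypothetical helper: bind [addr, addr+len) to @node, migrating lazily */
long lazy_bind(void *addr, unsigned long len, int node)
{
	unsigned long nodemask = 1UL << node;

	/* nothing is moved now; NUMA hinting faults migrate pages later */
	return mbind(addr, len, MPOL_BIND, &nodemask,
		     sizeof(nodemask) * 8, MPOL_MF_LAZY);
}
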
 /*
  * Check if all pages in a range are on a set of nodes.
  * If pagelist != NULL then isolate pages from the LRU and
@@ -579,22 +634,32 @@
 		return ERR_PTR(-EFAULT);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
+		unsigned long endvma = vma->vm_end;
+
+		if (endvma > end)
+			endvma = end;
+		if (vma->vm_start > start)
+			start = vma->vm_start;
+
 		if (!(flags & MPOL_MF_DISCONTIG_OK)) {
 			if (!vma->vm_next && vma->vm_end < end)
 				return ERR_PTR(-EFAULT);
 			if (prev && prev->vm_end < vma->vm_start)
 				return ERR_PTR(-EFAULT);
 		}
-		if (!is_vm_hugetlb_page(vma) &&
-		    ((flags & MPOL_MF_STRICT) ||
-		     ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-				vma_migratable(vma)))) {
-			unsigned long endvma = vma->vm_end;
 
-			if (endvma > end)
-				endvma = end;
-			if (vma->vm_start > start)
-				start = vma->vm_start;
+		if (is_vm_hugetlb_page(vma))
+			goto next;
+
+		if (flags & MPOL_MF_LAZY) {
+			change_prot_numa(vma, start, endvma);
+			goto next;
+		}
+
+		if ((flags & MPOL_MF_STRICT) ||
+		     ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
+		      vma_migratable(vma))) {
+
 			err = check_pgd_range(vma, start, endvma, nodes,
 						flags, private);
 			if (err) {
@@ -602,6 +667,7 @@
 				break;
 			}
 		}
+next:
 		prev = vma;
 	}
 	return first;
@@ -961,7 +1027,8 @@
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-							false, MIGRATE_SYNC);
+							false, MIGRATE_SYNC,
+							MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1133,8 +1200,7 @@
 	int err;
 	LIST_HEAD(pagelist);
 
-	if (flags & ~(unsigned long)(MPOL_MF_STRICT |
-				     MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	if (flags & ~(unsigned long)MPOL_MF_VALID)
 		return -EINVAL;
 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
 		return -EPERM;
@@ -1157,6 +1223,9 @@
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
+	if (flags & MPOL_MF_LAZY)
+		new->flags |= MPOL_F_MOF;
+
 	/*
 	 * If we are using the default policy then operation
 	 * on discontinuous address spaces is okay after all
@@ -1193,21 +1262,24 @@
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
-	err = PTR_ERR(vma);
-	if (!IS_ERR(vma)) {
-		int nr_failed = 0;
-
+	err = PTR_ERR(vma);	/* maybe ... */
+	if (!IS_ERR(vma))
 		err = mbind_range(mm, start, end, new);
 
+	if (!err) {
+		int nr_failed = 0;
+
 		if (!list_empty(&pagelist)) {
+			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
 						(unsigned long)vma,
-						false, MIGRATE_SYNC);
+						false, MIGRATE_SYNC,
+						MR_MEMPOLICY_MBIND);
 			if (nr_failed)
 				putback_lru_pages(&pagelist);
 		}
 
-		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+		if (nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	} else
 		putback_lru_pages(&pagelist);
@@ -1546,7 +1618,7 @@
 struct mempolicy *get_vma_policy(struct task_struct *task,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	struct mempolicy *pol = task->mempolicy;
+	struct mempolicy *pol = get_task_policy(task);
 
 	if (vma) {
 		if (vma->vm_ops && vma->vm_ops->get_policy) {
@@ -1956,7 +2028,7 @@
  */
 struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
-	struct mempolicy *pol = current->mempolicy;
+	struct mempolicy *pol = get_task_policy(current);
 	struct page *page;
 	unsigned int cpuset_mems_cookie;
 
@@ -2140,6 +2212,115 @@
 	kmem_cache_free(sn_cache, n);
 }
 
+/**
+ * mpol_misplaced - check whether current page node is valid in policy
+ *
+ * @page   - page to be checked
+ * @vma    - vm area where page mapped
+ * @addr   - virtual address where page mapped
+ *
+ * Lookup current policy node id for vma,addr and "compare to" page's
+ * node id.
+ *
+ * Returns:
+ *	-1	- not misplaced, page is in the right node
+ *	node	- node id where the page should be
+ *
+ * Policy determination "mimics" alloc_page_vma().
+ * Called from fault path where we know the vma and faulting address.
+ */
+int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol;
+	struct zone *zone;
+	int curnid = page_to_nid(page);
+	unsigned long pgoff;
+	int polnid = -1;
+	int ret = -1;
+
+	BUG_ON(!vma);
+
+	pol = get_vma_policy(current, vma, addr);
+	if (!(pol->flags & MPOL_F_MOF))
+		goto out;
+
+	switch (pol->mode) {
+	case MPOL_INTERLEAVE:
+		BUG_ON(addr >= vma->vm_end);
+		BUG_ON(addr < vma->vm_start);
+
+		pgoff = vma->vm_pgoff;
+		pgoff += (addr - vma->vm_start) >> PAGE_SHIFT;
+		polnid = offset_il_node(pol, vma, pgoff);
+		break;
+
+	case MPOL_PREFERRED:
+		if (pol->flags & MPOL_F_LOCAL)
+			polnid = numa_node_id();
+		else
+			polnid = pol->v.preferred_node;
+		break;
+
+	case MPOL_BIND:
+		/*
+		 * allows binding to multiple nodes.
+		 * use current page if in policy nodemask,
+		 * else select nearest allowed node, if any.
+		 * If no allowed nodes, use current [!misplaced].
+		 */
+		if (node_isset(curnid, pol->v.nodes))
+			goto out;
+		(void)first_zones_zonelist(
+				node_zonelist(numa_node_id(), GFP_HIGHUSER),
+				gfp_zone(GFP_HIGHUSER),
+				&pol->v.nodes, &zone);
+		polnid = zone->node;
+		break;
+
+	default:
+		BUG();
+	}
+
+	/* Migrate the page towards the node whose CPU is referencing it */
+	if (pol->flags & MPOL_F_MORON) {
+		int last_nid;
+
+		polnid = numa_node_id();
+
+		/*
+		 * Multi-stage node selection is used in conjunction
+		 * with a periodic migration fault to build a temporal
+		 * task<->page relation. By using a two-stage filter we
+		 * remove short/unlikely relations.
+		 *
+		 * Using P(p) ~ n_p / n_t as per frequentist
+		 * probability, we can equate a task's usage of a
+		 * particular page (n_p) per total usage of this
+		 * page (n_t) (in a given time-span) to a probability.
+		 *
+		 * Our periodic faults will sample this probability and
+		 * getting the same result twice in a row, given these
+		 * samples are fully independent, is then given by
+		 * P(n)^2, provided our sample period is sufficiently
+		 * short compared to the usage pattern.
+		 *
+		 * This quadratic squishes small probabilities, making
+		 * it less likely we act on an unlikely task<->page
+		 * relation.
+		 */
+		last_nid = page_xchg_last_nid(page, polnid);
+		if (last_nid != polnid)
+			goto out;
+	}
+
+	if (curnid != polnid)
+		ret = polnid;
+out:
+	mpol_cond_put(pol);
+
+	return ret;
+}
+
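
The MPOL_F_MORON branch above only declares a page misplaced once two consecutive hinting faults nominate the same destination node, which squares the probability of acting on a transient task<->page relation, as the comment derives. Reduced to its essentials, the filter is just the following sketch, with xchg_last_nid() standing in for page_xchg_last_nid():

/* two-stage filter: act only when two consecutive samples agree */
static int should_migrate(struct page *page, int this_nid)
{
	int last_nid = xchg_last_nid(page, this_nid);

	/* first fault nominating this node: remember it, do nothing yet */
	if (last_nid != this_nid)
		return 0;

	/* two faults in a row agreed: treat the page as misplaced */
	return 1;
}
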
 static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 {
 	pr_debug("deleting %lx-l%lx\n", n->start, n->end);
@@ -2305,6 +2486,50 @@
 	mutex_unlock(&p->mutex);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static bool __initdata numabalancing_override;
+
+static void __init check_numabalancing_enable(void)
+{
+	bool numabalancing_default = false;
+
+	if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED))
+		numabalancing_default = true;
+
+	if (nr_node_ids > 1 && !numabalancing_override) {
+		printk(KERN_INFO "Enabling automatic NUMA balancing. "
+			"Configure with numa_balancing= or sysctl\n");
+		set_numabalancing_state(numabalancing_default);
+	}
+}
+
+static int __init setup_numabalancing(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+	numabalancing_override = true;
+
+	if (!strcmp(str, "enable")) {
+		set_numabalancing_state(true);
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		set_numabalancing_state(false);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		printk(KERN_WARNING "Unable to parse numa_balancing=\n");
+
+	return ret;
+}
+__setup("numa_balancing=", setup_numabalancing);
+#else
+static inline void __init check_numabalancing_enable(void)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
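
In practice this means automatic balancing can be forced on or off from the kernel command line, e.g. by booting with numa_balancing=disable; without an explicit setting, machines with more than one node fall back to the CONFIG_NUMA_BALANCING_DEFAULT_ENABLED default announced by the printk above, and (as that message notes) the state can still be flipped at runtime through the corresponding sysctl.
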
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
@@ -2320,6 +2545,15 @@
 				     sizeof(struct sp_node),
 				     0, SLAB_PANIC, NULL);
 
+	for_each_node(nid) {
+		preferred_node_policy[nid] = (struct mempolicy) {
+			.refcnt = ATOMIC_INIT(1),
+			.mode = MPOL_PREFERRED,
+			.flags = MPOL_F_MOF | MPOL_F_MORON,
+			.v = { .preferred_node = nid, },
+		};
+	}
+
 	/*
 	 * Set interleaving policy for system init. Interleaving is only
 	 * enabled across suitably sized nodes (default is >= 16MB), or
@@ -2346,6 +2580,8 @@
 
 	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
 		printk("numa_policy_init: interleaving failed\n");
+
+	check_numabalancing_enable();
 }
 
 /* Reset policy of current process to default */
@@ -2362,14 +2598,13 @@
  * "local" is pseudo-policy:  MPOL_PREFERRED with MPOL_F_LOCAL flag
  * Used only for mpol_parse_str() and mpol_to_str()
  */
-#define MPOL_LOCAL MPOL_MAX
 static const char * const policy_modes[] =
 {
 	[MPOL_DEFAULT]    = "default",
 	[MPOL_PREFERRED]  = "prefer",
 	[MPOL_BIND]       = "bind",
 	[MPOL_INTERLEAVE] = "interleave",
-	[MPOL_LOCAL]      = "local"
+	[MPOL_LOCAL]      = "local",
 };
 
 
@@ -2415,12 +2650,12 @@
 	if (flags)
 		*flags++ = '\0';	/* terminate mode string */
 
-	for (mode = 0; mode <= MPOL_LOCAL; mode++) {
+	for (mode = 0; mode < MPOL_MAX; mode++) {
 		if (!strcmp(str, policy_modes[mode])) {
 			break;
 		}
 	}
-	if (mode > MPOL_LOCAL)
+	if (mode >= MPOL_MAX)
 		goto out;
 
 	switch (mode) {
diff --git a/mm/migrate.c b/mm/migrate.c
index cae0271..3b676b0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -39,6 +39,9 @@
 
 #include <asm/tlbflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/migrate.h>
+
 #include "internal.h"
 
 /*
@@ -293,7 +296,7 @@
 		struct page *newpage, struct page *page,
 		struct buffer_head *head, enum migrate_mode mode)
 {
-	int expected_count;
+	int expected_count = 0;
 	void **pslot;
 
 	if (!mapping) {
@@ -421,7 +424,7 @@
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	if (PageHuge(page))
+	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
 		copy_highpage(newpage, page);
@@ -765,7 +768,7 @@
 	 */
 	if (PageAnon(page)) {
 		/*
-		 * Only page_lock_anon_vma() understands the subtleties of
+		 * Only page_lock_anon_vma_read() understands the subtleties of
 		 * getting a hold on an anon_vma from outside one of its mms.
 		 */
 		anon_vma = page_get_anon_vma(page);
@@ -998,10 +1001,11 @@
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		enum migrate_mode mode)
+		enum migrate_mode mode, int reason)
 {
 	int retry = 1;
 	int nr_failed = 0;
+	int nr_succeeded = 0;
 	int pass = 0;
 	struct page *page;
 	struct page *page2;
@@ -1028,6 +1032,7 @@
 				retry++;
 				break;
 			case MIGRATEPAGE_SUCCESS:
+				nr_succeeded++;
 				break;
 			default:
 				/* Permanent failure */
@@ -1038,6 +1043,12 @@
 	}
 	rc = nr_failed + retry;
 out:
+	if (nr_succeeded)
+		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
+	if (nr_failed)
+		count_vm_events(PGMIGRATE_FAIL, nr_failed);
+	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
+
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
@@ -1176,7 +1187,8 @@
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, MIGRATE_SYNC);
+				(unsigned long)pm, 0, MIGRATE_SYNC,
+				MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1440,4 +1452,317 @@
  	}
  	return err;
 }
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * Returns true if this is a safe migration target node for misplaced NUMA
+ * pages. Currently it only checks the watermarks, which is crude.
+ */
+static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
+				   int nr_migrate_pages)
+{
+	int z;
+	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
+		struct zone *zone = pgdat->node_zones + z;
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (zone->all_unreclaimable)
+			continue;
+
+		/* Avoid waking kswapd by allocating nr_migrate_pages pages. */
+		if (!zone_watermark_ok(zone, 0,
+				       high_wmark_pages(zone) +
+				       nr_migrate_pages,
+				       0, 0))
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static struct page *alloc_misplaced_dst_page(struct page *page,
+					   unsigned long data,
+					   int **result)
+{
+	int nid = (int) data;
+	struct page *newpage;
+
+	newpage = alloc_pages_exact_node(nid,
+					 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
+					  __GFP_NOMEMALLOC | __GFP_NORETRY |
+					  __GFP_NOWARN) &
+					 ~GFP_IOFS, 0);
+	if (newpage)
+		page_xchg_last_nid(newpage, page_last_nid(page));
+
+	return newpage;
+}
+
+/*
+ * Page migration rate limiting control.
+ * Do not migrate more than @ratelimit_pages in a @migrate_interval_millisecs
+ * window of time. The defaults below say do not migrate more than 1280MB
+ * per second. If a node is rate-limited then PTE NUMA updates are also
+ * rate-limited. However, as it is faults that reset the window, PTE updates
+ * will happen unconditionally if there has not been a fault since
+ * @pteupdate_interval_millisecs after the throttle window closed.
+ */
+static unsigned int migrate_interval_millisecs __read_mostly = 100;
+static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
+static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
+
+/* Returns true if NUMA migration is currently rate limited */
+bool migrate_ratelimited(int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
+				msecs_to_jiffies(pteupdate_interval_millisecs)))
+		return false;
+
+	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
+		return false;
+
+	return true;
+}
+
+/* Returns true if the node is migrate rate-limited after the update */
+bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
+{
+	bool rate_limited = false;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	spin_lock(&pgdat->numabalancing_migrate_lock);
+	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
+		pgdat->numabalancing_migrate_nr_pages = 0;
+		pgdat->numabalancing_migrate_next_window = jiffies +
+			msecs_to_jiffies(migrate_interval_millisecs);
+	}
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
+		rate_limited = true;
+	else
+		pgdat->numabalancing_migrate_nr_pages += nr_pages;
+	spin_unlock(&pgdat->numabalancing_migrate_lock);
+
+	return rate_limited;
+}
+
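
numamigrate_update_ratelimit() is a fixed-window rate limiter: the first caller after the window expires zeroes the counter and opens a new @migrate_interval_millisecs window, and further requests are refused once the per-window page budget has been spent. Stripped of the pgdat plumbing, the logic reduces to the sketch below (names hypothetical; kernel context assumed for jiffies, time_after() and msecs_to_jiffies()):

/* fixed window: allow at most @budget units per @window_ms window */
struct ratelimit_window {
	spinlock_t lock;
	unsigned long window_end;	/* jiffies at which window closes */
	unsigned long used;
};

static bool rl_try_charge(struct ratelimit_window *rl, unsigned long nr,
			  unsigned long budget, unsigned int window_ms)
{
	bool limited = false;

	spin_lock(&rl->lock);
	if (time_after(jiffies, rl->window_end)) {
		rl->used = 0;		/* open a fresh window */
		rl->window_end = jiffies + msecs_to_jiffies(window_ms);
	}
	if (rl->used > budget)
		limited = true;		/* budget already spent */
	else
		rl->used += nr;		/* charge this request */
	spin_unlock(&rl->lock);

	return !limited;
}
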
+int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+{
+	int ret = 0;
+
+	/* Avoid migrating to a node that is nearly full */
+	if (migrate_balanced_pgdat(pgdat, 1)) {
+		int page_lru;
+
+		if (isolate_lru_page(page)) {
+			put_page(page);
+			return 0;
+		}
+
+		/* Page is isolated */
+		ret = 1;
+		page_lru = page_is_file_cache(page);
+		if (!PageTransHuge(page))
+			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		else
+			mod_zone_page_state(page_zone(page),
+					NR_ISOLATED_ANON + page_lru,
+					HPAGE_PMD_NR);
+	}
+
+	/*
+	 * Page is either isolated or there is not enough space on the target
+	 * node. If isolated, then it has taken a reference count and the
+	 * callers reference can be safely dropped without the page
+	 * disappearing underneath us during migration. Otherwise the page is
+	 * not to be migrated but the callers reference should still be
+	 * dropped so it does not leak.
+	 */
+	put_page(page);
+
+	return ret;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	int nr_remaining;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
+	}
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat, 1)) {
+		put_page(page);
+		goto out;
+	}
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated)
+		goto out;
+
+	list_add(&page->lru, &migratepages);
+	nr_remaining = migrate_pages(&migratepages,
+			alloc_misplaced_dst_page,
+			node, false, MIGRATE_ASYNC,
+			MR_NUMA_MISPLACED);
+	if (nr_remaining) {
+		putback_lru_pages(&migratepages);
+		isolated = 0;
+	} else
+		count_vm_numa_event(NUMA_PAGE_MIGRATE);
+	BUG_ON(!list_empty(&migratepages));
+out:
+	return isolated;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				pmd_t *pmd, pmd_t entry,
+				unsigned long address,
+				struct page *page, int node)
+{
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	struct page *new_page = NULL;
+	struct mem_cgroup *memcg = NULL;
+	int page_lru = page_is_file_cache(page);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1)
+		goto out_dropref;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
+		goto out_dropref;
+
+	new_page = alloc_pages_node(node,
+		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+	if (!new_page) {
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out_dropref;
+	}
+	page_xchg_last_nid(new_page, page_last_nid(page));
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated) {
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		put_page(new_page);
+		goto out_keep_locked;
+	}
+
+	/* Prepare a page as a migration target */
+	__set_page_locked(new_page);
+	SetPageSwapBacked(new_page);
+
+	/* anon mapping, we can simply copy page->mapping to the new page: */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	migrate_page_copy(new_page, page);
+	WARN_ON(PageLRU(new_page));
+
+	/* Recheck the target PMD */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, entry))) {
+		spin_unlock(&mm->page_table_lock);
+
+		/* Reverse changes made by migrate_page_copy() */
+		if (TestClearPageActive(new_page))
+			SetPageActive(page);
+		if (TestClearPageUnevictable(new_page))
+			SetPageUnevictable(page);
+		mlock_migrate_page(page, new_page);
+
+		unlock_page(new_page);
+		put_page(new_page);		/* Free it */
+
+		unlock_page(page);
+		putback_lru_page(page);
+
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out;
+	}
+
+	/*
+	 * Traditional migration needs to prepare the memcg charge
+	 * transaction early to prevent the old page from being
+	 * uncharged when installing migration entries.  Here we can
+	 * save the potential rollback and start the charge transfer
+	 * only when migration is already known to end successfully.
+	 */
+	mem_cgroup_prepare_migration(page, new_page, &memcg);
+
+	entry = mk_pmd(new_page, vma->vm_page_prot);
+	entry = pmd_mknonnuma(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = pmd_mkhuge(entry);
+
+	page_add_new_anon_rmap(new_page, vma, haddr);
+
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, &entry);
+	page_remove_rmap(page);
+	/*
+	 * Finish the charge transaction under the page table lock to
+	 * prevent split_huge_page() from dividing up the charge
+	 * before it's fully transferred to the new page.
+	 */
+	mem_cgroup_end_migration(memcg, page, new_page, true);
+	spin_unlock(&mm->page_table_lock);
+
+	unlock_page(new_page);
+	unlock_page(page);
+	put_page(page);			/* Drop the rmap reference */
+	put_page(page);			/* Drop the LRU isolation reference */
+
+	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
+	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
+
+out:
+	mod_zone_page_state(page_zone(page),
+			NR_ISOLATED_ANON + page_lru,
+			-HPAGE_PMD_NR);
+	return isolated;
+
+out_dropref:
+	put_page(page);
+out_keep_locked:
+	return 0;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#endif /* CONFIG_NUMA */
diff --git a/mm/mmap.c b/mm/mmap.c
index 2b7d9e7..f54b235 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -736,7 +736,7 @@
 	if (anon_vma) {
 		VM_BUG_ON(adjust_next && next->anon_vma &&
 			  anon_vma != next->anon_vma);
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_pre_update_vma(vma);
 		if (adjust_next)
 			anon_vma_interval_tree_pre_update_vma(next);
@@ -2886,15 +2886,15 @@
 		 * The LSB of head.next can't change from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
+		down_write(&anon_vma->root->rwsem);
 		/*
 		 * We can safely modify head.next after taking the
-		 * anon_vma->root->mutex. If some other vma in this mm shares
+		 * anon_vma->root->rwsem. If some other vma in this mm shares
 		 * the same anon_vma we won't take it again.
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us thanks to the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (__test_and_set_bit(0, (unsigned long *)
 				       &anon_vma->root->rb_root.rb_node))
@@ -2996,7 +2996,7 @@
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us until we release the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					  &anon_vma->root->rb_root.rb_node))
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e8c3938..94722a4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,12 +35,16 @@
 }
 #endif
 
-static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa, bool *ret_all_same_node)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
+	unsigned long pages = 0;
+	bool all_same_node = true;
+	int last_nid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -48,17 +52,43 @@
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
 			pte_t ptent;
+			bool updated = false;
 
 			ptent = ptep_modify_prot_start(mm, addr, pte);
-			ptent = pte_modify(ptent, newprot);
+			if (!prot_numa) {
+				ptent = pte_modify(ptent, newprot);
+				updated = true;
+			} else {
+				struct page *page;
+
+				page = vm_normal_page(vma, addr, oldpte);
+				if (page) {
+					int this_nid = page_to_nid(page);
+					if (last_nid == -1)
+						last_nid = this_nid;
+					if (last_nid != this_nid)
+						all_same_node = false;
+
+					/* only check non-shared pages */
+					if (!pte_numa(oldpte) &&
+					    page_mapcount(page) == 1) {
+						ptent = pte_mknuma(ptent);
+						updated = true;
+					}
+				}
+			}
 
 			/*
 			 * Avoid taking write faults for pages we know to be
 			 * dirty.
 			 */
-			if (dirty_accountable && pte_dirty(ptent))
+			if (dirty_accountable && pte_dirty(ptent)) {
 				ptent = pte_mkwrite(ptent);
+				updated = true;
+			}
 
+			if (updated)
+				pages++;
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -72,18 +102,40 @@
 				set_pte_at(mm, addr, pte,
 					swp_entry_to_pte(entry));
 			}
+			pages++;
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
+
+	*ret_all_same_node = all_same_node;
+	return pages;
 }
 
-static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+#ifdef CONFIG_NUMA_BALANCING
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+				       pmd_t *pmd)
+{
+	spin_lock(&mm->page_table_lock);
+	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
+	spin_unlock(&mm->page_table_lock);
+}
+#else
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+				       pmd_t *pmd)
+{
+	BUG();
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
+		pud_t *pud, unsigned long addr, unsigned long end,
+		pgprot_t newprot, int dirty_accountable, int prot_numa)
 {
 	pmd_t *pmd;
 	unsigned long next;
+	unsigned long pages = 0;
+	bool all_same_node;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -91,42 +143,60 @@
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				split_huge_page_pmd(vma, addr, pmd);
-			else if (change_huge_pmd(vma, pmd, addr, newprot))
+			else if (change_huge_pmd(vma, pmd, addr, newprot,
+						 prot_numa)) {
+				pages += HPAGE_PMD_NR;
 				continue;
+			}
 			/* fall through */
 		}
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
-				 dirty_accountable);
+		pages += change_pte_range(vma, pmd, addr, next, newprot,
+				 dirty_accountable, prot_numa, &all_same_node);
+
+		/*
+		 * If we are changing protections for NUMA hinting faults then
+		 * set pmd_numa if the examined pages were all on the same
+		 * node. This allows a regular PMD to be handled as one fault
+		 * and effectively batches the taking of the PTL.
+		 */
+		if (prot_numa && all_same_node)
+			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
+
+	return pages;
 }
 
-static inline void change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+static inline unsigned long change_pud_range(struct vm_area_struct *vma,
+		pgd_t *pgd, unsigned long addr, unsigned long end,
+		pgprot_t newprot, int dirty_accountable, int prot_numa)
 {
 	pud_t *pud;
 	unsigned long next;
+	unsigned long pages = 0;
 
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		change_pmd_range(vma, pud, addr, next, newprot,
-				 dirty_accountable);
+		pages += change_pmd_range(vma, pud, addr, next, newprot,
+				 dirty_accountable, prot_numa);
 	} while (pud++, addr = next, addr != end);
+
+	return pages;
 }
 
-static void change_protection(struct vm_area_struct *vma,
+static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	unsigned long next;
 	unsigned long start = addr;
+	unsigned long pages = 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
@@ -135,10 +205,32 @@
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		change_pud_range(vma, pgd, addr, next, newprot,
-				 dirty_accountable);
+		pages += change_pud_range(vma, pgd, addr, next, newprot,
+				 dirty_accountable, prot_numa);
 	} while (pgd++, addr = next, addr != end);
-	flush_tlb_range(vma, start, end);
+
+	/* Only flush the TLB if we actually modified any entries: */
+	if (pages)
+		flush_tlb_range(vma, start, end);
+
+	return pages;
+}
+
+unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end, pgprot_t newprot,
+		       int dirty_accountable, int prot_numa)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long pages;
+
+	mmu_notifier_invalidate_range_start(mm, start, end);
+	if (is_vm_hugetlb_page(vma))
+		pages = hugetlb_change_protection(vma, start, end, newprot);
+	else
+		pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
+	mmu_notifier_invalidate_range_end(mm, start, end);
+
+	return pages;
 }
 
 int
@@ -213,12 +305,9 @@
 		dirty_accountable = 1;
 	}
 
-	mmu_notifier_invalidate_range_start(mm, start, end);
-	if (is_vm_hugetlb_page(vma))
-		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
-	else
-		change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
-	mmu_notifier_invalidate_range_end(mm, start, end);
+	change_protection(vma, start, end, vma->vm_page_prot,
+			  dirty_accountable, 0);
+
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	perf_event_mmap(vma);
@@ -274,8 +363,7 @@
 		error = -EINVAL;
 		if (!(vma->vm_flags & VM_GROWSDOWN))
 			goto out;
-	}
-	else {
+	} else {
 		if (vma->vm_start > start)
 			goto out;
 		if (unlikely(grows & PROT_GROWSUP)) {
@@ -291,9 +379,10 @@
 	for (nstart = start ; ; ) {
 		unsigned long newflags;
 
-		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
+		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
-		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
+		newflags = vm_flags;
+		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
 
 		/* newflags >> 4 shift VM_MAY% in place of VM_% */
 		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
diff --git a/mm/mremap.c b/mm/mremap.c
index eabb24d..e1031e1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -104,7 +104,7 @@
 		}
 		if (vma->anon_vma) {
 			anon_vma = vma->anon_vma;
-			anon_vma_lock(anon_vma);
+			anon_vma_lock_write(anon_vma);
 		}
 	}
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6f427122..0713bfb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -201,6 +201,18 @@
 		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
 	}
 	/*
+	 * Unreclaimable memory (kernel memory or anonymous memory
+	 * without swap) can bring down the dirtyable pages below
+	 * the zone's dirty balance reserve and the above calculation
+	 * will underflow.  However, we still want to add in nodes
+	 * which are below threshold (negative values) to get a more
+	 * accurate calculation but make sure that the total never
+	 * underflows.
+	 */
+	if ((long)x < 0)
+		x = 0;
+
+	/*
 	 * Make sure that the number of highmem pages is never larger
 	 * than the number of the total dirtyable memory. This can only
 	 * occur in very strange VM situations but we want to make sure
@@ -222,8 +234,8 @@
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
-	    dirty_balance_reserve;
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x -= min(x, dirty_balance_reserve);
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
@@ -290,9 +302,12 @@
 	 * highmem zone can hold its share of dirty pages, so we don't
 	 * care about vm_highmem_is_dirtyable here.
 	 */
-	return zone_page_state(zone, NR_FREE_PAGES) +
-	       zone_reclaimable_pages(zone) -
-	       zone->dirty_balance_reserve;
+	unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
+		zone_reclaimable_pages(zone);
+
+	/* don't allow this to underflow */
+	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+	return nr_pages;
 }
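
Both hunks above guard the same hazard: these page counts are unsigned, so subtracting a dirty_balance_reserve larger than the remaining total wraps around to a huge bogus value instead of going negative. A two-line illustration of why the min() clamp is used (kernel min() macro assumed):

	unsigned long x = 10, reserve = 25;

	x = x - reserve;	/* wraps to ULONG_MAX - 14: huge, not negative */

	x = 10;
	x -= min(x, reserve);	/* clamps to 0 instead */
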
 
 /**
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 83637df..4ba5e37 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -371,8 +371,7 @@
 	int nr_pages = 1 << order;
 	int bad = 0;
 
-	if (unlikely(compound_order(page) != order) ||
-	    unlikely(!PageHead(page))) {
+	if (unlikely(compound_order(page) != order)) {
 		bad_page(page);
 		bad++;
 	}
@@ -611,6 +610,7 @@
 		bad_page(page);
 		return 1;
 	}
+	reset_page_last_nid(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -2612,6 +2612,7 @@
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
 	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2630,6 +2631,13 @@
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
+	/*
+	 * Will only have any effect when __GFP_KMEMCG is set.  This is
+	 * verified in the (always inline) callee
+	 */
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+
 retry_cpuset:
 	cpuset_mems_cookie = get_mems_allowed();
 
@@ -2665,6 +2673,8 @@
 	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
 		goto retry_cpuset;
 
+	memcg_kmem_commit_charge(page, memcg, order);
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2717,6 +2727,31 @@
 
 EXPORT_SYMBOL(free_pages);
 
+/*
+ * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
+ * pages allocated with __GFP_KMEMCG.
+ *
+ * Those pages are accounted to a particular memcg, embedded in the
+ * corresponding page_cgroup. To avoid adding a hit in the allocator to search
+ * for that information only to find out that it is NULL for users who have no
+ * interest in that whatsoever, we provide these functions.
+ *
+ * The caller knows better which flags it relies on.
+ */
+void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+{
+	memcg_kmem_uncharge_pages(page, order);
+	__free_pages(page, order);
+}
+
+void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+{
+	if (addr != 0) {
+		VM_BUG_ON(!virt_addr_valid((void *)addr));
+		__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+	}
+}
+
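
These helpers pair with allocations made with __GFP_KMEMCG, which charge the pages to the allocating task's memcg; such pages must come back through the memcg-aware variants so the charge is released. A minimal usage sketch (error handling elided):

	/* pages charged to the current task's memcg at allocation time */
	unsigned long addr = __get_free_pages(GFP_KERNEL | __GFP_KMEMCG, order);

	if (addr) {
		/* ... use the buffer ... */

		/* the memcg-aware free uncharges before freeing the pages */
		free_memcg_kmem_pages(addr, order);
	}
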
 static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
 {
 	if (addr) {
@@ -3883,6 +3918,7 @@
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
+		reset_page_last_nid(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
@@ -4526,6 +4562,11 @@
 	int ret;
 
 	pgdat_resize_init(pgdat);
+#ifdef CONFIG_NUMA_BALANCING
+	spin_lock_init(&pgdat->numabalancing_migrate_lock);
+	pgdat->numabalancing_migrate_nr_pages = 0;
+	pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 	pgdat_page_cgroup_init(pgdat);
@@ -5800,7 +5841,8 @@
 
 		ret = migrate_pages(&cc->migratepages,
 				    alloc_migrate_target,
-				    0, false, MIGRATE_SYNC);
+				    0, false, MIGRATE_SYNC,
+				    MR_CMA);
 	}
 
 	putback_movable_pages(&cc->migratepages);
@@ -5936,8 +5978,15 @@
 
 void free_contig_range(unsigned long pfn, unsigned nr_pages)
 {
-	for (; nr_pages--; ++pfn)
-		__free_page(pfn_to_page(pfn));
+	unsigned int count = 0;
+
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		count += page_count(page) != 1;
+		__free_page(page);
+	}
+	WARN(count != 0, "%d pages are still in use!\n", count);
 }
 #endif
 
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index e642627..0c8323f 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -12,8 +12,8 @@
 
 #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 /*
- * Only sets the access flags (dirty, accessed, and
- * writable). Furthermore, we know it always gets set to a "more
+ * Only sets the access flags (dirty, accessed), as well as write
+ * permission. Furthermore, we know it always gets set to a "more
  * permissive" setting, which allows most architectures to optimize
  * this. We return whether the PTE actually changed, which in turn
  * instructs the caller to do things like update_mmu_cache().  This
@@ -27,7 +27,7 @@
 	int changed = !pte_same(*ptep, entry);
 	if (changed) {
 		set_pte_at(vma->vm_mm, address, ptep, entry);
-		flush_tlb_page(vma, address);
+		flush_tlb_fix_spurious_fault(vma, address);
 	}
 	return changed;
 }
@@ -88,7 +88,8 @@
 {
 	pte_t pte;
 	pte = ptep_get_and_clear((vma)->vm_mm, address, ptep);
-	flush_tlb_page(vma, address);
+	if (pte_accessible(pte))
+		flush_tlb_page(vma, address);
 	return pte;
 }
 #endif
diff --git a/mm/rmap.c b/mm/rmap.c
index face808..2c78f8c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -24,7 +24,7 @@
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
  *       mapping->i_mmap_mutex
- *         anon_vma->mutex
+ *         anon_vma->rwsem
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *             swap_lock (in swap_duplicate, swap_info_get)
@@ -37,7 +37,7 @@
  *                           in arch-dependent flush_dcache_mmap_lock,
  *                           within bdi.wb->list_lock in __sync_single_inode)
  *
- * anon_vma->mutex,mapping->i_mutex      (memory_failure, collect_procs_anon)
+ * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
  *   ->tasklist_lock
  *     pte map lock
  */
@@ -87,24 +87,24 @@
 	VM_BUG_ON(atomic_read(&anon_vma->refcount));
 
 	/*
-	 * Synchronize against page_lock_anon_vma() such that
+	 * Synchronize against page_lock_anon_vma_read() such that
 	 * we can safely hold the lock without the anon_vma getting
 	 * freed.
 	 *
 	 * Relies on the full mb implied by the atomic_dec_and_test() from
 	 * put_anon_vma() against the acquire barrier implied by
-	 * mutex_trylock() from page_lock_anon_vma(). This orders:
+	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
 	 *
-	 * page_lock_anon_vma()		VS	put_anon_vma()
-	 *   mutex_trylock()			  atomic_dec_and_test()
+	 * page_lock_anon_vma_read()	VS	put_anon_vma()
+	 *   down_read_trylock()		  atomic_dec_and_test()
 	 *   LOCK				  MB
-	 *   atomic_read()			  mutex_is_locked()
+	 *   atomic_read()			  rwsem_is_locked()
 	 *
 	 * LOCK should suffice since the actual taking of the lock must
 	 * happen _before_ what follows.
 	 */
-	if (mutex_is_locked(&anon_vma->root->mutex)) {
-		anon_vma_lock(anon_vma);
+	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
+		anon_vma_lock_write(anon_vma);
 		anon_vma_unlock(anon_vma);
 	}
 
@@ -146,7 +146,7 @@
  * allocate a new one.
  *
  * Anon-vma allocations are very subtle, because we may have
- * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * optimistically looked up an anon_vma in page_lock_anon_vma_read()
  * and that may actually touch the spinlock even in the newly
  * allocated vma (it depends on RCU to make sure that the
  * anon_vma isn't actually destroyed).
@@ -181,7 +181,7 @@
 			allocated = anon_vma;
 		}
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		/* page_table_lock to protect against threads */
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
@@ -219,9 +219,9 @@
 	struct anon_vma *new_root = anon_vma->root;
 	if (new_root != root) {
 		if (WARN_ON_ONCE(root))
-			mutex_unlock(&root->mutex);
+			up_write(&root->rwsem);
 		root = new_root;
-		mutex_lock(&root->mutex);
+		down_write(&root->rwsem);
 	}
 	return root;
 }
@@ -229,7 +229,7 @@
 static inline void unlock_anon_vma_root(struct anon_vma *root)
 {
 	if (root)
-		mutex_unlock(&root->mutex);
+		up_write(&root->rwsem);
 }
 
 /*
@@ -306,7 +306,7 @@
 	get_anon_vma(anon_vma->root);
 	/* Mark this anon_vma as the one where our new (COWed) pages go. */
 	vma->anon_vma = anon_vma;
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_write(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
 	anon_vma_unlock(anon_vma);
 
@@ -349,7 +349,7 @@
 	/*
 	 * Iterate the list once more, it now only contains empty and unlinked
 	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
-	 * needing to acquire the anon_vma->root->mutex.
+	 * needing to write-acquire the anon_vma->root->rwsem.
 	 */
 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma = avc->anon_vma;
@@ -365,7 +365,7 @@
 {
 	struct anon_vma *anon_vma = data;
 
-	mutex_init(&anon_vma->mutex);
+	init_rwsem(&anon_vma->rwsem);
 	atomic_set(&anon_vma->refcount, 0);
 	anon_vma->rb_root = RB_ROOT;
 }
@@ -442,7 +442,7 @@
  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
  * reference like with page_get_anon_vma() and then block on the mutex.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma_read(struct page *page)
 {
 	struct anon_vma *anon_vma = NULL;
 	struct anon_vma *root_anon_vma;
@@ -457,14 +457,14 @@
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
 	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	if (mutex_trylock(&root_anon_vma->mutex)) {
+	if (down_read_trylock(&root_anon_vma->rwsem)) {
 		/*
 		 * If the page is still mapped, then this anon_vma is still
 		 * its anon_vma, and holding the mutex ensures that it will
 		 * not go away, see anon_vma_free().
 		 */
 		if (!page_mapped(page)) {
-			mutex_unlock(&root_anon_vma->mutex);
+			up_read(&root_anon_vma->rwsem);
 			anon_vma = NULL;
 		}
 		goto out;
@@ -484,15 +484,15 @@
 
 	/* we pinned the anon_vma, its safe to sleep */
 	rcu_read_unlock();
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_read(anon_vma);
 
 	if (atomic_dec_and_test(&anon_vma->refcount)) {
 		/*
 		 * Oops, we held the last refcount, release the lock
 		 * and bail -- can't simply use put_anon_vma() because
-		 * we'll deadlock on the anon_vma_lock() recursion.
+		 * we'll deadlock on the anon_vma_lock_write() recursion.
 		 */
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_read(anon_vma);
 		__put_anon_vma(anon_vma);
 		anon_vma = NULL;
 	}
@@ -504,9 +504,9 @@
 	return anon_vma;
 }
 
-void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
 {
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_read(anon_vma);
 }
 
 /*
@@ -744,7 +744,7 @@
 	struct anon_vma_chain *avc;
 	int referenced = 0;
 
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		return referenced;
 
@@ -766,7 +766,7 @@
 			break;
 	}
 
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 	return referenced;
 }
 
@@ -1315,7 +1315,7 @@
 	/*
 	 * We need mmap_sem locking; otherwise the VM_LOCKED check gives
 	 * an unstable, racy result. Plus, we can't wait here because
-	 * we now hold anon_vma->mutex or mapping->i_mmap_mutex.
+	 * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
@@ -1480,7 +1480,7 @@
 	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		return ret;
 
@@ -1507,7 +1507,7 @@
 			break;
 	}
 
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 	return ret;
 }
 
@@ -1702,7 +1702,7 @@
 	int ret = SWAP_AGAIN;
 
 	/*
-	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
 	 * because that depends on page_mapped(); but not all its usages
 	 * are holding mmap_sem. Users without mmap_sem are required to
 	 * take a reference count to prevent the anon_vma disappearing
@@ -1710,7 +1710,7 @@
 	anon_vma = page_anon_vma(page);
 	if (!anon_vma)
 		return ret;
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_read(anon_vma);
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
 		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
@@ -1718,7 +1718,7 @@
 		if (ret != SWAP_AGAIN)
 			break;
 	}
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_read(anon_vma);
 	return ret;
 }
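
The mutex-to-rwsem conversion running through this file lets pure rmap walkers share the anon_vma tree lock while structural updates remain exclusive. The resulting discipline, in sketch form (note that in this series plain anon_vma_unlock() is the write-side unlock):

	/* readers: rmap walks such as page_referenced_anon() */
	anon_vma_lock_read(anon_vma);
	/* ... walk anon_vma->rb_root; the tree cannot change under us ... */
	anon_vma_unlock_read(anon_vma);

	/* writers: linking/unlinking vmas, vma_adjust() */
	anon_vma_lock_write(anon_vma);
	/* ... modify the interval tree ... */
	anon_vma_unlock(anon_vma);
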
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 03f9ba8..5c90d84 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1719,7 +1719,7 @@
  * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
  */
 static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-				    pgoff_t index, pgoff_t end, int origin)
+				    pgoff_t index, pgoff_t end, int whence)
 {
 	struct page *page;
 	struct pagevec pvec;
@@ -1733,13 +1733,13 @@
 		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 					pvec.nr, pvec.pages, indices);
 		if (!pvec.nr) {
-			if (origin == SEEK_DATA)
+			if (whence == SEEK_DATA)
 				index = end;
 			break;
 		}
 		for (i = 0; i < pvec.nr; i++, index++) {
 			if (index < indices[i]) {
-				if (origin == SEEK_HOLE) {
+				if (whence == SEEK_HOLE) {
 					done = true;
 					break;
 				}
@@ -1751,8 +1751,8 @@
 					page = NULL;
 			}
 			if (index >= end ||
-			    (page && origin == SEEK_DATA) ||
-			    (!page && origin == SEEK_HOLE)) {
+			    (page && whence == SEEK_DATA) ||
+			    (!page && whence == SEEK_HOLE)) {
 				done = true;
 				break;
 			}
@@ -1765,15 +1765,15 @@
 	return index;
 }
 
-static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	pgoff_t start, end;
 	loff_t new_offset;
 
-	if (origin != SEEK_DATA && origin != SEEK_HOLE)
-		return generic_file_llseek_size(file, offset, origin,
+	if (whence != SEEK_DATA && whence != SEEK_HOLE)
+		return generic_file_llseek_size(file, offset, whence,
 					MAX_LFS_FILESIZE, i_size_read(inode));
 	mutex_lock(&inode->i_mutex);
 	/* We're holding i_mutex so we can access i_size directly */
@@ -1785,12 +1785,12 @@
 	else {
 		start = offset >> PAGE_CACHE_SHIFT;
 		end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-		new_offset = shmem_seek_hole_data(mapping, start, end, origin);
+		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
 		new_offset <<= PAGE_CACHE_SHIFT;
 		if (new_offset > offset) {
 			if (new_offset < inode->i_size)
 				offset = new_offset;
-			else if (origin == SEEK_DATA)
+			else if (whence == SEEK_DATA)
 				offset = -ENXIO;
 			else
 				offset = inode->i_size;
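The shmem hunks above only rename the llseek argument from origin to whence; the SEEK_DATA/SEEK_HOLE behaviour is unchanged, including the -ENXIO result when SEEK_DATA runs past i_size. A small user-space probe of those semantics (a sketch; error handling trimmed):

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

int main(int argc, char **argv)
{
	int fd = open(argv[1], O_RDONLY);
	off_t data = lseek(fd, 0, SEEK_DATA);	 /* first data at or after 0 */
	off_t hole = lseek(fd, data, SEEK_HOLE); /* first hole after that */

	printf("data at %lld, hole at %lld\n",
	       (long long)data, (long long)hole);
	close(fd);
	return 0;
}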
diff --git a/mm/slab.c b/mm/slab.c
index 33d3363..e7667a3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -87,7 +87,6 @@
  */
 
 #include	<linux/slab.h>
-#include	"slab.h"
 #include	<linux/mm.h>
 #include	<linux/poison.h>
 #include	<linux/swap.h>
@@ -128,6 +127,8 @@
 
 #include	"internal.h"
 
+#include	"slab.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -162,23 +163,6 @@
  */
 static bool pfmemalloc_active __read_mostly;
 
-/* Legal flag mask for kmem_cache_create(). */
-#if DEBUG
-# define CREATE_MASK	(SLAB_RED_ZONE | \
-			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
-			 SLAB_CACHE_DMA | \
-			 SLAB_STORE_USER | \
-			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
-#else
-# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
-			 SLAB_CACHE_DMA | \
-			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
-#endif
-
 /*
  * kmem_bufctl_t:
  *
@@ -564,15 +548,11 @@
 #undef CACHE
 };
 
-static struct arraycache_init initarray_cache __initdata =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 static struct arraycache_init initarray_generic =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
-static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache kmem_cache_boot = {
-	.nodelists = kmem_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -662,6 +642,26 @@
 	}
 }
 
+static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
+{
+	struct kmem_list3 *l3;
+	l3 = cachep->nodelists[q];
+	if (!l3)
+		return;
+
+	slab_set_lock_classes(cachep, &on_slab_l3_key,
+			&on_slab_alc_key, q);
+}
+
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+	int node;
+
+	VM_BUG_ON(OFF_SLAB(cachep));
+	for_each_node(node)
+		on_slab_lock_classes_node(cachep, node);
+}
+
 static inline void init_lock_keys(void)
 {
 	int node;
@@ -678,6 +678,14 @@
 {
 }
 
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+}
+
+static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+}
+
 static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
 {
 }
@@ -1406,6 +1414,9 @@
 		free_alien_cache(alien);
 		if (cachep->flags & SLAB_DEBUG_OBJECTS)
 			slab_set_debugobj_lock_classes_node(cachep, node);
+		else if (!OFF_SLAB(cachep) &&
+			 !(cachep->flags & SLAB_DESTROY_BY_RCU))
+			on_slab_lock_classes_node(cachep, node);
 	}
 	init_node_lock_keys(node);
 
@@ -1577,28 +1588,33 @@
 }
 
 /*
+ * The memory after the last cpu cache pointer is used for the
+ * nodelists pointer.
+ */
+static void setup_nodelists_pointer(struct kmem_cache *cachep)
+{
+	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+}
+
+/*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
  */
 void __init kmem_cache_init(void)
 {
-	size_t left_over;
 	struct cache_sizes *sizes;
 	struct cache_names *names;
 	int i;
-	int order;
-	int node;
 
 	kmem_cache = &kmem_cache_boot;
+	setup_nodelists_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
 
-	for (i = 0; i < NUM_INIT_LISTS; i++) {
+	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_list3_init(&initkmem_list3[i]);
-		if (i < MAX_NUMNODES)
-			kmem_cache->nodelists[i] = NULL;
-	}
+
 	set_up_list3s(kmem_cache, CACHE_CACHE);
 
 	/*
@@ -1629,37 +1645,16 @@
 	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
 	 */
 
-	node = numa_mem_id();
-
 	/* 1) create the kmem_cache */
-	INIT_LIST_HEAD(&slab_caches);
-	list_add(&kmem_cache->list, &slab_caches);
-	kmem_cache->colour_off = cache_line_size();
-	kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
-	kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
-	kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
-				  nr_node_ids * sizeof(struct kmem_list3 *);
-	kmem_cache->object_size = kmem_cache->size;
-	kmem_cache->size = ALIGN(kmem_cache->object_size,
-					cache_line_size());
-	kmem_cache->reciprocal_buffer_size =
-		reciprocal_value(kmem_cache->size);
-
-	for (order = 0; order < MAX_ORDER; order++) {
-		cache_estimate(order, kmem_cache->size,
-			cache_line_size(), 0, &left_over, &kmem_cache->num);
-		if (kmem_cache->num)
-			break;
-	}
-	BUG_ON(!kmem_cache->num);
-	kmem_cache->gfporder = order;
-	kmem_cache->colour = left_over / kmem_cache->colour_off;
-	kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) +
-				      sizeof(struct slab), cache_line_size());
+	create_boot_cache(kmem_cache, "kmem_cache",
+		offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+				  nr_node_ids * sizeof(struct kmem_list3 *),
+				  SLAB_HWCACHE_ALIGN);
+	list_add(&kmem_cache->list, &slab_caches);
 
 	/* 2+3) create the kmalloc caches */
 	sizes = malloc_sizes;
@@ -1671,23 +1666,13 @@
 	 * bug.
 	 */
 
-	sizes[INDEX_AC].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-	sizes[INDEX_AC].cs_cachep->name = names[INDEX_AC].name;
-	sizes[INDEX_AC].cs_cachep->size = sizes[INDEX_AC].cs_size;
-	sizes[INDEX_AC].cs_cachep->object_size = sizes[INDEX_AC].cs_size;
-	sizes[INDEX_AC].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-	__kmem_cache_create(sizes[INDEX_AC].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-	list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches);
+	sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name,
+					sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS);
 
-	if (INDEX_AC != INDEX_L3) {
-		sizes[INDEX_L3].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-		sizes[INDEX_L3].cs_cachep->name = names[INDEX_L3].name;
-		sizes[INDEX_L3].cs_cachep->size = sizes[INDEX_L3].cs_size;
-		sizes[INDEX_L3].cs_cachep->object_size = sizes[INDEX_L3].cs_size;
-		sizes[INDEX_L3].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-		__kmem_cache_create(sizes[INDEX_L3].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-		list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches);
-	}
+	if (INDEX_AC != INDEX_L3)
+		sizes[INDEX_L3].cs_cachep =
+			create_kmalloc_cache(names[INDEX_L3].name,
+				sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS);
 
 	slab_early_init = 0;
 
@@ -1699,24 +1684,14 @@
 		 * Note for systems short on memory removing the alignment will
 		 * allow tighter packing of the smaller caches.
 		 */
-		if (!sizes->cs_cachep) {
-			sizes->cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-			sizes->cs_cachep->name = names->name;
-			sizes->cs_cachep->size = sizes->cs_size;
-			sizes->cs_cachep->object_size = sizes->cs_size;
-			sizes->cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-			__kmem_cache_create(sizes->cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-			list_add(&sizes->cs_cachep->list, &slab_caches);
-		}
+		if (!sizes->cs_cachep)
+			sizes->cs_cachep = create_kmalloc_cache(names->name,
+					sizes->cs_size, ARCH_KMALLOC_FLAGS);
+
 #ifdef CONFIG_ZONE_DMA
-		sizes->cs_dmacachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-		sizes->cs_dmacachep->name = names->name_dma;
-		sizes->cs_dmacachep->size = sizes->cs_size;
-		sizes->cs_dmacachep->object_size = sizes->cs_size;
-		sizes->cs_dmacachep->align = ARCH_KMALLOC_MINALIGN;
-		__kmem_cache_create(sizes->cs_dmacachep,
-			       ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC);
-		list_add(&sizes->cs_dmacachep->list, &slab_caches);
+		sizes->cs_dmacachep = create_kmalloc_cache(
+			names->name_dma, sizes->cs_size,
+			SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS);
 #endif
 		sizes++;
 		names++;
@@ -1727,7 +1702,6 @@
 
 		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(kmem_cache),
 		       sizeof(struct arraycache_init));
 		/*
@@ -1921,6 +1895,7 @@
 		if (page->pfmemalloc)
 			SetPageSlabPfmemalloc(page + i);
 	}
+	memcg_bind_pages(cachep, cachep->gfporder);
 
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1957,9 +1932,11 @@
 		__ClearPageSlab(page);
 		page++;
 	}
+
+	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	free_pages((unsigned long)addr, cachep->gfporder);
+	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -2282,7 +2259,15 @@
 
 	if (slab_state == DOWN) {
 		/*
-		 * Note: the first kmem_cache_create must create the cache
+		 * Note: creation of the first cache (kmem_cache).
+		 * The set_up_list3s() call is taken care of by the
+		 * caller of __kmem_cache_create().
+		 */
+		cachep->array[smp_processor_id()] = &initarray_generic.cache;
+		slab_state = PARTIAL;
+	} else if (slab_state == PARTIAL) {
+		/*
+		 * Note: the second kmem_cache_create must create the cache
 		 * that's used by kmalloc(24), otherwise the creation of
 		 * further caches will BUG().
 		 */
@@ -2290,7 +2275,7 @@
 
 		/*
 		 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
-		 * the first cache, then we need to set up all its list3s,
+		 * the second cache, then we need to set up all its list3s,
 		 * otherwise the creation of further caches will BUG().
 		 */
 		set_up_list3s(cachep, SIZE_AC);
@@ -2299,6 +2284,7 @@
 		else
 			slab_state = PARTIAL_ARRAYCACHE;
 	} else {
+		/* Remaining boot caches */
 		cachep->array[smp_processor_id()] =
 			kmalloc(sizeof(struct arraycache_init), gfp);
 
@@ -2331,11 +2317,8 @@
 
 /**
  * __kmem_cache_create - Create a cache.
- * @name: A string which is used in /proc/slabinfo to identify this cache.
- * @size: The size of objects to be created in this cache.
- * @align: The required alignment for the objects.
+ * @cachep: cache management descriptor
  * @flags: SLAB flags
- * @ctor: A constructor for the objects.
  *
 * Returns zero on success, nonzero on failure.
 * Cannot be called within an interrupt, but can be interrupted.
@@ -2378,11 +2361,6 @@
 	if (flags & SLAB_DESTROY_BY_RCU)
 		BUG_ON(flags & SLAB_POISON);
 #endif
-	/*
-	 * Always checks flags, a caller might be expecting debug support which
-	 * isn't available.
-	 */
-	BUG_ON(flags & ~CREATE_MASK);
 
 	/*
 	 * Check that size is in terms of words.  This is needed to avoid
@@ -2394,22 +2372,6 @@
 		size &= ~(BYTES_PER_WORD - 1);
 	}
 
-	/* calculate the final buffer alignment: */
-
-	/* 1) arch recommendation: can be overridden for debug */
-	if (flags & SLAB_HWCACHE_ALIGN) {
-		/*
-		 * Default alignment: as specified by the arch code.  Except if
-		 * an object is really small, then squeeze multiple objects into
-		 * one cacheline.
-		 */
-		ralign = cache_line_size();
-		while (size <= ralign / 2)
-			ralign /= 2;
-	} else {
-		ralign = BYTES_PER_WORD;
-	}
-
 	/*
 	 * Redzoning and user store require word alignment or possibly larger.
 	 * Note this will be overridden by architecture or caller mandated
@@ -2426,10 +2388,6 @@
 		size &= ~(REDZONE_ALIGN - 1);
 	}
 
-	/* 2) arch mandated alignment */
-	if (ralign < ARCH_SLAB_MINALIGN) {
-		ralign = ARCH_SLAB_MINALIGN;
-	}
 	/* 3) caller mandated alignment */
 	if (ralign < cachep->align) {
 		ralign = cachep->align;
@@ -2447,7 +2405,7 @@
 	else
 		gfp = GFP_NOWAIT;
 
-	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+	setup_nodelists_pointer(cachep);
 #if DEBUG
 
 	/*
@@ -2566,7 +2524,8 @@
 		WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
 
 		slab_set_debugobj_lock_classes(cachep);
-	}
+	} else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
+		on_slab_lock_classes(cachep);
 
 	return 0;
 }
@@ -3530,6 +3489,8 @@
 	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
+	cachep = memcg_kmem_get_cache(cachep, flags);
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
@@ -3615,6 +3576,8 @@
 	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
+	cachep = memcg_kmem_get_cache(cachep, flags);
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 	objp = __do_cache_alloc(cachep, flags);
@@ -3928,6 +3891,9 @@
 void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 {
 	unsigned long flags;
+	cachep = cache_from_obj(cachep, objp);
+	if (!cachep)
+		return;
 
 	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, cachep->object_size);
@@ -3969,12 +3935,6 @@
 }
 EXPORT_SYMBOL(kfree);
 
-unsigned int kmem_cache_size(struct kmem_cache *cachep)
-{
-	return cachep->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
@@ -4081,7 +4041,7 @@
 }
 
 /* Always called with the slab_mutex held */
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
 {
 	struct ccupdate_struct *new;
@@ -4124,12 +4084,49 @@
 	return alloc_kmemlist(cachep, gfp);
 }
 
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+				int batchcount, int shared, gfp_t gfp)
+{
+	int ret;
+	struct kmem_cache *c = NULL;
+	int i = 0;
+
+	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+
+	if (slab_state < FULL)
+		return ret;
+
+	if ((ret < 0) || !is_root_cache(cachep))
+		return ret;
+
+	VM_BUG_ON(!mutex_is_locked(&slab_mutex));
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg(cachep, i);
+		if (c)
+			/* return value determined by the parent cache only */
+			__do_tune_cpucache(c, limit, batchcount, shared, gfp);
+	}
+
+	return ret;
+}
+
 /* Called with slab_mutex held always */
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int err;
-	int limit, shared;
+	int limit = 0;
+	int shared = 0;
+	int batchcount = 0;
 
+	if (!is_root_cache(cachep)) {
+		struct kmem_cache *root = memcg_root_cache(cachep);
+		limit = root->limit;
+		shared = root->shared;
+		batchcount = root->batchcount;
+	}
+
+	if (limit && shared && batchcount)
+		goto skip_setup;
 	/*
 	 * The head array serves three purposes:
 	 * - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -4171,7 +4168,9 @@
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
+	batchcount = (limit + 1) / 2;
+skip_setup:
+	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
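do_tune_cpucache() above now fans the parent's tunables out to every live per-memcg child, and enable_cpucache() seeds new children from the root's current values. A sketch of the effect (the cache name and numbers are illustrative):

/* echo "dentry 120 60 8" > /proc/slabinfo tunes the root 'dentry'
 * cache; then, still under slab_mutex:
 *
 *   for_each_memcg_cache_index(i)
 *       __do_tune_cpucache(cache_from_memcg(root, i), 120, 60, 8, gfp);
 *
 * Children created later copy limit/batchcount/shared from the root
 * in enable_cpucache() and skip the heuristic sizing entirely.
 */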
@@ -4276,54 +4275,8 @@
 }
 
 #ifdef CONFIG_SLABINFO
-
-static void print_slabinfo_header(struct seq_file *m)
+void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 {
-	/*
-	 * Output format version, so at least we can change it
-	 * without _too_ many complaints.
-	 */
-#if STATS
-	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
-#else
-	seq_puts(m, "slabinfo - version: 2.1\n");
-#endif
-	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
-		 "<objperslab> <pagesperslab>");
-	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
-	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
-#if STATS
-	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
-		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
-	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
-#endif
-	seq_putc(m, '\n');
-}
-
-static void *s_start(struct seq_file *m, loff_t *pos)
-{
-	loff_t n = *pos;
-
-	mutex_lock(&slab_mutex);
-	if (!n)
-		print_slabinfo_header(m);
-
-	return seq_list_start(&slab_caches, *pos);
-}
-
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
-	return seq_list_next(p, &slab_caches, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
-{
-	mutex_unlock(&slab_mutex);
-}
-
-static int s_show(struct seq_file *m, void *p)
-{
-	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
 	struct slab *slabp;
 	unsigned long active_objs;
 	unsigned long num_objs;
@@ -4378,13 +4331,20 @@
 	if (error)
 		printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
 
-	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
-		   name, active_objs, num_objs, cachep->size,
-		   cachep->num, (1 << cachep->gfporder));
-	seq_printf(m, " : tunables %4u %4u %4u",
-		   cachep->limit, cachep->batchcount, cachep->shared);
-	seq_printf(m, " : slabdata %6lu %6lu %6lu",
-		   active_slabs, num_slabs, shared_avail);
+	sinfo->active_objs = active_objs;
+	sinfo->num_objs = num_objs;
+	sinfo->active_slabs = active_slabs;
+	sinfo->num_slabs = num_slabs;
+	sinfo->shared_avail = shared_avail;
+	sinfo->limit = cachep->limit;
+	sinfo->batchcount = cachep->batchcount;
+	sinfo->shared = cachep->shared;
+	sinfo->objects_per_slab = cachep->num;
+	sinfo->cache_order = cachep->gfporder;
+}
+
+void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
+{
 #if STATS
 	{			/* list3 stats */
 		unsigned long high = cachep->high_mark;
@@ -4414,31 +4374,8 @@
 			   allochit, allocmiss, freehit, freemiss);
 	}
 #endif
-	seq_putc(m, '\n');
-	return 0;
 }
 
-/*
- * slabinfo_op - iterator that generates /proc/slabinfo
- *
- * Output layout:
- * cache-name
- * num-active-objs
- * total-objs
- * object size
- * num-active-slabs
- * total-slabs
- * num-pages-per-slab
- * + further values on SMP and with statistics enabled
- */
-
-static const struct seq_operations slabinfo_op = {
-	.start = s_start,
-	.next = s_next,
-	.stop = s_stop,
-	.show = s_show,
-};
-
 #define MAX_SLABINFO_WRITE 128
 /**
  * slabinfo_write - Tuning for the slab allocator
@@ -4447,7 +4384,7 @@
  * @count: data length
  * @ppos: unused
  */
-static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 		       size_t count, loff_t *ppos)
 {
 	char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
@@ -4490,19 +4427,6 @@
 	return res;
 }
 
-static int slabinfo_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &slabinfo_op);
-}
-
-static const struct file_operations proc_slabinfo_operations = {
-	.open		= slabinfo_open,
-	.read		= seq_read,
-	.write		= slabinfo_write,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 #ifdef CONFIG_DEBUG_SLAB_LEAK
 
 static void *leaks_start(struct seq_file *m, loff_t *pos)
@@ -4631,6 +4555,16 @@
 	return 0;
 }
 
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	return seq_list_next(p, &slab_caches, pos);
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&slab_mutex);
+}
+
 static const struct seq_operations slabstats_op = {
 	.start = leaks_start,
 	.next = s_next,
@@ -4665,7 +4599,6 @@
 
 static int __init slab_proc_init(void)
 {
-	proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations);
 #ifdef CONFIG_DEBUG_SLAB_LEAK
 	proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
 #endif
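The slab.c boot rework above funnels the kmem_cache size computation through create_boot_cache(), and setup_nodelists_pointer() makes explicit that the per-node list pointers live in the same allocation, directly after the per-CPU array pointers. A sketch of the layout these hunks rely on (not a complete struct):

/* One allocation, two variable-length tails:
 *
 *   struct kmem_cache
 *     ...fixed management fields...
 *     struct array_cache *array[nr_cpu_ids];       <- per-CPU caches
 *     struct kmem_list3  *nodelists[nr_node_ids];  <- per-node lists
 */
size_t sz = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
	    nr_node_ids * sizeof(struct kmem_list3 *);
/* setup_nodelists_pointer() then aims nodelists at the second tail:
 *   cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
 */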
diff --git a/mm/slab.h b/mm/slab.h
index 7deeb44..34a98d6 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -32,19 +32,201 @@
 /* The slab cache that manages slab cache information */
 extern struct kmem_cache *kmem_cache;
 
+unsigned long calculate_alignment(unsigned long flags,
+		unsigned long align, unsigned long size);
+
 /* Functions provided by the slab allocators */
 extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags);
 
+extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
+			unsigned long flags);
+extern void create_boot_cache(struct kmem_cache *, const char *name,
+			size_t size, unsigned long flags);
+
+struct mem_cgroup;
 #ifdef CONFIG_SLUB
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-	size_t align, unsigned long flags, void (*ctor)(void *));
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *));
 #else
-static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-	size_t align, unsigned long flags, void (*ctor)(void *))
+static inline struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *))
 { return NULL; }
 #endif
 
 
+/* Legal flag mask for kmem_cache_create(), for various configurations */
+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
+			 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS)
+
+#if defined(CONFIG_DEBUG_SLAB)
+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+#elif defined(CONFIG_SLUB_DEBUG)
+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
+			  SLAB_TRACE | SLAB_DEBUG_FREE)
+#else
+#define SLAB_DEBUG_FLAGS (0)
+#endif
+
+#if defined(CONFIG_SLAB)
+#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
+			  SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | SLAB_NOTRACK)
+#elif defined(CONFIG_SLUB)
+#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
+			  SLAB_TEMPORARY | SLAB_NOTRACK)
+#else
+#define SLAB_CACHE_FLAGS (0)
+#endif
+
+#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
+
 int __kmem_cache_shutdown(struct kmem_cache *);
 
+struct seq_file;
+struct file;
+
+struct slabinfo {
+	unsigned long active_objs;
+	unsigned long num_objs;
+	unsigned long active_slabs;
+	unsigned long num_slabs;
+	unsigned long shared_avail;
+	unsigned int limit;
+	unsigned int batchcount;
+	unsigned int shared;
+	unsigned int objects_per_slab;
+	unsigned int cache_order;
+};
+
+void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
+void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+		       size_t count, loff_t *ppos);
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+	return !s->memcg_params || s->memcg_params->is_root_cache;
+}
+
+static inline bool cache_match_memcg(struct kmem_cache *cachep,
+				     struct mem_cgroup *memcg)
+{
+	return (is_root_cache(cachep) && !memcg) ||
+				(cachep->memcg_params->memcg == memcg);
+}
+
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+	if (!is_root_cache(s))
+		atomic_add(1 << order, &s->memcg_params->nr_pages);
+}
+
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+	if (is_root_cache(s))
+		return;
+
+	if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
+		mem_cgroup_destroy_cache(s);
+}
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+					struct kmem_cache *p)
+{
+	return (p == s) ||
+		(s->memcg_params && (p == s->memcg_params->root_cache));
+}
+
+/*
+ * We use suffixes to the name in memcg because we can't have caches
+ * created in the system with the same name. But when we print them
+ * locally, it is better to refer to them by the base name.
+ */
+static inline const char *cache_name(struct kmem_cache *s)
+{
+	if (!is_root_cache(s))
+		return s->memcg_params->root_cache->name;
+	return s->name;
+}
+
+static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
+{
+	return s->memcg_params->memcg_caches[idx];
+}
+
+static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
+{
+	if (is_root_cache(s))
+		return s;
+	return s->memcg_params->root_cache;
+}
+#else
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+	return true;
+}
+
+static inline bool cache_match_memcg(struct kmem_cache *cachep,
+				     struct mem_cgroup *memcg)
+{
+	return true;
+}
+
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+}
+
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+}
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+				      struct kmem_cache *p)
+{
+	return true;
+}
+
+static inline const char *cache_name(struct kmem_cache *s)
+{
+	return s->name;
+}
+
+static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
+{
+	return NULL;
+}
+
+static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
+{
+	return s;
+}
+#endif
+
+static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
+{
+	struct kmem_cache *cachep;
+	struct page *page;
+
+	/*
+	 * When kmemcg is not being used, both assignments should return the
+	 * same value, but we don't want to pay the assignment price in that
+	 * case. If it is not compiled in, the compiler should be smart enough
+	 * not to even do the assignment. In that case, slab_equal_or_root
+	 * will also be a constant.
+	 */
+	if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
+		return s;
+
+	page = virt_to_head_page(x);
+	cachep = page->slab_cache;
+	if (slab_equal_or_root(cachep, s))
+		return cachep;
+
+	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
+		__func__, cachep->name, s->name);
+	WARN_ON_ONCE(1);
+	return s;
+}
 #endif
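The cache_from_obj() helper added above is what lets kmem_cache_free() accept either a root cache or one of its per-memcg children while still catching frees against an unrelated cache. A hedged illustration (the cache names are hypothetical; the lookup is skipped unless kmemcg or SLAB_DEBUG_FREE is active):

struct kmem_cache *a = kmem_cache_create("sketch-a", 64, 0, 0, NULL);
struct kmem_cache *b = kmem_cache_create("sketch-b", 64, 0, 0, NULL);
void *p = kmem_cache_alloc(a, GFP_KERNEL);

/* cache_from_obj(b, p) looks up p's page->slab_cache (a), sees it is
 * neither b nor b's memcg root, prints the "Wrong slab cache" error,
 * warns once, and falls back to the caller-supplied cache. */
kmem_cache_free(b, p);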
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 069a24e6..3f3cd97 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -13,9 +13,12 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/uaccess.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
+#include <linux/memcontrol.h>
 
 #include "slab.h"
 
@@ -25,7 +28,8 @@
 struct kmem_cache *kmem_cache;
 
 #ifdef CONFIG_DEBUG_VM
-static int kmem_cache_sanity_check(const char *name, size_t size)
+static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
+				   size_t size)
 {
 	struct kmem_cache *s = NULL;
 
@@ -51,7 +55,13 @@
 			continue;
 		}
 
-		if (!strcmp(s->name, name)) {
+		/*
+		 * For simplicity, we won't check this in the list of memcg
+		 * caches. We have control over memcg naming, and if there
+		 * aren't duplicates in the global list, there won't be any
+		 * duplicates in the memcg lists either.
+		 */
+		if (!memcg && !strcmp(s->name, name)) {
 			pr_err("%s (%s): Cache name already exists.\n",
 			       __func__, name);
 			dump_stack();
@@ -64,12 +74,69 @@
 	return 0;
 }
 #else
-static inline int kmem_cache_sanity_check(const char *name, size_t size)
+static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
+					  const char *name, size_t size)
 {
 	return 0;
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+int memcg_update_all_caches(int num_memcgs)
+{
+	struct kmem_cache *s;
+	int ret = 0;
+	mutex_lock(&slab_mutex);
+
+	list_for_each_entry(s, &slab_caches, list) {
+		if (!is_root_cache(s))
+			continue;
+
+		ret = memcg_update_cache_size(s, num_memcgs);
+		/*
+		 * See comment in memcontrol.c, memcg_update_cache_size:
+		 * Instead of freeing the memory, we'll just leave the caches
+		 * up to this point in an updated state.
+		 */
+		if (ret)
+			goto out;
+	}
+
+	memcg_update_array_size(num_memcgs);
+out:
+	mutex_unlock(&slab_mutex);
+	return ret;
+}
+#endif
+
+/*
+ * Figure out what the alignment of the objects will be given a set of
+ * flags, a user-specified alignment and the size of the objects.
+ */
+unsigned long calculate_alignment(unsigned long flags,
+		unsigned long align, unsigned long size)
+{
+	/*
+	 * If the user wants hardware cache aligned objects then follow that
+	 * suggestion if the object is sufficiently large.
+	 *
+	 * The hardware cache alignment cannot override the specified
+	 * alignment though. If that is greater, use it.
+	 */
+	if (flags & SLAB_HWCACHE_ALIGN) {
+		unsigned long ralign = cache_line_size();
+		while (size <= ralign / 2)
+			ralign /= 2;
+		align = max(align, ralign);
+	}
+
+	if (align < ARCH_SLAB_MINALIGN)
+		align = ARCH_SLAB_MINALIGN;
+
+	return ALIGN(align, sizeof(void *));
+}
+
 /*
  * kmem_cache_create - Create a cache.
  * @name: A string which is used in /proc/slabinfo to identify this cache.
@@ -95,8 +162,10 @@
  * as davem.
  */
 
-struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
-		unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
+			size_t align, unsigned long flags, void (*ctor)(void *),
+			struct kmem_cache *parent_cache)
 {
 	struct kmem_cache *s = NULL;
 	int err = 0;
@@ -104,19 +173,33 @@
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 
-	if (!kmem_cache_sanity_check(name, size) == 0)
+	if (!kmem_cache_sanity_check(memcg, name, size) == 0)
 		goto out_locked;
 
+	/*
+	 * Some allocators will constrain the set of valid flags to a subset
+	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
+	 * case, and we'll just provide them with a sanitized version of the
+	 * passed flags.
+	 */
+	flags &= CACHE_CREATE_MASK;
 
-	s = __kmem_cache_alias(name, size, align, flags, ctor);
+	s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
 	if (s)
 		goto out_locked;
 
 	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
 	if (s) {
 		s->object_size = s->size = size;
-		s->align = align;
+		s->align = calculate_alignment(flags, align, size);
 		s->ctor = ctor;
+
+		if (memcg_register_cache(memcg, s, parent_cache)) {
+			kmem_cache_free(kmem_cache, s);
+			err = -ENOMEM;
+			goto out_locked;
+		}
+
 		s->name = kstrdup(name, GFP_KERNEL);
 		if (!s->name) {
 			kmem_cache_free(kmem_cache, s);
@@ -126,10 +209,9 @@
 
 		err = __kmem_cache_create(s, flags);
 		if (!err) {
-
 			s->refcount = 1;
 			list_add(&s->list, &slab_caches);
-
+			memcg_cache_list_add(memcg, s);
 		} else {
 			kfree(s->name);
 			kmem_cache_free(kmem_cache, s);
@@ -157,10 +239,20 @@
 
 	return s;
 }
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t align,
+		  unsigned long flags, void (*ctor)(void *))
+{
+	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
+}
 EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+	/* Destroy all the child caches if we aren't a memcg cache */
+	kmem_cache_destroy_memcg_children(s);
+
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 	s->refcount--;
@@ -172,6 +264,7 @@
 			if (s->flags & SLAB_DESTROY_BY_RCU)
 				rcu_barrier();
 
+			memcg_release_cache(s);
 			kfree(s->name);
 			kmem_cache_free(kmem_cache, s);
 		} else {
@@ -192,3 +285,182 @@
 {
 	return slab_state >= UP;
 }
+
+#ifndef CONFIG_SLOB
+/* Create a cache during boot when no slab services are available yet */
+void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
+		unsigned long flags)
+{
+	int err;
+
+	s->name = name;
+	s->size = s->object_size = size;
+	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+	err = __kmem_cache_create(s, flags);
+
+	if (err)
+		panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n",
+					name, size, err);
+
+	s->refcount = -1;	/* Exempt from merging for now */
+}
+
+struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
+				unsigned long flags)
+{
+	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+
+	if (!s)
+		panic("Out of memory when creating slab %s\n", name);
+
+	create_boot_cache(s, name, size, flags);
+	list_add(&s->list, &slab_caches);
+	s->refcount = 1;
+	return s;
+}
+
+#endif /* !CONFIG_SLOB */
+
+
+#ifdef CONFIG_SLABINFO
+void print_slabinfo_header(struct seq_file *m)
+{
+	/*
+	 * Output format version, so at least we can change it
+	 * without _too_ many complaints.
+	 */
+#ifdef CONFIG_DEBUG_SLAB
+	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
+#else
+	seq_puts(m, "slabinfo - version: 2.1\n");
+#endif
+	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
+		 "<objperslab> <pagesperslab>");
+	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+#ifdef CONFIG_DEBUG_SLAB
+	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
+		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
+	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
+#endif
+	seq_putc(m, '\n');
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&slab_mutex);
+	if (!n)
+		print_slabinfo_header(m);
+
+	return seq_list_start(&slab_caches, *pos);
+}
+
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	return seq_list_next(p, &slab_caches, pos);
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&slab_mutex);
+}
+
+static void
+memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
+{
+	struct kmem_cache *c;
+	struct slabinfo sinfo;
+	int i;
+
+	if (!is_root_cache(s))
+		return;
+
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg(s, i);
+		if (!c)
+			continue;
+
+		memset(&sinfo, 0, sizeof(sinfo));
+		get_slabinfo(c, &sinfo);
+
+		info->active_slabs += sinfo.active_slabs;
+		info->num_slabs += sinfo.num_slabs;
+		info->shared_avail += sinfo.shared_avail;
+		info->active_objs += sinfo.active_objs;
+		info->num_objs += sinfo.num_objs;
+	}
+}
+
+int cache_show(struct kmem_cache *s, struct seq_file *m)
+{
+	struct slabinfo sinfo;
+
+	memset(&sinfo, 0, sizeof(sinfo));
+	get_slabinfo(s, &sinfo);
+
+	memcg_accumulate_slabinfo(s, &sinfo);
+
+	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
+		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
+		   sinfo.objects_per_slab, (1 << sinfo.cache_order));
+
+	seq_printf(m, " : tunables %4u %4u %4u",
+		   sinfo.limit, sinfo.batchcount, sinfo.shared);
+	seq_printf(m, " : slabdata %6lu %6lu %6lu",
+		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
+	slabinfo_show_stats(m, s);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static int s_show(struct seq_file *m, void *p)
+{
+	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+
+	if (!is_root_cache(s))
+		return 0;
+	return cache_show(s, m);
+}
+
+/*
+ * slabinfo_op - iterator that generates /proc/slabinfo
+ *
+ * Output layout:
+ * cache-name
+ * num-active-objs
+ * total-objs
+ * object size
+ * num-active-slabs
+ * total-slabs
+ * num-pages-per-slab
+ * + further values on SMP and with statistics enabled
+ */
+static const struct seq_operations slabinfo_op = {
+	.start = s_start,
+	.next = s_next,
+	.stop = s_stop,
+	.show = s_show,
+};
+
+static int slabinfo_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &slabinfo_op);
+}
+
+static const struct file_operations proc_slabinfo_operations = {
+	.open		= slabinfo_open,
+	.read		= seq_read,
+	.write          = slabinfo_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init slab_proc_init(void)
+{
+	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
+	return 0;
+}
+module_init(slab_proc_init);
+#endif /* CONFIG_SLABINFO */
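calculate_alignment() above is now the single copy shared by all the allocators (the SLUB duplicate is deleted further down). A worked example of the halving loop, assuming 64-byte cache lines:

/* kmem_cache_create("sketch", 20, 8, SLAB_HWCACHE_ALIGN, NULL):
 *   ralign = 64;  20 <= 32 -> ralign = 32;  20 <= 16 fails -> stop
 *   align  = max(8, 32) = 32;  ALIGN(32, sizeof(void *)) = 32
 * Objects are padded to 32 bytes, so two of them share each cache
 * line instead of each burning a full 64-byte line.
 */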
diff --git a/mm/slob.c b/mm/slob.c
index 1e921c5..a99fdf7 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -28,9 +28,8 @@
  * from kmalloc are prepended with a 4-byte header with the kmalloc size.
  * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
  * alloc_pages() directly, allocating compound pages so the page order
- * does not have to be separately tracked, and also stores the exact
- * allocation size in page->private so that it can be used to accurately
- * provide ksize(). These objects are detected in kfree() because slob_page()
+ * does not have to be separately tracked.
+ * These objects are detected in kfree() because PageSlab()
  * is false for them.
  *
  * SLAB is emulated on top of SLOB by simply calling constructors and
@@ -59,7 +58,6 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include "slab.h"
 
 #include <linux/mm.h>
 #include <linux/swap.h> /* struct reclaim_state */
@@ -74,6 +72,7 @@
 
 #include <linux/atomic.h>
 
+#include "slab.h"
 /*
  * slob_block has a field 'units', which indicates size of block if +ve,
  * or offset of next block if -ve (in SLOB_UNITs).
@@ -124,7 +123,6 @@
 
 #define SLOB_UNIT sizeof(slob_t)
 #define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
-#define SLOB_ALIGN L1_CACHE_BYTES
 
 /*
  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
@@ -455,11 +453,6 @@
 		if (likely(order))
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
-		if (ret) {
-			struct page *page;
-			page = virt_to_page(ret);
-			page->private = size;
-		}
 
 		trace_kmalloc_node(caller, ret,
 				   size, PAGE_SIZE << order, gfp, node);
@@ -506,7 +499,7 @@
 		unsigned int *m = (unsigned int *)(block - align);
 		slob_free(m, *m + align);
 	} else
-		put_page(sp);
+		__free_pages(sp, compound_order(sp));
 }
 EXPORT_SYMBOL(kfree);
 
@@ -514,37 +507,30 @@
 size_t ksize(const void *block)
 {
 	struct page *sp;
+	int align;
+	unsigned int *m;
 
 	BUG_ON(!block);
 	if (unlikely(block == ZERO_SIZE_PTR))
 		return 0;
 
 	sp = virt_to_page(block);
-	if (PageSlab(sp)) {
-		int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
-		unsigned int *m = (unsigned int *)(block - align);
-		return SLOB_UNITS(*m) * SLOB_UNIT;
-	} else
-		return sp->private;
+	if (unlikely(!PageSlab(sp)))
+		return PAGE_SIZE << compound_order(sp);
+
+	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+	m = (unsigned int *)(block - align);
+	return SLOB_UNITS(*m) * SLOB_UNIT;
 }
 EXPORT_SYMBOL(ksize);
 
 int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
 {
-	size_t align = c->size;
-
 	if (flags & SLAB_DESTROY_BY_RCU) {
 		/* leave room for rcu footer at the end of object */
 		c->size += sizeof(struct slob_rcu);
 	}
 	c->flags = flags;
-	/* ignore alignment unless it's forced */
-	c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
-	if (c->align < ARCH_SLAB_MINALIGN)
-		c->align = ARCH_SLAB_MINALIGN;
-	if (c->align < align)
-		c->align = align;
-
 	return 0;
 }
 
@@ -558,12 +544,12 @@
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
@@ -608,12 +594,6 @@
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-unsigned int kmem_cache_size(struct kmem_cache *c)
-{
-	return c->size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 int __kmem_cache_shutdown(struct kmem_cache *c)
 {
 	/* No way to check for remaining objects */
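With page->private no longer storing the request size, SLOB's ksize() above recovers the large-allocation size purely from the compound page order. The round-trip it relies on, in sketch form:

/* kmalloc(3 * PAGE_SIZE) under SLOB:
 *   order = get_order(3 * PAGE_SIZE) = 2          (4 pages)
 *   page  = alloc_pages(gfp | __GFP_COMP, 2)
 *   ksize = PAGE_SIZE << compound_order(page)     = 4 * PAGE_SIZE
 * ksize() may therefore report more than was requested; kfree()
 * likewise frees compound_order(sp) pages rather than put_page().
 */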
diff --git a/mm/slub.c b/mm/slub.c
index 487f0bd..ba2ca53 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -31,6 +31,7 @@
 #include <linux/fault-inject.h>
 #include <linux/stacktrace.h>
 #include <linux/prefetch.h>
+#include <linux/memcontrol.h>
 
 #include <trace/events/kmem.h>
 
@@ -112,9 +113,6 @@
  * 			the fast path and disables lockless freelists.
  */
 
-#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DEBUG_FREE)
-
 static inline int kmem_cache_debug(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_DEBUG
@@ -179,8 +177,6 @@
 #define __OBJECT_POISON		0x80000000UL /* Poison object */
 #define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */
 
-static int kmem_size = sizeof(struct kmem_cache);
-
 #ifdef CONFIG_SMP
 static struct notifier_block slab_notifier;
 #endif
@@ -205,13 +201,14 @@
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
-
+static void memcg_propagate_slab_attrs(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
 static inline void sysfs_slab_remove(struct kmem_cache *s) { }
 
+static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
 #endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -1092,11 +1089,11 @@
 	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
 		goto out;
 
-	if (unlikely(s != page->slab)) {
+	if (unlikely(s != page->slab_cache)) {
 		if (!PageSlab(page)) {
 			slab_err(s, page, "Attempt to free object(0x%p) "
 				"outside of slab", object);
-		} else if (!page->slab) {
+		} else if (!page->slab_cache) {
 			printk(KERN_ERR
 				"SLUB <none>: no slab for object 0x%p.\n",
 						object);
@@ -1348,6 +1345,7 @@
 	void *start;
 	void *last;
 	void *p;
+	int order;
 
 	BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
@@ -1356,8 +1354,10 @@
 	if (!page)
 		goto out;
 
+	order = compound_order(page);
 	inc_slabs_node(s, page_to_nid(page), page->objects);
-	page->slab = s;
+	memcg_bind_pages(s, order);
+	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
 		SetPageSlabPfmemalloc(page);
@@ -1365,7 +1365,7 @@
 	start = page_address(page);
 
 	if (unlikely(s->flags & SLAB_POISON))
-		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
+		memset(start, POISON_INUSE, PAGE_SIZE << order);
 
 	last = start;
 	for_each_object(p, s, start, page->objects) {
@@ -1406,10 +1406,12 @@
 
 	__ClearPageSlabPfmemalloc(page);
 	__ClearPageSlab(page);
+
+	memcg_release_pages(s, order);
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-	__free_pages(page, order);
+	__free_memcg_kmem_pages(page, order);
 }
 
 #define need_reserve_slab_rcu						\
@@ -1424,7 +1426,7 @@
 	else
 		page = container_of((struct list_head *)h, struct page, lru);
 
-	__free_slab(page->slab, page);
+	__free_slab(page->slab_cache, page);
 }
 
 static void free_slab(struct kmem_cache *s, struct page *page)
@@ -1872,12 +1874,14 @@
 /*
  * Unfreeze all the cpu partial slabs.
  *
- * This function must be called with interrupt disabled.
+ * This function must be called with interrupts disabled
+ * for the cpu using c (or some other means must guarantee that
+ * no concurrent accesses are possible).
  */
-static void unfreeze_partials(struct kmem_cache *s)
+static void unfreeze_partials(struct kmem_cache *s,
+		struct kmem_cache_cpu *c)
 {
 	struct kmem_cache_node *n = NULL, *n2 = NULL;
-	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
 	struct page *page, *discard_page = NULL;
 
 	while ((page = c->partial)) {
@@ -1963,7 +1967,7 @@
 				 * set to the per node partial list.
 				 */
 				local_irq_save(flags);
-				unfreeze_partials(s);
+				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
 				local_irq_restore(flags);
 				oldpage = NULL;
 				pobjects = 0;
@@ -2006,7 +2010,7 @@
 		if (c->page)
 			flush_slab(s, c);
 
-		unfreeze_partials(s);
+		unfreeze_partials(s, c);
 	}
 }
 
@@ -2325,6 +2329,7 @@
 	if (slab_pre_alloc_hook(s, gfpflags))
 		return NULL;
 
+	s = memcg_kmem_get_cache(s, gfpflags);
 redo:
 
 	/*
@@ -2459,7 +2464,6 @@
 	void *prior;
 	void **object = (void *)x;
 	int was_frozen;
-	int inuse;
 	struct page new;
 	unsigned long counters;
 	struct kmem_cache_node *n = NULL;
@@ -2472,13 +2476,17 @@
 		return;
 
 	do {
+		if (unlikely(n)) {
+			spin_unlock_irqrestore(&n->list_lock, flags);
+			n = NULL;
+		}
 		prior = page->freelist;
 		counters = page->counters;
 		set_freepointer(s, object, prior);
 		new.counters = counters;
 		was_frozen = new.frozen;
 		new.inuse--;
-		if ((!new.inuse || !prior) && !was_frozen && !n) {
+		if ((!new.inuse || !prior) && !was_frozen) {
 
 			if (!kmem_cache_debug(s) && !prior)
 
@@ -2503,7 +2511,6 @@
 
 			}
 		}
-		inuse = new.inuse;
 
 	} while (!cmpxchg_double_slab(s, page,
 		prior, counters,
@@ -2529,25 +2536,17 @@
                 return;
         }
 
-	/*
-	 * was_frozen may have been set after we acquired the list_lock in
-	 * an earlier loop. So we need to check it here again.
-	 */
-	if (was_frozen)
-		stat(s, FREE_FROZEN);
-	else {
-		if (unlikely(!inuse && n->nr_partial > s->min_partial))
-                        goto slab_empty;
+	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
+		goto slab_empty;
 
-		/*
-		 * Objects left in the slab. If it was not on the partial list before
-		 * then add it.
-		 */
-		if (unlikely(!prior)) {
-			remove_full(s, page);
-			add_partial(n, page, DEACTIVATE_TO_TAIL);
-			stat(s, FREE_ADD_PARTIAL);
-		}
+	/*
+	 * Objects left in the slab. If it was not on the partial list before
+	 * then add it.
+	 */
+	if (kmem_cache_debug(s) && unlikely(!prior)) {
+		remove_full(s, page);
+		add_partial(n, page, DEACTIVATE_TO_TAIL);
+		stat(s, FREE_ADD_PARTIAL);
 	}
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	return;
@@ -2619,19 +2618,10 @@
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
-	struct page *page;
-
-	page = virt_to_head_page(x);
-
-	if (kmem_cache_debug(s) && page->slab != s) {
-		pr_err("kmem_cache_free: Wrong slab cache. %s but object"
-			" is from  %s\n", page->slab->name, s->name);
-		WARN_ON_ONCE(1);
+	s = cache_from_obj(s, x);
+	if (!s)
 		return;
-	}
-
-	slab_free(s, page, x, _RET_IP_);
-
+	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
 	trace_kmem_cache_free(_RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -2769,32 +2759,6 @@
 	return -ENOSYS;
 }
 
-/*
- * Figure out what the alignment of the objects will be.
- */
-static unsigned long calculate_alignment(unsigned long flags,
-		unsigned long align, unsigned long size)
-{
-	/*
-	 * If the user wants hardware cache aligned objects then follow that
-	 * suggestion if the object is sufficiently large.
-	 *
-	 * The hardware cache alignment cannot override the specified
-	 * alignment though. If that is greater then use it.
-	 */
-	if (flags & SLAB_HWCACHE_ALIGN) {
-		unsigned long ralign = cache_line_size();
-		while (size <= ralign / 2)
-			ralign /= 2;
-		align = max(align, ralign);
-	}
-
-	if (align < ARCH_SLAB_MINALIGN)
-		align = ARCH_SLAB_MINALIGN;
-
-	return ALIGN(align, sizeof(void *));
-}
-
 static void
 init_kmem_cache_node(struct kmem_cache_node *n)
 {
@@ -2928,7 +2892,6 @@
 {
 	unsigned long flags = s->flags;
 	unsigned long size = s->object_size;
-	unsigned long align = s->align;
 	int order;
 
 	/*
@@ -3000,19 +2963,11 @@
 #endif
 
 	/*
-	 * Determine the alignment based on various parameters that the
-	 * user specified and the dynamic determination of cache line size
-	 * on bootup.
-	 */
-	align = calculate_alignment(flags, align, s->object_size);
-	s->align = align;
-
-	/*
 	 * SLUB stores one object immediately after another beginning from
 	 * offset 0. In order to align the objects we have to simply size
 	 * each object to conform to the alignment.
 	 */
-	size = ALIGN(size, align);
+	size = ALIGN(size, s->align);
 	s->size = size;
 	if (forced_order >= 0)
 		order = forced_order;
@@ -3041,7 +2996,6 @@
 		s->max = s->oo;
 
 	return !!oo_objects(s->oo);
-
 }
 
 static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
@@ -3127,15 +3081,6 @@
 	return -EINVAL;
 }
 
-/*
- * Determine the size of a slab object
- */
-unsigned int kmem_cache_size(struct kmem_cache *s)
-{
-	return s->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
 							const char *text)
 {
@@ -3208,8 +3153,19 @@
 {
 	int rc = kmem_cache_close(s);
 
-	if (!rc)
+	if (!rc) {
+		/*
+		 * We use the same locking strategy as around sysfs_slab_add;
+		 * see __kmem_cache_create. Because this is pretty much the last
+		 * operation we do and the lock will be released shortly after
+		 * that in slab_common.c, we could just move sysfs_slab_remove
+		 * to a later point in common code. We should do that when we
+		 * have a common sysfs framework for all allocators.
+		 */
+		mutex_unlock(&slab_mutex);
 		sysfs_slab_remove(s);
+		mutex_lock(&slab_mutex);
+	}
 
 	return rc;
 }
@@ -3261,32 +3217,6 @@
 
 __setup("slub_nomerge", setup_slub_nomerge);
 
-static struct kmem_cache *__init create_kmalloc_cache(const char *name,
-						int size, unsigned int flags)
-{
-	struct kmem_cache *s;
-
-	s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-
-	s->name = name;
-	s->size = s->object_size = size;
-	s->align = ARCH_KMALLOC_MINALIGN;
-
-	/*
-	 * This function is called with IRQs disabled during early-boot on
-	 * single CPU so there's no need to take slab_mutex here.
-	 */
-	if (kmem_cache_open(s, flags))
-		goto panic;
-
-	list_add(&s->list, &slab_caches);
-	return s;
-
-panic:
-	panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
-	return NULL;
-}
-
 /*
  * Conversion table for small slabs sizes / 8 to the index in the
  * kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -3372,7 +3302,7 @@
 	struct page *page;
 	void *ptr = NULL;
 
-	flags |= __GFP_COMP | __GFP_NOTRACK;
+	flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
 	page = alloc_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
@@ -3424,7 +3354,7 @@
 		return PAGE_SIZE << compound_order(page);
 	}
 
-	return slab_ksize(page->slab);
+	return slab_ksize(page->slab_cache);
 }
 EXPORT_SYMBOL(ksize);
 
@@ -3449,8 +3379,8 @@
 	}
 
 	slab_lock(page);
-	if (on_freelist(page->slab, page, object)) {
-		object_err(page->slab, page, object, "Object is on free-list");
+	if (on_freelist(page->slab_cache, page, object)) {
+		object_err(page->slab_cache, page, object, "Object is on free-list");
 		rv = false;
 	} else {
 		rv = true;
@@ -3478,10 +3408,10 @@
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kmemleak_free(x);
-		__free_pages(page, compound_order(page));
+		__free_memcg_kmem_pages(page, compound_order(page));
 		return;
 	}
-	slab_free(page->slab, page, object, _RET_IP_);
+	slab_free(page->slab_cache, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3676,15 +3606,16 @@
 
 /*
  * Used for early kmem_cache structures that were allocated using
- * the page allocator
+ * the page allocator. Allocate them properly, then fix up the pointers
+ * that may be pointing to the wrong kmem_cache structure.
  */
 
-static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
+static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 {
 	int node;
+	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
 
-	list_add(&s->list, &slab_caches);
-	s->refcount = -1;
+	memcpy(s, static_cache, kmem_cache->object_size);
 
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
@@ -3692,78 +3623,52 @@
 
 		if (n) {
 			list_for_each_entry(p, &n->partial, lru)
-				p->slab = s;
+				p->slab_cache = s;
 
 #ifdef CONFIG_SLUB_DEBUG
 			list_for_each_entry(p, &n->full, lru)
-				p->slab = s;
+				p->slab_cache = s;
 #endif
 		}
 	}
+	list_add(&s->list, &slab_caches);
+	return s;
 }
 
 void __init kmem_cache_init(void)
 {
+	static __initdata struct kmem_cache boot_kmem_cache,
+		boot_kmem_cache_node;
 	int i;
-	int caches = 0;
-	struct kmem_cache *temp_kmem_cache;
-	int order;
-	struct kmem_cache *temp_kmem_cache_node;
-	unsigned long kmalloc_size;
+	int caches = 2;
 
 	if (debug_guardpage_minorder())
 		slub_max_order = 0;
 
-	kmem_size = offsetof(struct kmem_cache, node) +
-			nr_node_ids * sizeof(struct kmem_cache_node *);
+	kmem_cache_node = &boot_kmem_cache_node;
+	kmem_cache = &boot_kmem_cache;
 
-	/* Allocate two kmem_caches from the page allocator */
-	kmalloc_size = ALIGN(kmem_size, cache_line_size());
-	order = get_order(2 * kmalloc_size);
-	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order);
-
-	/*
-	 * Must first have the slab cache available for the allocations of the
-	 * struct kmem_cache_node's. There is special bootstrap code in
-	 * kmem_cache_open for slab_state == DOWN.
-	 */
-	kmem_cache_node = (void *)kmem_cache + kmalloc_size;
-
-	kmem_cache_node->name = "kmem_cache_node";
-	kmem_cache_node->size = kmem_cache_node->object_size =
-		sizeof(struct kmem_cache_node);
-	kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	create_boot_cache(kmem_cache_node, "kmem_cache_node",
+		sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
 
 	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
 
 	/* Able to allocate the per node structures */
 	slab_state = PARTIAL;
 
-	temp_kmem_cache = kmem_cache;
-	kmem_cache->name = "kmem_cache";
-	kmem_cache->size = kmem_cache->object_size = kmem_size;
-	kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	create_boot_cache(kmem_cache, "kmem_cache",
+			offsetof(struct kmem_cache, node) +
+				nr_node_ids * sizeof(struct kmem_cache_node *),
+		       SLAB_HWCACHE_ALIGN);
 
-	kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
-	memcpy(kmem_cache, temp_kmem_cache, kmem_size);
+	kmem_cache = bootstrap(&boot_kmem_cache);
 
 	/*
 	 * Allocate kmem_cache_node properly from the kmem_cache slab.
 	 * kmem_cache_node is separately allocated so no need to
 	 * update any list pointers.
 	 */
-	temp_kmem_cache_node = kmem_cache_node;
-
-	kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
-	memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
-
-	kmem_cache_bootstrap_fixup(kmem_cache_node);
-
-	caches++;
-	kmem_cache_bootstrap_fixup(kmem_cache);
-	caches++;
-	/* Free temporary boot structure */
-	free_pages((unsigned long)temp_kmem_cache, order);
+	kmem_cache_node = bootstrap(&boot_kmem_cache_node);
 
 	/* Now we can use the kmem_cache to allocate kmalloc slabs */
 
@@ -3891,7 +3796,7 @@
 	return 0;
 }
 
-static struct kmem_cache *find_mergeable(size_t size,
+static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
 		size_t align, unsigned long flags, const char *name,
 		void (*ctor)(void *))
 {
@@ -3927,17 +3832,21 @@
 		if (s->size - size >= sizeof(void *))
 			continue;
 
+		if (!cache_match_memcg(s, memcg))
+			continue;
+
 		return s;
 	}
 	return NULL;
 }
 
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-		size_t align, unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *s;
 
-	s = find_mergeable(size, align, flags, name, ctor);
+	s = find_mergeable(memcg, size, align, flags, name, ctor);
 	if (s) {
 		s->refcount++;
 		/*
@@ -3964,6 +3873,11 @@
 	if (err)
 		return err;
 
+	/* Mutex is not taken during early boot */
+	if (slab_state <= UP)
+		return 0;
+
+	memcg_propagate_slab_attrs(s);
 	mutex_unlock(&slab_mutex);
 	err = sysfs_slab_add(s);
 	mutex_lock(&slab_mutex);
@@ -5197,10 +5111,95 @@
 		return -EIO;
 
 	err = attribute->store(s, buf, len);
+#ifdef CONFIG_MEMCG_KMEM
+	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
+		int i;
 
+		mutex_lock(&slab_mutex);
+		if (s->max_attr_size < len)
+			s->max_attr_size = len;
+
+		/*
+		 * This is a best-effort propagation, so this function's return
+		 * value will be determined by the parent cache only. This is
+		 * basically because not all attributes have well-defined
+		 * semantics for rollbacks - most of the actions will have
+		 * permanent effects.
+		 *
+		 * Returning the error value of any child that fails is not
+		 * well defined either, in the sense that a user seeing the
+		 * error code would not be able to tell anything about the
+		 * state of the cache.
+		 *
+		 * Only returning the error code for the parent cache at least
+		 * has well-defined semantics: the cache written to directly
+		 * either failed or succeeded, and only if it succeeded do we
+		 * loop through the descendants with best-effort propagation.
+		 */
+		for_each_memcg_cache_index(i) {
+			struct kmem_cache *c = cache_from_memcg(s, i);
+			if (c)
+				attribute->store(c, buf, len);
+		}
+		mutex_unlock(&slab_mutex);
+	}
+#endif
 	return err;
 }
 
+static void memcg_propagate_slab_attrs(struct kmem_cache *s)
+{
+#ifdef CONFIG_MEMCG_KMEM
+	int i;
+	char *buffer = NULL;
+
+	if (!is_root_cache(s))
+		return;
+
+	/*
+	 * This means this cache had no attribute written, so there is no
+	 * point in copying default values around.
+	 */
+	if (!s->max_attr_size)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
+		char mbuf[64];
+		char *buf;
+		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+
+		if (!attr || !attr->store || !attr->show)
+			continue;
+
+		/*
+		 * It is really bad that we have to allocate here, so we will
+		 * do it only as a fallback. If we actually allocate, though,
+		 * we can just use the allocated buffer until the end.
+		 *
+		 * Most of the slub attributes will tend to be very small in
+		 * size, but sysfs allows buffers up to a page, so buffers that
+		 * large can theoretically occur.
+		 */
+		if (buffer)
+			buf = buffer;
+		else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+			buf = mbuf;
+		else {
+			buffer = (char *) get_zeroed_page(GFP_KERNEL);
+			if (WARN_ON(!buffer))
+				continue;
+			buf = buffer;
+		}
+
+		attr->show(s->memcg_params->root_cache, buf);
+		attr->store(s, buf, strlen(buf));
+	}
+
+	if (buffer)
+		free_page((unsigned long)buffer);
+#endif
+}
+
 static const struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
@@ -5257,6 +5256,12 @@
 	if (p != name + 1)
 		*p++ = '-';
 	p += sprintf(p, "%07d", s->size);
+
+#ifdef CONFIG_MEMCG_KMEM
+	if (!is_root_cache(s))
+		p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
+#endif
+
 	BUG_ON(p > name + ID_STR_LENGTH - 1);
 	return name;
 }
@@ -5265,13 +5270,8 @@
 {
 	int err;
 	const char *name;
-	int unmergeable;
+	int unmergeable = slab_unmergeable(s);
 
-	if (slab_state < FULL)
-		/* Defer until later */
-		return 0;
-
-	unmergeable = slab_unmergeable(s);
 	if (unmergeable) {
 		/*
 		 * Slabcache can never be merged so we can use the name proper.
@@ -5405,49 +5405,14 @@
  * The /proc/slabinfo ABI
  */
 #ifdef CONFIG_SLABINFO
-static void print_slabinfo_header(struct seq_file *m)
-{
-	seq_puts(m, "slabinfo - version: 2.1\n");
-	seq_puts(m, "# name            <active_objs> <num_objs> <object_size> "
-		 "<objperslab> <pagesperslab>");
-	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
-	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
-	seq_putc(m, '\n');
-}
-
-static void *s_start(struct seq_file *m, loff_t *pos)
-{
-	loff_t n = *pos;
-
-	mutex_lock(&slab_mutex);
-	if (!n)
-		print_slabinfo_header(m);
-
-	return seq_list_start(&slab_caches, *pos);
-}
-
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
-	return seq_list_next(p, &slab_caches, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
-{
-	mutex_unlock(&slab_mutex);
-}
-
-static int s_show(struct seq_file *m, void *p)
+void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 {
 	unsigned long nr_partials = 0;
 	unsigned long nr_slabs = 0;
-	unsigned long nr_inuse = 0;
 	unsigned long nr_objs = 0;
 	unsigned long nr_free = 0;
-	struct kmem_cache *s;
 	int node;
 
-	s = list_entry(p, struct kmem_cache, list);
-
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
@@ -5460,41 +5425,21 @@
 		nr_free += count_partial(n, count_free);
 	}
 
-	nr_inuse = nr_objs - nr_free;
-
-	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
-		   nr_objs, s->size, oo_objects(s->oo),
-		   (1 << oo_order(s->oo)));
-	seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
-	seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
-		   0UL);
-	seq_putc(m, '\n');
-	return 0;
+	sinfo->active_objs = nr_objs - nr_free;
+	sinfo->num_objs = nr_objs;
+	sinfo->active_slabs = nr_slabs;
+	sinfo->num_slabs = nr_slabs;
+	sinfo->objects_per_slab = oo_objects(s->oo);
+	sinfo->cache_order = oo_order(s->oo);
 }
 
-static const struct seq_operations slabinfo_op = {
-	.start = s_start,
-	.next = s_next,
-	.stop = s_stop,
-	.show = s_show,
-};
-
-static int slabinfo_open(struct inode *inode, struct file *file)
+void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
 {
-	return seq_open(file, &slabinfo_op);
 }
 
-static const struct file_operations proc_slabinfo_operations = {
-	.open		= slabinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int __init slab_proc_init(void)
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+		       size_t count, loff_t *ppos)
 {
-	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
-	return 0;
+	return -EIO;
 }
-module_init(slab_proc_init);
 #endif /* CONFIG_SLABINFO */
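
For illustration: with this change SLUB only fills in a struct slabinfo, and
the seq_file plumbing moves to common code. A minimal sketch of how a
common-code consumer could reproduce the exact columns the removed s_show()
printed -- the function name show_one_slab() is hypothetical and the struct
slabinfo field types are assumed to match the conversions; the fields
themselves are the ones filled by get_slabinfo() above, with the tunables and
sharedavail columns hard-coded to zero exactly as before:

static void show_one_slab(struct seq_file *m, struct kmem_cache *s)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name,
		   sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));
	seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, 0UL);
	slabinfo_show_stats(m, s);	/* SLUB's hook above is a no-op */
	seq_putc(m, '\n');
}
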
diff --git a/mm/truncate.c b/mm/truncate.c
index d51ce92..c75b736 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -577,29 +577,6 @@
 EXPORT_SYMBOL(truncate_setsize);
 
 /**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @newsize: file offset to start truncating
- *
- * This function is deprecated and truncate_setsize or truncate_pagecache
- * should be used instead, together with filesystem specific block truncation.
- */
-int vmtruncate(struct inode *inode, loff_t newsize)
-{
-	int error;
-
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
-	truncate_setsize(inode, newsize);
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-	return 0;
-}
-EXPORT_SYMBOL(vmtruncate);
-
-/**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
  * @lstart: offset of beginning of hole
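
With vmtruncate() gone, filesystems open-code the two steps the wrapper used
to perform. A minimal sketch of the replacement pattern, following the removed
function's own comment; foofs_setsize()/foofs_truncate_blocks() are
placeholders for the filesystem-specific parts:

static int foofs_setsize(struct inode *inode, loff_t newsize)
{
	int error;

	error = inode_newsize_ok(inode, newsize);
	if (error)
		return error;

	truncate_setsize(inode, newsize);
	foofs_truncate_blocks(inode, newsize);	/* was ->truncate() */
	return 0;
}
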
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7f30961..adc7e90 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1177,7 +1177,11 @@
 }
 
 /*
- * Are there way too many processes in the direct reclaim path already?
+ * A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
+ * then get rescheduled. When massive numbers of tasks are doing page
+ * allocation, such sleeping direct reclaimers may keep piling up on each CPU;
+ * the LRU list then goes small and gets scanned faster than necessary,
+ * leading to unnecessary swapping, thrashing and OOM.
  */
 static int too_many_isolated(struct zone *zone, int file,
 		struct scan_control *sc)
@@ -1198,6 +1202,14 @@
 		isolated = zone_page_state(zone, NR_ISOLATED_ANON);
 	}
 
+	/*
+	 * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
+	 * won't get blocked by normal direct-reclaimers, forming a circular
+	 * deadlock.
+	 */
+	if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
+		inactive >>= 3;
+
 	return isolated > inactive;
 }
 
@@ -2558,7 +2570,7 @@
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 							int *classzone_idx)
 {
-	int all_zones_ok;
+	struct zone *unbalanced_zone;
 	unsigned long balanced;
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
@@ -2592,7 +2604,7 @@
 		unsigned long lru_pages = 0;
 		int has_under_min_watermark_zone = 0;
 
-		all_zones_ok = 1;
+		unbalanced_zone = NULL;
 		balanced = 0;
 
 		/*
@@ -2731,7 +2743,7 @@
 			}
 
 			if (!zone_balanced(zone, testorder, 0, end_zone)) {
-				all_zones_ok = 0;
+				unbalanced_zone = zone;
 				/*
 				 * We are still under min water mark.  This
 				 * means that we have a GFP_ATOMIC allocation
@@ -2764,7 +2776,7 @@
 				pfmemalloc_watermark_ok(pgdat))
 			wake_up(&pgdat->pfmemalloc_wait);
 
-		if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
+		if (!unbalanced_zone || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
 			break;		/* kswapd: all done */
 		/*
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
@@ -2774,7 +2786,7 @@
 			if (has_under_min_watermark_zone)
 				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
 			else
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
+				wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
 		}
 
 		/*
@@ -2793,7 +2805,7 @@
 	 * high-order: Balanced zones must make up at least 25% of the node
 	 *             for the node to be balanced
 	 */
-	if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) {
+	if (unbalanced_zone && (!order || !pgdat_balanced(pgdat, balanced, *classzone_idx))) {
 		cond_resched();
 
 		try_to_freeze();
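
A worked example of the new too_many_isolated() threshold, as a standalone
sketch (the numbers are invented). GFP_KERNEL contains both __GFP_IO and
__GFP_FS, so ordinary direct reclaimers now compare against inactive/8, while
GFP_NOFS/GFP_NOIO callers keep the full threshold and can still make progress
when normal reclaimers are already throttled:

#include <linux/gfp.h>

static bool reclaim_throttled(gfp_t gfp_mask, unsigned long isolated,
			      unsigned long inactive)
{
	if ((gfp_mask & GFP_IOFS) == GFP_IOFS)
		inactive >>= 3;
	return isolated > inactive;
}

/*
 * reclaim_throttled(GFP_KERNEL, 1500, 8000) -> true  (1500 > 1000)
 * reclaim_throttled(GFP_NOFS,   1500, 8000) -> false (1500 <= 8000)
 */
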
diff --git a/mm/vmstat.c b/mm/vmstat.c
index df14808..9800306 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -774,10 +774,20 @@
 
 	"pgrotated",
 
+#ifdef CONFIG_NUMA_BALANCING
+	"numa_pte_updates",
+	"numa_hint_faults",
+	"numa_hint_faults_local",
+	"numa_pages_migrated",
+#endif
+#ifdef CONFIG_MIGRATION
+	"pgmigrate_success",
+	"pgmigrate_fail",
+#endif
 #ifdef CONFIG_COMPACTION
-	"compact_blocks_moved",
-	"compact_pages_moved",
-	"compact_pagemigrate_failed",
+	"compact_migrate_scanned",
+	"compact_free_scanned",
+	"compact_isolated",
 	"compact_stall",
 	"compact_fail",
 	"compact_success",
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 35b8911..fd05c81 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -39,6 +39,7 @@
 #include <linux/inet.h>
 #include <linux/idr.h>
 #include <linux/file.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
@@ -325,7 +326,7 @@
 		int count = nr_pages;
 		while (nr_pages) {
 			s = rest_of_page(data);
-			pages[index++] = virt_to_page(data);
+			pages[index++] = kmap_to_page(data);
 			data += s;
 			nr_pages--;
 		}
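
Background for the one-liner above: virt_to_page() is only valid for addresses
in the kernel's linear mapping, but the 9p payload pointer may come from
kmap() on a highmem page, for which virt_to_page() computes a bogus struct
page. kmap_to_page() (from the newly included linux/highmem.h) handles both
cases -- a sketch of the distinction:

#include <linux/highmem.h>

static struct page *payload_page(void *data)
{
	/* correct for lowmem pointers *and* kmap'd highmem pointers */
	return kmap_to_page(data);
}
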
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index f49da58..350bf62 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -14,49 +14,45 @@
 			 struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
-	return sprintf(buf, "%s\n", adev->type);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type);
 }
 
 static ssize_t show_address(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
-	int i;
 
-	for (i = 0; i < (ESI_LEN - 1); i++)
-		pos += sprintf(pos, "%02x:", adev->esi[i]);
-	pos += sprintf(pos, "%02x\n", adev->esi[i]);
-
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi);
 }
 
 static ssize_t show_atmaddress(struct device *cdev,
 			       struct device_attribute *attr, char *buf)
 {
 	unsigned long flags;
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 	struct atm_dev_addr *aaddr;
 	int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin;
-	int i, j;
+	int i, j, count = 0;
 
 	spin_lock_irqsave(&adev->lock, flags);
 	list_for_each_entry(aaddr, &adev->local, entry) {
 		for (i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) {
 			if (j == *fmt) {
-				pos += sprintf(pos, ".");
+				count += scnprintf(buf + count,
+						   PAGE_SIZE - count, ".");
 				++fmt;
 				j = 0;
 			}
-			pos += sprintf(pos, "%02x",
-				       aaddr->addr.sas_addr.prv[i]);
+			count += scnprintf(buf + count,
+					   PAGE_SIZE - count, "%02x",
+					   aaddr->addr.sas_addr.prv[i]);
 		}
-		pos += sprintf(pos, "\n");
+		count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
 	}
 	spin_unlock_irqrestore(&adev->lock, flags);
 
-	return pos - buf;
+	return count;
 }
 
 static ssize_t show_atmindex(struct device *cdev,
@@ -64,25 +60,21 @@
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
 
-	return sprintf(buf, "%d\n", adev->number);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number);
 }
 
 static ssize_t show_carrier(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 
-	pos += sprintf(pos, "%d\n",
-		       adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
-
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			 adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
 }
 
 static ssize_t show_link_rate(struct device *cdev,
 			      struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 	int link_rate;
 
@@ -100,9 +92,7 @@
 	default:
 		link_rate = adev->link_rate * 8 * 53;
 	}
-	pos += sprintf(pos, "%d\n", link_rate);
-
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
 }
 
 static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
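
The conversions above all follow the same bounded-accumulation idiom: sysfs
hands show() a PAGE_SIZE buffer, and scnprintf() both caps each write to the
remaining space and returns the bytes actually emitted. A generic sketch of
the multi-line variant used in show_atmaddress(), where nitems and item() are
placeholders:

static ssize_t show_items(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	int i, count = 0;

	for (i = 0; i < nitems; i++)
		count += scnprintf(buf + count, PAGE_SIZE - count,
				   "%d\n", item(i));
	return count;	/* never exceeds PAGE_SIZE - 1 */
}
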
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 6f0a2ee..acc9f4c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -83,9 +83,12 @@
 				if (port) {
 					struct br_mdb_entry e;
 					e.ifindex = port->dev->ifindex;
-					e.addr.u.ip4 = p->addr.u.ip4;
+					e.state = p->state;
+					if (p->addr.proto == htons(ETH_P_IP))
+						e.addr.u.ip4 = p->addr.u.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
-					e.addr.u.ip6 = p->addr.u.ip6;
+					if (p->addr.proto == htons(ETH_P_IPV6))
+						e.addr.u.ip6 = p->addr.u.ip6;
 #endif
 					e.addr.proto = p->addr.proto;
 					if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
@@ -253,6 +256,8 @@
 #endif
 	} else
 		return false;
+	if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
+		return false;
 
 	return true;
 }
@@ -310,7 +315,7 @@
 }
 
 static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
-			    struct br_ip *group)
+			    struct br_ip *group, unsigned char state)
 {
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
@@ -336,7 +341,7 @@
 			break;
 	}
 
-	p = br_multicast_new_port_group(port, group, *pp);
+	p = br_multicast_new_port_group(port, group, *pp, state);
 	if (unlikely(!p))
 		return -ENOMEM;
 	rcu_assign_pointer(*pp, p);
@@ -373,7 +378,7 @@
 #endif
 
 	spin_lock_bh(&br->multicast_lock);
-	ret = br_mdb_add_group(br, p, &ip);
+	ret = br_mdb_add_group(br, p, &ip, entry->state);
 	spin_unlock_bh(&br->multicast_lock);
 	return ret;
 }
@@ -479,3 +484,10 @@
 	rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
 }
+
+void br_mdb_uninit(void)
+{
+	rtnl_unregister(PF_BRIDGE, RTM_GETMDB);
+	rtnl_unregister(PF_BRIDGE, RTM_NEWMDB);
+	rtnl_unregister(PF_BRIDGE, RTM_DELMDB);
+}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 1093c89..5391ca4 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -279,7 +279,7 @@
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
-	    hlist_unhashed(&pg->mglist))
+	    hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT)
 		goto out;
 
 	br_multicast_del_pg(br, pg);
@@ -622,7 +622,8 @@
 struct net_bridge_port_group *br_multicast_new_port_group(
 			struct net_bridge_port *port,
 			struct br_ip *group,
-			struct net_bridge_port_group __rcu *next)
+			struct net_bridge_port_group __rcu *next,
+			unsigned char state)
 {
 	struct net_bridge_port_group *p;
 
@@ -632,6 +633,7 @@
 
 	p->addr = *group;
 	p->port = port;
+	p->state = state;
 	rcu_assign_pointer(p->next, next);
 	hlist_add_head(&p->mglist, &port->mglist);
 	setup_timer(&p->timer, br_multicast_port_group_expired,
@@ -674,7 +676,7 @@
 			break;
 	}
 
-	p = br_multicast_new_port_group(port, group, *pp);
+	p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY);
 	if (unlikely(!p))
 		goto err;
 	rcu_assign_pointer(*pp, p);
@@ -1165,7 +1167,6 @@
 		if (max_delay)
 			group = &mld->mld_mca;
 	} else if (skb->len >= sizeof(*mld2q)) {
-		u16 mrc;
 		if (!pskb_may_pull(skb, sizeof(*mld2q))) {
 			err = -EINVAL;
 			goto out;
@@ -1173,8 +1174,7 @@
 		mld2q = (struct mld2_query *)icmp6_hdr(skb);
 		if (!mld2q->mld2q_nsrcs)
 			group = &mld2q->mld2q_mca;
-		mrc = ntohs(mld2q->mld2q_mrc);
-		max_delay = mrc ? MLDV2_MRC(mrc) : 1;
+		max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1;
 	}
 
 	if (!group)
@@ -1633,6 +1633,7 @@
 	del_timer_sync(&br->multicast_querier_timer);
 	del_timer_sync(&br->multicast_query_timer);
 
+	br_mdb_uninit();
 	spin_lock_bh(&br->multicast_lock);
 	mdb = mlock_dereference(br->mdb, br);
 	if (!mdb)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index dead9df..97ba018 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -305,5 +305,4 @@
 void __exit br_netlink_fini(void)
 {
 	rtnl_link_unregister(&br_link_ops);
-	rtnl_unregister_all(PF_BRIDGE);
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f21a739..8d83be5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -83,6 +83,7 @@
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct br_ip			addr;
+	unsigned char			state;
 };
 
 struct net_bridge_mdb_entry
@@ -443,8 +444,10 @@
 extern struct net_bridge_port_group *br_multicast_new_port_group(
 				struct net_bridge_port *port,
 				struct br_ip *group,
-				struct net_bridge_port_group *next);
+				struct net_bridge_port_group *next,
+				unsigned char state);
 extern void br_mdb_init(void);
+extern void br_mdb_uninit(void);
 extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
 			  struct br_ip *group, int type);
 
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a802029..ee71ea2 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -305,7 +305,6 @@
 
 	/* start with defaults */
 	opt->flags = CEPH_OPT_DEFAULT;
-	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
@@ -391,7 +390,7 @@
 
 			/* misc */
 		case Opt_osdtimeout:
-			opt->osd_timeout = intval;
+			pr_warning("ignoring deprecated osdtimeout option\n");
 			break;
 		case Opt_osdkeepalivetimeout:
 			opt->osd_keepalive_timeout = intval;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3ef1759..4d111fd 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2244,22 +2244,62 @@
 
 
 /*
- * Atomically queue work on a connection.  Bump @con reference to
- * avoid races with connection teardown.
+ * Atomically queue work on a connection after the specified delay.
+ * Bump @con reference to avoid races with connection teardown.
+ * Returns 0 if work was queued, or an error code otherwise.
  */
-static void queue_con(struct ceph_connection *con)
+static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
 {
 	if (!con->ops->get(con)) {
-		dout("queue_con %p ref count 0\n", con);
-		return;
+		dout("%s %p ref count 0\n", __func__, con);
+
+		return -ENOENT;
 	}
 
-	if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) {
-		dout("queue_con %p - already queued\n", con);
+	if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
+		dout("%s %p - already queued\n", __func__, con);
 		con->ops->put(con);
-	} else {
-		dout("queue_con %p\n", con);
+
+		return -EBUSY;
 	}
+
+	dout("%s %p %lu\n", __func__, con, delay);
+
+	return 0;
+}
+
+static void queue_con(struct ceph_connection *con)
+{
+	(void) queue_con_delay(con, 0);
+}
+
+static bool con_sock_closed(struct ceph_connection *con)
+{
+	if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags))
+		return false;
+
+#define CASE(x)								\
+	case CON_STATE_ ## x:						\
+		con->error_msg = "socket closed (con state " #x ")";	\
+		break;
+
+	switch (con->state) {
+	CASE(CLOSED);
+	CASE(PREOPEN);
+	CASE(CONNECTING);
+	CASE(NEGOTIATING);
+	CASE(OPEN);
+	CASE(STANDBY);
+	default:
+		pr_warning("%s con %p unrecognized state %lu\n",
+			__func__, con, con->state);
+		con->error_msg = "unrecognized con state";
+		BUG();
+		break;
+	}
+#undef CASE
+
+	return true;
 }
 
 /*
@@ -2273,35 +2313,16 @@
 
 	mutex_lock(&con->mutex);
 restart:
-	if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
-		switch (con->state) {
-		case CON_STATE_CONNECTING:
-			con->error_msg = "connection failed";
-			break;
-		case CON_STATE_NEGOTIATING:
-			con->error_msg = "negotiation failed";
-			break;
-		case CON_STATE_OPEN:
-			con->error_msg = "socket closed";
-			break;
-		default:
-			dout("unrecognized con state %d\n", (int)con->state);
-			con->error_msg = "unrecognized con state";
-			BUG();
-		}
+	if (con_sock_closed(con))
 		goto fault;
-	}
 
 	if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
 		dout("con_work %p backing off\n", con);
-		if (queue_delayed_work(ceph_msgr_wq, &con->work,
-				       round_jiffies_relative(con->delay))) {
-			dout("con_work %p backoff %lu\n", con, con->delay);
-			mutex_unlock(&con->mutex);
-			return;
-		} else {
+		ret = queue_con_delay(con, round_jiffies_relative(con->delay));
+		if (ret) {
 			dout("con_work %p FAILED to back off %lu\n", con,
 			     con->delay);
+			BUG_ON(ret == -ENOENT);
 			set_bit(CON_FLAG_BACKOFF, &con->flags);
 		}
 		goto done;
@@ -2356,7 +2377,7 @@
 static void ceph_fault(struct ceph_connection *con)
 	__releases(con->mutex)
 {
-	pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+	pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
 	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
 	dout("fault %p state %lu to peer %s\n",
 	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
@@ -2398,24 +2419,8 @@
 			con->delay = BASE_DELAY_INTERVAL;
 		else if (con->delay < MAX_DELAY_INTERVAL)
 			con->delay *= 2;
-		con->ops->get(con);
-		if (queue_delayed_work(ceph_msgr_wq, &con->work,
-				       round_jiffies_relative(con->delay))) {
-			dout("fault queued %p delay %lu\n", con, con->delay);
-		} else {
-			con->ops->put(con);
-			dout("fault failed to queue %p delay %lu, backoff\n",
-			     con, con->delay);
-			/*
-			 * In many cases we see a socket state change
-			 * while con_work is running and end up
-			 * queuing (non-delayed) work, such that we
-			 * can't backoff with a delay.  Set a flag so
-			 * that when con_work restarts we schedule the
-			 * delay then.
-			 */
-			set_bit(CON_FLAG_BACKOFF, &con->flags);
-		}
+		set_bit(CON_FLAG_BACKOFF, &con->flags);
+		queue_con(con);
 	}
 
 out_unlock:
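
For reference, what one arm of the CASE() macro in con_sock_closed() above
expands to -- the '#x' stringizing is what builds the per-state error message:

case CON_STATE_OPEN:
	con->error_msg = "socket closed (con state " "OPEN" ")";
	break;

The adjacent string literals concatenate to "socket closed (con state OPEN)".
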
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index c1d756c..780caf6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -221,6 +221,7 @@
 	kref_init(&req->r_kref);
 	init_completion(&req->r_completion);
 	init_completion(&req->r_safe_completion);
+	RB_CLEAR_NODE(&req->r_node);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 	INIT_LIST_HEAD(&req->r_linger_item);
 	INIT_LIST_HEAD(&req->r_linger_osd);
@@ -580,7 +581,7 @@
 
 	dout("__kick_osd_requests osd%d\n", osd->o_osd);
 	err = __reset_osd(osdc, osd);
-	if (err == -EAGAIN)
+	if (err)
 		return;
 
 	list_for_each_entry(req, &osd->o_requests, r_osd_item) {
@@ -607,14 +608,6 @@
 	}
 }
 
-static void kick_osd_requests(struct ceph_osd_client *osdc,
-			      struct ceph_osd *kickosd)
-{
-	mutex_lock(&osdc->request_mutex);
-	__kick_osd_requests(osdc, kickosd);
-	mutex_unlock(&osdc->request_mutex);
-}
-
 /*
  * If the osd connection drops, we need to resubmit all requests.
  */
@@ -628,7 +621,9 @@
 	dout("osd_reset osd%d\n", osd->o_osd);
 	osdc = osd->o_osdc;
 	down_read(&osdc->map_sem);
-	kick_osd_requests(osdc, osd);
+	mutex_lock(&osdc->request_mutex);
+	__kick_osd_requests(osdc, osd);
+	mutex_unlock(&osdc->request_mutex);
 	send_queued(osdc);
 	up_read(&osdc->map_sem);
 }
@@ -647,6 +642,7 @@
 	atomic_set(&osd->o_ref, 1);
 	osd->o_osdc = osdc;
 	osd->o_osd = onum;
+	RB_CLEAR_NODE(&osd->o_node);
 	INIT_LIST_HEAD(&osd->o_requests);
 	INIT_LIST_HEAD(&osd->o_linger_requests);
 	INIT_LIST_HEAD(&osd->o_osd_lru);
@@ -750,6 +746,7 @@
 	if (list_empty(&osd->o_requests) &&
 	    list_empty(&osd->o_linger_requests)) {
 		__remove_osd(osdc, osd);
+		ret = -ENODEV;
 	} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
 			  &osd->o_con.peer_addr,
 			  sizeof(osd->o_con.peer_addr)) == 0 &&
@@ -876,9 +873,9 @@
 			req->r_osd = NULL;
 	}
 
+	list_del_init(&req->r_req_lru_item);
 	ceph_osdc_put_request(req);
 
-	list_del_init(&req->r_req_lru_item);
 	if (osdc->num_requests == 0) {
 		dout(" no requests, canceling timeout\n");
 		__cancel_osd_timeout(osdc);
@@ -910,8 +907,8 @@
 					struct ceph_osd_request *req)
 {
 	dout("__unregister_linger_request %p\n", req);
+	list_del_init(&req->r_linger_item);
 	if (req->r_osd) {
-		list_del_init(&req->r_linger_item);
 		list_del_init(&req->r_linger_osd);
 
 		if (list_empty(&req->r_osd->o_requests) &&
@@ -1090,12 +1087,10 @@
 {
 	struct ceph_osd_client *osdc =
 		container_of(work, struct ceph_osd_client, timeout_work.work);
-	struct ceph_osd_request *req, *last_req = NULL;
+	struct ceph_osd_request *req;
 	struct ceph_osd *osd;
-	unsigned long timeout = osdc->client->options->osd_timeout * HZ;
 	unsigned long keepalive =
 		osdc->client->options->osd_keepalive_timeout * HZ;
-	unsigned long last_stamp = 0;
 	struct list_head slow_osds;
 	dout("timeout\n");
 	down_read(&osdc->map_sem);
@@ -1105,37 +1100,6 @@
 	mutex_lock(&osdc->request_mutex);
 
 	/*
-	 * reset osds that appear to be _really_ unresponsive.  this
-	 * is a failsafe measure.. we really shouldn't be getting to
-	 * this point if the system is working properly.  the monitors
-	 * should mark the osd as failed and we should find out about
-	 * it from an updated osd map.
-	 */
-	while (timeout && !list_empty(&osdc->req_lru)) {
-		req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
-				 r_req_lru_item);
-
-		/* hasn't been long enough since we sent it? */
-		if (time_before(jiffies, req->r_stamp + timeout))
-			break;
-
-		/* hasn't been long enough since it was acked? */
-		if (req->r_request->ack_stamp == 0 ||
-		    time_before(jiffies, req->r_request->ack_stamp + timeout))
-			break;
-
-		BUG_ON(req == last_req && req->r_stamp == last_stamp);
-		last_req = req;
-		last_stamp = req->r_stamp;
-
-		osd = req->r_osd;
-		BUG_ON(!osd);
-		pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
-			   req->r_tid, osd->o_osd);
-		__kick_osd_requests(osdc, osd);
-	}
-
-	/*
 	 * ping osds that are a bit slow.  this ensures that if there
 	 * is a break in the TCP connection we will notice, and reopen
 	 * a connection with that osd (from the fault callback).
@@ -1364,8 +1328,8 @@
 
 		dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
 		     req->r_osd ? req->r_osd->o_osd : -1);
-		__unregister_linger_request(osdc, req);
 		__register_request(osdc, req);
+		__unregister_linger_request(osdc, req);
 	}
 	mutex_unlock(&osdc->request_mutex);
 
@@ -1599,6 +1563,7 @@
 	event->data = data;
 	event->osdc = osdc;
 	INIT_LIST_HEAD(&event->osd_node);
+	RB_CLEAR_NODE(&event->node);
 	kref_init(&event->kref);   /* one ref for us */
 	kref_get(&event->kref);    /* one ref for the caller */
 	init_completion(&event->completion);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 5433fb0..de73214 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -469,6 +469,22 @@
 	return NULL;
 }
 
+const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
+{
+	struct ceph_pg_pool_info *pi;
+
+	if (id == CEPH_NOPOOL)
+		return NULL;
+
+	if (WARN_ON_ONCE(id > (u64) INT_MAX))
+		return NULL;
+
+	pi = __lookup_pg_pool(&map->pg_pools, (int) id);
+
+	return pi ? pi->name : NULL;
+}
+EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
+
 int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
 {
 	struct rb_node *rbp;
@@ -645,10 +661,12 @@
 	ceph_decode_32_safe(p, end, max, bad);
 	while (max--) {
 		ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
+		err = -ENOMEM;
 		pi = kzalloc(sizeof(*pi), GFP_NOFS);
 		if (!pi)
 			goto bad;
 		pi->id = ceph_decode_32(p);
+		err = -EINVAL;
 		ev = ceph_decode_8(p); /* encoding version */
 		if (ev > CEPH_PG_POOL_VERSION) {
 			pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
@@ -664,8 +682,13 @@
 		__insert_pg_pool(&map->pg_pools, pi);
 	}
 
-	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
-		goto bad;
+	if (version >= 5) {
+		err = __decode_pool_names(p, end, map);
+		if (err < 0) {
+			dout("fail to decode pool names");
+			goto bad;
+		}
+	}
 
 	ceph_decode_32_safe(p, end, map->pool_max, bad);
 
@@ -745,7 +768,7 @@
 	return map;
 
 bad:
-	dout("osdmap_decode fail\n");
+	dout("osdmap_decode fail err %d\n", err);
 	ceph_osdmap_destroy(map);
 	return ERR_PTR(err);
 }
@@ -839,6 +862,7 @@
 		if (ev > CEPH_PG_POOL_VERSION) {
 			pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
 				   ev, CEPH_PG_POOL_VERSION);
+			err = -EINVAL;
 			goto bad;
 		}
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
@@ -855,8 +879,11 @@
 		if (err < 0)
 			goto bad;
 	}
-	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
-		goto bad;
+	if (version >= 5) {
+		err = __decode_pool_names(p, end, map);
+		if (err < 0)
+			goto bad;
+	}
 
 	/* old_pool */
 	ceph_decode_32_safe(p, end, len, bad);
@@ -932,15 +959,13 @@
 			(void) __remove_pg_mapping(&map->pg_temp, pgid);
 
 			/* insert */
-			if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
-				err = -EINVAL;
+			err = -EINVAL;
+			if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
 				goto bad;
-			}
+			err = -ENOMEM;
 			pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
-			if (!pg) {
-				err = -ENOMEM;
+			if (!pg)
 				goto bad;
-			}
 			pg->pgid = pgid;
 			pg->len = pglen;
 			for (j = 0; j < pglen; j++)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6456439..8acce01 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -381,6 +381,21 @@
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
+static __net_init int net_ns_net_init(struct net *net)
+{
+	return proc_alloc_inum(&net->proc_inum);
+}
+
+static __net_exit void net_ns_net_exit(struct net *net)
+{
+	proc_free_inum(net->proc_inum);
+}
+
+static struct pernet_operations __net_initdata net_ns_ops = {
+	.init = net_ns_net_init,
+	.exit = net_ns_net_exit,
+};
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -412,6 +427,8 @@
 
 	mutex_unlock(&net_mutex);
 
+	register_pernet_subsys(&net_ns_ops);
+
 	return 0;
 }
 
@@ -630,16 +647,29 @@
 
 static int netns_install(struct nsproxy *nsproxy, void *ns)
 {
+	struct net *net = ns;
+
+	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	put_net(nsproxy->net_ns);
-	nsproxy->net_ns = get_net(ns);
+	nsproxy->net_ns = get_net(net);
 	return 0;
 }
 
+static unsigned int netns_inum(void *ns)
+{
+	struct net *net = ns;
+	return net->proc_inum;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name		= "net",
 	.type		= CLONE_NEWNET,
 	.get		= netns_get,
 	.put		= netns_put,
 	.install	= netns_install,
+	.inum		= netns_inum,
 };
 #endif
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 176ecdb..4f9f5eb 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -439,8 +439,8 @@
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return NULL;
 put_and_exit:
-	bh_unlock_sock(newsk);
-	sock_put(newsk);
+	inet_csk_prepare_forced_close(newsk);
+	dccp_done(newsk);
 	goto exit;
 }
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 56840b2..6e05981 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -585,7 +585,8 @@
 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	if (__inet_inherit_port(sk, newsk) < 0) {
-		sock_put(newsk);
+		inet_csk_prepare_forced_close(newsk);
+		dccp_done(newsk);
 		goto out;
 	}
 	__inet6_hash(newsk, NULL);
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8aa4b11..0a69d07 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -259,20 +259,16 @@
 	if (!cred)
 		return -ENOMEM;
 
-	keyring = key_alloc(&key_type_keyring, ".dns_resolver",
-			    GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
-			    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-			    KEY_USR_VIEW | KEY_USR_READ,
-			    KEY_ALLOC_NOT_IN_QUOTA);
+	keyring = keyring_alloc(".dns_resolver",
+				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
+				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
+				KEY_USR_VIEW | KEY_USR_READ,
+				KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto failed_put_cred;
 	}
 
-	ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
-	if (ret < 0)
-		goto failed_put_key;
-
 	ret = register_key_type(&key_type_dns_resolver);
 	if (ret < 0)
 		goto failed_put_key;
@@ -304,3 +300,4 @@
 module_init(init_dns_resolver)
 module_exit(exit_dns_resolver)
 MODULE_LICENSE("GPL");
+
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 2026542..d0670f0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -710,6 +710,22 @@
 }
 EXPORT_SYMBOL(inet_csk_destroy_sock);
 
+/* This function allows to force a closure of a socket after the call to
+ * tcp/dccp_create_openreq_child().
+ */
+void inet_csk_prepare_forced_close(struct sock *sk)
+{
+	/* sk_clone_lock locked the socket and set refcnt to 2 */
+	bh_unlock_sock(sk);
+	sock_put(sk);
+
+	/* The below has to be done to allow calling inet_csk_destroy_sock */
+	sock_set_flag(sk, SOCK_DEAD);
+	percpu_counter_inc(sk->sk_prot->orphan_count);
+	inet_sk(sk)->inet_num = 0;
+}
+EXPORT_SYMBOL(inet_csk_prepare_forced_close);
+
 int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 {
 	struct inet_sock *inet = inet_sk(sk);
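
A reference-count walk-through of the error path this helper enables, per the
comments in the helper itself (TCP shown; DCCP is analogous):

/*
 *	newsk = tcp_create_openreq_child(...);	 locked, refcnt == 2
 *	inet_csk_prepare_forced_close(newsk);	 unlocked, refcnt == 1,
 *						 SOCK_DEAD set, inet_num = 0
 *	tcp_done(newsk);			 tears the socket down and
 *						 lets the last ref go
 */
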
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1ed2307..54139fa 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1767,10 +1767,8 @@
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return NULL;
 put_and_exit:
-	tcp_clear_xmit_timers(newsk);
-	tcp_cleanup_congestion_control(newsk);
-	bh_unlock_sock(newsk);
-	sock_put(newsk);
+	inet_csk_prepare_forced_close(newsk);
+	tcp_done(newsk);
 	goto exit;
 }
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2068ac4..4ea2448 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -41,6 +41,6 @@
 obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
 
 obj-y += addrconf_core.o exthdrs_core.o
-obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6_offload)
+obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
 
 obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6fca01f..408cac4a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -534,8 +534,7 @@
 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
 	return;
 errout:
-	if (err < 0)
-		rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
+	rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
 }
 
 static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f2a007b..6574175 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1314,6 +1314,12 @@
 
 static void ndisc_redirect_rcv(struct sk_buff *skb)
 {
+	u8 *hdr;
+	struct ndisc_options ndopts;
+	struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
+	u32 ndoptlen = skb->tail - (skb->transport_header +
+				    offsetof(struct rd_msg, opt));
+
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	switch (skb->ndisc_nodetype) {
 	case NDISC_NODETYPE_HOST:
@@ -1330,6 +1336,17 @@
 		return;
 	}
 
+	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
+		return;
+
+	if (!ndopts.nd_opts_rh)
+		return;
+
+	hdr = (u8 *)ndopts.nd_opts_rh;
+	hdr += 8;
+	if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
+		return;
+
 	icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
 }
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6565cf5..93825dd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1288,7 +1288,8 @@
 #endif
 
 	if (__inet_inherit_port(sk, newsk) < 0) {
-		sock_put(newsk);
+		inet_csk_prepare_forced_close(newsk);
+		tcp_done(newsk);
 		goto out;
 	}
 	__inet6_hash(newsk, NULL);
diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c
index e748aed..b7c7f81 100644
--- a/net/mac802154/ieee802154_dev.c
+++ b/net/mac802154/ieee802154_dev.c
@@ -224,9 +224,9 @@
 
 	BUG_ON(!list_empty(&priv->slaves));
 
-	wpan_phy_free(priv->phy);
-
 	mutex_destroy(&priv->slaves_mtx);
+
+	wpan_phy_free(priv->phy);
 }
 EXPORT_SYMBOL(ieee802154_free_device);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c8a1eb6..c0353d5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -669,6 +669,9 @@
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 	int err;
 
+	if (addr_len < sizeof(struct sockaddr_nl))
+		return -EINVAL;
+
 	if (nladdr->nl_family != AF_NETLINK)
 		return -EINVAL;
 
@@ -2059,7 +2062,7 @@
 		struct sock *s = v;
 		struct netlink_sock *nlk = nlk_sk(s);
 
-		seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
+		seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
 			   s,
 			   s->sk_protocol,
 			   nlk->portid,
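
A userspace sketch (not kernel code) of the call the new length check rejects:
with addr_len shorter than sizeof(struct sockaddr_nl), netlink_bind() used to
read nl_pid/nl_groups from bytes of its on-stack copy that the caller never
supplied; it now fails cleanly with -EINVAL:

#include <sys/socket.h>
#include <linux/netlink.h>

int bind_short(int fd)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };

	/* addrlen of 2 covers only nl_family */
	return bind(fd, (struct sockaddr *)&nladdr, 2);
}
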
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index a9edd2e..c262106 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -66,12 +66,36 @@
 	  'cat /proc/net/sctp/sctp_dbg_objcnt'
 
 	  If unsure, say N
+choice
+	prompt "Default SCTP cookie HMAC encoding"
+	default SCTP_COOKIE_HMAC_MD5
+	help
+	  This option sets the default SCTP cookie HMAC algorithm.
+	  If in doubt, select 'md5'.
+
+config SCTP_DEFAULT_COOKIE_HMAC_MD5
+	bool "Enable optional MD5 hmac cookie generation"
+	help
+	  Enable optional MD5 hmac based SCTP cookie generation
+	select SCTP_COOKIE_HMAC_MD5
+
+config SCTP_DEFAULT_COOKIE_HMAC_SHA1
+	bool "Enable optional SHA1 hmac cookie generation"
+	help
+	  Enable optional SHA1 hmac based SCTP cookie generation
+	select SCTP_COOKIE_HMAC_SHA1
+
+config SCTP_DEFAULT_COOKIE_HMAC_NONE
+	bool "Use no hmac alg in SCTP cookie generation"
+	help
+	  Use no hmac algorithm in SCTP cookie generation
+
+endchoice
 
 config SCTP_COOKIE_HMAC_MD5
 	bool "Enable optional MD5 hmac cookie generation"
 	help
 	  Enable optional MD5 hmac based SCTP cookie generation
-	default y
 	select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5
 	select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5
 
@@ -79,7 +103,6 @@
 	bool "Enable optional SHA1 hmac cookie generation"
 	help
 	  Enable optional SHA1 hmac based SCTP cookie generation
-	default y
 	select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1
 	select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1
 
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index bc6cd75..5f7518d 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -122,7 +122,8 @@
 	.llseek = noop_llseek,
 };
 
-sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
+sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
+				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
 				     void *arg,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2c7785b..f898b1c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1191,9 +1191,9 @@
 	net->sctp.cookie_preserve_enable 	= 1;
 
 	/* Default sctp sockets to use md5 as their hmac alg */
-#if defined (CONFIG_CRYPTO_MD5)
+#if defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5)
 	net->sctp.sctp_hmac_alg			= "md5";
-#elif defined (CONFIG_CRYPTO_SHA1)
+#elif defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1)
 	net->sctp.sctp_hmac_alg			= "sha1";
 #else
 	net->sctp.sctp_hmac_alg			= NULL;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 909dc0c..6e5c824 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -192,17 +192,23 @@
 	const void *q;
 	unsigned int seclen;
 	unsigned int timeout;
+	unsigned long now = jiffies;
 	u32 window_size;
 	int ret;
 
-	/* First unsigned int gives the lifetime (in seconds) of the cred */
+	/* First unsigned int gives the remaining lifetime in seconds of the
+	 * credential - e.g. the remaining TGT lifetime for Kerberos or
+	 * the -t value passed to GSSD.
+	 */
 	p = simple_get_bytes(p, end, &timeout, sizeof(timeout));
 	if (IS_ERR(p))
 		goto err;
 	if (timeout == 0)
 		timeout = GSSD_MIN_TIMEOUT;
-	ctx->gc_expiry = jiffies + (unsigned long)timeout * HZ * 3 / 4;
-	/* Sequence number window. Determines the maximum number of simultaneous requests */
+	ctx->gc_expiry = now + ((unsigned long)timeout * HZ);
+	/* Sequence number window. Determines the maximum number of
+	 * simultaneous requests
+	 */
 	p = simple_get_bytes(p, end, &window_size, sizeof(window_size));
 	if (IS_ERR(p))
 		goto err;
@@ -237,9 +243,12 @@
 		p = ERR_PTR(ret);
 		goto err;
 	}
+	dprintk("RPC:       %s Success. gc_expiry %lu now %lu timeout %u\n",
+		__func__, ctx->gc_expiry, now, timeout);
 	return q;
 err:
-	dprintk("RPC:       %s returning %ld\n", __func__, -PTR_ERR(p));
+	dprintk("RPC:       %s returns %ld gc_expiry %lu now %lu timeout %u\n",
+		__func__, -PTR_ERR(p), ctx->gc_expiry, now, timeout);
 	return p;
 }
 
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index a9c0bbc..890a299 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -59,7 +59,7 @@
 	struct xdr_buf *xbufp;
 
 	dprintk("RPC:        free allocations for req= %p\n", req);
-	BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
+	WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
 	xbufp = &req->rq_private_buf;
 	free_page((unsigned long)xbufp->head[0].iov_base);
 	xbufp = &req->rq_snd_buf;
@@ -191,7 +191,9 @@
 
 	dprintk("RPC:        destroy backchannel transport\n");
 
-	BUG_ON(max_reqs == 0);
+	if (max_reqs == 0)
+		goto out;
+
 	spin_lock_bh(&xprt->bc_pa_lock);
 	xprt_dec_alloc_count(xprt, max_reqs);
 	list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
@@ -202,6 +204,7 @@
 	}
 	spin_unlock_bh(&xprt->bc_pa_lock);
 
+out:
 	dprintk("RPC:        backchannel list empty= %s\n",
 		list_empty(&xprt->bc_pa_list) ? "true" : "false");
 }
@@ -255,7 +258,7 @@
 	dprintk("RPC:       free backchannel req=%p\n", req);
 
 	smp_mb__before_clear_bit();
-	BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
+	WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
 	clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
 	smp_mb__after_clear_bit();
 
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 0b2eb38..15c7a8a 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -53,7 +53,7 @@
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
 	else {
-		BUG_ON(atomic_read(&task->tk_count) != 1);
+		WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
 		ret = task->tk_status;
 		rpc_put_task(task);
 	}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index fc2f7aa..9afa439 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -775,11 +775,11 @@
 	if (rp->q.list.next == &cd->queue) {
 		spin_unlock(&queue_lock);
 		mutex_unlock(&inode->i_mutex);
-		BUG_ON(rp->offset);
+		WARN_ON_ONCE(rp->offset);
 		return 0;
 	}
 	rq = container_of(rp->q.list.next, struct cache_request, q.list);
-	BUG_ON(rq->q.reader);
+	WARN_ON_ONCE(rq->q.reader);
 	if (rp->offset == 0)
 		rq->readers++;
 	spin_unlock(&queue_lock);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cdc7564b..822f020 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -132,8 +132,10 @@
 	int error;
 
 	dir = rpc_d_lookup_sb(sb, dir_name);
-	if (dir == NULL)
+	if (dir == NULL) {
+		pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name);
 		return dir;
+	}
 	for (;;) {
 		q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
 		name[sizeof(name) - 1] = '\0';
@@ -192,7 +194,8 @@
 	case RPC_PIPEFS_MOUNT:
 		dentry = rpc_setup_pipedir_sb(sb, clnt,
 					      clnt->cl_program->pipe_dir_name);
-		BUG_ON(dentry == NULL);
+		if (!dentry)
+			return -ENOENT;
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
 		clnt->cl_dentry = dentry;
@@ -234,7 +237,7 @@
 	spin_lock(&sn->rpc_client_lock);
 	list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
 		if (clnt->cl_program->pipe_dir_name == NULL)
-			break;
+			continue;
 		if (rpc_clnt_skip_event(clnt, event))
 			continue;
 		if (atomic_inc_not_zero(&clnt->cl_count) == 0)
@@ -607,6 +610,13 @@
  */
 void rpc_shutdown_client(struct rpc_clnt *clnt)
 {
+	/*
+	 * To avoid deadlock, never call rpc_shutdown_client from a
+	 * workqueue context!
+	 */
+	WARN_ON_ONCE(current->flags & PF_WQ_WORKER);
+	might_sleep();
+
 	dprintk_rcu("RPC:       shutting down %s client for %s\n",
 			clnt->cl_protname,
 			rcu_dereference(clnt->cl_xprt)->servername);
@@ -693,21 +703,19 @@
 				      const struct rpc_program *program,
 				      u32 vers)
 {
+	struct rpc_create_args args = {
+		.program	= program,
+		.prognumber	= program->number,
+		.version	= vers,
+		.authflavor	= old->cl_auth->au_flavor,
+		.client_name	= old->cl_principal,
+	};
 	struct rpc_clnt *clnt;
-	const struct rpc_version *version;
 	int err;
 
-	BUG_ON(vers >= program->nrvers || !program->version[vers]);
-	version = program->version[vers];
-	clnt = rpc_clone_client(old);
+	clnt = __rpc_clone_client(&args, old);
 	if (IS_ERR(clnt))
 		goto out;
-	clnt->cl_procinfo = version->procs;
-	clnt->cl_maxproc  = version->nrprocs;
-	clnt->cl_protname = program->name;
-	clnt->cl_prog     = program->number;
-	clnt->cl_vers     = version->number;
-	clnt->cl_stats    = program->stats;
 	err = rpc_ping(clnt);
 	if (err != 0) {
 		rpc_shutdown_client(clnt);
@@ -832,7 +840,12 @@
 	};
 	int status;
 
-	BUG_ON(flags & RPC_TASK_ASYNC);
+	WARN_ON_ONCE(flags & RPC_TASK_ASYNC);
+	if (flags & RPC_TASK_ASYNC) {
+		rpc_release_calldata(task_setup_data.callback_ops,
+			task_setup_data.callback_data);
+		return -EINVAL;
+	}
 
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
@@ -908,7 +921,7 @@
 
 	task->tk_action = call_bc_transmit;
 	atomic_inc(&task->tk_count);
-	BUG_ON(atomic_read(&task->tk_count) != 2);
+	WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
 	rpc_execute(task);
 
 out:
@@ -1368,6 +1381,7 @@
 		return;
 	case -ETIMEDOUT:
 		rpc_delay(task, 3*HZ);
+	case -EKEYEXPIRED:
 	case -EAGAIN:
 		status = -EACCES;
 		if (!task->tk_cred_retry)
@@ -1654,7 +1668,6 @@
 	task->tk_action = call_transmit_status;
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
 	if (rpc_task_need_encode(task)) {
-		BUG_ON(task->tk_rqstp->rq_bytes_sent != 0);
 		rpc_xdr_encode(task);
 		/* Did the encode result in an error condition? */
 		if (task->tk_status != 0) {
@@ -1738,7 +1751,6 @@
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 
-	BUG_ON(task->tk_status != 0);
 	task->tk_status = xprt_prepare_transmit(task);
 	if (task->tk_status == -EAGAIN) {
 		/*
@@ -1785,7 +1797,7 @@
 		 * We were unable to reply and will have to drop the
 		 * request.  The server should reconnect and retransmit.
 		 */
-		BUG_ON(task->tk_status == -EAGAIN);
+		WARN_ON_ONCE(task->tk_status == -EAGAIN);
 		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
 			"error: %d\n", task->tk_status);
 		break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 80f5dd2..fd10981 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1093,7 +1093,7 @@
 {
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
-	BUG_ON(sn->pipefs_sb == NULL);
+	WARN_ON(sn->pipefs_sb == NULL);
 	mutex_unlock(&sn->pipefs_sb_lock);
 }
 EXPORT_SYMBOL_GPL(rpc_put_sb_net);
@@ -1152,14 +1152,19 @@
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
 	mutex_lock(&sn->pipefs_sb_lock);
+	if (sn->pipefs_sb != sb) {
+		mutex_unlock(&sn->pipefs_sb_lock);
+		goto out;
+	}
 	sn->pipefs_sb = NULL;
 	mutex_unlock(&sn->pipefs_sb_lock);
-	put_net(net);
 	dprintk("RPC:       sending pipefs UMOUNT notification for net %p%s\n",
 		net, NET_NAME(net));
 	blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_UMOUNT,
 					   sb);
+	put_net(net);
+out:
 	kill_litter_super(sb);
 }
 
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index a70acae..795a0f4 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -23,7 +23,6 @@
 #include <linux/errno.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/nsproxy.h>
 #include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
@@ -884,7 +883,10 @@
 	u32 len;
 
 	len = strlen(string);
-	BUG_ON(len > maxstrlen);
+	WARN_ON_ONCE(len > maxstrlen);
+	if (len > maxstrlen)
+		/* truncate and hope for the best */
+		len = maxstrlen;
 	p = xdr_reserve_space(xdr, 4 + len);
 	xdr_encode_opaque(p, string, len);
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6357fcb..d17a704 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -98,6 +98,23 @@
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
+static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
+{
+	queue->priority = priority;
+}
+
+static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
+{
+	queue->owner = pid;
+	queue->nr = RPC_BATCH_COUNT;
+}
+
+static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
+{
+	rpc_set_waitqueue_priority(queue, queue->maxpriority);
+	rpc_set_waitqueue_owner(queue, 0);
+}
+
 /*
  * Add new request to a priority queue.
  */
@@ -109,9 +126,11 @@
 	struct rpc_task *t;
 
 	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	q = &queue->tasks[queue_priority];
 	if (unlikely(queue_priority > queue->maxpriority))
-		q = &queue->tasks[queue->maxpriority];
+		queue_priority = queue->maxpriority;
+	if (queue_priority > queue->priority)
+		rpc_set_waitqueue_priority(queue, queue_priority);
+	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
 			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
@@ -133,7 +152,9 @@
 		struct rpc_task *task,
 		unsigned char queue_priority)
 {
-	BUG_ON (RPC_IS_QUEUED(task));
+	WARN_ON_ONCE(RPC_IS_QUEUED(task));
+	if (RPC_IS_QUEUED(task))
+		return;
 
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_add_wait_queue_priority(queue, task, queue_priority);
@@ -178,24 +199,6 @@
 			task->tk_pid, queue, rpc_qname(queue));
 }
 
-static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
-{
-	queue->priority = priority;
-	queue->count = 1 << (priority * 2);
-}
-
-static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
-static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
-{
-	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
-}
-
 static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
 {
 	int i;
@@ -334,7 +337,7 @@
 
 	__rpc_add_wait_queue(q, task, queue_priority);
 
-	BUG_ON(task->tk_callback != NULL);
+	WARN_ON_ONCE(task->tk_callback != NULL);
 	task->tk_callback = action;
 	__rpc_add_timer(q, task);
 }
@@ -343,7 +346,12 @@
 				rpc_action action)
 {
 	/* We shouldn't ever put an inactive task to sleep */
-	BUG_ON(!RPC_IS_ACTIVATED(task));
+	WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
+	if (!RPC_IS_ACTIVATED(task)) {
+		task->tk_status = -EIO;
+		rpc_put_task_async(task);
+		return;
+	}
 
 	/*
 	 * Protect the queue operations.
@@ -358,7 +366,12 @@
 		rpc_action action, int priority)
 {
 	/* We shouldn't ever put an inactive task to sleep */
-	BUG_ON(!RPC_IS_ACTIVATED(task));
+	WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
+	if (!RPC_IS_ACTIVATED(task)) {
+		task->tk_status = -EIO;
+		rpc_put_task_async(task);
+		return;
+	}
 
 	/*
 	 * Protect the queue operations.
@@ -367,6 +380,7 @@
 	__rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
 	spin_unlock_bh(&q->lock);
 }
+EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
 
 /**
  * __rpc_do_wake_up_task - wake up a single rpc_task
@@ -451,8 +465,7 @@
 		/*
 		 * Check if we need to switch queues.
 		 */
-		if (--queue->count)
-			goto new_owner;
+		goto new_owner;
 	}
 
 	/*
@@ -697,7 +710,9 @@
 	dprintk("RPC: %5u __rpc_execute flags=0x%x\n",
 			task->tk_pid, task->tk_flags);
 
-	BUG_ON(RPC_IS_QUEUED(task));
+	WARN_ON_ONCE(RPC_IS_QUEUED(task));
+	if (RPC_IS_QUEUED(task))
+		return;
 
 	for (;;) {
 		void (*do_action)(struct rpc_task *);
@@ -981,7 +996,7 @@
 {
 	dprintk("RPC: %5u release task\n", task->tk_pid);
 
-	BUG_ON (RPC_IS_QUEUED(task));
+	WARN_ON_ONCE(RPC_IS_QUEUED(task));
 
 	rpc_release_resources_task(task);
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 3ee7461..dbf12ac 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
-#include <linux/nsproxy.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>
@@ -324,7 +323,9 @@
 	 * The caller checks for sv_nrpools > 1, which
 	 * implies that we've been initialized.
 	 */
-	BUG_ON(m->count == 0);
+	WARN_ON_ONCE(m->count == 0);
+	if (m->count == 0)
+		return;
 
 	switch (m->mode) {
 	case SVC_POOL_PERCPU:
@@ -585,7 +586,9 @@
 				       * We assume one is at most one page
 				       */
 	arghi = 0;
-	BUG_ON(pages > RPCSVC_MAXPAGES);
+	WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
+	if (pages > RPCSVC_MAXPAGES)
+		pages = RPCSVC_MAXPAGES;
 	while (pages) {
 		struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
 		if (!p)
@@ -946,7 +949,9 @@
 	unsigned int		i;
 	int			error = 0;
 
-	BUG_ON(proto == 0 && port == 0);
+	WARN_ON_ONCE(proto == 0 && port == 0);
+	if (proto == 0 && port == 0)
+		return -EINVAL;
 
 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 		for (i = 0; i < progp->pg_nvers; i++) {
@@ -1035,7 +1040,7 @@
 }
 
 /*
- * Printk the given error with the address of the client that caused it.
+ * dprintk the given error with the address of the client that caused it.
  */
 static __printf(2, 3)
 void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
@@ -1049,8 +1054,7 @@
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	net_warn_ratelimited("svc: %s: %pV",
-			     svc_print_addr(rqstp, buf, sizeof(buf)), &vaf);
+	dprintk("svc: %s: %pV", svc_print_addr(rqstp, buf, sizeof(buf)), &vaf);
 
 	va_end(args);
 }
@@ -1299,7 +1303,7 @@
 	 * Setup response xdr_buf.
 	 * Initially it has just one page
 	 */
-	rqstp->rq_resused = 1;
+	rqstp->rq_next_page = &rqstp->rq_respages[1];
 	resv->iov_base = page_address(rqstp->rq_respages[0]);
 	resv->iov_len = 0;
 	rqstp->rq_res.pages = rqstp->rq_respages + 1;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 194d865..b8e47fa 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -218,7 +218,9 @@
  */
 static void svc_xprt_received(struct svc_xprt *xprt)
 {
-	BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
+	WARN_ON_ONCE(!test_bit(XPT_BUSY, &xprt->xpt_flags));
+	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
+		return;
 	/* As soon as we clear busy, the xprt could be closed and
 	 * 'put', so we need a reference to call svc_xprt_enqueue with:
 	 */
@@ -577,7 +579,10 @@
 
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
 	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
-	BUG_ON(pages >= RPCSVC_MAXPAGES);
+	WARN_ON_ONCE(pages >= RPCSVC_MAXPAGES);
+	if (pages >= RPCSVC_MAXPAGES)
+		/* use as many pages as possible */
+		pages = RPCSVC_MAXPAGES - 1;
 	for (i = 0; i < pages ; i++)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);
@@ -926,7 +931,7 @@
 	spin_lock_bh(&serv->sv_lock);
 	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
 		list_del_init(&xprt->xpt_list);
-	BUG_ON(!list_empty(&xprt->xpt_ready));
+	WARN_ON_ONCE(!list_empty(&xprt->xpt_ready));
 	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
 		serv->sv_tmpcnt--;
 	spin_unlock_bh(&serv->sv_lock);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03827ce..0a148c9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -84,7 +84,11 @@
 static void svc_reclassify_socket(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
-	BUG_ON(sock_owned_by_user(sk));
+
+	WARN_ON_ONCE(sock_owned_by_user(sk));
+	if (sock_owned_by_user(sk))
+		return;
+
 	switch (sk->sk_family) {
 	case AF_INET:
 		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
@@ -601,6 +605,7 @@
 		rqstp->rq_respages = rqstp->rq_pages + 1 +
 			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
 	}
+	rqstp->rq_next_page = rqstp->rq_respages+1;
 
 	if (serv->sv_stats)
 		serv->sv_stats->netudpcnt++;
@@ -874,9 +879,9 @@
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return 0;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		if (rqstp->rq_pages[i] != NULL)
@@ -893,9 +898,9 @@
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		svsk->sk_pages[i] = rqstp->rq_pages[i];
@@ -907,9 +912,9 @@
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		goto out;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		BUG_ON(svsk->sk_pages[i] == NULL);
@@ -918,13 +923,12 @@
 	}
 out:
 	svsk->sk_tcplen = 0;
+	svsk->sk_datalen = 0;
 }
 
 /*
- * Receive data.
+ * Receive fragment record header.
  * If we haven't gotten the record length yet, get the next four bytes.
- * Otherwise try to gobble up as much as possible up to the complete
- * record length.
  */
 static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
@@ -950,32 +954,16 @@
 			return -EAGAIN;
 		}
 
-		svsk->sk_reclen = ntohl(svsk->sk_reclen);
-		if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) {
-			/* FIXME: technically, a record can be fragmented,
-			 *  and non-terminal fragments will not have the top
-			 *  bit set in the fragment length header.
-			 *  But apparently no known nfs clients send fragmented
-			 *  records. */
-			net_notice_ratelimited("RPC: multiple fragments per record not supported\n");
-			goto err_delete;
-		}
-
-		svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
-		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
-		if (svsk->sk_reclen > serv->sv_max_mesg) {
-			net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n",
-					       (unsigned long)svsk->sk_reclen);
+		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
+		if (svc_sock_reclen(svsk) + svsk->sk_datalen >
+							serv->sv_max_mesg) {
+			net_notice_ratelimited("RPC: fragment too large: %d\n",
+					svc_sock_reclen(svsk));
 			goto err_delete;
 		}
 	}
 
-	if (svsk->sk_reclen < 8)
-		goto err_delete; /* client is nuts. */
-
-	len = svsk->sk_reclen;
-
-	return len;
+	return svc_sock_reclen(svsk);
 error:
 	dprintk("RPC: TCP recv_record got %d\n", len);
 	return len;
@@ -1019,7 +1007,7 @@
 	if (dst->iov_len < src->iov_len)
 		return -EAGAIN; /* whatever; just giving up. */
 	memcpy(dst->iov_base, src->iov_base, src->iov_len);
-	xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
+	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 	rqstp->rq_arg.len = 0;
 	return 0;
 }
@@ -1038,6 +1026,17 @@
 	return i;
 }
 
+static void svc_tcp_fragment_received(struct svc_sock *svsk)
+{
+	/* If we have more data, signal svc_xprt_enqueue() to try again */
+	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
+		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+	dprintk("svc: TCP %s record (%d bytes)\n",
+		svc_sock_final_rec(svsk) ? "final" : "nonfinal",
+		svc_sock_reclen(svsk));
+	svsk->sk_tcplen = 0;
+	svsk->sk_reclen = 0;
+}
 
 /*
  * Receive data from a TCP socket.
@@ -1064,29 +1063,39 @@
 		goto error;
 
 	base = svc_tcp_restore_pages(svsk, rqstp);
-	want = svsk->sk_reclen - base;
+	want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
 
 	vec = rqstp->rq_vec;
 
 	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
-						svsk->sk_reclen);
+						svsk->sk_datalen + want);
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
 	/* Now receive data */
 	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
-	if (len >= 0)
+	if (len >= 0) {
 		svsk->sk_tcplen += len;
-	if (len != want) {
+		svsk->sk_datalen += len;
+	}
+	if (len != want || !svc_sock_final_rec(svsk)) {
 		svc_tcp_save_pages(svsk, rqstp);
 		if (len < 0 && len != -EAGAIN)
-			goto err_other;
-		dprintk("svc: incomplete TCP record (%d of %d)\n",
-			svsk->sk_tcplen, svsk->sk_reclen);
+			goto err_delete;
+		if (len == want)
+			svc_tcp_fragment_received(svsk);
+		else
+			dprintk("svc: incomplete TCP record (%d of %d)\n",
+				(int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)),
+				svc_sock_reclen(svsk));
 		goto err_noclose;
 	}
 
-	rqstp->rq_arg.len = svsk->sk_reclen;
+	if (svc_sock_reclen(svsk) < 8)
+		goto err_delete; /* client is nuts. */
+
+	rqstp->rq_arg.len = svsk->sk_datalen;
 	rqstp->rq_arg.page_base = 0;
 	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
 		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
@@ -1103,11 +1112,8 @@
 		len = receive_cb_reply(svsk, rqstp);
 
 	/* Reset TCP read info */
-	svsk->sk_reclen = 0;
-	svsk->sk_tcplen = 0;
-	/* If we have more data, signal svc_xprt_enqueue() to try again */
-	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
-		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+	svsk->sk_datalen = 0;
+	svc_tcp_fragment_received(svsk);
 
 	if (len < 0)
 		goto error;
@@ -1116,15 +1122,14 @@
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpcnt++;
 
-	dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
 	return rqstp->rq_arg.len;
 
 error:
 	if (len != -EAGAIN)
-		goto err_other;
+		goto err_delete;
 	dprintk("RPC: TCP recvfrom got EAGAIN\n");
 	return 0;
-err_other:
+err_delete:
 	printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
 	       svsk->sk_xprt.xpt_server->sv_name, -len);
 	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1301,6 +1306,7 @@
 
 		svsk->sk_reclen = 0;
 		svsk->sk_tcplen = 0;
+		svsk->sk_datalen = 0;
 		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
 
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
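
The svcsock.c rework above lets the server reassemble multi-fragment RPC-over-TCP records: sk_tcplen tracks progress through the current fragment (including its 4-byte marker) while the new sk_datalen accumulates payload across fragments, so the old "multiple fragments per record not supported" bail-out can go. Per the ONC RPC record-marking scheme (RFC 1831), each fragment is preceded by a 4-byte marker whose top bit flags the final fragment and whose low 31 bits give the fragment length. A hedged sketch of what the svc_sock_reclen() and svc_sock_final_rec() helpers used above plausibly compute — their real definitions live outside this hunk:

#include <linux/types.h>
#include <linux/sunrpc/msg_prot.h>	/* RPC_LAST_STREAM_FRAGMENT et al. */

static u32 demo_reclen(__be32 marker)
{
	/* low 31 bits: payload length of this fragment */
	return ntohl(marker) & RPC_FRAGMENT_SIZE_MASK;
}

static bool demo_final_rec(__be32 marker)
{
	/* top bit set: this fragment completes the record */
	return (ntohl(marker) & RPC_LAST_STREAM_FRAGMENT) != 0;
}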
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 08f50af..5605563 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -318,7 +318,10 @@
 
 	tail = buf->tail;
 	head = buf->head;
-	BUG_ON (len > head->iov_len);
+
+	WARN_ON_ONCE(len > head->iov_len);
+	if (len > head->iov_len)
+		len = head->iov_len;
 
 	/* Shift the tail first */
 	if (tail->iov_len != 0) {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 41cb63b..0ce7552 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -521,11 +521,11 @@
 		rqstp->rq_pages[ch_no] = NULL;
 
 	/*
-	 * Detach res pages. svc_release must see a resused count of
-	 * zero or it will attempt to put them.
+	 * Detach res pages. If svc_release sees any it will attempt to
+	 * put them.
 	 */
-	while (rqstp->rq_resused)
-		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
+	while (rqstp->rq_next_page != rqstp->rq_respages)
+		*(--rqstp->rq_next_page) = NULL;
 
 	return err;
 }
@@ -550,7 +550,7 @@
 
 	/* rq_respages starts after the last arg page */
 	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-	rqstp->rq_resused = 0;
+	rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no];
 
 	/* Rebuild rq_arg head and tail. */
 	rqstp->rq_arg.head[0] = head->arg.head[0];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 42eb7ba..c1d124d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -548,6 +548,7 @@
 	int sge_no;
 	int sge_bytes;
 	int page_no;
+	int pages;
 	int ret;
 
 	/* Post a recv buffer to handle another request. */
@@ -611,7 +612,8 @@
 	 * respages array. They are our pages until the I/O
 	 * completes.
 	 */
-	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
+	pages = rqstp->rq_next_page - rqstp->rq_respages;
+	for (page_no = 0; page_no < pages; page_no++) {
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;
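
The two svc_rdma hunks above finish converting response page accounting from a counter (rq_resused) to a cursor (rq_next_page) that points one past the last response page in use: detaching pages becomes walking the cursor back, and the in-use count falls out of pointer subtraction. A toy illustration of the equivalence (the struct below is illustrative, not the real svc_rqst layout):

#include <linux/mm_types.h>

struct demo_rqst {
	struct page *respages[16];
	struct page **next_page;	/* one past the last page in use */
};

/* what the removed rq_resused counter used to hold */
static int demo_pages_used(struct demo_rqst *rq)
{
	return rq->next_page - rq->respages;
}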
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 75853ca..68b0a81 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1746,7 +1746,6 @@
 {
 	struct sock *sk = sock->sk;
 
-	BUG_ON(sock_owned_by_user(sk));
 	sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
 		&xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]);
 }
@@ -1755,7 +1754,6 @@
 {
 	struct sock *sk = sock->sk;
 
-	BUG_ON(sock_owned_by_user(sk));
 	sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
 		&xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
 }
@@ -1764,13 +1762,16 @@
 {
 	struct sock *sk = sock->sk;
 
-	BUG_ON(sock_owned_by_user(sk));
 	sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
 		&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
 }
 
 static inline void xs_reclassify_socket(int family, struct socket *sock)
 {
+	WARN_ON_ONCE(sock_owned_by_user(sock->sk));
+	if (sock_owned_by_user(sock->sk))
+		return;
+
 	switch (family) {
 	case AF_LOCAL:
 		xs_reclassify_socketu(sock);
@@ -1901,6 +1902,10 @@
 		dprintk("RPC:       xprt %p: socket %s does not exist\n",
 				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
 		break;
+	case -ECONNREFUSED:
+		dprintk("RPC:       xprt %p: connection refused for %s\n",
+				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		break;
 	default:
 		printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
 				__func__, -status,
@@ -2329,9 +2334,11 @@
 	struct page *page;
 	struct rpc_buffer *buf;
 
-	BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
-	page = alloc_page(GFP_KERNEL);
+	WARN_ON_ONCE(size > PAGE_SIZE - sizeof(struct rpc_buffer));
+	if (size > PAGE_SIZE - sizeof(struct rpc_buffer))
+		return NULL;
 
+	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		return NULL;
 
@@ -2393,7 +2400,6 @@
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct svc_xprt	*xprt;
-	struct svc_sock         *svsk;
 	u32                     len;
 
 	dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
@@ -2401,7 +2407,6 @@
 	 * Get the server socket associated with this callback xprt
 	 */
 	xprt = req->rq_xprt->bc_xprt;
-	svsk = container_of(xprt, struct svc_sock, sk_xprt);
 
 	/*
 	 * Grab the mutex to serialize data as the connection is shared
diff --git a/scripts/Makefile.modsign b/scripts/Makefile.modsign
new file mode 100644
index 0000000..abfda62
--- /dev/null
+++ b/scripts/Makefile.modsign
@@ -0,0 +1,32 @@
+# ==========================================================================
+# Signing modules
+# ==========================================================================
+
+PHONY := __modsign
+__modsign:
+
+include scripts/Kbuild.include
+
+__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
+modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o)))
+
+PHONY += $(modules)
+__modsign: $(modules)
+	@:
+
+quiet_cmd_sign_ko = SIGN [M] $(2)/$(notdir $@)
+        cmd_sign_ko = $(mod_sign_cmd) $(2)/$(notdir $@)
+
+# Modules built outside the kernel source tree go into extra by default
+INSTALL_MOD_DIR ?= extra
+ext-mod-dir = $(INSTALL_MOD_DIR)$(subst $(patsubst %/,%,$(KBUILD_EXTMOD)),,$(@D))
+
+modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
+
+$(modules):
+	$(call cmd,sign_ko,$(MODLIB)/$(modinst_dir))
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index f18750e..4d2c7df 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -33,6 +33,7 @@
 my @ignore = ();
 my $help = 0;
 my $configuration_file = ".checkpatch.conf";
+my $max_line_length = 80;
 
 sub help {
 	my ($exitcode) = @_;
@@ -51,6 +52,7 @@
   -f, --file                 treat FILE as regular source file
   --subjective, --strict     enable more subjective tests
   --ignore TYPE(,TYPE2...)   ignore various comma separated message types
+  --max-line-length=n        set the maximum line length, if exceeded, warn
   --show-types               show the message "types" in the output
   --root=PATH                PATH to the kernel tree root
   --no-summary               suppress the per-file summary
@@ -107,6 +109,7 @@
 	'strict!'	=> \$check,
 	'ignore=s'	=> \@ignore,
 	'show-types!'	=> \$show_types,
+	'max-line-length=i' => \$max_line_length,
 	'root=s'	=> \$root,
 	'summary!'	=> \$summary,
 	'mailback!'	=> \$mailback,
@@ -227,7 +230,11 @@
 our $Member	= qr{->$Ident|\.$Ident|\[[^]]*\]};
 our $Lval	= qr{$Ident(?:$Member)*};
 
-our $Constant	= qr{(?i:(?:[0-9]+|0x[0-9a-f]+)[ul]*)};
+our $Float_hex	= qr{(?i:0x[0-9a-f]+p-?[0-9]+[fl]?)};
+our $Float_dec	= qr{(?i:((?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?))};
+our $Float_int	= qr{(?i:[0-9]+e-?[0-9]+[fl]?)};
+our $Float	= qr{$Float_hex|$Float_dec|$Float_int};
+our $Constant	= qr{(?:$Float|(?i:(?:0x[0-9a-f]+|[0-9]+)[ul]*))};
 our $Assignment	= qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)};
 our $Compare    = qr{<=|>=|==|!=|<|>};
 our $Operators	= qr{
@@ -352,27 +359,6 @@
 
 $chk_signoff = 0 if ($file);
 
-my @dep_includes = ();
-my @dep_functions = ();
-my $removal = "Documentation/feature-removal-schedule.txt";
-if ($tree && -f "$root/$removal") {
-	open(my $REMOVE, '<', "$root/$removal") ||
-				die "$P: $removal: open failed - $!\n";
-	while (<$REMOVE>) {
-		if (/^Check:\s+(.*\S)/) {
-			for my $entry (split(/[, ]+/, $1)) {
-				if ($entry =~ m@include/(.*)@) {
-					push(@dep_includes, $1);
-
-				} elsif ($entry !~ m@/@) {
-					push(@dep_functions, $entry);
-				}
-			}
-		}
-	}
-	close($REMOVE);
-}
-
 my @rawlines = ();
 my @lines = ();
 my $vname;
@@ -1412,6 +1398,8 @@
 	my %suppress_export;
 	my $suppress_statement = 0;
 
+	my %camelcase = ();
+
 	# Pre-scan the patch sanitizing the lines.
 	# Pre-scan the patch looking for any __setup documentation.
 	#
@@ -1757,6 +1745,13 @@
 			#print "is_start<$is_start> is_end<$is_end> length<$length>\n";
 		}
 
+# discourage the addition of CONFIG_EXPERIMENTAL in Kconfig.
+		if ($realfile =~ /Kconfig/ &&
+		    $line =~ /.\s*depends on\s+.*\bEXPERIMENTAL\b/) {
+			WARN("CONFIG_EXPERIMENTAL",
+			     "Use of CONFIG_EXPERIMENTAL is deprecated. For alternatives, see https://lkml.org/lkml/2012/10/23/580\n");
+		}
+
 		if (($realfile =~ /Makefile.*/ || $realfile =~ /Kbuild.*/) &&
 		    ($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) {
 			my $flag = $1;
@@ -1774,15 +1769,15 @@
 # check we are in a valid source file if not then ignore this hunk
 		next if ($realfile !~ /\.(h|c|s|S|pl|sh)$/);
 
-#80 column limit
+#line length limit
 		if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ &&
 		    $rawline !~ /^.\s*\*\s*\@$Ident\s/ &&
 		    !($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(KERN_\S+\s*|[^"]*))?"[X\t]*"\s*(?:|,|\)\s*;)\s*$/ ||
 		    $line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) &&
-		    $length > 80)
+		    $length > $max_line_length)
 		{
 			WARN("LONG_LINE",
-			     "line over 80 characters\n" . $herecurr);
+			     "line over $max_line_length characters\n" . $herecurr);
 		}
 
 # Check for user-visible strings broken across lines, which breaks the ability
@@ -1912,6 +1907,12 @@
 # check we are in a valid C source file if not then ignore this hunk
 		next if ($realfile !~ /\.(h|c)$/);
 
+# discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
+		if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
+			WARN("CONFIG_EXPERIMENTAL",
+			     "Use of CONFIG_EXPERIMENTAL is deprecated. For alternatives, see https://lkml.org/lkml/2012/10/23/580\n");
+		}
+
 # check for RCS/CVS revision markers
 		if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) {
 			WARN("CVS_KEYWORD",
@@ -2225,8 +2226,11 @@
 			my $path = $1;
 			if ($path =~ m{//}) {
 				ERROR("MALFORMED_INCLUDE",
-				      "malformed #include filename\n" .
-					$herecurr);
+				      "malformed #include filename\n" . $herecurr);
+			}
+			if ($path =~ "^uapi/" && $realfile =~ m@\binclude/uapi/@) {
+				ERROR("UAPI_INCLUDE",
+				      "No #include in ...include/uapi/... should use a uapi/ path prefix\n" . $herecurr);
 			}
 		}
 
@@ -2906,12 +2910,17 @@
 			}
 		}
 
-#studly caps, commented out until figure out how to distinguish between use of existing and adding new
-#		if (($line=~/[\w_][a-z\d]+[A-Z]/) and !($line=~/print/)) {
-#		    print "No studly caps, use _\n";
-#		    print "$herecurr";
-#		    $clean = 0;
-#		}
+#CamelCase
+		while ($line =~ m{($Constant|$Lval)}g) {
+			my $var = $1;
+			if ($var !~ /$Constant/ &&
+			    $var =~ /[A-Z]\w*[a-z]|[a-z]\w*[A-Z]/ &&
+			    !defined $camelcase{$var}) {
+				$camelcase{$var} = 1;
+				WARN("CAMELCASE",
+				     "Avoid CamelCase: <$var>\n" . $herecurr);
+			}
+		}
 
 #no spaces allowed after \ in define
 		if ($line=~/\#\s*define.*\\\s$/) {
@@ -3013,6 +3022,17 @@
 					      "Macros with complex values should be enclosed in parenthesis\n" . "$herectx");
 				}
 			}
+
+# check for line continuations outside of #defines, preprocessor #, and asm
+
+		} else {
+			if ($prevline !~ /^..*\\$/ &&
+			    $line !~ /^\+\s*\#.*\\$/ &&		# preprocessor
+			    $line !~ /^\+.*\b(__asm__|asm)\b.*\\$/ &&	# asm
+			    $line =~ /^\+.*\\$/) {
+				WARN("LINE_CONTINUATIONS",
+				     "Avoid unnecessary line continuations\n" . $herecurr);
+			}
 		}
 
 # do {} while (0) macro tests:
@@ -3183,20 +3203,14 @@
 			}
 		}
 
-# don't include deprecated include files (uses RAW line)
-		for my $inc (@dep_includes) {
-			if ($rawline =~ m@^.\s*\#\s*include\s*\<$inc>@) {
-				ERROR("DEPRECATED_INCLUDE",
-				      "Don't use <$inc>: see Documentation/feature-removal-schedule.txt\n" . $herecurr);
-			}
+# check for unnecessary blank lines around braces
+		if (($line =~ /^..*}\s*$/ && $prevline =~ /^.\s*$/)) {
+			CHK("BRACES",
+			    "Blank lines aren't necessary before a close brace '}'\n" . $hereprev);
 		}
-
-# don't use deprecated functions
-		for my $func (@dep_functions) {
-			if ($line =~ /\b$func\b/) {
-				ERROR("DEPRECATED_FUNCTION",
-				      "Don't use $func(): see Documentation/feature-removal-schedule.txt\n" . $herecurr);
-			}
+		if (($line =~ /^.\s*$/ && $prevline =~ /^..*{\s*$/)) {
+			CHK("BRACES",
+			    "Blank lines aren't necessary after an open brace '{'\n" . $hereprev);
 		}
 
 # no volatiles please
@@ -3213,20 +3227,12 @@
 				$herecurr);
 		}
 
-# check for needless kfree() checks
-		if ($prevline =~ /\bif\s*\(([^\)]*)\)/) {
-			my $expr = $1;
-			if ($line =~ /\bkfree\(\Q$expr\E\);/) {
-				WARN("NEEDLESS_KFREE",
-				     "kfree(NULL) is safe this check is probably not required\n" . $hereprev);
-			}
-		}
-# check for needless usb_free_urb() checks
-		if ($prevline =~ /\bif\s*\(([^\)]*)\)/) {
-			my $expr = $1;
-			if ($line =~ /\busb_free_urb\(\Q$expr\E\);/) {
-				WARN("NEEDLESS_USB_FREE_URB",
-				     "usb_free_urb(NULL) is safe this check is probably not required\n" . $hereprev);
+# check for needless "if (<foo>) fn(<foo>)" uses
+		if ($prevline =~ /\bif\s*\(\s*($Lval)\s*\)/) {
+			my $expr = '\s*\(\s*' . quotemeta($1) . '\s*\)\s*;';
+			if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?)$expr/) {
+				WARN('NEEDLESS_IF',
+				     "$1(NULL) is safe; this check is probably not required\n" . $hereprev);
 			}
 		}
 
@@ -3344,6 +3350,12 @@
 			     "Avoid line continuations in quoted strings\n" . $herecurr);
 		}
 
+# check for struct spinlock declarations
+		if ($line =~ /^.\s*\bstruct\s+spinlock\s+\w+\s*;/) {
+			WARN("USE_SPINLOCK_T",
+			     "struct spinlock should be spinlock_t\n" . $herecurr);
+		}
+
 # Check for misused memsets
 		if ($^V && $^V ge 5.10.0 &&
 		    defined $stat &&
@@ -3450,8 +3462,22 @@
 
 # check for multiple semicolons
 		if ($line =~ /;\s*;\s*$/) {
-		    WARN("ONE_SEMICOLON",
-			 "Statements terminations use 1 semicolon\n" . $herecurr);
+			WARN("ONE_SEMICOLON",
+			     "Statements terminations use 1 semicolon\n" . $herecurr);
+		}
+
+# check for switch/default statements without a break;
+		if ($^V && $^V ge 5.10.0 &&
+		    defined $stat &&
+		    $stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
+			my $ctx = '';
+			my $herectx = $here . "\n";
+			my $cnt = statement_rawlines($stat);
+			for (my $n = 0; $n < $cnt; $n++) {
+				$herectx .= raw_line($linenr, $n) . "\n";
+			}
+			WARN("DEFAULT_NO_BREAK",
+			     "switch default: should use break\n" . $herectx);
 		}
 
 # check for gcc specific __FUNCTION__
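
Beyond dropping the feature-removal-schedule.txt machinery, the checkpatch.pl changes above add a configurable --max-line-length, float-constant parsing, and several new tests: CAMELCASE, UAPI_INCLUDE, LINE_CONTINUATIONS, USE_SPINLOCK_T, DEFAULT_NO_BREAK, and blank-line-around-brace checks. A hedged C fragment that should trip several of the new warnings when run through the updated script (demo code only, e.g. via scripts/checkpatch.pl -f demo.c):

struct spinlock demo_lock;	/* USE_SPINLOCK_T: should be spinlock_t */

int demoCamelCounter;		/* CAMELCASE: avoid mixed case */

int demo(int x)
{
	int y = x + \
		1;		/* LINE_CONTINUATIONS: not needed here */

	switch (x) {
	default:
		;		/* DEFAULT_NO_BREAK: default should break */
	}
	return y;
}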
diff --git a/scripts/coccinelle/api/d_find_alias.cocci b/scripts/coccinelle/api/d_find_alias.cocci
new file mode 100644
index 0000000..a9694a8
--- /dev/null
+++ b/scripts/coccinelle/api/d_find_alias.cocci
@@ -0,0 +1,80 @@
+/// Make sure calls to d_find_alias() have a corresponding call to dput().
+//
+// Keywords: d_find_alias, dput
+//
+// Confidence: Moderate
+// URL: http://coccinelle.lip6.fr/
+// Options: -include_headers
+
+virtual context
+virtual org
+virtual patch
+virtual report
+
+@r exists@
+local idexpression struct dentry *dent;
+expression E, E1;
+statement S1, S2;
+position p1, p2;
+@@
+(
+	if (!(dent@p1 = d_find_alias(...))) S1
+|
+	dent@p1 = d_find_alias(...)
+)
+
+<...when != dput(dent)
+    when != if (...) { <+... dput(dent) ...+> }
+    when != true !dent || ...
+    when != dent = E
+    when != E = dent
+if (!dent || ...) S2
+...>
+(
+	return <+...dent...+>;
+|
+	return @p2 ...;
+|
+	dent@p2 = E1;
+|
+	E1 = dent;
+)
+
+@depends on context@
+local idexpression struct dentry *r.dent;
+position r.p1,r.p2;
+@@
+* dent@p1 = ...
+  ...
+(
+* return@p2 ...;
+|
+* dent@p2
+)
+
+
+@script:python depends on org@
+p1 << r.p1;
+p2 << r.p2;
+@@
+cocci.print_main("Missing call to dput()",p1)
+cocci.print_secs("",p2)
+
+@depends on patch@
+local idexpression struct dentry *r.dent;
+position r.p2;
+@@
+(
++ dput(dent);
+  return @p2 ...;
+|
++ dput(dent);
+  dent@p2 = ...;
+)
+
+@script:python depends on report@
+p1 << r.p1;
+p2 << r.p2;
+@@
+msg = "Missing call to dput() at line %s."
+coccilib.report.print_report(p1[0], msg % (p2[0].line))
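
d_find_alias() returns a dentry with an elevated reference count, so every path out of the caller must balance it with dput(); the semantic patch above reports escapes that miss the put and, in patch mode, inserts the missing call. A minimal sketch of the pattern being enforced (demo function, not kernel code):

#include <linux/fs.h>
#include <linux/dcache.h>

static bool demo_inode_has_alias(struct inode *inode)
{
	struct dentry *dent = d_find_alias(inode);

	if (!dent)
		return false;
	/* ... inspect dent ... */
	dput(dent);	/* balance the reference d_find_alias() took */
	return true;
}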
diff --git a/scripts/coccinelle/misc/warn.cocci b/scripts/coccinelle/misc/warn.cocci
new file mode 100644
index 0000000..fda8c35
--- /dev/null
+++ b/scripts/coccinelle/misc/warn.cocci
@@ -0,0 +1,109 @@
+/// Use WARN(1,...) rather than printk followed by WARN_ON(1)
+///
+// Confidence: High
+// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6.  GPLv2.
+// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6.  GPLv2.
+// URL: http://coccinelle.lip6.fr/
+// Comments:
+// Options: -no_includes -include_headers
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+@bad1@
+position p;
+@@
+
+printk(...);
+printk@p(...);
+WARN_ON(1);
+
+@r1 depends on context || report || org@
+position p != bad1.p;
+@@
+
+ printk@p(...);
+*WARN_ON(1);
+
+@script:python depends on org@
+p << r1.p;
+@@
+
+cocci.print_main("printk + WARN_ON can be just WARN",p)
+
+@script:python depends on report@
+p << r1.p;
+@@
+
+msg = "SUGGESTION: printk + WARN_ON can be just WARN"
+coccilib.report.print_report(p[0],msg)
+
+@ok1 depends on patch@
+expression list es;
+position p != bad1.p;
+@@
+
+-printk@p(
++WARN(1,
+  es);
+-WARN_ON(1);
+
+@depends on patch@
+expression list ok1.es;
+@@
+
+if (...)
+- {
+  WARN(1,es);
+- }
+
+// --------------------------------------------------------------------
+
+@bad2@
+position p;
+@@
+
+printk(...);
+printk@p(...);
+WARN_ON_ONCE(1);
+
+@r2 depends on context || report || org@
+position p != bad1.p;
+@@
+
+ printk@p(...);
+*WARN_ON_ONCE(1);
+
+@script:python depends on org@
+p << r2.p;
+@@
+
+cocci.print_main("printk + WARN_ON_ONCE can be just WARN_ONCE",p)
+
+@script:python depends on report@
+p << r2.p;
+@@
+
+msg = "SUGGESTION: printk + WARN_ON_ONCE can be just WARN_ONCE"
+coccilib.report.print_report(p[0],msg)
+
+@ok2 depends on patch@
+expression list es;
+position p != bad2.p;
+@@
+
+-printk@p(
++WARN_ONCE(1,
+  es);
+-WARN_ON_ONCE(1);
+
+@depends on patch@
+expression list ok2.es;
+@@
+
+if (...)
+- {
+  WARN_ONCE(1,es);
+- }
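
warn.cocci spots a printk() immediately followed by WARN_ON(1) (or WARN_ON_ONCE(1)) and, in patch mode, folds the pair into a single WARN(1, ...) or WARN_ONCE(1, ...), so the message and the backtrace are emitted together. A hedged before/after; the function names, "id", and the message text are illustrative:

#include <linux/kernel.h>

static void demo_before(int id)
{
	printk(KERN_WARNING "demo: widget %d wedged\n", id);
	WARN_ON(1);		/* backtrace, but detached from the message */
}

static void demo_after(int id)
{
	WARN(1, "demo: widget %d wedged\n", id);	/* one call, both */
}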
diff --git a/scripts/config b/scripts/config
index ee35539..bb4d3de 100755
--- a/scripts/config
+++ b/scripts/config
@@ -101,7 +101,6 @@
 	case "$CMD" in
 	--keep-case|-k)
 		MUNGE_CASE=no
-		shift
 		continue
 		;;
 	--refresh)
diff --git a/scripts/pnmtologo.c b/scripts/pnmtologo.c
index 5c11312..68bb4ef 100644
--- a/scripts/pnmtologo.c
+++ b/scripts/pnmtologo.c
@@ -74,6 +74,7 @@
 static struct color **logo_data;
 static struct color logo_clut[MAX_LINUX_LOGO_COLORS];
 static unsigned int logo_clutsize;
+static int is_plain_pbm = 0;
 
 static void die(const char *fmt, ...)
     __attribute__ ((noreturn)) __attribute ((format (printf, 1, 2)));
@@ -103,6 +104,11 @@
     val = 0;
     while (isdigit(c)) {
 	val = 10*val+c-'0';
+	/* Some PBMs are 'broken'; GIMP, for example, exports a PBM without
+	 * spaces between the digits. This is OK because we know a PBM can
+	 * only have a '1' or a '0' for each digit. */
+	if (is_plain_pbm)
+		break;
 	c = fgetc(fp);
 	if (c == EOF)
 	    die("%s: end of file\n", filename);
@@ -167,6 +173,7 @@
     switch (magic) {
 	case '1':
 	    /* Plain PBM */
+	    is_plain_pbm = 1;
 	    for (i = 0; i < logo_height; i++)
 		for (j = 0; j < logo_width; j++)
 		    logo_data[i][j].red = logo_data[i][j].green =
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 79fdafb..08f06c0 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -48,13 +48,14 @@
 	for i in $archincludedir; do
 		prune="$prune -wholename $i -prune -o"
 	done
-	find ${tree}arch/$1 $ignore $prune -name "$2" -print;
+	find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" -print;
 }
 
 # find sources in arch/$1/include
 find_arch_include_sources()
 {
-	include=$(find ${tree}arch/$1/ -name include -type d);
+	include=$(find ${tree}arch/$1/ $subarchprune \
+					-name include -type d -print);
 	if [ -n "$include" ]; then
 		archincludedir="$archincludedir $include"
 		find $include $ignore -name "$2" -print;
@@ -95,6 +96,32 @@
 	find_other_sources '*.[chS]'
 }
 
+all_compiled_sources()
+{
+	for i in $(all_sources); do
+		case "$i" in
+			*.[cS])
+				j=${i/\.[cS]/\.o}
+				if [ -e $j ]; then
+					echo $i
+				fi
+				;;
+			*)
+				echo $i
+				;;
+		esac
+	done
+}
+
+all_target_sources()
+{
+	if [ -n "$COMPILED_SOURCE" ]; then
+		all_compiled_sources
+	else
+		all_sources
+	fi
+}
+
 all_kconfigs()
 {
 	for arch in $ALLSOURCE_ARCHS; do
@@ -110,18 +137,18 @@
 
 docscope()
 {
-	(echo \-k; echo \-q; all_sources) > cscope.files
+	(echo \-k; echo \-q; all_target_sources) > cscope.files
 	cscope -b -f cscope.out
 }
 
 dogtags()
 {
-	all_sources | gtags -i -f -
+	all_target_sources | gtags -i -f -
 }
 
 exuberant()
 {
-	all_sources | xargs $1 -a                               \
+	all_target_sources | xargs $1 -a                        \
 	-I __initdata,__exitdata,__acquires,__releases          \
 	-I __read_mostly,____cacheline_aligned                  \
 	-I ____cacheline_aligned_in_smp                         \
@@ -173,7 +200,7 @@
 
 emacs()
 {
-	all_sources | xargs $1 -a                               \
+	all_target_sources | xargs $1 -a                        \
 	--regex='/^(ENTRY|_GLOBAL)(\([^)]*\)).*/\2/'            \
 	--regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/'   \
 	--regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/'		\
@@ -220,11 +247,10 @@
 	elif $1 --version 2>&1 | grep -iq emacs; then
 		emacs $1
 	else
-		all_sources | xargs $1 -a
+		all_target_sources | xargs $1 -a
         fi
 }
 
-
 # Support um (which uses SUBARCH)
 if [ "${ARCH}" = "um" ]; then
 	if [ "$SUBARCH" = "i386" ]; then
@@ -234,6 +260,21 @@
 	else
 		archinclude=${SUBARCH}
 	fi
+elif [ "${SRCARCH}" = "arm" -a "${SUBARCH}" != "" ]; then
+	subarchdir=$(find ${tree}arch/$SRCARCH/ -name "mach-*" -type d -o \
+							-name "plat-*" -type d);
+	for i in $subarchdir; do
+		case "$i" in
+			*"mach-"${SUBARCH})
+				;;
+			*"plat-"${SUBARCH})
+				;;
+			*)
+				subarchprune="$subarchprune \
+						-wholename $i -prune -o"
+				;;
+		esac
+	done
 fi
 
 remove_structs=
diff --git a/security/capability.c b/security/capability.c
index b14a30c..0fe5a02 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -395,6 +395,11 @@
 	return 0;
 }
 
+static int cap_kernel_module_from_file(struct file *file)
+{
+	return 0;
+}
+
 static int cap_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return 0;
@@ -967,6 +972,7 @@
 	set_to_cap_if_null(ops, kernel_act_as);
 	set_to_cap_if_null(ops, kernel_create_files_as);
 	set_to_cap_if_null(ops, kernel_module_request);
+	set_to_cap_if_null(ops, kernel_module_from_file);
 	set_to_cap_if_null(ops, task_fix_setuid);
 	set_to_cap_if_null(ops, task_setpgid);
 	set_to_cap_if_null(ops, task_getpgid);
diff --git a/security/commoncap.c b/security/commoncap.c
index 6dbae46..7ee08c7 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -76,24 +76,33 @@
 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
 		int cap, int audit)
 {
-	for (;;) {
-		/* The owner of the user namespace has all caps. */
-		if (targ_ns != &init_user_ns && uid_eq(targ_ns->owner, cred->euid))
-			return 0;
+	struct user_namespace *ns = targ_ns;
 
+	/* See if cred has the capability in the target user namespace
+	 * by examining the target user namespace and all of the target
+	 * user namespace's parents.
+	 */
+	for (;;) {
 		/* Do we have the necessary capabilities? */
-		if (targ_ns == cred->user_ns)
+		if (ns == cred->user_ns)
 			return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
 
 		/* Have we tried all of the parent namespaces? */
-		if (targ_ns == &init_user_ns)
+		if (ns == &init_user_ns)
 			return -EPERM;
 
+	/*
+	 * A task in the parent user namespace whose euid owns this
+	 * user namespace has all caps in it.
+	 */
+		if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
+			return 0;
+
 		/*
-		 *If you have a capability in a parent user ns, then you have
+		 * If you have a capability in a parent user ns, then you have
 		 * it over all children user namespaces as well.
 		 */
-		targ_ns = targ_ns->parent;
+		ns = ns->parent;
 	}
 
 	/* We never get here */
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 6ee8826..3b2adb7 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -127,7 +127,7 @@
 struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
 
 /* IMA policy related functions */
-enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, POST_SETATTR };
+enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, MODULE_CHECK, POST_SETATTR };
 
 int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
 		     int flags);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index b356884..0cea3db 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -100,12 +100,12 @@
  * ima_get_action - appraise & measure decision based on policy.
  * @inode: pointer to inode to measure
  * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXECUTE)
- * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP)
+ * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP, MODULE_CHECK)
  *
  * The policy is defined in terms of keypairs:
  * 		subj=, obj=, type=, func=, mask=, fsmagic=
  *	subj,obj, and type: are LSM specific.
- * 	func: FILE_CHECK | BPRM_CHECK | FILE_MMAP
+ * 	func: FILE_CHECK | BPRM_CHECK | FILE_MMAP | MODULE_CHECK
  * 	mask: contains the permission mask
  *	fsmagic: hex value
  *
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 73c9a26..45de18e 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -280,6 +280,27 @@
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
+/**
+ * ima_module_check - based on policy, collect/store/appraise measurement.
+ * @file: pointer to the file to be measured/appraised
+ *
+ * Measure/appraise kernel modules based on policy.
+ *
+ * If IMA appraisal is enforced, return the measurement/appraisal
+ * result; otherwise always return 0.
+ */
+int ima_module_check(struct file *file)
+{
+	int rc;
+
+	if (!file)
+		rc = INTEGRITY_UNKNOWN;
+	else
+		rc = process_measurement(file, file->f_dentry->d_name.name,
+					 MAY_EXEC, MODULE_CHECK);
+	return (ima_appraise & IMA_APPRAISE_ENFORCE) ? rc : 0;
+}
+
 static int __init init_ima(void)
 {
 	int error;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index c7dacd2..af7d182 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -80,6 +80,7 @@
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = FILE_CHECK,.mask = MAY_READ,.uid = GLOBAL_ROOT_UID,
 	 .flags = IMA_FUNC | IMA_MASK | IMA_UID},
+	{.action = MEASURE,.func = MODULE_CHECK, .flags = IMA_FUNC},
 };
 
 static struct ima_rule_entry default_appraise_rules[] = {
@@ -401,6 +402,8 @@
 			/* PATH_CHECK is for backwards compat */
 			else if (strcmp(args[0].from, "PATH_CHECK") == 0)
 				entry->func = FILE_CHECK;
+			else if (strcmp(args[0].from, "MODULE_CHECK") == 0)
+				entry->func = MODULE_CHECK;
 			else if (strcmp(args[0].from, "FILE_MMAP") == 0)
 				entry->func = FILE_MMAP;
 			else if (strcmp(args[0].from, "BPRM_CHECK") == 0)
diff --git a/security/keys/key.c b/security/keys/key.c
index a15c9da..8fb7c7b 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -854,13 +854,13 @@
 	/* if the client doesn't provide, decide on the permissions we want */
 	if (perm == KEY_PERM_UNDEF) {
 		perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
-		perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR;
+		perm |= KEY_USR_VIEW;
 
 		if (ktype->read)
-			perm |= KEY_POS_READ | KEY_USR_READ;
+			perm |= KEY_POS_READ;
 
 		if (ktype == &key_type_keyring || ktype->update)
-			perm |= KEY_USR_WRITE;
+			perm |= KEY_POS_WRITE;
 	}
 
 	/* allocate a new key */
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 5d34b4e..4b5c948 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1132,12 +1132,12 @@
 	ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc,
 				    ARRAY_SIZE(iovstack), iovstack, &iov);
 	if (ret < 0)
-		return ret;
+		goto err;
 	if (ret == 0)
 		goto no_payload_free;
 
 	ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
-
+err:
 	if (iov != iovstack)
 		kfree(iov);
 	return ret;
@@ -1495,7 +1495,8 @@
 		goto error_keyring;
 	newwork = &cred->rcu;
 
-	cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
+	cred->session_keyring = key_ref_to_ptr(keyring_r);
+	keyring_r = NULL;
 	init_task_work(newwork, key_change_session_keyring);
 
 	me = current;
@@ -1519,7 +1520,7 @@
 	mycred = current_cred();
 	pcred = __task_cred(parent);
 	if (mycred == pcred ||
-	    mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) {
+	    mycred->session_keyring == pcred->session_keyring) {
 		ret = 0;
 		goto unlock;
 	}
@@ -1535,9 +1536,9 @@
 		goto unlock;
 
 	/* the keyrings must have the same UID */
-	if ((pcred->tgcred->session_keyring &&
-	     !uid_eq(pcred->tgcred->session_keyring->uid, mycred->euid)) ||
-	    !uid_eq(mycred->tgcred->session_keyring->uid, mycred->euid))
+	if ((pcred->session_keyring &&
+	     !uid_eq(pcred->session_keyring->uid, mycred->euid)) ||
+	    !uid_eq(mycred->session_keyring->uid, mycred->euid))
 		goto unlock;
 
 	/* cancel an already pending keyring replacement */
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 6e42df1..6ece7f2 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -257,17 +257,14 @@
  * Allocate a keyring and link into the destination keyring.
  */
 struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
-			  const struct cred *cred, unsigned long flags,
-			  struct key *dest)
+			  const struct cred *cred, key_perm_t perm,
+			  unsigned long flags, struct key *dest)
 {
 	struct key *keyring;
 	int ret;
 
 	keyring = key_alloc(&key_type_keyring, description,
-			    uid, gid, cred,
-			    (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL,
-			    flags);
-
+			    uid, gid, cred, perm, flags);
 	if (!IS_ERR(keyring)) {
 		ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
 		if (ret < 0) {
@@ -278,6 +275,7 @@
 
 	return keyring;
 }
+EXPORT_SYMBOL(keyring_alloc);
 
 /**
  * keyring_search_aux - Search a keyring tree for a key matching some criteria
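
keyring_alloc() now takes the key permissions as an explicit key_perm_t argument instead of hard-coding (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, and is exported; the key.c, process_keys.c, and request_key.c hunks in this series each pass a mask tailored to the keyring being created. A hedged sketch of a call site adapted to the new signature (the "_demo" keyring and the chosen mask are illustrative):

#include <linux/key.h>
#include <linux/cred.h>

static struct key *demo_make_keyring(const struct cred *cred)
{
	/* the caller now chooses the permissions, not the allocator */
	return keyring_alloc("_demo", cred->uid, cred->gid, cred,
			     KEY_POS_ALL | KEY_USR_VIEW,
			     KEY_ALLOC_IN_QUOTA, NULL);
}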
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 86468f3..20e4bf5 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -45,10 +45,12 @@
 	struct user_struct *user;
 	const struct cred *cred;
 	struct key *uid_keyring, *session_keyring;
+	key_perm_t user_keyring_perm;
 	char buf[20];
 	int ret;
 	uid_t uid;
 
+	user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL;
 	cred = current_cred();
 	user = cred->user;
 	uid = from_kuid(cred->user_ns, user->uid);
@@ -73,8 +75,8 @@
 		uid_keyring = find_keyring_by_name(buf, true);
 		if (IS_ERR(uid_keyring)) {
 			uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID,
-						    cred, KEY_ALLOC_IN_QUOTA,
-						    NULL);
+						    cred, user_keyring_perm,
+						    KEY_ALLOC_IN_QUOTA, NULL);
 			if (IS_ERR(uid_keyring)) {
 				ret = PTR_ERR(uid_keyring);
 				goto error;
@@ -89,7 +91,8 @@
 		if (IS_ERR(session_keyring)) {
 			session_keyring =
 				keyring_alloc(buf, user->uid, INVALID_GID,
-					      cred, KEY_ALLOC_IN_QUOTA, NULL);
+					      cred, user_keyring_perm,
+					      KEY_ALLOC_IN_QUOTA, NULL);
 			if (IS_ERR(session_keyring)) {
 				ret = PTR_ERR(session_keyring);
 				goto error_release;
@@ -130,6 +133,7 @@
 	struct key *keyring;
 
 	keyring = keyring_alloc("_tid", new->uid, new->gid, new,
+				KEY_POS_ALL | KEY_USR_VIEW,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
@@ -170,27 +174,18 @@
 int install_process_keyring_to_cred(struct cred *new)
 {
 	struct key *keyring;
-	int ret;
 
-	if (new->tgcred->process_keyring)
+	if (new->process_keyring)
 		return -EEXIST;
 
-	keyring = keyring_alloc("_pid", new->uid, new->gid,
-				new, KEY_ALLOC_QUOTA_OVERRUN, NULL);
+	keyring = keyring_alloc("_pid", new->uid, new->gid, new,
+				KEY_POS_ALL | KEY_USR_VIEW,
+				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
 
-	spin_lock_irq(&new->tgcred->lock);
-	if (!new->tgcred->process_keyring) {
-		new->tgcred->process_keyring = keyring;
-		keyring = NULL;
-		ret = 0;
-	} else {
-		ret = -EEXIST;
-	}
-	spin_unlock_irq(&new->tgcred->lock);
-	key_put(keyring);
-	return ret;
+	new->process_keyring = keyring;
+	return 0;
 }
 
 /*
@@ -231,11 +226,12 @@
 	/* create an empty session keyring */
 	if (!keyring) {
 		flags = KEY_ALLOC_QUOTA_OVERRUN;
-		if (cred->tgcred->session_keyring)
+		if (cred->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
-		keyring = keyring_alloc("_ses", cred->uid, cred->gid,
-					cred, flags, NULL);
+		keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred,
+					KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
+					flags, NULL);
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
 	} else {
@@ -243,17 +239,11 @@
 	}
 
 	/* install the keyring */
-	spin_lock_irq(&cred->tgcred->lock);
-	old = cred->tgcred->session_keyring;
-	rcu_assign_pointer(cred->tgcred->session_keyring, keyring);
-	spin_unlock_irq(&cred->tgcred->lock);
+	old = cred->session_keyring;
+	rcu_assign_pointer(cred->session_keyring, keyring);
 
-	/* we're using RCU on the pointer, but there's no point synchronising
-	 * on it if it didn't previously point to anything */
-	if (old) {
-		synchronize_rcu();
+	if (old)
 		key_put(old);
-	}
 
 	return 0;
 }
@@ -368,17 +358,15 @@
 	}
 
 	/* search the process keyring second */
-	if (cred->tgcred->process_keyring) {
+	if (cred->process_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(cred->tgcred->process_keyring, 1),
+			make_key_ref(cred->process_keyring, 1),
 			cred, type, description, match, no_state_check);
 		if (!IS_ERR(key_ref))
 			goto found;
 
 		switch (PTR_ERR(key_ref)) {
 		case -EAGAIN: /* no key */
-			if (ret)
-				break;
 		case -ENOKEY: /* negative key */
 			ret = key_ref;
 			break;
@@ -389,12 +377,10 @@
 	}
 
 	/* search the session keyring */
-	if (cred->tgcred->session_keyring) {
+	if (cred->session_keyring) {
 		rcu_read_lock();
 		key_ref = keyring_search_aux(
-			make_key_ref(rcu_dereference(
-					     cred->tgcred->session_keyring),
-				     1),
+			make_key_ref(rcu_dereference(cred->session_keyring), 1),
 			cred, type, description, match, no_state_check);
 		rcu_read_unlock();
 
@@ -564,7 +550,7 @@
 		break;
 
 	case KEY_SPEC_PROCESS_KEYRING:
-		if (!cred->tgcred->process_keyring) {
+		if (!cred->process_keyring) {
 			if (!(lflags & KEY_LOOKUP_CREATE))
 				goto error;
 
@@ -576,13 +562,13 @@
 			goto reget_creds;
 		}
 
-		key = cred->tgcred->process_keyring;
+		key = cred->process_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_SESSION_KEYRING:
-		if (!cred->tgcred->session_keyring) {
+		if (!cred->session_keyring) {
 			/* always install a session keyring upon access if one
 			 * doesn't exist yet */
 			ret = install_user_keyrings();
@@ -597,7 +583,7 @@
 			if (ret < 0)
 				goto error;
 			goto reget_creds;
-		} else if (cred->tgcred->session_keyring ==
+		} else if (cred->session_keyring ==
 			   cred->user->session_keyring &&
 			   lflags & KEY_LOOKUP_CREATE) {
 			ret = join_session_keyring(NULL);
@@ -607,7 +593,7 @@
 		}
 
 		rcu_read_lock();
-		key = rcu_dereference(cred->tgcred->session_keyring);
+		key = rcu_dereference(cred->session_keyring);
 		atomic_inc(&key->usage);
 		rcu_read_unlock();
 		key_ref = make_key_ref(key, 1);
@@ -767,12 +753,6 @@
 	struct key *keyring;
 	long ret, serial;
 
-	/* only permit this if there's a single thread in the thread group -
-	 * this avoids us having to adjust the creds on all threads and risking
-	 * ENOMEM */
-	if (!current_is_single_threaded())
-		return -EMLINK;
-
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
@@ -784,7 +764,7 @@
 		if (ret < 0)
 			goto error;
 
-		serial = new->tgcred->session_keyring->serial;
+		serial = new->session_keyring->serial;
 		ret = commit_creds(new);
 		if (ret == 0)
 			ret = serial;
@@ -798,8 +778,10 @@
 	keyring = find_keyring_by_name(name, false);
 	if (PTR_ERR(keyring) == -ENOKEY) {
 		/* not found - try and create a new one */
-		keyring = keyring_alloc(name, old->uid, old->gid, old,
-					KEY_ALLOC_IN_QUOTA, NULL);
+		keyring = keyring_alloc(
+			name, old->uid, old->gid, old,
+			KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK,
+			KEY_ALLOC_IN_QUOTA, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
 			goto error2;
@@ -807,6 +789,9 @@
 	} else if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto error2;
+	} else if (keyring == new->session_keyring) {
+		ret = 0;
+		goto error2;
 	}
 
 	/* we've got a keyring - now to install it */
@@ -863,8 +848,7 @@
 
 	new->jit_keyring	= old->jit_keyring;
 	new->thread_keyring	= key_get(old->thread_keyring);
-	new->tgcred->tgid	= old->tgcred->tgid;
-	new->tgcred->process_keyring = key_get(old->tgcred->process_keyring);
+	new->process_keyring	= key_get(old->process_keyring);
 
 	security_transfer_creds(new, old);
 
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 66e2118..4bd6bdb 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -126,6 +126,7 @@
 
 	cred = get_current_cred();
 	keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred,
+				KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	put_cred(cred);
 	if (IS_ERR(keyring)) {
@@ -150,12 +151,12 @@
 		cred->thread_keyring ? cred->thread_keyring->serial : 0);
 
 	prkey = 0;
-	if (cred->tgcred->process_keyring)
-		prkey = cred->tgcred->process_keyring->serial;
+	if (cred->process_keyring)
+		prkey = cred->process_keyring->serial;
 	sprintf(keyring_str[1], "%d", prkey);
 
 	rcu_read_lock();
-	session = rcu_dereference(cred->tgcred->session_keyring);
+	session = rcu_dereference(cred->session_keyring);
 	if (!session)
 		session = cred->user->session_keyring;
 	sskey = session->serial;
@@ -297,14 +298,14 @@
 				break;
 
 		case KEY_REQKEY_DEFL_PROCESS_KEYRING:
-			dest_keyring = key_get(cred->tgcred->process_keyring);
+			dest_keyring = key_get(cred->process_keyring);
 			if (dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_SESSION_KEYRING:
 			rcu_read_lock();
 			dest_keyring = key_get(
-				rcu_dereference(cred->tgcred->session_keyring));
+				rcu_dereference(cred->session_keyring));
 			rcu_read_unlock();
 
 			if (dest_keyring)
@@ -347,6 +348,7 @@
 	const struct cred *cred = current_cred();
 	unsigned long prealloc;
 	struct key *key;
+	key_perm_t perm;
 	key_ref_t key_ref;
 	int ret;
 
@@ -355,8 +357,15 @@
 	*_key = NULL;
 	mutex_lock(&user->cons_lock);
 
+	perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+	perm |= KEY_USR_VIEW;
+	if (type->read)
+		perm |= KEY_POS_READ;
+	if (type == &key_type_keyring || type->update)
+		perm |= KEY_POS_WRITE;
+
 	key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
-			KEY_POS_ALL, flags);
+			perm, flags);
 	if (IS_ERR(key))
 		goto alloc_failed;
 
diff --git a/security/security.c b/security/security.c
index 8dcd4ae..daa97f4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -820,6 +820,16 @@
 	return security_ops->kernel_module_request(kmod_name);
 }
 
+int security_kernel_module_from_file(struct file *file)
+{
+	int ret;
+
+	ret = security_ops->kernel_module_from_file(file);
+	if (ret)
+		return ret;
+	return ima_module_check(file);
+}
+
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags)
 {
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 370a646..855e464 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -69,6 +69,8 @@
 	{ RTM_SETDCB,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_NEWNETCONF,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_GETNETCONF,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+	{ RTM_NEWMDB,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_DELMDB,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE  },
 	{ RTM_GETMDB,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
diff --git a/security/smack/Kconfig b/security/smack/Kconfig
index 603b087..e69de9c 100644
--- a/security/smack/Kconfig
+++ b/security/smack/Kconfig
@@ -1,6 +1,10 @@
 config SECURITY_SMACK
 	bool "Simplified Mandatory Access Control Kernel Support"
-	depends on NETLABEL && SECURITY_NETWORK
+	depends on NET
+	depends on INET
+	depends on SECURITY
+	select NETLABEL
+	select SECURITY_NETWORK
 	default n
 	help
 	  This selects the Simplified Mandatory Access Control Kernel.
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 99929a5..76a5dca 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -2063,6 +2063,19 @@
 	.llseek		= generic_file_llseek,
 };
 
+static struct kset *smackfs_kset;
+/**
+ * smk_init_sysfs - initialize /sys/fs/smackfs
+ *
+ */
+static int smk_init_sysfs(void)
+{
+	smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj);
+	if (!smackfs_kset)
+		return -ENOMEM;
+	return 0;
+}
+
 /**
  * smk_fill_super - fill the /smackfs superblock
  * @sb: the empty superblock
@@ -2183,6 +2196,10 @@
 	if (!security_module_enable(&smack_ops))
 		return 0;
 
+	err = smk_init_sysfs();
+	if (err)
+		printk(KERN_ERR "smackfs: sysfs mountpoint problem.\n");
+
 	err = register_filesystem(&smk_fs_type);
 	if (!err) {
 		smackfs_mount = kern_mount(&smk_fs_type);
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index b4c2984..23414b9 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -17,6 +17,7 @@
 #include <linux/ptrace.h>
 #include <linux/prctl.h>
 #include <linux/ratelimit.h>
+#include <linux/workqueue.h>
 
 #define YAMA_SCOPE_DISABLED	0
 #define YAMA_SCOPE_RELATIONAL	1
@@ -29,12 +30,37 @@
 struct ptrace_relation {
 	struct task_struct *tracer;
 	struct task_struct *tracee;
+	bool invalid;
 	struct list_head node;
+	struct rcu_head rcu;
 };
 
 static LIST_HEAD(ptracer_relations);
 static DEFINE_SPINLOCK(ptracer_relations_lock);
 
+static void yama_relation_cleanup(struct work_struct *work);
+static DECLARE_WORK(yama_relation_work, yama_relation_cleanup);
+
+/**
+ * yama_relation_cleanup - remove invalid entries from the relation list
+ *
+ */
+static void yama_relation_cleanup(struct work_struct *work)
+{
+	struct ptrace_relation *relation;
+
+	spin_lock(&ptracer_relations_lock);
+	rcu_read_lock();
+	list_for_each_entry_rcu(relation, &ptracer_relations, node) {
+		if (relation->invalid) {
+			list_del_rcu(&relation->node);
+			kfree_rcu(relation, rcu);
+		}
+	}
+	rcu_read_unlock();
+	spin_unlock(&ptracer_relations_lock);
+}
+
 /**
  * yama_ptracer_add - add/replace an exception for this tracer/tracee pair
  * @tracer: the task_struct of the process doing the ptrace
@@ -48,32 +74,34 @@
 static int yama_ptracer_add(struct task_struct *tracer,
 			    struct task_struct *tracee)
 {
-	int rc = 0;
-	struct ptrace_relation *added;
-	struct ptrace_relation *entry, *relation = NULL;
+	struct ptrace_relation *relation, *added;
 
 	added = kmalloc(sizeof(*added), GFP_KERNEL);
 	if (!added)
 		return -ENOMEM;
 
-	spin_lock_bh(&ptracer_relations_lock);
-	list_for_each_entry(entry, &ptracer_relations, node)
-		if (entry->tracee == tracee) {
-			relation = entry;
-			break;
+	added->tracee = tracee;
+	added->tracer = tracer;
+	added->invalid = false;
+
+	spin_lock(&ptracer_relations_lock);
+	rcu_read_lock();
+	list_for_each_entry_rcu(relation, &ptracer_relations, node) {
+		if (relation->invalid)
+			continue;
+		if (relation->tracee == tracee) {
+			list_replace_rcu(&relation->node, &added->node);
+			kfree_rcu(relation, rcu);
+			goto out;
 		}
-	if (!relation) {
-		relation = added;
-		relation->tracee = tracee;
-		list_add(&relation->node, &ptracer_relations);
 	}
-	relation->tracer = tracer;
 
-	spin_unlock_bh(&ptracer_relations_lock);
-	if (added != relation)
-		kfree(added);
+	list_add_rcu(&added->node, &ptracer_relations);
 
-	return rc;
+out:
+	rcu_read_unlock();
+	spin_unlock(&ptracer_relations_lock);
+	return 0;
 }
 
 /**
@@ -84,16 +112,23 @@
 static void yama_ptracer_del(struct task_struct *tracer,
 			     struct task_struct *tracee)
 {
-	struct ptrace_relation *relation, *safe;
+	struct ptrace_relation *relation;
+	bool marked = false;
 
-	spin_lock_bh(&ptracer_relations_lock);
-	list_for_each_entry_safe(relation, safe, &ptracer_relations, node)
+	rcu_read_lock();
+	list_for_each_entry_rcu(relation, &ptracer_relations, node) {
+		if (relation->invalid)
+			continue;
 		if (relation->tracee == tracee ||
 		    (tracer && relation->tracer == tracer)) {
-			list_del(&relation->node);
-			kfree(relation);
+			relation->invalid = true;
+			marked = true;
 		}
-	spin_unlock_bh(&ptracer_relations_lock);
+	}
+	rcu_read_unlock();
+
+	if (marked)
+		schedule_work(&yama_relation_work);
 }
 
 /**
@@ -217,21 +252,22 @@
 	struct task_struct *parent = NULL;
 	bool found = false;
 
-	spin_lock_bh(&ptracer_relations_lock);
 	rcu_read_lock();
 	if (!thread_group_leader(tracee))
 		tracee = rcu_dereference(tracee->group_leader);
-	list_for_each_entry(relation, &ptracer_relations, node)
+	list_for_each_entry_rcu(relation, &ptracer_relations, node) {
+		if (relation->invalid)
+			continue;
 		if (relation->tracee == tracee) {
 			parent = relation->tracer;
 			found = true;
 			break;
 		}
+	}
 
 	if (found && (parent == NULL || task_is_descendant(parent, tracer)))
 		rc = 1;
 	rcu_read_unlock();
-	spin_unlock_bh(&ptracer_relations_lock);
 
 	return rc;
 }
@@ -262,14 +298,18 @@
 			/* No additional restrictions. */
 			break;
 		case YAMA_SCOPE_RELATIONAL:
+			rcu_read_lock();
 			if (!task_is_descendant(current, child) &&
 			    !ptracer_exception_found(current, child) &&
-			    !ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+			    !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
 				rc = -EPERM;
+			rcu_read_unlock();
 			break;
 		case YAMA_SCOPE_CAPABILITY:
-			if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+			rcu_read_lock();
+			if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
 				rc = -EPERM;
+			rcu_read_unlock();
 			break;
 		case YAMA_SCOPE_NO_ATTACH:
 		default:
@@ -307,8 +347,10 @@
 	/* Only disallow PTRACE_TRACEME on more aggressive settings. */
 	switch (ptrace_scope) {
 	case YAMA_SCOPE_CAPABILITY:
-		if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE))
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
 			rc = -EPERM;
+		rcu_read_unlock();
 		break;
 	case YAMA_SCOPE_NO_ATTACH:
 		rc = -EPERM;
diff --git a/sound/Kconfig b/sound/Kconfig
index 261a03c..c710ce2 100644
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -52,9 +52,6 @@
 
 	  Disabling this allows alternative OSS implementations.
 
-	  Please read Documentation/feature-removal-schedule.txt for
-	  details.
-
 	  If unsure, say Y.
 
 source "sound/oss/dmasound/Kconfig"
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 0f3d3db..cca8727 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2876,7 +2876,7 @@
 	azx_notifier_unregister(chip);
 
 	chip->init_failed = 1; /* to be sure */
-	complete(&chip->probe_wait);
+	complete_all(&chip->probe_wait);
 
 	if (use_vga_switcheroo(chip)) {
 		if (chip->disabled && chip->bus)
@@ -3504,7 +3504,7 @@
 		pm_runtime_put_noidle(&pci->dev);
 
 	dev++;
-	complete(&chip->probe_wait);
+	complete_all(&chip->probe_wait);
 	return 0;
 
 out_free:
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 0fcfa6f..b6c21ea 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -431,9 +431,11 @@
 	if (get_wcaps(codec, pin_nid) & AC_WCAP_OUT_AMP)
 		snd_hda_codec_write(codec, pin_nid, 0,
 				AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
-	/* Disable pin out until stream is active*/
+	/* Enable pin out: some machines with GM965 get broken output when
+	 * the pin is disabled or changed while HDMI is in use
+	 */
 	snd_hda_codec_write(codec, pin_nid, 0,
-			    AC_VERB_SET_PIN_WIDGET_CONTROL, 0);
+			    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
 }
 
 static int hdmi_get_channel_count(struct hda_codec *codec, hda_nid_t cvt_nid)
@@ -1341,7 +1343,6 @@
 	struct hdmi_spec *spec = codec->spec;
 	int pin_idx = hinfo_to_pin_index(spec, hinfo);
 	hda_nid_t pin_nid = spec->pins[pin_idx].pin_nid;
-	int pinctl;
 	bool non_pcm;
 
 	non_pcm = check_non_pcm_per_cvt(codec, cvt_nid);
@@ -1350,11 +1351,6 @@
 
 	hdmi_setup_audio_infoframe(codec, pin_idx, non_pcm, substream);
 
-	pinctl = snd_hda_codec_read(codec, pin_nid, 0,
-				    AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
-	snd_hda_codec_write(codec, pin_nid, 0,
-			    AC_VERB_SET_PIN_WIDGET_CONTROL, pinctl | PIN_OUT);
-
 	return hdmi_setup_stream(codec, cvt_nid, pin_nid, stream_tag, format);
 }
 
@@ -1374,7 +1370,6 @@
 	int cvt_idx, pin_idx;
 	struct hdmi_spec_per_cvt *per_cvt;
 	struct hdmi_spec_per_pin *per_pin;
-	int pinctl;
 
 	if (hinfo->nid) {
 		cvt_idx = cvt_nid_to_cvt_index(spec, hinfo->nid);
@@ -1391,11 +1386,6 @@
 			return -EINVAL;
 		per_pin = &spec->pins[pin_idx];
 
-		pinctl = snd_hda_codec_read(codec, per_pin->pin_nid, 0,
-					    AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
-		snd_hda_codec_write(codec, per_pin->pin_nid, 0,
-				    AC_VERB_SET_PIN_WIDGET_CONTROL,
-				    pinctl & ~PIN_OUT);
 		snd_hda_spdif_ctls_unassign(codec, pin_idx);
 		per_pin->chmap_set = false;
 		memset(per_pin->chmap, 0, sizeof(per_pin->chmap));
@@ -1691,6 +1681,30 @@
 	.unsol_event		= hdmi_unsol_event,
 };
 
+static void intel_haswell_fixup_connect_list(struct hda_codec *codec)
+{
+	unsigned int vendor_param;
+	hda_nid_t list[3] = {0x2, 0x3, 0x4};
+
+	vendor_param = snd_hda_codec_read(codec, 0x08, 0, 0xf81, 0);
+	if (vendor_param == -1 || vendor_param & 0x02)
+		return;
+
+	/* enable DP1.2 mode */
+	vendor_param |= 0x02;
+	snd_hda_codec_read(codec, 0x08, 0, 0x781, vendor_param);
+
+	vendor_param = snd_hda_codec_read(codec, 0x08, 0, 0xf81, 0);
+	if (vendor_param == -1 || !(vendor_param & 0x02))
+		return;
+
+	/* override 3 pins connection list */
+	snd_hda_override_conn_list(codec, 0x05, 3, list);
+	snd_hda_override_conn_list(codec, 0x06, 3, list);
+	snd_hda_override_conn_list(codec, 0x07, 3, list);
+}
+
+
 static int patch_generic_hdmi(struct hda_codec *codec)
 {
 	struct hdmi_spec *spec;
@@ -1700,6 +1714,10 @@
 		return -ENOMEM;
 
 	codec->spec = spec;
+
+	if (codec->vendor_id == 0x80862807)
+		intel_haswell_fixup_connect_list(codec);
+
 	if (hdmi_parse_codec(codec) < 0) {
 		codec->spec = NULL;
 		kfree(spec);
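
intel_haswell_fixup_connect_list() uses a set-then-verify sequence on a codec
vendor parameter: read it (verb 0xf81), set the DP1.2 enable bit (verb 0x781),
then read it back before trusting the new mode.  Condensed, with the verb IDs
and the 0x02 bit taken from the function above:

    unsigned int val = snd_hda_codec_read(codec, 0x08, 0, 0xf81, 0);

    if (val == -1 || (val & 0x02))
            return;                        /* read failed, or already enabled */

    snd_hda_codec_read(codec, 0x08, 0, 0x781, val | 0x02);  /* set DP1.2 */

    val = snd_hda_codec_read(codec, 0x08, 0, 0xf81, 0);
    if (val == -1 || !(val & 0x02))
            return;                        /* the write did not stick */
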
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 7743775..6ee3459 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4373,6 +4373,7 @@
 	if (!spec)
 		return -ENOMEM;
 	codec->spec = spec;
+	codec->single_adc_amp = 1;
 	spec->mixer_nid = mixer_nid;
 	snd_hda_gen_init(&spec->gen);
 	snd_array_init(&spec->kctls, sizeof(struct snd_kcontrol_new), 32);
@@ -6569,8 +6570,8 @@
 				  const struct alc_fixup *fix, int action)
 {
 	if (action == ALC_FIXUP_ACT_PRE_PROBE) {
-		snd_hda_override_pin_caps(codec, 0x18, 0x00001714);
-		snd_hda_override_pin_caps(codec, 0x19, 0x0000171c);
+		snd_hda_override_pin_caps(codec, 0x18, 0x00000734);
+		snd_hda_override_pin_caps(codec, 0x19, 0x0000073c);
 	}
 }
 
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index df13c0f..a86547c 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1725,7 +1725,7 @@
 	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1658,
 			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1659,
-			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+			  "HP Pavilion dv7", STAC_HP_DV7_4000),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165A,
 			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165B,
diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c
index a0791ec..6361dab 100644
--- a/sound/soc/codecs/cs42l73.c
+++ b/sound/soc/codecs/cs42l73.c
@@ -40,6 +40,7 @@
 	u32 sysclk;
 	u8 mclksel;
 	u32 mclk;
+	int shutdwn_delay;
 };
 
 static const struct reg_default cs42l73_reg_defaults[] = {
@@ -588,7 +589,60 @@
 	SOC_ENUM("XSPOUT Mono/Stereo Select", xsp_output_mux_enum),
 };
 
+static int cs42l73_spklo_spk_amp_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct cs42l73_private *priv = snd_soc_codec_get_drvdata(codec);
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMD:
+		/* 150 ms delay between setting PDN and MCLKDIS */
+		priv->shutdwn_delay = 150;
+		break;
+	default:
+		pr_err("Invalid event = 0x%x\n", event);
+	}
+	return 0;
+}
+
+static int cs42l73_ear_amp_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct cs42l73_private *priv = snd_soc_codec_get_drvdata(codec);
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMD:
+		/* 50 ms delay between setting PDN and MCLKDIS */
+		if (priv->shutdwn_delay < 50)
+			priv->shutdwn_delay = 50;
+		break;
+	default:
+		pr_err("Invalid event = 0x%x\n", event);
+	}
+	return 0;
+}
+
+
+static int cs42l73_hp_amp_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct cs42l73_private *priv = snd_soc_codec_get_drvdata(codec);
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMD:
+		/* 30 ms delay between setting PDN and MCLKDIS */
+		if (priv->shutdwn_delay < 30)
+			priv->shutdwn_delay = 30;
+		break;
+	default:
+		pr_err("Invalid event = 0x%x\n", event);
+	}
+	return 0;
+}
+
 static const struct snd_soc_dapm_widget cs42l73_dapm_widgets[] = {
+	SND_SOC_DAPM_INPUT("DMICA"),
+	SND_SOC_DAPM_INPUT("DMICB"),
 	SND_SOC_DAPM_INPUT("LINEINA"),
 	SND_SOC_DAPM_INPUT("LINEINB"),
 	SND_SOC_DAPM_INPUT("MIC1"),
@@ -604,9 +658,7 @@
 			CS42L73_PWRCTL2, 3, 1),
 	SND_SOC_DAPM_AIF_OUT("ASPOUTR", NULL,  0,
 			CS42L73_PWRCTL2, 3, 1),
-	SND_SOC_DAPM_AIF_OUT("VSPOUTL", NULL,  0,
-			CS42L73_PWRCTL2, 4, 1),
-	SND_SOC_DAPM_AIF_OUT("VSPOUTR", NULL,  0,
+	SND_SOC_DAPM_AIF_OUT("VSPINOUT", NULL,  0,
 			CS42L73_PWRCTL2, 4, 1),
 
 	SND_SOC_DAPM_PGA("PGA Left", SND_SOC_NOPM, 0, 0, NULL, 0),
@@ -632,8 +684,7 @@
 	SND_SOC_DAPM_MIXER("ASPR Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 	SND_SOC_DAPM_MIXER("XSPL Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 	SND_SOC_DAPM_MIXER("XSPR Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("VSPL Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("VSPR Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("VSP Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 
 	SND_SOC_DAPM_AIF_IN("XSPINL", NULL, 0,
 				CS42L73_PWRCTL2, 0, 1),
@@ -649,7 +700,7 @@
 	SND_SOC_DAPM_AIF_IN("ASPINM", NULL, 0,
 				CS42L73_PWRCTL2, 2, 1),
 
-	SND_SOC_DAPM_AIF_IN("VSPIN", NULL, 0,
+	SND_SOC_DAPM_AIF_IN("VSPINOUT", NULL, 0,
 				CS42L73_PWRCTL2, 4, 1),
 
 	SND_SOC_DAPM_MIXER("HL Left Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
@@ -674,16 +725,20 @@
 	SND_SOC_DAPM_PGA("SPK DAC", SND_SOC_NOPM, 0, 0, NULL, 0),
 	SND_SOC_DAPM_PGA("ESL DAC", SND_SOC_NOPM, 0, 0, NULL, 0),
 
-	SND_SOC_DAPM_SWITCH("HP Amp", CS42L73_PWRCTL3, 0, 1,
-			    &hp_amp_ctl),
+	SND_SOC_DAPM_SWITCH_E("HP Amp",  CS42L73_PWRCTL3, 0, 1,
+			    &hp_amp_ctl, cs42l73_hp_amp_event,
+			SND_SOC_DAPM_POST_PMD),
 	SND_SOC_DAPM_SWITCH("LO Amp", CS42L73_PWRCTL3, 1, 1,
 			    &lo_amp_ctl),
-	SND_SOC_DAPM_SWITCH("SPK Amp", CS42L73_PWRCTL3, 2, 1,
-			    &spk_amp_ctl),
-	SND_SOC_DAPM_SWITCH("EAR Amp", CS42L73_PWRCTL3, 3, 1,
-			    &ear_amp_ctl),
-	SND_SOC_DAPM_SWITCH("SPKLO Amp", CS42L73_PWRCTL3, 4, 1,
-			    &spklo_amp_ctl),
+	SND_SOC_DAPM_SWITCH_E("SPK Amp", CS42L73_PWRCTL3, 2, 1,
+			&spk_amp_ctl, cs42l73_spklo_spk_amp_event,
+			SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_SWITCH_E("EAR Amp", CS42L73_PWRCTL3, 3, 1,
+			    &ear_amp_ctl, cs42l73_ear_amp_event,
+			SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_SWITCH_E("SPKLO Amp", CS42L73_PWRCTL3, 4, 1,
+			    &spklo_amp_ctl, cs42l73_spklo_spk_amp_event,
+			SND_SOC_DAPM_POST_PMD),
 
 	SND_SOC_DAPM_OUTPUT("HPOUTA"),
 	SND_SOC_DAPM_OUTPUT("HPOUTB"),
@@ -705,7 +760,7 @@
 
 	{"ESL DAC", "ESL-ASP Mono Volume", "ESL Mixer"},
 	{"ESL DAC", "ESL-XSP Mono Volume", "ESL Mixer"},
-	{"ESL DAC", "ESL-VSP Mono Volume", "VSPIN"},
+	{"ESL DAC", "ESL-VSP Mono Volume", "VSPINOUT"},
 	/* Loopback */
 	{"ESL DAC", "ESL-IP Mono Volume", "Input Left Capture"},
 	{"ESL DAC", "ESL-IP Mono Volume", "Input Right Capture"},
@@ -727,7 +782,7 @@
 
 	{"SPK DAC", "SPK-ASP Mono Volume", "SPK Mixer"},
 	{"SPK DAC", "SPK-XSP Mono Volume", "SPK Mixer"},
-	{"SPK DAC", "SPK-VSP Mono Volume", "VSPIN"},
+	{"SPK DAC", "SPK-VSP Mono Volume", "VSPINOUT"},
 	/* Loopback */
 	{"SPK DAC", "SPK-IP Mono Volume", "Input Left Capture"},
 	{"SPK DAC", "SPK-IP Mono Volume", "Input Right Capture"},
@@ -770,8 +825,8 @@
 	{"HL Right Mixer", NULL, "ASPINR"},
 	{"HL Left Mixer", NULL, "XSPINL"},
 	{"HL Right Mixer", NULL, "XSPINR"},
-	{"HL Left Mixer", NULL, "VSPIN"},
-	{"HL Right Mixer", NULL, "VSPIN"},
+	{"HL Left Mixer", NULL, "VSPINOUT"},
+	{"HL Right Mixer", NULL, "VSPINOUT"},
 
 	{"ASPINL", NULL, "ASP Playback"},
 	{"ASPINM", NULL, "ASP Playback"},
@@ -779,7 +834,7 @@
 	{"XSPINL", NULL, "XSP Playback"},
 	{"XSPINM", NULL, "XSP Playback"},
 	{"XSPINR", NULL, "XSP Playback"},
-	{"VSPIN", NULL, "VSP Playback"},
+	{"VSPINOUT", NULL, "VSP Playback"},
 
 	/* Capture Paths */
 	{"MIC1", NULL, "MIC1 Bias"},
@@ -795,6 +850,8 @@
 
 	{"ADC Left", NULL, "PGA Left"},
 	{"ADC Right", NULL, "PGA Right"},
+	{"DMIC Left", NULL, "DMICA"},
+	{"DMIC Right", NULL, "DMICB"},
 
 	{"Input Left Capture", "ADC Left Input", "ADC Left"},
 	{"Input Right Capture", "ADC Right Input", "ADC Right"},
@@ -819,21 +876,18 @@
 	{"XSPOUTR", NULL, "XSPR Output Mixer"},
 
 	/* Voice Capture */
-	{"VSPL Output Mixer", NULL, "Input Left Capture"},
-	{"VSPR Output Mixer", NULL, "Input Left Capture"},
+	{"VSP Output Mixer", NULL, "Input Left Capture"},
+	{"VSP Output Mixer", NULL, "Input Right Capture"},
 
-	{"VSPOUTL", "VSP-IP Volume", "VSPL Output Mixer"},
-	{"VSPOUTR", "VSP-IP Volume", "VSPR Output Mixer"},
+	{"VSPINOUT", "VSP-IP Volume", "VSP Output Mixer"},
 
-	{"VSPOUTL", NULL, "VSPL Output Mixer"},
-	{"VSPOUTR", NULL, "VSPR Output Mixer"},
+	{"VSPINOUT", NULL, "VSP Output Mixer"},
 
 	{"ASP Capture", NULL, "ASPOUTL"},
 	{"ASP Capture", NULL, "ASPOUTR"},
 	{"XSP Capture", NULL, "XSPOUTL"},
 	{"XSP Capture", NULL, "XSPOUTR"},
-	{"VSP Capture", NULL, "VSPOUTL"},
-	{"VSP Capture", NULL, "VSPOUTR"},
+	{"VSP Capture", NULL, "VSPINOUT"},
 };
 
 struct cs42l73_mclk_div {
@@ -1167,6 +1221,14 @@
 
 	case SND_SOC_BIAS_OFF:
 		snd_soc_update_bits(codec, CS42L73_PWRCTL1, PDN, 1);
+		if (cs42l73->shutdwn_delay > 0) {
+			mdelay(cs42l73->shutdwn_delay);
+			cs42l73->shutdwn_delay = 0;
+		} else {
+			mdelay(15); /* Min amount of time required to power
+				     * down.
+				     */
+		}
 		snd_soc_update_bits(codec, CS42L73_DMMCC, MCLKDIS, 1);
 		break;
 	}
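
The cs42l73 change implements a "longest requested delay wins" scheme: each
amplifier's DAPM post-power-down event records the settling time its output
stage needs, and the bias-level handler waits that long after setting PDN
before it gates MCLK with MCLKDIS.  A stripped-down sketch of one such event
callback (illustrative only):

    static int example_amp_event(struct snd_soc_dapm_widget *w,
                                 struct snd_kcontrol *kcontrol, int event)
    {
            struct snd_soc_codec *codec = w->codec;
            struct cs42l73_private *priv = snd_soc_codec_get_drvdata(codec);

            if (event == SND_SOC_DAPM_POST_PMD) {
                    /* never shorten a delay another widget already asked for */
                    if (priv->shutdwn_delay < 50)
                            priv->shutdwn_delay = 50;
            }
            return 0;
    }
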
diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c
index 5be42bf..4068f24 100644
--- a/sound/soc/codecs/sigmadsp.c
+++ b/sound/soc/codecs/sigmadsp.c
@@ -225,7 +225,7 @@
 static int sigma_action_write_regmap(void *control_data,
 	const struct sigma_action *sa, size_t len)
 {
-	return regmap_raw_write(control_data, le16_to_cpu(sa->addr),
+	return regmap_raw_write(control_data, be16_to_cpu(sa->addr),
 		sa->payload, len - 2);
 }
 
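The one-word sigmadsp fix matters because SigmaDSP firmware blobs store
register addresses big-endian; decoding them as little-endian silently
byte-swaps every address on x86 and other little-endian hosts:

    /* address bytes 0x08 0x1c in the blob denote register 0x081c */
    u8 blob[2] = { 0x08, 0x1c };

    u16 addr = be16_to_cpu(*(__be16 *)blob);    /* 0x081c: correct */
    u16 bad  = le16_to_cpu(*(__le16 *)blob);    /* 0x1c08: byte-swapped */
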
diff --git a/sound/soc/codecs/tpa6130a2.c b/sound/soc/codecs/tpa6130a2.c
index 8d75aa1..c58bee8 100644
--- a/sound/soc/codecs/tpa6130a2.c
+++ b/sound/soc/codecs/tpa6130a2.c
@@ -398,7 +398,8 @@
 						TPA6130A2_MUTE_L;
 
 	if (data->power_gpio >= 0) {
-		ret = gpio_request(data->power_gpio, "tpa6130a2 enable");
+		ret = devm_gpio_request(dev, data->power_gpio,
+					"tpa6130a2 enable");
 		if (ret < 0) {
 			dev_err(dev, "Failed to request power GPIO (%d)\n",
 				data->power_gpio);
@@ -419,16 +420,16 @@
 		break;
 	}
 
-	data->supply = regulator_get(dev, regulator);
+	data->supply = devm_regulator_get(dev, regulator);
 	if (IS_ERR(data->supply)) {
 		ret = PTR_ERR(data->supply);
 		dev_err(dev, "Failed to request supply: %d\n", ret);
-		goto err_regulator;
+		goto err_gpio;
 	}
 
 	ret = tpa6130a2_power(1);
 	if (ret != 0)
-		goto err_power;
+		goto err_gpio;
 
 
 	/* Read version */
@@ -440,15 +441,10 @@
 	/* Disable the chip */
 	ret = tpa6130a2_power(0);
 	if (ret != 0)
-		goto err_power;
+		goto err_gpio;
 
 	return 0;
 
-err_power:
-	regulator_put(data->supply);
-err_regulator:
-	if (data->power_gpio >= 0)
-		gpio_free(data->power_gpio);
 err_gpio:
 	tpa6130a2_client = NULL;
 
@@ -457,14 +453,7 @@
 
 static int tpa6130a2_remove(struct i2c_client *client)
 {
-	struct tpa6130a2_data *data = i2c_get_clientdata(client);
-
 	tpa6130a2_power(0);
-
-	if (data->power_gpio >= 0)
-		gpio_free(data->power_gpio);
-
-	regulator_put(data->supply);
 	tpa6130a2_client = NULL;
 
 	return 0;
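
The tpa6130a2 conversion is the standard devm_* pattern: resources obtained
through devm_gpio_request() and devm_regulator_get() are released
automatically when the device is unbound, which is why the err_power and
err_regulator unwind labels, and the matching gpio_free()/regulator_put()
calls in remove(), can simply be deleted.  A minimal sketch of the resulting
shape (the GPIO number and supply name are illustrative):

    static int example_probe(struct i2c_client *client,
                             const struct i2c_device_id *id)
    {
            struct device *dev = &client->dev;
            struct regulator *supply;
            int ret;

            ret = devm_gpio_request(dev, 42, "example enable");
            if (ret < 0)
                    return ret;             /* nothing to unwind */

            supply = devm_regulator_get(dev, "Vdd");
            if (IS_ERR(supply))
                    return PTR_ERR(supply); /* GPIO freed automatically */

            return 0;
    }
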
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index 967d0e1..5fbfb06 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -113,7 +113,8 @@
 					SNDRV_PCM_STREAM_PLAYBACK,
 					SND_SOC_DAPM_STREAM_STOP);
-		} else
-			codec_dai->pop_wait = 1;
+		} else {
+			rtd->pop_wait = 1;
 			schedule_delayed_work(&rtd->delayed_work,
 				msecs_to_jiffies(rtd->pmdown_time));
+		}
 	} else {
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 9c768bc..91d592f 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -4155,9 +4155,9 @@
 		ret = of_property_read_string_index(np, propname,
 			2 * i, &routes[i].sink);
 		if (ret) {
-			dev_err(card->dev, "ASoC: Property '%s' index %d"
-				" could not be read: %d\n", propname, 2 * i,
-				ret);
+			dev_err(card->dev,
+				"ASoC: Property '%s' index %d could not be read: %d\n",
+				propname, 2 * i, ret);
 			kfree(routes);
 			return -EINVAL;
 		}
@@ -4165,8 +4165,8 @@
 			(2 * i) + 1, &routes[i].source);
 		if (ret) {
 			dev_err(card->dev,
-				"ASoC: Property '%s' index %d could not be"
-				" read: %d\n", propname, (2 * i) + 1, ret);
+				"ASoC: Property '%s' index %d could not be read: %d\n",
+				propname, (2 * i) + 1, ret);
 			kfree(routes);
 			return -EINVAL;
 		}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 5c3ca2a..d7711fc 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -334,11 +334,11 @@
 	dev_dbg(rtd->dev, "ASoC: pop wq checking: %s status: %s waiting: %s\n",
 		 codec_dai->driver->playback.stream_name,
 		 codec_dai->playback_active ? "active" : "inactive",
-		 codec_dai->pop_wait ? "yes" : "no");
+		 rtd->pop_wait ? "yes" : "no");
 
 	/* are we waiting on this codec DAI stream */
-	if (codec_dai->pop_wait == 1) {
-		codec_dai->pop_wait = 0;
+	if (rtd->pop_wait == 1) {
+		rtd->pop_wait = 0;
 		snd_soc_dapm_stream_event(rtd, SNDRV_PCM_STREAM_PLAYBACK,
 					  SND_SOC_DAPM_STREAM_STOP);
 	}
@@ -408,7 +408,7 @@
 						  SND_SOC_DAPM_STREAM_STOP);
 		} else {
 			/* start delayed pop wq here for playback streams */
-			codec_dai->pop_wait = 1;
+			rtd->pop_wait = 1;
 			schedule_delayed_work(&rtd->delayed_work,
 				msecs_to_jiffies(rtd->pmdown_time));
 		}
@@ -480,8 +480,8 @@
 
 	/* cancel any delayed stream shutdown that is pending */
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
-	    codec_dai->pop_wait) {
-		codec_dai->pop_wait = 0;
+	    rtd->pop_wait) {
+		rtd->pop_wait = 0;
 		cancel_delayed_work(&rtd->delayed_work);
 	}
 
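Moving pop_wait from the CODEC DAI into the runtime data (rtd) keeps the
deferred-shutdown state in one place per PCM link, shared by the PCM and
compressed paths.  The life cycle, as the hunks above implement it:

    /* on playback close: defer the power-down to avoid an audible pop */
    rtd->pop_wait = 1;
    schedule_delayed_work(&rtd->delayed_work,
                          msecs_to_jiffies(rtd->pmdown_time));

    /* on a quick re-open: cancel the pending shutdown */
    if (rtd->pop_wait) {
            rtd->pop_wait = 0;
            cancel_delayed_work(&rtd->delayed_work);
    }
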
diff --git a/sound/sound_core.c b/sound/sound_core.c
index fb9255c..bb23009 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -146,8 +146,7 @@
  * devices only the standard chrdev aliases are requested.
  *
  * All these clutters are scheduled to be removed along with
- * sound-slot/service-* module aliases.  Please take a look at
- * feature-removal-schedule.txt for details.
+ * sound-slot/service-* module aliases.
  */
 #ifdef CONFIG_SOUND_OSS_CORE_PRECLAIM
 static int preclaim_oss = 1;
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 34b9bb7..c183d34 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -2181,6 +2181,10 @@
 		umidi->usb_protocol_ops = &snd_usbmidi_novation_ops;
 		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
 		break;
+	case QUIRK_MIDI_MBOX2:
+		umidi->usb_protocol_ops = &snd_usbmidi_midiman_ops;
+		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
+		break;
 	case QUIRK_MIDI_RAW_BYTES:
 		umidi->usb_protocol_ops = &snd_usbmidi_raw_ops;
 		/*
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 49f9af9..cdcf6b4 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -99,6 +99,42 @@
 },
 
 /*
+ * HP Wireless Audio
+ * When not ignored, this device causes instability for some users, forcing
+ * them to blacklist the entire module.
+ */
+{
+	USB_DEVICE(0x0424, 0xb832),
+	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+		.vendor_name = "Standard Microsystems Corp.",
+		.product_name = "HP Wireless Audio",
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			/* Mixer */
+			{
+				.ifnum = 0,
+				.type = QUIRK_IGNORE_INTERFACE,
+			},
+			/* Playback */
+			{
+				.ifnum = 1,
+				.type = QUIRK_IGNORE_INTERFACE,
+			},
+			/* Capture */
+			{
+				.ifnum = 2,
+				.type = QUIRK_IGNORE_INTERFACE,
+			},
+			/* HID Device, .ifnum = 3 */
+			{
+				.ifnum = -1,
+			}
+		}
+	}
+},
+
+/*
  * Logitech QuickCam: bDeviceClass is vendor-specific, so generic interface
  * class matches do not take effect without an explicit ID match.
  */
@@ -2885,6 +2921,93 @@
 
 	}
 },
+
+/* DIGIDESIGN MBOX 2 */
+{
+	USB_DEVICE(0x0dba, 0x3000),
+	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+		.vendor_name = "Digidesign",
+		.product_name = "Mbox 2",
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			{
+				.ifnum = 0,
+				.type = QUIRK_IGNORE_INTERFACE
+			},
+			{
+				.ifnum = 1,
+				.type = QUIRK_IGNORE_INTERFACE
+			},
+			{
+				.ifnum = 2,
+				.type = QUIRK_AUDIO_FIXED_ENDPOINT,
+				.data = &(const struct audioformat) {
+					.formats = SNDRV_PCM_FMTBIT_S24_3BE,
+					.channels = 2,
+					.iface = 2,
+					.altsetting = 2,
+					.altset_idx = 1,
+					.attributes = 0x00,
+					.endpoint = 0x03,
+					.ep_attr = USB_ENDPOINT_SYNC_ASYNC,
+					.maxpacksize = 0x128,
+					.rates = SNDRV_PCM_RATE_48000,
+					.rate_min = 48000,
+					.rate_max = 48000,
+					.nr_rates = 1,
+					.rate_table = (unsigned int[]) {
+						48000
+					}
+				}
+			},
+			{
+				.ifnum = 3,
+				.type = QUIRK_IGNORE_INTERFACE
+			},
+			{
+				.ifnum = 4,
+				.type = QUIRK_AUDIO_FIXED_ENDPOINT,
+				.data = &(const struct audioformat) {
+				.formats = SNDRV_PCM_FMTBIT_S24_3BE,
+					.channels = 2,
+					.iface = 4,
+					.altsetting = 2,
+					.altset_idx = 1,
+					.attributes = UAC_EP_CS_ATTR_SAMPLE_RATE,
+					.endpoint = 0x85,
+					.ep_attr = USB_ENDPOINT_SYNC_SYNC,
+					.maxpacksize = 0x128,
+					.rates = SNDRV_PCM_RATE_48000,
+					.rate_min = 48000,
+					.rate_max = 48000,
+					.nr_rates = 1,
+					.rate_table = (unsigned int[]) {
+						48000
+					}
+				}
+			},
+			{
+				.ifnum = 5,
+				.type = QUIRK_IGNORE_INTERFACE
+			},
+			{
+				.ifnum = 6,
+				.type = QUIRK_MIDI_MBOX2,
+				.data = &(const struct snd_usb_midi_endpoint_info) {
+					.out_ep =  0x02,
+					.out_cables = 0x0001,
+					.in_ep = 0x81,
+					.in_interval = 0x01,
+					.in_cables = 0x0001
+				}
+			},
+			{
+				.ifnum = -1
+			}
+		}
+	}
+},
 {
 	/* Tascam US122 MKII - playback-only support */
 	.match_flags = USB_DEVICE_ID_MATCH_DEVICE,
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 007fcec..f104c68 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -306,6 +306,7 @@
 		[QUIRK_MIDI_YAMAHA] = create_any_midi_quirk,
 		[QUIRK_MIDI_MIDIMAN] = create_any_midi_quirk,
 		[QUIRK_MIDI_NOVATION] = create_any_midi_quirk,
+		[QUIRK_MIDI_MBOX2] = create_any_midi_quirk,
 		[QUIRK_MIDI_RAW_BYTES] = create_any_midi_quirk,
 		[QUIRK_MIDI_EMAGIC] = create_any_midi_quirk,
 		[QUIRK_MIDI_CME] = create_any_midi_quirk,
@@ -497,6 +498,92 @@
 	return -EAGAIN;
 }
 
+static void mbox2_setup_48_24_magic(struct usb_device *dev)
+{
+	u8 srate[3];
+	u8 temp[12];
+
+	/* Choose 48000Hz permanently */
+	srate[0] = 0x80;
+	srate[1] = 0xbb;
+	srate[2] = 0x00;
+
+	/* Send the magic! */
+	snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0),
+		0x01, 0x22, 0x0100, 0x0085, temp, 0x0003);
+	snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0),
+		0x81, 0xa2, 0x0100, 0x0085, srate, 0x0003);
+	snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0),
+		0x81, 0xa2, 0x0100, 0x0086, srate, 0x0003);
+	snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0),
+		0x81, 0xa2, 0x0100, 0x0003, srate, 0x0003);
+	return;
+}
+
+/* The Digidesign Mbox 2 needs to load its onboard firmware,
+ * and the driver must wait a few seconds for it to initialise.
+ */
+
+#define MBOX2_FIRMWARE_SIZE    646
+#define MBOX2_BOOT_LOADING     0x01 /* Hard coded into the device */
+#define MBOX2_BOOT_READY       0x02 /* Hard coded into the device */
+
+int snd_usb_mbox2_boot_quirk(struct usb_device *dev)
+{
+	struct usb_host_config *config = dev->actconfig;
+	int err;
+	u8 bootresponse[0x12];
+	int fwsize;
+	int count;
+
+	fwsize = le16_to_cpu(get_cfg_desc(config)->wTotalLength);
+
+	if (fwsize != MBOX2_FIRMWARE_SIZE) {
+		snd_printk(KERN_ERR "usb-audio: Invalid firmware size=%d.\n", fwsize);
+		return -ENODEV;
+	}
+
+	snd_printd("usb-audio: Sending Digidesign Mbox 2 boot sequence...\n");
+
+	count = 0;
+	bootresponse[0] = MBOX2_BOOT_LOADING;
+	while ((bootresponse[0] == MBOX2_BOOT_LOADING) && (count < 10)) {
+		msleep(500); /* 0.5 second delay */
+		snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0),
+			/* Control magic - load onboard firmware */
+			0x85, 0xc0, 0x0001, 0x0000, bootresponse, 0x0012);
+		if (bootresponse[0] == MBOX2_BOOT_READY)
+			break;
+		snd_printd("usb-audio: device not ready, resending boot sequence...\n");
+		count++;
+	}
+
+	if (bootresponse[0] != MBOX2_BOOT_READY) {
+		snd_printk(KERN_ERR "usb-audio: Unknown bootresponse=%d, or timed out, ignoring device.\n", bootresponse[0]);
+		return -ENODEV;
+	}
+
+	snd_printdd("usb-audio: device initialised!\n");
+
+	err = usb_get_descriptor(dev, USB_DT_DEVICE, 0,
+		&dev->descriptor, sizeof(dev->descriptor));
+	config = dev->actconfig;
+	if (err < 0)
+		snd_printd("error usb_get_descriptor: %d\n", err);
+
+	err = usb_reset_configuration(dev);
+	if (err < 0)
+		snd_printd("error usb_reset_configuration: %d\n", err);
+	snd_printdd("mbox2_boot: new boot length = %d\n",
+		le16_to_cpu(get_cfg_desc(config)->wTotalLength));
+
+	mbox2_setup_48_24_magic(dev);
+
+	snd_printk(KERN_INFO "usb-audio: Digidesign Mbox 2: 24bit 48kHz\n");
+
+	return 0; /* Successful boot */
+}
+
 /*
  * Setup quirks
  */
@@ -655,6 +742,10 @@
 	case USB_ID(0x0ccd, 0x00b1): /* Terratec Aureon 7.1 USB */
 		return snd_usb_cm6206_boot_quirk(dev);
 
+	case USB_ID(0x0dba, 0x3000):
+		/* Digidesign Mbox 2 */
+		return snd_usb_mbox2_boot_quirk(dev);
+
 	case USB_ID(0x133e, 0x0815):
 		/* Access Music VirusTI Desktop */
 		return snd_usb_accessmusic_boot_quirk(dev);
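
In mbox2_setup_48_24_magic() above, the three srate bytes are the sample rate
encoded as a 3-byte little-endian value, the usual USB audio representation:

    /* 48000 Hz = 0x00bb80, sent low byte first */
    srate[0] = 0x80;        /* bits  7:0  */
    srate[1] = 0xbb;        /* bits 15:8  */
    srate[2] = 0x00;        /* bits 23:16 */
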
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 1ac3fd9..a8172c1 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -76,6 +76,7 @@
 	QUIRK_MIDI_YAMAHA,
 	QUIRK_MIDI_MIDIMAN,
 	QUIRK_MIDI_NOVATION,
+	QUIRK_MIDI_MBOX2,
 	QUIRK_MIDI_RAW_BYTES,
 	QUIRK_MIDI_EMAGIC,
 	QUIRK_MIDI_CME,
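
Taken together, the new MIDI quirk type is wired up in three places in this
series (all three lines appear in the hunks above):

    /* usbaudio.h: declare the new quirk type */
    QUIRK_MIDI_MBOX2,

    /* quirks.c: map the type to a creation handler */
    [QUIRK_MIDI_MBOX2] = create_any_midi_quirk,

    /* midi.c: pick the wire protocol for the new type */
    case QUIRK_MIDI_MBOX2:
            umidi->usb_protocol_ops = &snd_usbmidi_midiman_ops;
            err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
            break;
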
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index fd2f922..07a0345 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -179,29 +179,6 @@
 #define wmb() __asm__ __volatile__("" : : : "memory")
 #define mb() __asm__ __volatile__("" : : : "memory")
 
-/*
- * Convert an iovec element to the given type.
- *
- * This is a fairly ugly trick: we need to know the size of the type and
- * alignment requirement to check the pointer is kosher.  It's also nice to
- * have the name of the type in case we report failure.
- *
- * Typing those three things all the time is cumbersome and error prone, so we
- * have a macro which sets them all up and passes to the real function.
- */
-#define convert(iov, type) \
-	((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
-
-static void *_convert(struct iovec *iov, size_t size, size_t align,
-		      const char *name)
-{
-	if (iov->iov_len != size)
-		errx(1, "Bad iovec size %zu for %s", iov->iov_len, name);
-	if ((unsigned long)iov->iov_base % align != 0)
-		errx(1, "Bad alignment %p for %s", iov->iov_base, name);
-	return iov->iov_base;
-}
-
 /* Wrapper for the last available index.  Makes it easier to change. */
 #define lg_last_avail(vq)	((vq)->last_avail_idx)
 
@@ -228,7 +205,9 @@
 }
 
-/* Take len bytes from the front of this iovec. */
+/* Take len bytes from the front of this iovec, copying them to dest
+ * if dest is non-NULL. */
-static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
+static void iov_consume(struct iovec iov[], unsigned num_iov,
+			void *dest, unsigned len)
 {
 	unsigned int i;
 
@@ -236,11 +214,16 @@
 		unsigned int used;
 
 		used = iov[i].iov_len < len ? iov[i].iov_len : len;
+		if (dest) {
+			memcpy(dest, iov[i].iov_base, used);
+			dest += used;
+		}
 		iov[i].iov_base += used;
 		iov[i].iov_len -= used;
 		len -= used;
 	}
-	assert(len == 0);
+	if (len != 0)
+		errx(1, "iovec too short!");
 }
 
 /* The device virtqueue descriptors are followed by feature bitmasks. */
@@ -864,7 +847,7 @@
 			warn("Write to stdout gave %i (%d)", len, errno);
 			break;
 		}
-		iov_consume(iov, out, len);
+		iov_consume(iov, out, NULL, len);
 	}
 
 	/*
@@ -1591,9 +1574,9 @@
 {
 	struct vblk_info *vblk = vq->dev->priv;
 	unsigned int head, out_num, in_num, wlen;
-	int ret;
+	int ret, i;
 	u8 *in;
-	struct virtio_blk_outhdr *out;
+	struct virtio_blk_outhdr out;
 	struct iovec iov[vq->vring.num];
 	off64_t off;
 
@@ -1603,32 +1586,36 @@
 	 */
 	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 
-	/*
-	 * Every block request should contain at least one output buffer
-	 * (detailing the location on disk and the type of request) and one
-	 * input buffer (to hold the result).
-	 */
-	if (out_num == 0 || in_num == 0)
-		errx(1, "Bad virtblk cmd %u out=%u in=%u",
-		     head, out_num, in_num);
+	/* Copy the output header from the front of the iov (adjusts iov) */
+	iov_consume(iov, out_num, &out, sizeof(out));
 
-	out = convert(&iov[0], struct virtio_blk_outhdr);
-	in = convert(&iov[out_num+in_num-1], u8);
+	/* Find and trim end of iov input array, for our status byte. */
+	in = NULL;
+	for (i = out_num + in_num - 1; i >= out_num; i--) {
+		if (iov[i].iov_len > 0) {
+			in = iov[i].iov_base + iov[i].iov_len - 1;
+			iov[i].iov_len--;
+			break;
+		}
+	}
+	if (!in)
+		errx(1, "Bad virtblk cmd with no room for status");
+
 	/*
 	 * For historical reasons, block operations are expressed in 512 byte
 	 * "sectors".
 	 */
-	off = out->sector * 512;
+	off = out.sector * 512;
 
 	/*
 	 * In general the virtio block driver is allowed to try SCSI commands.
 	 * It'd be nice if we supported eject, for example, but we don't.
 	 */
-	if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
+	if (out.type & VIRTIO_BLK_T_SCSI_CMD) {
 		fprintf(stderr, "Scsi commands unsupported\n");
 		*in = VIRTIO_BLK_S_UNSUPP;
 		wlen = sizeof(*in);
-	} else if (out->type & VIRTIO_BLK_T_OUT) {
+	} else if (out.type & VIRTIO_BLK_T_OUT) {
 		/*
 		 * Write
 		 *
@@ -1636,10 +1623,10 @@
 		 * if they try to write past end.
 		 */
 		if (lseek64(vblk->fd, off, SEEK_SET) != off)
-			err(1, "Bad seek to sector %llu", out->sector);
+			err(1, "Bad seek to sector %llu", out.sector);
 
-		ret = writev(vblk->fd, iov+1, out_num-1);
-		verbose("WRITE to sector %llu: %i\n", out->sector, ret);
+		ret = writev(vblk->fd, iov, out_num);
+		verbose("WRITE to sector %llu: %i\n", out.sector, ret);
 
 		/*
 		 * Grr... Now we know how long the descriptor they sent was, we
@@ -1655,7 +1642,7 @@
 
 		wlen = sizeof(*in);
 		*in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
-	} else if (out->type & VIRTIO_BLK_T_FLUSH) {
+	} else if (out.type & VIRTIO_BLK_T_FLUSH) {
 		/* Flush */
 		ret = fdatasync(vblk->fd);
 		verbose("FLUSH fdatasync: %i\n", ret);
@@ -1669,10 +1656,9 @@
 		 * if they try to read past end.
 		 */
 		if (lseek64(vblk->fd, off, SEEK_SET) != off)
-			err(1, "Bad seek to sector %llu", out->sector);
+			err(1, "Bad seek to sector %llu", out.sector);
 
-		ret = readv(vblk->fd, iov+1, in_num-1);
-		verbose("READ from sector %llu: %i\n", out->sector, ret);
+		ret = readv(vblk->fd, iov + out_num, in_num);
 		if (ret >= 0) {
 			wlen = sizeof(*in) + ret;
 			*in = VIRTIO_BLK_S_OK;
@@ -1758,7 +1744,7 @@
 		len = readv(rng_info->rfd, iov, in_num);
 		if (len <= 0)
 			err(1, "Read from /dev/random gave %i", len);
-		iov_consume(iov, in_num, len);
+		iov_consume(iov, in_num, NULL, len);
 		totlen += len;
 	}
 
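The extended iov_consume() now doubles as "copy and advance" or plain "skip",
which is exactly how the callers above use it:

    struct virtio_blk_outhdr out;

    /* pull a fixed-size header off the front of the scatter-gather list */
    iov_consume(iov, out_num, &out, sizeof(out));

    /* or discard len bytes whose contents are not needed */
    iov_consume(iov, out_num, NULL, len);
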
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index f856495..f09641d 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -1,9 +1,22 @@
+CC		= $(CROSS_COMPILE)gcc
+BUILD_OUTPUT	:= $(PWD)
+PREFIX		:= /usr
+DESTDIR		:=
+
 turbostat : turbostat.c
 CFLAGS +=	-Wall
+CFLAGS +=	-I../../../../arch/x86/include/uapi/
 
+%: %.c
+	@mkdir -p $(BUILD_OUTPUT)
+	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
+
+.PHONY : clean
 clean :
-	rm -f turbostat
+	@rm -f $(BUILD_OUTPUT)/turbostat
 
-install :
-	install turbostat /usr/bin/turbostat
-	install turbostat.8 /usr/share/man/man8
+install : turbostat
+	install -d  $(DESTDIR)$(PREFIX)/bin
+	install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat
+	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
+	install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
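
With the new variables, out-of-tree builds and staged installs work as
expected; for example (paths are illustrative):

    make BUILD_OUTPUT=/tmp/turbostat-build
    make install DESTDIR=/tmp/stage PREFIX=/usr
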
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index e4d0690..0d7dc2c 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -11,16 +11,16 @@
 .RB [ Options ]
 .RB [ "\-i interval_sec" ]
 .SH DESCRIPTION
-\fBturbostat \fP reports processor topology, frequency
-and idle power state statistics on modern X86 processors.
+\fBturbostat \fP reports processor topology, frequency,
+idle power-state statistics, temperature and power on modern X86 processors.
 Either \fBcommand\fP is forked and statistics are printed
 upon its completion, or statistics are printed periodically.
 
 \fBturbostat \fP
-requires that the processor
+must be run as root, and
+minimally requires that the processor
 supports an "invariant" TSC, plus the APERF and MPERF MSRs.
-\fBturbostat \fP will report idle cpu power state residency
-on processors that additionally support C-state residency counters.
+Additional information is reported depending on hardware counter support.
 
 .SS Options
 The \fB-p\fP option limits output to the 1st thread in 1st core of each package.
@@ -57,7 +57,15 @@
 \fBGHz\fP average clock rate while the CPU was in c0 state.
 \fBTSC\fP average GHz that the TSC ran during the entire interval.
 \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states.
+\fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
+\fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
 \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states.
+\fBPkg_W\fP Watts consumed by the whole package.
+\fBCor_W\fP Watts consumed by the core part of the package.
+\fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors.
+\fBRAM_W\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
+\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.
+\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
 .fi
 .PP
 .SH EXAMPLE
@@ -66,50 +74,73 @@
 for turbostat to fork).
 
 The first row of statistics is a summary for the entire system.
-Note that the summary is a weighted average.
+For residency % columns, the summary is a weighted average.
+For Temperature columns, the summary is the column maximum.
+For Watts columns, the summary is a system total.
 Subsequent rows show per-CPU statistics.
 
 .nf
-[root@x980]# ./turbostat
-cor CPU    %c0  GHz  TSC    %c1    %c3    %c6   %pc3   %pc6
-          0.09 1.62 3.38   1.83   0.32  97.76   1.26  83.61
-  0   0   0.15 1.62 3.38  10.23   0.05  89.56   1.26  83.61
-  0   6   0.05 1.62 3.38  10.34
-  1   2   0.03 1.62 3.38   0.07   0.05  99.86
-  1   8   0.03 1.62 3.38   0.06
-  2   4   0.21 1.62 3.38   0.10   1.49  98.21
-  2  10   0.02 1.62 3.38   0.29
-  8   1   0.04 1.62 3.38   0.04   0.08  99.84
-  8   7   0.01 1.62 3.38   0.06
-  9   3   0.53 1.62 3.38   0.10   0.20  99.17
-  9   9   0.02 1.62 3.38   0.60
- 10   5   0.01 1.62 3.38   0.02   0.04  99.92
- 10  11   0.02 1.62 3.38   0.02
+[root@sandy]# ./turbostat
+cor CPU    %c0  GHz  TSC    %c1    %c3    %c6    %c7 CTMP PTMP   %pc2   %pc3   %pc6   %pc7  Pkg_W  Cor_W GFX_W
+          0.06 0.80 2.29   0.11   0.00   0.00  99.83   47   40   0.26   0.01   0.44  98.78   3.49   0.12  0.14
+  0   0   0.07 0.80 2.29   0.07   0.00   0.00  99.86   40   40   0.26   0.01   0.44  98.78   3.49   0.12  0.14
+  0   4   0.03 0.80 2.29   0.12
+  1   1   0.04 0.80 2.29   0.25   0.01   0.00  99.71   40
+  1   5   0.16 0.80 2.29   0.13
+  2   2   0.05 0.80 2.29   0.06   0.01   0.00  99.88   40
+  2   6   0.03 0.80 2.29   0.08
+  3   3   0.05 0.80 2.29   0.08   0.00   0.00  99.87   47
+  3   7   0.04 0.84 2.29   0.09
 .fi
 .SH SUMMARY EXAMPLE
 The "-s" option prints the column headers just once,
 and then the one line system summary for each sample interval.
 
 .nf
-[root@x980]# ./turbostat -s
-   %c0  GHz  TSC    %c1    %c3    %c6   %pc3   %pc6
-  0.23 1.67 3.38   2.00   0.30  97.47   1.07  82.12
-  0.10 1.62 3.38   1.87   2.25  95.77  12.02  72.60
-  0.20 1.64 3.38   1.98   0.11  97.72   0.30  83.36
-  0.11 1.70 3.38   1.86   1.81  96.22   9.71  74.90
+[root@wsm]# turbostat -s
+   %c0  GHz  TSC    %c1    %c3    %c6 CTMP   %pc3   %pc6
+  1.40 2.81 3.38  10.78  43.47  44.35   42  13.67   2.09
+  1.34 2.90 3.38  11.48  58.96  28.23   41  19.89   0.15
+  1.55 2.72 3.38  26.73  37.66  34.07   42   2.53   2.80
+  1.37 2.83 3.38  16.95  60.05  21.63   42   5.76   0.20
 .fi
 .SH VERBOSE EXAMPLE
 The "-v" option adds verbosity to the output:
 
 .nf
-GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2)
-12 * 133 = 1600 MHz max efficiency
-25 * 133 = 3333 MHz TSC frequency
-26 * 133 = 3467 MHz max turbo 4 active cores
-26 * 133 = 3467 MHz max turbo 3 active cores
-27 * 133 = 3600 MHz max turbo 2 active cores
-27 * 133 = 3600 MHz max turbo 1 active cores
-
+[root@ivy]# turbostat -v
+turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org>
+CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9)
+CPUID(6): APERF, DTS, PTM, EPB
+RAPL: 851 sec. Joule Counter Range
+cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300
+16 * 100 = 1600 MHz max efficiency
+35 * 100 = 3500 MHz TSC frequency
+cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret)
+cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727
+37 * 100 = 3700 MHz max turbo 4 active cores
+38 * 100 = 3800 MHz max turbo 3 active cores
+39 * 100 = 3900 MHz max turbo 2 active cores
+39 * 100 = 3900 MHz max turbo 1 active cores
+cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
+cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.)
+cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.)
+cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked)
+cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled)
+cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled)
+cpu0: MSR_PP0_POLICY: 0
+cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
+cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: MSR_PP1_POLICY: 0
+cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
+cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1)
+cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1)
+cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1)
+ ...
 .fi
 The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
 available at the minimum package voltage.  The \fBTSC frequency\fP is the nominal
@@ -142,7 +173,7 @@
  10   5   1.42 3.43 3.38   2.14  30.99  65.44
  10  11   0.16 2.88 3.38   3.40
 .fi
-Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit
+Above the cycle soaker drives cpu7 up to its 3.6 GHz turbo limit
 while the other processors are generally in various states of idle.
 
 Note that cpu1 and cpu7 are HT siblings within core8.
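
All of the Celsius readouts above follow one rule: the thermal MSRs report an
offset in degrees below the TCC activation temperature (tjMax), so the
turbostat code below computes

    temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);

Worked through for the verbose output shown: MSR_IA32_TEMPERATURE_TARGET
0x00691400 yields a tjMax of (0x0069 & 0x7F) = 105 C, and
MSR_IA32_THERM_STATUS 0x88560000 yields an offset of (0x8856 & 0x7F) = 86,
i.e. 105 - 86 = 19 C.
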
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ea095ab..ce6d460 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -20,6 +20,7 @@
  */
 
 #define _GNU_SOURCE
+#include <asm/msr.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <sys/types.h>
@@ -35,28 +36,18 @@
 #include <ctype.h>
 #include <sched.h>
 
-#define MSR_NEHALEM_PLATFORM_INFO	0xCE
-#define MSR_NEHALEM_TURBO_RATIO_LIMIT	0x1AD
-#define MSR_IVT_TURBO_RATIO_LIMIT	0x1AE
-#define MSR_APERF	0xE8
-#define MSR_MPERF	0xE7
-#define MSR_PKG_C2_RESIDENCY	0x60D	/* SNB only */
-#define MSR_PKG_C3_RESIDENCY	0x3F8
-#define MSR_PKG_C6_RESIDENCY	0x3F9
-#define MSR_PKG_C7_RESIDENCY	0x3FA	/* SNB only */
-#define MSR_CORE_C3_RESIDENCY	0x3FC
-#define MSR_CORE_C6_RESIDENCY	0x3FD
-#define MSR_CORE_C7_RESIDENCY	0x3FE	/* SNB only */
-
 char *proc_stat = "/proc/stat";
 unsigned int interval_sec = 5;	/* set with -i interval_sec */
 unsigned int verbose;		/* set with -v */
+unsigned int rapl_verbose;	/* set with -R */
+unsigned int thermal_verbose;	/* set with -T */
 unsigned int summary_only;	/* set with -s */
 unsigned int skip_c0;
 unsigned int skip_c1;
 unsigned int do_nhm_cstates;
 unsigned int do_snb_cstates;
 unsigned int has_aperf;
+unsigned int has_epb;
 unsigned int units = 1000000000;	/* Ghz etc */
 unsigned int genuine_intel;
 unsigned int has_invariant_tsc;
@@ -74,6 +65,23 @@
 unsigned int show_pkg_only;
 unsigned int show_core_only;
 char *output_buffer, *outp;
+unsigned int do_rapl;
+unsigned int do_dts;
+unsigned int do_ptm;
+unsigned int tcc_activation_temp;
+unsigned int tcc_activation_temp_override;
+double rapl_power_units, rapl_energy_units, rapl_time_units;
+double rapl_joule_counter_range;
+
+#define RAPL_PKG	(1 << 0)
+#define RAPL_CORES	(1 << 1)
+#define RAPL_GFX	(1 << 2)
+#define RAPL_DRAM	(1 << 3)
+#define RAPL_PKG_PERF_STATUS	(1 << 4)
+#define RAPL_DRAM_PERF_STATUS	(1 << 5)
+#define	TJMAX_DEFAULT	100
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
 
 int aperf_mperf_unstable;
 int backwards_count;
@@ -101,6 +109,7 @@
 	unsigned long long c3;
 	unsigned long long c6;
 	unsigned long long c7;
+	unsigned int core_temp_c;
 	unsigned int core_id;
 } *core_even, *core_odd;
 
@@ -110,6 +119,14 @@
 	unsigned long long pc6;
 	unsigned long long pc7;
 	unsigned int package_id;
+	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
+	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
+	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
+	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
+	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
+	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
+	unsigned int pkg_temp_c;
+
 } *package_even, *package_odd;
 
 #define ODD_COUNTERS thread_odd, core_odd, package_odd
@@ -247,6 +264,12 @@
 		outp += sprintf(outp, "    %%c6");
 	if (do_snb_cstates)
 		outp += sprintf(outp, "    %%c7");
+
+	if (do_dts)
+		outp += sprintf(outp, " CTMP");
+	if (do_ptm)
+		outp += sprintf(outp, " PTMP");
+
 	if (do_snb_cstates)
 		outp += sprintf(outp, "   %%pc2");
 	if (do_nhm_cstates)
@@ -256,6 +279,19 @@
 	if (do_snb_cstates)
 		outp += sprintf(outp, "   %%pc7");
 
+	if (do_rapl & RAPL_PKG)
+		outp += sprintf(outp, "  Pkg_W");
+	if (do_rapl & RAPL_CORES)
+		outp += sprintf(outp, "  Cor_W");
+	if (do_rapl & RAPL_GFX)
+		outp += sprintf(outp, " GFX_W");
+	if (do_rapl & RAPL_DRAM)
+		outp += sprintf(outp, " RAM_W");
+	if (do_rapl & RAPL_PKG_PERF_STATUS)
+		outp += sprintf(outp, " PKG_%%");
+	if (do_rapl & RAPL_DRAM_PERF_STATUS)
+		outp += sprintf(outp, " RAM_%%");
+
 	outp += sprintf(outp, "\n");
 }
 
@@ -285,6 +321,7 @@
 		fprintf(stderr, "c3: %016llX\n", c->c3);
 		fprintf(stderr, "c6: %016llX\n", c->c6);
 		fprintf(stderr, "c7: %016llX\n", c->c7);
+		fprintf(stderr, "DTS: %dC\n", c->core_temp_c);
 	}
 
 	if (p) {
@@ -293,6 +330,13 @@
 		fprintf(stderr, "pc3: %016llX\n", p->pc3);
 		fprintf(stderr, "pc6: %016llX\n", p->pc6);
 		fprintf(stderr, "pc7: %016llX\n", p->pc7);
+		fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg);
+		fprintf(stderr, "Joules COR: %0X\n", p->energy_cores);
+		fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx);
+		fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram);
+		fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status);
+		fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status);
+		fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c);
 	}
 	return 0;
 }
@@ -302,14 +346,21 @@
  * package: "pk" 2 columns %2d
  * core: "cor" 3 columns %3d
  * CPU: "CPU" 3 columns %3d
+ * Pkg_W: %6.2
+ * Cor_W: %6.2
+ * GFX_W: %5.2
+ * RAM_W: %5.2
  * GHz: "GHz" 3 columns %3.2
  * TSC: "TSC" 3 columns %3.2
  * percentage " %pc3" %6.2
+ * Perf Status percentage: %5.2
+ * "CTMP" 4 columns %4d
  */
 int format_counters(struct thread_data *t, struct core_data *c,
 	struct pkg_data *p)
 {
 	double interval_float;
+	char *fmt5, *fmt6;
 
 	 /* if showing only 1st thread in core and this isn't one, bail out */
 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -349,7 +400,6 @@
 		if (show_cpu)
 			outp += sprintf(outp, " %3d", t->cpu_id);
 	}
-
 	/* %c0 */
 	if (do_nhm_cstates) {
 		if (show_pkg || show_core || show_cpu)
@@ -414,10 +464,16 @@
 	if (do_snb_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc);
 
+	if (do_dts)
+		outp += sprintf(outp, " %4d", c->core_temp_c);
+
 	/* print per-package data only for 1st core in package */
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		goto done;
 
+	if (do_ptm)
+		outp += sprintf(outp, " %4d", p->pkg_temp_c);
+
 	if (do_snb_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
 	if (do_nhm_cstates)
@@ -426,6 +482,32 @@
 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
 	if (do_snb_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
+
+	/*
+	 * If the measurement interval exceeds the minimum RAPL Joule counter range,
+	 * indicate that the results are suspect by printing "**" in the fraction place.
+	 */
+	if (interval_float < rapl_joule_counter_range) {
+		fmt5 = " %5.2f";
+		fmt6 = " %6.2f";
+	} else {
+		fmt5 = " %3.0f**";
+		fmt6 = " %4.0f**";
+	}
+
+	if (do_rapl & RAPL_PKG)
+		outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float);
+	if (do_rapl & RAPL_CORES)
+		outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float);
+	if (do_rapl & RAPL_GFX)
+		outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float);
+	if (do_rapl & RAPL_DRAM)
+		outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float);
+	if (do_rapl & RAPL_PKG_PERF_STATUS)
+		outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+	if (do_rapl & RAPL_DRAM_PERF_STATUS)
+		outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+
 done:
 	outp += sprintf(outp, "\n");
 
@@ -435,6 +517,7 @@
 void flush_stdout()
 {
 	fputs(output_buffer, stdout);
+	fflush(stdout);
 	outp = output_buffer;
 }
 void flush_stderr()
@@ -461,6 +544,13 @@
 	for_all_cpus(format_counters, t, c, p);
 }
 
+#define DELTA_WRAP32(new, old)			\
+	if (new > old) {			\
+		old = new - old;		\
+	} else {				\
+		old = 0x100000000 + new - old;	\
+	}
+
 void
 delta_package(struct pkg_data *new, struct pkg_data *old)
 {
@@ -468,6 +558,14 @@
 	old->pc3 = new->pc3 - old->pc3;
 	old->pc6 = new->pc6 - old->pc6;
 	old->pc7 = new->pc7 - old->pc7;
+	old->pkg_temp_c = new->pkg_temp_c;
+
+	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
+	DELTA_WRAP32(new->energy_cores, old->energy_cores);
+	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
+	DELTA_WRAP32(new->energy_dram, old->energy_dram);
+	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
+	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
 }
 
 void
@@ -476,6 +574,7 @@
 	old->c3 = new->c3 - old->c3;
 	old->c6 = new->c6 - old->c6;
 	old->c7 = new->c7 - old->c7;
+	old->core_temp_c = new->core_temp_c;
 }
 
 /*
@@ -582,11 +681,20 @@
 	c->c3 = 0;
 	c->c6 = 0;
 	c->c7 = 0;
+	c->core_temp_c = 0;
 
 	p->pc2 = 0;
 	p->pc3 = 0;
 	p->pc6 = 0;
 	p->pc7 = 0;
+
+	p->energy_pkg = 0;
+	p->energy_dram = 0;
+	p->energy_cores = 0;
+	p->energy_gfx = 0;
+	p->rapl_pkg_perf_status = 0;
+	p->rapl_dram_perf_status = 0;
+	p->pkg_temp_c = 0;
 }
 int sum_counters(struct thread_data *t, struct core_data *c,
 	struct pkg_data *p)
@@ -607,6 +715,8 @@
 	average.cores.c6 += c->c6;
 	average.cores.c7 += c->c7;
 
+	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
+
 	/* sum per-pkg values only for 1st core in pkg */
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;
@@ -616,6 +726,15 @@
 	average.packages.pc6 += p->pc6;
 	average.packages.pc7 += p->pc7;
 
+	average.packages.energy_pkg += p->energy_pkg;
+	average.packages.energy_dram += p->energy_dram;
+	average.packages.energy_cores += p->energy_cores;
+	average.packages.energy_gfx += p->energy_gfx;
+
+	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
+
+	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
+	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
 	return 0;
 }
 /*
@@ -667,23 +786,26 @@
 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
 	int cpu = t->cpu_id;
+	unsigned long long msr;
 
-	if (cpu_migrate(cpu))
+	if (cpu_migrate(cpu)) {
+		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
 		return -1;
+	}
 
 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
 
 	if (has_aperf) {
-		if (get_msr(cpu, MSR_APERF, &t->aperf))
+		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
 			return -3;
-		if (get_msr(cpu, MSR_MPERF, &t->mperf))
+		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
 			return -4;
 	}
 
 	if (extra_delta_offset32) {
-		if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32))
+		if (get_msr(cpu, extra_delta_offset32, &msr))
 			return -5;
-		t->extra_delta32 &= 0xFFFFFFFF;
+		t->extra_delta32 = msr & 0xFFFFFFFF;
 	}
 
 	if (extra_delta_offset64)
@@ -691,9 +813,9 @@
 			return -5;
 
 	if (extra_msr_offset32) {
-		if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32))
+		if (get_msr(cpu, extra_msr_offset32, &msr))
 			return -5;
-		t->extra_msr32 &= 0xFFFFFFFF;
+		t->extra_msr32 = msr & 0xFFFFFFFF;
 	}
 
 	if (extra_msr_offset64)
@@ -715,6 +837,13 @@
 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
 			return -8;
 
+	if (do_dts) {
+		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+			return -9;
+		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
+	}
+
+
 	/* collect package counters only for 1st core in package */
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;
@@ -731,6 +860,41 @@
 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
 			return -12;
 	}
+	if (do_rapl & RAPL_PKG) {
+		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
+			return -13;
+		p->energy_pkg = msr & 0xFFFFFFFF;
+	}
+	if (do_rapl & RAPL_CORES) {
+		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
+			return -14;
+		p->energy_cores = msr & 0xFFFFFFFF;
+	}
+	if (do_rapl & RAPL_DRAM) {
+		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
+			return -15;
+		p->energy_dram = msr & 0xFFFFFFFF;
+	}
+	if (do_rapl & RAPL_GFX) {
+		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
+			return -16;
+		p->energy_gfx = msr & 0xFFFFFFFF;
+	}
+	if (do_rapl & RAPL_PKG_PERF_STATUS) {
+		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
+			return -16;
+		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
+	}
+	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
+			return -16;
+		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
+	}
+	if (do_ptm) {
+		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+			return -17;
+		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
+	}
 	return 0;
 }
 
@@ -742,10 +906,10 @@
 	if (!do_nehalem_platform_info)
 		return;
 
-	get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr);
+	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
 
-	if (verbose > 1)
-		fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
+	if (verbose)
+		fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
 
 	ratio = (msr >> 40) & 0xFF;
 	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
@@ -760,8 +924,8 @@
 
 	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
 
-	if (verbose > 1)
-		fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr);
+	if (verbose)
+		fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
@@ -804,14 +968,56 @@
 			ratio, bclk, ratio * bclk);
 
 print_nhm_turbo_ratio_limits:
+	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+
+#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
+
+	fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
+
+	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ",
+		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
+		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
+		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
+		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
+		(msr & (1 << 15)) ? "" : "UN",
+		(unsigned int)msr & 7);
+
+
+	switch(msr & 0x7) {
+	case 0:
+		fprintf(stderr, "pc0");
+		break;
+	case 1:
+		fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
+		break;
+	case 2:
+		fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
+		break;
+	case 3:
+		fprintf(stderr, "pc6");
+		break;
+	case 4:
+		fprintf(stderr, "pc7");
+		break;
+	case 5:
+		fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
+		break;
+	case 7:
+		fprintf(stderr, "unlimited");
+		break;
+	default:
+		fprintf(stderr, "invalid");
+	}
+	fprintf(stderr, ")\n");
 
 	if (!do_nehalem_turbo_ratio_limit)
 		return;
 
-	get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr);
+	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
 
-	if (verbose > 1)
-		fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr);
+	if (verbose)
+		fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
@@ -1100,15 +1306,22 @@
 void turbostat_loop()
 {
 	int retval;
+	int restarted = 0;
 
 restart:
+	restarted++;
+
 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
 	if (retval < -1) {
 		exit(retval);
 	} else if (retval == -1) {
+		if (restarted > 1) {
+			exit(retval);
+		}
 		re_initialize();
 		goto restart;
 	}
+	restarted = 0;
 	gettimeofday(&tv_even, (struct timezone *)NULL);
 
 	while (1) {
@@ -1207,6 +1420,299 @@
 	}
 }
 
+/*
+ * print_epb()
+ * Decode the ENERGY_PERF_BIAS MSR
+ */
+int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	unsigned long long msr;
+	char *epb_string;
+	int cpu;
+
+	if (!has_epb)
+		return 0;
+
+	cpu = t->cpu_id;
+
+	/* EPB is per-package */
+	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+		return 0;
+
+	if (cpu_migrate(cpu)) {
+		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		return -1;
+	}
+
+	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
+		return 0;
+
+	switch (msr & 0x7) {
+	case ENERGY_PERF_BIAS_PERFORMANCE:
+		epb_string = "performance";
+		break;
+	case ENERGY_PERF_BIAS_NORMAL:
+		epb_string = "balanced";
+		break;
+	case ENERGY_PERF_BIAS_POWERSAVE:
+		epb_string = "powersave";
+		break;
+	default:
+		epb_string = "custom";
+		break;
+	}
+	fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
+
+	return 0;
+}
+
+#define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
+#define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
+
+/*
+ * rapl_probe()
+ *
+ * sets do_rapl
+ */
+void rapl_probe(unsigned int family, unsigned int model)
+{
+	unsigned long long msr;
+	double tdp;
+
+	if (!genuine_intel)
+		return;
+
+	if (family != 6)
+		return;
+
+	switch (model) {
+	case 0x2A:
+	case 0x3A:
+		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
+		break;
+	case 0x2D:
+	case 0x3E:
+		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
+		break;
+	default:
+		return;
+	}
+
+	/* units on package 0, verify later other packages match */
+	if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
+		return;
+
+	rapl_power_units = 1.0 / (1 << (msr & 0xF));
+	rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
+	rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
+
+	/* get TDP to determine energy counter range */
+	if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
+		return;
+
+	tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+
+	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+
+	if (verbose)
+		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+
+	return;
+}
+
+int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	unsigned long long msr;
+	unsigned int dts;
+	int cpu;
+
+	if (!(do_dts || do_ptm))
+		return 0;
+
+	cpu = t->cpu_id;
+
+	/* DTS is per-core, no need to print for each thread */
+	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+		return 0;
+
+	if (cpu_migrate(cpu)) {
+		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		return -1;
+	}
+
+	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
+		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
+			cpu, msr, tcc_activation_temp - dts);
+
+#ifdef	THERM_DEBUG
+		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		dts2 = (msr >> 8) & 0x7F;
+		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
+#endif
+	}
+
+
+	if (do_dts) {
+		unsigned int resolution;
+
+		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		resolution = (msr >> 27) & 0xF;
+		fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+			cpu, msr, tcc_activation_temp - dts, resolution);
+
+#ifdef THERM_DEBUG
+		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
+			return 0;
+
+		dts = (msr >> 16) & 0x7F;
+		dts2 = (msr >> 8) & 0x7F;
+		fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
+#endif
+	}
+
+	return 0;
+}
+
+void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
+{
+	fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
+		cpu, label,
+		((msr >> 15) & 1) ? "EN" : "DIS",
+		((msr >> 0) & 0x7FFF) * rapl_power_units,
+		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
+		(((msr >> 16) & 1) ? "EN" : "DIS"));
+
+	return;
+}
+
+int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	unsigned long long msr;
+	int cpu;
+	double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
+
+	if (!do_rapl)
+		return 0;
+
+	/* RAPL counters are per package, so print only for 1st thread/package */
+	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+		return 0;
+
+	cpu = t->cpu_id;
+	if (cpu_migrate(cpu)) {
+		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		return -1;
+	}
+
+	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
+		return -1;
+
+	local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
+	local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
+	local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
+
+	if (local_rapl_power_units != rapl_power_units)
+		fprintf(stderr, "cpu%d, ERROR: Power units mismatch\n", cpu);
+	if (local_rapl_energy_units != rapl_energy_units)
+		fprintf(stderr, "cpu%d, ERROR: Energy units mismatch\n", cpu);
+	if (local_rapl_time_units != rapl_time_units)
+		fprintf(stderr, "cpu%d, ERROR: Time units mismatch\n", cpu);
+
+	if (verbose) {
+		fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
+			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
+			local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
+	}
+	if (do_rapl & RAPL_PKG) {
+		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
+			return -5;
+
+
+		fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+			cpu, msr,
+			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+
+		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
+			return -9;
+
+		fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
+			cpu, msr, (msr >> 63) & 1 ? "": "UN");
+
+		print_power_limit_msr(cpu, msr, "PKG Limit #1");
+		fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
+			cpu,
+			((msr >> 47) & 1) ? "EN" : "DIS",
+			((msr >> 32) & 0x7FFF) * rapl_power_units,
+			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
+			((msr >> 48) & 1) ? "EN" : "DIS");
+	}
+
+	if (do_rapl & RAPL_DRAM) {
+		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
+			return -6;
+
+
+		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+			cpu, msr,
+			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+
+
+		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
+			return -9;
+		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
+				cpu, msr, (msr >> 31) & 1 ? "": "UN");
+
+		print_power_limit_msr(cpu, msr, "DRAM Limit");
+	}
+	if (do_rapl & RAPL_CORES) {
+		if (verbose) {
+			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
+				return -7;
+
+			fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+
+			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
+				return -9;
+			fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
+					cpu, msr, (msr >> 31) & 1 ? "": "UN");
+			print_power_limit_msr(cpu, msr, "Cores Limit");
+		}
+	}
+	if (do_rapl & RAPL_GFX) {
+		if (verbose) {
+			if (get_msr(cpu, MSR_PP1_POLICY, &msr))
+				return -8;
+
+			fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
+
+			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
+				return -9;
+			fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
+					cpu, msr, (msr >> 31) & 1 ? "": "UN");
+			print_power_limit_msr(cpu, msr, "GFX Limit");
+		}
+	}
+	return 0;
+}
+
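Each field of MSR_RAPL_POWER_UNIT is an exponent N meaning 1/2^N of the base unit (Watts, Joules, seconds), which is what the unit divisors at the top of print_rapl() compute. As a worked example, a hypothetical reading of 0x000a1003 decodes as:

	power_units  = 1.0 / (1 <<  3);	/* 0.125 W */
	energy_units = 1.0 / (1 << 16);	/* ~15.3 uJ */
	time_units   = 1.0 / (1 << 10);	/* ~976.6 usec */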
 
 int is_snb(unsigned int family, unsigned int model)
 {
@@ -1231,6 +1737,72 @@
 		return 133.33;
 }
 
+/*
+ * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
+ * the Thermal Control Circuit (TCC) activates.
+ * This is usually equal to tjMax.
+ *
+ * Older processors do not have this MSR; on those we fall back
+ * to a guess, which can be overridden with the -T cmdline option.
+ *
+ * Several MSR temperature values are in units of degrees-C
+ * below this value, including the Digital Thermal Sensor (DTS),
+ * Package Thermal Management Sensor (PTM), and thermal event thresholds.
+ */
+int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	unsigned long long msr;
+	unsigned int target_c_local;
+	int cpu;
+
+	/* tcc_activation_temp is used only for dts or ptm */
+	if (!(do_dts || do_ptm))
+		return 0;
+
+	/* this is a per-package concept */
+	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+		return 0;
+
+	cpu = t->cpu_id;
+	if (cpu_migrate(cpu)) {
+		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+		return -1;
+	}
+
+	if (tcc_activation_temp_override != 0) {
+		tcc_activation_temp = tcc_activation_temp_override;
+		fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
+			cpu, tcc_activation_temp);
+		return 0;
+	}
+
+	/* Temperature Target MSR is Nehalem and newer only */
+	if (!do_nehalem_platform_info)
+		goto guess;
+
+	if (get_msr(cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
+		goto guess;
+
+	target_c_local = (msr >> 16) & 0x7F;
+
+	if (verbose)
+		fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
+			cpu, msr, target_c_local);
+
+	if (target_c_local < 85 || target_c_local > 120)
+		goto guess;
+
+	tcc_activation_temp = target_c_local;
+
+	return 0;
+
+guess:
+	tcc_activation_temp = TJMAX_DEFAULT;
+	fprintf(stderr, "cpu%d: Guessing tjMax %d C; use -T to specify\n",
+		cpu, tcc_activation_temp);
+
+	return 0;
+}
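Because DTS and PTM readouts count degrees Celsius below the TCC activation point, the absolute temperature follows directly once tcc_activation_temp is known. A minimal sketch, assuming the usual IA32_THERM_STATUS layout (digital readout in bits 22:16); dts_to_celsius is a hypothetical helper:

	/* Convert an IA32_THERM_STATUS digital readout to degrees C */
	static int dts_to_celsius(unsigned long long therm_status, int tjmax)
	{
		int below_tcc = (therm_status >> 16) & 0x7F;

		return tjmax - below_tcc;
	}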
 void check_cpuid()
 {
 	unsigned int eax, ebx, ecx, edx, max_level;
@@ -1244,7 +1816,7 @@
 		genuine_intel = 1;
 
 	if (verbose)
-		fprintf(stderr, "%.4s%.4s%.4s ",
+		fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
 			(char *)&ebx, (char *)&edx, (char *)&ecx);
 
 	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
@@ -1295,10 +1867,19 @@
 
 	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
 	has_aperf = ecx & (1 << 0);
-	if (!has_aperf) {
-		fprintf(stderr, "No APERF MSR\n");
-		exit(1);
-	}
+	do_dts = eax & (1 << 0);
+	do_ptm = eax & (1 << 6);
+	has_epb = ecx & (1 << 3);
+
+	if (verbose)
+		fprintf(stderr, "CPUID(6): %s%s%s%s\n",
+			has_aperf ? "APERF" : "No APERF!",
+			do_dts ? ", DTS" : "",
+			do_ptm ? ", PTM": "",
+			has_epb ? ", EPB": "");
+
+	if (!has_aperf)
+		exit(-1);
 
 	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
 	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
@@ -1307,12 +1888,15 @@
 
 	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
 	do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
+	rapl_probe(family, model);
 }
 
 
 void usage()
 {
-	fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
+	fprintf(stderr, "%s: [-v][-R][-T tempC][-p|-P|-S][-c MSR# | -s][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
 		progname);
 	exit(1);
 }
@@ -1548,6 +2132,17 @@
 
 	if (verbose)
 		print_verbose_header();
+
+	if (verbose)
+		for_all_cpus(print_epb, ODD_COUNTERS);
+
+	if (verbose)
+		for_all_cpus(print_rapl, ODD_COUNTERS);
+
+	for_all_cpus(set_temperature_target, ODD_COUNTERS);
+
+	if (verbose)
+		for_all_cpus(print_thermal, ODD_COUNTERS);
 }
 
 int fork_it(char **argv)
@@ -1604,7 +2199,7 @@
 
 	progname = argv[0];
 
-	while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) {
+	while ((opt = getopt(argc, argv, "+pPSvi:sc:C:m:M:RT:")) != -1) {
 		switch (opt) {
 		case 'p':
 			show_core_only++;
@@ -1636,6 +2231,12 @@
 		case 'M':
 			sscanf(optarg, "%x", &extra_msr_offset64);
 			break;
+		case 'R':
+			rapl_verbose++;
+			break;
+		case 'T':
+			tcc_activation_temp_override = atoi(optarg);
+			break;
 		default:
 			usage();
 		}
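The new flags feed the code above: -R raises RAPL verbosity, and -T supplies the TCC activation temperature (degrees C) to use as tjMax when the MSR is absent or implausible. A hypothetical invocation:

	./turbostat -v -T 100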
@@ -1646,8 +2247,8 @@
 {
 	cmdline(argc, argv);
 
-	if (verbose > 1)
-		fprintf(stderr, "turbostat v2.1 October 6, 2012"
+	if (verbose)
+		fprintf(stderr, "turbostat v3.0 November 23, 2012"
 			" - Len Brown <lenb@kernel.org>\n");
 
 	turbostat_init();
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index f458237..971c9ff 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -1,8 +1,10 @@
+DESTDIR ?=
+
 x86_energy_perf_policy : x86_energy_perf_policy.c
 
 clean :
 	rm -f x86_energy_perf_policy
 
 install :
-	install x86_energy_perf_policy /usr/bin/
-	install x86_energy_perf_policy.8 /usr/share/man/man8/
+	install x86_energy_perf_policy ${DESTDIR}/usr/bin/
+	install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/
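Honoring DESTDIR lets packagers stage the install into a scratch root rather than writing to /usr on the build host, e.g.:

	make -C tools/power/x86/x86_energy_perf_policy DESTDIR=/tmp/stage install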
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 33c5c7e..40b3e54 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -289,7 +289,7 @@
 			"cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
 			&cpu);
 		if (retval != 1)
-			return;
+			break;
 
 		func(cpu);
 	}
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index 9312780..e18b42b 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -17,7 +17,7 @@
 endif
 
 run_tests:
-	./breakpoint_test
+	@./breakpoint_test || echo "breakpoints selftests: [FAIL]"
 
 clean:
 	rm -fr breakpoint_test
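The '@./test || echo ... [FAIL]' idiom here and in the selftest Makefiles below works because '@' suppresses command echo and echo itself exits 0, so a failing test prints [FAIL] without aborting make and the remaining suites still run. From a shell, assuming the test fails:

	./breakpoint_test || echo "breakpoints selftests: [FAIL]"
	echo $?		# 0 even though the test failed, because echo succeeded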
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
index 7c9c20f..12657a5 100644
--- a/tools/testing/selftests/cpu-hotplug/Makefile
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -1,6 +1,6 @@
 all:
 
 run_tests:
-	./on-off-test.sh
+	@./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
index dc79b86..56eb552 100644
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -16,13 +16,13 @@
 
 all:
 ifeq ($(ARCH),X86)
-	gcc $(CFLAGS) kcmp_test.c -o run_test
+	gcc $(CFLAGS) kcmp_test.c -o kcmp_test
 else
 	echo "Not an x86 target, can't build kcmp selftest"
 endif
 
-run-tests: all
-	./kcmp_test
+run_tests: all
+	@./kcmp_test || echo "kcmp_test: [FAIL]"
 
 clean:
-	rm -fr ./run_test
+	rm -fr ./kcmp_test
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index 358cc6b..fa4f1b3 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -72,7 +72,8 @@
 		/* This one should return same fd */
 		ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1);
 		if (ret) {
-			printf("FAIL: 0 expected but %d returned\n", ret);
+			printf("FAIL: 0 expected but %d returned (%s)\n",
+				ret, strerror(errno));
 			ret = -1;
 		} else
 			printf("PASS: 0 returned as expected\n");
@@ -80,7 +81,8 @@
 		/* Compare with self */
 		ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
 		if (ret) {
-			printf("FAIL: 0 expected but %li returned\n", ret);
+			printf("FAIL: 0 expected but %li returned (%s)\n",
+				ret, strerror(errno));
 			ret = -1;
 		} else
 			printf("PASS: 0 returned as expected\n");
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 7c9c20f..0f49c3f 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -1,6 +1,6 @@
 all:
 
 run_tests:
-	./on-off-test.sh
+	@./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 54c0aad..218a122 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -3,8 +3,8 @@
 	gcc -O2 -lrt -lpthread -lpopt -o mq_perf_tests mq_perf_tests.c
 
 run_tests:
-	./mq_open_tests /test1
-	./mq_perf_tests
+	@./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]"
+	@./mq_perf_tests || echo "mq_perf_tests: [FAIL]"
 
 clean:
 	rm -f mq_open_tests mq_perf_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 7300d07..436d2e8 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -8,7 +8,7 @@
 	$(CC) $(CFLAGS) -o $@ $^
 
 run_tests: all
-	/bin/sh ./run_vmtests
+	@/bin/sh ./run_vmtests || echo "vmtests: [FAIL]"
 
 clean:
 	$(RM) hugepage-mmap hugepage-shm  map_hugetlb
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 6d25dcd..fcc9aa2 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -164,7 +164,7 @@
 				r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
 						      dev->buf + started,
 						      GFP_ATOMIC);
-				if (likely(r >= 0)) {
+				if (likely(r == 0)) {
 					++started;
 					virtqueue_kick(vq->vq);
 				}
@@ -177,7 +177,7 @@
 				r = 0;
 			}
 
-		} while (r >= 0);
+		} while (r == 0);
 		if (completed == completed_before)
 			++spurious;
 		assert(completed <= bufs);
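The old 'r >= 0' tests date from when virtqueue_add_buf() returned the remaining ring capacity on success; it now returns 0 on success and a negative errno on failure, hence the exact-zero checks. A sketch of the updated convention, assuming the 3.8-era signature:

	r = virtqueue_add_buf(vq, sg, out_num, in_num, data, GFP_ATOMIC);
	if (r == 0)		/* success: buffer queued */
		virtqueue_kick(vq);
	/* negative r, e.g. -ENOSPC when the ring is full, means nothing was queued */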